theman 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.document +5 -0
- data/.gitignore +25 -0
- data/README.rdoc +69 -0
- data/Rakefile +18 -0
- data/VERSION +1 -0
- data/lib/theman/themans_agency.rb +109 -0
- data/lib/theman.rb +4 -0
- data/spec/fixtures/temp_one.csv +5 -0
- data/spec/fixtures/temp_two.csv +21 -0
- data/spec/spec_helper.rb +15 -0
- data/spec/theman_spec.rb +91 -0
- data/theman.gemspec +55 -0
- metadata +92 -0
data/.document
ADDED
data/.gitignore
ADDED
@@ -0,0 +1,25 @@
|
|
1
|
+
## MAC OS
|
2
|
+
.DS_Store
|
3
|
+
|
4
|
+
## TEXTMATE
|
5
|
+
*.tmproj
|
6
|
+
tmtags
|
7
|
+
|
8
|
+
## EMACS
|
9
|
+
*~
|
10
|
+
\#*
|
11
|
+
.\#*
|
12
|
+
|
13
|
+
## VIM
|
14
|
+
*.swp
|
15
|
+
|
16
|
+
## PROJECT::GENERAL
|
17
|
+
coverage
|
18
|
+
rdoc
|
19
|
+
pkg
|
20
|
+
|
21
|
+
## PROJECT::SPECIFIC
|
22
|
+
spec/database.yml
|
23
|
+
log/
|
24
|
+
spec/fixtures/fucking_huge.csv
|
25
|
+
spec/fucking_huge_spec.rb
|
data/README.rdoc
ADDED
@@ -0,0 +1,69 @@
|
|
1
|
+
= theman
|
2
|
+
|
3
|
+
The man getting you down?
|
4
|
+
|
5
|
+
FasterCSV is great and all, but when you get to 100 MB files it takes a while, and you may only be looking for certain records that match some criteria. Enter theman.
|
6
|
+
|
7
|
+
== Installation
|
8
|
+
|
9
|
+
config.gem 'theman'
|
10
|
+
|
11
|
+
Or
|
12
|
+
|
13
|
+
gem install 'theman'
|
14
|
+
|
15
|
+
== Basic Usage
|
16
|
+
|
17
|
+
cabinet = ::Theman::Agency.new 'pretty.csv'
|
18
|
+
temp_model = cabinet.instance
|
19
|
+
temp_model.count
|
20
|
+
|
21
|
+
== Advanced Usage
|
22
|
+
|
23
|
+
cabinet = ::Theman::Agency.new 'ugly.csv' do |cabinet|
|
24
|
+
cabinet.nulls /"N"/, /"UNKNOWN"/, /""/
|
25
|
+
cabinet.seds "-n -e :a -e '1,15!{P;N;D;};N;ba'"
|
26
|
+
cabinet.table do |t|
|
27
|
+
t.date :date
|
28
|
+
t.integer :ext_id
|
29
|
+
t.float :amount
|
30
|
+
t.boolean :exited
|
31
|
+
end
|
32
|
+
end
|
33
|
+
temp_model = cabinet.instance
|
34
|
+
temp_model.where(:exited => true).count
|
35
|
+
|
36
|
+
In the above example we omitted the last 15 rows and made some things null.
|
37
|
+
|
38
|
+
If you do not provide a table block, your columns will be VARCHAR(255). You can cherry-pick columns to change their data types.
|
39
|
+
|
40
|
+
The temp table has no id column, but you could add one afterwards if you wanted.
|
41
|
+
|
42
|
+
== Troubles
|
43
|
+
|
44
|
+
Table empty? The man has given you crappy data and PostgreSQL has silently dissed your data.
|
45
|
+
|
46
|
+
== Copyright
|
47
|
+
|
48
|
+
(The MIT License)
|
49
|
+
|
50
|
+
Copyright (c) 2010 {mynameisrufus (Rufus Post)}[http://github.com/mynameisrufus]
|
51
|
+
|
52
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
53
|
+
a copy of this software and associated documentation files (the
|
54
|
+
"Software"), to deal in the Software without restriction, including
|
55
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
56
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
57
|
+
permit persons to whom the Software is furnished to do so, subject to
|
58
|
+
the following conditions:
|
59
|
+
|
60
|
+
The above copyright notice and this permission notice shall be
|
61
|
+
included in all copies or substantial portions of the Software.
|
62
|
+
|
63
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
64
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
65
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
66
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
67
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
68
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
69
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/Rakefile
ADDED
@@ -0,0 +1,18 @@
|
|
1
|
+
require 'rubygems'
require 'rake'

# Gem packaging and release tasks are provided by Jeweler. When Jeweler cannot
# be loaded the tasks are simply not defined and a hint is printed instead.
begin
  require 'jeweler'

  Jeweler::Tasks.new do |spec|
    spec.name        = "theman"
    spec.summary     = "PostgreSQL AR temporary table generator using PostgreSQL COPY"
    spec.description = "longer description of your gem"
    spec.email       = "rufuspost@gmail.com"
    spec.homepage    = "http://github.com/mynameisrufus/theman"
    spec.authors     = ["Rufus Post"]
    spec.add_development_dependency("rspec", ">= 2.0.0.beta.20")
  end

  Jeweler::GemcutterTasks.new
rescue LoadError
  puts "Jeweler (or a dependency) not available. Install it with: gem install jeweler"
end
|
data/VERSION
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
0.0.1
|
@@ -0,0 +1,109 @@
|
|
1
|
+
require 'shellwords'

module Theman
  # Loads a CSV file into a temporary table and exposes that table through an
  # anonymous model class (see #instance).
  #
  # The header row of the CSV decides the column names; the rows are streamed
  # into the table with "COPY ... FROM STDIN", optionally filtered through sed.
  class Agency
    # table_proc is never assigned by this class; the reader is kept only so
    # the public interface stays unchanged.
    attr_reader :table_proc, :null_replacements, :stream_location, :column_names

    # args.first - optional path of the CSV file to load.
    #
    # Yields self (when a block is given) so that nulls, seds, table and
    # instance_parent can be configured. The block runs *before* the model
    # class is built, otherwise instance_parent could never take effect.
    # When a path was given the table is then created and filled.
    def initialize(*args)
      # rand(10) so that every digit 0-9 can occur in the table name.
      @cabinet_id = "c#{Array.new(10) { rand(10) }.join}"
      @column_names = {}
      yield self if block_given?
      instance
      unless args.first.nil?
        @stream_location = args.first
        create_table
        pipe_it
      end
    end

    # The anonymous model class bound to the generated table name. It is built
    # on first access and memoized.
    def instance
      @instance ||= build_instance
    end

    # Sets the superclass used for the model class. Only effective before the
    # model class has been built, i.e. inside the constructor block.
    def instance_parent(klass)
      @parent = klass
    end

    # Yields self so column types can be declared, e.g. t.date :created.
    def table(&block)
      yield self if block_given?
    end

    # Regexps whose matches are blanked out (via sed) before the data is copied.
    def nulls(*args)
      @null_replacements = args
    end

    # Extra raw sed argument strings; each one becomes its own "| sed ..." stage.
    def seds(*args)
      @extra_seds = args
    end

    # Turns a CSV header cell into a column symbol: spaces become underscores,
    # remaining non-word characters are dropped and the result is downcased.
    def symbolize(name)
      name.gsub(/ /,"_").gsub(/\W/, "").downcase.to_sym
    end

    # One declaration method per column type: t.integer :ext_id, t.date :day, ...
    # The optional second argument is an options hash handed on to the column.
    %w( string text integer float decimal datetime timestamp time date binary boolean ).each do |column_type|
      class_eval <<-EOV, __FILE__, __LINE__ + 1
        def #{column_type}(*args)
          column(args[0], '#{column_type}', args[1].nil? ? {} : args[1])
        end
      EOV
    end

    protected

    # Creates the anonymous model class named after the generated table id.
    def build_instance
      cabinet_id = @cabinet_id
      Class.new(parent) do
        instance_eval <<-EOV, __FILE__, __LINE__ + 1
          set_table_name "#{cabinet_id}"
          def table_name
            "#{cabinet_id}"
          end
          def inspect
            "Agent (#{cabinet_id})"
          end
        EOV
      end
    end

    # The sed stages appended to the cat command. Always returns a String;
    # it is empty when neither nulls nor seds were configured.
    def sed_to_s
      seds = []
      seds << "| sed #{nulls_to_sed.join(" ")}" if @null_replacements && !@null_replacements.empty?
      seds << "| sed #{@extra_seds.join("| sed ")}" if @extra_seds && !@extra_seds.empty?
      seds.join(" ")
    end

    # One sed substitution expression per configured null pattern.
    def nulls_to_sed
      @null_replacements.map do |null|
        "-e 's/#{null.source}//g'"
      end
    end

    # Superclass of the model class; ::ActiveRecord::Base unless overridden.
    def parent
      @parent ||= ::ActiveRecord::Base
    end

    # Creates the temporary table with one column per cell of the CSV header
    # row. Columns declared through the type methods use the declared type,
    # all others are strings.
    def create_table
      # The block form closes the file again; only the first line is needed.
      header = File.open(stream_location, 'r') { |f| f.gets }
      instance.connection.create_table(instance.table_name, :temporary => true, :id => false) do |t|
        header.to_s.split(/,/).each do |col|
          column_name = symbolize(col)
          if custom = @column_names[column_name]
            t.column(*custom)
          else
            t.string column_name
          end
        end
      end
    end

    # Remembers a custom column definition under its symbolized name.
    def column(name, type, options)
      @column_names.merge! name.to_sym => [name, type, options]
    end

    # Streams the (optionally sed-filtered) file into the table using COPY.
    #
    # l - String used as the read buffer.
    def pipe_it(l = "")
      raw = instance.connection.raw_connection
      raw.query "COPY #{instance.table_name} FROM STDIN WITH CSV HEADER"
      # The path is shell-escaped so spaces or metacharacters cannot break
      # (or inject into) the command line.
      command = "cat #{Shellwords.escape(stream_location.to_s)} #{sed_to_s}"
      # The block form closes the pipe and reaps the child process; read
      # returns nil at end of file, which ends the loop.
      IO.popen(command) do |f|
        while f.read(8192, l)
          raw.put_copy_data l
        end
      end
      raw.put_copy_end
    end
  end
end
|
data/lib/theman.rb
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
"DATE","ORG_CODE","JOB_SEEKER_ID"
|
2
|
+
"23/09/2010","XXXX","342310094609"
|
3
|
+
"23/09/2010","XXXX","234583366709"
|
4
|
+
"23/09/2010","XXXX","342939870209"
|
5
|
+
"23/09/2010","XXXX","234098040009"
|
6
|
+
"23/09/2010","XXXX","987919906509"
|
7
|
+
|
8
|
+
"@!!@"
|
9
|
+
"Subscription ID: XXXXXXXXXXXXXXXXXXXXXX"
|
10
|
+
"Managed By: XXXXX"
|
11
|
+
"Record Count: XXXXX"
|
12
|
+
"Report Date: XXXXXXX"
|
13
|
+
"Extract Date: XXXXXXX"
|
14
|
+
"Data Load Freq: DAILY"
|
15
|
+
"Report Name: XXXXX"
|
16
|
+
"Report Desc: XXX XXXX XXXXX XXX XXX XX XX"
|
17
|
+
"Report Message: "
|
18
|
+
"Solution Name: XXXXXX"
|
19
|
+
"Solution Desc: "
|
20
|
+
"REPORT CRITERIA BELOW"
|
21
|
+
"Organisation: XXXX"
|
data/spec/spec_helper.rb
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
# Spec bootstrap: puts spec/ and lib/ on the load path, connects ActiveRecord
# to the "test" entry of spec/database.yml and logs SQL to log/database.log.
$LOAD_PATH.unshift(File.dirname(__FILE__))
$LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))

# Standard libraries used below, required explicitly instead of relying on
# Rails to have loaded them.
require 'yaml'
require 'logger'
require 'fileutils'

require 'rails'
require 'active_record'

Rails.env = 'test'
ActiveRecord::Base.configurations = YAML.load_file(File.join("spec", "database.yml"))

log_dir = "#{Dir.pwd}/log"
FileUtils.mkdir_p log_dir
# Open the log file exactly once (truncating it) and hand that handle to the
# logger; opening it twice leaked the first file handle.
ActiveRecord::Base.logger = Logger.new(File.open("#{log_dir}/database.log", 'w'))
ActiveRecord::Base.establish_connection(ActiveRecord::Base.configurations.fetch(Rails.env))

require 'theman'
require 'rspec'
require 'rspec/autorun'
|
data/spec/theman_spec.rb
ADDED
@@ -0,0 +1,91 @@
|
|
1
|
+
require 'spec_helper'

# Directory holding the CSV fixtures used by the examples below.
FIXTURE_DIR = File.expand_path(File.join(File.dirname(__FILE__), '..', 'spec', 'fixtures'))

describe Theman::Agency, "instance object" do
  before do
    @instance = ::Theman::Agency.new.instance
  end

  it "should superclass active record" do
    @instance.superclass.should == ActiveRecord::Base
  end

  it "should have connection" do
    @instance.connection.class.should == ActiveRecord::ConnectionAdapters::PostgreSQLAdapter
  end

  it "should have a table name" do
    # Parentheses avoid Ruby's "ambiguous first argument" warning for /regexp/.
    @instance.table_name.should match(/c[0-9]{10}/)
  end

  it "should have an inspect method" do
    @instance.inspect.should match(/Agent/)
  end
end

describe Theman::Agency, "instance methods" do
  it "should downcase and symbolize" do
    Theman::Agency.new.symbolize("STRANGE NAME").should == :strange_name
  end
end

describe Theman::Agency, "basic" do
  before do
    @csv = File.join(FIXTURE_DIR, 'temp_one.csv')
    @cabinet = ::Theman::Agency.new @csv
    @instance = @cabinet.instance
  end

  it "should have all the records from the csv" do
    @instance.count.should == 4
  end
end

describe Theman::Agency, "sed chomp" do
  before do
    @csv = File.join(FIXTURE_DIR, 'temp_two.csv')
    @cabinet = ::Theman::Agency.new @csv do |cabinet|
      # Same sed script as the README example that omits the last 15 rows.
      cabinet.seds "-n -e :a -e '1,15!{P;N;D;};N;ba'"
    end
    @instance = @cabinet.instance
  end

  it "should have all the records from the csv" do
    @instance.count.should == 5
  end
end

describe Theman::Agency, "data types" do
  before do
    @csv = File.join(FIXTURE_DIR, 'temp_one.csv')
    @cabinet = ::Theman::Agency.new @csv do |cabinet|
      cabinet.nulls(/"N"/, /"UNKNOWN"/, /""/)
      cabinet.table do |t|
        t.date :col_date
        t.boolean :col_four
        t.float :col_five
      end
    end
    @instance = @cabinet.instance
  end

  it "should create date col" do
    @instance.first.col_date.class.should == Date
  end

  it "should create boolean col" do
    @instance.where(:col_four => true).count.should == 2
  end

  it "should create float col" do
    @instance.where("col_five > 10.0").count.should == 2
  end

  it "should have an array of nulls" do
    @cabinet.null_replacements.should == [/"N"/, /"UNKNOWN"/, /""/]
  end

  it "should have nulls not strings" do
    @instance.where(:col_two => nil).count.should == 2
    @instance.where(:col_three => nil).count.should == 2
  end
end
|
data/theman.gemspec
ADDED
@@ -0,0 +1,55 @@
|
|
1
|
+
# Generated by jeweler
|
2
|
+
# DO NOT EDIT THIS FILE DIRECTLY
|
3
|
+
# Instead, edit Jeweler::Tasks in Rakefile, and run the gemspec command
|
4
|
+
# -*- encoding: utf-8 -*-
|
5
|
+
|
6
|
+
Gem::Specification.new do |s|
|
7
|
+
s.name = %q{theman}
|
8
|
+
s.version = "0.0.1"
|
9
|
+
|
10
|
+
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
|
+
s.authors = ["Rufus Post"]
|
12
|
+
s.date = %q{2010-09-28}
|
13
|
+
s.description = %q{longer description of your gem}
|
14
|
+
s.email = %q{rufuspost@gmail.com}
|
15
|
+
s.extra_rdoc_files = [
|
16
|
+
"README.rdoc"
|
17
|
+
]
|
18
|
+
s.files = [
|
19
|
+
".document",
|
20
|
+
".gitignore",
|
21
|
+
"README.rdoc",
|
22
|
+
"Rakefile",
|
23
|
+
"VERSION",
|
24
|
+
"lib/theman.rb",
|
25
|
+
"lib/theman/themans_agency.rb",
|
26
|
+
"spec/fixtures/temp_one.csv",
|
27
|
+
"spec/fixtures/temp_two.csv",
|
28
|
+
"spec/spec_helper.rb",
|
29
|
+
"spec/theman_spec.rb",
|
30
|
+
"theman.gemspec"
|
31
|
+
]
|
32
|
+
s.homepage = %q{http://github.com/mynameisrufus/theman}
|
33
|
+
s.rdoc_options = ["--charset=UTF-8"]
|
34
|
+
s.require_paths = ["lib"]
|
35
|
+
s.rubygems_version = %q{1.3.7}
|
36
|
+
s.summary = %q{PostgreSQL AR temporary table generator using PostgreSQL COPY}
|
37
|
+
s.test_files = [
|
38
|
+
"spec/theman_spec.rb",
|
39
|
+
"spec/spec_helper.rb"
|
40
|
+
]
|
41
|
+
|
42
|
+
if s.respond_to? :specification_version then
|
43
|
+
current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
|
44
|
+
s.specification_version = 3
|
45
|
+
|
46
|
+
if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
|
47
|
+
s.add_development_dependency(%q<rspec>, [">= 2.0.0.beta.20"])
|
48
|
+
else
|
49
|
+
s.add_dependency(%q<rspec>, [">= 2.0.0.beta.20"])
|
50
|
+
end
|
51
|
+
else
|
52
|
+
s.add_dependency(%q<rspec>, [">= 2.0.0.beta.20"])
|
53
|
+
end
|
54
|
+
end
|
55
|
+
|
metadata
ADDED
@@ -0,0 +1,92 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: theman
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
prerelease: false
|
5
|
+
segments:
|
6
|
+
- 0
|
7
|
+
- 0
|
8
|
+
- 1
|
9
|
+
version: 0.0.1
|
10
|
+
platform: ruby
|
11
|
+
authors:
|
12
|
+
- Rufus Post
|
13
|
+
autorequire:
|
14
|
+
bindir: bin
|
15
|
+
cert_chain: []
|
16
|
+
|
17
|
+
date: 2010-09-28 00:00:00 +10:00
|
18
|
+
default_executable:
|
19
|
+
dependencies:
|
20
|
+
- !ruby/object:Gem::Dependency
|
21
|
+
name: rspec
|
22
|
+
prerelease: false
|
23
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
24
|
+
none: false
|
25
|
+
requirements:
|
26
|
+
- - ">="
|
27
|
+
- !ruby/object:Gem::Version
|
28
|
+
segments:
|
29
|
+
- 2
|
30
|
+
- 0
|
31
|
+
- 0
|
32
|
+
- beta
|
33
|
+
- 20
|
34
|
+
version: 2.0.0.beta.20
|
35
|
+
type: :development
|
36
|
+
version_requirements: *id001
|
37
|
+
description: longer description of your gem
|
38
|
+
email: rufuspost@gmail.com
|
39
|
+
executables: []
|
40
|
+
|
41
|
+
extensions: []
|
42
|
+
|
43
|
+
extra_rdoc_files:
|
44
|
+
- README.rdoc
|
45
|
+
files:
|
46
|
+
- .document
|
47
|
+
- .gitignore
|
48
|
+
- README.rdoc
|
49
|
+
- Rakefile
|
50
|
+
- VERSION
|
51
|
+
- lib/theman.rb
|
52
|
+
- lib/theman/themans_agency.rb
|
53
|
+
- spec/fixtures/temp_one.csv
|
54
|
+
- spec/fixtures/temp_two.csv
|
55
|
+
- spec/spec_helper.rb
|
56
|
+
- spec/theman_spec.rb
|
57
|
+
- theman.gemspec
|
58
|
+
has_rdoc: true
|
59
|
+
homepage: http://github.com/mynameisrufus/theman
|
60
|
+
licenses: []
|
61
|
+
|
62
|
+
post_install_message:
|
63
|
+
rdoc_options:
|
64
|
+
- --charset=UTF-8
|
65
|
+
require_paths:
|
66
|
+
- lib
|
67
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
68
|
+
none: false
|
69
|
+
requirements:
|
70
|
+
- - ">="
|
71
|
+
- !ruby/object:Gem::Version
|
72
|
+
segments:
|
73
|
+
- 0
|
74
|
+
version: "0"
|
75
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
76
|
+
none: false
|
77
|
+
requirements:
|
78
|
+
- - ">="
|
79
|
+
- !ruby/object:Gem::Version
|
80
|
+
segments:
|
81
|
+
- 0
|
82
|
+
version: "0"
|
83
|
+
requirements: []
|
84
|
+
|
85
|
+
rubyforge_project:
|
86
|
+
rubygems_version: 1.3.7
|
87
|
+
signing_key:
|
88
|
+
specification_version: 3
|
89
|
+
summary: PostgreSQL AR temporary table generator using PostgreSQL COPY
|
90
|
+
test_files:
|
91
|
+
- spec/theman_spec.rb
|
92
|
+
- spec/spec_helper.rb
|