theman 0.0.3 → 0.0.4
Sign up to get free protection for your applications and to get access to all the features.
- data/README.rdoc +72 -9
- data/lib/theman/themans_agency.rb +17 -4
- data/lib/theman/version.rb +1 -1
- data/spec/fixtures/temp_five.csv +5 -0
- data/spec/fixtures/temp_four.csv +5 -0
- data/spec/fixtures/temp_three.csv +5 -0
- data/spec/theman_spec.rb +78 -0
- data/theman.gemspec +4 -0
- metadata +44 -2
data/README.rdoc
CHANGED
@@ -2,16 +2,26 @@
|
|
2
2
|
|
3
3
|
The man getting you down?
|
4
4
|
|
5
|
-
FasterCSV is great and all but when you get to
|
5
|
+
FasterCSV is great and all but when you get to 100MB files it takes a
|
6
|
+
while and you may only be looking for certain records that match some
|
7
|
+
criteria, enter Theman.
|
6
8
|
|
7
9
|
== Installation
|
8
10
|
|
11
|
+
=== Rails3
|
12
|
+
|
13
|
+
gem 'theman'
|
14
|
+
|
15
|
+
=== Rails2.x
|
16
|
+
|
9
17
|
config.gem 'theman'
|
10
18
|
|
11
19
|
Or
|
12
20
|
|
13
21
|
gem install 'theman'
|
14
22
|
|
23
|
+
Only needs the activerecord and pg (PostgreSQL) gems.
|
24
|
+
|
15
25
|
== Basic Usage
|
16
26
|
|
17
27
|
my_agent = ::Theman::Agency.new 'pretty.csv'
|
@@ -20,10 +30,10 @@ Or
|
|
20
30
|
|
21
31
|
== Advanced Usage
|
22
32
|
|
23
|
-
my_agent = ::Theman::Agency.new 'ugly.csv' do |
|
24
|
-
|
25
|
-
|
26
|
-
|
33
|
+
my_agent = ::Theman::Agency.new 'ugly.csv' do |smith|
|
34
|
+
smith.nulls /"N"/, /"UNKNOWN"/, /""/
|
35
|
+
smith.seds "-n -e :a -e '1,15!{P;N;D;};N;ba'"
|
36
|
+
smith.table do |t|
|
27
37
|
t.date :date
|
28
38
|
t.integer :ext_id
|
29
39
|
t.float :amount
|
@@ -33,15 +43,68 @@ Or
|
|
33
43
|
temp_model = my_agent.instance
|
34
44
|
temp_model.where(:exited => true).count
|
35
45
|
|
36
|
-
In the above example we
|
46
|
+
In the above example we omitted the last 15 rows and made some things null.
|
47
|
+
|
48
|
+
If you do not provide a table block, your columns will be VARCHAR(255); you
|
49
|
+
can cherry pick cols to change data types.
|
50
|
+
|
51
|
+
The temp table has no id column but you could add one after if you wanted.
|
52
|
+
|
53
|
+
If you want to call this procedurally, just don't pass in the path to the file
|
54
|
+
and Theman will not create a table, in which case
|
55
|
+
you will need to call everything explicitly:
|
56
|
+
|
57
|
+
smith = ::Theman::Agency.new
|
58
|
+
smith.stream 'real_ugly.csv'
|
59
|
+
smith.seds "-n -e :a -e '1,15!{P;N;D;};N;ba'"
|
60
|
+
smith.nulls /"XXXX"/
|
61
|
+
smith.date :date
|
62
|
+
|
63
|
+
smith.create_table
|
64
|
+
smith.pipe_it
|
65
|
+
|
66
|
+
== Dates
|
67
|
+
|
68
|
+
Ah dates, everybody's joy. Use datestyle to tell Theman to tell PostgreSQL:
|
69
|
+
|
70
|
+
my_agent = ::Theman::Agency.new 'uber_foie_gras.csv' do |schmit|
|
71
|
+
schmit.datestyle 'European'
|
72
|
+
schmit.table do |t|
|
73
|
+
t.date :col_date
|
74
|
+
end
|
75
|
+
end
|
76
|
+
|
77
|
+
Refer to the PostgreSQL docs for more info, but here is some copy and paste:
|
78
|
+
|
79
|
+
ISO
|
80
|
+
|
81
|
+
* Use ISO 8601-style dates and times (YYYY-MM-DD HH:MM:SS). This is the default.
|
82
|
+
|
83
|
+
SQL
|
84
|
+
|
85
|
+
* Use Oracle/Ingres-style dates and times. Note that this style has nothing to do with SQL
|
86
|
+
(which mandates ISO 8601 style), the naming of this option is a historical accident.
|
87
|
+
|
88
|
+
PostgreSQL
|
89
|
+
|
90
|
+
* Use traditional PostgreSQL format.
|
91
|
+
|
92
|
+
German
|
93
|
+
|
94
|
+
dd.mm.yyyy
|
95
|
+
|
96
|
+
European
|
97
|
+
|
98
|
+
dd/mm/yyyy
|
37
99
|
|
38
|
-
|
100
|
+
US
|
39
101
|
|
40
|
-
|
102
|
+
mm/dd/yyyy
|
41
103
|
|
42
104
|
== Troubles
|
43
105
|
|
44
|
-
Table empty? the man has given you crappy data and PostgresSQL
|
106
|
+
Table empty? The man (the real-life one) has given you crappy data and PostgreSQL
|
107
|
+
has silently dissed it.
|
45
108
|
|
46
109
|
== Copyright
|
47
110
|
|
@@ -2,8 +2,6 @@ module Theman
|
|
2
2
|
class Agency
|
3
3
|
attr_reader :instance, :column_names, :null_replacements, :sed_commands
|
4
4
|
|
5
|
-
attr_writer :stream
|
6
|
-
|
7
5
|
def initialize(stream = nil, parent = ::ActiveRecord::Base)
|
8
6
|
# source of the data
|
9
7
|
@stream = stream
|
@@ -70,12 +68,27 @@ module Theman
|
|
70
68
|
end
|
71
69
|
end
|
72
70
|
end
|
73
|
-
|
71
|
+
|
72
|
+
def stream(path)
|
73
|
+
@stream = path
|
74
|
+
end
|
75
|
+
|
76
|
+
def datestyle(local)
|
77
|
+
@psql_datestyle = local
|
78
|
+
end
|
79
|
+
|
80
|
+
def psql_command
|
81
|
+
psql = []
|
82
|
+
psql << "SET DATESTYLE TO #{@psql_datestyle}" unless @psql_datestyle.nil?
|
83
|
+
psql << "COPY #{instance.table_name} FROM STDIN WITH CSV HEADER"
|
84
|
+
psql.join("; ")
|
85
|
+
end
|
86
|
+
|
74
87
|
# use the PostgreSQL COPY command using STDIN with CSV HEADER
|
75
88
|
# reads chunks of 8192 bytes to save memory
|
76
89
|
def pipe_it(l = "")
|
77
90
|
raw = instance.connection.raw_connection
|
78
|
-
raw.query
|
91
|
+
raw.query psql_command
|
79
92
|
command = "cat #{@stream} #{seds_join}"
|
80
93
|
f = IO.popen(command)
|
81
94
|
begin
|
data/lib/theman/version.rb
CHANGED
data/spec/theman_spec.rb
CHANGED
@@ -89,3 +89,81 @@ describe Theman::Agency, "data types" do
|
|
89
89
|
@instance.where(:col_three => nil).count.should == 2
|
90
90
|
end
|
91
91
|
end
|
92
|
+
|
93
|
+
describe Theman::Agency, "european date styles" do
|
94
|
+
before do
|
95
|
+
@csv = File.expand_path(File.join(File.dirname(__FILE__), '..', 'spec', 'fixtures', 'temp_three.csv'))
|
96
|
+
@agent = ::Theman::Agency.new @csv do |smith|
|
97
|
+
smith.datestyle 'European'
|
98
|
+
smith.table do |t|
|
99
|
+
t.date :col_date
|
100
|
+
end
|
101
|
+
end
|
102
|
+
@instance = @agent.instance
|
103
|
+
end
|
104
|
+
|
105
|
+
it "should have correct date" do
|
106
|
+
date = @instance.first.col_date
|
107
|
+
date.day.should == 25
|
108
|
+
date.month.should == 12
|
109
|
+
end
|
110
|
+
end
|
111
|
+
|
112
|
+
describe Theman::Agency, "US date styles" do
|
113
|
+
before do
|
114
|
+
@csv = File.expand_path(File.join(File.dirname(__FILE__), '..', 'spec', 'fixtures', 'temp_four.csv'))
|
115
|
+
@agent = ::Theman::Agency.new @csv do |smith|
|
116
|
+
smith.datestyle 'US'
|
117
|
+
smith.table do |t|
|
118
|
+
t.date :col_date
|
119
|
+
end
|
120
|
+
end
|
121
|
+
@instance = @agent.instance
|
122
|
+
end
|
123
|
+
|
124
|
+
it "should have correct date" do
|
125
|
+
date = @instance.first.col_date
|
126
|
+
date.day.should == 25
|
127
|
+
date.month.should == 12
|
128
|
+
end
|
129
|
+
end
|
130
|
+
|
131
|
+
describe Theman::Agency, "ISO date styles" do
|
132
|
+
before do
|
133
|
+
@csv = File.expand_path(File.join(File.dirname(__FILE__), '..', 'spec', 'fixtures', 'temp_five.csv'))
|
134
|
+
@agent = ::Theman::Agency.new @csv do |smith|
|
135
|
+
smith.datestyle 'ISO'
|
136
|
+
smith.table do |t|
|
137
|
+
t.date :col_date
|
138
|
+
end
|
139
|
+
end
|
140
|
+
@instance = @agent.instance
|
141
|
+
end
|
142
|
+
|
143
|
+
it "should have correct date" do
|
144
|
+
date = @instance.first.col_date
|
145
|
+
date.day.should == 25
|
146
|
+
date.month.should == 12
|
147
|
+
end
|
148
|
+
end
|
149
|
+
|
150
|
+
describe Theman::Agency, "procedural" do
|
151
|
+
before do
|
152
|
+
@csv = File.expand_path(File.join(File.dirname(__FILE__), '..', 'spec', 'fixtures', 'temp_two.csv'))
|
153
|
+
end
|
154
|
+
|
155
|
+
it "should be able to be called procedural" do
|
156
|
+
smith = ::Theman::Agency.new
|
157
|
+
smith.stream @csv
|
158
|
+
smith.datestyle "European"
|
159
|
+
smith.seds "-n -e :a -e '1,15!{P;N;D;};N;ba'"
|
160
|
+
smith.nulls /"XXXX"/
|
161
|
+
smith.date :date
|
162
|
+
smith.create_table
|
163
|
+
smith.pipe_it
|
164
|
+
my_model = smith.instance
|
165
|
+
my_model.first.date.class.should == Date
|
166
|
+
my_model.first.org_code.class.should == NilClass
|
167
|
+
my_model.count.should == 5
|
168
|
+
end
|
169
|
+
end
|
data/theman.gemspec
CHANGED
@@ -17,6 +17,10 @@ Gem::Specification.new do |s|
|
|
17
17
|
s.add_development_dependency "bundler", ">= 1.0.0"
|
18
18
|
s.add_development_dependency "rspec", ">= 2.0.0"
|
19
19
|
s.add_development_dependency "activerecord", ">= 3.0.0"
|
20
|
+
s.add_development_dependency "pg"
|
21
|
+
|
22
|
+
s.add_runtime_dependency "activerecord"
|
23
|
+
s.add_runtime_dependency "pg"
|
20
24
|
|
21
25
|
s.files = `git ls-files`.split("\n")
|
22
26
|
s.executables = `git ls-files`.split("\n").map{|f| f =~ /^bin\/(.*)/ ? $1 : nil}.compact
|
metadata
CHANGED
@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
|
|
5
5
|
segments:
|
6
6
|
- 0
|
7
7
|
- 0
|
8
|
-
-
|
9
|
-
version: 0.0.
|
8
|
+
- 4
|
9
|
+
version: 0.0.4
|
10
10
|
platform: ruby
|
11
11
|
authors:
|
12
12
|
- Rufus Post
|
@@ -62,6 +62,45 @@ dependencies:
|
|
62
62
|
version: 3.0.0
|
63
63
|
type: :development
|
64
64
|
version_requirements: *id003
|
65
|
+
- !ruby/object:Gem::Dependency
|
66
|
+
name: pg
|
67
|
+
prerelease: false
|
68
|
+
requirement: &id004 !ruby/object:Gem::Requirement
|
69
|
+
none: false
|
70
|
+
requirements:
|
71
|
+
- - ">="
|
72
|
+
- !ruby/object:Gem::Version
|
73
|
+
segments:
|
74
|
+
- 0
|
75
|
+
version: "0"
|
76
|
+
type: :development
|
77
|
+
version_requirements: *id004
|
78
|
+
- !ruby/object:Gem::Dependency
|
79
|
+
name: activerecord
|
80
|
+
prerelease: false
|
81
|
+
requirement: &id005 !ruby/object:Gem::Requirement
|
82
|
+
none: false
|
83
|
+
requirements:
|
84
|
+
- - ">="
|
85
|
+
- !ruby/object:Gem::Version
|
86
|
+
segments:
|
87
|
+
- 0
|
88
|
+
version: "0"
|
89
|
+
type: :runtime
|
90
|
+
version_requirements: *id005
|
91
|
+
- !ruby/object:Gem::Dependency
|
92
|
+
name: pg
|
93
|
+
prerelease: false
|
94
|
+
requirement: &id006 !ruby/object:Gem::Requirement
|
95
|
+
none: false
|
96
|
+
requirements:
|
97
|
+
- - ">="
|
98
|
+
- !ruby/object:Gem::Version
|
99
|
+
segments:
|
100
|
+
- 0
|
101
|
+
version: "0"
|
102
|
+
type: :runtime
|
103
|
+
version_requirements: *id006
|
65
104
|
description: FasterCSV is great and all but when you get to 100mb files it takes a while and you may only be looking for certain records that match some criteria, enter theman
|
66
105
|
email:
|
67
106
|
- rufuspost@gmail.com
|
@@ -79,7 +118,10 @@ files:
|
|
79
118
|
- lib/theman.rb
|
80
119
|
- lib/theman/themans_agency.rb
|
81
120
|
- lib/theman/version.rb
|
121
|
+
- spec/fixtures/temp_five.csv
|
122
|
+
- spec/fixtures/temp_four.csv
|
82
123
|
- spec/fixtures/temp_one.csv
|
124
|
+
- spec/fixtures/temp_three.csv
|
83
125
|
- spec/fixtures/temp_two.csv
|
84
126
|
- spec/spec_helper.rb
|
85
127
|
- spec/theman_spec.rb
|