theman 0.0.3 → 0.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.rdoc +72 -9
- data/lib/theman/themans_agency.rb +17 -4
- data/lib/theman/version.rb +1 -1
- data/spec/fixtures/temp_five.csv +5 -0
- data/spec/fixtures/temp_four.csv +5 -0
- data/spec/fixtures/temp_three.csv +5 -0
- data/spec/theman_spec.rb +78 -0
- data/theman.gemspec +4 -0
- metadata +44 -2
data/README.rdoc
CHANGED
@@ -2,16 +2,26 @@
|
|
2
2
|
|
3
3
|
The man getting you down?
|
4
4
|
|
5
|
-
FasterCSV is great and all but when you get to
|
5
|
+
FasterCSV is great and all but when you get to 100MB files it takes a
|
6
|
+
while and you may only be looking for certain records that match some
|
7
|
+
criteria, enter Theman.
|
6
8
|
|
7
9
|
== Installation
|
8
10
|
|
11
|
+
=== Rails3
|
12
|
+
|
13
|
+
gem 'theman'
|
14
|
+
|
15
|
+
=== Rails2.x
|
16
|
+
|
9
17
|
config.gem 'theman'
|
10
18
|
|
11
19
|
Or
|
12
20
|
|
13
21
|
gem install 'theman'
|
14
22
|
|
23
|
+
Only needs active record and postgresql gem.
|
24
|
+
|
15
25
|
== Basic Usage
|
16
26
|
|
17
27
|
my_agent = ::Theman::Agency.new 'pretty.csv'
|
@@ -20,10 +30,10 @@ Or
|
|
20
30
|
|
21
31
|
== Advanced Usage
|
22
32
|
|
23
|
-
my_agent = ::Theman::Agency.new 'ugly.csv' do |
|
24
|
-
|
25
|
-
|
26
|
-
|
33
|
+
my_agent = ::Theman::Agency.new 'ugly.csv' do |smith|
|
34
|
+
smith.nulls /"N"/, /"UNKNOWN"/, /""/
|
35
|
+
smith.seds "-n -e :a -e '1,15!{P;N;D;};N;ba'"
|
36
|
+
smith.table do |t|
|
27
37
|
t.date :date
|
28
38
|
t.integer :ext_id
|
29
39
|
t.float :amount
|
@@ -33,15 +43,68 @@ Or
|
|
33
43
|
temp_model = my_agent.instance
|
34
44
|
temp_model.where(:exited => true).count
|
35
45
|
|
36
|
-
In the above example we
|
46
|
+
In the above example we omitted the last 15 rows and made some things null.
|
47
|
+
|
48
|
+
If you do not provide a table block your columns will be VARCHAR(255), you
|
49
|
+
can cherry pick cols to change data types.
|
50
|
+
|
51
|
+
The temp table has no id column but you could add one after if you wanted.
|
52
|
+
|
53
|
+
If you want to call this procedural just don't pass in the path to the file
|
54
|
+
and Theman will not create a table in which case
|
55
|
+
you will need to call everything explicitly:
|
56
|
+
|
57
|
+
smith = ::Theman::Agency.new
|
58
|
+
smith.stream 'real_ugly.csv'
|
59
|
+
smith.seds "-n -e :a -e '1,15!{P;N;D;};N;ba'"
|
60
|
+
smith.nulls /"XXXX"/
|
61
|
+
smith.date :date
|
62
|
+
|
63
|
+
smith.create_table
|
64
|
+
smith.pipe_it
|
65
|
+
|
66
|
+
== Dates
|
67
|
+
|
68
|
+
Ah dates, everybody's joy. Use datestyle to tell Theman to tell PostgreSQL:
|
69
|
+
|
70
|
+
my_agent = ::Theman::Agency.new 'uber_foie_gras.csv' do |schmit|
|
71
|
+
schmit.datestyle 'European'
|
72
|
+
schmit.table do |t|
|
73
|
+
t.date :col_date
|
74
|
+
end
|
75
|
+
end
|
76
|
+
|
77
|
+
Refer to the PostgreSQL docs for more info, but here is some copy and paste:
|
78
|
+
|
79
|
+
ISO
|
80
|
+
|
81
|
+
* Use ISO 8601-style dates and times (YYYY-MM-DD HH:MM:SS). This is the default.
|
82
|
+
|
83
|
+
SQL
|
84
|
+
|
85
|
+
* Use Oracle/Ingres-style dates and times. Note that this style has nothing to do with SQL
|
86
|
+
(which mandates ISO 8601 style), the naming of this option is a historical accident.
|
87
|
+
|
88
|
+
PostgreSQL
|
89
|
+
|
90
|
+
* Use traditional PostgreSQL format.
|
91
|
+
|
92
|
+
German
|
93
|
+
|
94
|
+
dd.mm.yyyy
|
95
|
+
|
96
|
+
European
|
97
|
+
|
98
|
+
dd/mm/yyyy
|
37
99
|
|
38
|
-
|
100
|
+
US
|
39
101
|
|
40
|
-
|
102
|
+
mm/dd/yyyy
|
41
103
|
|
42
104
|
== Troubles
|
43
105
|
|
44
|
-
Table empty? the man has given you crappy data and PostgresSQL
|
106
|
+
Table empty? The man (the real-life one) has given you crappy data and PostgreSQL
|
107
|
+
has silently dissed it.
|
45
108
|
|
46
109
|
== Copyright
|
47
110
|
|
@@ -2,8 +2,6 @@ module Theman
|
|
2
2
|
class Agency
|
3
3
|
attr_reader :instance, :column_names, :null_replacements, :sed_commands
|
4
4
|
|
5
|
-
attr_writer :stream
|
6
|
-
|
7
5
|
def initialize(stream = nil, parent = ::ActiveRecord::Base)
|
8
6
|
# source of the data
|
9
7
|
@stream = stream
|
@@ -70,12 +68,27 @@ module Theman
|
|
70
68
|
end
|
71
69
|
end
|
72
70
|
end
|
73
|
-
|
71
|
+
|
72
|
+
def stream(path)
|
73
|
+
@stream = path
|
74
|
+
end
|
75
|
+
|
76
|
+
def datestyle(local)
|
77
|
+
@psql_datestyle = local
|
78
|
+
end
|
79
|
+
|
80
|
+
def psql_command
|
81
|
+
psql = []
|
82
|
+
psql << "SET DATESTYLE TO #{@psql_datestyle}" unless @psql_datestyle.nil?
|
83
|
+
psql << "COPY #{instance.table_name} FROM STDIN WITH CSV HEADER"
|
84
|
+
psql.join("; ")
|
85
|
+
end
|
86
|
+
|
74
87
|
# use the PostgreSQL COPY command using STDIN with CSV HEADER
|
75
88
|
# reads chunks of 8192 bytes to save memory
|
76
89
|
def pipe_it(l = "")
|
77
90
|
raw = instance.connection.raw_connection
|
78
|
-
raw.query
|
91
|
+
raw.query psql_command
|
79
92
|
command = "cat #{@stream} #{seds_join}"
|
80
93
|
f = IO.popen(command)
|
81
94
|
begin
|
data/lib/theman/version.rb
CHANGED
data/spec/theman_spec.rb
CHANGED
@@ -89,3 +89,81 @@ describe Theman::Agency, "data types" do
|
|
89
89
|
@instance.where(:col_three => nil).count.should == 2
|
90
90
|
end
|
91
91
|
end
|
92
|
+
|
93
|
+
describe Theman::Agency, "european date styles" do
|
94
|
+
before do
|
95
|
+
@csv = File.expand_path(File.join(File.dirname(__FILE__), '..', 'spec', 'fixtures', 'temp_three.csv'))
|
96
|
+
@agent = ::Theman::Agency.new @csv do |smith|
|
97
|
+
smith.datestyle 'European'
|
98
|
+
smith.table do |t|
|
99
|
+
t.date :col_date
|
100
|
+
end
|
101
|
+
end
|
102
|
+
@instance = @agent.instance
|
103
|
+
end
|
104
|
+
|
105
|
+
it "should have correct date" do
|
106
|
+
date = @instance.first.col_date
|
107
|
+
date.day.should == 25
|
108
|
+
date.month.should == 12
|
109
|
+
end
|
110
|
+
end
|
111
|
+
|
112
|
+
describe Theman::Agency, "US date styles" do
|
113
|
+
before do
|
114
|
+
@csv = File.expand_path(File.join(File.dirname(__FILE__), '..', 'spec', 'fixtures', 'temp_four.csv'))
|
115
|
+
@agent = ::Theman::Agency.new @csv do |smith|
|
116
|
+
smith.datestyle 'US'
|
117
|
+
smith.table do |t|
|
118
|
+
t.date :col_date
|
119
|
+
end
|
120
|
+
end
|
121
|
+
@instance = @agent.instance
|
122
|
+
end
|
123
|
+
|
124
|
+
it "should have correct date" do
|
125
|
+
date = @instance.first.col_date
|
126
|
+
date.day.should == 25
|
127
|
+
date.month.should == 12
|
128
|
+
end
|
129
|
+
end
|
130
|
+
|
131
|
+
describe Theman::Agency, "ISO date styles" do
|
132
|
+
before do
|
133
|
+
@csv = File.expand_path(File.join(File.dirname(__FILE__), '..', 'spec', 'fixtures', 'temp_five.csv'))
|
134
|
+
@agent = ::Theman::Agency.new @csv do |smith|
|
135
|
+
smith.datestyle 'ISO'
|
136
|
+
smith.table do |t|
|
137
|
+
t.date :col_date
|
138
|
+
end
|
139
|
+
end
|
140
|
+
@instance = @agent.instance
|
141
|
+
end
|
142
|
+
|
143
|
+
it "should have correct date" do
|
144
|
+
date = @instance.first.col_date
|
145
|
+
date.day.should == 25
|
146
|
+
date.month.should == 12
|
147
|
+
end
|
148
|
+
end
|
149
|
+
|
150
|
+
describe Theman::Agency, "procedural" do
|
151
|
+
before do
|
152
|
+
@csv = File.expand_path(File.join(File.dirname(__FILE__), '..', 'spec', 'fixtures', 'temp_two.csv'))
|
153
|
+
end
|
154
|
+
|
155
|
+
it "should be able to be called procedural" do
|
156
|
+
smith = ::Theman::Agency.new
|
157
|
+
smith.stream @csv
|
158
|
+
smith.datestyle "European"
|
159
|
+
smith.seds "-n -e :a -e '1,15!{P;N;D;};N;ba'"
|
160
|
+
smith.nulls /"XXXX"/
|
161
|
+
smith.date :date
|
162
|
+
smith.create_table
|
163
|
+
smith.pipe_it
|
164
|
+
my_model = smith.instance
|
165
|
+
my_model.first.date.class.should == Date
|
166
|
+
my_model.first.org_code.class.should == NilClass
|
167
|
+
my_model.count.should == 5
|
168
|
+
end
|
169
|
+
end
|
data/theman.gemspec
CHANGED
@@ -17,6 +17,10 @@ Gem::Specification.new do |s|
|
|
17
17
|
s.add_development_dependency "bundler", ">= 1.0.0"
|
18
18
|
s.add_development_dependency "rspec", ">= 2.0.0"
|
19
19
|
s.add_development_dependency "activerecord", ">= 3.0.0"
|
20
|
+
s.add_development_dependency "pg"
|
21
|
+
|
22
|
+
s.add_runtime_dependency "activerecord"
|
23
|
+
s.add_runtime_dependency "pg"
|
20
24
|
|
21
25
|
s.files = `git ls-files`.split("\n")
|
22
26
|
s.executables = `git ls-files`.split("\n").map{|f| f =~ /^bin\/(.*)/ ? $1 : nil}.compact
|
metadata
CHANGED
@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
|
|
5
5
|
segments:
|
6
6
|
- 0
|
7
7
|
- 0
|
8
|
-
- 3
|
9
|
-
version: 0.0.3
|
8
|
+
- 4
|
9
|
+
version: 0.0.4
|
10
10
|
platform: ruby
|
11
11
|
authors:
|
12
12
|
- Rufus Post
|
@@ -62,6 +62,45 @@ dependencies:
|
|
62
62
|
version: 3.0.0
|
63
63
|
type: :development
|
64
64
|
version_requirements: *id003
|
65
|
+
- !ruby/object:Gem::Dependency
|
66
|
+
name: pg
|
67
|
+
prerelease: false
|
68
|
+
requirement: &id004 !ruby/object:Gem::Requirement
|
69
|
+
none: false
|
70
|
+
requirements:
|
71
|
+
- - ">="
|
72
|
+
- !ruby/object:Gem::Version
|
73
|
+
segments:
|
74
|
+
- 0
|
75
|
+
version: "0"
|
76
|
+
type: :development
|
77
|
+
version_requirements: *id004
|
78
|
+
- !ruby/object:Gem::Dependency
|
79
|
+
name: activerecord
|
80
|
+
prerelease: false
|
81
|
+
requirement: &id005 !ruby/object:Gem::Requirement
|
82
|
+
none: false
|
83
|
+
requirements:
|
84
|
+
- - ">="
|
85
|
+
- !ruby/object:Gem::Version
|
86
|
+
segments:
|
87
|
+
- 0
|
88
|
+
version: "0"
|
89
|
+
type: :runtime
|
90
|
+
version_requirements: *id005
|
91
|
+
- !ruby/object:Gem::Dependency
|
92
|
+
name: pg
|
93
|
+
prerelease: false
|
94
|
+
requirement: &id006 !ruby/object:Gem::Requirement
|
95
|
+
none: false
|
96
|
+
requirements:
|
97
|
+
- - ">="
|
98
|
+
- !ruby/object:Gem::Version
|
99
|
+
segments:
|
100
|
+
- 0
|
101
|
+
version: "0"
|
102
|
+
type: :runtime
|
103
|
+
version_requirements: *id006
|
65
104
|
description: FasterCSV is great and all but when you get to 100mb files it takes a while and you may only be looking for certain records that match some criteria, enter theman
|
66
105
|
email:
|
67
106
|
- rufuspost@gmail.com
|
@@ -79,7 +118,10 @@ files:
|
|
79
118
|
- lib/theman.rb
|
80
119
|
- lib/theman/themans_agency.rb
|
81
120
|
- lib/theman/version.rb
|
121
|
+
- spec/fixtures/temp_five.csv
|
122
|
+
- spec/fixtures/temp_four.csv
|
82
123
|
- spec/fixtures/temp_one.csv
|
124
|
+
- spec/fixtures/temp_three.csv
|
83
125
|
- spec/fixtures/temp_two.csv
|
84
126
|
- spec/spec_helper.rb
|
85
127
|
- spec/theman_spec.rb
|