theman 0.0.3 → 0.0.4

Sign up to get free protection for your applications and to get access to all the features.
data/README.rdoc CHANGED
@@ -2,16 +2,26 @@
2
2
 
3
3
  The man getting you down?
4
4
 
5
- FasterCSV is great and all but when you get to 100mb files it takes a while and you may only be looking for certain records that match some criteria, enter theman.
5
+ FasterCSV is great and all but when you get to 100MB files it takes a
6
+ while and you may only be looking for certain records that match some
7
+ criteria, enter Theman.
6
8
 
7
9
  == Installation
8
10
 
11
+ === Rails3
12
+
13
+ gem 'theman'
14
+
15
+ === Rails2.x
16
+
9
17
  config.gem 'theman'
10
18
 
11
19
  Or
12
20
 
13
21
  gem install 'theman'
14
22
 
23
+ Only needs the activerecord and pg gems.
24
+
15
25
  == Basic Usage
16
26
 
17
27
  my_agent = ::Theman::Agency.new 'pretty.csv'
@@ -20,10 +30,10 @@ Or
20
30
 
21
31
  == Advanced Usage
22
32
 
23
- my_agent = ::Theman::Agency.new 'ugly.csv' do |agent|
24
- agent.nulls /"N"/, /"UNKNOWN"/, /""/
25
- agent.seds "-n -e :a -e '1,15!{P;N;D;};N;ba'"
26
- agent.table do |t|
33
+ my_agent = ::Theman::Agency.new 'ugly.csv' do |smith|
34
+ smith.nulls /"N"/, /"UNKNOWN"/, /""/
35
+ smith.seds "-n -e :a -e '1,15!{P;N;D;};N;ba'"
36
+ smith.table do |t|
27
37
  t.date :date
28
38
  t.integer :ext_id
29
39
  t.float :amount
@@ -33,15 +43,68 @@ Or
33
43
  temp_model = my_agent.instance
34
44
  temp_model.where(:exited => true).count
35
45
 
36
- In the above example we ommitted the last 15 rows and made some things null.
46
+ In the above example we omitted the last 15 rows and made some things null.
47
+
48
+ If you do not provide a table block your columns will be VARCHAR(255), you
49
+ can cherry pick cols to change data types.
50
+
51
+ The temp table has no id column but you could add one after if you wanted.
52
+
53
+ If you want to call this procedurally, just don't pass in the path to the file
54
+ and Theman will not create a table in which case
55
+ you will need to call everything explicitly:
56
+
57
+ smith = ::Theman::Agency.new
58
+ smith.stream 'real_ugly.csv'
59
+ smith.seds "-n -e :a -e '1,15!{P;N;D;};N;ba'"
60
+ smith.nulls /"XXXX"/
61
+ smith.date :date
62
+
63
+ smith.create_table
64
+ smith.pipe_it
65
+
66
+ == Dates
67
+
68
+ Ah dates, everybody's joy. Use datestyle to tell Theman to tell PostgreSQL:
69
+
70
+ my_agent = ::Theman::Agency.new 'uber_foie_gras.csv' do |schmit|
71
+ schmit.datestyle 'European'
72
+ schmit.table do |t|
73
+ t.date :col_date
74
+ end
75
+ end
76
+
77
+ Refer to the PostgreSQL docs for more info, but here is some copy and paste:
78
+
79
+ ISO
80
+
81
+ * Use ISO 8601-style dates and times (YYYY-MM-DD HH:MM:SS). This is the default.
82
+
83
+ SQL
84
+
85
+ * Use Oracle/Ingres-style dates and times. Note that this style has nothing to do with SQL
86
+ (which mandates ISO 8601 style), the naming of this option is a historical accident.
87
+
88
+ PostgreSQL
89
+
90
+ * Use traditional PostgreSQL format.
91
+
92
+ German
93
+
94
+ dd.mm.yyyy
95
+
96
+ European
97
+
98
+ dd/mm/yyyy
37
99
 
38
- If you do not provide a table block your columns will be VARCHAR(255), you can cherry pick cols to change data types.
100
+ US
39
101
 
40
- The temp table has no id col but you could add one after if you wanted
102
+ mm/dd/yyyy
41
103
 
42
104
  == Troubles
43
105
 
44
- Table empty? the man has given you crappy data and PostgresSQL has silently dissed your data
106
+ Table empty? The man (the real-life one) has given you crappy data and PostgreSQL
107
+ has silently dissed it.
45
108
 
46
109
  == Copyright
47
110
 
@@ -2,8 +2,6 @@ module Theman
2
2
  class Agency
3
3
  attr_reader :instance, :column_names, :null_replacements, :sed_commands
4
4
 
5
- attr_writer :stream
6
-
7
5
  def initialize(stream = nil, parent = ::ActiveRecord::Base)
8
6
  # source of the data
9
7
  @stream = stream
@@ -70,12 +68,27 @@ module Theman
70
68
  end
71
69
  end
72
70
  end
73
-
71
+
72
+ def stream(path)
73
+ @stream = path
74
+ end
75
+
76
+ def datestyle(local)
77
+ @psql_datestyle = local
78
+ end
79
+
80
+ def psql_command
81
+ psql = []
82
+ psql << "SET DATESTYLE TO #{@psql_datestyle}" unless @psql_datestyle.nil?
83
+ psql << "COPY #{instance.table_name} FROM STDIN WITH CSV HEADER"
84
+ psql.join("; ")
85
+ end
86
+
74
87
  # use PostgreSQL's COPY command using STDIN with CSV HEADER
75
88
  # reads chunks of 8192 bytes to save memory
76
89
  def pipe_it(l = "")
77
90
  raw = instance.connection.raw_connection
78
- raw.query "COPY #{instance.table_name} FROM STDIN WITH CSV HEADER"
91
+ raw.query psql_command
79
92
  command = "cat #{@stream} #{seds_join}"
80
93
  f = IO.popen(command)
81
94
  begin
@@ -1,3 +1,3 @@
1
1
  module Theman
2
- VERSION = "0.0.3"
2
+ VERSION = "0.0.4"
3
3
  end
@@ -0,0 +1,5 @@
1
+ "COL DATE"
2
+ "2010-12-25"
3
+ "2010-11-19"
4
+ "2010-11-05"
5
+ "2010-11-05"
@@ -0,0 +1,5 @@
1
+ "COL DATE"
2
+ "12/25/2010"
3
+ "11/19/2010"
4
+ "05/11/2010"
5
+ "05/11/2010"
@@ -0,0 +1,5 @@
1
+ "COL DATE"
2
+ "25/12/2010"
3
+ "19/11/2010"
4
+ "05/11/2010"
5
+ "05/11/2010"
data/spec/theman_spec.rb CHANGED
@@ -89,3 +89,81 @@ describe Theman::Agency, "data types" do
89
89
  @instance.where(:col_three => nil).count.should == 2
90
90
  end
91
91
  end
92
+
93
+ describe Theman::Agency, "european date styles" do
94
+ before do
95
+ @csv = File.expand_path(File.join(File.dirname(__FILE__), '..', 'spec', 'fixtures', 'temp_three.csv'))
96
+ @agent = ::Theman::Agency.new @csv do |smith|
97
+ smith.datestyle 'European'
98
+ smith.table do |t|
99
+ t.date :col_date
100
+ end
101
+ end
102
+ @instance = @agent.instance
103
+ end
104
+
105
+ it "should have correct date" do
106
+ date = @instance.first.col_date
107
+ date.day.should == 25
108
+ date.month.should == 12
109
+ end
110
+ end
111
+
112
+ describe Theman::Agency, "US date styles" do
113
+ before do
114
+ @csv = File.expand_path(File.join(File.dirname(__FILE__), '..', 'spec', 'fixtures', 'temp_four.csv'))
115
+ @agent = ::Theman::Agency.new @csv do |smith|
116
+ smith.datestyle 'US'
117
+ smith.table do |t|
118
+ t.date :col_date
119
+ end
120
+ end
121
+ @instance = @agent.instance
122
+ end
123
+
124
+ it "should have correct date" do
125
+ date = @instance.first.col_date
126
+ date.day.should == 25
127
+ date.month.should == 12
128
+ end
129
+ end
130
+
131
+ describe Theman::Agency, "ISO date styles" do
132
+ before do
133
+ @csv = File.expand_path(File.join(File.dirname(__FILE__), '..', 'spec', 'fixtures', 'temp_five.csv'))
134
+ @agent = ::Theman::Agency.new @csv do |smith|
135
+ smith.datestyle 'ISO'
136
+ smith.table do |t|
137
+ t.date :col_date
138
+ end
139
+ end
140
+ @instance = @agent.instance
141
+ end
142
+
143
+ it "should have correct date" do
144
+ date = @instance.first.col_date
145
+ date.day.should == 25
146
+ date.month.should == 12
147
+ end
148
+ end
149
+
150
+ describe Theman::Agency, "procedural" do
151
+ before do
152
+ @csv = File.expand_path(File.join(File.dirname(__FILE__), '..', 'spec', 'fixtures', 'temp_two.csv'))
153
+ end
154
+
155
+ it "should be able to be called procedural" do
156
+ smith = ::Theman::Agency.new
157
+ smith.stream @csv
158
+ smith.datestyle "European"
159
+ smith.seds "-n -e :a -e '1,15!{P;N;D;};N;ba'"
160
+ smith.nulls /"XXXX"/
161
+ smith.date :date
162
+ smith.create_table
163
+ smith.pipe_it
164
+ my_model = smith.instance
165
+ my_model.first.date.class.should == Date
166
+ my_model.first.org_code.class.should == NilClass
167
+ my_model.count.should == 5
168
+ end
169
+ end
data/theman.gemspec CHANGED
@@ -17,6 +17,10 @@ Gem::Specification.new do |s|
17
17
  s.add_development_dependency "bundler", ">= 1.0.0"
18
18
  s.add_development_dependency "rspec", ">= 2.0.0"
19
19
  s.add_development_dependency "activerecord", ">= 3.0.0"
20
+ s.add_development_dependency "pg"
21
+
22
+ s.add_runtime_dependency "activerecord"
23
+ s.add_runtime_dependency "pg"
20
24
 
21
25
  s.files = `git ls-files`.split("\n")
22
26
  s.executables = `git ls-files`.split("\n").map{|f| f =~ /^bin\/(.*)/ ? $1 : nil}.compact
metadata CHANGED
@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
5
5
  segments:
6
6
  - 0
7
7
  - 0
8
- - 3
9
- version: 0.0.3
8
+ - 4
9
+ version: 0.0.4
10
10
  platform: ruby
11
11
  authors:
12
12
  - Rufus Post
@@ -62,6 +62,45 @@ dependencies:
62
62
  version: 3.0.0
63
63
  type: :development
64
64
  version_requirements: *id003
65
+ - !ruby/object:Gem::Dependency
66
+ name: pg
67
+ prerelease: false
68
+ requirement: &id004 !ruby/object:Gem::Requirement
69
+ none: false
70
+ requirements:
71
+ - - ">="
72
+ - !ruby/object:Gem::Version
73
+ segments:
74
+ - 0
75
+ version: "0"
76
+ type: :development
77
+ version_requirements: *id004
78
+ - !ruby/object:Gem::Dependency
79
+ name: activerecord
80
+ prerelease: false
81
+ requirement: &id005 !ruby/object:Gem::Requirement
82
+ none: false
83
+ requirements:
84
+ - - ">="
85
+ - !ruby/object:Gem::Version
86
+ segments:
87
+ - 0
88
+ version: "0"
89
+ type: :runtime
90
+ version_requirements: *id005
91
+ - !ruby/object:Gem::Dependency
92
+ name: pg
93
+ prerelease: false
94
+ requirement: &id006 !ruby/object:Gem::Requirement
95
+ none: false
96
+ requirements:
97
+ - - ">="
98
+ - !ruby/object:Gem::Version
99
+ segments:
100
+ - 0
101
+ version: "0"
102
+ type: :runtime
103
+ version_requirements: *id006
65
104
  description: FasterCSV is great and all but when you get to 100mb files it takes a while and you may only be looking for certain records that match some criteria, enter theman
66
105
  email:
67
106
  - rufuspost@gmail.com
@@ -79,7 +118,10 @@ files:
79
118
  - lib/theman.rb
80
119
  - lib/theman/themans_agency.rb
81
120
  - lib/theman/version.rb
121
+ - spec/fixtures/temp_five.csv
122
+ - spec/fixtures/temp_four.csv
82
123
  - spec/fixtures/temp_one.csv
124
+ - spec/fixtures/temp_three.csv
83
125
  - spec/fixtures/temp_two.csv
84
126
  - spec/spec_helper.rb
85
127
  - spec/theman_spec.rb