theman 0.0.3 → 0.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/README.rdoc CHANGED
@@ -2,16 +2,26 @@
2
2
 
3
3
  The man getting you down?
4
4
 
5
- FasterCSV is great and all but when you get to 100mb files it takes a while and you may only be looking for certain records that match some criteria, enter theman.
5
+ FasterCSV is great and all but when you get to 100MB files it takes a
6
+ while and you may only be looking for certain records that match some
7
+ criteria, enter Theman.
6
8
 
7
9
  == Installation
8
10
 
11
+ === Rails3
12
+
13
+ gem 'theman'
14
+
15
+ === Rails2.x
16
+
9
17
  config.gem 'theman'
10
18
 
11
19
  Or
12
20
 
13
21
  gem install 'theman'
14
22
 
23
+ Only needs the activerecord and pg gems.
24
+
15
25
  == Basic Usage
16
26
 
17
27
  my_agent = ::Theman::Agency.new 'pretty.csv'
@@ -20,10 +30,10 @@ Or
20
30
 
21
31
  == Advanced Usage
22
32
 
23
- my_agent = ::Theman::Agency.new 'ugly.csv' do |agent|
24
- agent.nulls /"N"/, /"UNKNOWN"/, /""/
25
- agent.seds "-n -e :a -e '1,15!{P;N;D;};N;ba'"
26
- agent.table do |t|
33
+ my_agent = ::Theman::Agency.new 'ugly.csv' do |smith|
34
+ smith.nulls /"N"/, /"UNKNOWN"/, /""/
35
+ smith.seds "-n -e :a -e '1,15!{P;N;D;};N;ba'"
36
+ smith.table do |t|
27
37
  t.date :date
28
38
  t.integer :ext_id
29
39
  t.float :amount
@@ -33,15 +43,68 @@ Or
33
43
  temp_model = my_agent.instance
34
44
  temp_model.where(:exited => true).count
35
45
 
36
- In the above example we ommitted the last 15 rows and made some things null.
46
+ In the above example we omitted the last 15 rows and made some things null.
47
+
48
+ If you do not provide a table block, your columns will be VARCHAR(255); you
49
+ can cherry-pick columns to change data types.
50
+
51
+ The temp table has no id column but you could add one after if you wanted.
52
+
53
+ If you want to use this procedurally, just don't pass in the path to the file
54
+ and Theman will not create a table, in which case
55
+ you will need to call everything explicitly:
56
+
57
+ smith = ::Theman::Agency.new
58
+ smith.stream 'real_ugly.csv'
59
+ smith.seds "-n -e :a -e '1,15!{P;N;D;};N;ba'"
60
+ smith.nulls /"XXXX"/
61
+ smith.date :date
62
+
63
+ smith.create_table
64
+ smith.pipe_it
65
+
66
+ == Dates
67
+
68
+ Ah dates, everybody's joy. Use datestyle to tell Theman to tell PostgreSQL:
69
+
70
+ my_agent = ::Theman::Agency.new 'uber_foie_gras.csv' do |schmit|
71
+ schmit.datestyle 'European'
72
+ schmit.table do |t|
73
+ t.date :col_date
74
+ end
75
+ end
76
+
77
+ Refer to the PostgreSQL docs for more info, but here is some copy and paste:
78
+
79
+ ISO
80
+
81
+ * Use ISO 8601-style dates and times (YYYY-MM-DD HH:MM:SS). This is the default.
82
+
83
+ SQL
84
+
85
+ * Use Oracle/Ingres-style dates and times. Note that this style has nothing to do with SQL
86
+ (which mandates ISO 8601 style), the naming of this option is a historical accident.
87
+
88
+ PostgreSQL
89
+
90
+ * Use traditional PostgreSQL format.
91
+
92
+ German
93
+
94
+ dd.mm.yyyy
95
+
96
+ European
97
+
98
+ dd/mm/yyyy
37
99
 
38
- If you do not provide a table block your columns will be VARCHAR(255), you can cherry pick cols to change data types.
100
+ US
39
101
 
40
- The temp table has no id col but you could add one after if you wanted
102
+ mm/dd/yyyy
41
103
 
42
104
  == Troubles
43
105
 
44
- Table empty? the man has given you crappy data and PostgresSQL has silently dissed your data
106
+ Table empty? The man (the real-life one) has given you crappy data and PostgreSQL
107
+ has silently dissed it.
45
108
 
46
109
  == Copyright
47
110
 
@@ -2,8 +2,6 @@ module Theman
2
2
  class Agency
3
3
  attr_reader :instance, :column_names, :null_replacements, :sed_commands
4
4
 
5
- attr_writer :stream
6
-
7
5
  def initialize(stream = nil, parent = ::ActiveRecord::Base)
8
6
  # source of the data
9
7
  @stream = stream
@@ -70,12 +68,27 @@ module Theman
70
68
  end
71
69
  end
72
70
  end
73
-
71
+
72
+ def stream(path)
73
+ @stream = path
74
+ end
75
+
76
+ def datestyle(local)
77
+ @psql_datestyle = local
78
+ end
79
+
80
+ def psql_command
81
+ psql = []
82
+ psql << "SET DATESTYLE TO #{@psql_datestyle}" unless @psql_datestyle.nil?
83
+ psql << "COPY #{instance.table_name} FROM STDIN WITH CSV HEADER"
84
+ psql.join("; ")
85
+ end
86
+
74
87
  # use the PostgreSQL COPY command using STDIN with CSV HEADER
75
88
  # reads chunks of 8192 bytes to save memory
76
89
  def pipe_it(l = "")
77
90
  raw = instance.connection.raw_connection
78
- raw.query "COPY #{instance.table_name} FROM STDIN WITH CSV HEADER"
91
+ raw.query psql_command
79
92
  command = "cat #{@stream} #{seds_join}"
80
93
  f = IO.popen(command)
81
94
  begin
@@ -1,3 +1,3 @@
1
1
  module Theman
2
- VERSION = "0.0.3"
2
+ VERSION = "0.0.4"
3
3
  end
@@ -0,0 +1,5 @@
1
+ "COL DATE"
2
+ "2010-12-25"
3
+ "2010-11-19"
4
+ "2010-11-05"
5
+ "2010-11-05"
@@ -0,0 +1,5 @@
1
+ "COL DATE"
2
+ "12/25/2010"
3
+ "11/19/2010"
4
+ "05/11/2010"
5
+ "05/11/2010"
@@ -0,0 +1,5 @@
1
+ "COL DATE"
2
+ "25/12/2010"
3
+ "19/11/2010"
4
+ "05/11/2010"
5
+ "05/11/2010"
data/spec/theman_spec.rb CHANGED
@@ -89,3 +89,81 @@ describe Theman::Agency, "data types" do
89
89
  @instance.where(:col_three => nil).count.should == 2
90
90
  end
91
91
  end
92
+
93
+ describe Theman::Agency, "european date styles" do
94
+ before do
95
+ @csv = File.expand_path(File.join(File.dirname(__FILE__), '..', 'spec', 'fixtures', 'temp_three.csv'))
96
+ @agent = ::Theman::Agency.new @csv do |smith|
97
+ smith.datestyle 'European'
98
+ smith.table do |t|
99
+ t.date :col_date
100
+ end
101
+ end
102
+ @instance = @agent.instance
103
+ end
104
+
105
+ it "should have correct date" do
106
+ date = @instance.first.col_date
107
+ date.day.should == 25
108
+ date.month.should == 12
109
+ end
110
+ end
111
+
112
+ describe Theman::Agency, "US date styles" do
113
+ before do
114
+ @csv = File.expand_path(File.join(File.dirname(__FILE__), '..', 'spec', 'fixtures', 'temp_four.csv'))
115
+ @agent = ::Theman::Agency.new @csv do |smith|
116
+ smith.datestyle 'US'
117
+ smith.table do |t|
118
+ t.date :col_date
119
+ end
120
+ end
121
+ @instance = @agent.instance
122
+ end
123
+
124
+ it "should have correct date" do
125
+ date = @instance.first.col_date
126
+ date.day.should == 25
127
+ date.month.should == 12
128
+ end
129
+ end
130
+
131
+ describe Theman::Agency, "ISO date styles" do
132
+ before do
133
+ @csv = File.expand_path(File.join(File.dirname(__FILE__), '..', 'spec', 'fixtures', 'temp_five.csv'))
134
+ @agent = ::Theman::Agency.new @csv do |smith|
135
+ smith.datestyle 'ISO'
136
+ smith.table do |t|
137
+ t.date :col_date
138
+ end
139
+ end
140
+ @instance = @agent.instance
141
+ end
142
+
143
+ it "should have correct date" do
144
+ date = @instance.first.col_date
145
+ date.day.should == 25
146
+ date.month.should == 12
147
+ end
148
+ end
149
+
150
+ describe Theman::Agency, "procedural" do
151
+ before do
152
+ @csv = File.expand_path(File.join(File.dirname(__FILE__), '..', 'spec', 'fixtures', 'temp_two.csv'))
153
+ end
154
+
155
+ it "should be able to be called procedural" do
156
+ smith = ::Theman::Agency.new
157
+ smith.stream @csv
158
+ smith.datestyle "European"
159
+ smith.seds "-n -e :a -e '1,15!{P;N;D;};N;ba'"
160
+ smith.nulls /"XXXX"/
161
+ smith.date :date
162
+ smith.create_table
163
+ smith.pipe_it
164
+ my_model = smith.instance
165
+ my_model.first.date.class.should == Date
166
+ my_model.first.org_code.class.should == NilClass
167
+ my_model.count.should == 5
168
+ end
169
+ end
data/theman.gemspec CHANGED
@@ -17,6 +17,10 @@ Gem::Specification.new do |s|
17
17
  s.add_development_dependency "bundler", ">= 1.0.0"
18
18
  s.add_development_dependency "rspec", ">= 2.0.0"
19
19
  s.add_development_dependency "activerecord", ">= 3.0.0"
20
+ s.add_development_dependency "pg"
21
+
22
+ s.add_runtime_dependency "activerecord"
23
+ s.add_runtime_dependency "pg"
20
24
 
21
25
  s.files = `git ls-files`.split("\n")
22
26
  s.executables = `git ls-files`.split("\n").map{|f| f =~ /^bin\/(.*)/ ? $1 : nil}.compact
metadata CHANGED
@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
5
5
  segments:
6
6
  - 0
7
7
  - 0
8
- - 3
9
- version: 0.0.3
8
+ - 4
9
+ version: 0.0.4
10
10
  platform: ruby
11
11
  authors:
12
12
  - Rufus Post
@@ -62,6 +62,45 @@ dependencies:
62
62
  version: 3.0.0
63
63
  type: :development
64
64
  version_requirements: *id003
65
+ - !ruby/object:Gem::Dependency
66
+ name: pg
67
+ prerelease: false
68
+ requirement: &id004 !ruby/object:Gem::Requirement
69
+ none: false
70
+ requirements:
71
+ - - ">="
72
+ - !ruby/object:Gem::Version
73
+ segments:
74
+ - 0
75
+ version: "0"
76
+ type: :development
77
+ version_requirements: *id004
78
+ - !ruby/object:Gem::Dependency
79
+ name: activerecord
80
+ prerelease: false
81
+ requirement: &id005 !ruby/object:Gem::Requirement
82
+ none: false
83
+ requirements:
84
+ - - ">="
85
+ - !ruby/object:Gem::Version
86
+ segments:
87
+ - 0
88
+ version: "0"
89
+ type: :runtime
90
+ version_requirements: *id005
91
+ - !ruby/object:Gem::Dependency
92
+ name: pg
93
+ prerelease: false
94
+ requirement: &id006 !ruby/object:Gem::Requirement
95
+ none: false
96
+ requirements:
97
+ - - ">="
98
+ - !ruby/object:Gem::Version
99
+ segments:
100
+ - 0
101
+ version: "0"
102
+ type: :runtime
103
+ version_requirements: *id006
65
104
  description: FasterCSV is great and all but when you get to 100mb files it takes a while and you may only be looking for certain records that match some criteria, enter theman
66
105
  email:
67
106
  - rufuspost@gmail.com
@@ -79,7 +118,10 @@ files:
79
118
  - lib/theman.rb
80
119
  - lib/theman/themans_agency.rb
81
120
  - lib/theman/version.rb
121
+ - spec/fixtures/temp_five.csv
122
+ - spec/fixtures/temp_four.csv
82
123
  - spec/fixtures/temp_one.csv
124
+ - spec/fixtures/temp_three.csv
83
125
  - spec/fixtures/temp_two.csv
84
126
  - spec/spec_helper.rb
85
127
  - spec/theman_spec.rb