chicago-etl 0.0.13 → 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (71) hide show
  1. data/Gemfile +8 -3
  2. data/README.rdoc +4 -1
  3. data/VERSION +1 -1
  4. data/chicago-etl.gemspec +59 -22
  5. data/chicago-flow.gemspec +92 -0
  6. data/lib/chicago/etl/batch.rb +9 -2
  7. data/lib/chicago/etl/core_extensions.rb +12 -0
  8. data/lib/chicago/etl/counter.rb +8 -1
  9. data/lib/chicago/etl/dataset_batch_stage.rb +52 -0
  10. data/lib/chicago/etl/key_builder.rb +17 -39
  11. data/lib/chicago/etl/load_dataset_builder.rb +3 -1
  12. data/lib/chicago/etl/load_pipeline_stage_builder.rb +142 -0
  13. data/lib/chicago/etl/pipeline.rb +151 -0
  14. data/lib/chicago/etl/schema_table_sink_factory.rb +74 -0
  15. data/lib/chicago/etl/screens/column_screen.rb +26 -25
  16. data/lib/chicago/etl/screens/invalid_element.rb +5 -5
  17. data/lib/chicago/etl/screens/missing_value.rb +4 -2
  18. data/lib/chicago/etl/screens/out_of_bounds.rb +2 -0
  19. data/lib/chicago/etl/table_builder.rb +4 -2
  20. data/lib/chicago/etl/task_invocation.rb +0 -1
  21. data/lib/chicago/etl/transformations.rb +128 -0
  22. data/lib/chicago/etl.rb +39 -8
  23. data/lib/chicago/flow/array_sink.rb +35 -0
  24. data/lib/chicago/flow/array_source.rb +15 -0
  25. data/lib/chicago/flow/dataset_source.rb +23 -0
  26. data/lib/chicago/flow/errors.rb +14 -0
  27. data/lib/chicago/flow/filter.rb +15 -0
  28. data/lib/chicago/flow/mysql.rb +4 -0
  29. data/lib/chicago/{etl/mysql_load_file_value_transformer.rb → flow/mysql_file_serializer.rb} +7 -4
  30. data/lib/chicago/flow/mysql_file_sink.rb +68 -0
  31. data/lib/chicago/flow/null_sink.rb +8 -0
  32. data/lib/chicago/flow/pipeline_endpoint.rb +15 -0
  33. data/lib/chicago/flow/pipeline_stage.rb +68 -0
  34. data/lib/chicago/flow/sink.rb +53 -0
  35. data/lib/chicago/flow/transformation.rb +169 -0
  36. data/lib/chicago/flow/transformation_chain.rb +40 -0
  37. data/spec/etl/batch_spec.rb +2 -1
  38. data/spec/etl/core_extensions_spec.rb +13 -0
  39. data/spec/etl/dataset_batch_stage_spec.rb +55 -0
  40. data/spec/etl/key_builder_spec.rb +25 -83
  41. data/spec/etl/pipeline_stage_builder_spec.rb +39 -0
  42. data/spec/etl/schema_table_sink_factory_spec.rb +69 -0
  43. data/spec/etl/screens/invalid_element_spec.rb +10 -11
  44. data/spec/etl/screens/missing_value_spec.rb +21 -21
  45. data/spec/etl/screens/out_of_bounds_spec.rb +21 -29
  46. data/spec/etl/transformations_spec.rb +109 -0
  47. data/spec/flow/array_sink_spec.rb +26 -0
  48. data/spec/flow/array_source_spec.rb +20 -0
  49. data/spec/flow/dataset_source_spec.rb +15 -0
  50. data/spec/flow/filter_spec.rb +13 -0
  51. data/spec/flow/mysql_file_serializer_spec.rb +27 -0
  52. data/spec/flow/mysql_file_sink_spec.rb +94 -0
  53. data/spec/flow/mysql_integration_spec.rb +72 -0
  54. data/spec/flow/pipeline_stage_spec.rb +89 -0
  55. data/spec/flow/transformation_chain_spec.rb +76 -0
  56. data/spec/flow/transformation_spec.rb +91 -0
  57. data/spec/spec_helper.rb +5 -0
  58. metadata +135 -39
  59. data/lib/chicago/etl/buffering_insert_writer.rb +0 -36
  60. data/lib/chicago/etl/mysql_dumpfile.rb +0 -32
  61. data/lib/chicago/etl/screens/composite_screen.rb +0 -17
  62. data/lib/chicago/etl/sequel/load_data_infile.rb +0 -141
  63. data/lib/chicago/etl/sink.rb +0 -61
  64. data/lib/chicago/etl/transformations/add_insert_timestamp.rb +0 -16
  65. data/spec/etl/mysql_dumpfile_spec.rb +0 -42
  66. data/spec/etl/mysql_load_file_value_transformer_spec.rb +0 -27
  67. data/spec/etl/screens/composite_screen_spec.rb +0 -25
  68. data/spec/etl/sequel/load_data_infile_expression_spec.rb +0 -60
  69. data/spec/etl/sequel/load_data_infile_spec.rb +0 -37
  70. data/spec/etl/sink_spec.rb +0 -7
  71. data/spec/etl/transformations/add_insert_timestamp_spec.rb +0 -9
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: chicago-etl
3
3
  version: !ruby/object:Gem::Version
4
- hash: 5
4
+ hash: 27
5
5
  prerelease:
6
6
  segments:
7
7
  - 0
8
+ - 1
8
9
  - 0
9
- - 13
10
- version: 0.0.13
10
+ version: 0.1.0
11
11
  platform: ruby
12
12
  authors:
13
13
  - Roland Swingler
@@ -15,7 +15,7 @@ autorequire:
15
15
  bindir: bin
16
16
  cert_chain: []
17
17
 
18
- date: 2013-04-16 00:00:00 Z
18
+ date: 2013-09-05 00:00:00 Z
19
19
  dependencies:
20
20
  - !ruby/object:Gem::Dependency
21
21
  version_requirements: &id001 !ruby/object:Gem::Requirement
@@ -36,16 +36,16 @@ dependencies:
36
36
  version_requirements: &id002 !ruby/object:Gem::Requirement
37
37
  none: false
38
38
  requirements:
39
- - - ~>
39
+ - - ">="
40
40
  - !ruby/object:Gem::Version
41
- hash: 7
41
+ hash: 3
42
42
  segments:
43
- - 2
44
- version: "2"
43
+ - 0
44
+ version: "0"
45
45
  requirement: *id002
46
- type: :development
46
+ type: :runtime
47
47
  prerelease: false
48
- name: rspec
48
+ name: fastercsv
49
49
  - !ruby/object:Gem::Dependency
50
50
  version_requirements: &id003 !ruby/object:Gem::Requirement
51
51
  none: false
@@ -57,23 +57,25 @@ dependencies:
57
57
  - 0
58
58
  version: "0"
59
59
  requirement: *id003
60
- type: :development
60
+ type: :runtime
61
61
  prerelease: false
62
- name: timecop
62
+ name: sequel
63
63
  - !ruby/object:Gem::Dependency
64
64
  version_requirements: &id004 !ruby/object:Gem::Requirement
65
65
  none: false
66
66
  requirements:
67
67
  - - ">="
68
68
  - !ruby/object:Gem::Version
69
- hash: 3
69
+ hash: 27
70
70
  segments:
71
71
  - 0
72
- version: "0"
72
+ - 0
73
+ - 2
74
+ version: 0.0.2
73
75
  requirement: *id004
74
- type: :development
76
+ type: :runtime
75
77
  prerelease: false
76
- name: yard
78
+ name: sequel_load_data_infile
77
79
  - !ruby/object:Gem::Dependency
78
80
  version_requirements: &id005 !ruby/object:Gem::Requirement
79
81
  none: false
@@ -85,23 +87,23 @@ dependencies:
85
87
  - 0
86
88
  version: "0"
87
89
  requirement: *id005
88
- type: :development
90
+ type: :runtime
89
91
  prerelease: false
90
- name: flog
92
+ name: sequel_fast_columns
91
93
  - !ruby/object:Gem::Dependency
92
94
  version_requirements: &id006 !ruby/object:Gem::Requirement
93
95
  none: false
94
96
  requirements:
95
- - - ">="
97
+ - - ~>
96
98
  - !ruby/object:Gem::Version
97
- hash: 3
99
+ hash: 7
98
100
  segments:
99
- - 0
100
- version: "0"
101
+ - 2
102
+ version: "2"
101
103
  requirement: *id006
102
104
  type: :development
103
105
  prerelease: false
104
- name: jeweler
106
+ name: rspec
105
107
  - !ruby/object:Gem::Dependency
106
108
  version_requirements: &id007 !ruby/object:Gem::Requirement
107
109
  none: false
@@ -115,7 +117,7 @@ dependencies:
115
117
  requirement: *id007
116
118
  type: :development
117
119
  prerelease: false
118
- name: rcov
120
+ name: timecop
119
121
  - !ruby/object:Gem::Dependency
120
122
  version_requirements: &id008 !ruby/object:Gem::Requirement
121
123
  none: false
@@ -129,7 +131,7 @@ dependencies:
129
131
  requirement: *id008
130
132
  type: :development
131
133
  prerelease: false
132
- name: simplecov
134
+ name: yard
133
135
  - !ruby/object:Gem::Dependency
134
136
  version_requirements: &id009 !ruby/object:Gem::Requirement
135
137
  none: false
@@ -143,7 +145,79 @@ dependencies:
143
145
  requirement: *id009
144
146
  type: :development
145
147
  prerelease: false
148
+ name: flog
149
+ - !ruby/object:Gem::Dependency
150
+ version_requirements: &id010 !ruby/object:Gem::Requirement
151
+ none: false
152
+ requirements:
153
+ - - ">="
154
+ - !ruby/object:Gem::Version
155
+ hash: 3
156
+ segments:
157
+ - 0
158
+ version: "0"
159
+ requirement: *id010
160
+ type: :development
161
+ prerelease: false
162
+ name: simplecov
163
+ - !ruby/object:Gem::Dependency
164
+ version_requirements: &id011 !ruby/object:Gem::Requirement
165
+ none: false
166
+ requirements:
167
+ - - ">="
168
+ - !ruby/object:Gem::Version
169
+ hash: 3
170
+ segments:
171
+ - 0
172
+ version: "0"
173
+ requirement: *id011
174
+ type: :development
175
+ prerelease: false
146
176
  name: ZenTest
177
+ - !ruby/object:Gem::Dependency
178
+ version_requirements: &id012 !ruby/object:Gem::Requirement
179
+ none: false
180
+ requirements:
181
+ - - "="
182
+ - !ruby/object:Gem::Version
183
+ hash: 45
184
+ segments:
185
+ - 2
186
+ - 8
187
+ - 1
188
+ version: 2.8.1
189
+ requirement: *id012
190
+ type: :development
191
+ prerelease: false
192
+ name: mysql
193
+ - !ruby/object:Gem::Dependency
194
+ version_requirements: &id013 !ruby/object:Gem::Requirement
195
+ none: false
196
+ requirements:
197
+ - - ~>
198
+ - !ruby/object:Gem::Version
199
+ hash: 1
200
+ segments:
201
+ - 1
202
+ version: "1"
203
+ requirement: *id013
204
+ type: :development
205
+ prerelease: false
206
+ name: bundler
207
+ - !ruby/object:Gem::Dependency
208
+ version_requirements: &id014 !ruby/object:Gem::Requirement
209
+ none: false
210
+ requirements:
211
+ - - ">="
212
+ - !ruby/object:Gem::Version
213
+ hash: 3
214
+ segments:
215
+ - 0
216
+ version: "0"
217
+ requirement: *id014
218
+ type: :development
219
+ prerelease: false
220
+ name: jeweler
147
221
  description: ETL tools for Chicago
148
222
  email: roland.swingler@gmail.com
149
223
  executables: []
@@ -162,52 +236,74 @@ files:
162
236
  - Rakefile
163
237
  - VERSION
164
238
  - chicago-etl.gemspec
239
+ - chicago-flow.gemspec
165
240
  - lib/chicago-etl.rb
166
241
  - lib/chicago/etl.rb
167
242
  - lib/chicago/etl/batch.rb
168
- - lib/chicago/etl/buffering_insert_writer.rb
243
+ - lib/chicago/etl/core_extensions.rb
169
244
  - lib/chicago/etl/counter.rb
245
+ - lib/chicago/etl/dataset_batch_stage.rb
170
246
  - lib/chicago/etl/key_builder.rb
171
247
  - lib/chicago/etl/load_dataset_builder.rb
172
- - lib/chicago/etl/mysql_dumpfile.rb
173
- - lib/chicago/etl/mysql_load_file_value_transformer.rb
248
+ - lib/chicago/etl/load_pipeline_stage_builder.rb
249
+ - lib/chicago/etl/pipeline.rb
250
+ - lib/chicago/etl/schema_table_sink_factory.rb
174
251
  - lib/chicago/etl/screens/column_screen.rb
175
- - lib/chicago/etl/screens/composite_screen.rb
176
252
  - lib/chicago/etl/screens/invalid_element.rb
177
253
  - lib/chicago/etl/screens/missing_value.rb
178
254
  - lib/chicago/etl/screens/out_of_bounds.rb
179
255
  - lib/chicago/etl/sequel/dependant_tables.rb
180
256
  - lib/chicago/etl/sequel/filter_to_etl_batch.rb
181
- - lib/chicago/etl/sequel/load_data_infile.rb
182
- - lib/chicago/etl/sink.rb
183
257
  - lib/chicago/etl/table_builder.rb
184
258
  - lib/chicago/etl/task_invocation.rb
185
259
  - lib/chicago/etl/tasks.rb
186
- - lib/chicago/etl/transformations/add_insert_timestamp.rb
260
+ - lib/chicago/etl/transformations.rb
187
261
  - lib/chicago/etl/transformations/uk_post_code.rb
188
262
  - lib/chicago/etl/transformations/uk_post_code_field.rb
263
+ - lib/chicago/flow/array_sink.rb
264
+ - lib/chicago/flow/array_source.rb
265
+ - lib/chicago/flow/dataset_source.rb
266
+ - lib/chicago/flow/errors.rb
267
+ - lib/chicago/flow/filter.rb
268
+ - lib/chicago/flow/mysql.rb
269
+ - lib/chicago/flow/mysql_file_serializer.rb
270
+ - lib/chicago/flow/mysql_file_sink.rb
271
+ - lib/chicago/flow/null_sink.rb
272
+ - lib/chicago/flow/pipeline_endpoint.rb
273
+ - lib/chicago/flow/pipeline_stage.rb
274
+ - lib/chicago/flow/sink.rb
275
+ - lib/chicago/flow/transformation.rb
276
+ - lib/chicago/flow/transformation_chain.rb
189
277
  - spec/db_connections.yml.dist
190
278
  - spec/etl/batch_spec.rb
279
+ - spec/etl/core_extensions_spec.rb
191
280
  - spec/etl/counter_spec.rb
281
+ - spec/etl/dataset_batch_stage_spec.rb
192
282
  - spec/etl/etl_batch_id_dataset_filter.rb
193
283
  - spec/etl/key_builder_spec.rb
194
284
  - spec/etl/load_dataset_builder_spec.rb
195
- - spec/etl/mysql_dumpfile_spec.rb
196
- - spec/etl/mysql_load_file_value_transformer_spec.rb
197
- - spec/etl/screens/composite_screen_spec.rb
285
+ - spec/etl/pipeline_stage_builder_spec.rb
286
+ - spec/etl/schema_table_sink_factory_spec.rb
198
287
  - spec/etl/screens/invalid_element_spec.rb
199
288
  - spec/etl/screens/missing_value_spec.rb
200
289
  - spec/etl/screens/out_of_bounds_spec.rb
201
290
  - spec/etl/sequel/dependant_tables_spec.rb
202
291
  - spec/etl/sequel/filter_to_etl_batch_spec.rb
203
- - spec/etl/sequel/load_data_infile_expression_spec.rb
204
- - spec/etl/sequel/load_data_infile_spec.rb
205
- - spec/etl/sink_spec.rb
206
292
  - spec/etl/table_builder_spec.rb
207
293
  - spec/etl/task_spec.rb
208
- - spec/etl/transformations/add_insert_timestamp_spec.rb
209
294
  - spec/etl/transformations/uk_post_code_field_spec.rb
210
295
  - spec/etl/transformations/uk_post_code_spec.rb
296
+ - spec/etl/transformations_spec.rb
297
+ - spec/flow/array_sink_spec.rb
298
+ - spec/flow/array_source_spec.rb
299
+ - spec/flow/dataset_source_spec.rb
300
+ - spec/flow/filter_spec.rb
301
+ - spec/flow/mysql_file_serializer_spec.rb
302
+ - spec/flow/mysql_file_sink_spec.rb
303
+ - spec/flow/mysql_integration_spec.rb
304
+ - spec/flow/pipeline_stage_spec.rb
305
+ - spec/flow/transformation_chain_spec.rb
306
+ - spec/flow/transformation_spec.rb
211
307
  - spec/spec_helper.rb
212
308
  homepage: http://github.com/notonthehighstreet/chicago-etl
213
309
  licenses:
@@ -1,36 +0,0 @@
1
- require 'chicago/etl/sink'
2
-
3
- module Chicago
4
- module ETL
5
- # Wrapper around a dataset to allowed buffered inserts.
6
- #
7
- # @api public
8
- class BufferingInsertWriter < Sink
9
- # The number of rows written before inserting to the DB.
10
- BUFFER_SIZE = 10_000
11
-
12
- def initialize(dataset, column_names, key=nil)
13
- super([], column_names, key)
14
- @dataset = dataset
15
- end
16
-
17
- def flush
18
- @dataset.insert_replace.import(column_names, output)
19
- output.clear
20
- end
21
-
22
- protected
23
-
24
- def write(row)
25
- output << @column_names.map {|name| row[name] }
26
- flush if reached_buffer_limit?
27
- end
28
-
29
- private
30
-
31
- def reached_buffer_limit?
32
- output.size >= BUFFER_SIZE
33
- end
34
- end
35
- end
36
- end
@@ -1,32 +0,0 @@
1
- require 'chicago/etl/sink'
2
-
3
- module Chicago
4
- module ETL
5
- # Wrapper around FasterCSV's output object, to convert values to a
6
- # format required by MySQL's LOAD DATA INFILE command.
7
- #
8
- # @api public
9
- class MysqlDumpfile < Sink
10
- # Creates a new writer.
11
- #
12
- # @param csv a FasterCSV output object
13
- # @param [Symbol] column_names columns to be output
14
- # @param key an optional key to ensure rows are written only once.
15
- def initialize(csv, column_names, key=nil)
16
- super(csv, column_names, key)
17
- @transformer = MysqlLoadFileValueTransformer.new
18
- end
19
-
20
- protected
21
-
22
- # Writes a row to the output.
23
- #
24
- # @param Hash row Only keys in column_names will be output.
25
- def write(row)
26
- output << @column_names.map {|name|
27
- @transformer.transform(row[name])
28
- }
29
- end
30
- end
31
- end
32
- end
@@ -1,17 +0,0 @@
1
- module Chicago
2
- module ETL
3
- module Screens
4
- class CompositeScreen
5
- def initialize(*screens)
6
- @screens = screens.flatten
7
- end
8
-
9
- def call(row, errors=[])
10
- @screens.inject([row,errors]) do |(row, errors), screen|
11
- screen.call(row, errors)
12
- end
13
- end
14
- end
15
- end
16
- end
17
- end
@@ -1,141 +0,0 @@
1
- module Chicago
2
- module ETL
3
- module SequelExtensions
4
- # @api private
5
- class LoadDataInfileExpression
6
- attr_reader :path, :table, :columns, :ignore, :character_set
7
-
8
- def initialize(path, table, columns, opts={})
9
- @path = path
10
- @table = table
11
- @columns = columns
12
- @ignore = opts[:ignore]
13
- @update = opts[:update]
14
- @set = opts[:set] || {}
15
- @character_set = opts[:character_set] || "utf8"
16
- if opts[:format] == :csv
17
- @field_terminator = ","
18
- @enclosed_by = '"'
19
- @escaped_by = '"'
20
- end
21
- end
22
-
23
- def replace?
24
- @update == :replace
25
- end
26
-
27
- def ignore?
28
- @update == :ignore
29
- end
30
-
31
- def to_sql(db)
32
- @db = db
33
- [load_fragment,
34
- replace_fragment,
35
- table_fragment,
36
- character_set_fragment,
37
- field_terminator_fragment,
38
- field_enclosure_fragment,
39
- escape_fragment,
40
- ignore_fragment,
41
- column_fragment,
42
- set_fragment].compact.join(" ")
43
- end
44
-
45
- private
46
-
47
- def load_fragment
48
- "LOAD DATA INFILE '#{path}'"
49
- end
50
-
51
- def replace_fragment
52
- @update.to_s.upcase if replace? || ignore?
53
- end
54
-
55
- def table_fragment
56
- "INTO TABLE `#{table}`"
57
- end
58
-
59
- def character_set_fragment
60
- "CHARACTER SET '#{character_set}'"
61
- end
62
-
63
- def field_terminator_fragment
64
- "FIELDS TERMINATED BY '#{@field_terminator}'" if @field_terminator
65
- end
66
-
67
- def field_enclosure_fragment
68
- "OPTIONALLY ENCLOSED BY '#{@enclosed_by}'" if @enclosed_by
69
- end
70
-
71
- def escape_fragment
72
- "ESCAPED BY '#{@escaped_by}'" if @escaped_by
73
- end
74
-
75
- def ignore_fragment
76
- "IGNORE #{ignore} LINES" if ignore
77
- end
78
-
79
- def column_fragment
80
- "(" + columns.map {|c| format_column(c) }.join(",") + ")"
81
- end
82
-
83
- def set_fragment
84
- unless @set.empty?
85
- "SET " + @set.map do |k, v|
86
- "#{@db.literal(k)} = #{@db.literal(v)}"
87
- end.join(", ")
88
- end
89
- end
90
-
91
- def format_column(column)
92
- column.to_s[0..0] == "@" ? column : "`#{column}`"
93
- end
94
- end
95
-
96
- module LoadDataInfile
97
- # Load data in file specified at path.
98
- #
99
- # Columns is a list of columns to load - column names starting
100
- # with an @ symbol will be treated as variables.
101
- #
102
- # By default, this will generate a REPLACE INTO TABLE
103
- # statement.
104
- #
105
- # Options:
106
- # :ignore - the number of lines to ignore in the source file
107
- # :update - nil, :ignore or :replace
108
- # :set - a hash specifying autopopulation of columns
109
- # :character_set - the character set of the file, UTF8 default
110
- # :format - either nil or :csv
111
- def load_infile(path, columns, options={})
112
- execute_dui(load_infile_sql(path, columns, options))
113
- end
114
-
115
- def load_infile_sql(path, columns, options={})
116
- replacement = opts[:insert_ignore] ? :ignore : :replace
117
- options = {:update => replacement}.merge(options)
118
- LoadDataInfileExpression.new(path,
119
- opts[:from].first,
120
- columns,
121
- options).
122
- to_sql(db)
123
- end
124
-
125
- # Loads the CSV data columns in path into this dataset's
126
- # table.
127
- #
128
- # See load_infile for more options.
129
- def load_csv_infile(path, columns, options={})
130
- execute_dui(load_csv_infile_sql(path, columns, options))
131
- end
132
-
133
- def load_csv_infile_sql(path, columns, options={})
134
- load_infile_sql(path, columns, options.merge(:format => :csv))
135
- end
136
- end
137
- end
138
- end
139
- end
140
-
141
- Sequel::Dataset.send :include, Chicago::ETL::SequelExtensions::LoadDataInfile
@@ -1,61 +0,0 @@
1
- require 'set'
2
-
3
- module Chicago
4
- module ETL
5
- # An end point to write rows.
6
- #
7
- # @abstract
8
- # @api public
9
- class Sink
10
- # Returns the column names expected to be written to this sink.
11
- # @api public
12
- attr_reader :column_names
13
-
14
- # @abstract
15
- def initialize(output, column_names, unique_row_key=nil)
16
- @output = output
17
- @column_names = column_names
18
- @written_rows = Set.new
19
- @unique_row_key = unique_row_key
20
- end
21
-
22
- # Writes a row to the output.
23
- #
24
- # Row will not be written to the output if it has already been
25
- # written, as identified by the unique row key.
26
- #
27
- # Should not be overridden by subclasses - overwrite write instead.
28
- def <<(row)
29
- unless written?(row)
30
- write row
31
- @written_rows << row[@unique_row_key]
32
- end
33
- end
34
-
35
- # Flushes any remaining writes to the output.
36
- #
37
- # By default does nothing, subclasses should override where
38
- # necessary.
39
- def flush
40
- end
41
-
42
- # Returns true if this row has previously been written to the
43
- # output.
44
- #
45
- # Always returns false if no key to determine row uniqueness has
46
- # been provided.
47
- def written?(row)
48
- return false if @unique_row_key.nil?
49
- @written_rows.include?(row[@unique_row_key])
50
- end
51
-
52
- protected
53
-
54
- attr_reader :output
55
-
56
- # @abstract
57
- def write(row)
58
- end
59
- end
60
- end
61
- end
@@ -1,16 +0,0 @@
1
- module Chicago
2
- module ETL
3
- module Transformations
4
- class AddInsertTimestamp
5
- def initialize(timestamp=Time.now)
6
- @insert_timestamp = timestamp.utc
7
- end
8
-
9
- def call(row, errors=[])
10
- row[:_inserted_at] = @insert_timestamp
11
- [row, errors]
12
- end
13
- end
14
- end
15
- end
16
- end
@@ -1,42 +0,0 @@
1
- require 'spec_helper'
2
-
3
- describe Chicago::ETL::MysqlDumpfile do
4
- before :each do
5
- @csv = mock(:csv)
6
- end
7
-
8
- it "outputs specified column values in order" do
9
- dumpfile = described_class.new(@csv, [:foo, :bar])
10
- @csv.should_receive(:<<).with(["1", "2"])
11
-
12
- dumpfile << {:foo => "1", :bar => "2", :baz => "not output"}
13
- end
14
-
15
- it "transforms values with a MysqlLoadFileValueTransformer" do
16
- transformer = mock(:transformer)
17
- Chicago::ETL::MysqlLoadFileValueTransformer.stub(:new).and_return(transformer)
18
-
19
- transformer.should_receive(:transform).with("bar").and_return("baz")
20
- @csv.should_receive(:<<).with(["baz"])
21
-
22
- dumpfile = described_class.new(@csv, [:foo])
23
- dumpfile << {:foo => "bar"}
24
- end
25
-
26
- it "will write a row only once with the same key" do
27
- dumpfile = described_class.new(@csv, [:foo], :id)
28
- @csv.should_receive(:<<).with(["bar"])
29
-
30
- dumpfile << {:id => 1, :foo => "bar"}
31
- dumpfile << {:id => 1, :foo => "baz"}
32
- end
33
-
34
- it "will write a row multiple times if no key is specified" do
35
- dumpfile = described_class.new(@csv, [:foo])
36
- @csv.should_receive(:<<).with(["bar"])
37
- @csv.should_receive(:<<).with(["baz"])
38
-
39
- dumpfile << {:id => 1, :foo => "bar"}
40
- dumpfile << {:id => 1, :foo => "baz"}
41
- end
42
- end
@@ -1,27 +0,0 @@
1
- require 'spec_helper'
2
-
3
- describe Chicago::ETL::MysqlLoadFileValueTransformer do
4
- it "transforms nil into \\N" do
5
- subject.transform(nil).should == "\\N"
6
- end
7
-
8
- it "transforms true into '1'" do
9
- subject.transform(true).should == "1"
10
- end
11
-
12
- it "transforms false into '0'" do
13
- subject.transform(false).should == "0"
14
- end
15
-
16
- it "transforms times into mysql time format" do
17
- subject.transform(Time.local(2011,01,02,10,30,50)).should == "2011-01-02 10:30:50"
18
- end
19
-
20
- it "transforms datetimes into mysql time format" do
21
- subject.transform(DateTime.new(2011,01,02,10,30,50)).should == "2011-01-02 10:30:50"
22
- end
23
-
24
- it "transforms dates into mysql date format" do
25
- subject.transform(Date.new(2011,01,02)).should == "2011-01-02"
26
- end
27
- end
@@ -1,25 +0,0 @@
1
- require 'spec_helper'
2
-
3
- describe Chicago::ETL::Screens::CompositeScreen do
4
- let(:screen) do
5
- i = 0
6
-
7
- lambda {|row, errors|
8
- i += 1
9
- errors << i
10
- [row, errors]
11
- }
12
- end
13
-
14
- it "calls all child screens" do
15
- row, errors = described_class.new([screen, screen]).call({:a => 1}, [])
16
- row.should == {:a => 1}
17
- errors.should == [1,2]
18
- end
19
-
20
- it "supports variable arguments in the constructor" do
21
- row, errors = described_class.new(screen, screen).call({:a => 1}, [])
22
- row.should == {:a => 1}
23
- errors.should == [1,2]
24
- end
25
- end