chicago-etl 0.0.13 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71) hide show
  1. data/Gemfile +8 -3
  2. data/README.rdoc +4 -1
  3. data/VERSION +1 -1
  4. data/chicago-etl.gemspec +59 -22
  5. data/chicago-flow.gemspec +92 -0
  6. data/lib/chicago/etl/batch.rb +9 -2
  7. data/lib/chicago/etl/core_extensions.rb +12 -0
  8. data/lib/chicago/etl/counter.rb +8 -1
  9. data/lib/chicago/etl/dataset_batch_stage.rb +52 -0
  10. data/lib/chicago/etl/key_builder.rb +17 -39
  11. data/lib/chicago/etl/load_dataset_builder.rb +3 -1
  12. data/lib/chicago/etl/load_pipeline_stage_builder.rb +142 -0
  13. data/lib/chicago/etl/pipeline.rb +151 -0
  14. data/lib/chicago/etl/schema_table_sink_factory.rb +74 -0
  15. data/lib/chicago/etl/screens/column_screen.rb +26 -25
  16. data/lib/chicago/etl/screens/invalid_element.rb +5 -5
  17. data/lib/chicago/etl/screens/missing_value.rb +4 -2
  18. data/lib/chicago/etl/screens/out_of_bounds.rb +2 -0
  19. data/lib/chicago/etl/table_builder.rb +4 -2
  20. data/lib/chicago/etl/task_invocation.rb +0 -1
  21. data/lib/chicago/etl/transformations.rb +128 -0
  22. data/lib/chicago/etl.rb +39 -8
  23. data/lib/chicago/flow/array_sink.rb +35 -0
  24. data/lib/chicago/flow/array_source.rb +15 -0
  25. data/lib/chicago/flow/dataset_source.rb +23 -0
  26. data/lib/chicago/flow/errors.rb +14 -0
  27. data/lib/chicago/flow/filter.rb +15 -0
  28. data/lib/chicago/flow/mysql.rb +4 -0
  29. data/lib/chicago/{etl/mysql_load_file_value_transformer.rb → flow/mysql_file_serializer.rb} +7 -4
  30. data/lib/chicago/flow/mysql_file_sink.rb +68 -0
  31. data/lib/chicago/flow/null_sink.rb +8 -0
  32. data/lib/chicago/flow/pipeline_endpoint.rb +15 -0
  33. data/lib/chicago/flow/pipeline_stage.rb +68 -0
  34. data/lib/chicago/flow/sink.rb +53 -0
  35. data/lib/chicago/flow/transformation.rb +169 -0
  36. data/lib/chicago/flow/transformation_chain.rb +40 -0
  37. data/spec/etl/batch_spec.rb +2 -1
  38. data/spec/etl/core_extensions_spec.rb +13 -0
  39. data/spec/etl/dataset_batch_stage_spec.rb +55 -0
  40. data/spec/etl/key_builder_spec.rb +25 -83
  41. data/spec/etl/pipeline_stage_builder_spec.rb +39 -0
  42. data/spec/etl/schema_table_sink_factory_spec.rb +69 -0
  43. data/spec/etl/screens/invalid_element_spec.rb +10 -11
  44. data/spec/etl/screens/missing_value_spec.rb +21 -21
  45. data/spec/etl/screens/out_of_bounds_spec.rb +21 -29
  46. data/spec/etl/transformations_spec.rb +109 -0
  47. data/spec/flow/array_sink_spec.rb +26 -0
  48. data/spec/flow/array_source_spec.rb +20 -0
  49. data/spec/flow/dataset_source_spec.rb +15 -0
  50. data/spec/flow/filter_spec.rb +13 -0
  51. data/spec/flow/mysql_file_serializer_spec.rb +27 -0
  52. data/spec/flow/mysql_file_sink_spec.rb +94 -0
  53. data/spec/flow/mysql_integration_spec.rb +72 -0
  54. data/spec/flow/pipeline_stage_spec.rb +89 -0
  55. data/spec/flow/transformation_chain_spec.rb +76 -0
  56. data/spec/flow/transformation_spec.rb +91 -0
  57. data/spec/spec_helper.rb +5 -0
  58. metadata +135 -39
  59. data/lib/chicago/etl/buffering_insert_writer.rb +0 -36
  60. data/lib/chicago/etl/mysql_dumpfile.rb +0 -32
  61. data/lib/chicago/etl/screens/composite_screen.rb +0 -17
  62. data/lib/chicago/etl/sequel/load_data_infile.rb +0 -141
  63. data/lib/chicago/etl/sink.rb +0 -61
  64. data/lib/chicago/etl/transformations/add_insert_timestamp.rb +0 -16
  65. data/spec/etl/mysql_dumpfile_spec.rb +0 -42
  66. data/spec/etl/mysql_load_file_value_transformer_spec.rb +0 -27
  67. data/spec/etl/screens/composite_screen_spec.rb +0 -25
  68. data/spec/etl/sequel/load_data_infile_expression_spec.rb +0 -60
  69. data/spec/etl/sequel/load_data_infile_spec.rb +0 -37
  70. data/spec/etl/sink_spec.rb +0 -7
  71. data/spec/etl/transformations/add_insert_timestamp_spec.rb +0 -9
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: chicago-etl
3
3
  version: !ruby/object:Gem::Version
4
- hash: 5
4
+ hash: 27
5
5
  prerelease:
6
6
  segments:
7
7
  - 0
8
+ - 1
8
9
  - 0
9
- - 13
10
- version: 0.0.13
10
+ version: 0.1.0
11
11
  platform: ruby
12
12
  authors:
13
13
  - Roland Swingler
@@ -15,7 +15,7 @@ autorequire:
15
15
  bindir: bin
16
16
  cert_chain: []
17
17
 
18
- date: 2013-04-16 00:00:00 Z
18
+ date: 2013-09-05 00:00:00 Z
19
19
  dependencies:
20
20
  - !ruby/object:Gem::Dependency
21
21
  version_requirements: &id001 !ruby/object:Gem::Requirement
@@ -36,16 +36,16 @@ dependencies:
36
36
  version_requirements: &id002 !ruby/object:Gem::Requirement
37
37
  none: false
38
38
  requirements:
39
- - - ~>
39
+ - - ">="
40
40
  - !ruby/object:Gem::Version
41
- hash: 7
41
+ hash: 3
42
42
  segments:
43
- - 2
44
- version: "2"
43
+ - 0
44
+ version: "0"
45
45
  requirement: *id002
46
- type: :development
46
+ type: :runtime
47
47
  prerelease: false
48
- name: rspec
48
+ name: fastercsv
49
49
  - !ruby/object:Gem::Dependency
50
50
  version_requirements: &id003 !ruby/object:Gem::Requirement
51
51
  none: false
@@ -57,23 +57,25 @@ dependencies:
57
57
  - 0
58
58
  version: "0"
59
59
  requirement: *id003
60
- type: :development
60
+ type: :runtime
61
61
  prerelease: false
62
- name: timecop
62
+ name: sequel
63
63
  - !ruby/object:Gem::Dependency
64
64
  version_requirements: &id004 !ruby/object:Gem::Requirement
65
65
  none: false
66
66
  requirements:
67
67
  - - ">="
68
68
  - !ruby/object:Gem::Version
69
- hash: 3
69
+ hash: 27
70
70
  segments:
71
71
  - 0
72
- version: "0"
72
+ - 0
73
+ - 2
74
+ version: 0.0.2
73
75
  requirement: *id004
74
- type: :development
76
+ type: :runtime
75
77
  prerelease: false
76
- name: yard
78
+ name: sequel_load_data_infile
77
79
  - !ruby/object:Gem::Dependency
78
80
  version_requirements: &id005 !ruby/object:Gem::Requirement
79
81
  none: false
@@ -85,23 +87,23 @@ dependencies:
85
87
  - 0
86
88
  version: "0"
87
89
  requirement: *id005
88
- type: :development
90
+ type: :runtime
89
91
  prerelease: false
90
- name: flog
92
+ name: sequel_fast_columns
91
93
  - !ruby/object:Gem::Dependency
92
94
  version_requirements: &id006 !ruby/object:Gem::Requirement
93
95
  none: false
94
96
  requirements:
95
- - - ">="
97
+ - - ~>
96
98
  - !ruby/object:Gem::Version
97
- hash: 3
99
+ hash: 7
98
100
  segments:
99
- - 0
100
- version: "0"
101
+ - 2
102
+ version: "2"
101
103
  requirement: *id006
102
104
  type: :development
103
105
  prerelease: false
104
- name: jeweler
106
+ name: rspec
105
107
  - !ruby/object:Gem::Dependency
106
108
  version_requirements: &id007 !ruby/object:Gem::Requirement
107
109
  none: false
@@ -115,7 +117,7 @@ dependencies:
115
117
  requirement: *id007
116
118
  type: :development
117
119
  prerelease: false
118
- name: rcov
120
+ name: timecop
119
121
  - !ruby/object:Gem::Dependency
120
122
  version_requirements: &id008 !ruby/object:Gem::Requirement
121
123
  none: false
@@ -129,7 +131,7 @@ dependencies:
129
131
  requirement: *id008
130
132
  type: :development
131
133
  prerelease: false
132
- name: simplecov
134
+ name: yard
133
135
  - !ruby/object:Gem::Dependency
134
136
  version_requirements: &id009 !ruby/object:Gem::Requirement
135
137
  none: false
@@ -143,7 +145,79 @@ dependencies:
143
145
  requirement: *id009
144
146
  type: :development
145
147
  prerelease: false
148
+ name: flog
149
+ - !ruby/object:Gem::Dependency
150
+ version_requirements: &id010 !ruby/object:Gem::Requirement
151
+ none: false
152
+ requirements:
153
+ - - ">="
154
+ - !ruby/object:Gem::Version
155
+ hash: 3
156
+ segments:
157
+ - 0
158
+ version: "0"
159
+ requirement: *id010
160
+ type: :development
161
+ prerelease: false
162
+ name: simplecov
163
+ - !ruby/object:Gem::Dependency
164
+ version_requirements: &id011 !ruby/object:Gem::Requirement
165
+ none: false
166
+ requirements:
167
+ - - ">="
168
+ - !ruby/object:Gem::Version
169
+ hash: 3
170
+ segments:
171
+ - 0
172
+ version: "0"
173
+ requirement: *id011
174
+ type: :development
175
+ prerelease: false
146
176
  name: ZenTest
177
+ - !ruby/object:Gem::Dependency
178
+ version_requirements: &id012 !ruby/object:Gem::Requirement
179
+ none: false
180
+ requirements:
181
+ - - "="
182
+ - !ruby/object:Gem::Version
183
+ hash: 45
184
+ segments:
185
+ - 2
186
+ - 8
187
+ - 1
188
+ version: 2.8.1
189
+ requirement: *id012
190
+ type: :development
191
+ prerelease: false
192
+ name: mysql
193
+ - !ruby/object:Gem::Dependency
194
+ version_requirements: &id013 !ruby/object:Gem::Requirement
195
+ none: false
196
+ requirements:
197
+ - - ~>
198
+ - !ruby/object:Gem::Version
199
+ hash: 1
200
+ segments:
201
+ - 1
202
+ version: "1"
203
+ requirement: *id013
204
+ type: :development
205
+ prerelease: false
206
+ name: bundler
207
+ - !ruby/object:Gem::Dependency
208
+ version_requirements: &id014 !ruby/object:Gem::Requirement
209
+ none: false
210
+ requirements:
211
+ - - ">="
212
+ - !ruby/object:Gem::Version
213
+ hash: 3
214
+ segments:
215
+ - 0
216
+ version: "0"
217
+ requirement: *id014
218
+ type: :development
219
+ prerelease: false
220
+ name: jeweler
147
221
  description: ETL tools for Chicago
148
222
  email: roland.swingler@gmail.com
149
223
  executables: []
@@ -162,52 +236,74 @@ files:
162
236
  - Rakefile
163
237
  - VERSION
164
238
  - chicago-etl.gemspec
239
+ - chicago-flow.gemspec
165
240
  - lib/chicago-etl.rb
166
241
  - lib/chicago/etl.rb
167
242
  - lib/chicago/etl/batch.rb
168
- - lib/chicago/etl/buffering_insert_writer.rb
243
+ - lib/chicago/etl/core_extensions.rb
169
244
  - lib/chicago/etl/counter.rb
245
+ - lib/chicago/etl/dataset_batch_stage.rb
170
246
  - lib/chicago/etl/key_builder.rb
171
247
  - lib/chicago/etl/load_dataset_builder.rb
172
- - lib/chicago/etl/mysql_dumpfile.rb
173
- - lib/chicago/etl/mysql_load_file_value_transformer.rb
248
+ - lib/chicago/etl/load_pipeline_stage_builder.rb
249
+ - lib/chicago/etl/pipeline.rb
250
+ - lib/chicago/etl/schema_table_sink_factory.rb
174
251
  - lib/chicago/etl/screens/column_screen.rb
175
- - lib/chicago/etl/screens/composite_screen.rb
176
252
  - lib/chicago/etl/screens/invalid_element.rb
177
253
  - lib/chicago/etl/screens/missing_value.rb
178
254
  - lib/chicago/etl/screens/out_of_bounds.rb
179
255
  - lib/chicago/etl/sequel/dependant_tables.rb
180
256
  - lib/chicago/etl/sequel/filter_to_etl_batch.rb
181
- - lib/chicago/etl/sequel/load_data_infile.rb
182
- - lib/chicago/etl/sink.rb
183
257
  - lib/chicago/etl/table_builder.rb
184
258
  - lib/chicago/etl/task_invocation.rb
185
259
  - lib/chicago/etl/tasks.rb
186
- - lib/chicago/etl/transformations/add_insert_timestamp.rb
260
+ - lib/chicago/etl/transformations.rb
187
261
  - lib/chicago/etl/transformations/uk_post_code.rb
188
262
  - lib/chicago/etl/transformations/uk_post_code_field.rb
263
+ - lib/chicago/flow/array_sink.rb
264
+ - lib/chicago/flow/array_source.rb
265
+ - lib/chicago/flow/dataset_source.rb
266
+ - lib/chicago/flow/errors.rb
267
+ - lib/chicago/flow/filter.rb
268
+ - lib/chicago/flow/mysql.rb
269
+ - lib/chicago/flow/mysql_file_serializer.rb
270
+ - lib/chicago/flow/mysql_file_sink.rb
271
+ - lib/chicago/flow/null_sink.rb
272
+ - lib/chicago/flow/pipeline_endpoint.rb
273
+ - lib/chicago/flow/pipeline_stage.rb
274
+ - lib/chicago/flow/sink.rb
275
+ - lib/chicago/flow/transformation.rb
276
+ - lib/chicago/flow/transformation_chain.rb
189
277
  - spec/db_connections.yml.dist
190
278
  - spec/etl/batch_spec.rb
279
+ - spec/etl/core_extensions_spec.rb
191
280
  - spec/etl/counter_spec.rb
281
+ - spec/etl/dataset_batch_stage_spec.rb
192
282
  - spec/etl/etl_batch_id_dataset_filter.rb
193
283
  - spec/etl/key_builder_spec.rb
194
284
  - spec/etl/load_dataset_builder_spec.rb
195
- - spec/etl/mysql_dumpfile_spec.rb
196
- - spec/etl/mysql_load_file_value_transformer_spec.rb
197
- - spec/etl/screens/composite_screen_spec.rb
285
+ - spec/etl/pipeline_stage_builder_spec.rb
286
+ - spec/etl/schema_table_sink_factory_spec.rb
198
287
  - spec/etl/screens/invalid_element_spec.rb
199
288
  - spec/etl/screens/missing_value_spec.rb
200
289
  - spec/etl/screens/out_of_bounds_spec.rb
201
290
  - spec/etl/sequel/dependant_tables_spec.rb
202
291
  - spec/etl/sequel/filter_to_etl_batch_spec.rb
203
- - spec/etl/sequel/load_data_infile_expression_spec.rb
204
- - spec/etl/sequel/load_data_infile_spec.rb
205
- - spec/etl/sink_spec.rb
206
292
  - spec/etl/table_builder_spec.rb
207
293
  - spec/etl/task_spec.rb
208
- - spec/etl/transformations/add_insert_timestamp_spec.rb
209
294
  - spec/etl/transformations/uk_post_code_field_spec.rb
210
295
  - spec/etl/transformations/uk_post_code_spec.rb
296
+ - spec/etl/transformations_spec.rb
297
+ - spec/flow/array_sink_spec.rb
298
+ - spec/flow/array_source_spec.rb
299
+ - spec/flow/dataset_source_spec.rb
300
+ - spec/flow/filter_spec.rb
301
+ - spec/flow/mysql_file_serializer_spec.rb
302
+ - spec/flow/mysql_file_sink_spec.rb
303
+ - spec/flow/mysql_integration_spec.rb
304
+ - spec/flow/pipeline_stage_spec.rb
305
+ - spec/flow/transformation_chain_spec.rb
306
+ - spec/flow/transformation_spec.rb
211
307
  - spec/spec_helper.rb
212
308
  homepage: http://github.com/notonthehighstreet/chicago-etl
213
309
  licenses:
@@ -1,36 +0,0 @@
1
- require 'chicago/etl/sink'
2
-
3
- module Chicago
4
- module ETL
5
- # Wrapper around a dataset to allowed buffered inserts.
6
- #
7
- # @api public
8
- class BufferingInsertWriter < Sink
9
- # The number of rows written before inserting to the DB.
10
- BUFFER_SIZE = 10_000
11
-
12
- def initialize(dataset, column_names, key=nil)
13
- super([], column_names, key)
14
- @dataset = dataset
15
- end
16
-
17
- def flush
18
- @dataset.insert_replace.import(column_names, output)
19
- output.clear
20
- end
21
-
22
- protected
23
-
24
- def write(row)
25
- output << @column_names.map {|name| row[name] }
26
- flush if reached_buffer_limit?
27
- end
28
-
29
- private
30
-
31
- def reached_buffer_limit?
32
- output.size >= BUFFER_SIZE
33
- end
34
- end
35
- end
36
- end
@@ -1,32 +0,0 @@
1
- require 'chicago/etl/sink'
2
-
3
- module Chicago
4
- module ETL
5
- # Wrapper around FasterCSV's output object, to convert values to a
6
- # format required by MySQL's LOAD DATA INFILE command.
7
- #
8
- # @api public
9
- class MysqlDumpfile < Sink
10
- # Creates a new writer.
11
- #
12
- # @param csv a FasterCSV output object
13
- # @param [Symbol] column_names columns to be output
14
- # @param key an optional key to ensure rows are written only once.
15
- def initialize(csv, column_names, key=nil)
16
- super(csv, column_names, key)
17
- @transformer = MysqlLoadFileValueTransformer.new
18
- end
19
-
20
- protected
21
-
22
- # Writes a row to the output.
23
- #
24
- # @param Hash row Only keys in column_names will be output.
25
- def write(row)
26
- output << @column_names.map {|name|
27
- @transformer.transform(row[name])
28
- }
29
- end
30
- end
31
- end
32
- end
@@ -1,17 +0,0 @@
1
- module Chicago
2
- module ETL
3
- module Screens
4
- class CompositeScreen
5
- def initialize(*screens)
6
- @screens = screens.flatten
7
- end
8
-
9
- def call(row, errors=[])
10
- @screens.inject([row,errors]) do |(row, errors), screen|
11
- screen.call(row, errors)
12
- end
13
- end
14
- end
15
- end
16
- end
17
- end
@@ -1,141 +0,0 @@
1
- module Chicago
2
- module ETL
3
- module SequelExtensions
4
- # @api private
5
- class LoadDataInfileExpression
6
- attr_reader :path, :table, :columns, :ignore, :character_set
7
-
8
- def initialize(path, table, columns, opts={})
9
- @path = path
10
- @table = table
11
- @columns = columns
12
- @ignore = opts[:ignore]
13
- @update = opts[:update]
14
- @set = opts[:set] || {}
15
- @character_set = opts[:character_set] || "utf8"
16
- if opts[:format] == :csv
17
- @field_terminator = ","
18
- @enclosed_by = '"'
19
- @escaped_by = '"'
20
- end
21
- end
22
-
23
- def replace?
24
- @update == :replace
25
- end
26
-
27
- def ignore?
28
- @update == :ignore
29
- end
30
-
31
- def to_sql(db)
32
- @db = db
33
- [load_fragment,
34
- replace_fragment,
35
- table_fragment,
36
- character_set_fragment,
37
- field_terminator_fragment,
38
- field_enclosure_fragment,
39
- escape_fragment,
40
- ignore_fragment,
41
- column_fragment,
42
- set_fragment].compact.join(" ")
43
- end
44
-
45
- private
46
-
47
- def load_fragment
48
- "LOAD DATA INFILE '#{path}'"
49
- end
50
-
51
- def replace_fragment
52
- @update.to_s.upcase if replace? || ignore?
53
- end
54
-
55
- def table_fragment
56
- "INTO TABLE `#{table}`"
57
- end
58
-
59
- def character_set_fragment
60
- "CHARACTER SET '#{character_set}'"
61
- end
62
-
63
- def field_terminator_fragment
64
- "FIELDS TERMINATED BY '#{@field_terminator}'" if @field_terminator
65
- end
66
-
67
- def field_enclosure_fragment
68
- "OPTIONALLY ENCLOSED BY '#{@enclosed_by}'" if @enclosed_by
69
- end
70
-
71
- def escape_fragment
72
- "ESCAPED BY '#{@escaped_by}'" if @escaped_by
73
- end
74
-
75
- def ignore_fragment
76
- "IGNORE #{ignore} LINES" if ignore
77
- end
78
-
79
- def column_fragment
80
- "(" + columns.map {|c| format_column(c) }.join(",") + ")"
81
- end
82
-
83
- def set_fragment
84
- unless @set.empty?
85
- "SET " + @set.map do |k, v|
86
- "#{@db.literal(k)} = #{@db.literal(v)}"
87
- end.join(", ")
88
- end
89
- end
90
-
91
- def format_column(column)
92
- column.to_s[0..0] == "@" ? column : "`#{column}`"
93
- end
94
- end
95
-
96
- module LoadDataInfile
97
- # Load data in file specified at path.
98
- #
99
- # Columns is a list of columns to load - column names starting
100
- # with an @ symbol will be treated as variables.
101
- #
102
- # By default, this will generate a REPLACE INTO TABLE
103
- # statement.
104
- #
105
- # Options:
106
- # :ignore - the number of lines to ignore in the source file
107
- # :update - nil, :ignore or :replace
108
- # :set - a hash specifying autopopulation of columns
109
- # :character_set - the character set of the file, UTF8 default
110
- # :format - either nil or :csv
111
- def load_infile(path, columns, options={})
112
- execute_dui(load_infile_sql(path, columns, options))
113
- end
114
-
115
- def load_infile_sql(path, columns, options={})
116
- replacement = opts[:insert_ignore] ? :ignore : :replace
117
- options = {:update => replacement}.merge(options)
118
- LoadDataInfileExpression.new(path,
119
- opts[:from].first,
120
- columns,
121
- options).
122
- to_sql(db)
123
- end
124
-
125
- # Loads the CSV data columns in path into this dataset's
126
- # table.
127
- #
128
- # See load_infile for more options.
129
- def load_csv_infile(path, columns, options={})
130
- execute_dui(load_csv_infile_sql(path, columns, options))
131
- end
132
-
133
- def load_csv_infile_sql(path, columns, options={})
134
- load_infile_sql(path, columns, options.merge(:format => :csv))
135
- end
136
- end
137
- end
138
- end
139
- end
140
-
141
- Sequel::Dataset.send :include, Chicago::ETL::SequelExtensions::LoadDataInfile
@@ -1,61 +0,0 @@
1
- require 'set'
2
-
3
- module Chicago
4
- module ETL
5
- # An end point to write rows.
6
- #
7
- # @abstract
8
- # @api public
9
- class Sink
10
- # Returns the column names expected to be written to this sink.
11
- # @api public
12
- attr_reader :column_names
13
-
14
- # @abstract
15
- def initialize(output, column_names, unique_row_key=nil)
16
- @output = output
17
- @column_names = column_names
18
- @written_rows = Set.new
19
- @unique_row_key = unique_row_key
20
- end
21
-
22
- # Writes a row to the output.
23
- #
24
- # Row will not be written to the output if it has already been
25
- # written, as identified by the unique row key.
26
- #
27
- # Should not be overridden by subclasses - overwrite write instead.
28
- def <<(row)
29
- unless written?(row)
30
- write row
31
- @written_rows << row[@unique_row_key]
32
- end
33
- end
34
-
35
- # Flushes any remaining writes to the output.
36
- #
37
- # By default does nothing, subclasses should override where
38
- # necessary.
39
- def flush
40
- end
41
-
42
- # Returns true if this row has previously been written to the
43
- # output.
44
- #
45
- # Always returns false if no key to determine row uniqueness has
46
- # been provided.
47
- def written?(row)
48
- return false if @unique_row_key.nil?
49
- @written_rows.include?(row[@unique_row_key])
50
- end
51
-
52
- protected
53
-
54
- attr_reader :output
55
-
56
- # @abstract
57
- def write(row)
58
- end
59
- end
60
- end
61
- end
@@ -1,16 +0,0 @@
1
- module Chicago
2
- module ETL
3
- module Transformations
4
- class AddInsertTimestamp
5
- def initialize(timestamp=Time.now)
6
- @insert_timestamp = timestamp.utc
7
- end
8
-
9
- def call(row, errors=[])
10
- row[:_inserted_at] = @insert_timestamp
11
- [row, errors]
12
- end
13
- end
14
- end
15
- end
16
- end
@@ -1,42 +0,0 @@
1
- require 'spec_helper'
2
-
3
- describe Chicago::ETL::MysqlDumpfile do
4
- before :each do
5
- @csv = mock(:csv)
6
- end
7
-
8
- it "outputs specified column values in order" do
9
- dumpfile = described_class.new(@csv, [:foo, :bar])
10
- @csv.should_receive(:<<).with(["1", "2"])
11
-
12
- dumpfile << {:foo => "1", :bar => "2", :baz => "not output"}
13
- end
14
-
15
- it "transforms values with a MysqlLoadFileValueTransformer" do
16
- transformer = mock(:transformer)
17
- Chicago::ETL::MysqlLoadFileValueTransformer.stub(:new).and_return(transformer)
18
-
19
- transformer.should_receive(:transform).with("bar").and_return("baz")
20
- @csv.should_receive(:<<).with(["baz"])
21
-
22
- dumpfile = described_class.new(@csv, [:foo])
23
- dumpfile << {:foo => "bar"}
24
- end
25
-
26
- it "will write a row only once with the same key" do
27
- dumpfile = described_class.new(@csv, [:foo], :id)
28
- @csv.should_receive(:<<).with(["bar"])
29
-
30
- dumpfile << {:id => 1, :foo => "bar"}
31
- dumpfile << {:id => 1, :foo => "baz"}
32
- end
33
-
34
- it "will write a row multiple times if no key is specified" do
35
- dumpfile = described_class.new(@csv, [:foo])
36
- @csv.should_receive(:<<).with(["bar"])
37
- @csv.should_receive(:<<).with(["baz"])
38
-
39
- dumpfile << {:id => 1, :foo => "bar"}
40
- dumpfile << {:id => 1, :foo => "baz"}
41
- end
42
- end
@@ -1,27 +0,0 @@
1
- require 'spec_helper'
2
-
3
- describe Chicago::ETL::MysqlLoadFileValueTransformer do
4
- it "transforms nil into \\N" do
5
- subject.transform(nil).should == "\\N"
6
- end
7
-
8
- it "transforms true into '1'" do
9
- subject.transform(true).should == "1"
10
- end
11
-
12
- it "transforms false into '0'" do
13
- subject.transform(false).should == "0"
14
- end
15
-
16
- it "transforms times into mysql time format" do
17
- subject.transform(Time.local(2011,01,02,10,30,50)).should == "2011-01-02 10:30:50"
18
- end
19
-
20
- it "transforms datetimes into mysql time format" do
21
- subject.transform(DateTime.new(2011,01,02,10,30,50)).should == "2011-01-02 10:30:50"
22
- end
23
-
24
- it "transforms dates into mysql date format" do
25
- subject.transform(Date.new(2011,01,02)).should == "2011-01-02"
26
- end
27
- end
@@ -1,25 +0,0 @@
1
- require 'spec_helper'
2
-
3
- describe Chicago::ETL::Screens::CompositeScreen do
4
- let(:screen) do
5
- i = 0
6
-
7
- lambda {|row, errors|
8
- i += 1
9
- errors << i
10
- [row, errors]
11
- }
12
- end
13
-
14
- it "calls all child screens" do
15
- row, errors = described_class.new([screen, screen]).call({:a => 1}, [])
16
- row.should == {:a => 1}
17
- errors.should == [1,2]
18
- end
19
-
20
- it "supports variable arguments in the constructor" do
21
- row, errors = described_class.new(screen, screen).call({:a => 1}, [])
22
- row.should == {:a => 1}
23
- errors.should == [1,2]
24
- end
25
- end