chicago-etl 0.0.13 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71)
  1. data/Gemfile +8 -3
  2. data/README.rdoc +4 -1
  3. data/VERSION +1 -1
  4. data/chicago-etl.gemspec +59 -22
  5. data/chicago-flow.gemspec +92 -0
  6. data/lib/chicago/etl/batch.rb +9 -2
  7. data/lib/chicago/etl/core_extensions.rb +12 -0
  8. data/lib/chicago/etl/counter.rb +8 -1
  9. data/lib/chicago/etl/dataset_batch_stage.rb +52 -0
  10. data/lib/chicago/etl/key_builder.rb +17 -39
  11. data/lib/chicago/etl/load_dataset_builder.rb +3 -1
  12. data/lib/chicago/etl/load_pipeline_stage_builder.rb +142 -0
  13. data/lib/chicago/etl/pipeline.rb +151 -0
  14. data/lib/chicago/etl/schema_table_sink_factory.rb +74 -0
  15. data/lib/chicago/etl/screens/column_screen.rb +26 -25
  16. data/lib/chicago/etl/screens/invalid_element.rb +5 -5
  17. data/lib/chicago/etl/screens/missing_value.rb +4 -2
  18. data/lib/chicago/etl/screens/out_of_bounds.rb +2 -0
  19. data/lib/chicago/etl/table_builder.rb +4 -2
  20. data/lib/chicago/etl/task_invocation.rb +0 -1
  21. data/lib/chicago/etl/transformations.rb +128 -0
  22. data/lib/chicago/etl.rb +39 -8
  23. data/lib/chicago/flow/array_sink.rb +35 -0
  24. data/lib/chicago/flow/array_source.rb +15 -0
  25. data/lib/chicago/flow/dataset_source.rb +23 -0
  26. data/lib/chicago/flow/errors.rb +14 -0
  27. data/lib/chicago/flow/filter.rb +15 -0
  28. data/lib/chicago/flow/mysql.rb +4 -0
  29. data/lib/chicago/{etl/mysql_load_file_value_transformer.rb → flow/mysql_file_serializer.rb} +7 -4
  30. data/lib/chicago/flow/mysql_file_sink.rb +68 -0
  31. data/lib/chicago/flow/null_sink.rb +8 -0
  32. data/lib/chicago/flow/pipeline_endpoint.rb +15 -0
  33. data/lib/chicago/flow/pipeline_stage.rb +68 -0
  34. data/lib/chicago/flow/sink.rb +53 -0
  35. data/lib/chicago/flow/transformation.rb +169 -0
  36. data/lib/chicago/flow/transformation_chain.rb +40 -0
  37. data/spec/etl/batch_spec.rb +2 -1
  38. data/spec/etl/core_extensions_spec.rb +13 -0
  39. data/spec/etl/dataset_batch_stage_spec.rb +55 -0
  40. data/spec/etl/key_builder_spec.rb +25 -83
  41. data/spec/etl/pipeline_stage_builder_spec.rb +39 -0
  42. data/spec/etl/schema_table_sink_factory_spec.rb +69 -0
  43. data/spec/etl/screens/invalid_element_spec.rb +10 -11
  44. data/spec/etl/screens/missing_value_spec.rb +21 -21
  45. data/spec/etl/screens/out_of_bounds_spec.rb +21 -29
  46. data/spec/etl/transformations_spec.rb +109 -0
  47. data/spec/flow/array_sink_spec.rb +26 -0
  48. data/spec/flow/array_source_spec.rb +20 -0
  49. data/spec/flow/dataset_source_spec.rb +15 -0
  50. data/spec/flow/filter_spec.rb +13 -0
  51. data/spec/flow/mysql_file_serializer_spec.rb +27 -0
  52. data/spec/flow/mysql_file_sink_spec.rb +94 -0
  53. data/spec/flow/mysql_integration_spec.rb +72 -0
  54. data/spec/flow/pipeline_stage_spec.rb +89 -0
  55. data/spec/flow/transformation_chain_spec.rb +76 -0
  56. data/spec/flow/transformation_spec.rb +91 -0
  57. data/spec/spec_helper.rb +5 -0
  58. metadata +135 -39
  59. data/lib/chicago/etl/buffering_insert_writer.rb +0 -36
  60. data/lib/chicago/etl/mysql_dumpfile.rb +0 -32
  61. data/lib/chicago/etl/screens/composite_screen.rb +0 -17
  62. data/lib/chicago/etl/sequel/load_data_infile.rb +0 -141
  63. data/lib/chicago/etl/sink.rb +0 -61
  64. data/lib/chicago/etl/transformations/add_insert_timestamp.rb +0 -16
  65. data/spec/etl/mysql_dumpfile_spec.rb +0 -42
  66. data/spec/etl/mysql_load_file_value_transformer_spec.rb +0 -27
  67. data/spec/etl/screens/composite_screen_spec.rb +0 -25
  68. data/spec/etl/sequel/load_data_infile_expression_spec.rb +0 -60
  69. data/spec/etl/sequel/load_data_infile_spec.rb +0 -37
  70. data/spec/etl/sink_spec.rb +0 -7
  71. data/spec/etl/transformations/add_insert_timestamp_spec.rb +0 -9
data/lib/chicago/etl.rb CHANGED
@@ -1,35 +1,66 @@
+if RUBY_VERSION.split(".")[1] < "9"
+  require 'fastercsv'
+  CSV = FasterCSV
+else
+  require 'csv'
+end
+
 require 'sequel'
+require 'chicago/flow/errors'
+require 'chicago/flow/transformation'
+require 'chicago/flow/filter'
+require 'chicago/flow/transformation_chain'
+require 'chicago/flow/pipeline_stage'
+require 'chicago/flow/pipeline_endpoint'
+require 'chicago/flow/array_source'
+require 'chicago/flow/dataset_source'
+require 'chicago/flow/sink'
+require 'chicago/flow/array_sink'
+require 'chicago/flow/null_sink'
+require 'chicago/flow/mysql'
 
+require 'chicago/etl/core_extensions'
 require 'chicago/etl/counter'
 require 'chicago/etl/key_builder'
-require 'chicago/etl/sink'
-require 'chicago/etl/mysql_load_file_value_transformer'
-require 'chicago/etl/buffering_insert_writer'
-require 'chicago/etl/mysql_dumpfile'
-
+require 'chicago/etl/schema_table_sink_factory'
+require 'chicago/etl/transformations'
 require 'chicago/etl/load_dataset_builder'
+require 'chicago/etl/dataset_batch_stage'
+require 'chicago/etl/load_pipeline_stage_builder'
+require 'chicago/etl/pipeline'
 
 # Sequel Extensions
 require 'chicago/etl/sequel/filter_to_etl_batch'
-require 'chicago/etl/sequel/load_data_infile'
 require 'chicago/etl/sequel/dependant_tables'
 
 # Screens
 require 'chicago/etl/screens/column_screen'
-require 'chicago/etl/screens/composite_screen'
 require 'chicago/etl/screens/missing_value'
 require 'chicago/etl/screens/invalid_element'
 require 'chicago/etl/screens/out_of_bounds'
 
 # Transformations
-require 'chicago/etl/transformations/add_insert_timestamp'
 require 'chicago/etl/transformations/uk_post_code'
 require 'chicago/etl/transformations/uk_post_code_field'
 
 module Chicago
+  # Contains classes related to ETL processing.
   module ETL
     autoload :TableBuilder, 'chicago/etl/table_builder.rb'
     autoload :Batch, 'chicago/etl/batch.rb'
     autoload :TaskInvocation, 'chicago/etl/task_invocation.rb'
+
+    # Executes a pipeline stage in the context of an ETL Batch.
+    #
+    # Task execution status is stored in a database ETL task
+    # invocations table - this ensures tasks aren't run more than once
+    # within a batch.
+    def self.execute(stage, etl_batch, reextract, logger)
+      etl_batch.perform_task(:load, stage.name) do
+        logger.debug "Starting loading #{stage.name}"
+        stage.execute(etl_batch, reextract)
+        logger.debug "Finished loading #{stage.name}"
+      end
+    end
   end
 end
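For orientation, a minimal sketch of how the new ETL.execute entry point might be called. DemoStage is illustrative and not part of the gem; ETL.execute only relies on the stage responding to #name and #execute(etl_batch, reextract), and the batch tables are assumed to exist:

    require 'logger'
    require 'chicago/etl'

    # Illustrative stand-in for a real pipeline stage (not part of the gem).
    class DemoStage
      def name
        :demo_load
      end

      def execute(etl_batch, reextract)
        # load rows here; reprocess everything when reextract is true
      end
    end

    batch = Chicago::ETL::Batch.instance.start  # assumes ETL tables are set up
    Chicago::ETL.execute(DemoStage.new, batch, false, Logger.new($stdout))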
data/lib/chicago/flow/array_sink.rb ADDED
@@ -0,0 +1,35 @@
+module Chicago
+  module Flow
+    # An endpoint that stores rows in an Array.
+    #
+    # @api public
+    class ArraySink < Sink
+      # Returns the array of written rows.
+      attr_reader :data
+
+      # The name of this sink.
+      attr_reader :name
+
+      # Creates an ArraySink.
+      #
+      # Optionally you may pass an array of column names if you wish
+      # to use static validation that the correct columns are written
+      # through the pipeline.
+      def initialize(name, fields=[])
+        @name = name
+        @fields = [fields].flatten
+        @data = []
+      end
+
+      # See Sink#<<
+      def <<(row)
+        @data << row.merge(constant_values)
+      end
+
+      # See Sink#truncate
+      def truncate
+        @data.clear
+      end
+    end
+  end
+end
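ArraySink is useful mainly for tests and in-memory assembly. A short usage sketch, based only on the methods shown above:

    sink = Chicago::Flow::ArraySink.new(:output, [:id, :name])
    sink.set_constant_values(:etl_batch_id => 42)  # from Sink; merged into each row
    sink << {:id => 1, :name => "foo"}
    sink.data  # => [{:id => 1, :name => "foo", :etl_batch_id => 42}]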
data/lib/chicago/flow/array_source.rb ADDED
@@ -0,0 +1,15 @@
+module Chicago
+  module Flow
+    # @api public
+    class ArraySource < PipelineEndpoint
+      def initialize(array, fields=[])
+        @fields = [fields].flatten
+        @array = array
+      end
+
+      def each
+        @array.each {|row| yield row }
+      end
+    end
+  end
+end
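And the matching in-memory source; the fields argument is optional and defaults to an empty array:

    source = Chicago::Flow::ArraySource.new([{:id => 1}, {:id => 2}], [:id])
    source.has_defined_fields?       # => true (from PipelineEndpoint)
    source.each {|row| p row[:id] }  # prints 1, then 2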
data/lib/chicago/flow/dataset_source.rb ADDED
@@ -0,0 +1,23 @@
+require 'sequel'
+require 'sequel/fast_columns'
+
+module Chicago
+  module Flow
+    # @api public
+    class DatasetSource < PipelineEndpoint
+      attr_reader :dataset
+
+      def initialize(dataset)
+        @dataset = dataset
+      end
+
+      def each
+        @dataset.each {|row| yield row }
+      end
+
+      def fields
+        @dataset.columns
+      end
+    end
+  end
+end
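A hedged sketch of wrapping a Sequel dataset; the connection URL and users table here are assumptions for illustration, not anything this diff defines:

    DB = Sequel.connect("mysql://localhost/warehouse")  # assumed connection

    source = Chicago::Flow::DatasetSource.new(DB[:users].select(:id, :name))
    source.fields               # delegated to the dataset: [:id, :name]
    source.each {|row| p row }  # iterates the query results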
data/lib/chicago/flow/errors.rb ADDED
@@ -0,0 +1,14 @@
+module Chicago
+  module Flow
+    # @api public
+    class Error < RuntimeError
+    end
+
+    # @api public
+    class RaisingErrorHandler
+      def unregistered_sinks(sinks)
+        raise Error.new("Sinks not registered: #{sinks.join(",")}")
+      end
+    end
+  end
+end
data/lib/chicago/flow/filter.rb ADDED
@@ -0,0 +1,15 @@
+module Chicago
+  module Flow
+    # @api public
+    class Filter < Transformation
+      def initialize(stream=:default, &block)
+        super(stream)
+        @block = block || lambda {|row| false }
+      end
+
+      def process_row(row)
+        row if @block.call(row)
+      end
+    end
+  end
+end
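Filter keeps rows for which the block returns truthy; with no block it drops everything. For example:

    filter = Chicago::Flow::Filter.new {|row| row[:amount].to_i > 0 }
    filter.process({:amount => 10})  # => {:amount => 10}
    filter.process({:amount => 0})   # => nil (dropped; the chain compacts nils away)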
data/lib/chicago/flow/mysql.rb ADDED
@@ -0,0 +1,4 @@
+require 'sequel'
+require 'sequel/load_data_infile'
+require 'chicago/flow/mysql_file_serializer'
+require 'chicago/flow/mysql_file_sink'
data/lib/chicago/{etl/mysql_load_file_value_transformer.rb → flow/mysql_file_serializer.rb} RENAMED
@@ -1,12 +1,15 @@
+require 'date'
+
 module Chicago
-  module ETL
-    class MysqlLoadFileValueTransformer
+  module Flow
+    # @api private
+    class MysqlFileSerializer
      # Transforms a value to be suitable for use in a file for a
      # MySQL LOAD DATA INFILE statement.
-      def transform(value)
+      def serialize(value)
        case value
        when nil
-          "\\N"
+          "NULL"
        when true
          "1"
        when false
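The rename also changes behaviour: nil now serializes as the string "NULL" rather than "\N". A quick sketch of the two cases visible above (the hunk is truncated after `when false`, so the false branch isn't shown here):

    serializer = Chicago::Flow::MysqlFileSerializer.new
    serializer.serialize(nil)   # => "NULL" (was "\\N" in 0.0.13)
    serializer.serialize(true)  # => "1"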
data/lib/chicago/flow/mysql_file_sink.rb ADDED
@@ -0,0 +1,68 @@
+require 'sequel'
+require 'sequel/load_data_infile'
+require 'tmpdir'
+
+Sequel.extension :core_extensions
+
+module Chicago
+  module Flow
+    # @api public
+    class MysqlFileSink < Sink
+      attr_reader :filepath
+      attr_writer :truncation_strategy
+
+      def initialize(db, table_name, fields, options = {})
+        @fields = [fields].flatten
+        @filepath = options[:filepath] || temp_file(table_name)
+        @serializer = MysqlFileSerializer.new
+        @db = db
+        @table_name = table_name
+        @insert_ignore = !!options[:ignore]
+      end
+
+      def name
+        @table_name
+      end
+
+      def <<(row)
+        csv << fields.map {|c| @serializer.serialize(row[c]) }
+      end
+
+      def close
+        csv.flush
+        load_from_file(filepath)
+        csv.close
+        File.unlink(filepath) if File.exists?(filepath)
+      end
+
+      # Loads data from the file into the MySQL table via LOAD DATA
+      # INFILE, if the file exists and has content.
+      def load_from_file(file)
+        return unless File.size?(file)
+        dataset.load_csv_infile(file, @fields, :set => constant_values)
+      end
+
+      def truncate
+        if @truncation_strategy
+          @truncation_strategy.call
+        else
+          @db[@table_name].truncate
+        end
+      end
+
+      private
+
+      def dataset
+        @insert_ignore ? @db[@table_name].insert_ignore : @db[@table_name]
+      end
+
+      def csv
+        @csv ||= CSV.open(filepath, "w")
+      end
+
+      def temp_file(table_name)
+        File.join(Dir.tmpdir, "#{table_name}.#{rand(1_000_000)}.csv")
+      end
+    end
+  end
+end
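A sketch of using the sink directly; DB is an assumed Sequel MySQL connection and dimension_user an assumed table with matching columns:

    sink = Chicago::Flow::MysqlFileSink.new(DB, :dimension_user, [:id, :name],
                                            :ignore => true)
    sink.open                           # no-op inherited from Sink
    sink << {:id => 1, :name => "foo"}  # buffered to a temporary CSV file
    sink.close                          # LOAD DATA INFILE, then unlink the CSV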
data/lib/chicago/flow/null_sink.rb ADDED
@@ -0,0 +1,8 @@
+module Chicago
+  module Flow
+    # Supports the Sink interface, but discards all rows written to
+    # it.
+    class NullSink < Sink
+    end
+  end
+end
data/lib/chicago/flow/pipeline_endpoint.rb ADDED
@@ -0,0 +1,15 @@
+module Chicago
+  module Flow
+    # A Source or a Sink.
+    #
+    # @api public
+    # @abstract
+    class PipelineEndpoint
+      attr_reader :fields
+
+      def has_defined_fields?
+        !fields.empty?
+      end
+    end
+  end
+end
data/lib/chicago/flow/pipeline_stage.rb ADDED
@@ -0,0 +1,68 @@
+module Chicago
+  module Flow
+    # Co-ordinates iterating over rows provided by a source, passing
+    # them through a transformation chain before writing them to
+    # sink(s).
+    #
+    # @api public
+    class PipelineStage
+      attr_reader :transformation_chain
+
+      def initialize(options={})
+        @sinks = options[:sinks] || {}
+        @transformations = options[:transformations] || []
+        @error_handler = options[:error_handler] || RaisingErrorHandler.new
+        @transformation_chain = TransformationChain.new(*@transformations)
+      end
+
+      # Returns the named sink, if it exists
+      def sink(name)
+        @sinks[name.to_sym]
+      end
+
+      def sinks
+        @sinks.values
+      end
+
+      def register_sink(name, sink)
+        @sinks[name.to_sym] = sink
+        self
+      end
+
+      def validate_pipeline
+        unless unregistered_sinks.empty?
+          @error_handler.unregistered_sinks(unregistered_sinks)
+        end
+      end
+
+      def execute(source)
+        validate_pipeline
+        sinks.each(&:open)
+        pipe_rows_to_sinks_from(source)
+        sinks.each(&:close)
+      end
+
+      def required_sinks
+        transformation_chain.output_streams | [:default]
+      end
+
+      def unregistered_sinks
+        required_sinks - @sinks.keys
+      end
+
+      private
+
+      def pipe_rows_to_sinks_from(source)
+        source.each do |row|
+          transformation_chain.process(row).each {|row| process_row(row) }
+        end
+        transformation_chain.flush.each {|row| process_row(row) }
+      end
+
+      def process_row(row)
+        stream = row.delete(:_stream) || :default
+        @sinks[stream] << row
+      end
+    end
+  end
+end
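Putting the pieces together, an end-to-end sketch using only classes added in this release:

    source = Chicago::Flow::ArraySource.new([{:id => 1}, {:id => nil}])
    sink   = Chicago::Flow::ArraySink.new(:output)

    stage = Chicago::Flow::PipelineStage.new(
      :transformations => [Chicago::Flow::Filter.new {|row| row[:id] }],
      :sinks           => {:default => sink}
    )
    stage.execute(source)
    sink.data  # => [{:id => 1}]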
data/lib/chicago/flow/sink.rb ADDED
@@ -0,0 +1,53 @@
+module Chicago
+  module Flow
+    # The destination for rows passing through a pipeline stage.
+    #
+    # @api public
+    # @abstract
+    class Sink < PipelineEndpoint
+      # Specifies a hash of values that are assumed to apply to all
+      # rows.
+      #
+      # Subclasses should use their constant values appropriately when
+      # writing rows, by merging them with the row or otherwise
+      # ensuring that they end up in the final source this sink
+      # represents.
+      def constant_values
+        @constant_values ||= {}
+      end
+
+      # Sets a number of constant values.
+      def set_constant_values(hash={})
+        constant_values.merge!(hash)
+        self
+      end
+
+      # Performs any operations before writing rows to this sink.
+      #
+      # By default does nothing; may be overridden by subclasses.
+      def open
+      end
+
+      # Performs any operations after writing rows to this sink.
+      #
+      # By default does nothing; may be overridden by subclasses.
+      def close
+      end
+
+      # Writes a row to this sink.
+      #
+      # By default does nothing; may be overridden by subclasses.
+      def <<(row)
+      end
+
+      # Removes all rows from this sink.
+      #
+      # This includes all rows written prior to this particular
+      # execution of a pipeline stage.
+      #
+      # By default does nothing; should be overridden by subclasses.
+      def truncate
+      end
+    end
+  end
+end
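Since open, close, << and truncate are all no-op hooks, a custom sink only overrides what it needs. An illustrative subclass (not part of the gem):

    class StdoutSink < Chicago::Flow::Sink
      def initialize(name)
        @name = name
        @fields = []
      end

      attr_reader :name

      # Writes each row, plus any constant values, to standard output.
      def <<(row)
        puts row.merge(constant_values).inspect
      end
    end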
data/lib/chicago/flow/transformation.rb ADDED
@@ -0,0 +1,169 @@
+module Chicago
+  module Flow
+    # The key used to store the stream in the row.
+    #
+    # @api private
+    STREAM = :_stream
+
+    # A base class for row transformations.
+    #
+    # Transformations process hash-like rows by filtering or altering
+    # their contents.
+    #
+    # @api public
+    # @abstract Subclass and add a process_row method
+    class Transformation
+      # Creates the transformation.
+      #
+      # This should not be overridden by subclasses - transformations
+      # that need their own arguments should do so by passing named
+      # options.
+      #
+      # @overload initialize(stream, options)
+      #   Specifies this transformation applies to a specific
+      #   stream. Options are specific to the stream subclass
+      # @overload initialize(options)
+      #   As above, but the stream is assumed to be :default
+      def initialize(*args)
+        stream, options = *args
+        if stream.kind_of?(Hash)
+          @stream = :default
+          @options = stream
+        else
+          @stream = stream || :default
+          @options = options || {}
+        end
+
+        ensure_options_present
+      end
+
+      # Returns the required initialization options for this transformation.
+      def self.required_options
+        @required_options ||= []
+      end
+
+      # Returns the fields added by this transformation.
+      def self.added_fields
+        @added_fields ||= []
+      end
+
+      # Returns the fields removed by this transformation.
+      def self.removed_fields
+        @removed_fields ||= []
+      end
+
+      # Specify which options are required in the constructor of
+      # this transformation.
+      def self.requires_options(*options)
+        required_options.concat options.flatten
+      end
+
+      # Specify which fields are added to the row by this
+      # transformation.
+      def self.adds_fields(*fields)
+        added_fields.concat fields.flatten
+      end
+
+      # Specify which fields are removed from the row by this
+      # transformation.
+      def self.removes_fields(*fields)
+        removed_fields.concat fields.flatten
+      end
+
+      # Returns the required initialization options for this transformation.
+      def required_options
+        self.class.required_options
+      end
+
+      # Returns the fields added by this transformation.
+      def added_fields
+        self.class.added_fields
+      end
+
+      # Returns the fields removed by this transformation.
+      def removed_fields
+        self.class.removed_fields
+      end
+
+      def upstream_fields(fields)
+        ((fields + removed_fields) - added_fields).uniq
+      end
+
+      def downstream_fields(fields)
+        ((fields - removed_fields) + added_fields).uniq
+      end
+
+      # Processes a row if the row is on this transformation's stream.
+      #
+      # This should not be overridden by subclasses, override
+      # process_row instead.
+      #
+      # @return Hash if a single row is returned
+      # @return Array<Hash> if multiple rows need to be returned
+      def process(row)
+        applies_to_stream?(row[STREAM]) ? process_row(row) : row
+      end
+
+      # Returns all remaining rows yet to make their way through the
+      # pipeline.
+      #
+      # This should be overridden by subclasses if the transformation
+      # holds back rows as it does processing (to find the maximum
+      # value in a set of rows for example), to ensure that all rows
+      # are written through the pipeline.
+      #
+      # @return Array<Hash> by default an empty array.
+      def flush
+        []
+      end
+
+      # Returns the streams to which this transformation may write
+      # rows.
+      #
+      # By default, transformations are assumed to write only to the
+      # :default stream. Override this in subclasses as necessary.
+      def output_streams
+        [:default]
+      end
+
+      # Returns true if this transformation should be applied to a row
+      # on the target stream.
+      def applies_to_stream?(target_stream)
+        @stream == :all ||
+          (target_stream.nil? && @stream == :default) ||
+          target_stream == @stream
+      end
+
+      protected
+
+      # Performs transformation on the row.
+      #
+      # By default does nothing; override in subclasses. Subclasses
+      # should return either nil, a Hash-like row or an Array of
+      # Hash-like rows.
+      def process_row(row)
+        row
+      end
+
+      # Assigns the row to a stream.
+      #
+      # Will raise an error if the stream is not declared by
+      # overriding output_streams.
+      def assign_stream(row, stream)
+        raise "Stream not declared" unless stream.nil? || output_streams.include?(stream)
+        row[STREAM] = stream if stream
+        row
+      end
+
+      private
+
+      def ensure_options_present
+        missing_keys = required_options - @options.keys
+
+        unless missing_keys.empty?
+          raise ArgumentError.new("The following options are not supplied: " + missing_keys.join(","))
+        end
+      end
+    end
+  end
+end
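An illustrative subclass (not part of the gem) showing the class-level declarations and the process_row hook:

    class AddLoadedAt < Chicago::Flow::Transformation
      adds_fields :loaded_at

      # Annotates every row with the time it passed through the pipeline.
      def process_row(row)
        row.merge(:loaded_at => Time.now)
      end
    end

    t = AddLoadedAt.new       # stream defaults to :default
    t.process({:id => 1})     # => {:id => 1, :loaded_at => ...}
    AddLoadedAt.added_fields  # => [:loaded_at]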
data/lib/chicago/flow/transformation_chain.rb ADDED
@@ -0,0 +1,40 @@
+module Chicago
+  module Flow
+    # @api private
+    class TransformationChain
+      def initialize(*transforms)
+        @transforms = transforms
+      end
+
+      def output_streams
+        @transforms.inject([]) {|s, t| s | t.output_streams }
+      end
+
+      def process(row)
+        @transforms.inject([row]) do |rows, transform|
+          process_rows(rows, transform)
+        end
+      end
+
+      def flush
+        @transforms.inject([]) do |rows, transform|
+          process_rows(rows, transform) + transform.flush
+        end
+      end
+
+      def upstream_fields(fields)
+        @transforms.inject(fields) {|f, t| t.upstream_fields(f) }
+      end
+
+      def downstream_fields(fields)
+        @transforms.inject(fields) {|f, t| t.downstream_fields(f) }
+      end
+
+      private
+
+      def process_rows(rows, transform)
+        rows.map {|row| transform.process(row) }.flatten.compact
+      end
+    end
+  end
+end
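Chained together (AddLoadedAt is the illustrative transformation from the sketch above):

    chain = Chicago::Flow::TransformationChain.new(
      Chicago::Flow::Filter.new {|row| row[:id] },
      AddLoadedAt.new
    )
    chain.process({:id => 1})    # => [{:id => 1, :loaded_at => ...}]
    chain.process({:id => nil})  # => [] (filtered out)
    chain.flush                  # => []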
data/spec/etl/batch_spec.rb CHANGED
@@ -15,7 +15,8 @@ describe Chicago::ETL::Batch do
   end
 
   it "should set the start timestamp of the batch to now when created" do
-    ETL::Batch.instance.start.started_at.to_i.should == Time.now.to_i
+    (ETL::Batch.instance.start.started_at.to_i - Time.now.to_i).abs.
+      should <= 5
   end
 
   it "should have a state of 'Started' when started" do
data/spec/etl/core_extensions_spec.rb ADDED
@@ -0,0 +1,13 @@
+require 'spec_helper'
+
+describe Hash do
+  it "should have a put method which returns the hash" do
+    {}.put(:a, 1).should == {:a => 1}
+  end
+
+  it "should have a modify_existing method that ignores nil values" do
+    {:a => nil}.modify_existing(:a) {|v| 2 }.should == {:a => nil}
+    {:a => 1}.modify_existing(:a) {|v| 2 }.should == {:a => 2}
+    {}.modify_existing(:a) {|r| 2 }.should == {}
+  end
+end
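The extensions themselves live in data/lib/chicago/etl/core_extensions.rb (+12), which this diff view doesn't show; a sketch consistent with these specs might be:

    class Hash
      # Stores the key/value pair, returning the hash for chaining.
      def put(key, value)
        self[key] = value
        self
      end

      # Yields an existing non-nil value and stores the result; missing
      # keys and nil values are left untouched.
      def modify_existing(key)
        self[key] = yield(self[key]) unless self[key].nil?
        self
      end
    end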