chicago-etl 0.0.13 → 0.1.0

Files changed (71)
  1. data/Gemfile +8 -3
  2. data/README.rdoc +4 -1
  3. data/VERSION +1 -1
  4. data/chicago-etl.gemspec +59 -22
  5. data/chicago-flow.gemspec +92 -0
  6. data/lib/chicago/etl/batch.rb +9 -2
  7. data/lib/chicago/etl/core_extensions.rb +12 -0
  8. data/lib/chicago/etl/counter.rb +8 -1
  9. data/lib/chicago/etl/dataset_batch_stage.rb +52 -0
  10. data/lib/chicago/etl/key_builder.rb +17 -39
  11. data/lib/chicago/etl/load_dataset_builder.rb +3 -1
  12. data/lib/chicago/etl/load_pipeline_stage_builder.rb +142 -0
  13. data/lib/chicago/etl/pipeline.rb +151 -0
  14. data/lib/chicago/etl/schema_table_sink_factory.rb +74 -0
  15. data/lib/chicago/etl/screens/column_screen.rb +26 -25
  16. data/lib/chicago/etl/screens/invalid_element.rb +5 -5
  17. data/lib/chicago/etl/screens/missing_value.rb +4 -2
  18. data/lib/chicago/etl/screens/out_of_bounds.rb +2 -0
  19. data/lib/chicago/etl/table_builder.rb +4 -2
  20. data/lib/chicago/etl/task_invocation.rb +0 -1
  21. data/lib/chicago/etl/transformations.rb +128 -0
  22. data/lib/chicago/etl.rb +39 -8
  23. data/lib/chicago/flow/array_sink.rb +35 -0
  24. data/lib/chicago/flow/array_source.rb +15 -0
  25. data/lib/chicago/flow/dataset_source.rb +23 -0
  26. data/lib/chicago/flow/errors.rb +14 -0
  27. data/lib/chicago/flow/filter.rb +15 -0
  28. data/lib/chicago/flow/mysql.rb +4 -0
  29. data/lib/chicago/{etl/mysql_load_file_value_transformer.rb → flow/mysql_file_serializer.rb} +7 -4
  30. data/lib/chicago/flow/mysql_file_sink.rb +68 -0
  31. data/lib/chicago/flow/null_sink.rb +8 -0
  32. data/lib/chicago/flow/pipeline_endpoint.rb +15 -0
  33. data/lib/chicago/flow/pipeline_stage.rb +68 -0
  34. data/lib/chicago/flow/sink.rb +53 -0
  35. data/lib/chicago/flow/transformation.rb +169 -0
  36. data/lib/chicago/flow/transformation_chain.rb +40 -0
  37. data/spec/etl/batch_spec.rb +2 -1
  38. data/spec/etl/core_extensions_spec.rb +13 -0
  39. data/spec/etl/dataset_batch_stage_spec.rb +55 -0
  40. data/spec/etl/key_builder_spec.rb +25 -83
  41. data/spec/etl/pipeline_stage_builder_spec.rb +39 -0
  42. data/spec/etl/schema_table_sink_factory_spec.rb +69 -0
  43. data/spec/etl/screens/invalid_element_spec.rb +10 -11
  44. data/spec/etl/screens/missing_value_spec.rb +21 -21
  45. data/spec/etl/screens/out_of_bounds_spec.rb +21 -29
  46. data/spec/etl/transformations_spec.rb +109 -0
  47. data/spec/flow/array_sink_spec.rb +26 -0
  48. data/spec/flow/array_source_spec.rb +20 -0
  49. data/spec/flow/dataset_source_spec.rb +15 -0
  50. data/spec/flow/filter_spec.rb +13 -0
  51. data/spec/flow/mysql_file_serializer_spec.rb +27 -0
  52. data/spec/flow/mysql_file_sink_spec.rb +94 -0
  53. data/spec/flow/mysql_integration_spec.rb +72 -0
  54. data/spec/flow/pipeline_stage_spec.rb +89 -0
  55. data/spec/flow/transformation_chain_spec.rb +76 -0
  56. data/spec/flow/transformation_spec.rb +91 -0
  57. data/spec/spec_helper.rb +5 -0
  58. metadata +135 -39
  59. data/lib/chicago/etl/buffering_insert_writer.rb +0 -36
  60. data/lib/chicago/etl/mysql_dumpfile.rb +0 -32
  61. data/lib/chicago/etl/screens/composite_screen.rb +0 -17
  62. data/lib/chicago/etl/sequel/load_data_infile.rb +0 -141
  63. data/lib/chicago/etl/sink.rb +0 -61
  64. data/lib/chicago/etl/transformations/add_insert_timestamp.rb +0 -16
  65. data/spec/etl/mysql_dumpfile_spec.rb +0 -42
  66. data/spec/etl/mysql_load_file_value_transformer_spec.rb +0 -27
  67. data/spec/etl/screens/composite_screen_spec.rb +0 -25
  68. data/spec/etl/sequel/load_data_infile_expression_spec.rb +0 -60
  69. data/spec/etl/sequel/load_data_infile_spec.rb +0 -37
  70. data/spec/etl/sink_spec.rb +0 -7
  71. data/spec/etl/transformations/add_insert_timestamp_spec.rb +0 -9
data/lib/chicago/etl.rb CHANGED
@@ -1,35 +1,66 @@
+if RUBY_VERSION.split(".")[1] < "9"
+  require 'fastercsv'
+  CSV = FasterCSV
+else
+  require 'csv'
+end
+
 require 'sequel'
+require 'chicago/flow/errors'
+require 'chicago/flow/transformation'
+require 'chicago/flow/filter'
+require 'chicago/flow/transformation_chain'
+require 'chicago/flow/pipeline_stage'
+require 'chicago/flow/pipeline_endpoint'
+require 'chicago/flow/array_source'
+require 'chicago/flow/dataset_source'
+require 'chicago/flow/sink'
+require 'chicago/flow/array_sink'
+require 'chicago/flow/null_sink'
+require 'chicago/flow/mysql'
 
+require 'chicago/etl/core_extensions'
 require 'chicago/etl/counter'
 require 'chicago/etl/key_builder'
-require 'chicago/etl/sink'
-require 'chicago/etl/mysql_load_file_value_transformer'
-require 'chicago/etl/buffering_insert_writer'
-require 'chicago/etl/mysql_dumpfile'
-
+require 'chicago/etl/schema_table_sink_factory'
+require 'chicago/etl/transformations'
 require 'chicago/etl/load_dataset_builder'
+require 'chicago/etl/dataset_batch_stage'
+require 'chicago/etl/load_pipeline_stage_builder'
+require 'chicago/etl/pipeline'
 
 # Sequel Extensions
 require 'chicago/etl/sequel/filter_to_etl_batch'
-require 'chicago/etl/sequel/load_data_infile'
 require 'chicago/etl/sequel/dependant_tables'
 
 # Screens
 require 'chicago/etl/screens/column_screen'
-require 'chicago/etl/screens/composite_screen'
 require 'chicago/etl/screens/missing_value'
 require 'chicago/etl/screens/invalid_element'
 require 'chicago/etl/screens/out_of_bounds'
 
 # Transformations
-require 'chicago/etl/transformations/add_insert_timestamp'
 require 'chicago/etl/transformations/uk_post_code'
 require 'chicago/etl/transformations/uk_post_code_field'
 
 module Chicago
+  # Contains classes related to ETL processing.
   module ETL
     autoload :TableBuilder, 'chicago/etl/table_builder.rb'
     autoload :Batch, 'chicago/etl/batch.rb'
     autoload :TaskInvocation, 'chicago/etl/task_invocation.rb'
+
+    # Executes a pipeline stage in the context of an ETL Batch.
+    #
+    # Task execution status is stored in a database etl task
+    # invocations table - this ensures tasks aren't run more than once
+    # within a batch.
+    def self.execute(stage, etl_batch, reextract, logger)
+      etl_batch.perform_task(:load, stage.name) do
+        logger.debug "Starting loading #{stage.name}"
+        stage.execute(etl_batch, reextract)
+        logger.debug "Finished loading #{stage.name}"
+      end
+    end
   end
 end
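The new Chicago::ETL.execute entry point wraps a stage in batch bookkeeping. A minimal sketch of calling it (the stage object is hypothetical; per the method above it only needs to respond to #name and #execute(etl_batch, reextract)):

    require 'logger'

    batch = Chicago::ETL::Batch.instance.start  # as exercised in batch_spec below
    # `user_dim_stage` is a placeholder for a stage built elsewhere
    Chicago::ETL.execute(user_dim_stage, batch, false, Logger.new($stdout))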
data/lib/chicago/flow/array_sink.rb ADDED
@@ -0,0 +1,35 @@
+module Chicago
+  module Flow
+    # An endpoint that stores rows in an Array.
+    #
+    # @api public
+    class ArraySink < Sink
+      # Returns the array of written rows.
+      attr_reader :data
+
+      # The name of this sink
+      attr_reader :name
+
+      # Creates an ArraySink.
+      #
+      # Optionally you may pass an array of column names if you wish
+      # to use static validation that the correct columns are written
+      # through the pipeline.
+      def initialize(name, fields=[])
+        @name = name
+        @fields = [fields].flatten
+        @data = []
+      end
+
+      # See Sink#<<
+      def <<(row)
+        @data << row.merge(constant_values)
+      end
+
+      # See Sink#truncate
+      def truncate
+        @data.clear
+      end
+    end
+  end
+end
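Based on the class above, an ArraySink collects everything written to it, merging in any constant values set on the sink (set_constant_values comes from Sink, shown later in this diff):

    sink = Chicago::Flow::ArraySink.new(:output, [:id, :name])
    sink.set_constant_values(:etl_batch_id => 42)
    sink << {:id => 1, :name => "foo"}
    sink.data  # => [{:id => 1, :name => "foo", :etl_batch_id => 42}]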
data/lib/chicago/flow/array_source.rb ADDED
@@ -0,0 +1,15 @@
+module Chicago
+  module Flow
+    # @api public
+    class ArraySource < PipelineEndpoint
+      def initialize(array, fields=[])
+        @fields = [fields].flatten
+        @array = array
+      end
+
+      def each
+        @array.each {|row| yield row }
+      end
+    end
+  end
+end
data/lib/chicago/flow/dataset_source.rb ADDED
@@ -0,0 +1,23 @@
+require 'sequel'
+require 'sequel/fast_columns'
+
+module Chicago
+  module Flow
+    # @api public
+    class DatasetSource < PipelineEndpoint
+      attr_reader :dataset
+
+      def initialize(dataset)
+        @dataset = dataset
+      end
+
+      def each
+        @dataset.each {|row| yield row }
+      end
+
+      def fields
+        @dataset.columns
+      end
+    end
+  end
+end
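A DatasetSource simply wraps a Sequel dataset. A sketch, with an illustrative connection URL:

    DB = Sequel.connect("mysql://localhost/warehouse")
    source = Chicago::Flow::DatasetSource.new(DB[:users])
    source.fields                  # delegates to the dataset's columns
    source.each {|row| puts row }  # yields each row hash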
data/lib/chicago/flow/errors.rb ADDED
@@ -0,0 +1,14 @@
+module Chicago
+  module Flow
+    # @api public
+    class Error < RuntimeError
+    end
+
+    # @api public
+    class RaisingErrorHandler
+      def unregistered_sinks(sinks)
+        raise Error.new("Sinks not registered: #{sinks.join(",")}")
+      end
+    end
+  end
+end
data/lib/chicago/flow/filter.rb ADDED
@@ -0,0 +1,15 @@
+module Chicago
+  module Flow
+    # @api public
+    class Filter < Transformation
+      def initialize(stream=:default, &block)
+        super(stream)
+        @block = block || lambda {|row| false }
+      end
+
+      def process_row(row)
+        row if @block.call(row)
+      end
+    end
+  end
+end
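A Filter keeps rows for which its block returns a truthy value and drops the rest (with no block every row is dropped, since the default block returns false). For example:

    only_active = Chicago::Flow::Filter.new {|row| row[:active] }
    only_active.process(:active => true,  :id => 1)  # => {:active => true, :id => 1}
    only_active.process(:active => false, :id => 2)  # => nil (dropped)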
data/lib/chicago/flow/mysql.rb ADDED
@@ -0,0 +1,4 @@
+require 'sequel'
+require 'sequel/load_data_infile'
+require 'chicago/flow/mysql_file_serializer'
+require 'chicago/flow/mysql_file_sink'
data/lib/chicago/{etl/mysql_load_file_value_transformer.rb → flow/mysql_file_serializer.rb} RENAMED
@@ -1,12 +1,15 @@
+require 'date'
+
 module Chicago
-  module ETL
-    class MysqlLoadFileValueTransformer
+  module Flow
+    # @api private
+    class MysqlFileSerializer
       # Transforms a value to be suitable for use in a file in a LOAD
       # DATA INFILE mysql statement.
-      def transform(value)
+      def serialize(value)
         case value
         when nil
-          "\\N"
+          "NULL"
         when true
           "1"
         when false
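The renamed serializer converts Ruby values into the literals MySQL expects in a LOAD DATA INFILE file; note the behavioural change from "\N" to "NULL" for nil. The hunk is truncated after the false branch, so only the visible cases are illustrated:

    s = Chicago::Flow::MysqlFileSerializer.new
    s.serialize(nil)   # => "NULL" (previously "\\N")
    s.serialize(true)  # => "1"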
data/lib/chicago/flow/mysql_file_sink.rb ADDED
@@ -0,0 +1,68 @@
+require 'sequel'
+require 'sequel/load_data_infile'
+require 'tmpdir'
+
+Sequel.extension :core_extensions
+
+module Chicago
+  module Flow
+    # @api public
+    class MysqlFileSink < Sink
+      attr_reader :filepath
+      attr_writer :truncation_strategy
+
+      def initialize(db, table_name, fields, options = {})
+        @fields = [fields].flatten
+        @filepath = options[:filepath] || temp_file(table_name)
+        @serializer = MysqlFileSerializer.new
+        @db = db
+        @table_name = table_name
+        @insert_ignore = !!options[:ignore]
+      end
+
+      def name
+        @table_name
+      end
+
+      def <<(row)
+        csv << fields.map {|c| @serializer.serialize(row[c]) }
+      end
+
+      def close
+        csv.flush
+        load_from_file(filepath)
+        csv.close
+        File.unlink(filepath) if File.exists?(filepath)
+      end
+
+      # Loads data from the file into the MySQL table via LOAD DATA
+      # INFILE, if the file exists and has content.
+      def load_from_file(file)
+        return unless File.size?(file)
+        dataset.load_csv_infile(file, @fields, :set => constant_values)
+      end
+
+      def truncate
+        if @truncation_strategy
+          @truncation_strategy.call
+        else
+          @db[@table_name].truncate
+        end
+      end
+
+      private
+
+      def dataset
+        @insert_ignore ? @db[@table_name].insert_ignore : @db[@table_name]
+      end
+
+      def csv
+        @csv ||= CSV.open(filepath, "w")
+      end
+
+      def temp_file(table_name)
+        File.join(Dir.tmpdir, "#{table_name}.#{rand(1_000_000)}.csv")
+      end
+    end
+  end
+end
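Putting the sink together: rows written to a MysqlFileSink are serialized to a temporary CSV file, then bulk-loaded on close. A sketch, assuming DB is a Sequel MySQL connection and a dim_users table exists:

    sink = Chicago::Flow::MysqlFileSink.new(DB, :dim_users, [:id, :name], :ignore => true)
    sink << {:id => 1, :name => "foo"}
    sink.close  # flushes the CSV, issues LOAD DATA INFILE, deletes the temp file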
data/lib/chicago/flow/null_sink.rb ADDED
@@ -0,0 +1,8 @@
+module Chicago
+  module Flow
+    # Supports the Sink interface, but discards all rows written to
+    # it.
+    class NullSink < Sink
+    end
+  end
+end
data/lib/chicago/flow/pipeline_endpoint.rb ADDED
@@ -0,0 +1,15 @@
+module Chicago
+  module Flow
+    # A Source or a Sink.
+    #
+    # @api public
+    # @abstract
+    class PipelineEndpoint
+      attr_reader :fields
+
+      def has_defined_fields?
+        !fields.empty?
+      end
+    end
+  end
+end
data/lib/chicago/flow/pipeline_stage.rb ADDED
@@ -0,0 +1,68 @@
+module Chicago
+  module Flow
+    # Co-ordinates iterating over rows provided by a source, passing
+    # them through a transformation chain before writing them to
+    # sink(s).
+    #
+    # @api public
+    class PipelineStage
+      attr_reader :transformation_chain
+
+      def initialize(options={})
+        @sinks = options[:sinks] || {}
+        @transformations = options[:transformations] || []
+        @error_handler = options[:error_handler] || RaisingErrorHandler.new
+        @transformation_chain = TransformationChain.new(*@transformations)
+      end
+
+      # Returns the named sink, if it exists
+      def sink(name)
+        @sinks[name.to_sym]
+      end
+
+      def sinks
+        @sinks.values
+      end
+
+      def register_sink(name, sink)
+        @sinks[name.to_sym] = sink
+        self
+      end
+
+      def validate_pipeline
+        unless unregistered_sinks.empty?
+          @error_handler.unregistered_sinks(unregistered_sinks)
+        end
+      end
+
+      def execute(source)
+        validate_pipeline
+        sinks.each(&:open)
+        pipe_rows_to_sinks_from(source)
+        sinks.each(&:close)
+      end
+
+      def required_sinks
+        transformation_chain.output_streams | [:default]
+      end
+
+      def unregistered_sinks
+        required_sinks - @sinks.keys
+      end
+
+      private
+
+      def pipe_rows_to_sinks_from(source)
+        source.each do |row|
+          transformation_chain.process(row).each {|row| process_row(row) }
+        end
+        transformation_chain.flush.each {|row| process_row(row) }
+      end
+
+      def process_row(row)
+        stream = row.delete(:_stream) || :default
+        @sinks[stream] << row
+      end
+    end
+  end
+end
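End to end, a PipelineStage reads from a source, runs each row through the transformation chain, and routes it to the sink registered for its stream. A minimal sketch using only classes from this diff:

    source = Chicago::Flow::ArraySource.new([{:id => 1}, {:id => 2}])
    stage  = Chicago::Flow::PipelineStage.new(
      :transformations => [Chicago::Flow::Filter.new {|row| row[:id] > 1 }]
    )
    stage.register_sink(:default, Chicago::Flow::ArraySink.new(:output))
    stage.execute(source)
    stage.sink(:default).data  # => [{:id => 2}]

Had a transformation declared an extra output stream, execute would raise via RaisingErrorHandler unless a sink were registered for that stream as well.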
data/lib/chicago/flow/sink.rb ADDED
@@ -0,0 +1,53 @@
+module Chicago
+  module Flow
+    # The destination for rows passing through a pipeline stage.
+    #
+    # @api public
+    # @abstract
+    class Sink < PipelineEndpoint
+      # Specifies a hash of values that are assumed to apply to all
+      # rows.
+      #
+      # Subclasses should use these constant values appropriately when
+      # writing rows, by merging them with the row or otherwise
+      # ensuring that they end up in the final source this sink
+      # represents.
+      def constant_values
+        @constant_values ||= {}
+      end
+
+      # Sets a number of constant values.
+      def set_constant_values(hash={})
+        constant_values.merge!(hash)
+        self
+      end
+
+      # Performs any operations before writing rows to this sink.
+      #
+      # By default does nothing; may be overridden by subclasses.
+      def open
+      end
+
+      # Performs any operations after writing rows to this sink.
+      #
+      # By default does nothing; may be overridden by subclasses.
+      def close
+      end
+
+      # Writes a row to this sink.
+      #
+      # By default does nothing; may be overridden by subclasses.
+      def <<(row)
+      end
+
+      # Removes all rows from this sink.
+      #
+      # This includes all rows written prior to this particular
+      # execution of a pipeline stage.
+      #
+      # By default does nothing; should be overridden by subclasses.
+      def truncate
+      end
+    end
+  end
+end
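Concrete sinks only need to override the hooks they care about. A minimal hypothetical subclass (StdoutSink is illustrative, not part of the gem), assuming nothing beyond the interface above:

    class StdoutSink < Chicago::Flow::Sink
      # Writes each row, plus any constant values, to standard output.
      def <<(row)
        puts row.merge(constant_values).inspect
      end
    end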
data/lib/chicago/flow/transformation.rb ADDED
@@ -0,0 +1,169 @@
+module Chicago
+  module Flow
+    # The key used to store the stream in the row.
+    #
+    # @api private
+    STREAM = :_stream
+
+    # A base class for row transformations.
+    #
+    # Transformations process hash-like rows by filtering or altering
+    # their contents.
+    #
+    # @api public
+    # @abstract Subclass and add a process_row method
+    class Transformation
+      # Creates the transformation.
+      #
+      # This should not be overridden by subclasses - transformations
+      # that need their own arguments should take them via named
+      # options.
+      #
+      # @overload initialize(stream, options)
+      #   Specifies this transformation applies to a specific
+      #   stream. Options are specific to the transformation subclass.
+      # @overload initialize(options)
+      #   As above, but the stream is assumed to be :default
+      def initialize(*args)
+        stream, options = *args
+        if stream.kind_of?(Hash)
+          @stream = :default
+          @options = stream
+        else
+          @stream = stream || :default
+          @options = options || {}
+        end
+
+        ensure_options_present
+      end
+
+      # Returns the required initialization options for this transformation.
+      def self.required_options
+        @required_options ||= []
+      end
+
+      # Returns the fields added by this transformation.
+      def self.added_fields
+        @added_fields ||= []
+      end
+
+      # Returns the fields removed by this transformation.
+      def self.removed_fields
+        @removed_fields ||= []
+      end
+
+      # Specify which options are required in the constructor of
+      # this transformation.
+      def self.requires_options(*options)
+        required_options.concat options.flatten
+      end
+
+      # Specify which fields are added to the row by this
+      # transformation.
+      def self.adds_fields(*fields)
+        added_fields.concat fields.flatten
+      end
+
+      # Specify which fields are removed from the row by this
+      # transformation.
+      def self.removes_fields(*fields)
+        removed_fields.concat fields.flatten
+      end
+
+      # Returns the required initialization options for this transformation.
+      def required_options
+        self.class.required_options
+      end
+
+      # Returns the fields added by this transformation.
+      def added_fields
+        self.class.added_fields
+      end
+
+      # Returns the fields removed by this transformation.
+      def removed_fields
+        self.class.removed_fields
+      end
+
+      def upstream_fields(fields)
+        ((fields + removed_fields) - added_fields).uniq
+      end
+
+      def downstream_fields(fields)
+        ((fields - removed_fields) + added_fields).uniq
+      end
+
+      # Processes a row if the row is on this transformation's stream.
+      #
+      # This should not be overridden by subclasses; override
+      # process_row instead.
+      #
+      # @return Hash if a single row is returned
+      # @return Array<Hash> if multiple rows need to be returned
+      def process(row)
+        applies_to_stream?(row[STREAM]) ? process_row(row) : row
+      end
+
+      # Returns all remaining rows yet to make their way through the
+      # pipeline.
+      #
+      # This should be overridden by subclasses if the transformation
+      # holds back rows as it does processing (to find the maximum
+      # value in a set of rows for example), to ensure that all rows
+      # are written through the pipeline.
+      #
+      # @return Array<Hash> by default an empty array.
+      def flush
+        []
+      end
+
+      # Returns the streams to which this transformation may write
+      # rows.
+      #
+      # By default, transformations are assumed to write only to the
+      # :default stream. Override this in subclasses as necessary.
+      def output_streams
+        [:default]
+      end
+
+      # Returns true if this transformation should be applied to a row
+      # on the target stream.
+      def applies_to_stream?(target_stream)
+        @stream == :all ||
+          (target_stream.nil? && @stream == :default) ||
+          target_stream == @stream
+      end
+
+      protected
+
+      # Performs the transformation on the row.
+      #
+      # By default does nothing; override in subclasses. Subclasses
+      # should return either nil, a Hash-like row or an Array of
+      # Hash-like rows.
+      def process_row(row)
+        row
+      end
+
+      # Assigns the row to a stream.
+      #
+      # Will raise an error if the stream is not declared by
+      # overriding output_streams.
+      def assign_stream(row, stream)
+        raise "Stream not declared" unless stream.nil? || output_streams.include?(stream)
+        row[STREAM] = stream if stream
+        row
+      end
+
+      private
+
+      def ensure_options_present
+        missing_keys = required_options - @options.keys
+
+        unless missing_keys.empty?
+          raise ArgumentError.new("The following options are not supplied: " + missing_keys.join(","))
+        end
+      end
+    end
+  end
+end
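Subclasses combine the class-level declarations with a process_row override. A hypothetical transformation (AddSlug and its :from option are illustrative, not part of the gem):

    class AddSlug < Chicago::Flow::Transformation
      requires_options :from
      adds_fields :slug

      def process_row(row)
        row.merge(:slug => row[@options[:from]].to_s.downcase.tr(" ", "-"))
      end
    end

    AddSlug.new(:from => :name).process(:name => "Some Name")
    # => {:name => "Some Name", :slug => "some-name"}

    AddSlug.new  # raises ArgumentError: the :from option is not supplied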
data/lib/chicago/flow/transformation_chain.rb ADDED
@@ -0,0 +1,40 @@
+module Chicago
+  module Flow
+    # @api private
+    class TransformationChain
+      def initialize(*transforms)
+        @transforms = transforms
+      end
+
+      def output_streams
+        @transforms.inject([]) {|s, t| s | t.output_streams }
+      end
+
+      def process(row)
+        @transforms.inject([row]) do |rows, transform|
+          process_rows(rows, transform)
+        end
+      end
+
+      def flush
+        @transforms.inject([]) do |rows, transform|
+          process_rows(rows, transform) + transform.flush
+        end
+      end
+
+      def upstream_fields(fields)
+        @transforms.inject(fields) {|fs, t| t.upstream_fields(fs) }
+      end
+
+      def downstream_fields(fields)
+        @transforms.inject(fields) {|fs, t| t.downstream_fields(fs) }
+      end
+
+      private
+
+      def process_rows(rows, transform)
+        rows.map {|row| transform.process(row) }.flatten.compact
+      end
+    end
+  end
+end
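Process and flush cascade through the chain, with nil results compacted away at each step, so a filter dropping a row removes it for all downstream transforms:

    chain = Chicago::Flow::TransformationChain.new(
      Chicago::Flow::Filter.new {|row| row[:id] },
      Chicago::Flow::Filter.new {|row| row[:id] > 1 }
    )
    chain.process({:id => 2})  # => [{:id => 2}]
    chain.process({:id => 1})  # => []
    chain.flush                # => [] (no transform here holds rows back)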
data/spec/etl/batch_spec.rb CHANGED
@@ -15,7 +15,8 @@ describe Chicago::ETL::Batch do
   end
 
   it "should set the start timestamp of the batch to now when created" do
-    ETL::Batch.instance.start.started_at.to_i.should == Time.now.to_i
+    (ETL::Batch.instance.start.started_at.to_i - Time.now.to_i).abs.
+      should <= 5
   end
 
   it "should have a state of 'Started' when started" do
data/spec/etl/core_extensions_spec.rb ADDED
@@ -0,0 +1,13 @@
+require 'spec_helper'
+
+describe Hash do
+  it "should have a put method which returns the hash" do
+    {}.put(:a, 1).should == {:a => 1}
+  end
+
+  it "should have a modify_existing method that ignores nil values" do
+    {:a => nil}.modify_existing(:a) {|v| 2 }.should == {:a => nil}
+    {:a => 1}.modify_existing(:a) {|v| 2 }.should == {:a => 2}
+    {}.modify_existing(:a) {|r| 2 }.should == {}
+  end
+end