chicago-etl 0.1.4 → 0.2.0

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in the respective public registry.
Files changed (37)
  1. data/VERSION +1 -1
  2. data/chicago-etl.gemspec +24 -24
  3. data/lib/chicago/{flow → etl}/array_sink.rb +1 -1
  4. data/lib/chicago/{flow → etl}/array_source.rb +1 -1
  5. data/lib/chicago/etl/dataset_batch_stage.rb +0 -1
  6. data/lib/chicago/{flow → etl}/dataset_source.rb +1 -1
  7. data/lib/chicago/{flow → etl}/errors.rb +1 -1
  8. data/lib/chicago/{flow → etl}/filter.rb +1 -1
  9. data/lib/chicago/etl/mysql.rb +4 -0
  10. data/lib/chicago/{flow → etl}/mysql_file_serializer.rb +1 -1
  11. data/lib/chicago/{flow → etl}/mysql_file_sink.rb +1 -1
  12. data/lib/chicago/{flow → etl}/null_sink.rb +1 -1
  13. data/lib/chicago/{flow → etl}/pipeline_endpoint.rb +1 -1
  14. data/lib/chicago/etl/schema_sinks_and_transformations_builder.rb +1 -1
  15. data/lib/chicago/etl/schema_table_sink_factory.rb +11 -11
  16. data/lib/chicago/etl/screens/column_screen.rb +1 -1
  17. data/lib/chicago/{flow → etl}/sink.rb +1 -1
  18. data/lib/chicago/etl/stage.rb +2 -3
  19. data/lib/chicago/{flow → etl}/transformation.rb +1 -1
  20. data/lib/chicago/{flow → etl}/transformation_chain.rb +1 -1
  21. data/lib/chicago/etl/transformations/deduplicate_rows.rb +1 -1
  22. data/lib/chicago/etl/transformations.rb +5 -5
  23. data/lib/chicago/etl.rb +14 -11
  24. data/spec/{flow → etl}/array_sink_spec.rb +1 -1
  25. data/spec/{flow → etl}/array_source_spec.rb +1 -1
  26. data/spec/{flow → etl}/dataset_source_spec.rb +1 -1
  27. data/spec/etl/define_stage_spec.rb +6 -6
  28. data/spec/{flow → etl}/filter_spec.rb +1 -1
  29. data/spec/{flow → etl}/mysql_file_serializer_spec.rb +1 -1
  30. data/spec/{flow → etl}/mysql_file_sink_spec.rb +2 -2
  31. data/spec/{flow → etl}/mysql_integration_spec.rb +4 -4
  32. data/spec/etl/schema_table_sink_factory_spec.rb +1 -1
  33. data/spec/{flow → etl}/transformation_chain_spec.rb +6 -6
  34. data/spec/{flow → etl}/transformation_spec.rb +3 -3
  35. data/spec/etl/transformations_spec.rb +1 -1
  36. metadata +27 -27
  37. data/lib/chicago/flow/mysql.rb +0 -4
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.1.4
1
+ 0.2.0
data/chicago-etl.gemspec CHANGED
@@ -5,11 +5,11 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = "chicago-etl"
8
- s.version = "0.1.4"
8
+ s.version = "0.2.0"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["Roland Swingler"]
12
- s.date = "2013-11-13"
12
+ s.date = "2013-11-18"
13
13
  s.description = "ETL tools for Chicago"
14
14
  s.email = "roland.swingler@gmail.com"
15
15
  s.extra_rdoc_files = [
@@ -28,14 +28,24 @@ Gem::Specification.new do |s|
28
28
  "chicago-flow.gemspec",
29
29
  "lib/chicago-etl.rb",
30
30
  "lib/chicago/etl.rb",
31
+ "lib/chicago/etl/array_sink.rb",
32
+ "lib/chicago/etl/array_source.rb",
31
33
  "lib/chicago/etl/batch.rb",
32
34
  "lib/chicago/etl/core_extensions.rb",
33
35
  "lib/chicago/etl/counter.rb",
34
36
  "lib/chicago/etl/dataset_batch_stage.rb",
35
37
  "lib/chicago/etl/dataset_builder.rb",
38
+ "lib/chicago/etl/dataset_source.rb",
39
+ "lib/chicago/etl/errors.rb",
40
+ "lib/chicago/etl/filter.rb",
36
41
  "lib/chicago/etl/key_builder.rb",
37
42
  "lib/chicago/etl/load_dataset_builder.rb",
43
+ "lib/chicago/etl/mysql.rb",
44
+ "lib/chicago/etl/mysql_file_serializer.rb",
45
+ "lib/chicago/etl/mysql_file_sink.rb",
46
+ "lib/chicago/etl/null_sink.rb",
38
47
  "lib/chicago/etl/pipeline.rb",
48
+ "lib/chicago/etl/pipeline_endpoint.rb",
39
49
  "lib/chicago/etl/schema_sinks_and_transformations_builder.rb",
40
50
  "lib/chicago/etl/schema_table_sink_factory.rb",
41
51
  "lib/chicago/etl/screens/column_screen.rb",
@@ -44,37 +54,34 @@ Gem::Specification.new do |s|
44
54
  "lib/chicago/etl/screens/out_of_bounds.rb",
45
55
  "lib/chicago/etl/sequel/dependant_tables.rb",
46
56
  "lib/chicago/etl/sequel/filter_to_etl_batch.rb",
57
+ "lib/chicago/etl/sink.rb",
47
58
  "lib/chicago/etl/stage.rb",
48
59
  "lib/chicago/etl/stage_builder.rb",
49
60
  "lib/chicago/etl/table_builder.rb",
50
61
  "lib/chicago/etl/task_invocation.rb",
51
62
  "lib/chicago/etl/tasks.rb",
63
+ "lib/chicago/etl/transformation.rb",
64
+ "lib/chicago/etl/transformation_chain.rb",
52
65
  "lib/chicago/etl/transformations.rb",
53
66
  "lib/chicago/etl/transformations/deduplicate_rows.rb",
54
67
  "lib/chicago/etl/transformations/uk_post_code.rb",
55
68
  "lib/chicago/etl/transformations/uk_post_code_field.rb",
56
- "lib/chicago/flow/array_sink.rb",
57
- "lib/chicago/flow/array_source.rb",
58
- "lib/chicago/flow/dataset_source.rb",
59
- "lib/chicago/flow/errors.rb",
60
- "lib/chicago/flow/filter.rb",
61
- "lib/chicago/flow/mysql.rb",
62
- "lib/chicago/flow/mysql_file_serializer.rb",
63
- "lib/chicago/flow/mysql_file_sink.rb",
64
- "lib/chicago/flow/null_sink.rb",
65
- "lib/chicago/flow/pipeline_endpoint.rb",
66
- "lib/chicago/flow/sink.rb",
67
- "lib/chicago/flow/transformation.rb",
68
- "lib/chicago/flow/transformation_chain.rb",
69
69
  "spec/db_connections.yml.dist",
70
+ "spec/etl/array_sink_spec.rb",
71
+ "spec/etl/array_source_spec.rb",
70
72
  "spec/etl/batch_spec.rb",
71
73
  "spec/etl/core_extensions_spec.rb",
72
74
  "spec/etl/counter_spec.rb",
75
+ "spec/etl/dataset_source_spec.rb",
73
76
  "spec/etl/define_dimension_stage_spec.rb",
74
77
  "spec/etl/define_stage_spec.rb",
75
78
  "spec/etl/etl_batch_id_dataset_filter.rb",
79
+ "spec/etl/filter_spec.rb",
76
80
  "spec/etl/key_builder_spec.rb",
77
81
  "spec/etl/load_dataset_builder_spec.rb",
82
+ "spec/etl/mysql_file_serializer_spec.rb",
83
+ "spec/etl/mysql_file_sink_spec.rb",
84
+ "spec/etl/mysql_integration_spec.rb",
78
85
  "spec/etl/pipeline_stage_builder_spec.rb",
79
86
  "spec/etl/schema_table_sink_factory_spec.rb",
80
87
  "spec/etl/screens/invalid_element_spec.rb",
@@ -85,19 +92,12 @@ Gem::Specification.new do |s|
85
92
  "spec/etl/stage_spec.rb",
86
93
  "spec/etl/table_builder_spec.rb",
87
94
  "spec/etl/task_spec.rb",
95
+ "spec/etl/transformation_chain_spec.rb",
96
+ "spec/etl/transformation_spec.rb",
88
97
  "spec/etl/transformations/deduplicate_rows_spec.rb",
89
98
  "spec/etl/transformations/uk_post_code_field_spec.rb",
90
99
  "spec/etl/transformations/uk_post_code_spec.rb",
91
100
  "spec/etl/transformations_spec.rb",
92
- "spec/flow/array_sink_spec.rb",
93
- "spec/flow/array_source_spec.rb",
94
- "spec/flow/dataset_source_spec.rb",
95
- "spec/flow/filter_spec.rb",
96
- "spec/flow/mysql_file_serializer_spec.rb",
97
- "spec/flow/mysql_file_sink_spec.rb",
98
- "spec/flow/mysql_integration_spec.rb",
99
- "spec/flow/transformation_chain_spec.rb",
100
- "spec/flow/transformation_spec.rb",
101
101
  "spec/spec_helper.rb"
102
102
  ]
103
103
  s.homepage = "http://github.com/notonthehighstreet/chicago-etl"
@@ -1,5 +1,5 @@
1
1
  module Chicago
2
- module Flow
2
+ module ETL
3
3
  # An endpoint that stores rows in an Array.
4
4
  #
5
5
  # @api public
@@ -1,5 +1,5 @@
1
1
  module Chicago
2
- module Flow
2
+ module ETL
3
3
  # @api public
4
4
  class ArraySource < PipelineEndpoint
5
5
  def initialize(array, fields=[])
@@ -11,7 +11,6 @@ module Chicago
11
11
  super
12
12
  @filter_strategy = options[:filter_strategy] ||
13
13
  lambda { |dataset, etl_batch| @source.filter_to_etl_batch(etl_batch)}
14
- @truncate_pre_load = !!options[:truncate_pre_load]
15
14
  end
16
15
 
17
16
  # Executes this ETL stage.
@@ -2,7 +2,7 @@ require 'sequel'
2
2
  require 'sequel/fast_columns'
3
3
 
4
4
  module Chicago
5
- module Flow
5
+ module ETL
6
6
  # @api public
7
7
  class DatasetSource < PipelineEndpoint
8
8
  attr_reader :dataset
@@ -1,5 +1,5 @@
1
1
  module Chicago
2
- module Flow
2
+ module ETL
3
3
  # @api public
4
4
  class Error < RuntimeError
5
5
  end
@@ -1,5 +1,5 @@
1
1
  module Chicago
2
- module Flow
2
+ module ETL
3
3
  # @api public
4
4
  class Filter < Transformation
5
5
  def initialize(stream=:default, &block)
@@ -0,0 +1,4 @@
1
+ require 'sequel'
2
+ require 'sequel/load_data_infile'
3
+ require 'chicago/etl/mysql_file_serializer'
4
+ require 'chicago/etl/mysql_file_sink'
@@ -1,7 +1,7 @@
1
1
  require 'date'
2
2
 
3
3
  module Chicago
4
- module Flow
4
+ module ETL
5
5
  # @api private
6
6
  class MysqlFileSerializer
7
7
  # Transforms a value to be suitable for use in file in a LOAD
@@ -5,7 +5,7 @@ require 'tmpdir'
5
5
  Sequel.extension :core_extensions
6
6
 
7
7
  module Chicago
8
- module Flow
8
+ module ETL
9
9
  # @api public
10
10
  class MysqlFileSink < Sink
11
11
  attr_reader :filepath
@@ -1,5 +1,5 @@
1
1
  module Chicago
2
- module Flow
2
+ module ETL
3
3
  # Supports the Sink interface, but discards all rows written to
4
4
  # it.
5
5
  class NullSink < Sink
@@ -1,5 +1,5 @@
1
1
  module Chicago
2
- module Flow
2
+ module ETL
3
3
  # A Source or a Sink.
4
4
  #
5
5
  # @api public
@@ -88,7 +88,7 @@ module Chicago
88
88
  @sink_factory.key_sink
89
89
  else
90
90
  # Facts have no key table to write to.
91
- Flow::NullSink.new
91
+ NullSink.new
92
92
  end
93
93
 
94
94
  {
@@ -13,10 +13,10 @@ module Chicago
13
13
  # Pass an :exclude option if you don't want all columns of the
14
14
  # schema table to be loaded via this sink.
15
15
  def sink(options={})
16
- Flow::MysqlFileSink.new(@db,
17
- @schema_table.table_name,
18
- load_columns(options[:exclude]),
19
- mysql_options(options))
16
+ MysqlFileSink.new(@db,
17
+ @schema_table.table_name,
18
+ load_columns(options[:exclude]),
19
+ mysql_options(options))
20
20
  end
21
21
 
22
22
  # Returns a sink to load data into the MySQL table backing the
@@ -26,20 +26,20 @@ module Chicago
26
26
  # schema table's key table name will be used otherwise.
27
27
  def key_sink(options={})
28
28
  table = options.delete(:table) || @schema_table.key_table_name
29
- sink = Flow::MysqlFileSink.new(@db,
30
- table,
31
- [:original_id, :dimension_id],
32
- mysql_options(options))
29
+ sink = MysqlFileSink.new(@db,
30
+ table,
31
+ [:original_id, :dimension_id],
32
+ mysql_options(options))
33
33
  sink.truncation_strategy = lambda do
34
34
  # No Op - we want to maintain keys to avoid having to sort
35
35
  # out fact tables.
36
36
  end
37
37
  sink
38
38
  end
39
-
39
+
40
40
  # Returns a sink to load errors generated in the ETL process.
41
41
  def error_sink(options={})
42
- sink = Flow::MysqlFileSink.
42
+ sink = MysqlFileSink.
43
43
  new(@db, :etl_error_log,
44
44
  [:column, :row_id, :error, :severity, :error_detail], mysql_options(options)).
45
45
  set_constant_values(:table => @schema_table.table_name.to_s,
@@ -53,7 +53,7 @@ module Chicago
53
53
  end
54
54
  sink
55
55
  end
56
-
56
+
57
57
  private
58
58
 
59
59
  def load_columns(exclude=nil)
@@ -2,7 +2,7 @@ module Chicago
2
2
  module ETL
3
3
  module Screens
4
4
  # @abstract
5
- class ColumnScreen < Flow::Transformation
5
+ class ColumnScreen < Transformation
6
6
  def self.for_columns(columns)
7
7
  columns.map {|column|
8
8
  new(:default, :column => column)
@@ -1,5 +1,5 @@
1
1
  module Chicago
2
- module Flow
2
+ module ETL
3
3
  # The destination for rows passing through a pipeline stage.
4
4
  #
5
5
  # @api public
@@ -39,7 +39,7 @@ module Chicago
39
39
  filtered_dataset = reextract ? source :
40
40
  @filter_strategy.call(source, etl_batch)
41
41
 
42
- Chicago::Flow::DatasetSource.new(filtered_dataset)
42
+ DatasetSource.new(filtered_dataset)
43
43
  end
44
44
 
45
45
  private
@@ -58,8 +58,7 @@ module Chicago
58
58
  end
59
59
 
60
60
  def transformation_chain
61
- @transformation_chain ||= Chicago::Flow::TransformationChain.
62
- new(*@transformations)
61
+ @transformation_chain ||= TransformationChain.new(*@transformations)
63
62
  end
64
63
 
65
64
  def process_row(row)
@@ -1,5 +1,5 @@
1
1
  module Chicago
2
- module Flow
2
+ module ETL
3
3
  # The key used to store the stream in the row.
4
4
  #
5
5
  # @api private
@@ -1,5 +1,5 @@
1
1
  module Chicago
2
- module Flow
2
+ module ETL
3
3
  # @api private
4
4
  class TransformationChain
5
5
  def initialize(*transforms)
@@ -1,6 +1,6 @@
1
1
  module Chicago
2
2
  module ETL
3
- class DeduplicateRows < Chicago::Flow::Transformation
3
+ class DeduplicateRows < Transformation
4
4
  def process_row(row)
5
5
  if @working_row.nil?
6
6
  @working_row = new_row(row)
@@ -2,7 +2,7 @@ module Chicago
2
2
  module ETL
3
3
  module Transformations
4
4
  # Filters rows so they only get output once, based on a :key.
5
- class WrittenRowFilter < Flow::Transformation
5
+ class WrittenRowFilter < Transformation
6
6
  requires_options :key
7
7
 
8
8
  def initialize(*args)
@@ -30,7 +30,7 @@ module Chicago
30
30
  # :_errors field.
31
31
  #
32
32
  # Pass the :key_builder option to set the KeyBuilder.
33
- class AddKey < Flow::Transformation
33
+ class AddKey < Transformation
34
34
  requires_options :key_builder
35
35
  adds_fields :id
36
36
 
@@ -57,7 +57,7 @@ module Chicago
57
57
  end
58
58
 
59
59
  # Removes embedded :_errors and puts them on the error stream.
60
- class DemultiplexErrors < Flow::Transformation
60
+ class DemultiplexErrors < Transformation
61
61
  def output_streams
62
62
  [:default, :error]
63
63
  end
@@ -73,7 +73,7 @@ module Chicago
73
73
 
74
74
  # Removes a field from the row, and creates a row on a
75
75
  # designated key stream
76
- class DimensionKeyMapping < Flow::Transformation
76
+ class DimensionKeyMapping < Transformation
77
77
  requires_options :original_key, :key_table
78
78
 
79
79
  def removed_fields
@@ -103,7 +103,7 @@ module Chicago
103
103
  end
104
104
 
105
105
  # Adds a hash of the specified columns as a field in the row.
106
- class HashColumns < Flow::Transformation
106
+ class HashColumns < Transformation
107
107
  requires_options :columns
108
108
 
109
109
  def process_row(row)
data/lib/chicago/etl.rb CHANGED
@@ -6,17 +6,17 @@ else
6
6
  end
7
7
 
8
8
  require 'sequel'
9
- require 'chicago/flow/errors'
10
- require 'chicago/flow/transformation'
11
- require 'chicago/flow/filter'
12
- require 'chicago/flow/transformation_chain'
13
- require 'chicago/flow/pipeline_endpoint'
14
- require 'chicago/flow/array_source'
15
- require 'chicago/flow/dataset_source'
16
- require 'chicago/flow/sink'
17
- require 'chicago/flow/array_sink'
18
- require 'chicago/flow/null_sink'
19
- require 'chicago/flow/mysql'
9
+ require 'chicago/etl/errors'
10
+ require 'chicago/etl/transformation'
11
+ require 'chicago/etl/filter'
12
+ require 'chicago/etl/transformation_chain'
13
+ require 'chicago/etl/pipeline_endpoint'
14
+ require 'chicago/etl/array_source'
15
+ require 'chicago/etl/dataset_source'
16
+ require 'chicago/etl/sink'
17
+ require 'chicago/etl/array_sink'
18
+ require 'chicago/etl/null_sink'
19
+ require 'chicago/etl/mysql'
20
20
 
21
21
  require 'chicago/etl/core_extensions'
22
22
  require 'chicago/etl/counter'
@@ -66,4 +66,7 @@ module Chicago
66
66
  end
67
67
  end
68
68
  end
69
+
70
+ # Deprecated, allows clients to transition when they like.
71
+ Flow = ETL
69
72
  end
@@ -1,6 +1,6 @@
1
1
  require 'spec_helper'
2
2
 
3
- describe Chicago::Flow::ArraySink do
3
+ describe Chicago::ETL::ArraySink do
4
4
  let(:sink) { described_class.new(:foo) }
5
5
 
6
6
  it "has a name" do
@@ -1,6 +1,6 @@
1
1
  require 'spec_helper'
2
2
 
3
- describe Chicago::Flow::ArraySource do
3
+ describe Chicago::ETL::ArraySource do
4
4
  it "has an each method that yields rows" do
5
5
  described_class.new([{:a => 1}]).each do |row|
6
6
  row.should == {:a => 1}
@@ -1,6 +1,6 @@
1
1
  require 'spec_helper'
2
2
 
3
- describe Chicago::Flow::DatasetSource do
3
+ describe Chicago::ETL::DatasetSource do
4
4
  let(:dataset) { stub(:dataset) }
5
5
 
6
6
  it "should delegtate each to the dataset" do
@@ -1,6 +1,6 @@
1
1
  require "spec_helper"
2
2
 
3
- class TestTransformation < Chicago::Flow::Transformation
3
+ class TestTransformation < Chicago::ETL::Transformation
4
4
  def output_streams
5
5
  [:another_stream]
6
6
  end
@@ -23,8 +23,8 @@ describe "defining and executing a stage" do
23
23
  end
24
24
 
25
25
  sinks do
26
- add Chicago::Flow::ArraySink.new(:test)
27
- add Chicago::Flow::ArraySink.new(:test), :stream => :another_stream
26
+ add Chicago::ETL::ArraySink.new(:test)
27
+ add Chicago::ETL::ArraySink.new(:test), :stream => :another_stream
28
28
  end
29
29
  end
30
30
 
@@ -50,8 +50,8 @@ describe "defining and executing a stage" do
50
50
  end
51
51
 
52
52
  sinks do
53
- add Chicago::Flow::ArraySink.new(:test)
54
- add Chicago::Flow::ArraySink.new(:test), :stream => :another_stream
53
+ add Chicago::ETL::ArraySink.new(:test)
54
+ add Chicago::ETL::ArraySink.new(:test), :stream => :another_stream
55
55
  end
56
56
  end
57
57
 
@@ -78,7 +78,7 @@ describe "defining and executing a stage" do
78
78
  end
79
79
 
80
80
  sinks do
81
- add Chicago::Flow::ArraySink.new(:test)
81
+ add Chicago::ETL::ArraySink.new(:test)
82
82
  end
83
83
 
84
84
  filter_strategy do |source, etl_batch|
@@ -1,6 +1,6 @@
1
1
  require 'spec_helper'
2
2
 
3
- describe Chicago::Flow::Filter do
3
+ describe Chicago::ETL::Filter do
4
4
  it "filters all rows by default" do
5
5
  subject.process({:a => 1}).should be_nil
6
6
  end
@@ -1,6 +1,6 @@
1
1
  require 'spec_helper'
2
2
 
3
- describe Chicago::Flow::MysqlFileSerializer do
3
+ describe Chicago::ETL::MysqlFileSerializer do
4
4
  it "serializes nil into NULL" do
5
5
  subject.serialize(nil).should == "NULL"
6
6
  end
@@ -1,7 +1,7 @@
1
1
  require 'spec_helper'
2
2
  require 'sequel'
3
3
 
4
- describe Chicago::Flow::MysqlFileSink do
4
+ describe Chicago::ETL::MysqlFileSink do
5
5
  let(:dataset) { mock(:dataset).as_null_object }
6
6
  let(:db) { mock(:db, :[] => dataset, :schema => []) }
7
7
  let(:csv) { mock(:csv) }
@@ -29,7 +29,7 @@ describe Chicago::Flow::MysqlFileSink do
29
29
  end
30
30
 
31
31
  it "serializes values before writing to the file" do
32
- Chicago::Flow::MysqlFileSerializer.any_instance.
32
+ Chicago::ETL::MysqlFileSerializer.any_instance.
33
33
  should_receive(:serialize).with(1).and_return(1)
34
34
  sink << {:foo => 1}
35
35
  end
@@ -2,7 +2,7 @@ require 'spec_helper'
2
2
 
3
3
  describe "Mysql -> Mysql through transformation chain" do
4
4
  let(:dup_row) {
5
- Class.new(Chicago::Flow::Transformation) {
5
+ Class.new(Chicago::ETL::Transformation) {
6
6
  def output_streams
7
7
  [:default, @options[:onto]].flatten
8
8
  end
@@ -46,15 +46,15 @@ describe "Mysql -> Mysql through transformation chain" do
46
46
  TEST_DB[:source].multi_insert([{:foo => nil, :bin => :unhex.sql_function("1F")},
47
47
  {:foo => "Hello", :bin => :unhex.sql_function("1F")}])
48
48
 
49
- source = Chicago::Flow::DatasetSource.
49
+ source = Chicago::ETL::DatasetSource.
50
50
  new(TEST_DB[:source].
51
51
  select(:id, :foo, :hex.sql_function(:bin).as(:bin)))
52
52
 
53
53
  transformations = [dup_row.new(:onto => :other)]
54
54
 
55
- sink_1 = Chicago::Flow::MysqlFileSink.
55
+ sink_1 = Chicago::ETL::MysqlFileSink.
56
56
  new(TEST_DB, :destination, [:id, :foo, :bin])
57
- sink_2 = Chicago::Flow::ArraySink.new([:id, :foo, :bin])
57
+ sink_2 = Chicago::ETL::ArraySink.new([:id, :foo, :bin])
58
58
 
59
59
  stage = Chicago::ETL::Stage.new(:test,
60
60
  :source => source,
@@ -12,7 +12,7 @@ describe Chicago::ETL::SchemaTableSinkFactory do
12
12
  end
13
13
  }
14
14
 
15
- let(:sink_class) { Chicago::Flow::MysqlFileSink }
15
+ let(:sink_class) { Chicago::ETL::MysqlFileSink }
16
16
 
17
17
  it "builds a MysqlFileSink" do
18
18
  sink_class.should_receive(:new).
@@ -1,8 +1,8 @@
1
1
  require 'spec_helper'
2
2
 
3
- describe Chicago::Flow::TransformationChain do
3
+ describe Chicago::ETL::TransformationChain do
4
4
  let(:add_1_to_a) {
5
- Class.new(Chicago::Flow::Transformation) {
5
+ Class.new(Chicago::ETL::Transformation) {
6
6
  def process_row(row)
7
7
  row[:a] += 1
8
8
  row
@@ -11,7 +11,7 @@ describe Chicago::Flow::TransformationChain do
11
11
  }
12
12
 
13
13
  let(:dup_row) {
14
- Class.new(Chicago::Flow::Transformation) {
14
+ Class.new(Chicago::ETL::Transformation) {
15
15
  def output_streams
16
16
  [:default, @options[:onto]].flatten
17
17
  end
@@ -24,7 +24,7 @@ describe Chicago::Flow::TransformationChain do
24
24
  }
25
25
 
26
26
  let(:store_until_flush) {
27
- Class.new(Chicago::Flow::Transformation) {
27
+ Class.new(Chicago::ETL::Transformation) {
28
28
  def process_row(row)
29
29
  @cache ||= []
30
30
  @cache << row
@@ -48,7 +48,7 @@ describe Chicago::Flow::TransformationChain do
48
48
  end
49
49
 
50
50
  it "can cope with a filter returning nil" do
51
- described_class.new(Chicago::Flow::Filter.new,
51
+ described_class.new(Chicago::ETL::Filter.new,
52
52
  dup_row.new, add_1_to_a.new).process({:a => 1}).
53
53
  should == []
54
54
  end
@@ -56,7 +56,7 @@ describe Chicago::Flow::TransformationChain do
56
56
  it "can write to different streams" do
57
57
  described_class.new(dup_row.new(:onto => :other),
58
58
  add_1_to_a.new).process({:a => 1}).
59
- should == [{:a => 2}, {:a => 1, Chicago::Flow::STREAM => :other}]
59
+ should == [{:a => 2}, {:a => 1, Chicago::ETL::STREAM => :other}]
60
60
  end
61
61
 
62
62
  it "knows what streams it writes to as a chain" do
@@ -1,6 +1,6 @@
1
1
  require 'spec_helper'
2
2
 
3
- describe Chicago::Flow::Transformation do
3
+ describe Chicago::ETL::Transformation do
4
4
  let(:add_1_to_a) {
5
5
  Class.new(described_class) {
6
6
  def process_row(row)
@@ -45,8 +45,8 @@ describe Chicago::Flow::Transformation do
45
45
 
46
46
  it "can apply to all streams using :all" do
47
47
  add_1_to_a.new(:all).process({:a => 1}).should == {:a => 2}
48
- add_1_to_a.new(:all).process({:a => 1, Chicago::Flow::STREAM => :other}).
49
- should == {:a => 2, Chicago::Flow::STREAM => :other}
48
+ add_1_to_a.new(:all).process({:a => 1, Chicago::ETL::STREAM => :other}).
49
+ should == {:a => 2, Chicago::ETL::STREAM => :other}
50
50
  end
51
51
 
52
52
  it "can be flushed" do
@@ -16,7 +16,7 @@ describe Chicago::ETL::Transformations::DemultiplexErrors do
16
16
  it "adds the errors onto the error stream" do
17
17
  subject.process(:_errors => [{:error => 1}]).last.should == {
18
18
  :error => 1,
19
- Chicago::Flow::STREAM => :error
19
+ Chicago::ETL::STREAM => :error
20
20
  }
21
21
  end
22
22
  end
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: chicago-etl
3
3
  version: !ruby/object:Gem::Version
4
- hash: 19
4
+ hash: 23
5
5
  prerelease:
6
6
  segments:
7
7
  - 0
8
- - 1
9
- - 4
10
- version: 0.1.4
8
+ - 2
9
+ - 0
10
+ version: 0.2.0
11
11
  platform: ruby
12
12
  authors:
13
13
  - Roland Swingler
@@ -15,7 +15,7 @@ autorequire:
15
15
  bindir: bin
16
16
  cert_chain: []
17
17
 
18
- date: 2013-11-13 00:00:00 Z
18
+ date: 2013-11-18 00:00:00 Z
19
19
  dependencies:
20
20
  - !ruby/object:Gem::Dependency
21
21
  version_requirements: &id001 !ruby/object:Gem::Requirement
@@ -239,14 +239,24 @@ files:
239
239
  - chicago-flow.gemspec
240
240
  - lib/chicago-etl.rb
241
241
  - lib/chicago/etl.rb
242
+ - lib/chicago/etl/array_sink.rb
243
+ - lib/chicago/etl/array_source.rb
242
244
  - lib/chicago/etl/batch.rb
243
245
  - lib/chicago/etl/core_extensions.rb
244
246
  - lib/chicago/etl/counter.rb
245
247
  - lib/chicago/etl/dataset_batch_stage.rb
246
248
  - lib/chicago/etl/dataset_builder.rb
249
+ - lib/chicago/etl/dataset_source.rb
250
+ - lib/chicago/etl/errors.rb
251
+ - lib/chicago/etl/filter.rb
247
252
  - lib/chicago/etl/key_builder.rb
248
253
  - lib/chicago/etl/load_dataset_builder.rb
254
+ - lib/chicago/etl/mysql.rb
255
+ - lib/chicago/etl/mysql_file_serializer.rb
256
+ - lib/chicago/etl/mysql_file_sink.rb
257
+ - lib/chicago/etl/null_sink.rb
249
258
  - lib/chicago/etl/pipeline.rb
259
+ - lib/chicago/etl/pipeline_endpoint.rb
250
260
  - lib/chicago/etl/schema_sinks_and_transformations_builder.rb
251
261
  - lib/chicago/etl/schema_table_sink_factory.rb
252
262
  - lib/chicago/etl/screens/column_screen.rb
@@ -255,37 +265,34 @@ files:
255
265
  - lib/chicago/etl/screens/out_of_bounds.rb
256
266
  - lib/chicago/etl/sequel/dependant_tables.rb
257
267
  - lib/chicago/etl/sequel/filter_to_etl_batch.rb
268
+ - lib/chicago/etl/sink.rb
258
269
  - lib/chicago/etl/stage.rb
259
270
  - lib/chicago/etl/stage_builder.rb
260
271
  - lib/chicago/etl/table_builder.rb
261
272
  - lib/chicago/etl/task_invocation.rb
262
273
  - lib/chicago/etl/tasks.rb
274
+ - lib/chicago/etl/transformation.rb
275
+ - lib/chicago/etl/transformation_chain.rb
263
276
  - lib/chicago/etl/transformations.rb
264
277
  - lib/chicago/etl/transformations/deduplicate_rows.rb
265
278
  - lib/chicago/etl/transformations/uk_post_code.rb
266
279
  - lib/chicago/etl/transformations/uk_post_code_field.rb
267
- - lib/chicago/flow/array_sink.rb
268
- - lib/chicago/flow/array_source.rb
269
- - lib/chicago/flow/dataset_source.rb
270
- - lib/chicago/flow/errors.rb
271
- - lib/chicago/flow/filter.rb
272
- - lib/chicago/flow/mysql.rb
273
- - lib/chicago/flow/mysql_file_serializer.rb
274
- - lib/chicago/flow/mysql_file_sink.rb
275
- - lib/chicago/flow/null_sink.rb
276
- - lib/chicago/flow/pipeline_endpoint.rb
277
- - lib/chicago/flow/sink.rb
278
- - lib/chicago/flow/transformation.rb
279
- - lib/chicago/flow/transformation_chain.rb
280
280
  - spec/db_connections.yml.dist
281
+ - spec/etl/array_sink_spec.rb
282
+ - spec/etl/array_source_spec.rb
281
283
  - spec/etl/batch_spec.rb
282
284
  - spec/etl/core_extensions_spec.rb
283
285
  - spec/etl/counter_spec.rb
286
+ - spec/etl/dataset_source_spec.rb
284
287
  - spec/etl/define_dimension_stage_spec.rb
285
288
  - spec/etl/define_stage_spec.rb
286
289
  - spec/etl/etl_batch_id_dataset_filter.rb
290
+ - spec/etl/filter_spec.rb
287
291
  - spec/etl/key_builder_spec.rb
288
292
  - spec/etl/load_dataset_builder_spec.rb
293
+ - spec/etl/mysql_file_serializer_spec.rb
294
+ - spec/etl/mysql_file_sink_spec.rb
295
+ - spec/etl/mysql_integration_spec.rb
289
296
  - spec/etl/pipeline_stage_builder_spec.rb
290
297
  - spec/etl/schema_table_sink_factory_spec.rb
291
298
  - spec/etl/screens/invalid_element_spec.rb
@@ -296,19 +303,12 @@ files:
296
303
  - spec/etl/stage_spec.rb
297
304
  - spec/etl/table_builder_spec.rb
298
305
  - spec/etl/task_spec.rb
306
+ - spec/etl/transformation_chain_spec.rb
307
+ - spec/etl/transformation_spec.rb
299
308
  - spec/etl/transformations/deduplicate_rows_spec.rb
300
309
  - spec/etl/transformations/uk_post_code_field_spec.rb
301
310
  - spec/etl/transformations/uk_post_code_spec.rb
302
311
  - spec/etl/transformations_spec.rb
303
- - spec/flow/array_sink_spec.rb
304
- - spec/flow/array_source_spec.rb
305
- - spec/flow/dataset_source_spec.rb
306
- - spec/flow/filter_spec.rb
307
- - spec/flow/mysql_file_serializer_spec.rb
308
- - spec/flow/mysql_file_sink_spec.rb
309
- - spec/flow/mysql_integration_spec.rb
310
- - spec/flow/transformation_chain_spec.rb
311
- - spec/flow/transformation_spec.rb
312
312
  - spec/spec_helper.rb
313
313
  homepage: http://github.com/notonthehighstreet/chicago-etl
314
314
  licenses:
@@ -1,4 +0,0 @@
1
- require 'sequel'
2
- require 'sequel/load_data_infile'
3
- require 'chicago/flow/mysql_file_serializer'
4
- require 'chicago/flow/mysql_file_sink'