chicago-etl 0.1.4 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (37) hide show
  1. data/VERSION +1 -1
  2. data/chicago-etl.gemspec +24 -24
  3. data/lib/chicago/{flow → etl}/array_sink.rb +1 -1
  4. data/lib/chicago/{flow → etl}/array_source.rb +1 -1
  5. data/lib/chicago/etl/dataset_batch_stage.rb +0 -1
  6. data/lib/chicago/{flow → etl}/dataset_source.rb +1 -1
  7. data/lib/chicago/{flow → etl}/errors.rb +1 -1
  8. data/lib/chicago/{flow → etl}/filter.rb +1 -1
  9. data/lib/chicago/etl/mysql.rb +4 -0
  10. data/lib/chicago/{flow → etl}/mysql_file_serializer.rb +1 -1
  11. data/lib/chicago/{flow → etl}/mysql_file_sink.rb +1 -1
  12. data/lib/chicago/{flow → etl}/null_sink.rb +1 -1
  13. data/lib/chicago/{flow → etl}/pipeline_endpoint.rb +1 -1
  14. data/lib/chicago/etl/schema_sinks_and_transformations_builder.rb +1 -1
  15. data/lib/chicago/etl/schema_table_sink_factory.rb +11 -11
  16. data/lib/chicago/etl/screens/column_screen.rb +1 -1
  17. data/lib/chicago/{flow → etl}/sink.rb +1 -1
  18. data/lib/chicago/etl/stage.rb +2 -3
  19. data/lib/chicago/{flow → etl}/transformation.rb +1 -1
  20. data/lib/chicago/{flow → etl}/transformation_chain.rb +1 -1
  21. data/lib/chicago/etl/transformations/deduplicate_rows.rb +1 -1
  22. data/lib/chicago/etl/transformations.rb +5 -5
  23. data/lib/chicago/etl.rb +14 -11
  24. data/spec/{flow → etl}/array_sink_spec.rb +1 -1
  25. data/spec/{flow → etl}/array_source_spec.rb +1 -1
  26. data/spec/{flow → etl}/dataset_source_spec.rb +1 -1
  27. data/spec/etl/define_stage_spec.rb +6 -6
  28. data/spec/{flow → etl}/filter_spec.rb +1 -1
  29. data/spec/{flow → etl}/mysql_file_serializer_spec.rb +1 -1
  30. data/spec/{flow → etl}/mysql_file_sink_spec.rb +2 -2
  31. data/spec/{flow → etl}/mysql_integration_spec.rb +4 -4
  32. data/spec/etl/schema_table_sink_factory_spec.rb +1 -1
  33. data/spec/{flow → etl}/transformation_chain_spec.rb +6 -6
  34. data/spec/{flow → etl}/transformation_spec.rb +3 -3
  35. data/spec/etl/transformations_spec.rb +1 -1
  36. metadata +27 -27
  37. data/lib/chicago/flow/mysql.rb +0 -4
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.1.4
1
+ 0.2.0
data/chicago-etl.gemspec CHANGED
@@ -5,11 +5,11 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = "chicago-etl"
8
- s.version = "0.1.4"
8
+ s.version = "0.2.0"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["Roland Swingler"]
12
- s.date = "2013-11-13"
12
+ s.date = "2013-11-18"
13
13
  s.description = "ETL tools for Chicago"
14
14
  s.email = "roland.swingler@gmail.com"
15
15
  s.extra_rdoc_files = [
@@ -28,14 +28,24 @@ Gem::Specification.new do |s|
28
28
  "chicago-flow.gemspec",
29
29
  "lib/chicago-etl.rb",
30
30
  "lib/chicago/etl.rb",
31
+ "lib/chicago/etl/array_sink.rb",
32
+ "lib/chicago/etl/array_source.rb",
31
33
  "lib/chicago/etl/batch.rb",
32
34
  "lib/chicago/etl/core_extensions.rb",
33
35
  "lib/chicago/etl/counter.rb",
34
36
  "lib/chicago/etl/dataset_batch_stage.rb",
35
37
  "lib/chicago/etl/dataset_builder.rb",
38
+ "lib/chicago/etl/dataset_source.rb",
39
+ "lib/chicago/etl/errors.rb",
40
+ "lib/chicago/etl/filter.rb",
36
41
  "lib/chicago/etl/key_builder.rb",
37
42
  "lib/chicago/etl/load_dataset_builder.rb",
43
+ "lib/chicago/etl/mysql.rb",
44
+ "lib/chicago/etl/mysql_file_serializer.rb",
45
+ "lib/chicago/etl/mysql_file_sink.rb",
46
+ "lib/chicago/etl/null_sink.rb",
38
47
  "lib/chicago/etl/pipeline.rb",
48
+ "lib/chicago/etl/pipeline_endpoint.rb",
39
49
  "lib/chicago/etl/schema_sinks_and_transformations_builder.rb",
40
50
  "lib/chicago/etl/schema_table_sink_factory.rb",
41
51
  "lib/chicago/etl/screens/column_screen.rb",
@@ -44,37 +54,34 @@ Gem::Specification.new do |s|
44
54
  "lib/chicago/etl/screens/out_of_bounds.rb",
45
55
  "lib/chicago/etl/sequel/dependant_tables.rb",
46
56
  "lib/chicago/etl/sequel/filter_to_etl_batch.rb",
57
+ "lib/chicago/etl/sink.rb",
47
58
  "lib/chicago/etl/stage.rb",
48
59
  "lib/chicago/etl/stage_builder.rb",
49
60
  "lib/chicago/etl/table_builder.rb",
50
61
  "lib/chicago/etl/task_invocation.rb",
51
62
  "lib/chicago/etl/tasks.rb",
63
+ "lib/chicago/etl/transformation.rb",
64
+ "lib/chicago/etl/transformation_chain.rb",
52
65
  "lib/chicago/etl/transformations.rb",
53
66
  "lib/chicago/etl/transformations/deduplicate_rows.rb",
54
67
  "lib/chicago/etl/transformations/uk_post_code.rb",
55
68
  "lib/chicago/etl/transformations/uk_post_code_field.rb",
56
- "lib/chicago/flow/array_sink.rb",
57
- "lib/chicago/flow/array_source.rb",
58
- "lib/chicago/flow/dataset_source.rb",
59
- "lib/chicago/flow/errors.rb",
60
- "lib/chicago/flow/filter.rb",
61
- "lib/chicago/flow/mysql.rb",
62
- "lib/chicago/flow/mysql_file_serializer.rb",
63
- "lib/chicago/flow/mysql_file_sink.rb",
64
- "lib/chicago/flow/null_sink.rb",
65
- "lib/chicago/flow/pipeline_endpoint.rb",
66
- "lib/chicago/flow/sink.rb",
67
- "lib/chicago/flow/transformation.rb",
68
- "lib/chicago/flow/transformation_chain.rb",
69
69
  "spec/db_connections.yml.dist",
70
+ "spec/etl/array_sink_spec.rb",
71
+ "spec/etl/array_source_spec.rb",
70
72
  "spec/etl/batch_spec.rb",
71
73
  "spec/etl/core_extensions_spec.rb",
72
74
  "spec/etl/counter_spec.rb",
75
+ "spec/etl/dataset_source_spec.rb",
73
76
  "spec/etl/define_dimension_stage_spec.rb",
74
77
  "spec/etl/define_stage_spec.rb",
75
78
  "spec/etl/etl_batch_id_dataset_filter.rb",
79
+ "spec/etl/filter_spec.rb",
76
80
  "spec/etl/key_builder_spec.rb",
77
81
  "spec/etl/load_dataset_builder_spec.rb",
82
+ "spec/etl/mysql_file_serializer_spec.rb",
83
+ "spec/etl/mysql_file_sink_spec.rb",
84
+ "spec/etl/mysql_integration_spec.rb",
78
85
  "spec/etl/pipeline_stage_builder_spec.rb",
79
86
  "spec/etl/schema_table_sink_factory_spec.rb",
80
87
  "spec/etl/screens/invalid_element_spec.rb",
@@ -85,19 +92,12 @@ Gem::Specification.new do |s|
85
92
  "spec/etl/stage_spec.rb",
86
93
  "spec/etl/table_builder_spec.rb",
87
94
  "spec/etl/task_spec.rb",
95
+ "spec/etl/transformation_chain_spec.rb",
96
+ "spec/etl/transformation_spec.rb",
88
97
  "spec/etl/transformations/deduplicate_rows_spec.rb",
89
98
  "spec/etl/transformations/uk_post_code_field_spec.rb",
90
99
  "spec/etl/transformations/uk_post_code_spec.rb",
91
100
  "spec/etl/transformations_spec.rb",
92
- "spec/flow/array_sink_spec.rb",
93
- "spec/flow/array_source_spec.rb",
94
- "spec/flow/dataset_source_spec.rb",
95
- "spec/flow/filter_spec.rb",
96
- "spec/flow/mysql_file_serializer_spec.rb",
97
- "spec/flow/mysql_file_sink_spec.rb",
98
- "spec/flow/mysql_integration_spec.rb",
99
- "spec/flow/transformation_chain_spec.rb",
100
- "spec/flow/transformation_spec.rb",
101
101
  "spec/spec_helper.rb"
102
102
  ]
103
103
  s.homepage = "http://github.com/notonthehighstreet/chicago-etl"
@@ -1,5 +1,5 @@
1
1
  module Chicago
2
- module Flow
2
+ module ETL
3
3
  # An endpoint that stores rows in an Array.
4
4
  #
5
5
  # @api public
@@ -1,5 +1,5 @@
1
1
  module Chicago
2
- module Flow
2
+ module ETL
3
3
  # @api public
4
4
  class ArraySource < PipelineEndpoint
5
5
  def initialize(array, fields=[])
@@ -11,7 +11,6 @@ module Chicago
11
11
  super
12
12
  @filter_strategy = options[:filter_strategy] ||
13
13
  lambda { |dataset, etl_batch| @source.filter_to_etl_batch(etl_batch)}
14
- @truncate_pre_load = !!options[:truncate_pre_load]
15
14
  end
16
15
 
17
16
  # Executes this ETL stage.
@@ -2,7 +2,7 @@ require 'sequel'
2
2
  require 'sequel/fast_columns'
3
3
 
4
4
  module Chicago
5
- module Flow
5
+ module ETL
6
6
  # @api public
7
7
  class DatasetSource < PipelineEndpoint
8
8
  attr_reader :dataset
@@ -1,5 +1,5 @@
1
1
  module Chicago
2
- module Flow
2
+ module ETL
3
3
  # @api public
4
4
  class Error < RuntimeError
5
5
  end
@@ -1,5 +1,5 @@
1
1
  module Chicago
2
- module Flow
2
+ module ETL
3
3
  # @api public
4
4
  class Filter < Transformation
5
5
  def initialize(stream=:default, &block)
@@ -0,0 +1,4 @@
1
+ require 'sequel'
2
+ require 'sequel/load_data_infile'
3
+ require 'chicago/etl/mysql_file_serializer'
4
+ require 'chicago/etl/mysql_file_sink'
@@ -1,7 +1,7 @@
1
1
  require 'date'
2
2
 
3
3
  module Chicago
4
- module Flow
4
+ module ETL
5
5
  # @api private
6
6
  class MysqlFileSerializer
7
7
  # Transforms a value to be suitable for use in file in a LOAD
@@ -5,7 +5,7 @@ require 'tmpdir'
5
5
  Sequel.extension :core_extensions
6
6
 
7
7
  module Chicago
8
- module Flow
8
+ module ETL
9
9
  # @api public
10
10
  class MysqlFileSink < Sink
11
11
  attr_reader :filepath
@@ -1,5 +1,5 @@
1
1
  module Chicago
2
- module Flow
2
+ module ETL
3
3
  # Supports the Sink interface, but discards all rows written to
4
4
  # it.
5
5
  class NullSink < Sink
@@ -1,5 +1,5 @@
1
1
  module Chicago
2
- module Flow
2
+ module ETL
3
3
  # A Source or a Sink.
4
4
  #
5
5
  # @api public
@@ -88,7 +88,7 @@ module Chicago
88
88
  @sink_factory.key_sink
89
89
  else
90
90
  # Facts have no key table to write to.
91
- Flow::NullSink.new
91
+ NullSink.new
92
92
  end
93
93
 
94
94
  {
@@ -13,10 +13,10 @@ module Chicago
13
13
  # Pass an :exclude option if you don't want all columns of the
14
14
  # schema table to be loaded via this sink.
15
15
  def sink(options={})
16
- Flow::MysqlFileSink.new(@db,
17
- @schema_table.table_name,
18
- load_columns(options[:exclude]),
19
- mysql_options(options))
16
+ MysqlFileSink.new(@db,
17
+ @schema_table.table_name,
18
+ load_columns(options[:exclude]),
19
+ mysql_options(options))
20
20
  end
21
21
 
22
22
  # Returns a sink to load data into the MySQL table backing the
@@ -26,20 +26,20 @@ module Chicago
26
26
  # schema table's key table name will be used otherwise.
27
27
  def key_sink(options={})
28
28
  table = options.delete(:table) || @schema_table.key_table_name
29
- sink = Flow::MysqlFileSink.new(@db,
30
- table,
31
- [:original_id, :dimension_id],
32
- mysql_options(options))
29
+ sink = MysqlFileSink.new(@db,
30
+ table,
31
+ [:original_id, :dimension_id],
32
+ mysql_options(options))
33
33
  sink.truncation_strategy = lambda do
34
34
  # No Op - we want to maintain keys to avoid having to sort
35
35
  # out fact tables.
36
36
  end
37
37
  sink
38
38
  end
39
-
39
+
40
40
  # Returns a sink to load errors generated in the ETL process.
41
41
  def error_sink(options={})
42
- sink = Flow::MysqlFileSink.
42
+ sink = MysqlFileSink.
43
43
  new(@db, :etl_error_log,
44
44
  [:column, :row_id, :error, :severity, :error_detail], mysql_options(options)).
45
45
  set_constant_values(:table => @schema_table.table_name.to_s,
@@ -53,7 +53,7 @@ module Chicago
53
53
  end
54
54
  sink
55
55
  end
56
-
56
+
57
57
  private
58
58
 
59
59
  def load_columns(exclude=nil)
@@ -2,7 +2,7 @@ module Chicago
2
2
  module ETL
3
3
  module Screens
4
4
  # @abstract
5
- class ColumnScreen < Flow::Transformation
5
+ class ColumnScreen < Transformation
6
6
  def self.for_columns(columns)
7
7
  columns.map {|column|
8
8
  new(:default, :column => column)
@@ -1,5 +1,5 @@
1
1
  module Chicago
2
- module Flow
2
+ module ETL
3
3
  # The destination for rows passing through a pipeline stage.
4
4
  #
5
5
  # @api public
@@ -39,7 +39,7 @@ module Chicago
39
39
  filtered_dataset = reextract ? source :
40
40
  @filter_strategy.call(source, etl_batch)
41
41
 
42
- Chicago::Flow::DatasetSource.new(filtered_dataset)
42
+ DatasetSource.new(filtered_dataset)
43
43
  end
44
44
 
45
45
  private
@@ -58,8 +58,7 @@ module Chicago
58
58
  end
59
59
 
60
60
  def transformation_chain
61
- @transformation_chain ||= Chicago::Flow::TransformationChain.
62
- new(*@transformations)
61
+ @transformation_chain ||= TransformationChain.new(*@transformations)
63
62
  end
64
63
 
65
64
  def process_row(row)
@@ -1,5 +1,5 @@
1
1
  module Chicago
2
- module Flow
2
+ module ETL
3
3
  # The key used to store the stream in the row.
4
4
  #
5
5
  # @api private
@@ -1,5 +1,5 @@
1
1
  module Chicago
2
- module Flow
2
+ module ETL
3
3
  # @api private
4
4
  class TransformationChain
5
5
  def initialize(*transforms)
@@ -1,6 +1,6 @@
1
1
  module Chicago
2
2
  module ETL
3
- class DeduplicateRows < Chicago::Flow::Transformation
3
+ class DeduplicateRows < Transformation
4
4
  def process_row(row)
5
5
  if @working_row.nil?
6
6
  @working_row = new_row(row)
@@ -2,7 +2,7 @@ module Chicago
2
2
  module ETL
3
3
  module Transformations
4
4
  # Filters rows so they only get output once, based on a :key.
5
- class WrittenRowFilter < Flow::Transformation
5
+ class WrittenRowFilter < Transformation
6
6
  requires_options :key
7
7
 
8
8
  def initialize(*args)
@@ -30,7 +30,7 @@ module Chicago
30
30
  # :_errors field.
31
31
  #
32
32
  # Pass the :key_builder option to set the KeyBuilder.
33
- class AddKey < Flow::Transformation
33
+ class AddKey < Transformation
34
34
  requires_options :key_builder
35
35
  adds_fields :id
36
36
 
@@ -57,7 +57,7 @@ module Chicago
57
57
  end
58
58
 
59
59
  # Removes embedded :_errors and puts them on the error stream.
60
- class DemultiplexErrors < Flow::Transformation
60
+ class DemultiplexErrors < Transformation
61
61
  def output_streams
62
62
  [:default, :error]
63
63
  end
@@ -73,7 +73,7 @@ module Chicago
73
73
 
74
74
  # Removes a field from the row, and creates a row on a
75
75
  # designated key stream
76
- class DimensionKeyMapping < Flow::Transformation
76
+ class DimensionKeyMapping < Transformation
77
77
  requires_options :original_key, :key_table
78
78
 
79
79
  def removed_fields
@@ -103,7 +103,7 @@ module Chicago
103
103
  end
104
104
 
105
105
  # Adds a hash of the specified columns as a field in the row.
106
- class HashColumns < Flow::Transformation
106
+ class HashColumns < Transformation
107
107
  requires_options :columns
108
108
 
109
109
  def process_row(row)
data/lib/chicago/etl.rb CHANGED
@@ -6,17 +6,17 @@ else
6
6
  end
7
7
 
8
8
  require 'sequel'
9
- require 'chicago/flow/errors'
10
- require 'chicago/flow/transformation'
11
- require 'chicago/flow/filter'
12
- require 'chicago/flow/transformation_chain'
13
- require 'chicago/flow/pipeline_endpoint'
14
- require 'chicago/flow/array_source'
15
- require 'chicago/flow/dataset_source'
16
- require 'chicago/flow/sink'
17
- require 'chicago/flow/array_sink'
18
- require 'chicago/flow/null_sink'
19
- require 'chicago/flow/mysql'
9
+ require 'chicago/etl/errors'
10
+ require 'chicago/etl/transformation'
11
+ require 'chicago/etl/filter'
12
+ require 'chicago/etl/transformation_chain'
13
+ require 'chicago/etl/pipeline_endpoint'
14
+ require 'chicago/etl/array_source'
15
+ require 'chicago/etl/dataset_source'
16
+ require 'chicago/etl/sink'
17
+ require 'chicago/etl/array_sink'
18
+ require 'chicago/etl/null_sink'
19
+ require 'chicago/etl/mysql'
20
20
 
21
21
  require 'chicago/etl/core_extensions'
22
22
  require 'chicago/etl/counter'
@@ -66,4 +66,7 @@ module Chicago
66
66
  end
67
67
  end
68
68
  end
69
+
70
+ # Deprecated, allows clients to transition when they like.
71
+ Flow = ETL
69
72
  end
@@ -1,6 +1,6 @@
1
1
  require 'spec_helper'
2
2
 
3
- describe Chicago::Flow::ArraySink do
3
+ describe Chicago::ETL::ArraySink do
4
4
  let(:sink) { described_class.new(:foo) }
5
5
 
6
6
  it "has a name" do
@@ -1,6 +1,6 @@
1
1
  require 'spec_helper'
2
2
 
3
- describe Chicago::Flow::ArraySource do
3
+ describe Chicago::ETL::ArraySource do
4
4
  it "has an each method that yields rows" do
5
5
  described_class.new([{:a => 1}]).each do |row|
6
6
  row.should == {:a => 1}
@@ -1,6 +1,6 @@
1
1
  require 'spec_helper'
2
2
 
3
- describe Chicago::Flow::DatasetSource do
3
+ describe Chicago::ETL::DatasetSource do
4
4
  let(:dataset) { stub(:dataset) }
5
5
 
6
6
  it "should delegtate each to the dataset" do
@@ -1,6 +1,6 @@
1
1
  require "spec_helper"
2
2
 
3
- class TestTransformation < Chicago::Flow::Transformation
3
+ class TestTransformation < Chicago::ETL::Transformation
4
4
  def output_streams
5
5
  [:another_stream]
6
6
  end
@@ -23,8 +23,8 @@ describe "defining and executing a stage" do
23
23
  end
24
24
 
25
25
  sinks do
26
- add Chicago::Flow::ArraySink.new(:test)
27
- add Chicago::Flow::ArraySink.new(:test), :stream => :another_stream
26
+ add Chicago::ETL::ArraySink.new(:test)
27
+ add Chicago::ETL::ArraySink.new(:test), :stream => :another_stream
28
28
  end
29
29
  end
30
30
 
@@ -50,8 +50,8 @@ describe "defining and executing a stage" do
50
50
  end
51
51
 
52
52
  sinks do
53
- add Chicago::Flow::ArraySink.new(:test)
54
- add Chicago::Flow::ArraySink.new(:test), :stream => :another_stream
53
+ add Chicago::ETL::ArraySink.new(:test)
54
+ add Chicago::ETL::ArraySink.new(:test), :stream => :another_stream
55
55
  end
56
56
  end
57
57
 
@@ -78,7 +78,7 @@ describe "defining and executing a stage" do
78
78
  end
79
79
 
80
80
  sinks do
81
- add Chicago::Flow::ArraySink.new(:test)
81
+ add Chicago::ETL::ArraySink.new(:test)
82
82
  end
83
83
 
84
84
  filter_strategy do |source, etl_batch|
@@ -1,6 +1,6 @@
1
1
  require 'spec_helper'
2
2
 
3
- describe Chicago::Flow::Filter do
3
+ describe Chicago::ETL::Filter do
4
4
  it "filters all rows by default" do
5
5
  subject.process({:a => 1}).should be_nil
6
6
  end
@@ -1,6 +1,6 @@
1
1
  require 'spec_helper'
2
2
 
3
- describe Chicago::Flow::MysqlFileSerializer do
3
+ describe Chicago::ETL::MysqlFileSerializer do
4
4
  it "serializes nil into NULL" do
5
5
  subject.serialize(nil).should == "NULL"
6
6
  end
@@ -1,7 +1,7 @@
1
1
  require 'spec_helper'
2
2
  require 'sequel'
3
3
 
4
- describe Chicago::Flow::MysqlFileSink do
4
+ describe Chicago::ETL::MysqlFileSink do
5
5
  let(:dataset) { mock(:dataset).as_null_object }
6
6
  let(:db) { mock(:db, :[] => dataset, :schema => []) }
7
7
  let(:csv) { mock(:csv) }
@@ -29,7 +29,7 @@ describe Chicago::Flow::MysqlFileSink do
29
29
  end
30
30
 
31
31
  it "serializes values before writing to the file" do
32
- Chicago::Flow::MysqlFileSerializer.any_instance.
32
+ Chicago::ETL::MysqlFileSerializer.any_instance.
33
33
  should_receive(:serialize).with(1).and_return(1)
34
34
  sink << {:foo => 1}
35
35
  end
@@ -2,7 +2,7 @@ require 'spec_helper'
2
2
 
3
3
  describe "Mysql -> Mysql through transformation chain" do
4
4
  let(:dup_row) {
5
- Class.new(Chicago::Flow::Transformation) {
5
+ Class.new(Chicago::ETL::Transformation) {
6
6
  def output_streams
7
7
  [:default, @options[:onto]].flatten
8
8
  end
@@ -46,15 +46,15 @@ describe "Mysql -> Mysql through transformation chain" do
46
46
  TEST_DB[:source].multi_insert([{:foo => nil, :bin => :unhex.sql_function("1F")},
47
47
  {:foo => "Hello", :bin => :unhex.sql_function("1F")}])
48
48
 
49
- source = Chicago::Flow::DatasetSource.
49
+ source = Chicago::ETL::DatasetSource.
50
50
  new(TEST_DB[:source].
51
51
  select(:id, :foo, :hex.sql_function(:bin).as(:bin)))
52
52
 
53
53
  transformations = [dup_row.new(:onto => :other)]
54
54
 
55
- sink_1 = Chicago::Flow::MysqlFileSink.
55
+ sink_1 = Chicago::ETL::MysqlFileSink.
56
56
  new(TEST_DB, :destination, [:id, :foo, :bin])
57
- sink_2 = Chicago::Flow::ArraySink.new([:id, :foo, :bin])
57
+ sink_2 = Chicago::ETL::ArraySink.new([:id, :foo, :bin])
58
58
 
59
59
  stage = Chicago::ETL::Stage.new(:test,
60
60
  :source => source,
@@ -12,7 +12,7 @@ describe Chicago::ETL::SchemaTableSinkFactory do
12
12
  end
13
13
  }
14
14
 
15
- let(:sink_class) { Chicago::Flow::MysqlFileSink }
15
+ let(:sink_class) { Chicago::ETL::MysqlFileSink }
16
16
 
17
17
  it "builds a MysqlFileSink" do
18
18
  sink_class.should_receive(:new).
@@ -1,8 +1,8 @@
1
1
  require 'spec_helper'
2
2
 
3
- describe Chicago::Flow::TransformationChain do
3
+ describe Chicago::ETL::TransformationChain do
4
4
  let(:add_1_to_a) {
5
- Class.new(Chicago::Flow::Transformation) {
5
+ Class.new(Chicago::ETL::Transformation) {
6
6
  def process_row(row)
7
7
  row[:a] += 1
8
8
  row
@@ -11,7 +11,7 @@ describe Chicago::Flow::TransformationChain do
11
11
  }
12
12
 
13
13
  let(:dup_row) {
14
- Class.new(Chicago::Flow::Transformation) {
14
+ Class.new(Chicago::ETL::Transformation) {
15
15
  def output_streams
16
16
  [:default, @options[:onto]].flatten
17
17
  end
@@ -24,7 +24,7 @@ describe Chicago::Flow::TransformationChain do
24
24
  }
25
25
 
26
26
  let(:store_until_flush) {
27
- Class.new(Chicago::Flow::Transformation) {
27
+ Class.new(Chicago::ETL::Transformation) {
28
28
  def process_row(row)
29
29
  @cache ||= []
30
30
  @cache << row
@@ -48,7 +48,7 @@ describe Chicago::Flow::TransformationChain do
48
48
  end
49
49
 
50
50
  it "can cope with a filter returning nil" do
51
- described_class.new(Chicago::Flow::Filter.new,
51
+ described_class.new(Chicago::ETL::Filter.new,
52
52
  dup_row.new, add_1_to_a.new).process({:a => 1}).
53
53
  should == []
54
54
  end
@@ -56,7 +56,7 @@ describe Chicago::Flow::TransformationChain do
56
56
  it "can write to different streams" do
57
57
  described_class.new(dup_row.new(:onto => :other),
58
58
  add_1_to_a.new).process({:a => 1}).
59
- should == [{:a => 2}, {:a => 1, Chicago::Flow::STREAM => :other}]
59
+ should == [{:a => 2}, {:a => 1, Chicago::ETL::STREAM => :other}]
60
60
  end
61
61
 
62
62
  it "knows what streams it writes to as a chain" do
@@ -1,6 +1,6 @@
1
1
  require 'spec_helper'
2
2
 
3
- describe Chicago::Flow::Transformation do
3
+ describe Chicago::ETL::Transformation do
4
4
  let(:add_1_to_a) {
5
5
  Class.new(described_class) {
6
6
  def process_row(row)
@@ -45,8 +45,8 @@ describe Chicago::Flow::Transformation do
45
45
 
46
46
  it "can apply to all streams using :all" do
47
47
  add_1_to_a.new(:all).process({:a => 1}).should == {:a => 2}
48
- add_1_to_a.new(:all).process({:a => 1, Chicago::Flow::STREAM => :other}).
49
- should == {:a => 2, Chicago::Flow::STREAM => :other}
48
+ add_1_to_a.new(:all).process({:a => 1, Chicago::ETL::STREAM => :other}).
49
+ should == {:a => 2, Chicago::ETL::STREAM => :other}
50
50
  end
51
51
 
52
52
  it "can be flushed" do
@@ -16,7 +16,7 @@ describe Chicago::ETL::Transformations::DemultiplexErrors do
16
16
  it "adds the errors onto the error stream" do
17
17
  subject.process(:_errors => [{:error => 1}]).last.should == {
18
18
  :error => 1,
19
- Chicago::Flow::STREAM => :error
19
+ Chicago::ETL::STREAM => :error
20
20
  }
21
21
  end
22
22
  end
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: chicago-etl
3
3
  version: !ruby/object:Gem::Version
4
- hash: 19
4
+ hash: 23
5
5
  prerelease:
6
6
  segments:
7
7
  - 0
8
- - 1
9
- - 4
10
- version: 0.1.4
8
+ - 2
9
+ - 0
10
+ version: 0.2.0
11
11
  platform: ruby
12
12
  authors:
13
13
  - Roland Swingler
@@ -15,7 +15,7 @@ autorequire:
15
15
  bindir: bin
16
16
  cert_chain: []
17
17
 
18
- date: 2013-11-13 00:00:00 Z
18
+ date: 2013-11-18 00:00:00 Z
19
19
  dependencies:
20
20
  - !ruby/object:Gem::Dependency
21
21
  version_requirements: &id001 !ruby/object:Gem::Requirement
@@ -239,14 +239,24 @@ files:
239
239
  - chicago-flow.gemspec
240
240
  - lib/chicago-etl.rb
241
241
  - lib/chicago/etl.rb
242
+ - lib/chicago/etl/array_sink.rb
243
+ - lib/chicago/etl/array_source.rb
242
244
  - lib/chicago/etl/batch.rb
243
245
  - lib/chicago/etl/core_extensions.rb
244
246
  - lib/chicago/etl/counter.rb
245
247
  - lib/chicago/etl/dataset_batch_stage.rb
246
248
  - lib/chicago/etl/dataset_builder.rb
249
+ - lib/chicago/etl/dataset_source.rb
250
+ - lib/chicago/etl/errors.rb
251
+ - lib/chicago/etl/filter.rb
247
252
  - lib/chicago/etl/key_builder.rb
248
253
  - lib/chicago/etl/load_dataset_builder.rb
254
+ - lib/chicago/etl/mysql.rb
255
+ - lib/chicago/etl/mysql_file_serializer.rb
256
+ - lib/chicago/etl/mysql_file_sink.rb
257
+ - lib/chicago/etl/null_sink.rb
249
258
  - lib/chicago/etl/pipeline.rb
259
+ - lib/chicago/etl/pipeline_endpoint.rb
250
260
  - lib/chicago/etl/schema_sinks_and_transformations_builder.rb
251
261
  - lib/chicago/etl/schema_table_sink_factory.rb
252
262
  - lib/chicago/etl/screens/column_screen.rb
@@ -255,37 +265,34 @@ files:
255
265
  - lib/chicago/etl/screens/out_of_bounds.rb
256
266
  - lib/chicago/etl/sequel/dependant_tables.rb
257
267
  - lib/chicago/etl/sequel/filter_to_etl_batch.rb
268
+ - lib/chicago/etl/sink.rb
258
269
  - lib/chicago/etl/stage.rb
259
270
  - lib/chicago/etl/stage_builder.rb
260
271
  - lib/chicago/etl/table_builder.rb
261
272
  - lib/chicago/etl/task_invocation.rb
262
273
  - lib/chicago/etl/tasks.rb
274
+ - lib/chicago/etl/transformation.rb
275
+ - lib/chicago/etl/transformation_chain.rb
263
276
  - lib/chicago/etl/transformations.rb
264
277
  - lib/chicago/etl/transformations/deduplicate_rows.rb
265
278
  - lib/chicago/etl/transformations/uk_post_code.rb
266
279
  - lib/chicago/etl/transformations/uk_post_code_field.rb
267
- - lib/chicago/flow/array_sink.rb
268
- - lib/chicago/flow/array_source.rb
269
- - lib/chicago/flow/dataset_source.rb
270
- - lib/chicago/flow/errors.rb
271
- - lib/chicago/flow/filter.rb
272
- - lib/chicago/flow/mysql.rb
273
- - lib/chicago/flow/mysql_file_serializer.rb
274
- - lib/chicago/flow/mysql_file_sink.rb
275
- - lib/chicago/flow/null_sink.rb
276
- - lib/chicago/flow/pipeline_endpoint.rb
277
- - lib/chicago/flow/sink.rb
278
- - lib/chicago/flow/transformation.rb
279
- - lib/chicago/flow/transformation_chain.rb
280
280
  - spec/db_connections.yml.dist
281
+ - spec/etl/array_sink_spec.rb
282
+ - spec/etl/array_source_spec.rb
281
283
  - spec/etl/batch_spec.rb
282
284
  - spec/etl/core_extensions_spec.rb
283
285
  - spec/etl/counter_spec.rb
286
+ - spec/etl/dataset_source_spec.rb
284
287
  - spec/etl/define_dimension_stage_spec.rb
285
288
  - spec/etl/define_stage_spec.rb
286
289
  - spec/etl/etl_batch_id_dataset_filter.rb
290
+ - spec/etl/filter_spec.rb
287
291
  - spec/etl/key_builder_spec.rb
288
292
  - spec/etl/load_dataset_builder_spec.rb
293
+ - spec/etl/mysql_file_serializer_spec.rb
294
+ - spec/etl/mysql_file_sink_spec.rb
295
+ - spec/etl/mysql_integration_spec.rb
289
296
  - spec/etl/pipeline_stage_builder_spec.rb
290
297
  - spec/etl/schema_table_sink_factory_spec.rb
291
298
  - spec/etl/screens/invalid_element_spec.rb
@@ -296,19 +303,12 @@ files:
296
303
  - spec/etl/stage_spec.rb
297
304
  - spec/etl/table_builder_spec.rb
298
305
  - spec/etl/task_spec.rb
306
+ - spec/etl/transformation_chain_spec.rb
307
+ - spec/etl/transformation_spec.rb
299
308
  - spec/etl/transformations/deduplicate_rows_spec.rb
300
309
  - spec/etl/transformations/uk_post_code_field_spec.rb
301
310
  - spec/etl/transformations/uk_post_code_spec.rb
302
311
  - spec/etl/transformations_spec.rb
303
- - spec/flow/array_sink_spec.rb
304
- - spec/flow/array_source_spec.rb
305
- - spec/flow/dataset_source_spec.rb
306
- - spec/flow/filter_spec.rb
307
- - spec/flow/mysql_file_serializer_spec.rb
308
- - spec/flow/mysql_file_sink_spec.rb
309
- - spec/flow/mysql_integration_spec.rb
310
- - spec/flow/transformation_chain_spec.rb
311
- - spec/flow/transformation_spec.rb
312
312
  - spec/spec_helper.rb
313
313
  homepage: http://github.com/notonthehighstreet/chicago-etl
314
314
  licenses:
@@ -1,4 +0,0 @@
1
- require 'sequel'
2
- require 'sequel/load_data_infile'
3
- require 'chicago/flow/mysql_file_serializer'
4
- require 'chicago/flow/mysql_file_sink'