beetle_etl 0.0.7 → 0.0.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 9d792ed373d43d6c0ab7ec27241eaf18fb59736b
4
- data.tar.gz: 7b08688ce87fbff7eea72b362773444ec4700b2c
3
+ metadata.gz: 788ce4239b271ab02fe67da3642f8f17e6fd275e
4
+ data.tar.gz: e89e37f2bd8ec970599249b73e97f9d66be60555
5
5
  SHA512:
6
- metadata.gz: 7022ab2cc6a60f57f061d3b1acaa8a125da6db7e9cec76421189ae2fa2c4729b4134cb8246a8c14283aeb7769e1ef92a75f480e2d6be3a3e41ba2e8017b7ee2c
7
- data.tar.gz: fa6e2f2f74cf1c4a6a03e53c2d76c962154e8d64f656d32690101bdd4dfa636dda9036213b29dfc675d07e9ed381a6527e94abb462f4f7eabfa3c3f11f279ee1
6
+ metadata.gz: 3df6bfbcadd41a1e98f330aa35c9bcc0701c3450d9425c7c156520cae467cdccbb4adc5075a7923261d972c1341f50c49f3a09e35536270f2cdd165414b14ea7
7
+ data.tar.gz: dd92ef629e21523001a4d8371b0a8b2432845ecb371b0bde278d752b06f989e19c1b61321dc28686220f14a0b428f33b749bab52c119ec1d56284c6a71ae0e44
data/beetle_etl.gemspec CHANGED
@@ -19,10 +19,11 @@ Gem::Specification.new do |spec|
19
19
  spec.require_paths = ['lib']
20
20
 
21
21
  spec.add_runtime_dependency 'sequel', '>= 4.0.0'
22
+ spec.add_runtime_dependency 'activesupport', '>= 4.2.0'
22
23
 
23
24
  spec.add_development_dependency 'bundler', '~> 1.6'
24
25
  spec.add_development_dependency 'rspec', '>= 3.0.0'
25
26
  spec.add_development_dependency 'timecop', '>= 0.7.0'
26
27
  spec.add_development_dependency 'pg', '>= 0.18.0'
27
- spec.add_development_dependency 'activesupport', '>= 4.2.0'
28
+ spec.add_development_dependency 'unindent', '~> 1.0'
28
29
  end
@@ -1,15 +1,34 @@
1
+ require 'active_support/core_ext/hash/deep_merge'
2
+
1
3
  module BeetleETL
2
4
  class Import
3
5
 
4
6
  def run
5
- TaskRunner.new(data_steps).run
6
- BeetleETL.database.transaction do
7
- TaskRunner.new(load_steps).run
8
- end
9
- rescue => e
10
- raise e
7
+ setup
8
+ import
11
9
  ensure
12
- TaskRunner.new(cleanup_steps).run
10
+ cleanup
11
+ end
12
+
13
+ def setup
14
+ transformations.each do |t|
15
+ CreateStage.new(t.table_name, t.relations, t.column_names).run
16
+ end
17
+ end
18
+
19
+ def import
20
+ data_report = AsyncStepRunner.new(data_steps).run
21
+ load_report = BeetleETL.database.transaction do
22
+ AsyncStepRunner.new(load_steps).run
23
+ end
24
+
25
+ data_report.deep_merge load_report
26
+ end
27
+
28
+ def cleanup
29
+ transformations.each do |t|
30
+ DropStage.new(t.table_name).run
31
+ end
13
32
  end
14
33
 
15
34
  private
@@ -17,7 +36,6 @@ module BeetleETL
17
36
  def data_steps
18
37
  transformations.flat_map do |t|
19
38
  [
20
- CreateStage.new(t.table_name, t.relations, t.column_names),
21
39
  Transform.new(t.table_name, t.dependencies, t.query),
22
40
  MapRelations.new(t.table_name, t.relations),
23
41
  TableDiff.new(t.table_name),
@@ -32,10 +50,6 @@ module BeetleETL
32
50
  end
33
51
  end
34
52
 
35
- def cleanup_steps
36
- transformations.map { |t| DropStage.new(t.table_name) }
37
- end
38
-
39
53
  def transformations
40
54
  @transformations ||= TransformationLoader.new.load
41
55
  end
@@ -0,0 +1,65 @@
1
module BeetleETL
  # Turns the timing report of an import run into a human-readable summary
  # and writes it to the configured logger.
  #
  # The report is a Hash of
  #   table_name => { step_name => { started_at: Time, finished_at: Time } }
  # Step names are expected to look like "table_name: StepName"; only the part
  # after the first ": " is printed as the row label.
  class Reporter

    # Fixed part of every row: 2 leading spaces + 1 colon + 1 space + 8
    # characters of "HH:MM:SS".
    FIXED_ROW_WIDTH = 12

    # report - the Hash described in the class comment.
    def initialize(report)
      @report = report
    end

    # Logs the formatted summary at info level via BeetleETL.logger.
    def log_summary
      BeetleETL.logger.info(summary)
    end

    private

    # Builds one section per table: table name, a "=" rule, one row per step,
    # a "-" rule and the right-aligned total duration of all steps.
    def summary
      sections = @report.map do |table_name, steps|
        [
          table_name,
          separator("="),
          step_rows(steps).join("\n"),
          separator("-"),
          format_duration(sum_durations(steps)).rjust(line_width)
        ].join("\n")
      end

      "\n\n" + sections.join("\n\n") + "\n"
    end

    # Returns one line per step: the label starting at column 2 and the
    # duration right-aligned to line_width.
    def step_rows(steps)
      steps.map do |step_name, data|
        label = "#{step_label(step_name)}:"
        duration = format_duration(data[:finished_at] - data[:started_at])
        line = duration.rjust(line_width)
        line[2, label.length] = label
        line
      end
    end

    # Strips the "table_name: " prefix from a step name. A name without the
    # separator is used as-is instead of raising on nil.
    def step_label(step_name)
      step_name.to_s.split(": ", 2).last.to_s
    end

    # Formats a duration in seconds as HH:MM:SS. Hours are not wrapped at 24,
    # so runs longer than a day are reported correctly.
    def format_duration(duration)
      hours, remainder = duration.to_i.divmod(3600)
      minutes, seconds = remainder.divmod(60)
      format("%02d:%02d:%02d", hours, minutes, seconds)
    end

    # Sum of the individual step durations, in seconds.
    def sum_durations(steps)
      steps.inject(0) do |acc, (_step_name, data)|
        acc + (data[:finished_at] - data[:started_at])
      end
    end

    # Width of every row and rule. It is derived from the longest step label
    # in the whole report so that a label can never overwrite the duration.
    def line_width
      @line_width ||= FIXED_ROW_WIDTH + longest_step_label_length
    end

    # Length of the longest step label across all tables; 0 if there are no steps.
    def longest_step_label_length
      @report.values
        .flat_map(&:keys)
        .map { |step_name| step_label(step_name).length }
        .max || 0
    end

    def separator(character)
      character * line_width
    end

  end
end
@@ -0,0 +1,83 @@
1
module BeetleETL
  # Runs a set of steps concurrently, one thread per step, starting each step
  # as soon as the DependencyResolver reports it as resolvable.
  #
  # Worker threads report back through a Queue; only the calling thread
  # touches the bookkeeping sets.
  class AsyncStepRunner

    # steps - objects responding to #name, #table_name and #run; the
    #         DependencyResolver decides when each of them may start.
    def initialize(steps)
      @dependency_resolver = DependencyResolver.new(steps)
      @steps = steps

      @queue = Queue.new
      @completed = Set.new
      @running = Set.new
      @threads = []
    end

    # Runs all steps and returns a report of the form
    #   table_name => { step_name => { started_at: Time, finished_at: Time } }
    #
    # If a step raises, the in-flight steps are allowed to finish and the
    # step's exception is re-raised in the calling thread, so that callers
    # (e.g. a surrounding database transaction or an `ensure` cleanup) see it
    # instead of blocking forever on the result queue.
    def run
      results = {}

      until all_steps_complete?
        runnables.each do |step|
          mark_step_running(step.name)
          @threads << run_step_async(step)
        end

        table_name, step_name, step_data, error = @queue.pop
        abort_run(error) if error

        (results[table_name] ||= {})[step_name] = step_data
        mark_step_completed(step_name)
      end

      results
    end

    private

    attr_reader :running, :completed

    # Starts the step in its own thread and returns the Thread. The thread
    # always pushes exactly one message onto the queue:
    #   [table_name, step_name, timing_hash, nil]  on success
    #   [table_name, step_name, nil, exception]    on failure
    def run_step_async(step)
      Thread.new do
        begin
          BeetleETL.logger.info("started step #{step.name}")

          started_at = Time.now
          step.run
          finished_at = Time.now

          duration = Time.at(finished_at - started_at).utc.strftime("%H:%M:%S")
          BeetleETL.logger.info("finished #{step.name} in #{duration}")

          @queue.push [
            step.table_name,
            step.name,
            { started_at: started_at, finished_at: finished_at },
            nil
          ]
        rescue Exception => e
          # Nothing is swallowed here: the exception is handed to the calling
          # thread, which re-raises it. Rescuing broadly guarantees the main
          # loop is always woken up, whatever the step raised.
          begin
            BeetleETL.logger.fatal(e.message)
          ensure
            @queue.push [step.table_name, step.name, nil, e]
          end
        end
      end
    end

    # Waits for the steps that are still running, then re-raises the failure
    # in the calling thread.
    def abort_run(error)
      @threads.each(&:join)
      raise error
    end

    def mark_step_running(step_name)
      running.add(step_name)
    end

    def mark_step_completed(step_name)
      completed.add(step_name)
    end

    # Steps whose dependencies are satisfied and that have not been started
    # yet. Completed steps stay in `running`, so they are filtered out too.
    def runnables
      resolvables = @dependency_resolver.resolvables(completed)
      resolvables.reject { |r| running.include? r.name }
    end

    def all_steps_complete?
      @steps.map(&:name).to_set == completed.to_set
    end

  end
end
@@ -1,4 +1,8 @@
1
1
  module BeetleETL
2
+
3
+ ColumnDefinitionNotFoundError = Class.new(StandardError)
4
+ NoColumnsDefinedError = Class.new(StandardError)
5
+
2
6
  class CreateStage < Step
3
7
 
4
8
  def initialize(table_name, relations, column_names)
@@ -7,27 +11,38 @@ module BeetleETL
7
11
  @column_names = column_names
8
12
  end
9
13
 
10
- def dependencies
11
- Set.new
12
- end
13
-
14
14
  def run
15
15
  database.execute <<-SQL
16
- CREATE TABLE #{stage_table_name_sql} (
16
+ CREATE UNLOGGED TABLE #{stage_table_name_sql} (
17
17
  id integer,
18
18
  external_id character varying(255),
19
19
  transition character varying(255),
20
20
 
21
- #{[
22
- payload_column_definitions,
23
- relation_column_definitions
24
- ].compact.join(',')}
25
- )
21
+ #{column_definitions}
22
+ );
23
+
24
+ #{index_definitions}
26
25
  SQL
27
26
  end
28
27
 
29
28
  private
30
29
 
30
+ def column_definitions
31
+ definitions = [
32
+ payload_column_definitions,
33
+ relation_column_definitions
34
+ ].compact
35
+
36
+ if definitions.empty?
37
+ raise NoColumnsDefinedError.new <<-MSG
38
+ Transformation for #{table_name} has no column definitions.
39
+ Either add an array of columns or references to other tables.
40
+ MSG
41
+ end
42
+
43
+ definitions.join(',')
44
+ end
45
+
31
46
  def payload_column_definitions
32
47
  definitions = (@column_names - @relations.keys).map do |column_name|
33
48
  "#{column_name} #{column_type(column_name)}"
@@ -45,6 +60,13 @@ module BeetleETL
45
60
  definitions.join(',') if definitions.any?
46
61
  end
47
62
 
63
+ def index_definitions
64
+ index_columns = [:external_id] + @relations.keys.map { |c| "external_#{c}" }
65
+ index_columns.map do |column_name|
66
+ "CREATE INDEX ON #{stage_table_name_sql} (#{column_name})"
67
+ end.join(";")
68
+ end
69
+
48
70
  def column_type(column_name)
49
71
  @column_types ||= Hash[database.schema(public_table_name.to_sym)]
50
72
  .reduce({}) do |acc, (name, schema)|
@@ -52,6 +74,12 @@ module BeetleETL
52
74
  acc
53
75
  end
54
76
 
77
+ unless @column_types.has_key?(column_name)
78
+ raise ColumnDefinitionNotFoundError.new <<-MSG
79
+ Table "#{table_name}" has no column "#{column_name}".
80
+ MSG
81
+ end
82
+
55
83
  @column_types[column_name]
56
84
  end
57
85
 
@@ -1,10 +1,6 @@
1
1
  module BeetleETL
2
2
  class DropStage < Step
3
3
 
4
- def dependencies
5
- Set.new
6
- end
7
-
8
4
  def run
9
5
  database.execute <<-SQL
10
6
  DROP TABLE IF EXISTS #{stage_table_name_sql}
@@ -1,7 +1,5 @@
1
1
  module BeetleETL
2
2
 
3
- DependenciesNotDefinedError = Class.new(StandardError)
4
-
5
3
  class Step
6
4
 
7
5
  include BeetleETL::Naming
@@ -20,7 +18,7 @@ module BeetleETL
20
18
  end
21
19
 
22
20
  def dependencies
23
- raise DependenciesNotDefinedError
21
+ Set.new
24
22
  end
25
23
 
26
24
  def external_source
@@ -8,7 +8,7 @@ module BeetleETL
8
8
  end
9
9
 
10
10
  def dependencies
11
- Set.new(@dependencies.map { |d| self.class.step_name(d) }) << CreateStage.step_name(table_name)
11
+ Set.new(@dependencies.map { |d| self.class.step_name(d) })
12
12
  end
13
13
 
14
14
  def run
@@ -0,0 +1,46 @@
1
module BeetleETL
  # Helpers intended to be mixed into a test suite in order to exercise
  # single transformations against real stage tables.
  module Testing

    TargetTableNotFoundError = Class.new(StandardError)
    NoTransformationFoundError = Class.new(StandardError)

    # Creates the stage tables, yields, and drops the stage tables again even
    # if the block raises. Returns the block's value.
    #
    # table_names - target tables that must already exist in the database.
    #
    # NOTE(review): the tables are created through Import#setup, which appears
    # to create stage tables for every loaded transformation, not only for
    # the names given here - confirm against Import.
    #
    # Raises TargetTableNotFoundError if one of the target tables is missing.
    # Raises ArgumentError if no block is given; this is checked before any
    # table is created, so nothing has to be cleaned up.
    def with_stage_tables_for(*table_names, &block)
      table_names.each do |table_name|
        next if BeetleETL.database.table_exists?(table_name)

        raise TargetTableNotFoundError, <<-MSG
          Missing target table "#{table_name}".
          In order to create stage tables, BeetleETL requires the target tables to exist because they provide the column definitions.
        MSG
      end

      raise ArgumentError, "with_stage_tables_for requires a block" unless block

      import = Import.new
      begin
        import.setup
        block.call
      ensure
        import.cleanup
      end
    end

    # Runs only the Transform step of the transformation defined for
    # table_name and returns its result.
    #
    # Raises NoTransformationFoundError if no such transformation is defined.
    def run_transformation(table_name)
      transformation = TransformationLoader.new.load.find do |t|
        t.table_name == table_name
      end

      unless transformation
        raise NoTransformationFoundError, <<-MSG
          No transformation definition found for table "#{table_name}".
        MSG
      end

      Transform.new(
        transformation.table_name,
        transformation.dependencies,
        transformation.query
      ).run
    end

    # Name of the stage table belonging to table_name, as computed by
    # BeetleETL::Naming.
    def stage_table_name(table_name)
      BeetleETL::Naming.stage_table_name(table_name)
    end

  end
end
@@ -1,3 +1,3 @@
1
1
  module BeetleETL
2
- VERSION = "0.0.7"
2
+ VERSION = "0.0.11"
3
3
  end
data/lib/beetle_etl.rb CHANGED
@@ -1,6 +1,7 @@
1
1
  require 'beetle_etl/version'
2
2
 
3
3
  require 'sequel'
4
+ require 'logger'
4
5
 
5
6
  module BeetleETL
6
7
 
@@ -21,10 +22,11 @@ module BeetleETL
21
22
  require 'beetle_etl/steps/load'
22
23
  require 'beetle_etl/steps/drop_stage'
23
24
 
24
- require 'beetle_etl/task_runner/dependency_resolver'
25
- require 'beetle_etl/task_runner/task_runner'
25
+ require 'beetle_etl/step_runner/dependency_resolver'
26
+ require 'beetle_etl/step_runner/async_step_runner'
26
27
 
27
28
  require 'beetle_etl/import'
29
+ require 'beetle_etl/reporter'
28
30
 
29
31
  class Configuration
30
32
  attr_accessor \
@@ -33,10 +35,12 @@ module BeetleETL
33
35
  :transformation_file,
34
36
  :stage_schema,
35
37
  :public_schema,
36
- :external_source
38
+ :external_source,
39
+ :logger
37
40
 
38
41
  def initialize
39
42
  @public_schema = 'public'
43
+ @logger = ::Logger.new(STDOUT)
40
44
  end
41
45
  end
42
46
 
@@ -44,9 +48,9 @@ module BeetleETL
44
48
 
45
49
  def import
46
50
  begin
47
- Import.new.run
48
- rescue Exception => e
49
- raise e
51
+ report = Import.new.run
52
+ Reporter.new(report).log_summary
53
+ report
50
54
  ensure
51
55
  @database.disconnect if @database
52
56
  end
@@ -60,6 +64,10 @@ module BeetleETL
60
64
  @config ||= Configuration.new
61
65
  end
62
66
 
67
+ def logger
68
+ config.logger
69
+ end
70
+
63
71
  def database
64
72
  if config.database
65
73
  config.database
@@ -3,9 +3,13 @@ require 'spec_helper'
3
3
  describe BeetleETL do
4
4
 
5
5
  describe '#import' do
6
- it 'runs the import' do
7
- expect(BeetleETL::Import).to receive_message_chain(:new, :run)
8
- BeetleETL.import
6
+ it 'runs the import with reporting' do
7
+ report = double(:report)
8
+ reporter = double(:reporter, log_summary: nil)
9
+
10
+ expect(BeetleETL::Import).to receive_message_chain(:new, :run).and_return report
11
+ expect(BeetleETL::Reporter).to receive(:new).with(report).and_return reporter
12
+ expect(BeetleETL.import).to eql(report)
9
13
  end
10
14
  end
11
15
 
@@ -1,12 +1,11 @@
1
1
  require 'spec_helper'
2
- require 'tempfile'
3
2
 
4
3
  module BeetleETL
5
4
  describe TransformationLoader do
6
5
 
7
6
  subject { TransformationLoader.new }
8
7
 
9
- before :example do
8
+ before do
10
9
  data_file = tempfile_with_contents <<-FILE
11
10
  import :foo do
12
11
  'foo'
@@ -47,12 +46,5 @@ module BeetleETL
47
46
  end
48
47
  end
49
48
 
50
- def tempfile_with_contents(contents)
51
- Tempfile.new('transform').tap do |file|
52
- file.write(contents)
53
- file.close
54
- end
55
- end
56
-
57
49
  end
58
50
  end
@@ -13,9 +13,9 @@ describe BeetleETL do
13
13
 
14
14
  include ExampleSchema
15
15
 
16
- let!(:time1) { Time.new(2014 , 7 , 17 , 16 , 12).beginning_of_day }
17
- let!(:time2) { Time.new(2015 , 2 , 8 , 22 , 18).beginning_of_day }
18
- let!(:time3) { Time.new(2015 , 11 , 3 , 12 , 17).beginning_of_day }
16
+ let!(:time1) { Time.new(2014 , 7 , 17 , 16 , 12).beginning_of_day }
17
+ let!(:time2) { Time.new(2015 , 2 , 8 , 22 , 18).beginning_of_day }
18
+ let!(:time3) { Time.new(2015 , 11 , 3 , 12 , 17).beginning_of_day }
19
19
 
20
20
  before :each do
21
21
  create_tables
@@ -27,6 +27,7 @@ describe BeetleETL do
27
27
  config.transformation_file = File.expand_path('../example_transform.rb', __FILE__)
28
28
  config.database_config = database_config
29
29
  config.external_source = 'source_name'
30
+ config.logger = Logger.new(Tempfile.new("log"))
30
31
  end
31
32
  end
32
33
 
@@ -0,0 +1,65 @@
1
+ require 'spec_helper'
2
+ require 'unindent'
3
+
4
+ module BeetleETL
5
+ describe Reporter do
6
+
7
+ let(:report) do
8
+ {
9
+ organisations: {
10
+ "organisations: Transform" => {
11
+ :started_at => Time.new(2015, 03, 14, 16, 0),
12
+ :finished_at => Time.new(2015, 03, 14, 16, 10)
13
+ },
14
+ "organisations: MapRelations" => {
15
+ :started_at => Time.new(2015, 03, 14, 17, 0),
16
+ :finished_at => Time.new(2015, 03, 14, 17, 10)
17
+ },
18
+ "organisations: Load" => {
19
+ :started_at => Time.new(2015, 03, 14, 18, 0),
20
+ :finished_at => Time.new(2015, 03, 14, 18, 10)
21
+ },
22
+ },
23
+ departments: {
24
+ "departments: Transform" => {
25
+ :started_at => Time.new(2015, 03, 14, 16, 0),
26
+ :finished_at => Time.new(2015, 03, 14, 16, 12)
27
+ },
28
+ "departments: MapRelations" => {
29
+ :started_at => Time.new(2015, 03, 14, 17, 2),
30
+ :finished_at => Time.new(2015, 03, 14, 17, 10)
31
+ },
32
+ "departments: Load" => {
33
+ :started_at => Time.new(2015, 03, 14, 18, 10),
34
+ :finished_at => Time.new(2015, 03, 14, 19, 21, 39)
35
+ },
36
+ }
37
+ }
38
+ end
39
+
40
+ it "loggs a summary of all step times by table name" do
41
+ expect(BeetleETL.logger).to receive(:info).with <<-LOG.unindent
42
+
43
+
44
+ organisations
45
+ ========================
46
+ Transform: 00:10:00
47
+ MapRelations: 00:10:00
48
+ Load: 00:10:00
49
+ ------------------------
50
+ 00:30:00
51
+
52
+ departments
53
+ ========================
54
+ Transform: 00:12:00
55
+ MapRelations: 00:08:00
56
+ Load: 01:11:39
57
+ ------------------------
58
+ 01:31:39
59
+ LOG
60
+
61
+ Reporter.new(report).log_summary
62
+ end
63
+
64
+ end
65
+ end
data/spec/spec_helper.rb CHANGED
@@ -3,14 +3,18 @@ CodeClimate::TestReporter.start
3
3
 
4
4
  require_relative '../lib/beetle_etl.rb'
5
5
  require_relative 'support/database_helpers.rb'
6
+ require_relative 'support/file_helpers.rb'
6
7
 
7
8
  RSpec.configure do |config|
8
9
 
9
10
  config.include SpecSupport::DatabaseHelpers
11
+ config.include SpecSupport::FileHelpers
12
+
10
13
  config.backtrace_exclusion_patterns = [/rspec-core/]
11
14
 
12
15
  config.around(:each) do |example|
13
16
  BeetleETL.reset
17
+
14
18
  if example.metadata[:feature]
15
19
  example.run
16
20
  else
@@ -47,42 +47,54 @@ module BeetleETL
47
47
  it 'creates a stage table table with all payload columns' do
48
48
  subject.run
49
49
 
50
- columns = Hash[test_database.schema(subject.stage_table_name.to_sym)]
50
+ schema = Hash[test_database.schema(subject.stage_table_name.to_sym)]
51
51
 
52
52
  expected_columns = %i(id external_id some_string some_integer some_float)
53
- expect(columns.keys).to include(*expected_columns)
53
+ expect(schema.keys).to include(*expected_columns)
54
54
 
55
- expect(columns[:id][:db_type]).to eq('integer')
56
- expect(columns[:external_id][:db_type]).to eq('character varying(255)')
57
- expect(columns[:transition][:db_type]).to eq('character varying(255)')
55
+ expect(schema[:id][:db_type]).to eq('integer')
56
+ expect(schema[:external_id][:db_type]).to eq('character varying(255)')
57
+ expect(schema[:transition][:db_type]).to eq('character varying(255)')
58
58
 
59
- expect(columns[:some_string][:db_type]).to eq('character varying(200)')
60
- expect(columns[:some_integer][:db_type]).to eq('integer')
61
- expect(columns[:some_float][:db_type]).to eq('double precision')
59
+ expect(schema[:some_string][:db_type]).to eq('character varying(200)')
60
+ expect(schema[:some_integer][:db_type]).to eq('integer')
61
+ expect(schema[:some_float][:db_type]).to eq('double precision')
62
62
  end
63
63
 
64
64
  it 'adds columns for dependent foreign key associations' do
65
65
  subject.run
66
66
 
67
- columns = Hash[test_database.schema(subject.stage_table_name)]
67
+ schema = Hash[test_database.schema(subject.stage_table_name)]
68
68
 
69
69
  expected_columns = %i(
70
70
  dependee_a_id external_dependee_a_id
71
71
  dependee_b_id external_dependee_b_id
72
72
  )
73
- expect(columns.keys).to include(*expected_columns)
73
+ expect(schema.keys).to include(*expected_columns)
74
74
 
75
- expect(columns[:dependee_a_id][:db_type]).to eq('integer')
76
- expect(columns[:external_dependee_a_id][:db_type]).to eq('character varying(255)')
75
+ expect(schema[:dependee_a_id][:db_type]).to eq('integer')
76
+ expect(schema[:external_dependee_a_id][:db_type]).to eq('character varying(255)')
77
77
 
78
- expect(columns[:dependee_b_id][:db_type]).to eq('integer')
79
- expect(columns[:external_dependee_b_id][:db_type]).to eq('character varying(255)')
78
+ expect(schema[:dependee_b_id][:db_type]).to eq('integer')
79
+ expect(schema[:external_dependee_b_id][:db_type]).to eq('character varying(255)')
80
80
  end
81
81
 
82
82
  it 'does not add foreign key columns twice if defined as payload column' do
83
83
  columns = [:some_string, :dependee_a_id]
84
84
  CreateStage.new(:example_table, @relations, columns).run
85
85
  end
86
+
87
+ it 'raises an error if no columns and no relations are defined' do
88
+ expect do
89
+ CreateStage.new(:example_table, {}, []).run
90
+ end.to raise_error(BeetleETL::NoColumnsDefinedError)
91
+ end
92
+
93
+ it 'raises an error when given columns with no definition' do
94
+ expect do
95
+ CreateStage.new(:example_table, @relations, [:undefined_column]).run
96
+ end.to raise_error(BeetleETL::ColumnDefinitionNotFoundError)
97
+ end
86
98
  end
87
99
 
88
100
  end
@@ -27,8 +27,8 @@ module BeetleETL
27
27
  end
28
28
 
29
29
  describe '#dependencies' do
30
- it 'raises an exception' do
31
- expect { subject.dependencies }.to raise_error(DependenciesNotDefinedError)
30
+ it 'returns an empty set' do
31
+ expect(subject.dependencies).to eql(Set.new)
32
32
  end
33
33
  end
34
34
 
@@ -15,7 +15,6 @@ module BeetleETL
15
15
  [
16
16
  'some_table: Transform',
17
17
  'some_other_table: Transform',
18
- 'example_table: CreateStage',
19
18
  ].to_set
20
19
  )
21
20
  end
@@ -37,7 +37,7 @@ end
37
37
 
38
38
  RSpec::Matchers.define :have_values do |*rows|
39
39
  match do |table_description|
40
- dataset = test_database[table_description]
40
+ dataset = test_database[table_description.to_sym]
41
41
 
42
42
  columns = rows[0].map(&:to_sym)
43
43
  values = rows[1..-1]
@@ -0,0 +1,14 @@
1
+ require "tempfile"
2
+
3
module SpecSupport
  # File-related helpers shared between specs.
  module FileHelpers

    # Writes contents to a new temporary file, closes it and returns the
    # Tempfile, so that its #path can be handed to code that reads the file.
    #
    # contents - the String to write.
    # basename - prefix of the temporary file's name (defaults to 'transform').
    def tempfile_with_contents(contents, basename = 'transform')
      ::Tempfile.new(basename).tap do |file|
        file.write(contents)
        file.close
      end
    end

  end
end
@@ -0,0 +1,76 @@
1
require "spec_helper"
require "beetle_etl/testing"

# Specs for the BeetleETL::Testing helpers. No debugger is required here: the
# file never calls one, so loading the spec must not depend on it.
describe "BeetleETL::Testing" do

  include BeetleETL::Testing

  before do
    # The heredoc is single-quoted so that #{stage_table} reaches the
    # transformation file verbatim instead of being interpolated here.
    data_file = tempfile_with_contents <<-'FILE'
      import :some_table do
        columns :some_attribute
      end

      import :organisations do
        references :some_table, on: :some_table_id
        columns :name, :address

        query <<-SQL
          INSERT INTO #{stage_table} (external_id, address, name)
          VALUES ('external_id', 'address', 'name')
        SQL
      end
    FILE

    BeetleETL.configure do |config|
      config.database = test_database
      config.transformation_file = data_file.path
    end
  end

  context "with properly defined target tables" do
    before do
      test_database.create_table :some_table do
        primary_key :id
        String :external_id, size: 255
        String :some_attribute, size: 255
      end

      test_database.create_table :organisations do
        primary_key :id
        String :external_id, size: 255
        String :name, size: 255
        String :address, size: 255
      end
    end

    it "makes stage tables available in the block" do
      with_stage_tables_for(:organisations, :some_table) do
        expect(test_database.table_exists?(stage_table_name(:organisations))).to be_truthy
        expect(test_database.table_exists?(stage_table_name(:some_table))).to be_truthy
      end

      # The stage tables must be gone again once the block has returned.
      expect(test_database.table_exists?(stage_table_name(:organisations))).to be_falsey
      expect(test_database.table_exists?(stage_table_name(:some_table))).to be_falsey
    end

    it "allows the transformation to be run inside the block" do
      with_stage_tables_for(:organisations, :some_table) do
        run_transformation(:organisations)

        expect(stage_table_name(:organisations)).to have_values(
          [ :external_id  , :address  , :name  ] ,
          [ "external_id" , "address" , "name" ]
        )
      end
    end

  end

  it "raises an error if the target table cannot be found" do
    expect do
      with_stage_tables_for(:organisations)
    end.to raise_error(BeetleETL::Testing::TargetTableNotFoundError)
  end
end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: beetle_etl
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.7
4
+ version: 0.0.11
5
5
  platform: ruby
6
6
  authors:
7
7
  - Luciano Maiwald
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-02-28 00:00:00.000000000 Z
11
+ date: 2015-04-03 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: sequel
@@ -24,6 +24,20 @@ dependencies:
24
24
  - - ">="
25
25
  - !ruby/object:Gem::Version
26
26
  version: 4.0.0
27
+ - !ruby/object:Gem::Dependency
28
+ name: activesupport
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: 4.2.0
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: 4.2.0
27
41
  - !ruby/object:Gem::Dependency
28
42
  name: bundler
29
43
  requirement: !ruby/object:Gem::Requirement
@@ -81,19 +95,19 @@ dependencies:
81
95
  - !ruby/object:Gem::Version
82
96
  version: 0.18.0
83
97
  - !ruby/object:Gem::Dependency
84
- name: activesupport
98
+ name: unindent
85
99
  requirement: !ruby/object:Gem::Requirement
86
100
  requirements:
87
- - - ">="
101
+ - - "~>"
88
102
  - !ruby/object:Gem::Version
89
- version: 4.2.0
103
+ version: '1.0'
90
104
  type: :development
91
105
  prerelease: false
92
106
  version_requirements: !ruby/object:Gem::Requirement
93
107
  requirements:
94
- - - ">="
108
+ - - "~>"
95
109
  - !ruby/object:Gem::Version
96
- version: 4.2.0
110
+ version: '1.0'
97
111
  description: Taking care of synchronizing external data with referential data in your
98
112
  application.
99
113
  email:
@@ -115,6 +129,9 @@ files:
115
129
  - lib/beetle_etl/dsl/transformation_loader.rb
116
130
  - lib/beetle_etl/import.rb
117
131
  - lib/beetle_etl/naming.rb
132
+ - lib/beetle_etl/reporter.rb
133
+ - lib/beetle_etl/step_runner/async_step_runner.rb
134
+ - lib/beetle_etl/step_runner/dependency_resolver.rb
118
135
  - lib/beetle_etl/steps/assign_ids.rb
119
136
  - lib/beetle_etl/steps/create_stage.rb
120
137
  - lib/beetle_etl/steps/drop_stage.rb
@@ -123,8 +140,7 @@ files:
123
140
  - lib/beetle_etl/steps/step.rb
124
141
  - lib/beetle_etl/steps/table_diff.rb
125
142
  - lib/beetle_etl/steps/transform.rb
126
- - lib/beetle_etl/task_runner/dependency_resolver.rb
127
- - lib/beetle_etl/task_runner/task_runner.rb
143
+ - lib/beetle_etl/testing.rb
128
144
  - lib/beetle_etl/version.rb
129
145
  - script/postgres
130
146
  - spec/beetle_etl_spec.rb
@@ -134,6 +150,7 @@ files:
134
150
  - spec/feature/example_schema.rb
135
151
  - spec/feature/example_transform.rb
136
152
  - spec/feature/feature_spec.rb
153
+ - spec/reporter_spec.rb
137
154
  - spec/spec_helper.rb
138
155
  - spec/steps/assign_ids_spec.rb
139
156
  - spec/steps/create_stage_spec.rb
@@ -145,7 +162,9 @@ files:
145
162
  - spec/support/database.yml.example
146
163
  - spec/support/database.yml.travis
147
164
  - spec/support/database_helpers.rb
165
+ - spec/support/file_helpers.rb
148
166
  - spec/task_runner/dependency_resolver_spec.rb
167
+ - spec/testing_spec.rb
149
168
  homepage: https://github.com/maiwald/beetle_etl
150
169
  licenses:
151
170
  - MIT
@@ -178,6 +197,7 @@ test_files:
178
197
  - spec/feature/example_schema.rb
179
198
  - spec/feature/example_transform.rb
180
199
  - spec/feature/feature_spec.rb
200
+ - spec/reporter_spec.rb
181
201
  - spec/spec_helper.rb
182
202
  - spec/steps/assign_ids_spec.rb
183
203
  - spec/steps/create_stage_spec.rb
@@ -189,4 +209,6 @@ test_files:
189
209
  - spec/support/database.yml.example
190
210
  - spec/support/database.yml.travis
191
211
  - spec/support/database_helpers.rb
212
+ - spec/support/file_helpers.rb
192
213
  - spec/task_runner/dependency_resolver_spec.rb
214
+ - spec/testing_spec.rb
@@ -1,71 +0,0 @@
1
- module BeetleETL
2
- class TaskRunner
3
-
4
- def initialize(tasks)
5
- @dependency_resolver = DependencyResolver.new(tasks)
6
- @tasks = tasks
7
-
8
- @queue = Queue.new
9
- @completed = Set.new
10
- @running = Set.new
11
- end
12
-
13
- def run
14
- results = {}
15
-
16
- until all_tasks_complete?
17
- runnables.each do |task|
18
- run_task_async(task)
19
- mark_task_running(task.name)
20
- end
21
-
22
- task_name, task_data = @queue.pop
23
- results[task_name] = task_data
24
- mark_task_completed(task_name)
25
- end
26
-
27
- results
28
- end
29
-
30
- private
31
-
32
- attr_reader :running, :completed
33
-
34
- def run_task_async(task)
35
- Thread.new do
36
- started_at = now
37
- result = task.run
38
- finished_at = now
39
-
40
- @queue.push [task.name, {
41
- started_at: started_at,
42
- finished_at: finished_at,
43
- result: result,
44
- }]
45
- end
46
- end
47
-
48
- def mark_task_running(task_name)
49
- running.add(task_name)
50
- end
51
-
52
- def mark_task_completed(task_name)
53
- runnables.delete(task_name)
54
- completed.add(task_name)
55
- end
56
-
57
- def runnables
58
- resolvables = @dependency_resolver.resolvables(completed)
59
- resolvables.reject { |r| running.include? r.name }
60
- end
61
-
62
- def all_tasks_complete?
63
- @tasks.map(&:name).to_set == completed.to_set
64
- end
65
-
66
- def now
67
- Time.now
68
- end
69
-
70
- end
71
- end