beetle_etl 0.0.7 → 0.0.11

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 9d792ed373d43d6c0ab7ec27241eaf18fb59736b
4
- data.tar.gz: 7b08688ce87fbff7eea72b362773444ec4700b2c
3
+ metadata.gz: 788ce4239b271ab02fe67da3642f8f17e6fd275e
4
+ data.tar.gz: e89e37f2bd8ec970599249b73e97f9d66be60555
5
5
  SHA512:
6
- metadata.gz: 7022ab2cc6a60f57f061d3b1acaa8a125da6db7e9cec76421189ae2fa2c4729b4134cb8246a8c14283aeb7769e1ef92a75f480e2d6be3a3e41ba2e8017b7ee2c
7
- data.tar.gz: fa6e2f2f74cf1c4a6a03e53c2d76c962154e8d64f656d32690101bdd4dfa636dda9036213b29dfc675d07e9ed381a6527e94abb462f4f7eabfa3c3f11f279ee1
6
+ metadata.gz: 3df6bfbcadd41a1e98f330aa35c9bcc0701c3450d9425c7c156520cae467cdccbb4adc5075a7923261d972c1341f50c49f3a09e35536270f2cdd165414b14ea7
7
+ data.tar.gz: dd92ef629e21523001a4d8371b0a8b2432845ecb371b0bde278d752b06f989e19c1b61321dc28686220f14a0b428f33b749bab52c119ec1d56284c6a71ae0e44
data/beetle_etl.gemspec CHANGED
@@ -19,10 +19,11 @@ Gem::Specification.new do |spec|
19
19
  spec.require_paths = ['lib']
20
20
 
21
21
  spec.add_runtime_dependency 'sequel', '>= 4.0.0'
22
+ spec.add_runtime_dependency 'activesupport', '>= 4.2.0'
22
23
 
23
24
  spec.add_development_dependency 'bundler', '~> 1.6'
24
25
  spec.add_development_dependency 'rspec', '>= 3.0.0'
25
26
  spec.add_development_dependency 'timecop', '>= 0.7.0'
26
27
  spec.add_development_dependency 'pg', '>= 0.18.0'
27
- spec.add_development_dependency 'activesupport', '>= 4.2.0'
28
+ spec.add_development_dependency 'unindent', '~> 1.0'
28
29
  end
@@ -1,15 +1,34 @@
1
+ require 'active_support/core_ext/hash/deep_merge'
2
+
1
3
  module BeetleETL
2
4
  class Import
3
5
 
4
6
  def run
5
- TaskRunner.new(data_steps).run
6
- BeetleETL.database.transaction do
7
- TaskRunner.new(load_steps).run
8
- end
9
- rescue => e
10
- raise e
7
+ setup
8
+ import
11
9
  ensure
12
- TaskRunner.new(cleanup_steps).run
10
+ cleanup
11
+ end
12
+
13
+ def setup
14
+ transformations.each do |t|
15
+ CreateStage.new(t.table_name, t.relations, t.column_names).run
16
+ end
17
+ end
18
+
19
+ def import
20
+ data_report = AsyncStepRunner.new(data_steps).run
21
+ load_report = BeetleETL.database.transaction do
22
+ AsyncStepRunner.new(load_steps).run
23
+ end
24
+
25
+ data_report.deep_merge load_report
26
+ end
27
+
28
+ def cleanup
29
+ transformations.each do |t|
30
+ DropStage.new(t.table_name).run
31
+ end
13
32
  end
14
33
 
15
34
  private
@@ -17,7 +36,6 @@ module BeetleETL
17
36
  def data_steps
18
37
  transformations.flat_map do |t|
19
38
  [
20
- CreateStage.new(t.table_name, t.relations, t.column_names),
21
39
  Transform.new(t.table_name, t.dependencies, t.query),
22
40
  MapRelations.new(t.table_name, t.relations),
23
41
  TableDiff.new(t.table_name),
@@ -32,10 +50,6 @@ module BeetleETL
32
50
  end
33
51
  end
34
52
 
35
- def cleanup_steps
36
- transformations.map { |t| DropStage.new(t.table_name) }
37
- end
38
-
39
53
  def transformations
40
54
  @transformations ||= TransformationLoader.new.load
41
55
  end
@@ -0,0 +1,65 @@
1
+ module BeetleETL
2
+ class Reporter
3
+
4
+ def initialize(report)
5
+ @report = report
6
+ end
7
+
8
+ def log_summary
9
+ BeetleETL.logger.info(summary)
10
+ end
11
+
12
+ private
13
+
14
+ def summary
15
+ "\n\n" +
16
+ @report.map do |(table_name, steps)|
17
+ total_duration = format_duration(sum_durations(steps))
18
+ [
19
+ table_name,
20
+ seperator("="),
21
+ step_rows(steps).join("\n"),
22
+ seperator("-"),
23
+ total_duration.rjust(line_width)
24
+ ].join("\n")
25
+ end.join("\n\n") + "\n"
26
+ end
27
+
28
+ def step_rows(steps)
29
+ steps.map do |step_name, data|
30
+ label = step_name.split(": ")[1] + ":"
31
+ duration = format_duration(data[:finished_at] - data[:started_at])
32
+ line = duration.rjust(line_width)
33
+ line[2, label.length] = label
34
+ line
35
+ end
36
+ end
37
+
38
+ def format_duration(duration)
39
+ Time.at(duration).utc.strftime("%H:%M:%S")
40
+ end
41
+
42
+ def sum_durations(steps)
43
+ steps.inject(0) do |acc, (_step_name, data)|
44
+ acc + (data[:finished_at] - data[:started_at])
45
+ end
46
+ end
47
+
48
+ def line_width
49
+ # 2 spaces
50
+ # + 1 colon
51
+ # + 1 space
52
+ # + 8 duration
53
+ 12 + longest_step_name_length
54
+ end
55
+
56
+ def longest_step_name_length
57
+ @report.keys.max_by(&:length).length - 1
58
+ end
59
+
60
+ def seperator(character)
61
+ character * line_width
62
+ end
63
+
64
+ end
65
+ end
@@ -0,0 +1,83 @@
1
+ module BeetleETL
2
+ class AsyncStepRunner
3
+
4
+ def initialize(steps)
5
+ @dependency_resolver = DependencyResolver.new(steps)
6
+ @steps = steps
7
+
8
+ @queue = Queue.new
9
+ @completed = Set.new
10
+ @running = Set.new
11
+ end
12
+
13
+ def run
14
+ results = {}
15
+
16
+ until all_steps_complete?
17
+ runnables.each do |step|
18
+ run_step_async(step)
19
+ mark_step_running(step.name)
20
+ end
21
+
22
+ table_name, step_name, step_data = @queue.pop
23
+
24
+ unless results.has_key?(table_name)
25
+ results[table_name] = {}
26
+ end
27
+
28
+ results[table_name][step_name] = step_data
29
+ mark_step_completed(step_name)
30
+ end
31
+
32
+ results
33
+ end
34
+
35
+ private
36
+
37
+ attr_reader :running, :completed
38
+
39
+ def run_step_async(step)
40
+ Thread.new do
41
+ begin
42
+ BeetleETL.logger.info("started step #{step.name}")
43
+
44
+ started_at = Time.now
45
+ step.run
46
+ finished_at = Time.now
47
+
48
+ duration = Time.at(finished_at - started_at).utc.strftime("%H:%M:%S")
49
+ BeetleETL.logger.info("finished #{step.name} in #{duration}")
50
+
51
+ @queue.push [
52
+ step.table_name,
53
+ step.name,
54
+ { started_at: started_at, finished_at: finished_at }
55
+ ]
56
+
57
+ rescue => e
58
+ BeetleETL.logger.fatal(e.message)
59
+ raise e
60
+ end
61
+ end
62
+ end
63
+
64
+ def mark_step_running(step_name)
65
+ running.add(step_name)
66
+ end
67
+
68
+ def mark_step_completed(step_name)
69
+ runnables.delete(step_name)
70
+ completed.add(step_name)
71
+ end
72
+
73
+ def runnables
74
+ resolvables = @dependency_resolver.resolvables(completed)
75
+ resolvables.reject { |r| running.include? r.name }
76
+ end
77
+
78
+ def all_steps_complete?
79
+ @steps.map(&:name).to_set == completed.to_set
80
+ end
81
+
82
+ end
83
+ end
@@ -1,4 +1,8 @@
1
1
  module BeetleETL
2
+
3
+ ColumnDefinitionNotFoundError = Class.new(StandardError)
4
+ NoColumnsDefinedError = Class.new(StandardError)
5
+
2
6
  class CreateStage < Step
3
7
 
4
8
  def initialize(table_name, relations, column_names)
@@ -7,27 +11,38 @@ module BeetleETL
7
11
  @column_names = column_names
8
12
  end
9
13
 
10
- def dependencies
11
- Set.new
12
- end
13
-
14
14
  def run
15
15
  database.execute <<-SQL
16
- CREATE TABLE #{stage_table_name_sql} (
16
+ CREATE UNLOGGED TABLE #{stage_table_name_sql} (
17
17
  id integer,
18
18
  external_id character varying(255),
19
19
  transition character varying(255),
20
20
 
21
- #{[
22
- payload_column_definitions,
23
- relation_column_definitions
24
- ].compact.join(',')}
25
- )
21
+ #{column_definitions}
22
+ );
23
+
24
+ #{index_definitions}
26
25
  SQL
27
26
  end
28
27
 
29
28
  private
30
29
 
30
+ def column_definitions
31
+ definitions = [
32
+ payload_column_definitions,
33
+ relation_column_definitions
34
+ ].compact
35
+
36
+ if definitions.empty?
37
+ raise NoColumnsDefinedError.new <<-MSG
38
+ Transformation for #{table_name} has no column definitions.
39
+ Either add an array of columns or references to other tables.
40
+ MSG
41
+ end
42
+
43
+ definitions.join(',')
44
+ end
45
+
31
46
  def payload_column_definitions
32
47
  definitions = (@column_names - @relations.keys).map do |column_name|
33
48
  "#{column_name} #{column_type(column_name)}"
@@ -45,6 +60,13 @@ module BeetleETL
45
60
  definitions.join(',') if definitions.any?
46
61
  end
47
62
 
63
+ def index_definitions
64
+ index_columns = [:external_id] + @relations.keys.map { |c| "external_#{c}" }
65
+ index_columns.map do |column_name|
66
+ "CREATE INDEX ON #{stage_table_name_sql} (#{column_name})"
67
+ end.join(";")
68
+ end
69
+
48
70
  def column_type(column_name)
49
71
  @column_types ||= Hash[database.schema(public_table_name.to_sym)]
50
72
  .reduce({}) do |acc, (name, schema)|
@@ -52,6 +74,12 @@ module BeetleETL
52
74
  acc
53
75
  end
54
76
 
77
+ unless @column_types.has_key?(column_name)
78
+ raise ColumnDefinitionNotFoundError.new <<-MSG
79
+ Table "#{table_name}" has no column "#{column_name}".
80
+ MSG
81
+ end
82
+
55
83
  @column_types[column_name]
56
84
  end
57
85
 
@@ -1,10 +1,6 @@
1
1
  module BeetleETL
2
2
  class DropStage < Step
3
3
 
4
- def dependencies
5
- Set.new
6
- end
7
-
8
4
  def run
9
5
  database.execute <<-SQL
10
6
  DROP TABLE IF EXISTS #{stage_table_name_sql}
@@ -1,7 +1,5 @@
1
1
  module BeetleETL
2
2
 
3
- DependenciesNotDefinedError = Class.new(StandardError)
4
-
5
3
  class Step
6
4
 
7
5
  include BeetleETL::Naming
@@ -20,7 +18,7 @@ module BeetleETL
20
18
  end
21
19
 
22
20
  def dependencies
23
- raise DependenciesNotDefinedError
21
+ Set.new
24
22
  end
25
23
 
26
24
  def external_source
@@ -8,7 +8,7 @@ module BeetleETL
8
8
  end
9
9
 
10
10
  def dependencies
11
- Set.new(@dependencies.map { |d| self.class.step_name(d) }) << CreateStage.step_name(table_name)
11
+ Set.new(@dependencies.map { |d| self.class.step_name(d) })
12
12
  end
13
13
 
14
14
  def run
@@ -0,0 +1,46 @@
1
+ module BeetleETL
2
+ module Testing
3
+
4
+ TargetTableNotFoundError = Class.new(StandardError)
5
+ NoTransformationFoundError = Class.new(StandardError)
6
+
7
+ def with_stage_tables_for(*table_names, &block)
8
+ table_names.each do |table_name|
9
+ unless BeetleETL.database.table_exists?(table_name)
10
+ raise TargetTableNotFoundError.new <<-MSG
11
+ Missing target table "#{table_name}".
12
+ In order to create stage tables, BeetleETL requires the target tables to exist because they provide the column definitions.
13
+ MSG
14
+ end
15
+ end
16
+
17
+ import = Import.new
18
+ begin
19
+ import.setup
20
+ block.call
21
+ ensure
22
+ import.cleanup
23
+ end
24
+ end
25
+
26
+ def run_transformation(table_name)
27
+ transformations = TransformationLoader.new.load
28
+
29
+ unless transformations.map(&:table_name).include?(table_name)
30
+ raise NoTransformationFoundError.new <<-MSG
31
+ No transformation definition found for table "#{table_name}".
32
+ MSG
33
+ end
34
+
35
+ transformation = transformations.find { |t| t.table_name == table_name }
36
+ transform = Transform.new(transformation.table_name, transformation.dependencies, transformation.query)
37
+ transform.run
38
+ end
39
+
40
+
41
+ def stage_table_name(table_name)
42
+ BeetleETL::Naming.stage_table_name(table_name)
43
+ end
44
+
45
+ end
46
+ end
@@ -1,3 +1,3 @@
1
1
  module BeetleETL
2
- VERSION = "0.0.7"
2
+ VERSION = "0.0.11"
3
3
  end
data/lib/beetle_etl.rb CHANGED
@@ -1,6 +1,7 @@
1
1
  require 'beetle_etl/version'
2
2
 
3
3
  require 'sequel'
4
+ require 'logger'
4
5
 
5
6
  module BeetleETL
6
7
 
@@ -21,10 +22,11 @@ module BeetleETL
21
22
  require 'beetle_etl/steps/load'
22
23
  require 'beetle_etl/steps/drop_stage'
23
24
 
24
- require 'beetle_etl/task_runner/dependency_resolver'
25
- require 'beetle_etl/task_runner/task_runner'
25
+ require 'beetle_etl/step_runner/dependency_resolver'
26
+ require 'beetle_etl/step_runner/async_step_runner'
26
27
 
27
28
  require 'beetle_etl/import'
29
+ require 'beetle_etl/reporter'
28
30
 
29
31
  class Configuration
30
32
  attr_accessor \
@@ -33,10 +35,12 @@ module BeetleETL
33
35
  :transformation_file,
34
36
  :stage_schema,
35
37
  :public_schema,
36
- :external_source
38
+ :external_source,
39
+ :logger
37
40
 
38
41
  def initialize
39
42
  @public_schema = 'public'
43
+ @logger = ::Logger.new(STDOUT)
40
44
  end
41
45
  end
42
46
 
@@ -44,9 +48,9 @@ module BeetleETL
44
48
 
45
49
  def import
46
50
  begin
47
- Import.new.run
48
- rescue Exception => e
49
- raise e
51
+ report = Import.new.run
52
+ Reporter.new(report).log_summary
53
+ report
50
54
  ensure
51
55
  @database.disconnect if @database
52
56
  end
@@ -60,6 +64,10 @@ module BeetleETL
60
64
  @config ||= Configuration.new
61
65
  end
62
66
 
67
+ def logger
68
+ config.logger
69
+ end
70
+
63
71
  def database
64
72
  if config.database
65
73
  config.database
@@ -3,9 +3,13 @@ require 'spec_helper'
3
3
  describe BeetleETL do
4
4
 
5
5
  describe '#import' do
6
- it 'runs the import' do
7
- expect(BeetleETL::Import).to receive_message_chain(:new, :run)
8
- BeetleETL.import
6
+ it 'runs the import with reporting' do
7
+ report = double(:report)
8
+ reporter = double(:reporter, log_summary: nil)
9
+
10
+ expect(BeetleETL::Import).to receive_message_chain(:new, :run).and_return report
11
+ expect(BeetleETL::Reporter).to receive(:new).with(report).and_return reporter
12
+ expect(BeetleETL.import).to eql(report)
9
13
  end
10
14
  end
11
15
 
@@ -1,12 +1,11 @@
1
1
  require 'spec_helper'
2
- require 'tempfile'
3
2
 
4
3
  module BeetleETL
5
4
  describe TransformationLoader do
6
5
 
7
6
  subject { TransformationLoader.new }
8
7
 
9
- before :example do
8
+ before do
10
9
  data_file = tempfile_with_contents <<-FILE
11
10
  import :foo do
12
11
  'foo'
@@ -47,12 +46,5 @@ module BeetleETL
47
46
  end
48
47
  end
49
48
 
50
- def tempfile_with_contents(contents)
51
- Tempfile.new('transform').tap do |file|
52
- file.write(contents)
53
- file.close
54
- end
55
- end
56
-
57
49
  end
58
50
  end
@@ -13,9 +13,9 @@ describe BeetleETL do
13
13
 
14
14
  include ExampleSchema
15
15
 
16
- let!(:time1) { Time.new(2014 , 7 , 17 , 16 , 12).beginning_of_day }
17
- let!(:time2) { Time.new(2015 , 2 , 8 , 22 , 18).beginning_of_day }
18
- let!(:time3) { Time.new(2015 , 11 , 3 , 12 , 17).beginning_of_day }
16
+ let!(:time1) { Time.new(2014 , 7 , 17 , 16 , 12).beginning_of_day }
17
+ let!(:time2) { Time.new(2015 , 2 , 8 , 22 , 18).beginning_of_day }
18
+ let!(:time3) { Time.new(2015 , 11 , 3 , 12 , 17).beginning_of_day }
19
19
 
20
20
  before :each do
21
21
  create_tables
@@ -27,6 +27,7 @@ describe BeetleETL do
27
27
  config.transformation_file = File.expand_path('../example_transform.rb', __FILE__)
28
28
  config.database_config = database_config
29
29
  config.external_source = 'source_name'
30
+ config.logger = Logger.new(Tempfile.new("log"))
30
31
  end
31
32
  end
32
33
 
@@ -0,0 +1,65 @@
1
+ require 'spec_helper'
2
+ require 'unindent'
3
+
4
+ module BeetleETL
5
+ describe Reporter do
6
+
7
+ let(:report) do
8
+ {
9
+ organisations: {
10
+ "organisations: Transform" => {
11
+ :started_at => Time.new(2015, 03, 14, 16, 0),
12
+ :finished_at => Time.new(2015, 03, 14, 16, 10)
13
+ },
14
+ "organisations: MapRelations" => {
15
+ :started_at => Time.new(2015, 03, 14, 17, 0),
16
+ :finished_at => Time.new(2015, 03, 14, 17, 10)
17
+ },
18
+ "organisations: Load" => {
19
+ :started_at => Time.new(2015, 03, 14, 18, 0),
20
+ :finished_at => Time.new(2015, 03, 14, 18, 10)
21
+ },
22
+ },
23
+ departments: {
24
+ "departments: Transform" => {
25
+ :started_at => Time.new(2015, 03, 14, 16, 0),
26
+ :finished_at => Time.new(2015, 03, 14, 16, 12)
27
+ },
28
+ "departments: MapRelations" => {
29
+ :started_at => Time.new(2015, 03, 14, 17, 2),
30
+ :finished_at => Time.new(2015, 03, 14, 17, 10)
31
+ },
32
+ "departments: Load" => {
33
+ :started_at => Time.new(2015, 03, 14, 18, 10),
34
+ :finished_at => Time.new(2015, 03, 14, 19, 21, 39)
35
+ },
36
+ }
37
+ }
38
+ end
39
+
40
+ it "loggs a summary of all step times by table name" do
41
+ expect(BeetleETL.logger).to receive(:info).with <<-LOG.unindent
42
+
43
+
44
+ organisations
45
+ ========================
46
+ Transform: 00:10:00
47
+ MapRelations: 00:10:00
48
+ Load: 00:10:00
49
+ ------------------------
50
+ 00:30:00
51
+
52
+ departments
53
+ ========================
54
+ Transform: 00:12:00
55
+ MapRelations: 00:08:00
56
+ Load: 01:11:39
57
+ ------------------------
58
+ 01:31:39
59
+ LOG
60
+
61
+ Reporter.new(report).log_summary
62
+ end
63
+
64
+ end
65
+ end
data/spec/spec_helper.rb CHANGED
@@ -3,14 +3,18 @@ CodeClimate::TestReporter.start
3
3
 
4
4
  require_relative '../lib/beetle_etl.rb'
5
5
  require_relative 'support/database_helpers.rb'
6
+ require_relative 'support/file_helpers.rb'
6
7
 
7
8
  RSpec.configure do |config|
8
9
 
9
10
  config.include SpecSupport::DatabaseHelpers
11
+ config.include SpecSupport::FileHelpers
12
+
10
13
  config.backtrace_exclusion_patterns = [/rspec-core/]
11
14
 
12
15
  config.around(:each) do |example|
13
16
  BeetleETL.reset
17
+
14
18
  if example.metadata[:feature]
15
19
  example.run
16
20
  else
@@ -47,42 +47,54 @@ module BeetleETL
47
47
  it 'creates a stage table table with all payload columns' do
48
48
  subject.run
49
49
 
50
- columns = Hash[test_database.schema(subject.stage_table_name.to_sym)]
50
+ schema = Hash[test_database.schema(subject.stage_table_name.to_sym)]
51
51
 
52
52
  expected_columns = %i(id external_id some_string some_integer some_float)
53
- expect(columns.keys).to include(*expected_columns)
53
+ expect(schema.keys).to include(*expected_columns)
54
54
 
55
- expect(columns[:id][:db_type]).to eq('integer')
56
- expect(columns[:external_id][:db_type]).to eq('character varying(255)')
57
- expect(columns[:transition][:db_type]).to eq('character varying(255)')
55
+ expect(schema[:id][:db_type]).to eq('integer')
56
+ expect(schema[:external_id][:db_type]).to eq('character varying(255)')
57
+ expect(schema[:transition][:db_type]).to eq('character varying(255)')
58
58
 
59
- expect(columns[:some_string][:db_type]).to eq('character varying(200)')
60
- expect(columns[:some_integer][:db_type]).to eq('integer')
61
- expect(columns[:some_float][:db_type]).to eq('double precision')
59
+ expect(schema[:some_string][:db_type]).to eq('character varying(200)')
60
+ expect(schema[:some_integer][:db_type]).to eq('integer')
61
+ expect(schema[:some_float][:db_type]).to eq('double precision')
62
62
  end
63
63
 
64
64
  it 'adds columns for dependent foreign key associations' do
65
65
  subject.run
66
66
 
67
- columns = Hash[test_database.schema(subject.stage_table_name)]
67
+ schema = Hash[test_database.schema(subject.stage_table_name)]
68
68
 
69
69
  expected_columns = %i(
70
70
  dependee_a_id external_dependee_a_id
71
71
  dependee_b_id external_dependee_b_id
72
72
  )
73
- expect(columns.keys).to include(*expected_columns)
73
+ expect(schema.keys).to include(*expected_columns)
74
74
 
75
- expect(columns[:dependee_a_id][:db_type]).to eq('integer')
76
- expect(columns[:external_dependee_a_id][:db_type]).to eq('character varying(255)')
75
+ expect(schema[:dependee_a_id][:db_type]).to eq('integer')
76
+ expect(schema[:external_dependee_a_id][:db_type]).to eq('character varying(255)')
77
77
 
78
- expect(columns[:dependee_b_id][:db_type]).to eq('integer')
79
- expect(columns[:external_dependee_b_id][:db_type]).to eq('character varying(255)')
78
+ expect(schema[:dependee_b_id][:db_type]).to eq('integer')
79
+ expect(schema[:external_dependee_b_id][:db_type]).to eq('character varying(255)')
80
80
  end
81
81
 
82
82
  it 'does not add foreign key columns twice if defined as payload column' do
83
83
  columns = [:some_string, :dependee_a_id]
84
84
  CreateStage.new(:example_table, @relations, columns).run
85
85
  end
86
+
87
+ it 'raises an error if no columns and no relations are defined' do
88
+ expect do
89
+ CreateStage.new(:example_table, {}, []).run
90
+ end.to raise_error(BeetleETL::NoColumnsDefinedError)
91
+ end
92
+
93
+ it 'raises an error when given columns with no definition' do
94
+ expect do
95
+ CreateStage.new(:example_table, @relations, [:undefined_column]).run
96
+ end.to raise_error(BeetleETL::ColumnDefinitionNotFoundError)
97
+ end
86
98
  end
87
99
 
88
100
  end
@@ -27,8 +27,8 @@ module BeetleETL
27
27
  end
28
28
 
29
29
  describe '#dependencies' do
30
- it 'raises an exception' do
31
- expect { subject.dependencies }.to raise_error(DependenciesNotDefinedError)
30
+ it 'returns an empty set' do
31
+ expect(subject.dependencies).to eql(Set.new)
32
32
  end
33
33
  end
34
34
 
@@ -15,7 +15,6 @@ module BeetleETL
15
15
  [
16
16
  'some_table: Transform',
17
17
  'some_other_table: Transform',
18
- 'example_table: CreateStage',
19
18
  ].to_set
20
19
  )
21
20
  end
@@ -37,7 +37,7 @@ end
37
37
 
38
38
  RSpec::Matchers.define :have_values do |*rows|
39
39
  match do |table_description|
40
- dataset = test_database[table_description]
40
+ dataset = test_database[table_description.to_sym]
41
41
 
42
42
  columns = rows[0].map(&:to_sym)
43
43
  values = rows[1..-1]
@@ -0,0 +1,14 @@
1
+ require "tempfile"
2
+
3
+ module SpecSupport
4
+ module FileHelpers
5
+
6
+ def tempfile_with_contents(contents)
7
+ ::Tempfile.new('transform').tap do |file|
8
+ file.write(contents)
9
+ file.close
10
+ end
11
+ end
12
+
13
+ end
14
+ end
@@ -0,0 +1,76 @@
1
+ require 'byebug'
2
+ require "spec_helper"
3
+ require "beetle_etl/testing"
4
+
5
+ describe "BeetleETL:Testing" do
6
+
7
+ include BeetleETL::Testing
8
+
9
+ before do
10
+ data_file = tempfile_with_contents <<-'FILE'
11
+ import :some_table do
12
+ columns :some_attribute
13
+ end
14
+
15
+ import :organisations do
16
+ references :some_table, on: :some_table_id
17
+ columns :name, :address
18
+
19
+ query <<-SQL
20
+ INSERT INTO #{stage_table} (external_id, address, name)
21
+ VALUES ('external_id', 'address', 'name')
22
+ SQL
23
+ end
24
+ FILE
25
+
26
+ BeetleETL.configure do |config|
27
+ config.database = test_database
28
+ config.transformation_file = data_file.path
29
+ end
30
+ end
31
+
32
+ context "with properly defined target tables" do
33
+ before do
34
+ test_database.create_table :some_table do
35
+ primary_key :id
36
+ String :external_id, size: 255
37
+ String :some_attribute, size: 255
38
+ end
39
+
40
+ test_database.create_table :organisations do
41
+ primary_key :id
42
+ String :external_id, size: 255
43
+ String :name, size: 255
44
+ String :address, size: 255
45
+ end
46
+ end
47
+
48
+ it "makes stage tables available in the block" do
49
+ with_stage_tables_for(:organisations, :some_table) do
50
+ expect(test_database.table_exists?(stage_table_name(:organisations))).to be_truthy
51
+ expect(test_database.table_exists?(stage_table_name(:some_table))).to be_truthy
52
+ end
53
+
54
+ expect(test_database.table_exists?(stage_table_name(:organisations))).to be_falsey
55
+ expect(test_database.table_exists?(stage_table_name(:some_table))).to be_falsey
56
+ end
57
+
58
+ it "allows the transformation to be run insiede the block" do
59
+ with_stage_tables_for(:organisations, :some_table) do
60
+ run_transformation(:organisations)
61
+
62
+ expect(stage_table_name(:organisations)).to have_values(
63
+ [ :external_id , :address , :name ] ,
64
+ [ "external_id" , "address" , "name" ]
65
+ )
66
+ end
67
+ end
68
+
69
+ end
70
+
71
+ it "raises an error if the target table cannot be found" do
72
+ expect do
73
+ with_stage_tables_for(:organisations)
74
+ end.to raise_error(BeetleETL::Testing::TargetTableNotFoundError)
75
+ end
76
+ end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: beetle_etl
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.7
4
+ version: 0.0.11
5
5
  platform: ruby
6
6
  authors:
7
7
  - Luciano Maiwald
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-02-28 00:00:00.000000000 Z
11
+ date: 2015-04-03 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: sequel
@@ -24,6 +24,20 @@ dependencies:
24
24
  - - ">="
25
25
  - !ruby/object:Gem::Version
26
26
  version: 4.0.0
27
+ - !ruby/object:Gem::Dependency
28
+ name: activesupport
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: 4.2.0
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: 4.2.0
27
41
  - !ruby/object:Gem::Dependency
28
42
  name: bundler
29
43
  requirement: !ruby/object:Gem::Requirement
@@ -81,19 +95,19 @@ dependencies:
81
95
  - !ruby/object:Gem::Version
82
96
  version: 0.18.0
83
97
  - !ruby/object:Gem::Dependency
84
- name: activesupport
98
+ name: unindent
85
99
  requirement: !ruby/object:Gem::Requirement
86
100
  requirements:
87
- - - ">="
101
+ - - "~>"
88
102
  - !ruby/object:Gem::Version
89
- version: 4.2.0
103
+ version: '1.0'
90
104
  type: :development
91
105
  prerelease: false
92
106
  version_requirements: !ruby/object:Gem::Requirement
93
107
  requirements:
94
- - - ">="
108
+ - - "~>"
95
109
  - !ruby/object:Gem::Version
96
- version: 4.2.0
110
+ version: '1.0'
97
111
  description: Taking care of synchronizing external data with referential data in your
98
112
  application.
99
113
  email:
@@ -115,6 +129,9 @@ files:
115
129
  - lib/beetle_etl/dsl/transformation_loader.rb
116
130
  - lib/beetle_etl/import.rb
117
131
  - lib/beetle_etl/naming.rb
132
+ - lib/beetle_etl/reporter.rb
133
+ - lib/beetle_etl/step_runner/async_step_runner.rb
134
+ - lib/beetle_etl/step_runner/dependency_resolver.rb
118
135
  - lib/beetle_etl/steps/assign_ids.rb
119
136
  - lib/beetle_etl/steps/create_stage.rb
120
137
  - lib/beetle_etl/steps/drop_stage.rb
@@ -123,8 +140,7 @@ files:
123
140
  - lib/beetle_etl/steps/step.rb
124
141
  - lib/beetle_etl/steps/table_diff.rb
125
142
  - lib/beetle_etl/steps/transform.rb
126
- - lib/beetle_etl/task_runner/dependency_resolver.rb
127
- - lib/beetle_etl/task_runner/task_runner.rb
143
+ - lib/beetle_etl/testing.rb
128
144
  - lib/beetle_etl/version.rb
129
145
  - script/postgres
130
146
  - spec/beetle_etl_spec.rb
@@ -134,6 +150,7 @@ files:
134
150
  - spec/feature/example_schema.rb
135
151
  - spec/feature/example_transform.rb
136
152
  - spec/feature/feature_spec.rb
153
+ - spec/reporter_spec.rb
137
154
  - spec/spec_helper.rb
138
155
  - spec/steps/assign_ids_spec.rb
139
156
  - spec/steps/create_stage_spec.rb
@@ -145,7 +162,9 @@ files:
145
162
  - spec/support/database.yml.example
146
163
  - spec/support/database.yml.travis
147
164
  - spec/support/database_helpers.rb
165
+ - spec/support/file_helpers.rb
148
166
  - spec/task_runner/dependency_resolver_spec.rb
167
+ - spec/testing_spec.rb
149
168
  homepage: https://github.com/maiwald/beetle_etl
150
169
  licenses:
151
170
  - MIT
@@ -178,6 +197,7 @@ test_files:
178
197
  - spec/feature/example_schema.rb
179
198
  - spec/feature/example_transform.rb
180
199
  - spec/feature/feature_spec.rb
200
+ - spec/reporter_spec.rb
181
201
  - spec/spec_helper.rb
182
202
  - spec/steps/assign_ids_spec.rb
183
203
  - spec/steps/create_stage_spec.rb
@@ -189,4 +209,6 @@ test_files:
189
209
  - spec/support/database.yml.example
190
210
  - spec/support/database.yml.travis
191
211
  - spec/support/database_helpers.rb
212
+ - spec/support/file_helpers.rb
192
213
  - spec/task_runner/dependency_resolver_spec.rb
214
+ - spec/testing_spec.rb
@@ -1,71 +0,0 @@
1
- module BeetleETL
2
- class TaskRunner
3
-
4
- def initialize(tasks)
5
- @dependency_resolver = DependencyResolver.new(tasks)
6
- @tasks = tasks
7
-
8
- @queue = Queue.new
9
- @completed = Set.new
10
- @running = Set.new
11
- end
12
-
13
- def run
14
- results = {}
15
-
16
- until all_tasks_complete?
17
- runnables.each do |task|
18
- run_task_async(task)
19
- mark_task_running(task.name)
20
- end
21
-
22
- task_name, task_data = @queue.pop
23
- results[task_name] = task_data
24
- mark_task_completed(task_name)
25
- end
26
-
27
- results
28
- end
29
-
30
- private
31
-
32
- attr_reader :running, :completed
33
-
34
- def run_task_async(task)
35
- Thread.new do
36
- started_at = now
37
- result = task.run
38
- finished_at = now
39
-
40
- @queue.push [task.name, {
41
- started_at: started_at,
42
- finished_at: finished_at,
43
- result: result,
44
- }]
45
- end
46
- end
47
-
48
- def mark_task_running(task_name)
49
- running.add(task_name)
50
- end
51
-
52
- def mark_task_completed(task_name)
53
- runnables.delete(task_name)
54
- completed.add(task_name)
55
- end
56
-
57
- def runnables
58
- resolvables = @dependency_resolver.resolvables(completed)
59
- resolvables.reject { |r| running.include? r.name }
60
- end
61
-
62
- def all_tasks_complete?
63
- @tasks.map(&:name).to_set == completed.to_set
64
- end
65
-
66
- def now
67
- Time.now
68
- end
69
-
70
- end
71
- end