beetle_etl 0.0.1 → 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: ebb61022b0e58e217f1b215486a993c7e43799a2
4
- data.tar.gz: e00ad7086cf5be7c4cadf520cbd21c1ebd47202d
3
+ metadata.gz: b08e51fe6819079c6b8317636030d5d678b102cc
4
+ data.tar.gz: 6afa19db3171f105fa275f99c82c07306aa9c568
5
5
  SHA512:
6
- metadata.gz: 8e3c2be8adf3cb65807fddb95d1b09d1de9064f586f3e0aefa6026ff3e7b572b8f9653c31ca759114228e6cb6470495f21b74f7470ad6fb7745136210b87f31e
7
- data.tar.gz: 3d91601c914486564b8db8e3afdef62c90b04b54fe3633321547de3ec437ee1f12ead442265e84561c4578d2dae482be2cebc85dfd610c76e3db877239d4a2d5
6
+ metadata.gz: a915efc65e4450aa4ba7cec71a75b89082d6b65be4cd2e6db88ee8e540f9bc0d1db96afd18c787ac504309460447bfdbb20ab462e012ac503e8fe8cdecb65880
7
+ data.tar.gz: afc74720f9875d7a447030bb14fbc4da90c1004b5c725fb55b0630bbc073d3d15560e6c5e2b7002951e1989eb4b63b956508411811c146986820aee2679a6462
@@ -5,7 +5,7 @@ rvm:
5
5
  addons:
6
6
  postgresql: "9.3"
7
7
  code_climate:
8
- repo_token: dd18697b0acb6be343db62982b753b72676e8342701cc0442121de2d12ee6549
8
+ repo_token: fcd6d8c28da900609a2cf903716d858621b8ce68152edbcebe6908a9a3f5d3d5
9
9
 
10
10
  before_script:
11
11
  - psql -c 'create database travis_ci_test;' -U postgres
@@ -19,11 +19,10 @@ Gem::Specification.new do |spec|
19
19
  spec.require_paths = ['lib']
20
20
 
21
21
  spec.add_runtime_dependency 'sequel', '>= 4.13.0'
22
- spec.add_runtime_dependency 'celluloid', '>= 0.15.2'
23
22
 
24
23
  spec.add_development_dependency 'bundler', '~> 1.6'
25
24
  spec.add_development_dependency 'rake'
26
- spec.add_development_dependency 'rspec', '~> 3.0.0'
25
+ spec.add_development_dependency 'rspec', '~> 3.1.0'
27
26
  spec.add_development_dependency 'pg'
28
27
  spec.add_development_dependency 'codeclimate-test-reporter'
29
28
  spec.add_development_dependency 'activesupport'
@@ -4,28 +4,28 @@ module BeetleETL
4
4
  extend self
5
5
 
6
6
  def run
7
- TaskRunner.run(data_steps)
7
+ TaskRunner.new(data_steps).run
8
8
  BeetleETL.database.transaction do
9
- load_steps.each(&:run)
9
+ TaskRunner.new(load_steps).run
10
10
  end
11
11
  end
12
12
 
13
13
  private
14
14
 
15
15
  def data_steps
16
- transformations.map do |t|
16
+ transformations.flat_map do |t|
17
17
  [
18
18
  Transform.new(t.table_name, t.dependencies, t.query),
19
19
  MapRelations.new(t.table_name, t.relations),
20
20
  TableDiff.new(t.table_name),
21
21
  AssignIds.new(t.table_name),
22
22
  ]
23
- end.flatten
23
+ end
24
24
  end
25
25
 
26
26
  def load_steps
27
27
  transformations.map do |t|
28
- Load.new(t.table_name)
28
+ Load.new(t.table_name, t.relations)
29
29
  end
30
30
  end
31
31
 
@@ -6,8 +6,10 @@ module BeetleETL
6
6
  end
7
7
 
8
8
  def run
9
- assign_new_ids
10
- map_existing_ids
9
+ [
10
+ Thread.new { assign_new_ids },
11
+ Thread.new { map_existing_ids }
12
+ ].each(&:join)
11
13
  end
12
14
 
13
15
  def assign_new_ids
@@ -7,10 +7,21 @@ module BeetleETL
7
7
  transition
8
8
  ]
9
9
 
10
+ attr_reader :relations
11
+
12
+ def initialize(table_name, relations)
13
+ super(table_name)
14
+ @relations = relations
15
+ end
16
+
10
17
  def run
11
- %w(create update delete undelete).each do |transition|
12
- public_send(:"load_#{transition}")
13
- end
18
+ %w(create update delete undelete).map do |transition|
19
+ Thread.new { public_send(:"load_#{transition}") }
20
+ end.each(&:join)
21
+ end
22
+
23
+ def dependencies
24
+ relations.values.map { |d| Load.step_name(d) }.to_set
14
25
  end
15
26
 
16
27
  def load_create
@@ -12,9 +12,9 @@ module BeetleETL
12
12
  end
13
13
 
14
14
  def run
15
- %w(create keep update delete undelete).each do |transition|
16
- public_send(:"transition_#{transition}")
17
- end
15
+ %w(create keep update delete undelete).map do |transition|
16
+ Thread.new { public_send(:"transition_#{transition}") }
17
+ end.each(&:join)
18
18
  end
19
19
 
20
20
  def transition_create
@@ -21,7 +21,7 @@ module BeetleETL
21
21
  items = @items.dup
22
22
  resolved = []
23
23
 
24
- while not items.empty?
24
+ until items.empty?
25
25
  resolved_names = resolved.flatten.map(&:name).to_set
26
26
 
27
27
  resolvable = items.select do |item|
@@ -1,63 +1,70 @@
1
- require 'celluloid/autostart'
2
-
3
1
  module BeetleETL
4
2
  class TaskRunner
5
3
 
6
- include Celluloid
4
+ def initialize(tasks)
5
+ @dependency_resolver = DependencyResolver.new(tasks)
6
+ @tasks = tasks
7
7
 
8
- def initialize(runnables)
9
- @runnables = runnables
8
+ @queue = Queue.new
10
9
  @completed = Set.new
11
10
  @running = Set.new
12
- @dependency_resolver = DependencyResolver.new(runnables)
13
-
14
- run_next
15
11
  end
16
12
 
17
- def completed(runnable_name)
18
- @running.delete(runnable_name)
19
- @completed << runnable_name
20
-
21
- run_next
22
- end
13
+ def run
14
+ results = {}
23
15
 
24
- def run_next
25
- if all_run?
26
- terminate
27
- else
28
- resolvables.each do |runnable|
29
- unless @running.include?(runnable.name)
30
- Task.new(Actor.current, runnable).async.run_task
31
- @running << runnable.name
32
- end
16
+ until all_tasks_complete?
17
+ runnables.each do |task|
18
+ run_task_async(task)
19
+ mark_task_running(task.name)
33
20
  end
21
+
22
+ task_name, task_data = @queue.pop
23
+ results[task_name] = task_data
24
+ mark_task_completed(task_name)
34
25
  end
26
+
27
+ results
35
28
  end
36
29
 
37
30
  private
38
31
 
39
- def resolvables
40
- @dependency_resolver.resolvables(@completed)
32
+ attr_reader :running, :completed
33
+
34
+ def run_task_async(task)
35
+ Thread.new do
36
+ started_at = now
37
+ result = task.run
38
+ finished_at = now
39
+
40
+ @queue.push [task.name, {
41
+ started_at: started_at,
42
+ finished_at: finished_at,
43
+ result: result,
44
+ }]
45
+ end
41
46
  end
42
47
 
43
- def all_run?
44
- @completed == @runnables.map(&:name).to_set
48
+ def mark_task_running(task_name)
49
+ running.add(task_name)
45
50
  end
46
51
 
47
- class Task
52
+ def mark_task_completed(task_name)
53
+ runnables.delete(task_name)
54
+ completed.add(task_name)
55
+ end
48
56
 
49
- include Celluloid
57
+ def runnables
58
+ resolvables = @dependency_resolver.resolvables(completed)
59
+ resolvables.reject { |r| running.include? r.name }
60
+ end
50
61
 
51
- def initialize(runner, task)
52
- @runner = runner
53
- @task = task
54
- end
62
+ def all_tasks_complete?
63
+ @tasks.map(&:name).to_set == completed.to_set
64
+ end
55
65
 
56
- def run_task
57
- @task.run
58
- @runner.async.completed(@task.name)
59
- terminate
60
- end
66
+ def now
67
+ Time.now
61
68
  end
62
69
 
63
70
  end
@@ -1,3 +1,3 @@
1
1
  module BeetleETL
2
- VERSION = "0.0.1"
2
+ VERSION = "0.0.2"
3
3
  end
@@ -13,7 +13,7 @@ module BeetleETL
13
13
  let(:now) { Time.now.beginning_of_day }
14
14
  let(:yesterday) { 1.day.ago.beginning_of_day }
15
15
 
16
- subject { Load.new(:example_table) }
16
+ subject { Load.new(:example_table, []) }
17
17
 
18
18
  before do
19
19
  BeetleETL.configure do |config|
@@ -52,6 +52,22 @@ module BeetleETL
52
52
  end
53
53
  end
54
54
 
55
+ describe '#depenencies' do
56
+ it 'depends on Transform of the same table and AssignIds of its dependees' do
57
+ relations = {
58
+ dependee_a_id: :dependee_a,
59
+ dependee_b_id: :dependee_b,
60
+ }
61
+
62
+ expect(Load.new(:depender, relations).dependencies).to eql(
63
+ [
64
+ 'dependee_a: Load',
65
+ 'dependee_b: Load',
66
+ ].to_set
67
+ )
68
+ end
69
+ end
70
+
55
71
  describe '#run' do
56
72
  it 'runs all load steps' do
57
73
  %w(create update delete undelete).each do |transition|
@@ -30,7 +30,6 @@ module BeetleETL
30
30
  it 'raises an exception' do
31
31
  expect { subject.dependencies }.to raise_error(DependenciesNotDefinedError)
32
32
  end
33
-
34
33
  end
35
34
 
36
35
  end
metadata CHANGED
@@ -1,125 +1,111 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: beetle_etl
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.0.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Luciano Maiwald
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-09-26 00:00:00.000000000 Z
11
+ date: 2014-12-31 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: sequel
15
15
  requirement: !ruby/object:Gem::Requirement
16
16
  requirements:
17
- - - '>='
17
+ - - ">="
18
18
  - !ruby/object:Gem::Version
19
19
  version: 4.13.0
20
20
  type: :runtime
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
- - - '>='
24
+ - - ">="
25
25
  - !ruby/object:Gem::Version
26
26
  version: 4.13.0
27
- - !ruby/object:Gem::Dependency
28
- name: celluloid
29
- requirement: !ruby/object:Gem::Requirement
30
- requirements:
31
- - - '>='
32
- - !ruby/object:Gem::Version
33
- version: 0.15.2
34
- type: :runtime
35
- prerelease: false
36
- version_requirements: !ruby/object:Gem::Requirement
37
- requirements:
38
- - - '>='
39
- - !ruby/object:Gem::Version
40
- version: 0.15.2
41
27
  - !ruby/object:Gem::Dependency
42
28
  name: bundler
43
29
  requirement: !ruby/object:Gem::Requirement
44
30
  requirements:
45
- - - ~>
31
+ - - "~>"
46
32
  - !ruby/object:Gem::Version
47
33
  version: '1.6'
48
34
  type: :development
49
35
  prerelease: false
50
36
  version_requirements: !ruby/object:Gem::Requirement
51
37
  requirements:
52
- - - ~>
38
+ - - "~>"
53
39
  - !ruby/object:Gem::Version
54
40
  version: '1.6'
55
41
  - !ruby/object:Gem::Dependency
56
42
  name: rake
57
43
  requirement: !ruby/object:Gem::Requirement
58
44
  requirements:
59
- - - '>='
45
+ - - ">="
60
46
  - !ruby/object:Gem::Version
61
47
  version: '0'
62
48
  type: :development
63
49
  prerelease: false
64
50
  version_requirements: !ruby/object:Gem::Requirement
65
51
  requirements:
66
- - - '>='
52
+ - - ">="
67
53
  - !ruby/object:Gem::Version
68
54
  version: '0'
69
55
  - !ruby/object:Gem::Dependency
70
56
  name: rspec
71
57
  requirement: !ruby/object:Gem::Requirement
72
58
  requirements:
73
- - - ~>
59
+ - - "~>"
74
60
  - !ruby/object:Gem::Version
75
- version: 3.0.0
61
+ version: 3.1.0
76
62
  type: :development
77
63
  prerelease: false
78
64
  version_requirements: !ruby/object:Gem::Requirement
79
65
  requirements:
80
- - - ~>
66
+ - - "~>"
81
67
  - !ruby/object:Gem::Version
82
- version: 3.0.0
68
+ version: 3.1.0
83
69
  - !ruby/object:Gem::Dependency
84
70
  name: pg
85
71
  requirement: !ruby/object:Gem::Requirement
86
72
  requirements:
87
- - - '>='
73
+ - - ">="
88
74
  - !ruby/object:Gem::Version
89
75
  version: '0'
90
76
  type: :development
91
77
  prerelease: false
92
78
  version_requirements: !ruby/object:Gem::Requirement
93
79
  requirements:
94
- - - '>='
80
+ - - ">="
95
81
  - !ruby/object:Gem::Version
96
82
  version: '0'
97
83
  - !ruby/object:Gem::Dependency
98
84
  name: codeclimate-test-reporter
99
85
  requirement: !ruby/object:Gem::Requirement
100
86
  requirements:
101
- - - '>='
87
+ - - ">="
102
88
  - !ruby/object:Gem::Version
103
89
  version: '0'
104
90
  type: :development
105
91
  prerelease: false
106
92
  version_requirements: !ruby/object:Gem::Requirement
107
93
  requirements:
108
- - - '>='
94
+ - - ">="
109
95
  - !ruby/object:Gem::Version
110
96
  version: '0'
111
97
  - !ruby/object:Gem::Dependency
112
98
  name: activesupport
113
99
  requirement: !ruby/object:Gem::Requirement
114
100
  requirements:
115
- - - '>='
101
+ - - ">="
116
102
  - !ruby/object:Gem::Version
117
103
  version: '0'
118
104
  type: :development
119
105
  prerelease: false
120
106
  version_requirements: !ruby/object:Gem::Requirement
121
107
  requirements:
122
- - - '>='
108
+ - - ">="
123
109
  - !ruby/object:Gem::Version
124
110
  version: '0'
125
111
  description: Taking care of synchronizing external data with referential data in your
@@ -130,8 +116,8 @@ executables: []
130
116
  extensions: []
131
117
  extra_rdoc_files: []
132
118
  files:
133
- - .gitignore
134
- - .travis.yml
119
+ - ".gitignore"
120
+ - ".travis.yml"
135
121
  - Gemfile
136
122
  - LICENSE.txt
137
123
  - README.md
@@ -183,17 +169,17 @@ require_paths:
183
169
  - lib
184
170
  required_ruby_version: !ruby/object:Gem::Requirement
185
171
  requirements:
186
- - - '>='
172
+ - - ">="
187
173
  - !ruby/object:Gem::Version
188
174
  version: '0'
189
175
  required_rubygems_version: !ruby/object:Gem::Requirement
190
176
  requirements:
191
- - - '>='
177
+ - - ">="
192
178
  - !ruby/object:Gem::Version
193
179
  version: '0'
194
180
  requirements: []
195
181
  rubyforge_project:
196
- rubygems_version: 2.1.11
182
+ rubygems_version: 2.4.5
197
183
  signing_key:
198
184
  specification_version: 4
199
185
  summary: BeetleETL helps you with your recurring ETL imports.