data-import 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +6 -0
- data/.rvmrc +1 -0
- data/Gemfile +4 -0
- data/README.md +157 -0
- data/Rakefile +20 -0
- data/data-import.gemspec +29 -0
- data/lib/data-import.rb +35 -0
- data/lib/data-import/adapters/sequel.rb +96 -0
- data/lib/data-import/database.rb +28 -0
- data/lib/data-import/definition.rb +20 -0
- data/lib/data-import/definition/simple.rb +59 -0
- data/lib/data-import/dsl.rb +54 -0
- data/lib/data-import/dsl/import.rb +48 -0
- data/lib/data-import/dsl/import/from.rb +35 -0
- data/lib/data-import/execution_plan.rb +16 -0
- data/lib/data-import/importer.rb +55 -0
- data/lib/data-import/runner.rb +62 -0
- data/lib/data-import/version.rb +3 -0
- data/scripts/ci.sh +12 -0
- data/spec/data-import/adapters/sequel_spec.rb +159 -0
- data/spec/data-import/database_spec.rb +24 -0
- data/spec/data-import/definition/simple_spec.rb +71 -0
- data/spec/data-import/definition_spec.rb +14 -0
- data/spec/data-import/dsl/import/from_spec.rb +43 -0
- data/spec/data-import/dsl/import_spec.rb +87 -0
- data/spec/data-import/dsl_spec.rb +99 -0
- data/spec/data-import/execution_plan_spec.rb +25 -0
- data/spec/data-import/importer_spec.rb +150 -0
- data/spec/data-import/runner_spec.rb +136 -0
- data/spec/data-import_spec.rb +34 -0
- data/spec/integration/before_block_spec.rb +59 -0
- data/spec/integration/simple_mappings_spec.rb +68 -0
- data/spec/integration/update_records_spec.rb +57 -0
- data/spec/junit_formatter.rb +106 -0
- data/spec/spec_helper.rb +8 -0
- metadata +164 -0
@@ -0,0 +1,54 @@
|
|
1
|
+
require 'data-import/dsl/import'
|
2
|
+
require 'data-import/dsl/import/from'
|
3
|
+
|
4
|
+
module DataImport
  # Evaluation context for the top-level import DSL.
  #
  # The statements of an import configuration (+source+, +target+, +import+,
  # +before_filter+) are evaluated against an instance of this class; every
  # +import+ statement registers a definition on the execution plan that was
  # handed to the constructor.
  class Dsl
    class << self

      # Evaluates the configuration stored in +file+ against a fresh DSL
      # context and returns the execution plan that was filled in.
      # The file name is passed to +instance_eval+ so that errors raised by
      # the configuration are reported against that file.
      def evaluate_import_config(file)
        DataImport::ExecutionPlan.new.tap do |plan|
          new(plan).instance_eval(read_import_config(file), file)
        end
      end

      # Evaluates +block+ against a fresh DSL context and returns the
      # execution plan that was filled in.
      def define(&block)
        DataImport::ExecutionPlan.new.tap do |plan|
          new(plan).instance_eval(&block)
        end
      end

      private

      # Returns the raw contents of the configuration file.
      def read_import_config(file)
        File.read(file)
      end

    end

    attr_reader :source_database, :target_database

    # plan - the execution plan that collects definitions and the before filter.
    def initialize(plan)
      @plan = plan
    end

    # Connects to the source database; the arguments are forwarded unchanged
    # to DataImport::Database.connect.
    def source(*args)
      @source_database = DataImport::Database.connect(*args)
    end

    # Connects to the target database; the arguments are forwarded unchanged
    # to DataImport::Database.connect.
    def target(*args)
      @target_database = DataImport::Database.connect(*args)
    end

    # Registers a new simple definition called +name+ on the plan. When a
    # block is given it is evaluated against an Import context wrapping the
    # new definition.
    def import(name, &block)
      definition = DataImport::Definition::Simple.new(name, source_database, target_database)
      @plan.add_definition(definition)

      return unless block

      Import.new(definition).instance_eval(&block)
    end

    # Stores +block+ as the plan's before filter.
    def before_filter(&block)
      @plan.before_filter = block
    end
  end
end
|
@@ -0,0 +1,48 @@
|
|
1
|
+
module DataImport
  class Dsl
    # Evaluation context for the body of an +import+ statement. Every DSL
    # method writes its settings to the wrapped definition.
    class Import
      attr_reader :definition

      # definition - the definition configured by this context.
      def initialize(definition)
        @definition = definition
      end

      # Sets the source table name and the source primary key (taken from
      # options[:primary_key]). A given block is evaluated against a From
      # context wrapping the same definition.
      def from(name = nil, options = {}, &block)
        definition.source_table_name = name
        definition.source_primary_key = options[:primary_key]

        From.new(definition).instance_eval(&block) if block
      end

      # Sets the target table name; switches the definition to update mode
      # when options[:mode] is :update.
      def to(name, options = {})
        definition.target_table_name = name
        definition.use_mode(:update) if options[:mode] == :update
      end

      # Registers column mappings.
      #
      # With a hash as first argument the hash is merged into the mappings.
      # Otherwise the given block is stored under the listed names: a single
      # name is used as the key itself, several names are used as an array key.
      def mapping(*hash_or_symbols, &block)
        head = hash_or_symbols.first

        if head.is_a?(Hash)
          definition.mappings.merge!(head)
        else
          key = hash_or_symbols.count == 1 ? head : hash_or_symbols
          definition.mappings[key] = block
        end
      end

      # Appends +block+ to the definition's after blocks.
      def after(&block)
        definition.after_blocks << block
      end

      # Appends +block+ to the definition's after-row blocks.
      def after_row(&block)
        definition.after_row_blocks << block
      end

      # Adds every given name as a dependency of the definition.
      def dependencies(*dependencies)
        dependencies.each { |dependency| definition.add_dependency(dependency) }
      end

    end
  end
end
|
@@ -0,0 +1,35 @@
|
|
1
|
+
module DataImport
  class Dsl
    class Import
      # Evaluation context for the block form of +from+. Each DSL method
      # writes a source-side setting to the wrapped definition.
      class From

        attr_reader :definition

        # definition - the definition configured by this context.
        def initialize(definition)
          @definition = definition
        end

        # Sets the name of the source table.
        def table(name)
          definition.source_table_name = name
        end

        # Sets the primary key of the source table.
        def primary_key(name)
          definition.source_primary_key = name
        end

        # Adds the given column names to the source columns, skipping names
        # that are already listed. A trailing hash is treated as options and
        # its :distinct entry is stored on the definition.
        def columns(*args)
          options = args.last.is_a?(Hash) ? args.pop : nil
          definition.source_distinct_columns = options[:distinct] if options
          definition.source_columns |= args
        end

        # Adds the given column names to the source ordering, skipping names
        # that are already listed.
        def order(*args)
          definition.source_order_columns |= args
        end

      end
    end
  end
end
|
@@ -0,0 +1,16 @@
|
|
1
|
+
module DataImport
  # Collects the definitions of an import together with an optional
  # before filter.
  class ExecutionPlan

    attr_accessor :before_filter
    attr_reader :definitions

    # definitions - initial collection of definitions (an empty array when omitted).
    def initialize(definitions = [])
      @definitions = definitions
    end

    # Appends +definition+ to the plan.
    def add_definition(definition)
      definitions << definition
    end

  end
end
|
@@ -0,0 +1,55 @@
|
|
1
|
+
module DataImport
  # Imports the rows of a single definition: reads each row from the
  # definition's source database, maps it and writes it to the target database.
  class Importer

    # context    - object providing +before_filter+; it is also passed as the
    #              first argument to mapping, after and after-row blocks.
    # definition - the definition to import.
    def initialize(context, definition)
      @context = context
      @definition = definition
    end

    # Imports all source rows inside a transaction on the target database and
    # yields once per imported row when a block is given. The definition's
    # after blocks are executed once the transaction block has returned.
    def run
      @definition.target_database.transaction do
        options = source_options
        @definition.source_database.each_row(@definition.source_table_name, options) do |row|
          @context.before_filter.call(row) if @context.before_filter
          import_row(row)
          yield if block_given?
        end
      end

      @definition.after_blocks.each { |block| @definition.instance_exec(@context, &block) }
    end

    # Collects the read options handed to the source database's +each_row+.
    def source_options
      {
        :primary_key => @definition.source_primary_key,
        :columns     => @definition.source_columns,
        :distinct    => @definition.source_distinct_columns,
        :order       => @definition.source_order_columns
      }
    end
    private :source_options

    # Maps a single source row and writes it according to the definition's
    # mode, then runs the after-row blocks with the original and mapped row.
    def import_row(row)
      mapped_row = {}

      @definition.mappings.each do |source, target|
        unless target.respond_to?(:call)
          # Plain mapping: copy the source column to the target column.
          mapped_row[target] = row[source.to_sym]
          next
        end

        # Block mapping: the block receives the context followed by the values
        # of the listed source columns and returns a hash of target values.
        columns = source.is_a?(Array) ? source : [source]
        arguments = [@context] + columns.map { |column| row[column.to_sym] }
        mapped_values = @definition.instance_exec(*arguments, &target)
        mapped_row.merge!(mapped_values) if mapped_values.present?
      end

      target_database = @definition.target_database
      case @definition.mode
      when :insert
        new_id = target_database.insert_row(@definition.target_table_name, mapped_row)
        # Record which source id ended up under which target id.
        @definition.add_id_mapping(row[@definition.source_primary_key] => new_id)
      when :update
        target_database.update_row(@definition.target_table_name, mapped_row)
      end

      @definition.after_row_blocks.each do |block|
        @definition.instance_exec(@context, row, mapped_row, &block)
      end
    end
    private :import_row
  end
end
|
@@ -0,0 +1,62 @@
|
|
1
|
+
module DataImport
  # Executes the definitions of an execution plan so that every definition
  # runs only after all of its dependencies have been executed.
  class Runner

    # plan - object providing +definitions+ (each responding to +name+,
    #        +dependencies+ and +run+) and +before_filter+.
    def initialize(plan)
      @plan = plan
      # Index the definitions by name for lookups during dependency resolution.
      @definitions = Hash[@plan.definitions.map do |definition|
        [definition.name, definition]
      end]

      @executed_definitions = []
    end

    # Runs the definitions named in options[:only] (all definitions when the
    # option is missing) together with their direct and indirect dependencies.
    #
    # Definitions this runner has already executed are not executed again.
    #
    # Raises RuntimeError when a definition is unknown, when dependencies are
    # circular, or when no pending definition can be executed anymore.
    def run(options = {})
      definitions_to_execute = definitions_for_execution(options[:only])

      # Compare by name rather than by count: the list of executed definitions
      # survives across calls, so a count comparison could end the loop before
      # the definitions requested by this call were executed.
      until (pending = definitions_to_execute - @executed_definitions).empty?
        did_execute = false

        pending.each do |name|
          candidate = definition(name)
          next unless (candidate.dependencies - @executed_definitions).empty?

          candidate.run(self)
          @executed_definitions << name
          did_execute = true
        end

        next if did_execute

        # A full pass without progress would loop forever; report what the
        # remaining definitions are still waiting for.
        unmet_dependencies = pending.map do |name|
          definition(name).dependencies - @executed_definitions
        end.flatten.uniq
        raise "something went wrong! Could not execute all necessary definitions: #{unmet_dependencies}"
      end
    end

    # Returns the definition registered under +name+.
    # Raises RuntimeError when there is no such definition.
    def definition(name)
      found = @definitions[name]
      raise "no definition found for '#{name}'" unless found
      found
    end

    # Returns the plan's before filter.
    def before_filter
      @plan.before_filter
    end

    # Returns the unique names of the requested definitions (all definitions
    # when +run_only+ is nil), each followed by its dependencies.
    def definitions_for_execution(run_only = nil)
      (run_only || @definitions.keys).map do |name|
        [name] + dependencies(name)
      end.flatten.uniq
    end
    private :definitions_for_execution

    # Returns the direct and indirect dependencies of the definition +name+.
    # +visited_definitions+ holds the definitions on the current resolution
    # path and is used to detect circular dependencies.
    def dependencies(name, visited_definitions = [])
      direct_dependencies = definition(name).dependencies
      indirect_dependencies = direct_dependencies.map do |dep|
        raise RuntimeError, "ciruclar dependencies: '#{name}' <-> '#{dep}'" if visited_definitions.include?(dep)
        dependencies(dep, visited_definitions + [name])
      end.flatten
      direct_dependencies + indirect_dependencies
    end
    private :dependencies

  end
end
|
data/scripts/ci.sh
ADDED
@@ -0,0 +1,12 @@
|
|
1
|
+
# Load RVM from the per-user location if present, otherwise from the
# system-wide location; report when neither script exists.
rvm_found=""
for rvm_script in "$HOME/.rvm/scripts/rvm" "/usr/local/rvm/scripts/rvm" ; do
  if [[ -s "$rvm_script" ]] ; then
    source "$rvm_script"
    rvm_found="yes"
    break
  fi
done
if [[ -z "$rvm_found" ]] ; then
  printf "ERROR: An RVM installation was not found.\n"
fi

# Execute the contents of .rvmrc as a command.
$(cat .rvmrc)
gem install bundler --conservative --no-ri --no-rdoc
bundle install
rake ci:rspec
|
@@ -0,0 +1,159 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
require 'data-import/adapters/sequel'
|
4
|
+
|
5
|
+
describe DataImport::Adapters::Sequel do
|
6
|
+
|
7
|
+
let(:table) { Object.new }
|
8
|
+
let(:dummy_db) do
|
9
|
+
class DummyDB
|
10
|
+
end
|
11
|
+
DummyDB.any_instance.stub(:from).and_return { table }
|
12
|
+
DummyDB
|
13
|
+
end
|
14
|
+
let(:db) { dummy_db.new }
|
15
|
+
subject { DataImport::Adapters::Sequel.new db }
|
16
|
+
|
17
|
+
describe ".connect" do
|
18
|
+
subject { DataImport::Adapters::Sequel }
|
19
|
+
|
20
|
+
it "connects to the database" do
|
21
|
+
Sequel.should_receive(:connect).with(:adapter => :test)
|
22
|
+
subject.connect :adapter => :test
|
23
|
+
end
|
24
|
+
|
25
|
+
it "returns an instance of DataImport::Adapters::Sequel" do
|
26
|
+
Sequel.stub(:connect)
|
27
|
+
subject.connect.should be_a(DataImport::Adapters::Sequel)
|
28
|
+
end
|
29
|
+
end
|
30
|
+
|
31
|
+
describe "#truncate" do
|
32
|
+
it "deletes all rows from a table" do
|
33
|
+
table.should_receive(:delete)
|
34
|
+
subject.truncate('svp')
|
35
|
+
end
|
36
|
+
end
|
37
|
+
|
38
|
+
describe "#transaction" do
|
39
|
+
let(:block) { Proc.new {} }
|
40
|
+
|
41
|
+
it "runs the block in a transaction" do
|
42
|
+
db.should_receive(:transaction)
|
43
|
+
subject.transaction &block
|
44
|
+
end
|
45
|
+
end
|
46
|
+
|
47
|
+
describe ".each_row" do
|
48
|
+
it "delegates to .each_row_in_batches if there is a numeric primary key" do
|
49
|
+
subject.stub(:numeric_column?).and_return { true }
|
50
|
+
subject.should_receive(:each_row_in_batches).with(:abc, :primary_key => :PersonenID)
|
51
|
+
subject.each_row(:abc, :primary_key => :PersonenID)
|
52
|
+
end
|
53
|
+
|
54
|
+
it "delegates to .each_row_without_batches if there is a primary key other than numeric" do
|
55
|
+
subject.stub(:numeric_column?).and_return { false }
|
56
|
+
subject.should_receive(:each_row_without_batches).with(:abc, :primary_key => :PersonenID)
|
57
|
+
subject.each_row(:abc, :primary_key => :PersonenID)
|
58
|
+
end
|
59
|
+
|
60
|
+
it "delegates to .each_row_without_batches if there is no primary key" do
|
61
|
+
subject.should_receive(:each_row_without_batches).with(:abc, {})
|
62
|
+
subject.each_row(:abc)
|
63
|
+
end
|
64
|
+
end
|
65
|
+
|
66
|
+
describe "#each_row_without_batches" do
  # Two separate rows. Each row needs its own braces: without them both
  # :id entries collapse into one hash and the result set has a single row.
  let(:resultset) { [{:id => 2}, {:id => 4}] }
  let(:proxy) { Object.new }

  it "selects all rows from the database" do
    db.stub_chain(:from, :each).and_return { proxy }
    db.should_receive(:from).with('Personen')
    subject.each_row_without_batches('Personen')
  end

  it "yields each item of the resultset" do
    db.stub_chain(:from, :each).and_yield(resultset[0])
      .and_yield(resultset[1])
    # Every yielded row is removed from the copy; nothing may be left over.
    left_results = resultset.clone
    subject.each_row_without_batches('Personen') do |row|
      left_results.delete(row)
    end
    left_results.should be_empty
  end
end
|
86
|
+
|
87
|
+
describe "#each_row_in_batches" do
|
88
|
+
it "gets the maximum id" do
|
89
|
+
subject.should_receive(:maximum_value)
|
90
|
+
table.stub(:filter)
|
91
|
+
subject.each_row_in_batches('abc', :primary_key => :PersonenID)
|
92
|
+
end
|
93
|
+
|
94
|
+
it "selects batches of a fixed size" do
|
95
|
+
subject.stub(:maximum_value).and_return { 2000 }
|
96
|
+
table.stub(:filter)
|
97
|
+
table.should_receive(:filter).with(:PersonenID => 0..999)
|
98
|
+
table.should_receive(:filter).with(:PersonenID => 1000..1999)
|
99
|
+
subject.each_row_in_batches('abc', :primary_key => :PersonenID)
|
100
|
+
end
|
101
|
+
end
|
102
|
+
|
103
|
+
describe "#maximum_value" do
|
104
|
+
it "selects the maximum value of a column" do
|
105
|
+
table.should_receive(:max).with(:PersonenID)
|
106
|
+
subject.maximum_value('abc', :PersonenID)
|
107
|
+
end
|
108
|
+
end
|
109
|
+
|
110
|
+
describe "#count" do
|
111
|
+
it "returns the amount of rows of a table" do
|
112
|
+
table.should_receive(:count)
|
113
|
+
subject.count('abc')
|
114
|
+
end
|
115
|
+
end
|
116
|
+
|
117
|
+
describe "#insert_row" do
|
118
|
+
it "inserts a single row into the database" do
|
119
|
+
table.should_receive(:insert).with(:id => 29)
|
120
|
+
subject.insert_row(:abc, :id => 29)
|
121
|
+
end
|
122
|
+
end
|
123
|
+
|
124
|
+
describe "#update_row" do
|
125
|
+
it 'updates the row with the given id' do
|
126
|
+
filtered_records = stub
|
127
|
+
table.should_receive(:filter).with(:id => 9).and_return(filtered_records)
|
128
|
+
filtered_records.should_receive(:update).with(:name => 'Hans', :alter => 17)
|
129
|
+
subject.update_row(:abc, {:id => 9, :name => 'Hans', :alter => 17})
|
130
|
+
end
|
131
|
+
|
132
|
+
it 'works with string keys' do
|
133
|
+
filtered_records = stub
|
134
|
+
table.should_receive(:filter).with(:id => 11).and_return(filtered_records)
|
135
|
+
filtered_records.should_receive(:update).with('name' => 'Hans', 'alter' => 17)
|
136
|
+
subject.update_row(:abc, {'id' => 11, 'name' => 'Hans', 'alter' => 17})
|
137
|
+
end
|
138
|
+
end
|
139
|
+
|
140
|
+
describe "numeric_column?" do
|
141
|
+
it "returns true for numbers" do
|
142
|
+
db.stub(:schema).and_return { [[:id, {:type => :integer}]] }
|
143
|
+
subject.numeric_column?(:table, :id).should be_true
|
144
|
+
end
|
145
|
+
|
146
|
+
it "returns false for other column types" do
|
147
|
+
db.stub(:schema).and_return { [[:id, {:type => :string}]] }
|
148
|
+
subject.numeric_column?(:table, :id).should be_false
|
149
|
+
end
|
150
|
+
end
|
151
|
+
|
152
|
+
describe "#unique_row" do
|
153
|
+
it "returns a row by its key" do
|
154
|
+
db.stub_chain(:from, :[]).and_return { {:id => 5, :a => 7} }
|
155
|
+
subject.unique_row(:table, 5).should == {:id => 5, :a => 7}
|
156
|
+
end
|
157
|
+
end
|
158
|
+
|
159
|
+
end
|