data-import 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,54 @@
1
+ require 'data-import/dsl/import'
2
+ require 'data-import/dsl/import/from'
3
+
4
module DataImport
  # Evaluation context for import configurations. DSL calls made inside a
  # configuration are dispatched to an instance of this class, which records
  # their effect on the execution plan it was created with.
  class Dsl
    class << self

      # Reads +file+ and evaluates its content as DSL code.
      #
      # @param file [String] path of the configuration file
      # @return [DataImport::ExecutionPlan] the plan filled by the configuration
      def evaluate_import_config(file)
        DataImport::ExecutionPlan.new.tap do |plan|
          # The path is passed along as the file name of the evaluated code.
          new(plan).instance_eval(read_import_config(file), file)
        end
      end

      # Evaluates the given block as DSL code.
      #
      # @yield the import configuration, run in the context of a Dsl instance
      # @return [DataImport::ExecutionPlan] the plan filled by the block
      def define(&block)
        DataImport::ExecutionPlan.new.tap do |plan|
          new(plan).instance_eval(&block)
        end
      end

      # Returns the raw content of the configuration file.
      def read_import_config(file)
        File.read(file)
      end
      private :read_import_config

    end

    attr_reader :source_database, :target_database

    # @param plan the execution plan that collects definitions and the
    #   before filter declared through this context
    def initialize(plan)
      @plan = plan
    end

    # Connects the source database; all arguments are forwarded to
    # DataImport::Database.connect.
    def source(*args)
      @source_database = DataImport::Database.connect(*args)
    end

    # Connects the target database; all arguments are forwarded to
    # DataImport::Database.connect.
    def target(*args)
      @target_database = DataImport::Database.connect(*args)
    end

    # Registers a simple definition called +name+ on the plan, using the
    # source and target databases known at the time of the call. An optional
    # block is evaluated in the context of an Import for that definition.
    def import(name, &block)
      simple = DataImport::Definition::Simple.new(name, source_database, target_database)
      @plan.add_definition(simple)
      return unless block

      Import.new(simple).instance_eval(&block)
    end

    # Stores the block as the plan's before filter.
    def before_filter(&block)
      @plan.before_filter = block
    end
  end
end
@@ -0,0 +1,48 @@
1
module DataImport
  class Dsl
    # DSL context for the body of an import: every call configures the
    # definition handed to the constructor.
    class Import
      attr_reader :definition

      # @param definition the definition configured by this context
      def initialize(definition)
        @definition = definition
      end

      # Sets the source table and source primary key of the definition. Both
      # are assigned on every call, so omitted values are stored as nil. An
      # optional block is evaluated in the context of a From object and may
      # refine the source settings.
      #
      # @param name the source table name
      # @param options [Hash] :primary_key names the source primary key
      def from(name = nil, options = {}, &block)
        definition.source_table_name = name
        definition.source_primary_key = options[:primary_key]
        return unless block

        From.new(definition).instance_eval(&block)
      end

      # Sets the target table; switches the definition to update mode when
      # :mode => :update is passed.
      def to(name, options = {})
        definition.target_table_name = name
        update_requested = options[:mode] == :update
        definition.use_mode(:update) if update_requested
      end

      # Adds mappings to the definition. A leading Hash is merged into the
      # existing mappings. Otherwise the block is stored under the given
      # names: a single name is used as the key itself, several names are
      # used as an array key.
      def mapping(*hash_or_symbols, &block)
        head = hash_or_symbols.first
        return definition.mappings.merge!(head) if head.is_a?(Hash)

        key = hash_or_symbols.count == 1 ? head : hash_or_symbols
        definition.mappings[key] = block
      end

      # Appends the block to the definition's after blocks.
      def after(&block)
        definition.after_blocks << block
      end

      # Appends the block to the definition's after-row blocks.
      def after_row(&block)
        definition.after_row_blocks << block
      end

      # Declares each given name as a dependency of the definition.
      def dependencies(*dependencies)
        dependencies.each { |dependency| definition.add_dependency(dependency) }
      end

    end
  end
end
@@ -0,0 +1,35 @@
1
module DataImport
  class Dsl
    class Import
      # DSL context for the block form of +from+: every call sets one of the
      # source_* attributes of the definition.
      class From

        attr_reader :definition

        # @param definition the definition whose source settings are configured
        def initialize(definition)
          @definition = definition
        end

        # Sets the name of the source table.
        def table(name)
          definition.source_table_name = name
        end

        # Sets the primary key of the source table.
        def primary_key(name)
          definition.source_primary_key = name
        end

        # Adds the given columns to the source columns, skipping ones already
        # present. A trailing Hash is taken off the argument list and its
        # :distinct entry is stored as the distinct setting.
        def columns(*args)
          definition.source_distinct_columns = args.pop[:distinct] if args.last.is_a?(Hash)
          definition.source_columns = definition.source_columns | args
        end

        # Adds the given columns to the order columns, skipping ones already
        # present.
        def order(*args)
          definition.source_order_columns = definition.source_order_columns | args
        end

      end
    end
  end
end
@@ -0,0 +1,16 @@
1
module DataImport
  # Holds the definitions of an import together with an optional before
  # filter.
  class ExecutionPlan

    # definitions is read-only from the outside; before_filter can be
    # replaced at any time.
    attr_reader :definitions
    attr_accessor :before_filter

    # @param definitions [Array] initial definitions; the array is stored
    #   as given, not copied
    def initialize(definitions = [])
      @definitions = definitions
    end

    # Appends +definition+ to the plan.
    #
    # @return [Array] the definitions including the new one
    def add_definition(definition)
      definitions.push(definition)
    end

  end
end
@@ -0,0 +1,55 @@
1
module DataImport
  # Runs a single definition: reads every row of its source table, maps it
  # and writes the result to the target database.
  class Importer

    # @param context object providing +before_filter+; it is also passed as
    #   first argument to mapping, after-row and after blocks
    # @param definition the definition to import
    def initialize(context, definition)
      @context = context
      @definition = definition
    end

    # Imports all rows inside a transaction on the target database, then
    # executes the definition's after blocks outside of that transaction.
    #
    # @yield once per imported row, without arguments
    def run
      @definition.target_database.transaction do
        options = read_options
        @definition.source_database.each_row(@definition.source_table_name, options) do |row|
          @context.before_filter && @context.before_filter.call(row)
          import_row(row)
          yield if block_given?
        end
      end

      @definition.after_blocks.each { |block| @definition.instance_exec(@context, &block) }
    end

    # Options passed to +each_row+, taken from the definition's source
    # settings.
    def read_options
      {
        :primary_key => @definition.source_primary_key,
        :columns => @definition.source_columns,
        :distinct => @definition.source_distinct_columns,
        :order => @definition.source_order_columns
      }
    end

    # Maps one source row, writes it to the target and executes the
    # after-row blocks with the context, the source row and the mapped row.
    def import_row(row)
      mapped_row = map_row(row)
      write_row(row, mapped_row)

      @definition.after_row_blocks.each do |block|
        @definition.instance_exec(@context, row, mapped_row, &block)
      end
    end

    # Builds the target row from the definition's mappings. A plain mapping
    # copies one source column to the new key. A callable mapping is run in
    # the context of the definition with the context and the values of its
    # source column(s); a present result is merged into the target row.
    def map_row(row)
      @definition.mappings.each_with_object({}) do |(old_key, new_key), mapped_row|
        unless new_key.respond_to?(:call)
          mapped_row[new_key] = row[old_key.to_sym]
          next
        end

        keys = old_key.is_a?(Array) ? old_key : [old_key]
        values = keys.map { |key| row[key.to_sym] }
        mapped_values = @definition.instance_exec(@context, *values, &new_key)
        mapped_row.merge!(mapped_values) if mapped_values.present?
      end
    end

    # Writes the mapped row according to the definition's mode. Inserts also
    # record which new id the source primary key value was mapped to.
    def write_row(row, mapped_row)
      case @definition.mode
      when :insert
        new_id = @definition.target_database.insert_row(@definition.target_table_name, mapped_row)
        @definition.add_id_mapping(row[@definition.source_primary_key] => new_id)
      when :update
        @definition.target_database.update_row(@definition.target_table_name, mapped_row)
      end
    end

    private :read_options, :import_row, :map_row, :write_row
  end
end
@@ -0,0 +1,62 @@
1
module DataImport
  # Executes the definitions of an execution plan so that every definition
  # runs after the definitions it depends on. Executed definitions are
  # remembered across calls to #run and are not executed again.
  class Runner

    # @param plan the execution plan; its definitions are indexed by name
    def initialize(plan)
      @plan = plan
      @definitions = Hash[@plan.definitions.map { |definition| [definition.name, definition] }]
      @executed_definitions = []
    end

    # Runs the requested definitions together with their direct and indirect
    # dependencies.
    #
    # @param options [Hash] :only restricts the run to the given definition
    #   names; without it every known definition is executed
    # @raise [RuntimeError] when a definition is unknown, dependencies are
    #   circular, or a pass over the pending definitions executes nothing
    def run(options = {})
      definitions_to_execute = definitions_for_execution(options[:only])

      # Pending work is derived from the names themselves rather than from
      # counts, so definitions executed by an earlier call are not mistaken
      # for progress on this one.
      until (pending = definitions_to_execute - @executed_definitions).empty?
        did_execute = false
        pending.each do |name|
          candidate = definition(name)
          next unless (candidate.dependencies - @executed_definitions).blank?

          candidate.run(self)
          @executed_definitions << name
          did_execute = true
        end
        next if did_execute

        unmet = pending.map { |name| definition(name).dependencies - @executed_definitions }.flatten.uniq
        raise "something went wrong! Could not execute all necessary definitions: #{unmet}"
      end
    end

    # Looks up a definition by name.
    #
    # @raise [RuntimeError] when no definition is registered under +name+
    def definition(name)
      raise "no definition found for '#{name}'" unless @definitions[name].present?
      @definitions[name]
    end

    # The before filter of the plan.
    def before_filter
      @plan.before_filter
    end

    # Names of the definitions to execute: the requested ones (all when
    # +run_only+ is nil) followed by their dependencies, without duplicates.
    def definitions_for_execution(run_only = nil)
      (run_only || @definitions.keys).map do |name|
        [name] + dependencies(name)
      end.flatten.uniq
    end
    private :definitions_for_execution

    # Direct and indirect dependencies of the definition called +name+.
    # +visited_definitions+ carries the names on the current path and is
    # used to detect cycles.
    #
    # @raise [RuntimeError] when a dependency is already on the current path
    def dependencies(name, visited_definitions = [])
      direct_dependencies = definition(name).dependencies
      indirect_dependencies = direct_dependencies.map do |dep|
        raise RuntimeError, "circular dependencies: '#{name}' <-> '#{dep}'" if visited_definitions.include?(dep)
        dependencies(dep, visited_definitions + [name])
      end.flatten
      direct_dependencies + indirect_dependencies
    end
    private :dependencies

  end
end
@@ -0,0 +1,3 @@
1
module DataImport
  # Version of the data-import gem. Frozen so the constant cannot be
  # modified in place.
  VERSION = "0.0.1".freeze
end
@@ -0,0 +1,12 @@
1
# CI entry point: load RVM, apply the project's .rvmrc, install the gems and
# run the specs.

# Load RVM from the user installation or, failing that, from the system-wide
# one. A missing installation is reported but does not stop the script.
if [[ -s "$HOME/.rvm/scripts/rvm" ]] ; then
  source "$HOME/.rvm/scripts/rvm"
elif [[ -s "/usr/local/rvm/scripts/rvm" ]] ; then
  source "/usr/local/rvm/scripts/rvm"
else
  printf "ERROR: An RVM installation was not found.\n"
fi

# Source the file instead of executing its output as a single command, so
# that multi-line files, comments and quoting are handled by the shell.
source ./.rvmrc
gem install bundler --conservative --no-ri --no-rdoc
bundle install
rake ci:rspec
@@ -0,0 +1,159 @@
1
+ require 'spec_helper'
2
+
3
+ require 'data-import/adapters/sequel'
4
+
5
# Specs for the Sequel adapter. The database is replaced by a dummy class
# whose +from+ returns a shared stand-in table object.
describe DataImport::Adapters::Sequel do

  let(:table) { Object.new }
  let(:dummy_db) do
    class DummyDB
    end
    DummyDB.any_instance.stub(:from).and_return { table }
    DummyDB
  end
  let(:db) { dummy_db.new }
  subject { DataImport::Adapters::Sequel.new db }

  describe ".connect" do
    subject { DataImport::Adapters::Sequel }

    it "connects to the database" do
      Sequel.should_receive(:connect).with(:adapter => :test)
      subject.connect :adapter => :test
    end

    it "returns an instance of DataImport::Adapters::Sequel" do
      Sequel.stub(:connect)
      subject.connect.should be_a(DataImport::Adapters::Sequel)
    end
  end

  describe "#truncate" do
    it "deletes all rows from a table" do
      table.should_receive(:delete)
      subject.truncate('svp')
    end
  end

  describe "#transaction" do
    let(:block) { Proc.new {} }

    it "runs the block in a transaction" do
      db.should_receive(:transaction)
      subject.transaction &block
    end
  end

  describe ".each_row" do
    it "delegates to .each_row_in_batches if there is a numeric primary key" do
      subject.stub(:numeric_column?).and_return { true }
      subject.should_receive(:each_row_in_batches).with(:abc, :primary_key => :PersonenID)
      subject.each_row(:abc, :primary_key => :PersonenID)
    end

    it "delegates to .each_row_without_batches if there is a primary key other than numeric" do
      subject.stub(:numeric_column?).and_return { false }
      subject.should_receive(:each_row_without_batches).with(:abc, :primary_key => :PersonenID)
      subject.each_row(:abc, :primary_key => :PersonenID)
    end

    it "delegates to .each_row_without_batches if there is no primary key" do
      subject.should_receive(:each_row_without_batches).with(:abc, {})
      subject.each_row(:abc)
    end
  end

  describe "#each_row_without_batches" do
    # Two separate rows. Written without the inner braces, the literal is a
    # single hash with a duplicated :id key, i.e. a one-element array.
    let(:resultset) { [{:id => 2}, {:id => 4}] }
    let(:proxy) { Object.new }

    it "selects all rows from the database" do
      db.stub_chain(:from, :each).and_return { proxy }
      db.should_receive(:from).with('Personen')
      subject.each_row_without_batches('Personen')
    end

    it "yields each item of the resultset" do
      db.stub_chain(:from, :each).and_yield(resultset[0])
                                 .and_yield(resultset[1])
      # Every yielded row is removed from the copy; nothing may be left over.
      left_results = resultset.clone
      subject.each_row_without_batches('Personen') do |row|
        left_results.delete(row)
      end
      left_results.should be_empty
    end
  end

  describe "#each_row_in_batches" do
    it "gets the maximum id" do
      subject.should_receive(:maximum_value)
      table.stub(:filter)
      subject.each_row_in_batches('abc', :primary_key => :PersonenID)
    end

    it "selects batches of a fixed size" do
      subject.stub(:maximum_value).and_return { 2000 }
      table.stub(:filter)
      table.should_receive(:filter).with(:PersonenID => 0..999)
      table.should_receive(:filter).with(:PersonenID => 1000..1999)
      subject.each_row_in_batches('abc', :primary_key => :PersonenID)
    end
  end

  describe "#maximum_value" do
    it "selects the maximum value of a column" do
      table.should_receive(:max).with(:PersonenID)
      subject.maximum_value('abc', :PersonenID)
    end
  end

  describe "#count" do
    it "returns the amount of rows of a table" do
      table.should_receive(:count)
      subject.count('abc')
    end
  end

  describe "#insert_row" do
    it "inserts a single row into the database" do
      table.should_receive(:insert).with(:id => 29)
      subject.insert_row(:abc, :id => 29)
    end
  end

  describe "#update_row" do
    it 'updates the row with the given id' do
      filtered_records = stub
      table.should_receive(:filter).with(:id => 9).and_return(filtered_records)
      filtered_records.should_receive(:update).with(:name => 'Hans', :alter => 17)
      subject.update_row(:abc, {:id => 9, :name => 'Hans', :alter => 17})
    end

    it 'works with string keys' do
      filtered_records = stub
      table.should_receive(:filter).with(:id => 11).and_return(filtered_records)
      filtered_records.should_receive(:update).with('name' => 'Hans', 'alter' => 17)
      subject.update_row(:abc, {'id' => 11, 'name' => 'Hans', 'alter' => 17})
    end
  end

  describe "numeric_column?" do
    it "returns true for numbers" do
      db.stub(:schema).and_return { [[:id, {:type => :integer}]] }
      subject.numeric_column?(:table, :id).should be_true
    end

    it "returns false for other column types" do
      db.stub(:schema).and_return { [[:id, {:type => :string}]] }
      subject.numeric_column?(:table, :id).should be_false
    end
  end

  describe "#unique_row" do
    it "returns a row by its key" do
      db.stub_chain(:from, :[]).and_return { {:id => 5, :a => 7} }
      subject.unique_row(:table, 5).should == {:id => 5, :a => 7}
    end
  end

end