abstract_importer 1.2.0.rc1 → 1.2.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +20 -2
- data/abstract_importer.gemspec +1 -0
- data/lib/abstract_importer/base.rb +26 -3
- data/lib/abstract_importer/collection.rb +15 -0
- data/lib/abstract_importer/collection_importer.rb +8 -69
- data/lib/abstract_importer/import_options.rb +21 -35
- data/lib/abstract_importer/reporters.rb +4 -0
- data/lib/abstract_importer/reporters/base_reporter.rb +72 -0
- data/lib/abstract_importer/reporters/debug_reporter.rb +131 -0
- data/lib/abstract_importer/reporters/null_reporter.rb +19 -0
- data/lib/abstract_importer/reporters/performance_reporter.rb +103 -0
- data/lib/abstract_importer/strategies.rb +2 -0
- data/lib/abstract_importer/strategies/base.rb +30 -0
- data/lib/abstract_importer/strategies/default_strategy.rb +83 -0
- data/lib/abstract_importer/strategies/replace_strategy.rb +67 -0
- data/lib/abstract_importer/version.rb +1 -1
- data/test/importer_test.rb +79 -6
- data/test/support/mock_data_source.rb +3 -3
- data/test/test_helper.rb +3 -2
- metadata +53 -31
- data/lib/abstract_importer/reporter.rb +0 -150
@@ -0,0 +1,19 @@
|
|
1
|
+
module AbstractImporter
  module Reporters
    # A reporter whose overridden hooks do nothing.
    #
    # Only the four hooks listed here are replaced with empty bodies; every
    # other method is inherited from BaseReporter unchanged.
    class NullReporter < BaseReporter
      # No-op override; +importer+ is ignored.
      def start_all(importer); end

      # No-op override; +importer+ and +ms+ are ignored.
      def finish_all(importer, ms); end

      # No-op override; +ms+ is ignored.
      def finish_setup(ms); end

      # No-op override; +collection+ is ignored.
      def start_collection(collection); end
    end
  end
end
|
@@ -0,0 +1,103 @@
|
|
1
|
+
require "objspace"
|
2
|
+
|
3
|
+
module AbstractImporter
  module Reporters
    # Reporter that prints GC and object-count statistics while records are
    # imported and, once a collection is finished, forces a GC run and lists
    # the objects that still reference a surviving instance of that
    # collection's model.
    class PerformanceReporter < BaseReporter
      # Number of processed records between two stats lines.
      attr_reader :sample_size

      # io      - handed to BaseReporter#initialize unchanged.
      # options - Hash of settings; :sample_size defaults to 50.
      #
      # Also turns on allocation tracing so that the allocation site of each
      # referencing object can be printed by the leak search.
      def initialize(io, options={})
        super io
        @sample_size = options.fetch(:sample_size, 50)
        ObjectSpace.trace_object_allocations_start
      end

      # Remembers the collection being imported and resets the record counter.
      def start_collection(collection)
        super
        @collection = collection
        # NOTE(review): recorded here but never read in this class.
        @major_gc_runs = GC.stat[:major_gc_count]
        @i = 0
      end

      # Runs the leak search for the collection's model unless no record was
      # created or failed since start_collection.
      def finish_collection(collection, summary)
        @collection = nil
        return if @i.zero?
        find_objects_holding_onto_references_to_a collection.model
      end

      # Counts the record and prints stats on every sample_size-th record.
      def record_created(record)
        sample_stats
      end

      # Counts the record and prints stats on every sample_size-th record.
      def record_failed(record, hash)
        sample_stats
      end

      # Prints one line with GC run counts, the number of live objects, the
      # total memsize in MB and the number of live instances of the current
      # collection's model.
      def print_stats
        stats = GC.stat
        objects = ObjectSpace.count_objects
        live_objects = objects[:TOTAL] - objects[:FREE]
        megabytes = (ObjectSpace.memsize_of_all / 1048576.0).round(3)
        live_models = ObjectSpace.each_object(collection.model).count
        puts "gc[minor]: #{stats[:minor_gc_count]}, gc[major]: #{stats[:major_gc_count]}, " \
             "objects: #{live_objects}, memsize: #{megabytes}MB, " \
             "#{collection.name}: #{live_models}"
      end

    private

      attr_reader :collection

      # Shared body of record_created and record_failed: prints stats when the
      # counter is a multiple of sample_size, then increments the counter.
      def sample_stats
        print_stats if @i % sample_size == 0
        @i += 1
      end

      def find_objects_holding_onto_references_to_a(model)
        GC.start

        label = model.name.tableize.tr("_", " ")

        # After GC.start, all models in this collection should be
        # garbage-collected unless there is a memory leak. Find one
        # of the uncollected objects and figure out what is holding
        # onto a reference to it.
        example = ObjectSpace.each_object(model).first
        unless example
          puts "\e[32mThere are no #{label} still in memory\e[0m"
          return
        end
        puts "\e[33mThere are #{ObjectSpace.each_object(model).count} #{label} still in memory\e[0m"

        # Keep only the id so this method's own local does not hold
        # a reference to the example while scanning.
        example_id = example.object_id
        example = nil

        # Search through all objects to find ones that hold a reference
        # to the model that hasn't been garbage-collected.
        print "\e[90m"
        # Loaded lazily: the progress bar is only needed when a leak is suspected.
        require "progressbar"
        pbar = ProgressBar.new("scanning", ObjectSpace.each_object.count)
        objects_of_holding = []
        ObjectSpace.each_object do |o|
          pbar.inc
          # __id__ rather than object_id: reachable objects may be BasicObject
          # instances, which only respond to __id__.
          next if ObjectSpace.reachable_objects_from(o).none? { |oo| oo.__id__ == example_id }

          objects_of_holding.push(describe_holder(o))
        end
        pbar.finish
        print "\e[0m"

        if objects_of_holding.empty?
          puts "\e[95mNo objects are holding a reference to the first one\e[0m"
        else
          puts "\e[95m#{objects_of_holding.length} objects hold a reference to the first one:",
               "\e[35m#{objects_of_holding.join("\n")}\e[0m"
        end
      end

      # Builds a one-line description of an object that references the leaked
      # example: its class name, a few details for arrays and associations,
      # and the file and line where it was allocated.
      def describe_holder(o)
        message = "#{o.class.name}"
        case o
        when Array
          message << " (length: #{o.length})"
        when ActiveRecord::Associations::Association
          reflection = o.reflection
          message << " (#{reflection.active_record.name}##{reflection.macro}" <<
                     " :#{reflection.name})"
        end
        message << " [#{ObjectSpace.allocation_sourcefile(o)}" <<
                   ":#{ObjectSpace.allocation_sourceline(o)}]"
      end
    end
  end
end
|
@@ -0,0 +1,30 @@
|
|
1
|
+
module AbstractImporter
  module Strategies
    # Common superclass for import strategies. An instance wraps one
    # collection and forwards a fixed set of readers and helpers to it.
    class Base
      attr_reader :collection

      # Everything listed here is answered by the wrapped collection.
      delegate(*%i[
        summary
        remap_foreign_keys!
        redundant_record?
        invoke_callback
        dry_run?
        id_map
        scope
        reporter
      ], to: :collection)

      # collection - the collection this strategy will import records for.
      def initialize(collection)
        @collection = collection
      end

      # Handles a single source hash. Subclasses must override this;
      # the base implementation always raises NotImplementedError.
      def process_record(hash)
        raise NotImplementedError
      end

      # Returns whether the id map already contains hash[:id] for this
      # collection's table.
      def already_imported?(hash)
        id_map.contains?(collection.table_name, hash[:id])
      end
    end
  end
end
|
@@ -0,0 +1,83 @@
|
|
1
|
+
require "abstract_importer/strategies/base"
|
2
|
+
|
3
|
+
module AbstractImporter
  module Strategies
    # Strategy that creates a new record for every source hash, except for
    # hashes already present in the id map or reported as redundant.
    class DefaultStrategy < Base

      # Processes one source hash. Always increments summary.total, then
      # increments exactly one of already_imported, redundant, created,
      # invalid or skipped. skipped is used when ::AbstractImporter::Skip is
      # raised anywhere in this method.
      def process_record(hash)
        summary.total += 1

        if already_imported?(hash)
          summary.already_imported += 1
          return
        end

        remap_foreign_keys!(hash)

        if redundant_record?(hash)
          summary.redundant += 1
          return
        end

        if create_record(hash)
          summary.created += 1
        else
          summary.invalid += 1
        end
      rescue ::AbstractImporter::Skip
        summary.skipped += 1
      end


      # Builds a record from hash and saves it.
      #
      # Returns true when the record was saved (or when this is a dry run,
      # in which case nothing after build_record happens) and false when the
      # record is invalid or could not be saved.
      def create_record(hash)
        record = build_record(hash)

        return true if dry_run?

        invoke_callback(:before_create, record)
        invoke_callback(:before_save, record)

        # rescue_callback has one shot to fix things
        invoke_callback(:rescue, record) unless record.valid?

        # valid? is checked again so that whatever the :rescue callback
        # changed is taken into account before saving.
        if record.valid? && record.save
          invoke_callback(:after_create, hash, record)
          invoke_callback(:after_save, hash, record)
          id_map << record

          reporter.record_created(record)
          clean_record(record)
          true
        else

          reporter.record_failed(record, hash)
          clean_record(record)
          false
        end
      end

      # Instantiates the collection's model from hash. The :before_build
      # callback may return a replacement hash. The hash's :id is removed and
      # stored as legacy_id, and the collection's association_attrs are
      # merged on top.
      def build_record(hash)
        hash = invoke_callback(:before_build, hash) || hash

        legacy_id = hash.delete(:id)

        collection.model.new(hash
          .merge(legacy_id: legacy_id)
          .merge(collection.association_attrs))
      end

      def clean_record(record)
        # If this record isn't able to be garbage-collected,
        # then we will print out all of the objects that are
        # retaining a reference to this one. Ruby's garbage-
        # collector is smart enough to clean up objects with
        # circular references; but if we free these now, we
        # will have fewer results to consider later.
        #
        # remove_instance_variable raises NameError for a variable that is
        # not set, so each one is only removed when it is actually defined.
        [:@association_cache, :@errors].each do |ivar|
          record.remove_instance_variable(ivar) if record.instance_variable_defined?(ivar)
        end
      end

    end
  end
end
|
@@ -0,0 +1,67 @@
|
|
1
|
+
require "abstract_importer/strategies/default_strategy"
|
2
|
+
|
3
|
+
module AbstractImporter
  module Strategies
    # Strategy that updates records whose id is already in the id map and
    # creates the rest.
    class ReplaceStrategy < DefaultStrategy

      # Processes one source hash. Increments summary.total and then one of
      # redundant, created, invalid or skipped. Updated records are counted
      # under created as well.
      def process_record(hash)
        summary.total += 1
        remap_foreign_keys!(hash)

        if redundant_record?(hash)
          summary.redundant += 1
          nil
        elsif create_or_update_record(hash)
          summary.created += 1
        else
          summary.invalid += 1
        end
      rescue ::AbstractImporter::Skip
        summary.skipped += 1
      end

      # Dispatches to update_record for hashes already imported and to
      # create_record for everything else.
      def create_or_update_record(hash)
        already_imported?(hash) ? update_record(hash) : create_record(hash)
      end

      # Looks up the record in scope by legacy_id, assigns the remaining
      # attributes and saves it.
      #
      # Returns true when saved (or on a dry run, right after the attributes
      # are assigned) and false when the record is invalid or not saved.
      def update_record(hash)
        attributes = invoke_callback(:before_build, hash) || hash
        legacy_id = attributes.delete(:id)

        record = scope.find_by(legacy_id: legacy_id)
        record.attributes = attributes

        return true if dry_run?

        invoke_callback(:before_update, record)
        invoke_callback(:before_save, record)

        # rescue_callback has one shot to fix things
        invoke_callback(:rescue, record) unless record.valid?

        unless record.valid? && record.save
          reporter.record_failed(record, attributes)
          return false
        end

        invoke_callback(:after_update, attributes, record)
        invoke_callback(:after_save, attributes, record)
        reporter.record_created(record)
        true
      end
    end
  end
end
|
data/test/importer_test.rb
CHANGED
@@ -21,12 +21,6 @@ class ImporterTest < ActiveSupport::TestCase
|
|
21
21
|
import!
|
22
22
|
assert_equal [456, 457, 458], account.students.pluck(:legacy_id)
|
23
23
|
end
|
24
|
-
|
25
|
-
should "not import existing records twice" do
|
26
|
-
account.students.create!(name: "Ron Weasley", legacy_id: 457)
|
27
|
-
import!
|
28
|
-
assert_equal 3, account.students.count
|
29
|
-
end
|
30
24
|
end
|
31
25
|
|
32
26
|
|
@@ -159,4 +153,83 @@ class ImporterTest < ActiveSupport::TestCase
|
|
159
153
|
|
160
154
|
|
161
155
|
|
156
|
+
context "When we use the default strategy" do
|
157
|
+
setup do
|
158
|
+
plan do |import|
|
159
|
+
import.students
|
160
|
+
end
|
161
|
+
end
|
162
|
+
|
163
|
+
context "and records already exist" do
|
164
|
+
setup do
|
165
|
+
account.students.create!(name: "Ron Weasley", legacy_id: 457)
|
166
|
+
end
|
167
|
+
|
168
|
+
should "not import existing records twice" do
|
169
|
+
import!
|
170
|
+
assert_equal 3, account.students.count
|
171
|
+
end
|
172
|
+
end
|
173
|
+
end
|
174
|
+
|
175
|
+
|
176
|
+
|
177
|
+
context "When we specify collections to skip" do
|
178
|
+
setup do
|
179
|
+
plan do |import|
|
180
|
+
import.students
|
181
|
+
import.parents
|
182
|
+
end
|
183
|
+
end
|
184
|
+
|
185
|
+
context "using :skip" do
|
186
|
+
setup do
|
187
|
+
options.merge!(skip: :parents)
|
188
|
+
end
|
189
|
+
|
190
|
+
should "not import the named collections" do
|
191
|
+
import!
|
192
|
+
assert_equal 3, account.students.length
|
193
|
+
assert_equal 0, account.parents.length
|
194
|
+
end
|
195
|
+
end
|
196
|
+
|
197
|
+
context "using :only" do
|
198
|
+
setup do
|
199
|
+
options.merge!(only: [:students])
|
200
|
+
end
|
201
|
+
|
202
|
+
should "import only the named collections" do
|
203
|
+
import!
|
204
|
+
assert_equal 3, account.students.length
|
205
|
+
assert_equal 0, account.parents.length
|
206
|
+
end
|
207
|
+
end
|
208
|
+
end
|
209
|
+
|
210
|
+
|
211
|
+
|
212
|
+
context "When we use the :replace strategy" do
|
213
|
+
setup do
|
214
|
+
options.merge!(strategy: {students: :replace})
|
215
|
+
plan do |import|
|
216
|
+
import.students
|
217
|
+
end
|
218
|
+
end
|
219
|
+
|
220
|
+
context "and records already exist" do
|
221
|
+
setup do
|
222
|
+
account.students.create!(name: "Ron Weasley", legacy_id: 457)
|
223
|
+
end
|
224
|
+
|
225
|
+
should "reimport the existing records" do
|
226
|
+
import!
|
227
|
+
assert_equal "Gryffindor", account.students.find_by_name("Ron Weasley").house,
|
228
|
+
"Expected Ron's record to have been replaced with one that has a house"
|
229
|
+
end
|
230
|
+
end
|
231
|
+
end
|
232
|
+
|
233
|
+
|
234
|
+
|
162
235
|
end
|
@@ -3,9 +3,9 @@ class MockDataSource
|
|
3
3
|
|
4
4
|
def students
|
5
5
|
Enumerator.new do |e|
|
6
|
-
e.yield id: 456, name: "Harry Potter", pet_type: "Owl", pet_id: 901
|
7
|
-
e.yield id: 457, name: "Ron Weasley", pet_type: nil, pet_id: nil
|
8
|
-
e.yield id: 458, name: "Hermione Granger", pet_type: "Cat", pet_id: 901
|
6
|
+
e.yield id: 456, name: "Harry Potter", pet_type: "Owl", pet_id: 901, house: "Gryffindor"
|
7
|
+
e.yield id: 457, name: "Ron Weasley", pet_type: nil, pet_id: nil, house: "Gryffindor"
|
8
|
+
e.yield id: 458, name: "Hermione Granger", pet_type: "Cat", pet_id: 901, house: "Gryffindor"
|
9
9
|
end
|
10
10
|
end
|
11
11
|
|
data/test/test_helper.rb
CHANGED
@@ -41,6 +41,7 @@ class ActiveSupport::TestCase
|
|
41
41
|
@data_source = MockDataSource.new
|
42
42
|
@klass = Class.new(AbstractImporter::Base)
|
43
43
|
@account = Account.create!
|
44
|
+
@options = {}
|
44
45
|
end
|
45
46
|
|
46
47
|
teardown do
|
@@ -50,7 +51,7 @@ class ActiveSupport::TestCase
|
|
50
51
|
|
51
52
|
protected
|
52
53
|
|
53
|
-
attr_reader :account, :results, :data_source
|
54
|
+
attr_reader :account, :results, :data_source, :options
|
54
55
|
|
55
56
|
def plan(&block)
|
56
57
|
@klass.import(&block)
|
@@ -65,7 +66,7 @@ protected
|
|
65
66
|
end
|
66
67
|
|
67
68
|
def importer
|
68
|
-
@importer ||= @klass.new(@account, @data_source, io: $io)
|
69
|
+
@importer ||= @klass.new(@account, @data_source, options.merge(io: $io))
|
69
70
|
end
|
70
71
|
|
71
72
|
end
|