abstract_importer 1.2.0.rc1 → 1.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +20 -2
- data/abstract_importer.gemspec +1 -0
- data/lib/abstract_importer/base.rb +26 -3
- data/lib/abstract_importer/collection.rb +15 -0
- data/lib/abstract_importer/collection_importer.rb +8 -69
- data/lib/abstract_importer/import_options.rb +21 -35
- data/lib/abstract_importer/reporters.rb +4 -0
- data/lib/abstract_importer/reporters/base_reporter.rb +72 -0
- data/lib/abstract_importer/reporters/debug_reporter.rb +131 -0
- data/lib/abstract_importer/reporters/null_reporter.rb +19 -0
- data/lib/abstract_importer/reporters/performance_reporter.rb +103 -0
- data/lib/abstract_importer/strategies.rb +2 -0
- data/lib/abstract_importer/strategies/base.rb +30 -0
- data/lib/abstract_importer/strategies/default_strategy.rb +83 -0
- data/lib/abstract_importer/strategies/replace_strategy.rb +67 -0
- data/lib/abstract_importer/version.rb +1 -1
- data/test/importer_test.rb +79 -6
- data/test/support/mock_data_source.rb +3 -3
- data/test/test_helper.rb +3 -2
- metadata +53 -31
- data/lib/abstract_importer/reporter.rb +0 -150
@@ -0,0 +1,19 @@
|
|
1
|
+
module AbstractImporter
  module Reporters
    # Reporter whose hooks below are deliberately empty.
    #
    # Only these four hooks are overridden here; every other hook keeps
    # whatever behavior BaseReporter gives it.
    # NOTE(review): presumably used when no reporting output is wanted --
    # confirm against BaseReporter and the importer's option handling.
    class NullReporter < BaseReporter

      # Called with the importer; does nothing.
      def start_all(importer); end

      # Called with the importer and a duration (`ms`); does nothing.
      def finish_all(importer, ms); end

      # Called with a duration (`ms`); does nothing.
      def finish_setup(ms); end

      # Called with the collection; does nothing.
      def start_collection(collection); end

    end
  end
end
|
@@ -0,0 +1,103 @@
|
|
1
|
+
require "objspace"
|
2
|
+
|
3
|
+
module AbstractImporter
  module Reporters
    # Reporter that samples garbage-collector and ObjectSpace statistics
    # while a collection is being imported. When a collection finishes, it
    # forces a GC run and, if any instances of the collection's model are
    # still alive, lists the objects that reference one of them.
    #
    # Allocation tracing is switched on in the constructor so that the
    # allocation site of each referencing object can be printed.
    class PerformanceReporter < BaseReporter
      attr_reader :sample_size

      # io      - handed unchanged to BaseReporter#initialize.
      # options - Hash; :sample_size (default 50) is the number of processed
      #           records between two statistics lines.
      #
      # Raises ArgumentError when :sample_size is zero. Without this check
      # the first processed record would fail with ZeroDivisionError in the
      # middle of the import.
      def initialize(io, options={})
        super io
        @sample_size = options.fetch(:sample_size, 50)
        raise ArgumentError, ":sample_size must not be zero" if @sample_size == 0

        # Start the counter at zero so finish_collection does not fail on
        # nil when no start_collection preceded it.
        @i = 0
        ObjectSpace.trace_object_allocations_start
      end


      # Remembers the collection being imported and resets the per-collection
      # record counter.
      def start_collection(collection)
        super
        @collection = collection
        # Captured at the start of the collection; not read anywhere in
        # this class.
        @major_gc_runs = GC.stat[:major_gc_count]
        @i = 0
      end

      # Runs the leak search for the collection's model, unless no record
      # was processed since the counter was last reset.
      def finish_collection(collection, summary)
        @collection = nil
        return if @i.zero?
        find_objects_holding_onto_references_to_a collection.model
      end

      # Counts a created record; prints statistics every `sample_size` records.
      def record_created(record)
        record_processed
      end

      # Counts a failed record; prints statistics every `sample_size` records.
      def record_failed(record, hash)
        record_processed
      end


      # Prints one line with the minor/major GC counts, the number of live
      # objects, the total memory size in MB and the number of live
      # instances of the current collection's model.
      def print_stats
        stats = GC.stat
        objects = ObjectSpace.count_objects
        live_objects = objects[:TOTAL] - objects[:FREE]
        megabytes = (ObjectSpace.memsize_of_all / 1048576.0).round(3)
        puts "gc[minor]: #{stats[:minor_gc_count]}, gc[major]: #{stats[:major_gc_count]}, " \
             "objects: #{live_objects}, memsize: #{megabytes}MB, " \
             "#{collection.name}: #{ObjectSpace.each_object(collection.model).count}"
      end

    private
      attr_reader :collection

      # Shared body of record_created and record_failed: print statistics
      # on every `sample_size`-th record, then advance the counter.
      def record_processed
        print_stats if @i % sample_size == 0
        @i += 1
      end

      def find_objects_holding_onto_references_to_a(model)
        GC.start

        # After GC.start, all models in this collection should be
        # garbage-collected unless there is a memory leak. Find one
        # of the uncollected objects and figure out what is holding
        # onto a reference to it.
        example = ObjectSpace.each_object(model).first
        unless example
          puts "\e[32mThere are no #{model.name.tableize.gsub("_", " ")} still in memory\e[0m"
          return
        end
        puts "\e[33mThere are #{ObjectSpace.each_object(model).count} #{model.name.tableize.gsub("_", " ")} still in memory\e[0m"

        # Keep only the id and drop our own reference to the example.
        example_id = example.__id__
        example = nil

        # Search through all objects to find ones that hold a reference
        # to the model that hasn't been garbage-collected.
        require "progressbar" # loaded lazily: only needed for this scan
        objects_of_holding = []
        print "\e[90m"
        begin
          pbar = ProgressBar.new("scanning", ObjectSpace.each_object.count)
          ObjectSpace.each_object do |o|
            pbar.inc
            # __id__ instead of object_id: reachable objects may be
            # BasicObject instances, which do not respond to object_id.
            next if ObjectSpace.reachable_objects_from(o).none? { |oo| oo.__id__ == example_id }

            objects_of_holding.push(describe_holder(o))
          end
          pbar.finish
        ensure
          # Always restore the terminal color, even if the scan raised.
          print "\e[0m"
        end

        if objects_of_holding.empty?
          puts "\e[95mNo objects are holding a reference to the first one\e[0m"
        else
          puts "\e[95m#{objects_of_holding.length} objects hold a reference to the first one:",
               "\e[35m#{objects_of_holding.join("\n")}\e[0m"
        end
      end

      # Builds the one-line description of an object that references the
      # example: its class name, a detail for arrays and ActiveRecord
      # associations, and the file and line where it was allocated.
      def describe_holder(o)
        # Interpolation yields a fresh String that can be appended to.
        message = "#{class_name_of(o)}"
        case o
        when Array
          message << " (length: #{o.length})"
        when ActiveRecord::Associations::Association
          reflection = o.reflection
          message << " (#{reflection.active_record.name}##{reflection.macro}" <<
            " :#{reflection.name})"
        end
        message << " [#{ObjectSpace.allocation_sourcefile(o)}" <<
          ":#{ObjectSpace.allocation_sourceline(o)}]"
        message
      end

      # Name of the object's class, obtained through Kernel#class bound
      # explicitly so that it also works for BasicObject instances.
      def class_name_of(object)
        Kernel.instance_method(:class).bind(object).call.name
      end

    end
  end
end
|
@@ -0,0 +1,30 @@
|
|
1
|
+
module AbstractImporter
  module Strategies
    # Common superclass of the import strategies. A strategy wraps one
    # collection and decides what happens to each source hash handed to
    # #process_record.
    class Base
      attr_reader :collection

      # Everything listed here is answered by the wrapped collection.
      delegate :summary, :id_map, :scope, :reporter,
               :dry_run?, :redundant_record?,
               :remap_foreign_keys!, :invoke_callback,
               to: :collection

      # collection - the collection this strategy operates on.
      def initialize(collection)
        @collection = collection
      end

      # Subclasses are expected to override this; the base implementation
      # always raises NotImplementedError.
      def process_record(hash)
        raise NotImplementedError
      end

      # Asks the id map whether it contains hash[:id] under the
      # collection's table name.
      def already_imported?(hash)
        id_map.contains?(collection.table_name, hash[:id])
      end

    end
  end
end
|
@@ -0,0 +1,83 @@
|
|
1
|
+
require "abstract_importer/strategies/base"
|
2
|
+
|
3
|
+
module AbstractImporter
  module Strategies
    # Strategy that builds and saves a new record for each source hash
    # that has not been imported yet, tallying every outcome on the
    # collection's summary.
    class DefaultStrategy < Base


      # Processes one source hash and updates exactly one outcome counter
      # besides `total`: already_imported, redundant, created, invalid,
      # or -- when ::AbstractImporter::Skip is raised along the way -- skipped.
      def process_record(hash)
        summary.total += 1

        if already_imported?(hash)
          summary.already_imported += 1
          return
        end

        remap_foreign_keys!(hash)

        if redundant_record?(hash)
          summary.redundant += 1
          return
        end

        if create_record(hash)
          summary.created += 1
        else
          summary.invalid += 1
        end
      rescue ::AbstractImporter::Skip
        summary.skipped += 1
      end


      # Builds a record from the hash and saves it.
      #
      # Returns true when the record was saved (or immediately after
      # building it on a dry run), false when it was invalid or could
      # not be saved.
      def create_record(hash)
        record = build_record(hash)

        return true if dry_run?

        invoke_callback(:before_create, record)
        invoke_callback(:before_save, record)

        # rescue_callback has one shot to fix things
        invoke_callback(:rescue, record) unless record.valid?

        if record.valid? && record.save
          invoke_callback(:after_create, hash, record)
          invoke_callback(:after_save, hash, record)
          id_map << record

          reporter.record_created(record)
          clean_record(record)
          true
        else

          reporter.record_failed(record, hash)
          clean_record(record)
          false
        end
      end

      # Instantiates the collection's model from the hash. The :before_build
      # callback may return a replacement hash; a nil/false result keeps the
      # original. The source :id is removed from the hash and passed to the
      # model as :legacy_id, together with the collection's association
      # attributes.
      def build_record(hash)
        hash = invoke_callback(:before_build, hash) || hash

        legacy_id = hash.delete(:id)

        collection.model.new(hash
          .merge(legacy_id: legacy_id)
          .merge(collection.association_attrs))
      end

      # If this record isn't able to be garbage-collected,
      # then we will print out all of the objects that are
      # retaining a reference to this one. Ruby's garbage-
      # collector is smart enough to clean up objects with
      # circular references; but if we free these now, we
      # will have fewer results to consider later.
      #
      # Each variable is removed only if it is actually set:
      # remove_instance_variable raises NameError for a variable
      # that was never defined, which would abort the import
      # after the record had already been saved.
      # The return value is not meaningful.
      def clean_record(record)
        [:@association_cache, :@errors].each do |ivar|
          record.remove_instance_variable(ivar) if record.instance_variable_defined?(ivar)
        end
      end

    end
  end
end
|
@@ -0,0 +1,67 @@
|
|
1
|
+
require "abstract_importer/strategies/default_strategy"
|
2
|
+
|
3
|
+
module AbstractImporter
  module Strategies
    # Strategy that updates records which were imported before and
    # creates the rest. Unlike DefaultStrategy#process_record it does not
    # stop at already-imported hashes.
    class ReplaceStrategy < DefaultStrategy


      # Processes one source hash. Both a successful create and a
      # successful update are counted under `summary.created`; failures
      # are counted as invalid, redundant hashes as redundant, and a raised
      # ::AbstractImporter::Skip as skipped.
      def process_record(hash)
        summary.total += 1

        remap_foreign_keys!(hash)

        if redundant_record?(hash)
          summary.redundant += 1
          return
        end

        if create_or_update_record(hash)
          summary.created += 1
        else
          summary.invalid += 1
        end
      rescue ::AbstractImporter::Skip
        summary.skipped += 1
      end


      # Updates the existing record when the id map already knows the
      # hash's id, otherwise creates a new record.
      #
      # Returns true on success, false otherwise.
      def create_or_update_record(hash)
        if already_imported?(hash)
          update_record(hash)
        else
          create_record(hash)
        end
      end


      # Assigns the hash's attributes to the record in `scope` whose
      # legacy_id equals the hash's :id, then saves it.
      #
      # Returns true when the record was saved (or right after assigning
      # attributes on a dry run), false when it was invalid or could not
      # be saved.
      # Raises the finder's not-found error (ActiveRecord::RecordNotFound
      # for ActiveRecord scopes) when the id map lists the id but `scope`
      # holds no such record.
      def update_record(hash)
        hash = invoke_callback(:before_build, hash) || hash

        # find_by! instead of find_by: a nil result would otherwise blow
        # up on the next line with an unhelpful NoMethodError on nil.
        record = scope.find_by!(legacy_id: hash.delete(:id))
        record.attributes = hash

        return true if dry_run?

        invoke_callback(:before_update, record)
        invoke_callback(:before_save, record)

        # rescue_callback has one shot to fix things
        invoke_callback(:rescue, record) unless record.valid?

        if record.valid? && record.save
          invoke_callback(:after_update, hash, record)
          invoke_callback(:after_save, hash, record)

          # Updates are reported through the same hook as creations.
          reporter.record_created(record)
          true
        else

          reporter.record_failed(record, hash)
          false
        end
      end


    end
  end
end
|
data/test/importer_test.rb
CHANGED
@@ -21,12 +21,6 @@ class ImporterTest < ActiveSupport::TestCase
|
|
21
21
|
import!
|
22
22
|
assert_equal [456, 457, 458], account.students.pluck(:legacy_id)
|
23
23
|
end
|
24
|
-
|
25
|
-
should "not import existing records twice" do
|
26
|
-
account.students.create!(name: "Ron Weasley", legacy_id: 457)
|
27
|
-
import!
|
28
|
-
assert_equal 3, account.students.count
|
29
|
-
end
|
30
24
|
end
|
31
25
|
|
32
26
|
|
@@ -159,4 +153,83 @@ class ImporterTest < ActiveSupport::TestCase
|
|
159
153
|
|
160
154
|
|
161
155
|
|
156
|
+
context "When we use the default strategy" do
  # Only the students collection is part of the plan.
  setup { plan { |import| import.students } }

  context "and records already exist" do
    # Ron carries legacy id 457, one of the ids the mock data source yields.
    setup { account.students.create!(name: "Ron Weasley", legacy_id: 457) }

    # The mock source yields three students; a count of 3 therefore
    # means Ron was not created a second time.
    should "not import existing records twice" do
      import!
      assert_equal 3, account.students.count
    end
  end
end
|
174
|
+
|
175
|
+
|
176
|
+
|
177
|
+
context "When we specify collections to skip" do
  # Both students and parents are planned; the options below decide
  # which of them actually get imported.
  setup do
    plan do |import|
      import.students
      import.parents
    end
  end

  context "using :skip" do
    # Name the collection to leave out.
    setup { options[:skip] = :parents }

    should "not import the named collections" do
      import!
      assert_equal 3, account.students.length
      assert_equal 0, account.parents.length
    end
  end

  context "using :only" do
    # Name the collections to keep.
    setup { options[:only] = [:students] }

    should "import only the named collections" do
      import!
      assert_equal 3, account.students.length
      assert_equal 0, account.parents.length
    end
  end
end
|
209
|
+
|
210
|
+
|
211
|
+
|
212
|
+
context "When we use the :replace strategy" do
  # Select the :replace strategy for students, then plan that collection.
  setup do
    options[:strategy] = {students: :replace}
    plan { |import| import.students }
  end

  context "and records already exist" do
    # The pre-existing Ron has no house; the mock source supplies one.
    setup { account.students.create!(name: "Ron Weasley", legacy_id: 457) }

    should "reimport the existing records" do
      import!
      ron = account.students.find_by_name("Ron Weasley")
      assert_equal "Gryffindor", ron.house,
        "Expected Ron's record to have been replaced with one that has a house"
    end
  end
end
|
232
|
+
|
233
|
+
|
234
|
+
|
162
235
|
end
|
@@ -3,9 +3,9 @@ class MockDataSource
|
|
3
3
|
|
4
4
|
def students
|
5
5
|
Enumerator.new do |e|
|
6
|
-
e.yield id: 456, name: "Harry Potter", pet_type: "Owl", pet_id: 901
|
7
|
-
e.yield id: 457, name: "Ron Weasley", pet_type: nil, pet_id: nil
|
8
|
-
e.yield id: 458, name: "Hermione Granger", pet_type: "Cat", pet_id: 901
|
6
|
+
e.yield id: 456, name: "Harry Potter", pet_type: "Owl", pet_id: 901, house: "Gryffindor"
|
7
|
+
e.yield id: 457, name: "Ron Weasley", pet_type: nil, pet_id: nil, house: "Gryffindor"
|
8
|
+
e.yield id: 458, name: "Hermione Granger", pet_type: "Cat", pet_id: 901, house: "Gryffindor"
|
9
9
|
end
|
10
10
|
end
|
11
11
|
|
data/test/test_helper.rb
CHANGED
@@ -41,6 +41,7 @@ class ActiveSupport::TestCase
|
|
41
41
|
@data_source = MockDataSource.new
|
42
42
|
@klass = Class.new(AbstractImporter::Base)
|
43
43
|
@account = Account.create!
|
44
|
+
@options = {}
|
44
45
|
end
|
45
46
|
|
46
47
|
teardown do
|
@@ -50,7 +51,7 @@ class ActiveSupport::TestCase
|
|
50
51
|
|
51
52
|
protected
|
52
53
|
|
53
|
-
attr_reader :account, :results, :data_source
|
54
|
+
attr_reader :account, :results, :data_source, :options
|
54
55
|
|
55
56
|
def plan(&block)
|
56
57
|
@klass.import(&block)
|
@@ -65,7 +66,7 @@ protected
|
|
65
66
|
end
|
66
67
|
|
67
68
|
def importer
|
68
|
-
@importer ||= @klass.new(@account, @data_source, io: $io)
|
69
|
+
@importer ||= @klass.new(@account, @data_source, options.merge(io: $io))
|
69
70
|
end
|
70
71
|
|
71
72
|
end
|