active_record_data_loader 1.0.2 → 1.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/.github/workflows/build.yml +51 -0
- data/.github/workflows/codeql-analysis.yml +70 -0
- data/.github/workflows/gem-push.yml +29 -0
- data/.rubocop.yml +46 -7
- data/CHANGELOG.md +38 -2
- data/CODE_OF_CONDUCT.md +2 -2
- data/Gemfile.lock +71 -73
- data/README.md +162 -9
- data/Rakefile +8 -2
- data/active_record_data_loader.gemspec +7 -6
- data/config/database.yml +2 -0
- data/docker-compose.yml +18 -0
- data/gemfiles/activerecord_6.gemfile +1 -1
- data/lib/active_record_data_loader/active_record/{belongs_to_configuration.rb → belongs_to_data_provider.rb} +8 -7
- data/lib/active_record_data_loader/active_record/{column_configuration.rb → column_data_provider.rb} +2 -2
- data/lib/active_record_data_loader/active_record/enum_value_generator.rb +9 -8
- data/lib/active_record_data_loader/active_record/integer_value_generator.rb +1 -1
- data/lib/active_record_data_loader/active_record/list.rb +47 -0
- data/lib/active_record_data_loader/active_record/model_data_generator.rb +62 -7
- data/lib/active_record_data_loader/active_record/{polymorphic_belongs_to_configuration.rb → polymorphic_belongs_to_data_provider.rb} +12 -7
- data/lib/active_record_data_loader/active_record/unique_index_tracker.rb +67 -0
- data/lib/active_record_data_loader/bulk_insert_strategy.rb +16 -8
- data/lib/active_record_data_loader/configuration.rb +26 -3
- data/lib/active_record_data_loader/connection_handler.rb +52 -0
- data/lib/active_record_data_loader/copy_strategy.rb +38 -24
- data/lib/active_record_data_loader/data_faker.rb +12 -4
- data/lib/active_record_data_loader/dsl/model.rb +19 -2
- data/lib/active_record_data_loader/errors.rb +5 -0
- data/lib/active_record_data_loader/file_output_adapter.rb +48 -0
- data/lib/active_record_data_loader/loader.rb +55 -71
- data/lib/active_record_data_loader/null_output_adapter.rb +15 -0
- data/lib/active_record_data_loader/table_loader.rb +59 -0
- data/lib/active_record_data_loader/version.rb +1 -1
- data/lib/active_record_data_loader.rb +11 -38
- metadata +51 -29
- data/.travis.yml +0 -24
- data/config/database.yml.travis +0 -12
@@ -2,20 +2,21 @@
|
|
2
2
|
|
3
3
|
module ActiveRecordDataLoader
|
4
4
|
module ActiveRecord
|
5
|
-
class
|
6
|
-
def self.
|
5
|
+
class PolymorphicBelongsToDataProvider
|
6
|
+
def self.provider_for(polymorphic_settings:, strategy: :random)
|
7
7
|
ar_association = polymorphic_settings.model_class.reflect_on_association(
|
8
8
|
polymorphic_settings.name
|
9
9
|
)
|
10
10
|
raise "#{name} only supports polymorphic associations" unless ar_association.polymorphic?
|
11
11
|
|
12
|
-
new(polymorphic_settings, ar_association).polymorphic_config
|
12
|
+
new(polymorphic_settings, ar_association, strategy).polymorphic_config
|
13
13
|
end
|
14
14
|
|
15
|
-
def initialize(settings, ar_association)
|
15
|
+
def initialize(settings, ar_association, strategy)
|
16
16
|
@settings = settings
|
17
17
|
@ar_association = ar_association
|
18
18
|
@model_count = settings.weighted_models.size
|
19
|
+
@strategy = strategy
|
19
20
|
end
|
20
21
|
|
21
22
|
def polymorphic_config
|
@@ -32,21 +33,25 @@ module ActiveRecordDataLoader
|
|
32
33
|
end
|
33
34
|
|
34
35
|
def foreign_key(row_number)
|
35
|
-
possible_values[row_number % @model_count][1].
|
36
|
+
possible_values[row_number % @model_count][1].next
|
36
37
|
end
|
37
38
|
|
38
39
|
def possible_values
|
39
40
|
@possible_values ||= begin
|
40
41
|
values = @settings.models.keys.map do |klass|
|
41
|
-
[klass.name,
|
42
|
+
[klass.name, values_query(klass)]
|
42
43
|
end.to_h
|
43
44
|
|
44
45
|
@settings.weighted_models.map { |klass| [klass.name, values[klass.name]] }
|
45
46
|
end
|
46
47
|
end
|
47
48
|
|
49
|
+
def values_query(klass)
|
50
|
+
List.for(base_query(klass).pluck(klass.primary_key), strategy: @strategy)
|
51
|
+
end
|
52
|
+
|
48
53
|
def base_query(klass)
|
49
|
-
if @settings.queries[klass]
|
54
|
+
if @settings.queries[klass].respond_to?(:call)
|
50
55
|
@settings.queries[klass].call.all
|
51
56
|
else
|
52
57
|
klass.all
|
@@ -0,0 +1,67 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module ActiveRecordDataLoader
|
4
|
+
module ActiveRecord
|
5
|
+
class UniqueIndexTracker
|
6
|
+
Index = Struct.new(:name, :columns, :column_indexes, keyword_init: true)
|
7
|
+
|
8
|
+
def initialize(model:, connection_factory:)
|
9
|
+
@model = model
|
10
|
+
@table = model.table_name
|
11
|
+
@unique_indexes = []
|
12
|
+
@unique_values_used = {}
|
13
|
+
find_unique_indexes(connection_factory)
|
14
|
+
end
|
15
|
+
|
16
|
+
def map_indexed_columns(column_list)
|
17
|
+
@unique_indexes = @raw_unique_indexes.map do |index|
|
18
|
+
@unique_values_used[index.name] = Set.new
|
19
|
+
columns = index.columns.map(&:to_sym)
|
20
|
+
Index.new(
|
21
|
+
name: index.name,
|
22
|
+
columns: columns,
|
23
|
+
column_indexes: columns.map { |c| column_list.find_index(c) }
|
24
|
+
)
|
25
|
+
end
|
26
|
+
end
|
27
|
+
|
28
|
+
def repeating_unique_values?(row)
|
29
|
+
@unique_indexes.map do |index|
|
30
|
+
values = index.column_indexes.map { |i| row[i] }
|
31
|
+
@unique_values_used.fetch(index.name).include?(values)
|
32
|
+
end.any?
|
33
|
+
end
|
34
|
+
|
35
|
+
def capture_unique_values(row)
|
36
|
+
return unless row.present?
|
37
|
+
|
38
|
+
@unique_indexes.each do |index|
|
39
|
+
values = index.column_indexes.map { |i| row[i] }
|
40
|
+
@unique_values_used.fetch(index.name) << values
|
41
|
+
end
|
42
|
+
row
|
43
|
+
end
|
44
|
+
|
45
|
+
def contained_in_index?(ar_column)
|
46
|
+
target_column = if @model.reflect_on_association(ar_column.name)&.belongs_to?
|
47
|
+
ar_column.join_foreign_key.to_sym
|
48
|
+
else
|
49
|
+
ar_column.name.to_sym
|
50
|
+
end
|
51
|
+
|
52
|
+
@raw_unique_indexes.flat_map { |i| i.columns.map(&:to_sym) }.include?(target_column)
|
53
|
+
end
|
54
|
+
|
55
|
+
private
|
56
|
+
|
57
|
+
attr_reader :table
|
58
|
+
|
59
|
+
def find_unique_indexes(connection_factory)
|
60
|
+
connection = connection_factory.call
|
61
|
+
@raw_unique_indexes = connection.indexes(table).select(&:unique)
|
62
|
+
ensure
|
63
|
+
connection&.close
|
64
|
+
end
|
65
|
+
end
|
66
|
+
end
|
67
|
+
end
|
@@ -2,15 +2,18 @@
|
|
2
2
|
|
3
3
|
module ActiveRecordDataLoader
|
4
4
|
class BulkInsertStrategy
|
5
|
-
def initialize(data_generator)
|
5
|
+
def initialize(data_generator, file_adapter)
|
6
6
|
@data_generator = data_generator
|
7
|
+
@file_adapter = file_adapter
|
7
8
|
end
|
8
9
|
|
9
10
|
def load_batch(row_numbers, connection)
|
10
|
-
|
11
|
+
command = <<~SQL
|
11
12
|
INSERT INTO #{quoted_table_name(connection)} (#{column_list(connection)})
|
12
13
|
VALUES #{values(row_numbers, connection)}
|
13
14
|
SQL
|
15
|
+
insert(connection: connection, command: command)
|
16
|
+
file_adapter.insert(command)
|
14
17
|
end
|
15
18
|
|
16
19
|
def table_name
|
@@ -23,7 +26,11 @@ module ActiveRecordDataLoader
|
|
23
26
|
|
24
27
|
private
|
25
28
|
|
26
|
-
attr_reader :data_generator
|
29
|
+
attr_reader :data_generator, :file_adapter
|
30
|
+
|
31
|
+
def insert(connection:, command:)
|
32
|
+
connection.insert(command)
|
33
|
+
end
|
27
34
|
|
28
35
|
def quoted_table_name(connection)
|
29
36
|
@quoted_table_name ||= connection.quote_table_name(data_generator.table)
|
@@ -38,15 +45,16 @@ module ActiveRecordDataLoader
|
|
38
45
|
|
39
46
|
def values(row_numbers, connection)
|
40
47
|
row_numbers
|
41
|
-
.map { |i|
|
48
|
+
.map { |i| row_values(i, connection) }
|
49
|
+
.compact
|
42
50
|
.join(",")
|
43
51
|
end
|
44
52
|
|
45
53
|
def row_values(row_number, connection)
|
46
|
-
data_generator
|
47
|
-
|
48
|
-
|
49
|
-
|
54
|
+
row = data_generator.generate_row(row_number)
|
55
|
+
return unless row.present?
|
56
|
+
|
57
|
+
"(#{row.map { |v| connection.quote(v) }.join(',')})"
|
50
58
|
end
|
51
59
|
end
|
52
60
|
end
|
@@ -2,29 +2,52 @@
|
|
2
2
|
|
3
3
|
module ActiveRecordDataLoader
|
4
4
|
class Configuration
|
5
|
-
attr_accessor :
|
5
|
+
attr_accessor :connection_factory, :default_batch_size, :default_row_count,
|
6
|
+
:logger, :max_duplicate_retries, :raise_on_duplicates, :statement_timeout
|
7
|
+
attr_reader :output
|
6
8
|
|
7
9
|
def initialize(
|
8
10
|
default_batch_size: 100_000,
|
9
11
|
default_row_count: 1,
|
10
12
|
logger: nil,
|
11
13
|
statement_timeout: "2min",
|
12
|
-
connection_factory: -> { ::ActiveRecord::Base.connection }
|
14
|
+
connection_factory: -> { ::ActiveRecord::Base.connection },
|
15
|
+
raise_on_duplicates: false,
|
16
|
+
max_duplicate_retries: 5,
|
17
|
+
output: nil
|
13
18
|
)
|
14
19
|
@default_batch_size = default_batch_size
|
15
20
|
@default_row_count = default_row_count
|
16
21
|
@logger = logger || default_logger
|
17
22
|
@statement_timeout = statement_timeout
|
18
23
|
@connection_factory = connection_factory
|
24
|
+
@raise_on_duplicates = raise_on_duplicates
|
25
|
+
@max_duplicate_retries = max_duplicate_retries
|
26
|
+
self.output = output
|
27
|
+
end
|
28
|
+
|
29
|
+
def output=(output)
|
30
|
+
@output = validate_output(output)
|
19
31
|
end
|
20
32
|
|
21
33
|
private
|
22
34
|
|
35
|
+
def validate_output(output)
|
36
|
+
if output.to_s.blank?
|
37
|
+
nil
|
38
|
+
elsif output.is_a?(String)
|
39
|
+
output
|
40
|
+
else
|
41
|
+
raise "The output configuration parameter must be a filename meant to be the "\
|
42
|
+
"target for the SQL script"
|
43
|
+
end
|
44
|
+
end
|
45
|
+
|
23
46
|
def default_logger
|
24
47
|
if defined?(Rails) && Rails.respond_to?(:logger)
|
25
48
|
Rails.logger
|
26
49
|
else
|
27
|
-
Logger.new(
|
50
|
+
Logger.new($stdout, level: :info)
|
28
51
|
end
|
29
52
|
end
|
30
53
|
end
|
@@ -0,0 +1,52 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module ActiveRecordDataLoader
|
4
|
+
class ConnectionHandler
|
5
|
+
def initialize(connection_factory:, statement_timeout:)
|
6
|
+
@connection_factory = connection_factory
|
7
|
+
@statement_timeout = statement_timeout
|
8
|
+
cache_facts
|
9
|
+
end
|
10
|
+
|
11
|
+
def with_connection
|
12
|
+
connection = connection_factory.call
|
13
|
+
if supports_timeout?
|
14
|
+
connection.execute(timeout_set_command)
|
15
|
+
yield connection
|
16
|
+
connection.execute(reset_timeout_command)
|
17
|
+
else
|
18
|
+
yield connection
|
19
|
+
end
|
20
|
+
ensure
|
21
|
+
connection&.close
|
22
|
+
end
|
23
|
+
|
24
|
+
def supports_timeout?
|
25
|
+
@supports_timeout
|
26
|
+
end
|
27
|
+
|
28
|
+
def supports_copy?
|
29
|
+
@supports_copy
|
30
|
+
end
|
31
|
+
|
32
|
+
def timeout_set_command
|
33
|
+
"SET statement_timeout = \"#{statement_timeout}\""
|
34
|
+
end
|
35
|
+
|
36
|
+
def reset_timeout_command
|
37
|
+
"RESET statement_timeout"
|
38
|
+
end
|
39
|
+
|
40
|
+
private
|
41
|
+
|
42
|
+
attr_reader :connection_factory, :statement_timeout
|
43
|
+
|
44
|
+
def cache_facts
|
45
|
+
connection = connection_factory.call
|
46
|
+
@supports_timeout = connection.adapter_name.downcase.to_sym == :postgresql
|
47
|
+
@supports_copy = connection.raw_connection.respond_to?(:copy_data)
|
48
|
+
ensure
|
49
|
+
connection&.close
|
50
|
+
end
|
51
|
+
end
|
52
|
+
end
|
@@ -2,15 +2,26 @@
|
|
2
2
|
|
3
3
|
module ActiveRecordDataLoader
|
4
4
|
class CopyStrategy
|
5
|
-
def initialize(data_generator)
|
5
|
+
def initialize(data_generator, file_adapter)
|
6
6
|
@data_generator = data_generator
|
7
|
+
@file_adapter = file_adapter
|
7
8
|
end
|
8
9
|
|
9
10
|
def load_batch(row_numbers, connection)
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
11
|
+
data = csv_rows(row_numbers, connection)
|
12
|
+
copy(
|
13
|
+
connection: connection,
|
14
|
+
table: table_name_for_copy(connection),
|
15
|
+
columns: columns_for_copy(connection),
|
16
|
+
data: data,
|
17
|
+
row_numbers: row_numbers
|
18
|
+
)
|
19
|
+
file_adapter.copy(
|
20
|
+
table: table_name_for_copy(connection),
|
21
|
+
columns: columns_for_copy(connection),
|
22
|
+
data: data,
|
23
|
+
row_numbers: row_numbers
|
24
|
+
)
|
14
25
|
end
|
15
26
|
|
16
27
|
def table_name
|
@@ -23,29 +34,32 @@ module ActiveRecordDataLoader
|
|
23
34
|
|
24
35
|
private
|
25
36
|
|
26
|
-
attr_reader :data_generator
|
37
|
+
attr_reader :data_generator, :file_adapter
|
27
38
|
|
28
|
-
def
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
end
|
33
|
-
|
34
|
-
def copy_command(connection)
|
35
|
-
@copy_command ||= begin
|
36
|
-
quoted_table_name = connection.quote_table_name(data_generator.table)
|
37
|
-
columns = data_generator
|
38
|
-
.column_list
|
39
|
-
.map { |c| connection.quote_column_name(c) }
|
40
|
-
.join(", ")
|
41
|
-
|
42
|
-
<<~SQL
|
43
|
-
COPY #{quoted_table_name} (#{columns})
|
44
|
-
FROM STDIN WITH (FORMAT CSV)
|
45
|
-
SQL
|
39
|
+
def copy(connection:, table:, columns:, data:, row_numbers:)
|
40
|
+
raw_connection = connection.raw_connection
|
41
|
+
raw_connection.copy_data("COPY #{table} (#{columns}) FROM STDIN WITH (FORMAT CSV)") do
|
42
|
+
raw_connection.put_copy_data(data.join("\n"))
|
46
43
|
end
|
47
44
|
end
|
48
45
|
|
46
|
+
def csv_rows(row_numbers, connection)
|
47
|
+
row_numbers.map do |i|
|
48
|
+
data_generator.generate_row(i)&.map { |d| quote_data(d, connection) }&.join(",")
|
49
|
+
end.compact
|
50
|
+
end
|
51
|
+
|
52
|
+
def table_name_for_copy(connection)
|
53
|
+
@table_name_for_copy ||= connection.quote_table_name(data_generator.table)
|
54
|
+
end
|
55
|
+
|
56
|
+
def columns_for_copy(connection)
|
57
|
+
@columns_for_copy ||= data_generator
|
58
|
+
.column_list
|
59
|
+
.map { |c| connection.quote_column_name(c) }
|
60
|
+
.join(", ")
|
61
|
+
end
|
62
|
+
|
49
63
|
def quote_data(data, connection)
|
50
64
|
return if data.nil?
|
51
65
|
|
@@ -13,16 +13,24 @@ module ActiveRecordDataLoader
|
|
13
13
|
|
14
14
|
def adapter
|
15
15
|
@adapter ||=
|
16
|
-
if
|
17
|
-
require "ffaker"
|
16
|
+
if can_use?("ffaker", "2.1.0")
|
18
17
|
FFakerGemAdapter.new
|
19
|
-
elsif
|
20
|
-
require "faker"
|
18
|
+
elsif can_use?("faker", "1.9.3")
|
21
19
|
FakerGemAdapter.new
|
22
20
|
else
|
23
21
|
NoGemAdapter.new
|
24
22
|
end
|
25
23
|
end
|
24
|
+
|
25
|
+
def can_use?(gem, min_version)
|
26
|
+
gemspec = Gem.loaded_specs[gem]
|
27
|
+
return false unless gemspec.present? && gemspec.version >= Gem::Version.new(min_version)
|
28
|
+
|
29
|
+
require gem
|
30
|
+
true
|
31
|
+
rescue LoadError
|
32
|
+
false
|
33
|
+
end
|
26
34
|
end
|
27
35
|
|
28
36
|
class FFakerGemAdapter
|
@@ -3,13 +3,16 @@
|
|
3
3
|
module ActiveRecordDataLoader
|
4
4
|
module Dsl
|
5
5
|
class Model
|
6
|
-
attr_reader :klass, :columns, :row_count, :polymorphic_associations, :belongs_to_associations
|
6
|
+
attr_reader :klass, :columns, :row_count, :polymorphic_associations, :belongs_to_associations,
|
7
|
+
:raise_on_duplicates_flag
|
7
8
|
|
8
9
|
def initialize(klass:, configuration:)
|
9
10
|
@klass = klass
|
10
11
|
@columns = {}
|
11
12
|
@row_count = configuration.default_row_count
|
12
13
|
@batch_size = configuration.default_batch_size
|
14
|
+
@raise_on_duplicates_flag = configuration.raise_on_duplicates
|
15
|
+
@max_duplicate_retries = configuration.max_duplicate_retries
|
13
16
|
@polymorphic_associations = []
|
14
17
|
@belongs_to_associations = []
|
15
18
|
end
|
@@ -22,6 +25,20 @@ module ActiveRecordDataLoader
|
|
22
25
|
@batch_size = (size || @batch_size)
|
23
26
|
end
|
24
27
|
|
28
|
+
def raise_on_duplicates
|
29
|
+
@raise_on_duplicates_flag = true
|
30
|
+
end
|
31
|
+
|
32
|
+
def do_not_raise_on_duplicates
|
33
|
+
@raise_on_duplicates_flag = false
|
34
|
+
end
|
35
|
+
|
36
|
+
def max_duplicate_retries(retries = nil)
|
37
|
+
return @max_duplicate_retries if retries.nil?
|
38
|
+
|
39
|
+
@max_duplicate_retries = retries
|
40
|
+
end
|
41
|
+
|
25
42
|
def column(name, func)
|
26
43
|
@columns[name.to_sym] = func
|
27
44
|
end
|
@@ -32,7 +49,7 @@ module ActiveRecordDataLoader
|
|
32
49
|
).tap { |a| block.call(a) }
|
33
50
|
end
|
34
51
|
|
35
|
-
def belongs_to(assoc_name, eligible_set:)
|
52
|
+
def belongs_to(assoc_name, eligible_set: nil)
|
36
53
|
@belongs_to_associations << BelongsToAssociation.new(@klass, assoc_name, eligible_set)
|
37
54
|
end
|
38
55
|
end
|
@@ -0,0 +1,48 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module ActiveRecordDataLoader
|
4
|
+
class FileOutputAdapter
|
5
|
+
def self.with_output_options(options)
|
6
|
+
adapter = new(options)
|
7
|
+
pre_command = options[:pre_command]
|
8
|
+
adapter.write_command(pre_command) if pre_command
|
9
|
+
yield adapter
|
10
|
+
post_command = options[:post_command]
|
11
|
+
adapter.write_command(post_command) if post_command
|
12
|
+
end
|
13
|
+
|
14
|
+
def initialize(options)
|
15
|
+
@filename = options.fetch(:filename, "active_record_data_loader_script.sql")
|
16
|
+
@file_basename = File.basename(@filename, File.extname(@filename))
|
17
|
+
@path = File.expand_path(File.dirname(@filename))
|
18
|
+
File.open(@filename, File::TRUNC) if File.exist?(@filename)
|
19
|
+
end
|
20
|
+
|
21
|
+
def copy(table:, columns:, data:, row_numbers:)
|
22
|
+
data_filename = data_filename(table, row_numbers)
|
23
|
+
File.open(data_filename, "w") { |f| f.puts(data) }
|
24
|
+
File.open(filename, "a") do |file|
|
25
|
+
file.puts("\\COPY #{table} (#{columns}) FROM '#{data_filename}' WITH (FORMAT CSV);")
|
26
|
+
end
|
27
|
+
end
|
28
|
+
|
29
|
+
def insert(command)
|
30
|
+
write_command(command)
|
31
|
+
end
|
32
|
+
|
33
|
+
def write_command(command)
|
34
|
+
File.open(filename, "a") { |f| f.puts("#{command.gsub("\n", ' ')};") }
|
35
|
+
end
|
36
|
+
|
37
|
+
private
|
38
|
+
|
39
|
+
attr_reader :filename, :path, :file_basename
|
40
|
+
|
41
|
+
def data_filename(table, row_numbers)
|
42
|
+
File.join(
|
43
|
+
path,
|
44
|
+
"#{file_basename}_#{table.gsub(/"/, '')}_rows_#{row_numbers[0]}_to_#{row_numbers[-1]}.csv"
|
45
|
+
)
|
46
|
+
end
|
47
|
+
end
|
48
|
+
end
|
@@ -1,98 +1,82 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
require "benchmark"
|
4
|
-
|
5
3
|
module ActiveRecordDataLoader
|
6
4
|
class Loader
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
total_rows:,
|
11
|
-
batch_size:,
|
12
|
-
configuration:
|
13
|
-
)
|
14
|
-
new(
|
15
|
-
logger: configuration.logger,
|
16
|
-
statement_timeout: configuration.statement_timeout,
|
17
|
-
strategy: strategy_class(configuration.connection_factory).new(data_generator),
|
18
|
-
connection_factory: configuration.connection_factory
|
19
|
-
).load_data(batch_size, total_rows)
|
20
|
-
end
|
21
|
-
|
22
|
-
private
|
23
|
-
|
24
|
-
def strategy_class(connection_factory)
|
25
|
-
if connection_factory.call.raw_connection.respond_to?(:copy_data)
|
26
|
-
ActiveRecordDataLoader::CopyStrategy
|
27
|
-
else
|
28
|
-
ActiveRecordDataLoader::BulkInsertStrategy
|
29
|
-
end
|
30
|
-
end
|
5
|
+
def initialize(configuration, definition)
|
6
|
+
@configuration = configuration
|
7
|
+
@definition = definition
|
31
8
|
end
|
32
9
|
|
33
|
-
def
|
34
|
-
|
35
|
-
@strategy = strategy
|
36
|
-
@statement_timeout = statement_timeout
|
37
|
-
@connection_factory = connection_factory
|
38
|
-
end
|
10
|
+
def load_data
|
11
|
+
ActiveRecordDataLoader::ActiveRecord::PerRowValueCache.clear
|
39
12
|
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
logger.info(
|
44
|
-
"[ActiveRecordDataLoader] "\
|
45
|
-
"Loading #{total_rows} row(s) into '#{strategy.table_name}' via #{strategy.name}. "\
|
46
|
-
"#{batch_size} row(s) per batch, #{batch_count} batch(es)."
|
47
|
-
)
|
48
|
-
total_time = Benchmark.realtime do
|
49
|
-
load_in_batches(batch_size, total_rows, batch_count)
|
13
|
+
file_adapter_class.with_output_options(file_adapter_options) do |file_adapter|
|
14
|
+
definition.models.map { |m| load_model(m, file_adapter) }
|
50
15
|
end
|
51
|
-
logger.info(
|
52
|
-
"[ActiveRecordDataLoader] "\
|
53
|
-
"Completed loading #{total_rows} row(s) into '#{strategy.table_name}' "\
|
54
|
-
"in #{total_time} seconds."
|
55
|
-
)
|
56
16
|
end
|
57
17
|
|
58
18
|
private
|
59
19
|
|
60
|
-
attr_reader :
|
20
|
+
attr_reader :definition, :configuration
|
61
21
|
|
62
|
-
def
|
63
|
-
|
64
|
-
|
65
|
-
|
22
|
+
def load_model(model, file_adapter)
|
23
|
+
ActiveRecordDataLoader::TableLoader.load_data(
|
24
|
+
batch_size: model.batch_size,
|
25
|
+
total_rows: model.row_count,
|
26
|
+
connection_handler: connection_handler,
|
27
|
+
strategy: strategy_class.new(generator(model), file_adapter),
|
28
|
+
logger: configuration.logger
|
29
|
+
)
|
30
|
+
end
|
66
31
|
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
32
|
+
def generator(model)
|
33
|
+
ActiveRecordDataLoader::ActiveRecord::ModelDataGenerator.new(
|
34
|
+
model: model.klass,
|
35
|
+
column_settings: model.columns,
|
36
|
+
polymorphic_settings: model.polymorphic_associations,
|
37
|
+
belongs_to_settings: model.belongs_to_associations,
|
38
|
+
connection_factory: configuration.connection_factory,
|
39
|
+
raise_on_duplicates: model.raise_on_duplicates_flag,
|
40
|
+
max_duplicate_retries: model.max_duplicate_retries,
|
41
|
+
logger: configuration.logger
|
42
|
+
)
|
73
43
|
end
|
74
44
|
|
75
|
-
def
|
76
|
-
if
|
77
|
-
|
78
|
-
update_statement_timeout(statement_timeout)
|
79
|
-
yield connection
|
80
|
-
update_statement_timeout(original_timeout)
|
45
|
+
def file_adapter_class
|
46
|
+
if configuration.output.present?
|
47
|
+
ActiveRecordDataLoader::FileOutputAdapter
|
81
48
|
else
|
82
|
-
|
49
|
+
ActiveRecordDataLoader::NullOutputAdapter
|
83
50
|
end
|
84
51
|
end
|
85
52
|
|
86
|
-
def
|
87
|
-
|
53
|
+
def file_adapter_options
|
54
|
+
timeout_commands =
|
55
|
+
if connection_handler.supports_timeout?
|
56
|
+
{
|
57
|
+
pre_command: connection_handler.timeout_set_command,
|
58
|
+
post_command: connection_handler.reset_timeout_command,
|
59
|
+
}
|
60
|
+
else
|
61
|
+
{}
|
62
|
+
end
|
63
|
+
|
64
|
+
timeout_commands.merge(filename: configuration.output)
|
88
65
|
end
|
89
66
|
|
90
|
-
def
|
91
|
-
|
67
|
+
def strategy_class
|
68
|
+
@strategy_class ||= if connection_handler.supports_copy?
|
69
|
+
ActiveRecordDataLoader::CopyStrategy
|
70
|
+
else
|
71
|
+
ActiveRecordDataLoader::BulkInsertStrategy
|
72
|
+
end
|
92
73
|
end
|
93
74
|
|
94
|
-
def
|
95
|
-
|
75
|
+
def connection_handler
|
76
|
+
@connection_handler ||= ActiveRecordDataLoader::ConnectionHandler.new(
|
77
|
+
connection_factory: configuration.connection_factory,
|
78
|
+
statement_timeout: configuration.statement_timeout
|
79
|
+
)
|
96
80
|
end
|
97
81
|
end
|
98
82
|
end
|
@@ -0,0 +1,15 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module ActiveRecordDataLoader
|
4
|
+
class NullOutputAdapter
|
5
|
+
def self.with_output_options(_options)
|
6
|
+
yield new
|
7
|
+
end
|
8
|
+
|
9
|
+
def copy(table:, columns:, data:, row_numbers:); end
|
10
|
+
|
11
|
+
def insert(command); end
|
12
|
+
|
13
|
+
def write_command(command); end
|
14
|
+
end
|
15
|
+
end
|