active_record_data_loader 1.0.2 → 1.3.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/.github/workflows/build.yml +51 -0
- data/.github/workflows/codeql-analysis.yml +70 -0
- data/.github/workflows/gem-push.yml +29 -0
- data/.rubocop.yml +46 -7
- data/CHANGELOG.md +38 -2
- data/CODE_OF_CONDUCT.md +2 -2
- data/Gemfile.lock +71 -73
- data/README.md +162 -9
- data/Rakefile +8 -2
- data/active_record_data_loader.gemspec +7 -6
- data/config/database.yml +2 -0
- data/docker-compose.yml +18 -0
- data/gemfiles/activerecord_6.gemfile +1 -1
- data/lib/active_record_data_loader/active_record/{belongs_to_configuration.rb → belongs_to_data_provider.rb} +8 -7
- data/lib/active_record_data_loader/active_record/{column_configuration.rb → column_data_provider.rb} +2 -2
- data/lib/active_record_data_loader/active_record/enum_value_generator.rb +9 -8
- data/lib/active_record_data_loader/active_record/integer_value_generator.rb +1 -1
- data/lib/active_record_data_loader/active_record/list.rb +47 -0
- data/lib/active_record_data_loader/active_record/model_data_generator.rb +62 -7
- data/lib/active_record_data_loader/active_record/{polymorphic_belongs_to_configuration.rb → polymorphic_belongs_to_data_provider.rb} +12 -7
- data/lib/active_record_data_loader/active_record/unique_index_tracker.rb +67 -0
- data/lib/active_record_data_loader/bulk_insert_strategy.rb +16 -8
- data/lib/active_record_data_loader/configuration.rb +26 -3
- data/lib/active_record_data_loader/connection_handler.rb +52 -0
- data/lib/active_record_data_loader/copy_strategy.rb +38 -24
- data/lib/active_record_data_loader/data_faker.rb +12 -4
- data/lib/active_record_data_loader/dsl/model.rb +19 -2
- data/lib/active_record_data_loader/errors.rb +5 -0
- data/lib/active_record_data_loader/file_output_adapter.rb +48 -0
- data/lib/active_record_data_loader/loader.rb +55 -71
- data/lib/active_record_data_loader/null_output_adapter.rb +15 -0
- data/lib/active_record_data_loader/table_loader.rb +59 -0
- data/lib/active_record_data_loader/version.rb +1 -1
- data/lib/active_record_data_loader.rb +11 -38
- metadata +51 -29
- data/.travis.yml +0 -24
- data/config/database.yml.travis +0 -12
@@ -2,20 +2,21 @@
|
|
2
2
|
|
3
3
|
module ActiveRecordDataLoader
|
4
4
|
module ActiveRecord
|
5
|
-
class
|
6
|
-
def self.
|
5
|
+
class PolymorphicBelongsToDataProvider
|
6
|
+
def self.provider_for(polymorphic_settings:, strategy: :random)
|
7
7
|
ar_association = polymorphic_settings.model_class.reflect_on_association(
|
8
8
|
polymorphic_settings.name
|
9
9
|
)
|
10
10
|
raise "#{name} only supports polymorphic associations" unless ar_association.polymorphic?
|
11
11
|
|
12
|
-
new(polymorphic_settings, ar_association).polymorphic_config
|
12
|
+
new(polymorphic_settings, ar_association, strategy).polymorphic_config
|
13
13
|
end
|
14
14
|
|
15
|
-
def initialize(settings, ar_association)
|
15
|
+
def initialize(settings, ar_association, strategy)
|
16
16
|
@settings = settings
|
17
17
|
@ar_association = ar_association
|
18
18
|
@model_count = settings.weighted_models.size
|
19
|
+
@strategy = strategy
|
19
20
|
end
|
20
21
|
|
21
22
|
def polymorphic_config
|
@@ -32,21 +33,25 @@ module ActiveRecordDataLoader
|
|
32
33
|
end
|
33
34
|
|
34
35
|
def foreign_key(row_number)
|
35
|
-
possible_values[row_number % @model_count][1].
|
36
|
+
possible_values[row_number % @model_count][1].next
|
36
37
|
end
|
37
38
|
|
38
39
|
def possible_values
|
39
40
|
@possible_values ||= begin
|
40
41
|
values = @settings.models.keys.map do |klass|
|
41
|
-
[klass.name,
|
42
|
+
[klass.name, values_query(klass)]
|
42
43
|
end.to_h
|
43
44
|
|
44
45
|
@settings.weighted_models.map { |klass| [klass.name, values[klass.name]] }
|
45
46
|
end
|
46
47
|
end
|
47
48
|
|
49
|
+
def values_query(klass)
|
50
|
+
List.for(base_query(klass).pluck(klass.primary_key), strategy: @strategy)
|
51
|
+
end
|
52
|
+
|
48
53
|
def base_query(klass)
|
49
|
-
if @settings.queries[klass]
|
54
|
+
if @settings.queries[klass].respond_to?(:call)
|
50
55
|
@settings.queries[klass].call.all
|
51
56
|
else
|
52
57
|
klass.all
|
@@ -0,0 +1,67 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module ActiveRecordDataLoader
|
4
|
+
module ActiveRecord
|
5
|
+
class UniqueIndexTracker
|
6
|
+
Index = Struct.new(:name, :columns, :column_indexes, keyword_init: true)
|
7
|
+
|
8
|
+
def initialize(model:, connection_factory:)
|
9
|
+
@model = model
|
10
|
+
@table = model.table_name
|
11
|
+
@unique_indexes = []
|
12
|
+
@unique_values_used = {}
|
13
|
+
find_unique_indexes(connection_factory)
|
14
|
+
end
|
15
|
+
|
16
|
+
def map_indexed_columns(column_list)
|
17
|
+
@unique_indexes = @raw_unique_indexes.map do |index|
|
18
|
+
@unique_values_used[index.name] = Set.new
|
19
|
+
columns = index.columns.map(&:to_sym)
|
20
|
+
Index.new(
|
21
|
+
name: index.name,
|
22
|
+
columns: columns,
|
23
|
+
column_indexes: columns.map { |c| column_list.find_index(c) }
|
24
|
+
)
|
25
|
+
end
|
26
|
+
end
|
27
|
+
|
28
|
+
def repeating_unique_values?(row)
|
29
|
+
@unique_indexes.map do |index|
|
30
|
+
values = index.column_indexes.map { |i| row[i] }
|
31
|
+
@unique_values_used.fetch(index.name).include?(values)
|
32
|
+
end.any?
|
33
|
+
end
|
34
|
+
|
35
|
+
def capture_unique_values(row)
|
36
|
+
return unless row.present?
|
37
|
+
|
38
|
+
@unique_indexes.each do |index|
|
39
|
+
values = index.column_indexes.map { |i| row[i] }
|
40
|
+
@unique_values_used.fetch(index.name) << values
|
41
|
+
end
|
42
|
+
row
|
43
|
+
end
|
44
|
+
|
45
|
+
def contained_in_index?(ar_column)
|
46
|
+
target_column = if @model.reflect_on_association(ar_column.name)&.belongs_to?
|
47
|
+
ar_column.join_foreign_key.to_sym
|
48
|
+
else
|
49
|
+
ar_column.name.to_sym
|
50
|
+
end
|
51
|
+
|
52
|
+
@raw_unique_indexes.flat_map { |i| i.columns.map(&:to_sym) }.include?(target_column)
|
53
|
+
end
|
54
|
+
|
55
|
+
private
|
56
|
+
|
57
|
+
attr_reader :table
|
58
|
+
|
59
|
+
def find_unique_indexes(connection_factory)
|
60
|
+
connection = connection_factory.call
|
61
|
+
@raw_unique_indexes = connection.indexes(table).select(&:unique)
|
62
|
+
ensure
|
63
|
+
connection&.close
|
64
|
+
end
|
65
|
+
end
|
66
|
+
end
|
67
|
+
end
|
@@ -2,15 +2,18 @@
|
|
2
2
|
|
3
3
|
module ActiveRecordDataLoader
|
4
4
|
class BulkInsertStrategy
|
5
|
-
def initialize(data_generator)
|
5
|
+
def initialize(data_generator, file_adapter)
|
6
6
|
@data_generator = data_generator
|
7
|
+
@file_adapter = file_adapter
|
7
8
|
end
|
8
9
|
|
9
10
|
def load_batch(row_numbers, connection)
|
10
|
-
|
11
|
+
command = <<~SQL
|
11
12
|
INSERT INTO #{quoted_table_name(connection)} (#{column_list(connection)})
|
12
13
|
VALUES #{values(row_numbers, connection)}
|
13
14
|
SQL
|
15
|
+
insert(connection: connection, command: command)
|
16
|
+
file_adapter.insert(command)
|
14
17
|
end
|
15
18
|
|
16
19
|
def table_name
|
@@ -23,7 +26,11 @@ module ActiveRecordDataLoader
|
|
23
26
|
|
24
27
|
private
|
25
28
|
|
26
|
-
attr_reader :data_generator
|
29
|
+
attr_reader :data_generator, :file_adapter
|
30
|
+
|
31
|
+
def insert(connection:, command:)
|
32
|
+
connection.insert(command)
|
33
|
+
end
|
27
34
|
|
28
35
|
def quoted_table_name(connection)
|
29
36
|
@quoted_table_name ||= connection.quote_table_name(data_generator.table)
|
@@ -38,15 +45,16 @@ module ActiveRecordDataLoader
|
|
38
45
|
|
39
46
|
def values(row_numbers, connection)
|
40
47
|
row_numbers
|
41
|
-
.map { |i|
|
48
|
+
.map { |i| row_values(i, connection) }
|
49
|
+
.compact
|
42
50
|
.join(",")
|
43
51
|
end
|
44
52
|
|
45
53
|
def row_values(row_number, connection)
|
46
|
-
data_generator
|
47
|
-
|
48
|
-
|
49
|
-
|
54
|
+
row = data_generator.generate_row(row_number)
|
55
|
+
return unless row.present?
|
56
|
+
|
57
|
+
"(#{row.map { |v| connection.quote(v) }.join(',')})"
|
50
58
|
end
|
51
59
|
end
|
52
60
|
end
|
@@ -2,29 +2,52 @@
|
|
2
2
|
|
3
3
|
module ActiveRecordDataLoader
|
4
4
|
class Configuration
|
5
|
-
attr_accessor :
|
5
|
+
attr_accessor :connection_factory, :default_batch_size, :default_row_count,
|
6
|
+
:logger, :max_duplicate_retries, :raise_on_duplicates, :statement_timeout
|
7
|
+
attr_reader :output
|
6
8
|
|
7
9
|
def initialize(
|
8
10
|
default_batch_size: 100_000,
|
9
11
|
default_row_count: 1,
|
10
12
|
logger: nil,
|
11
13
|
statement_timeout: "2min",
|
12
|
-
connection_factory: -> { ::ActiveRecord::Base.connection }
|
14
|
+
connection_factory: -> { ::ActiveRecord::Base.connection },
|
15
|
+
raise_on_duplicates: false,
|
16
|
+
max_duplicate_retries: 5,
|
17
|
+
output: nil
|
13
18
|
)
|
14
19
|
@default_batch_size = default_batch_size
|
15
20
|
@default_row_count = default_row_count
|
16
21
|
@logger = logger || default_logger
|
17
22
|
@statement_timeout = statement_timeout
|
18
23
|
@connection_factory = connection_factory
|
24
|
+
@raise_on_duplicates = raise_on_duplicates
|
25
|
+
@max_duplicate_retries = max_duplicate_retries
|
26
|
+
self.output = output
|
27
|
+
end
|
28
|
+
|
29
|
+
def output=(output)
|
30
|
+
@output = validate_output(output)
|
19
31
|
end
|
20
32
|
|
21
33
|
private
|
22
34
|
|
35
|
+
def validate_output(output)
|
36
|
+
if output.to_s.blank?
|
37
|
+
nil
|
38
|
+
elsif output.is_a?(String)
|
39
|
+
output
|
40
|
+
else
|
41
|
+
raise "The output configuration parameter must be a filename meant to be the "\
|
42
|
+
"target for the SQL script"
|
43
|
+
end
|
44
|
+
end
|
45
|
+
|
23
46
|
def default_logger
|
24
47
|
if defined?(Rails) && Rails.respond_to?(:logger)
|
25
48
|
Rails.logger
|
26
49
|
else
|
27
|
-
Logger.new(
|
50
|
+
Logger.new($stdout, level: :info)
|
28
51
|
end
|
29
52
|
end
|
30
53
|
end
|
@@ -0,0 +1,52 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module ActiveRecordDataLoader
|
4
|
+
class ConnectionHandler
|
5
|
+
def initialize(connection_factory:, statement_timeout:)
|
6
|
+
@connection_factory = connection_factory
|
7
|
+
@statement_timeout = statement_timeout
|
8
|
+
cache_facts
|
9
|
+
end
|
10
|
+
|
11
|
+
def with_connection
|
12
|
+
connection = connection_factory.call
|
13
|
+
if supports_timeout?
|
14
|
+
connection.execute(timeout_set_command)
|
15
|
+
yield connection
|
16
|
+
connection.execute(reset_timeout_command)
|
17
|
+
else
|
18
|
+
yield connection
|
19
|
+
end
|
20
|
+
ensure
|
21
|
+
connection&.close
|
22
|
+
end
|
23
|
+
|
24
|
+
def supports_timeout?
|
25
|
+
@supports_timeout
|
26
|
+
end
|
27
|
+
|
28
|
+
def supports_copy?
|
29
|
+
@supports_copy
|
30
|
+
end
|
31
|
+
|
32
|
+
def timeout_set_command
|
33
|
+
"SET statement_timeout = \"#{statement_timeout}\""
|
34
|
+
end
|
35
|
+
|
36
|
+
def reset_timeout_command
|
37
|
+
"RESET statement_timeout"
|
38
|
+
end
|
39
|
+
|
40
|
+
private
|
41
|
+
|
42
|
+
attr_reader :connection_factory, :statement_timeout
|
43
|
+
|
44
|
+
def cache_facts
|
45
|
+
connection = connection_factory.call
|
46
|
+
@supports_timeout = connection.adapter_name.downcase.to_sym == :postgresql
|
47
|
+
@supports_copy = connection.raw_connection.respond_to?(:copy_data)
|
48
|
+
ensure
|
49
|
+
connection&.close
|
50
|
+
end
|
51
|
+
end
|
52
|
+
end
|
@@ -2,15 +2,26 @@
|
|
2
2
|
|
3
3
|
module ActiveRecordDataLoader
|
4
4
|
class CopyStrategy
|
5
|
-
def initialize(data_generator)
|
5
|
+
def initialize(data_generator, file_adapter)
|
6
6
|
@data_generator = data_generator
|
7
|
+
@file_adapter = file_adapter
|
7
8
|
end
|
8
9
|
|
9
10
|
def load_batch(row_numbers, connection)
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
11
|
+
data = csv_rows(row_numbers, connection)
|
12
|
+
copy(
|
13
|
+
connection: connection,
|
14
|
+
table: table_name_for_copy(connection),
|
15
|
+
columns: columns_for_copy(connection),
|
16
|
+
data: data,
|
17
|
+
row_numbers: row_numbers
|
18
|
+
)
|
19
|
+
file_adapter.copy(
|
20
|
+
table: table_name_for_copy(connection),
|
21
|
+
columns: columns_for_copy(connection),
|
22
|
+
data: data,
|
23
|
+
row_numbers: row_numbers
|
24
|
+
)
|
14
25
|
end
|
15
26
|
|
16
27
|
def table_name
|
@@ -23,29 +34,32 @@ module ActiveRecordDataLoader
|
|
23
34
|
|
24
35
|
private
|
25
36
|
|
26
|
-
attr_reader :data_generator
|
37
|
+
attr_reader :data_generator, :file_adapter
|
27
38
|
|
28
|
-
def
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
end
|
33
|
-
|
34
|
-
def copy_command(connection)
|
35
|
-
@copy_command ||= begin
|
36
|
-
quoted_table_name = connection.quote_table_name(data_generator.table)
|
37
|
-
columns = data_generator
|
38
|
-
.column_list
|
39
|
-
.map { |c| connection.quote_column_name(c) }
|
40
|
-
.join(", ")
|
41
|
-
|
42
|
-
<<~SQL
|
43
|
-
COPY #{quoted_table_name} (#{columns})
|
44
|
-
FROM STDIN WITH (FORMAT CSV)
|
45
|
-
SQL
|
39
|
+
def copy(connection:, table:, columns:, data:, row_numbers:)
|
40
|
+
raw_connection = connection.raw_connection
|
41
|
+
raw_connection.copy_data("COPY #{table} (#{columns}) FROM STDIN WITH (FORMAT CSV)") do
|
42
|
+
raw_connection.put_copy_data(data.join("\n"))
|
46
43
|
end
|
47
44
|
end
|
48
45
|
|
46
|
+
def csv_rows(row_numbers, connection)
|
47
|
+
row_numbers.map do |i|
|
48
|
+
data_generator.generate_row(i)&.map { |d| quote_data(d, connection) }&.join(",")
|
49
|
+
end.compact
|
50
|
+
end
|
51
|
+
|
52
|
+
def table_name_for_copy(connection)
|
53
|
+
@table_name_for_copy ||= connection.quote_table_name(data_generator.table)
|
54
|
+
end
|
55
|
+
|
56
|
+
def columns_for_copy(connection)
|
57
|
+
@columns_for_copy ||= data_generator
|
58
|
+
.column_list
|
59
|
+
.map { |c| connection.quote_column_name(c) }
|
60
|
+
.join(", ")
|
61
|
+
end
|
62
|
+
|
49
63
|
def quote_data(data, connection)
|
50
64
|
return if data.nil?
|
51
65
|
|
@@ -13,16 +13,24 @@ module ActiveRecordDataLoader
|
|
13
13
|
|
14
14
|
def adapter
|
15
15
|
@adapter ||=
|
16
|
-
if
|
17
|
-
require "ffaker"
|
16
|
+
if can_use?("ffaker", "2.1.0")
|
18
17
|
FFakerGemAdapter.new
|
19
|
-
elsif
|
20
|
-
require "faker"
|
18
|
+
elsif can_use?("faker", "1.9.3")
|
21
19
|
FakerGemAdapter.new
|
22
20
|
else
|
23
21
|
NoGemAdapter.new
|
24
22
|
end
|
25
23
|
end
|
24
|
+
|
25
|
+
def can_use?(gem, min_version)
|
26
|
+
gemspec = Gem.loaded_specs[gem]
|
27
|
+
return false unless gemspec.present? && gemspec.version >= Gem::Version.new(min_version)
|
28
|
+
|
29
|
+
require gem
|
30
|
+
true
|
31
|
+
rescue LoadError
|
32
|
+
false
|
33
|
+
end
|
26
34
|
end
|
27
35
|
|
28
36
|
class FFakerGemAdapter
|
@@ -3,13 +3,16 @@
|
|
3
3
|
module ActiveRecordDataLoader
|
4
4
|
module Dsl
|
5
5
|
class Model
|
6
|
-
attr_reader :klass, :columns, :row_count, :polymorphic_associations, :belongs_to_associations
|
6
|
+
attr_reader :klass, :columns, :row_count, :polymorphic_associations, :belongs_to_associations,
|
7
|
+
:raise_on_duplicates_flag
|
7
8
|
|
8
9
|
def initialize(klass:, configuration:)
|
9
10
|
@klass = klass
|
10
11
|
@columns = {}
|
11
12
|
@row_count = configuration.default_row_count
|
12
13
|
@batch_size = configuration.default_batch_size
|
14
|
+
@raise_on_duplicates_flag = configuration.raise_on_duplicates
|
15
|
+
@max_duplicate_retries = configuration.max_duplicate_retries
|
13
16
|
@polymorphic_associations = []
|
14
17
|
@belongs_to_associations = []
|
15
18
|
end
|
@@ -22,6 +25,20 @@ module ActiveRecordDataLoader
|
|
22
25
|
@batch_size = (size || @batch_size)
|
23
26
|
end
|
24
27
|
|
28
|
+
def raise_on_duplicates
|
29
|
+
@raise_on_duplicates_flag = true
|
30
|
+
end
|
31
|
+
|
32
|
+
def do_not_raise_on_duplicates
|
33
|
+
@raise_on_duplicates_flag = false
|
34
|
+
end
|
35
|
+
|
36
|
+
def max_duplicate_retries(retries = nil)
|
37
|
+
return @max_duplicate_retries if retries.nil?
|
38
|
+
|
39
|
+
@max_duplicate_retries = retries
|
40
|
+
end
|
41
|
+
|
25
42
|
def column(name, func)
|
26
43
|
@columns[name.to_sym] = func
|
27
44
|
end
|
@@ -32,7 +49,7 @@ module ActiveRecordDataLoader
|
|
32
49
|
).tap { |a| block.call(a) }
|
33
50
|
end
|
34
51
|
|
35
|
-
def belongs_to(assoc_name, eligible_set:)
|
52
|
+
def belongs_to(assoc_name, eligible_set: nil)
|
36
53
|
@belongs_to_associations << BelongsToAssociation.new(@klass, assoc_name, eligible_set)
|
37
54
|
end
|
38
55
|
end
|
@@ -0,0 +1,48 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module ActiveRecordDataLoader
|
4
|
+
class FileOutputAdapter
|
5
|
+
def self.with_output_options(options)
|
6
|
+
adapter = new(options)
|
7
|
+
pre_command = options[:pre_command]
|
8
|
+
adapter.write_command(pre_command) if pre_command
|
9
|
+
yield adapter
|
10
|
+
post_command = options[:post_command]
|
11
|
+
adapter.write_command(post_command) if post_command
|
12
|
+
end
|
13
|
+
|
14
|
+
def initialize(options)
|
15
|
+
@filename = options.fetch(:filename, "active_record_data_loader_script.sql")
|
16
|
+
@file_basename = File.basename(@filename, File.extname(@filename))
|
17
|
+
@path = File.expand_path(File.dirname(@filename))
|
18
|
+
File.open(@filename, File::TRUNC) if File.exist?(@filename)
|
19
|
+
end
|
20
|
+
|
21
|
+
def copy(table:, columns:, data:, row_numbers:)
|
22
|
+
data_filename = data_filename(table, row_numbers)
|
23
|
+
File.open(data_filename, "w") { |f| f.puts(data) }
|
24
|
+
File.open(filename, "a") do |file|
|
25
|
+
file.puts("\\COPY #{table} (#{columns}) FROM '#{data_filename}' WITH (FORMAT CSV);")
|
26
|
+
end
|
27
|
+
end
|
28
|
+
|
29
|
+
def insert(command)
|
30
|
+
write_command(command)
|
31
|
+
end
|
32
|
+
|
33
|
+
def write_command(command)
|
34
|
+
File.open(filename, "a") { |f| f.puts("#{command.gsub("\n", ' ')};") }
|
35
|
+
end
|
36
|
+
|
37
|
+
private
|
38
|
+
|
39
|
+
attr_reader :filename, :path, :file_basename
|
40
|
+
|
41
|
+
def data_filename(table, row_numbers)
|
42
|
+
File.join(
|
43
|
+
path,
|
44
|
+
"#{file_basename}_#{table.gsub(/"/, '')}_rows_#{row_numbers[0]}_to_#{row_numbers[-1]}.csv"
|
45
|
+
)
|
46
|
+
end
|
47
|
+
end
|
48
|
+
end
|
@@ -1,98 +1,82 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
require "benchmark"
|
4
|
-
|
5
3
|
module ActiveRecordDataLoader
|
6
4
|
class Loader
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
total_rows:,
|
11
|
-
batch_size:,
|
12
|
-
configuration:
|
13
|
-
)
|
14
|
-
new(
|
15
|
-
logger: configuration.logger,
|
16
|
-
statement_timeout: configuration.statement_timeout,
|
17
|
-
strategy: strategy_class(configuration.connection_factory).new(data_generator),
|
18
|
-
connection_factory: configuration.connection_factory
|
19
|
-
).load_data(batch_size, total_rows)
|
20
|
-
end
|
21
|
-
|
22
|
-
private
|
23
|
-
|
24
|
-
def strategy_class(connection_factory)
|
25
|
-
if connection_factory.call.raw_connection.respond_to?(:copy_data)
|
26
|
-
ActiveRecordDataLoader::CopyStrategy
|
27
|
-
else
|
28
|
-
ActiveRecordDataLoader::BulkInsertStrategy
|
29
|
-
end
|
30
|
-
end
|
5
|
+
def initialize(configuration, definition)
|
6
|
+
@configuration = configuration
|
7
|
+
@definition = definition
|
31
8
|
end
|
32
9
|
|
33
|
-
def
|
34
|
-
|
35
|
-
@strategy = strategy
|
36
|
-
@statement_timeout = statement_timeout
|
37
|
-
@connection_factory = connection_factory
|
38
|
-
end
|
10
|
+
def load_data
|
11
|
+
ActiveRecordDataLoader::ActiveRecord::PerRowValueCache.clear
|
39
12
|
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
logger.info(
|
44
|
-
"[ActiveRecordDataLoader] "\
|
45
|
-
"Loading #{total_rows} row(s) into '#{strategy.table_name}' via #{strategy.name}. "\
|
46
|
-
"#{batch_size} row(s) per batch, #{batch_count} batch(es)."
|
47
|
-
)
|
48
|
-
total_time = Benchmark.realtime do
|
49
|
-
load_in_batches(batch_size, total_rows, batch_count)
|
13
|
+
file_adapter_class.with_output_options(file_adapter_options) do |file_adapter|
|
14
|
+
definition.models.map { |m| load_model(m, file_adapter) }
|
50
15
|
end
|
51
|
-
logger.info(
|
52
|
-
"[ActiveRecordDataLoader] "\
|
53
|
-
"Completed loading #{total_rows} row(s) into '#{strategy.table_name}' "\
|
54
|
-
"in #{total_time} seconds."
|
55
|
-
)
|
56
16
|
end
|
57
17
|
|
58
18
|
private
|
59
19
|
|
60
|
-
attr_reader :
|
20
|
+
attr_reader :definition, :configuration
|
61
21
|
|
62
|
-
def
|
63
|
-
|
64
|
-
|
65
|
-
|
22
|
+
def load_model(model, file_adapter)
|
23
|
+
ActiveRecordDataLoader::TableLoader.load_data(
|
24
|
+
batch_size: model.batch_size,
|
25
|
+
total_rows: model.row_count,
|
26
|
+
connection_handler: connection_handler,
|
27
|
+
strategy: strategy_class.new(generator(model), file_adapter),
|
28
|
+
logger: configuration.logger
|
29
|
+
)
|
30
|
+
end
|
66
31
|
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
32
|
+
def generator(model)
|
33
|
+
ActiveRecordDataLoader::ActiveRecord::ModelDataGenerator.new(
|
34
|
+
model: model.klass,
|
35
|
+
column_settings: model.columns,
|
36
|
+
polymorphic_settings: model.polymorphic_associations,
|
37
|
+
belongs_to_settings: model.belongs_to_associations,
|
38
|
+
connection_factory: configuration.connection_factory,
|
39
|
+
raise_on_duplicates: model.raise_on_duplicates_flag,
|
40
|
+
max_duplicate_retries: model.max_duplicate_retries,
|
41
|
+
logger: configuration.logger
|
42
|
+
)
|
73
43
|
end
|
74
44
|
|
75
|
-
def
|
76
|
-
if
|
77
|
-
|
78
|
-
update_statement_timeout(statement_timeout)
|
79
|
-
yield connection
|
80
|
-
update_statement_timeout(original_timeout)
|
45
|
+
def file_adapter_class
|
46
|
+
if configuration.output.present?
|
47
|
+
ActiveRecordDataLoader::FileOutputAdapter
|
81
48
|
else
|
82
|
-
|
49
|
+
ActiveRecordDataLoader::NullOutputAdapter
|
83
50
|
end
|
84
51
|
end
|
85
52
|
|
86
|
-
def
|
87
|
-
|
53
|
+
def file_adapter_options
|
54
|
+
timeout_commands =
|
55
|
+
if connection_handler.supports_timeout?
|
56
|
+
{
|
57
|
+
pre_command: connection_handler.timeout_set_command,
|
58
|
+
post_command: connection_handler.reset_timeout_command,
|
59
|
+
}
|
60
|
+
else
|
61
|
+
{}
|
62
|
+
end
|
63
|
+
|
64
|
+
timeout_commands.merge(filename: configuration.output)
|
88
65
|
end
|
89
66
|
|
90
|
-
def
|
91
|
-
|
67
|
+
def strategy_class
|
68
|
+
@strategy_class ||= if connection_handler.supports_copy?
|
69
|
+
ActiveRecordDataLoader::CopyStrategy
|
70
|
+
else
|
71
|
+
ActiveRecordDataLoader::BulkInsertStrategy
|
72
|
+
end
|
92
73
|
end
|
93
74
|
|
94
|
-
def
|
95
|
-
|
75
|
+
def connection_handler
|
76
|
+
@connection_handler ||= ActiveRecordDataLoader::ConnectionHandler.new(
|
77
|
+
connection_factory: configuration.connection_factory,
|
78
|
+
statement_timeout: configuration.statement_timeout
|
79
|
+
)
|
96
80
|
end
|
97
81
|
end
|
98
82
|
end
|
@@ -0,0 +1,15 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module ActiveRecordDataLoader
|
4
|
+
class NullOutputAdapter
|
5
|
+
def self.with_output_options(_options)
|
6
|
+
yield new
|
7
|
+
end
|
8
|
+
|
9
|
+
def copy(table:, columns:, data:, row_numbers:); end
|
10
|
+
|
11
|
+
def insert(command); end
|
12
|
+
|
13
|
+
def write_command(command); end
|
14
|
+
end
|
15
|
+
end
|