active_record_data_loader 1.2.0 → 1.3.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.github/workflows/codeql-analysis.yml +70 -0
- data/.rubocop.yml +8 -2
- data/CHANGELOG.md +9 -0
- data/CODE_OF_CONDUCT.md +2 -2
- data/Gemfile.lock +24 -24
- data/README.md +88 -18
- data/active_record_data_loader.gemspec +1 -1
- data/lib/active_record_data_loader/active_record/{belongs_to_configuration.rb → belongs_to_data_provider.rb} +7 -6
- data/lib/active_record_data_loader/active_record/{column_configuration.rb → column_data_provider.rb} +2 -2
- data/lib/active_record_data_loader/active_record/list.rb +35 -0
- data/lib/active_record_data_loader/active_record/model_data_generator.rb +60 -5
- data/lib/active_record_data_loader/active_record/{polymorphic_belongs_to_configuration.rb → polymorphic_belongs_to_data_provider.rb} +11 -6
- data/lib/active_record_data_loader/active_record/unique_index_tracker.rb +67 -0
- data/lib/active_record_data_loader/bulk_insert_strategy.rb +16 -9
- data/lib/active_record_data_loader/configuration.rb +13 -30
- data/lib/active_record_data_loader/connection_handler.rb +23 -45
- data/lib/active_record_data_loader/copy_strategy.rb +21 -7
- data/lib/active_record_data_loader/data_faker.rb +12 -4
- data/lib/active_record_data_loader/dsl/model.rb +19 -2
- data/lib/active_record_data_loader/errors.rb +5 -0
- data/lib/active_record_data_loader/file_output_adapter.rb +20 -12
- data/lib/active_record_data_loader/loader.rb +61 -55
- data/lib/active_record_data_loader/null_output_adapter.rb +15 -0
- data/lib/active_record_data_loader/table_loader.rb +59 -0
- data/lib/active_record_data_loader/version.rb +1 -1
- data/lib/active_record_data_loader.rb +9 -41
- metadata +12 -7
- data/lib/active_record_data_loader/connection_output_adapter.rb +0 -20
@@ -2,20 +2,21 @@
|
|
2
2
|
|
3
3
|
module ActiveRecordDataLoader
|
4
4
|
module ActiveRecord
|
5
|
-
class
|
6
|
-
def self.
|
5
|
+
class PolymorphicBelongsToDataProvider
|
6
|
+
def self.provider_for(polymorphic_settings:, strategy: :random)
|
7
7
|
ar_association = polymorphic_settings.model_class.reflect_on_association(
|
8
8
|
polymorphic_settings.name
|
9
9
|
)
|
10
10
|
raise "#{name} only supports polymorphic associations" unless ar_association.polymorphic?
|
11
11
|
|
12
|
-
new(polymorphic_settings, ar_association).polymorphic_config
|
12
|
+
new(polymorphic_settings, ar_association, strategy).polymorphic_config
|
13
13
|
end
|
14
14
|
|
15
|
-
def initialize(settings, ar_association)
|
15
|
+
def initialize(settings, ar_association, strategy)
|
16
16
|
@settings = settings
|
17
17
|
@ar_association = ar_association
|
18
18
|
@model_count = settings.weighted_models.size
|
19
|
+
@strategy = strategy
|
19
20
|
end
|
20
21
|
|
21
22
|
def polymorphic_config
|
@@ -32,19 +33,23 @@ module ActiveRecordDataLoader
|
|
32
33
|
end
|
33
34
|
|
34
35
|
def foreign_key(row_number)
|
35
|
-
possible_values[row_number % @model_count][1].
|
36
|
+
possible_values[row_number % @model_count][1].next
|
36
37
|
end
|
37
38
|
|
38
39
|
def possible_values
|
39
40
|
@possible_values ||= begin
|
40
41
|
values = @settings.models.keys.map do |klass|
|
41
|
-
[klass.name,
|
42
|
+
[klass.name, values_query(klass)]
|
42
43
|
end.to_h
|
43
44
|
|
44
45
|
@settings.weighted_models.map { |klass| [klass.name, values[klass.name]] }
|
45
46
|
end
|
46
47
|
end
|
47
48
|
|
49
|
+
def values_query(klass)
|
50
|
+
List.for(base_query(klass).pluck(klass.primary_key), strategy: @strategy)
|
51
|
+
end
|
52
|
+
|
48
53
|
def base_query(klass)
|
49
54
|
if @settings.queries[klass].respond_to?(:call)
|
50
55
|
@settings.queries[klass].call.all
|
@@ -0,0 +1,67 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module ActiveRecordDataLoader
|
4
|
+
module ActiveRecord
|
5
|
+
class UniqueIndexTracker
|
6
|
+
Index = Struct.new(:name, :columns, :column_indexes, keyword_init: true)
|
7
|
+
|
8
|
+
def initialize(model:, connection_factory:)
|
9
|
+
@model = model
|
10
|
+
@table = model.table_name
|
11
|
+
@unique_indexes = []
|
12
|
+
@unique_values_used = {}
|
13
|
+
find_unique_indexes(connection_factory)
|
14
|
+
end
|
15
|
+
|
16
|
+
def map_indexed_columns(column_list)
|
17
|
+
@unique_indexes = @raw_unique_indexes.map do |index|
|
18
|
+
@unique_values_used[index.name] = Set.new
|
19
|
+
columns = index.columns.map(&:to_sym)
|
20
|
+
Index.new(
|
21
|
+
name: index.name,
|
22
|
+
columns: columns,
|
23
|
+
column_indexes: columns.map { |c| column_list.find_index(c) }
|
24
|
+
)
|
25
|
+
end
|
26
|
+
end
|
27
|
+
|
28
|
+
def repeating_unique_values?(row)
|
29
|
+
@unique_indexes.map do |index|
|
30
|
+
values = index.column_indexes.map { |i| row[i] }
|
31
|
+
@unique_values_used.fetch(index.name).include?(values)
|
32
|
+
end.any?
|
33
|
+
end
|
34
|
+
|
35
|
+
def capture_unique_values(row)
|
36
|
+
return unless row.present?
|
37
|
+
|
38
|
+
@unique_indexes.each do |index|
|
39
|
+
values = index.column_indexes.map { |i| row[i] }
|
40
|
+
@unique_values_used.fetch(index.name) << values
|
41
|
+
end
|
42
|
+
row
|
43
|
+
end
|
44
|
+
|
45
|
+
def contained_in_index?(ar_column)
|
46
|
+
target_column = if @model.reflect_on_association(ar_column.name)&.belongs_to?
|
47
|
+
ar_column.join_foreign_key.to_sym
|
48
|
+
else
|
49
|
+
ar_column.name.to_sym
|
50
|
+
end
|
51
|
+
|
52
|
+
@raw_unique_indexes.flat_map { |i| i.columns.map(&:to_sym) }.include?(target_column)
|
53
|
+
end
|
54
|
+
|
55
|
+
private
|
56
|
+
|
57
|
+
attr_reader :table
|
58
|
+
|
59
|
+
def find_unique_indexes(connection_factory)
|
60
|
+
connection = connection_factory.call
|
61
|
+
@raw_unique_indexes = connection.indexes(table).select(&:unique)
|
62
|
+
ensure
|
63
|
+
connection&.close
|
64
|
+
end
|
65
|
+
end
|
66
|
+
end
|
67
|
+
end
|
@@ -2,16 +2,18 @@
|
|
2
2
|
|
3
3
|
module ActiveRecordDataLoader
|
4
4
|
class BulkInsertStrategy
|
5
|
-
def initialize(data_generator,
|
5
|
+
def initialize(data_generator, file_adapter)
|
6
6
|
@data_generator = data_generator
|
7
|
-
@
|
7
|
+
@file_adapter = file_adapter
|
8
8
|
end
|
9
9
|
|
10
10
|
def load_batch(row_numbers, connection)
|
11
|
-
|
11
|
+
command = <<~SQL
|
12
12
|
INSERT INTO #{quoted_table_name(connection)} (#{column_list(connection)})
|
13
13
|
VALUES #{values(row_numbers, connection)}
|
14
14
|
SQL
|
15
|
+
insert(connection: connection, command: command)
|
16
|
+
file_adapter.insert(command)
|
15
17
|
end
|
16
18
|
|
17
19
|
def table_name
|
@@ -24,7 +26,11 @@ module ActiveRecordDataLoader
|
|
24
26
|
|
25
27
|
private
|
26
28
|
|
27
|
-
attr_reader :data_generator, :
|
29
|
+
attr_reader :data_generator, :file_adapter
|
30
|
+
|
31
|
+
def insert(connection:, command:)
|
32
|
+
connection.insert(command)
|
33
|
+
end
|
28
34
|
|
29
35
|
def quoted_table_name(connection)
|
30
36
|
@quoted_table_name ||= connection.quote_table_name(data_generator.table)
|
@@ -39,15 +45,16 @@ module ActiveRecordDataLoader
|
|
39
45
|
|
40
46
|
def values(row_numbers, connection)
|
41
47
|
row_numbers
|
42
|
-
.map { |i|
|
48
|
+
.map { |i| row_values(i, connection) }
|
49
|
+
.compact
|
43
50
|
.join(",")
|
44
51
|
end
|
45
52
|
|
46
53
|
def row_values(row_number, connection)
|
47
|
-
data_generator
|
48
|
-
|
49
|
-
|
50
|
-
|
54
|
+
row = data_generator.generate_row(row_number)
|
55
|
+
return unless row.present?
|
56
|
+
|
57
|
+
"(#{row.map { |v| connection.quote(v) }.join(',')})"
|
51
58
|
end
|
52
59
|
end
|
53
60
|
end
|
@@ -3,7 +3,7 @@
|
|
3
3
|
module ActiveRecordDataLoader
|
4
4
|
class Configuration
|
5
5
|
attr_accessor :connection_factory, :default_batch_size, :default_row_count,
|
6
|
-
:logger, :statement_timeout
|
6
|
+
:logger, :max_duplicate_retries, :raise_on_duplicates, :statement_timeout
|
7
7
|
attr_reader :output
|
8
8
|
|
9
9
|
def initialize(
|
@@ -12,51 +12,34 @@ module ActiveRecordDataLoader
|
|
12
12
|
logger: nil,
|
13
13
|
statement_timeout: "2min",
|
14
14
|
connection_factory: -> { ::ActiveRecord::Base.connection },
|
15
|
-
|
15
|
+
raise_on_duplicates: false,
|
16
|
+
max_duplicate_retries: 5,
|
17
|
+
output: nil
|
16
18
|
)
|
17
19
|
@default_batch_size = default_batch_size
|
18
20
|
@default_row_count = default_row_count
|
19
21
|
@logger = logger || default_logger
|
20
22
|
@statement_timeout = statement_timeout
|
21
23
|
@connection_factory = connection_factory
|
24
|
+
@raise_on_duplicates = raise_on_duplicates
|
25
|
+
@max_duplicate_retries = max_duplicate_retries
|
22
26
|
self.output = output
|
23
27
|
end
|
24
28
|
|
25
29
|
def output=(output)
|
26
|
-
@output = validate_output(output
|
27
|
-
end
|
28
|
-
|
29
|
-
def output_adapter
|
30
|
-
if output.fetch(:type) == :file
|
31
|
-
ActiveRecordDataLoader::FileOutputAdapter.new(output)
|
32
|
-
else
|
33
|
-
ActiveRecordDataLoader::ConnectionOutputAdapter.new
|
34
|
-
end
|
35
|
-
end
|
36
|
-
|
37
|
-
def connection_handler
|
38
|
-
ActiveRecordDataLoader::ConnectionHandler.new(
|
39
|
-
connection_factory: connection_factory,
|
40
|
-
statement_timeout: statement_timeout,
|
41
|
-
output_adapter: output_adapter
|
42
|
-
)
|
30
|
+
@output = validate_output(output)
|
43
31
|
end
|
44
32
|
|
45
33
|
private
|
46
34
|
|
47
|
-
OUTPUT_OPTIONS_BY_TYPE = { connection: %i[type], file: %i[type filename] }.freeze
|
48
|
-
|
49
35
|
def validate_output(output)
|
50
|
-
if
|
51
|
-
|
52
|
-
elsif output.is_a?(
|
53
|
-
|
54
|
-
unless %i[file connection].include?(output[:type])
|
55
|
-
|
56
|
-
output.slice(*OUTPUT_OPTIONS_BY_TYPE[output[:type]])
|
36
|
+
if output.to_s.blank?
|
37
|
+
nil
|
38
|
+
elsif output.is_a?(String)
|
39
|
+
output
|
57
40
|
else
|
58
|
-
raise "The output configuration parameter must be
|
59
|
-
"
|
41
|
+
raise "The output configuration parameter must be a filename meant to be the "\
|
42
|
+
"target for the SQL script"
|
60
43
|
end
|
61
44
|
end
|
62
45
|
|
@@ -2,19 +2,18 @@
|
|
2
2
|
|
3
3
|
module ActiveRecordDataLoader
|
4
4
|
class ConnectionHandler
|
5
|
-
def initialize(connection_factory:, statement_timeout
|
5
|
+
def initialize(connection_factory:, statement_timeout:)
|
6
6
|
@connection_factory = connection_factory
|
7
7
|
@statement_timeout = statement_timeout
|
8
|
-
|
8
|
+
cache_facts
|
9
9
|
end
|
10
10
|
|
11
11
|
def with_connection
|
12
|
-
connection =
|
13
|
-
if
|
14
|
-
|
15
|
-
update_statement_timeout(connection, statement_timeout)
|
12
|
+
connection = connection_factory.call
|
13
|
+
if supports_timeout?
|
14
|
+
connection.execute(timeout_set_command)
|
16
15
|
yield connection
|
17
|
-
|
16
|
+
connection.execute(reset_timeout_command)
|
18
17
|
else
|
19
18
|
yield connection
|
20
19
|
end
|
@@ -22,53 +21,32 @@ module ActiveRecordDataLoader
|
|
22
21
|
connection&.close
|
23
22
|
end
|
24
23
|
|
25
|
-
|
26
|
-
|
27
|
-
# because we are reading data from the connection to come up with related data
|
28
|
-
# while generating the data. Also, the final SQL script file needs the timeout
|
29
|
-
# updated so that when those \COPY commands are executed they have the higher
|
30
|
-
# timeout as well.
|
31
|
-
def with_statement_timeout_for_output
|
32
|
-
return yield unless output_adapter.needs_timeout_output?
|
33
|
-
|
34
|
-
original_timeout = begin
|
35
|
-
connection = open_connection
|
36
|
-
retrieve_statement_timeout(connection) if postgres?(connection)
|
37
|
-
ensure
|
38
|
-
connection&.close
|
39
|
-
end
|
40
|
-
|
41
|
-
if original_timeout
|
42
|
-
output_adapter.execute(statement_timeout_set_command(statement_timeout))
|
43
|
-
yield
|
44
|
-
output_adapter.execute(statement_timeout_set_command(original_timeout))
|
45
|
-
else
|
46
|
-
yield
|
47
|
-
end
|
24
|
+
def supports_timeout?
|
25
|
+
@supports_timeout
|
48
26
|
end
|
49
27
|
|
50
|
-
|
51
|
-
|
52
|
-
attr_reader :connection_factory, :statement_timeout, :output_adapter
|
53
|
-
|
54
|
-
def retrieve_statement_timeout(connection)
|
55
|
-
connection.execute("SHOW statement_timeout").first["statement_timeout"]
|
28
|
+
def supports_copy?
|
29
|
+
@supports_copy
|
56
30
|
end
|
57
31
|
|
58
|
-
def
|
59
|
-
|
32
|
+
def timeout_set_command
|
33
|
+
"SET statement_timeout = \"#{statement_timeout}\""
|
60
34
|
end
|
61
35
|
|
62
|
-
def
|
63
|
-
"
|
36
|
+
def reset_timeout_command
|
37
|
+
"RESET statement_timeout"
|
64
38
|
end
|
65
39
|
|
66
|
-
|
67
|
-
connection_factory.call
|
68
|
-
end
|
40
|
+
private
|
69
41
|
|
70
|
-
|
71
|
-
|
42
|
+
attr_reader :connection_factory, :statement_timeout
|
43
|
+
|
44
|
+
def cache_facts
|
45
|
+
connection = connection_factory.call
|
46
|
+
@supports_timeout = connection.adapter_name.downcase.to_sym == :postgresql
|
47
|
+
@supports_copy = connection.raw_connection.respond_to?(:copy_data)
|
48
|
+
ensure
|
49
|
+
connection&.close
|
72
50
|
end
|
73
51
|
end
|
74
52
|
end
|
@@ -2,17 +2,24 @@
|
|
2
2
|
|
3
3
|
module ActiveRecordDataLoader
|
4
4
|
class CopyStrategy
|
5
|
-
def initialize(data_generator,
|
5
|
+
def initialize(data_generator, file_adapter)
|
6
6
|
@data_generator = data_generator
|
7
|
-
@
|
7
|
+
@file_adapter = file_adapter
|
8
8
|
end
|
9
9
|
|
10
10
|
def load_batch(row_numbers, connection)
|
11
|
-
|
11
|
+
data = csv_rows(row_numbers, connection)
|
12
|
+
copy(
|
12
13
|
connection: connection,
|
13
14
|
table: table_name_for_copy(connection),
|
14
15
|
columns: columns_for_copy(connection),
|
15
|
-
data:
|
16
|
+
data: data,
|
17
|
+
row_numbers: row_numbers
|
18
|
+
)
|
19
|
+
file_adapter.copy(
|
20
|
+
table: table_name_for_copy(connection),
|
21
|
+
columns: columns_for_copy(connection),
|
22
|
+
data: data,
|
16
23
|
row_numbers: row_numbers
|
17
24
|
)
|
18
25
|
end
|
@@ -27,12 +34,19 @@ module ActiveRecordDataLoader
|
|
27
34
|
|
28
35
|
private
|
29
36
|
|
30
|
-
attr_reader :data_generator, :
|
37
|
+
attr_reader :data_generator, :file_adapter
|
38
|
+
|
39
|
+
def copy(connection:, table:, columns:, data:, row_numbers:)
|
40
|
+
raw_connection = connection.raw_connection
|
41
|
+
raw_connection.copy_data("COPY #{table} (#{columns}) FROM STDIN WITH (FORMAT CSV)") do
|
42
|
+
raw_connection.put_copy_data(data.join("\n"))
|
43
|
+
end
|
44
|
+
end
|
31
45
|
|
32
46
|
def csv_rows(row_numbers, connection)
|
33
47
|
row_numbers.map do |i|
|
34
|
-
data_generator.generate_row(i)
|
35
|
-
end
|
48
|
+
data_generator.generate_row(i)&.map { |d| quote_data(d, connection) }&.join(",")
|
49
|
+
end.compact
|
36
50
|
end
|
37
51
|
|
38
52
|
def table_name_for_copy(connection)
|
@@ -13,16 +13,24 @@ module ActiveRecordDataLoader
|
|
13
13
|
|
14
14
|
def adapter
|
15
15
|
@adapter ||=
|
16
|
-
if
|
17
|
-
require "ffaker"
|
16
|
+
if can_use?("ffaker", "2.1.0")
|
18
17
|
FFakerGemAdapter.new
|
19
|
-
elsif
|
20
|
-
require "faker"
|
18
|
+
elsif can_use?("faker", "1.9.3")
|
21
19
|
FakerGemAdapter.new
|
22
20
|
else
|
23
21
|
NoGemAdapter.new
|
24
22
|
end
|
25
23
|
end
|
24
|
+
|
25
|
+
def can_use?(gem, min_version)
|
26
|
+
gemspec = Gem.loaded_specs[gem]
|
27
|
+
return false unless gemspec.present? && gemspec.version >= Gem::Version.new(min_version)
|
28
|
+
|
29
|
+
require gem
|
30
|
+
true
|
31
|
+
rescue LoadError
|
32
|
+
false
|
33
|
+
end
|
26
34
|
end
|
27
35
|
|
28
36
|
class FFakerGemAdapter
|
@@ -3,13 +3,16 @@
|
|
3
3
|
module ActiveRecordDataLoader
|
4
4
|
module Dsl
|
5
5
|
class Model
|
6
|
-
attr_reader :klass, :columns, :row_count, :polymorphic_associations, :belongs_to_associations
|
6
|
+
attr_reader :klass, :columns, :row_count, :polymorphic_associations, :belongs_to_associations,
|
7
|
+
:raise_on_duplicates_flag
|
7
8
|
|
8
9
|
def initialize(klass:, configuration:)
|
9
10
|
@klass = klass
|
10
11
|
@columns = {}
|
11
12
|
@row_count = configuration.default_row_count
|
12
13
|
@batch_size = configuration.default_batch_size
|
14
|
+
@raise_on_duplicates_flag = configuration.raise_on_duplicates
|
15
|
+
@max_duplicate_retries = configuration.max_duplicate_retries
|
13
16
|
@polymorphic_associations = []
|
14
17
|
@belongs_to_associations = []
|
15
18
|
end
|
@@ -22,6 +25,20 @@ module ActiveRecordDataLoader
|
|
22
25
|
@batch_size = (size || @batch_size)
|
23
26
|
end
|
24
27
|
|
28
|
+
def raise_on_duplicates
|
29
|
+
@raise_on_duplicates_flag = true
|
30
|
+
end
|
31
|
+
|
32
|
+
def do_not_raise_on_duplicates
|
33
|
+
@raise_on_duplicates_flag = false
|
34
|
+
end
|
35
|
+
|
36
|
+
def max_duplicate_retries(retries = nil)
|
37
|
+
return @max_duplicate_retries if retries.nil?
|
38
|
+
|
39
|
+
@max_duplicate_retries = retries
|
40
|
+
end
|
41
|
+
|
25
42
|
def column(name, func)
|
26
43
|
@columns[name.to_sym] = func
|
27
44
|
end
|
@@ -32,7 +49,7 @@ module ActiveRecordDataLoader
|
|
32
49
|
).tap { |a| block.call(a) }
|
33
50
|
end
|
34
51
|
|
35
|
-
def belongs_to(assoc_name, eligible_set:)
|
52
|
+
def belongs_to(assoc_name, eligible_set: nil)
|
36
53
|
@belongs_to_associations << BelongsToAssociation.new(@klass, assoc_name, eligible_set)
|
37
54
|
end
|
38
55
|
end
|
@@ -2,38 +2,46 @@
|
|
2
2
|
|
3
3
|
module ActiveRecordDataLoader
|
4
4
|
class FileOutputAdapter
|
5
|
+
def self.with_output_options(options)
|
6
|
+
adapter = new(options)
|
7
|
+
pre_command = options[:pre_command]
|
8
|
+
adapter.write_command(pre_command) if pre_command
|
9
|
+
yield adapter
|
10
|
+
post_command = options[:post_command]
|
11
|
+
adapter.write_command(post_command) if post_command
|
12
|
+
end
|
13
|
+
|
5
14
|
def initialize(options)
|
6
15
|
@filename = options.fetch(:filename, "active_record_data_loader_script.sql")
|
7
16
|
@file_basename = File.basename(@filename, File.extname(@filename))
|
8
17
|
@path = File.expand_path(File.dirname(@filename))
|
18
|
+
File.open(@filename, File::TRUNC) if File.exist?(@filename)
|
9
19
|
end
|
10
20
|
|
11
|
-
def
|
12
|
-
true
|
13
|
-
end
|
14
|
-
|
15
|
-
def copy(connection:, table:, columns:, data:, row_numbers:)
|
21
|
+
def copy(table:, columns:, data:, row_numbers:)
|
16
22
|
data_filename = data_filename(table, row_numbers)
|
17
23
|
File.open(data_filename, "w") { |f| f.puts(data) }
|
18
|
-
File.open(
|
24
|
+
File.open(filename, "a") do |file|
|
19
25
|
file.puts("\\COPY #{table} (#{columns}) FROM '#{data_filename}' WITH (FORMAT CSV);")
|
20
26
|
end
|
21
27
|
end
|
22
28
|
|
23
|
-
def insert(
|
24
|
-
|
29
|
+
def insert(command)
|
30
|
+
write_command(command)
|
25
31
|
end
|
26
32
|
|
27
|
-
def
|
28
|
-
File.open(
|
33
|
+
def write_command(command)
|
34
|
+
File.open(filename, "a") { |f| f.puts("#{command.gsub("\n", ' ')};") }
|
29
35
|
end
|
30
36
|
|
31
37
|
private
|
32
38
|
|
39
|
+
attr_reader :filename, :path, :file_basename
|
40
|
+
|
33
41
|
def data_filename(table, row_numbers)
|
34
42
|
File.join(
|
35
|
-
|
36
|
-
"#{
|
43
|
+
path,
|
44
|
+
"#{file_basename}_#{table.gsub(/"/, '')}_rows_#{row_numbers[0]}_to_#{row_numbers[-1]}.csv"
|
37
45
|
)
|
38
46
|
end
|
39
47
|
end
|
@@ -1,76 +1,82 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
require "benchmark"
|
4
|
-
|
5
3
|
module ActiveRecordDataLoader
|
6
4
|
class Loader
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
batch_size:,
|
12
|
-
configuration:
|
13
|
-
)
|
14
|
-
new(
|
15
|
-
logger: configuration.logger,
|
16
|
-
connection_handler: configuration.connection_handler,
|
17
|
-
strategy: strategy_class(configuration.connection_factory).new(
|
18
|
-
data_generator,
|
19
|
-
configuration.output_adapter
|
20
|
-
)
|
21
|
-
).load_data(batch_size, total_rows)
|
22
|
-
end
|
5
|
+
def initialize(configuration, definition)
|
6
|
+
@configuration = configuration
|
7
|
+
@definition = definition
|
8
|
+
end
|
23
9
|
|
24
|
-
|
10
|
+
def load_data
|
11
|
+
ActiveRecordDataLoader::ActiveRecord::PerRowValueCache.clear
|
25
12
|
|
26
|
-
|
27
|
-
|
28
|
-
ActiveRecordDataLoader::CopyStrategy
|
29
|
-
else
|
30
|
-
ActiveRecordDataLoader::BulkInsertStrategy
|
31
|
-
end
|
13
|
+
file_adapter_class.with_output_options(file_adapter_options) do |file_adapter|
|
14
|
+
definition.models.map { |m| load_model(m, file_adapter) }
|
32
15
|
end
|
33
16
|
end
|
34
17
|
|
35
|
-
|
36
|
-
@logger = logger
|
37
|
-
@connection_handler = connection_handler
|
38
|
-
@strategy = strategy
|
39
|
-
end
|
18
|
+
private
|
40
19
|
|
41
|
-
|
42
|
-
batch_count = (total_rows / batch_size.to_f).ceil
|
20
|
+
attr_reader :definition, :configuration
|
43
21
|
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
22
|
+
def load_model(model, file_adapter)
|
23
|
+
ActiveRecordDataLoader::TableLoader.load_data(
|
24
|
+
batch_size: model.batch_size,
|
25
|
+
total_rows: model.row_count,
|
26
|
+
connection_handler: connection_handler,
|
27
|
+
strategy: strategy_class.new(generator(model), file_adapter),
|
28
|
+
logger: configuration.logger
|
48
29
|
)
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
30
|
+
end
|
31
|
+
|
32
|
+
def generator(model)
|
33
|
+
ActiveRecordDataLoader::ActiveRecord::ModelDataGenerator.new(
|
34
|
+
model: model.klass,
|
35
|
+
column_settings: model.columns,
|
36
|
+
polymorphic_settings: model.polymorphic_associations,
|
37
|
+
belongs_to_settings: model.belongs_to_associations,
|
38
|
+
connection_factory: configuration.connection_factory,
|
39
|
+
raise_on_duplicates: model.raise_on_duplicates_flag,
|
40
|
+
max_duplicate_retries: model.max_duplicate_retries,
|
41
|
+
logger: configuration.logger
|
56
42
|
)
|
57
43
|
end
|
58
44
|
|
59
|
-
|
45
|
+
def file_adapter_class
|
46
|
+
if configuration.output.present?
|
47
|
+
ActiveRecordDataLoader::FileOutputAdapter
|
48
|
+
else
|
49
|
+
ActiveRecordDataLoader::NullOutputAdapter
|
50
|
+
end
|
51
|
+
end
|
60
52
|
|
61
|
-
|
53
|
+
def file_adapter_options
|
54
|
+
timeout_commands =
|
55
|
+
if connection_handler.supports_timeout?
|
56
|
+
{
|
57
|
+
pre_command: connection_handler.timeout_set_command,
|
58
|
+
post_command: connection_handler.reset_timeout_command,
|
59
|
+
}
|
60
|
+
else
|
61
|
+
{}
|
62
|
+
end
|
62
63
|
|
63
|
-
|
64
|
-
|
65
|
-
total_rows.times.each_slice(batch_size).with_index do |row_numbers, i|
|
66
|
-
time = Benchmark.realtime { strategy.load_batch(row_numbers, connection) }
|
64
|
+
timeout_commands.merge(filename: configuration.output)
|
65
|
+
end
|
67
66
|
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
67
|
+
def strategy_class
|
68
|
+
@strategy_class ||= if connection_handler.supports_copy?
|
69
|
+
ActiveRecordDataLoader::CopyStrategy
|
70
|
+
else
|
71
|
+
ActiveRecordDataLoader::BulkInsertStrategy
|
72
|
+
end
|
73
|
+
end
|
74
|
+
|
75
|
+
def connection_handler
|
76
|
+
@connection_handler ||= ActiveRecordDataLoader::ConnectionHandler.new(
|
77
|
+
connection_factory: configuration.connection_factory,
|
78
|
+
statement_timeout: configuration.statement_timeout
|
79
|
+
)
|
74
80
|
end
|
75
81
|
end
|
76
82
|
end
|