active_record_data_loader 1.2.0 → 1.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/codeql-analysis.yml +70 -0
- data/.rubocop.yml +8 -2
- data/CHANGELOG.md +9 -0
- data/CODE_OF_CONDUCT.md +2 -2
- data/Gemfile.lock +24 -24
- data/README.md +88 -18
- data/active_record_data_loader.gemspec +1 -1
- data/lib/active_record_data_loader/active_record/{belongs_to_configuration.rb → belongs_to_data_provider.rb} +7 -6
- data/lib/active_record_data_loader/active_record/{column_configuration.rb → column_data_provider.rb} +2 -2
- data/lib/active_record_data_loader/active_record/list.rb +35 -0
- data/lib/active_record_data_loader/active_record/model_data_generator.rb +60 -5
- data/lib/active_record_data_loader/active_record/{polymorphic_belongs_to_configuration.rb → polymorphic_belongs_to_data_provider.rb} +11 -6
- data/lib/active_record_data_loader/active_record/unique_index_tracker.rb +67 -0
- data/lib/active_record_data_loader/bulk_insert_strategy.rb +16 -9
- data/lib/active_record_data_loader/configuration.rb +13 -30
- data/lib/active_record_data_loader/connection_handler.rb +23 -45
- data/lib/active_record_data_loader/copy_strategy.rb +21 -7
- data/lib/active_record_data_loader/data_faker.rb +12 -4
- data/lib/active_record_data_loader/dsl/model.rb +19 -2
- data/lib/active_record_data_loader/errors.rb +5 -0
- data/lib/active_record_data_loader/file_output_adapter.rb +20 -12
- data/lib/active_record_data_loader/loader.rb +61 -55
- data/lib/active_record_data_loader/null_output_adapter.rb +15 -0
- data/lib/active_record_data_loader/table_loader.rb +59 -0
- data/lib/active_record_data_loader/version.rb +1 -1
- data/lib/active_record_data_loader.rb +9 -41
- metadata +12 -7
- data/lib/active_record_data_loader/connection_output_adapter.rb +0 -20
@@ -2,20 +2,21 @@
|
|
2
2
|
|
3
3
|
module ActiveRecordDataLoader
|
4
4
|
module ActiveRecord
|
5
|
-
class
|
6
|
-
def self.
|
5
|
+
class PolymorphicBelongsToDataProvider
|
6
|
+
def self.provider_for(polymorphic_settings:, strategy: :random)
|
7
7
|
ar_association = polymorphic_settings.model_class.reflect_on_association(
|
8
8
|
polymorphic_settings.name
|
9
9
|
)
|
10
10
|
raise "#{name} only supports polymorphic associations" unless ar_association.polymorphic?
|
11
11
|
|
12
|
-
new(polymorphic_settings, ar_association).polymorphic_config
|
12
|
+
new(polymorphic_settings, ar_association, strategy).polymorphic_config
|
13
13
|
end
|
14
14
|
|
15
|
-
def initialize(settings, ar_association)
|
15
|
+
def initialize(settings, ar_association, strategy)
|
16
16
|
@settings = settings
|
17
17
|
@ar_association = ar_association
|
18
18
|
@model_count = settings.weighted_models.size
|
19
|
+
@strategy = strategy
|
19
20
|
end
|
20
21
|
|
21
22
|
def polymorphic_config
|
@@ -32,19 +33,23 @@ module ActiveRecordDataLoader
|
|
32
33
|
end
|
33
34
|
|
34
35
|
def foreign_key(row_number)
|
35
|
-
possible_values[row_number % @model_count][1].
|
36
|
+
possible_values[row_number % @model_count][1].next
|
36
37
|
end
|
37
38
|
|
38
39
|
def possible_values
|
39
40
|
@possible_values ||= begin
|
40
41
|
values = @settings.models.keys.map do |klass|
|
41
|
-
[klass.name,
|
42
|
+
[klass.name, values_query(klass)]
|
42
43
|
end.to_h
|
43
44
|
|
44
45
|
@settings.weighted_models.map { |klass| [klass.name, values[klass.name]] }
|
45
46
|
end
|
46
47
|
end
|
47
48
|
|
49
|
+
def values_query(klass)
|
50
|
+
List.for(base_query(klass).pluck(klass.primary_key), strategy: @strategy)
|
51
|
+
end
|
52
|
+
|
48
53
|
def base_query(klass)
|
49
54
|
if @settings.queries[klass].respond_to?(:call)
|
50
55
|
@settings.queries[klass].call.all
|
@@ -0,0 +1,67 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module ActiveRecordDataLoader
|
4
|
+
module ActiveRecord
|
5
|
+
class UniqueIndexTracker
|
6
|
+
Index = Struct.new(:name, :columns, :column_indexes, keyword_init: true)
|
7
|
+
|
8
|
+
def initialize(model:, connection_factory:)
|
9
|
+
@model = model
|
10
|
+
@table = model.table_name
|
11
|
+
@unique_indexes = []
|
12
|
+
@unique_values_used = {}
|
13
|
+
find_unique_indexes(connection_factory)
|
14
|
+
end
|
15
|
+
|
16
|
+
def map_indexed_columns(column_list)
|
17
|
+
@unique_indexes = @raw_unique_indexes.map do |index|
|
18
|
+
@unique_values_used[index.name] = Set.new
|
19
|
+
columns = index.columns.map(&:to_sym)
|
20
|
+
Index.new(
|
21
|
+
name: index.name,
|
22
|
+
columns: columns,
|
23
|
+
column_indexes: columns.map { |c| column_list.find_index(c) }
|
24
|
+
)
|
25
|
+
end
|
26
|
+
end
|
27
|
+
|
28
|
+
def repeating_unique_values?(row)
|
29
|
+
@unique_indexes.map do |index|
|
30
|
+
values = index.column_indexes.map { |i| row[i] }
|
31
|
+
@unique_values_used.fetch(index.name).include?(values)
|
32
|
+
end.any?
|
33
|
+
end
|
34
|
+
|
35
|
+
def capture_unique_values(row)
|
36
|
+
return unless row.present?
|
37
|
+
|
38
|
+
@unique_indexes.each do |index|
|
39
|
+
values = index.column_indexes.map { |i| row[i] }
|
40
|
+
@unique_values_used.fetch(index.name) << values
|
41
|
+
end
|
42
|
+
row
|
43
|
+
end
|
44
|
+
|
45
|
+
def contained_in_index?(ar_column)
|
46
|
+
target_column = if @model.reflect_on_association(ar_column.name)&.belongs_to?
|
47
|
+
ar_column.join_foreign_key.to_sym
|
48
|
+
else
|
49
|
+
ar_column.name.to_sym
|
50
|
+
end
|
51
|
+
|
52
|
+
@raw_unique_indexes.flat_map { |i| i.columns.map(&:to_sym) }.include?(target_column)
|
53
|
+
end
|
54
|
+
|
55
|
+
private
|
56
|
+
|
57
|
+
attr_reader :table
|
58
|
+
|
59
|
+
def find_unique_indexes(connection_factory)
|
60
|
+
connection = connection_factory.call
|
61
|
+
@raw_unique_indexes = connection.indexes(table).select(&:unique)
|
62
|
+
ensure
|
63
|
+
connection&.close
|
64
|
+
end
|
65
|
+
end
|
66
|
+
end
|
67
|
+
end
|
@@ -2,16 +2,18 @@
|
|
2
2
|
|
3
3
|
module ActiveRecordDataLoader
|
4
4
|
class BulkInsertStrategy
|
5
|
-
def initialize(data_generator,
|
5
|
+
def initialize(data_generator, file_adapter)
|
6
6
|
@data_generator = data_generator
|
7
|
-
@
|
7
|
+
@file_adapter = file_adapter
|
8
8
|
end
|
9
9
|
|
10
10
|
def load_batch(row_numbers, connection)
|
11
|
-
|
11
|
+
command = <<~SQL
|
12
12
|
INSERT INTO #{quoted_table_name(connection)} (#{column_list(connection)})
|
13
13
|
VALUES #{values(row_numbers, connection)}
|
14
14
|
SQL
|
15
|
+
insert(connection: connection, command: command)
|
16
|
+
file_adapter.insert(command)
|
15
17
|
end
|
16
18
|
|
17
19
|
def table_name
|
@@ -24,7 +26,11 @@ module ActiveRecordDataLoader
|
|
24
26
|
|
25
27
|
private
|
26
28
|
|
27
|
-
attr_reader :data_generator, :
|
29
|
+
attr_reader :data_generator, :file_adapter
|
30
|
+
|
31
|
+
def insert(connection:, command:)
|
32
|
+
connection.insert(command)
|
33
|
+
end
|
28
34
|
|
29
35
|
def quoted_table_name(connection)
|
30
36
|
@quoted_table_name ||= connection.quote_table_name(data_generator.table)
|
@@ -39,15 +45,16 @@ module ActiveRecordDataLoader
|
|
39
45
|
|
40
46
|
def values(row_numbers, connection)
|
41
47
|
row_numbers
|
42
|
-
.map { |i|
|
48
|
+
.map { |i| row_values(i, connection) }
|
49
|
+
.compact
|
43
50
|
.join(",")
|
44
51
|
end
|
45
52
|
|
46
53
|
def row_values(row_number, connection)
|
47
|
-
data_generator
|
48
|
-
|
49
|
-
|
50
|
-
|
54
|
+
row = data_generator.generate_row(row_number)
|
55
|
+
return unless row.present?
|
56
|
+
|
57
|
+
"(#{row.map { |v| connection.quote(v) }.join(',')})"
|
51
58
|
end
|
52
59
|
end
|
53
60
|
end
|
@@ -3,7 +3,7 @@
|
|
3
3
|
module ActiveRecordDataLoader
|
4
4
|
class Configuration
|
5
5
|
attr_accessor :connection_factory, :default_batch_size, :default_row_count,
|
6
|
-
:logger, :statement_timeout
|
6
|
+
:logger, :max_duplicate_retries, :raise_on_duplicates, :statement_timeout
|
7
7
|
attr_reader :output
|
8
8
|
|
9
9
|
def initialize(
|
@@ -12,51 +12,34 @@ module ActiveRecordDataLoader
|
|
12
12
|
logger: nil,
|
13
13
|
statement_timeout: "2min",
|
14
14
|
connection_factory: -> { ::ActiveRecord::Base.connection },
|
15
|
-
|
15
|
+
raise_on_duplicates: false,
|
16
|
+
max_duplicate_retries: 5,
|
17
|
+
output: nil
|
16
18
|
)
|
17
19
|
@default_batch_size = default_batch_size
|
18
20
|
@default_row_count = default_row_count
|
19
21
|
@logger = logger || default_logger
|
20
22
|
@statement_timeout = statement_timeout
|
21
23
|
@connection_factory = connection_factory
|
24
|
+
@raise_on_duplicates = raise_on_duplicates
|
25
|
+
@max_duplicate_retries = max_duplicate_retries
|
22
26
|
self.output = output
|
23
27
|
end
|
24
28
|
|
25
29
|
def output=(output)
|
26
|
-
@output = validate_output(output
|
27
|
-
end
|
28
|
-
|
29
|
-
def output_adapter
|
30
|
-
if output.fetch(:type) == :file
|
31
|
-
ActiveRecordDataLoader::FileOutputAdapter.new(output)
|
32
|
-
else
|
33
|
-
ActiveRecordDataLoader::ConnectionOutputAdapter.new
|
34
|
-
end
|
35
|
-
end
|
36
|
-
|
37
|
-
def connection_handler
|
38
|
-
ActiveRecordDataLoader::ConnectionHandler.new(
|
39
|
-
connection_factory: connection_factory,
|
40
|
-
statement_timeout: statement_timeout,
|
41
|
-
output_adapter: output_adapter
|
42
|
-
)
|
30
|
+
@output = validate_output(output)
|
43
31
|
end
|
44
32
|
|
45
33
|
private
|
46
34
|
|
47
|
-
OUTPUT_OPTIONS_BY_TYPE = { connection: %i[type], file: %i[type filename] }.freeze
|
48
|
-
|
49
35
|
def validate_output(output)
|
50
|
-
if
|
51
|
-
|
52
|
-
elsif output.is_a?(
|
53
|
-
|
54
|
-
unless %i[file connection].include?(output[:type])
|
55
|
-
|
56
|
-
output.slice(*OUTPUT_OPTIONS_BY_TYPE[output[:type]])
|
36
|
+
if output.to_s.blank?
|
37
|
+
nil
|
38
|
+
elsif output.is_a?(String)
|
39
|
+
output
|
57
40
|
else
|
58
|
-
raise "The output configuration parameter must be
|
59
|
-
"
|
41
|
+
raise "The output configuration parameter must be a filename meant to be the "\
|
42
|
+
"target for the SQL script"
|
60
43
|
end
|
61
44
|
end
|
62
45
|
|
@@ -2,19 +2,18 @@
|
|
2
2
|
|
3
3
|
module ActiveRecordDataLoader
|
4
4
|
class ConnectionHandler
|
5
|
-
def initialize(connection_factory:, statement_timeout
|
5
|
+
def initialize(connection_factory:, statement_timeout:)
|
6
6
|
@connection_factory = connection_factory
|
7
7
|
@statement_timeout = statement_timeout
|
8
|
-
|
8
|
+
cache_facts
|
9
9
|
end
|
10
10
|
|
11
11
|
def with_connection
|
12
|
-
connection =
|
13
|
-
if
|
14
|
-
|
15
|
-
update_statement_timeout(connection, statement_timeout)
|
12
|
+
connection = connection_factory.call
|
13
|
+
if supports_timeout?
|
14
|
+
connection.execute(timeout_set_command)
|
16
15
|
yield connection
|
17
|
-
|
16
|
+
connection.execute(reset_timeout_command)
|
18
17
|
else
|
19
18
|
yield connection
|
20
19
|
end
|
@@ -22,53 +21,32 @@ module ActiveRecordDataLoader
|
|
22
21
|
connection&.close
|
23
22
|
end
|
24
23
|
|
25
|
-
|
26
|
-
|
27
|
-
# because we are reading data from the connection to come up with related data
|
28
|
-
# while generating the data. Also, the final SQL script file needs the timeout
|
29
|
-
# updated so that when those \COPY commands are executed they have the higher
|
30
|
-
# timeout as well.
|
31
|
-
def with_statement_timeout_for_output
|
32
|
-
return yield unless output_adapter.needs_timeout_output?
|
33
|
-
|
34
|
-
original_timeout = begin
|
35
|
-
connection = open_connection
|
36
|
-
retrieve_statement_timeout(connection) if postgres?(connection)
|
37
|
-
ensure
|
38
|
-
connection&.close
|
39
|
-
end
|
40
|
-
|
41
|
-
if original_timeout
|
42
|
-
output_adapter.execute(statement_timeout_set_command(statement_timeout))
|
43
|
-
yield
|
44
|
-
output_adapter.execute(statement_timeout_set_command(original_timeout))
|
45
|
-
else
|
46
|
-
yield
|
47
|
-
end
|
24
|
+
def supports_timeout?
|
25
|
+
@supports_timeout
|
48
26
|
end
|
49
27
|
|
50
|
-
|
51
|
-
|
52
|
-
attr_reader :connection_factory, :statement_timeout, :output_adapter
|
53
|
-
|
54
|
-
def retrieve_statement_timeout(connection)
|
55
|
-
connection.execute("SHOW statement_timeout").first["statement_timeout"]
|
28
|
+
def supports_copy?
|
29
|
+
@supports_copy
|
56
30
|
end
|
57
31
|
|
58
|
-
def
|
59
|
-
|
32
|
+
def timeout_set_command
|
33
|
+
"SET statement_timeout = \"#{statement_timeout}\""
|
60
34
|
end
|
61
35
|
|
62
|
-
def
|
63
|
-
"
|
36
|
+
def reset_timeout_command
|
37
|
+
"RESET statement_timeout"
|
64
38
|
end
|
65
39
|
|
66
|
-
|
67
|
-
connection_factory.call
|
68
|
-
end
|
40
|
+
private
|
69
41
|
|
70
|
-
|
71
|
-
|
42
|
+
attr_reader :connection_factory, :statement_timeout
|
43
|
+
|
44
|
+
def cache_facts
|
45
|
+
connection = connection_factory.call
|
46
|
+
@supports_timeout = connection.adapter_name.downcase.to_sym == :postgresql
|
47
|
+
@supports_copy = connection.raw_connection.respond_to?(:copy_data)
|
48
|
+
ensure
|
49
|
+
connection&.close
|
72
50
|
end
|
73
51
|
end
|
74
52
|
end
|
@@ -2,17 +2,24 @@
|
|
2
2
|
|
3
3
|
module ActiveRecordDataLoader
|
4
4
|
class CopyStrategy
|
5
|
-
def initialize(data_generator,
|
5
|
+
def initialize(data_generator, file_adapter)
|
6
6
|
@data_generator = data_generator
|
7
|
-
@
|
7
|
+
@file_adapter = file_adapter
|
8
8
|
end
|
9
9
|
|
10
10
|
def load_batch(row_numbers, connection)
|
11
|
-
|
11
|
+
data = csv_rows(row_numbers, connection)
|
12
|
+
copy(
|
12
13
|
connection: connection,
|
13
14
|
table: table_name_for_copy(connection),
|
14
15
|
columns: columns_for_copy(connection),
|
15
|
-
data:
|
16
|
+
data: data,
|
17
|
+
row_numbers: row_numbers
|
18
|
+
)
|
19
|
+
file_adapter.copy(
|
20
|
+
table: table_name_for_copy(connection),
|
21
|
+
columns: columns_for_copy(connection),
|
22
|
+
data: data,
|
16
23
|
row_numbers: row_numbers
|
17
24
|
)
|
18
25
|
end
|
@@ -27,12 +34,19 @@ module ActiveRecordDataLoader
|
|
27
34
|
|
28
35
|
private
|
29
36
|
|
30
|
-
attr_reader :data_generator, :
|
37
|
+
attr_reader :data_generator, :file_adapter
|
38
|
+
|
39
|
+
def copy(connection:, table:, columns:, data:, row_numbers:)
|
40
|
+
raw_connection = connection.raw_connection
|
41
|
+
raw_connection.copy_data("COPY #{table} (#{columns}) FROM STDIN WITH (FORMAT CSV)") do
|
42
|
+
raw_connection.put_copy_data(data.join("\n"))
|
43
|
+
end
|
44
|
+
end
|
31
45
|
|
32
46
|
def csv_rows(row_numbers, connection)
|
33
47
|
row_numbers.map do |i|
|
34
|
-
data_generator.generate_row(i)
|
35
|
-
end
|
48
|
+
data_generator.generate_row(i)&.map { |d| quote_data(d, connection) }&.join(",")
|
49
|
+
end.compact
|
36
50
|
end
|
37
51
|
|
38
52
|
def table_name_for_copy(connection)
|
@@ -13,16 +13,24 @@ module ActiveRecordDataLoader
|
|
13
13
|
|
14
14
|
def adapter
|
15
15
|
@adapter ||=
|
16
|
-
if
|
17
|
-
require "ffaker"
|
16
|
+
if can_use?("ffaker", "2.1.0")
|
18
17
|
FFakerGemAdapter.new
|
19
|
-
elsif
|
20
|
-
require "faker"
|
18
|
+
elsif can_use?("faker", "1.9.3")
|
21
19
|
FakerGemAdapter.new
|
22
20
|
else
|
23
21
|
NoGemAdapter.new
|
24
22
|
end
|
25
23
|
end
|
24
|
+
|
25
|
+
def can_use?(gem, min_version)
|
26
|
+
gemspec = Gem.loaded_specs[gem]
|
27
|
+
return false unless gemspec.present? && gemspec.version >= Gem::Version.new(min_version)
|
28
|
+
|
29
|
+
require gem
|
30
|
+
true
|
31
|
+
rescue LoadError
|
32
|
+
false
|
33
|
+
end
|
26
34
|
end
|
27
35
|
|
28
36
|
class FFakerGemAdapter
|
@@ -3,13 +3,16 @@
|
|
3
3
|
module ActiveRecordDataLoader
|
4
4
|
module Dsl
|
5
5
|
class Model
|
6
|
-
attr_reader :klass, :columns, :row_count, :polymorphic_associations, :belongs_to_associations
|
6
|
+
attr_reader :klass, :columns, :row_count, :polymorphic_associations, :belongs_to_associations,
|
7
|
+
:raise_on_duplicates_flag
|
7
8
|
|
8
9
|
def initialize(klass:, configuration:)
|
9
10
|
@klass = klass
|
10
11
|
@columns = {}
|
11
12
|
@row_count = configuration.default_row_count
|
12
13
|
@batch_size = configuration.default_batch_size
|
14
|
+
@raise_on_duplicates_flag = configuration.raise_on_duplicates
|
15
|
+
@max_duplicate_retries = configuration.max_duplicate_retries
|
13
16
|
@polymorphic_associations = []
|
14
17
|
@belongs_to_associations = []
|
15
18
|
end
|
@@ -22,6 +25,20 @@ module ActiveRecordDataLoader
|
|
22
25
|
@batch_size = (size || @batch_size)
|
23
26
|
end
|
24
27
|
|
28
|
+
def raise_on_duplicates
|
29
|
+
@raise_on_duplicates_flag = true
|
30
|
+
end
|
31
|
+
|
32
|
+
def do_not_raise_on_duplicates
|
33
|
+
@raise_on_duplicates_flag = false
|
34
|
+
end
|
35
|
+
|
36
|
+
def max_duplicate_retries(retries = nil)
|
37
|
+
return @max_duplicate_retries if retries.nil?
|
38
|
+
|
39
|
+
@max_duplicate_retries = retries
|
40
|
+
end
|
41
|
+
|
25
42
|
def column(name, func)
|
26
43
|
@columns[name.to_sym] = func
|
27
44
|
end
|
@@ -32,7 +49,7 @@ module ActiveRecordDataLoader
|
|
32
49
|
).tap { |a| block.call(a) }
|
33
50
|
end
|
34
51
|
|
35
|
-
def belongs_to(assoc_name, eligible_set:)
|
52
|
+
def belongs_to(assoc_name, eligible_set: nil)
|
36
53
|
@belongs_to_associations << BelongsToAssociation.new(@klass, assoc_name, eligible_set)
|
37
54
|
end
|
38
55
|
end
|
@@ -2,38 +2,46 @@
|
|
2
2
|
|
3
3
|
module ActiveRecordDataLoader
|
4
4
|
class FileOutputAdapter
|
5
|
+
def self.with_output_options(options)
|
6
|
+
adapter = new(options)
|
7
|
+
pre_command = options[:pre_command]
|
8
|
+
adapter.write_command(pre_command) if pre_command
|
9
|
+
yield adapter
|
10
|
+
post_command = options[:post_command]
|
11
|
+
adapter.write_command(post_command) if post_command
|
12
|
+
end
|
13
|
+
|
5
14
|
def initialize(options)
|
6
15
|
@filename = options.fetch(:filename, "active_record_data_loader_script.sql")
|
7
16
|
@file_basename = File.basename(@filename, File.extname(@filename))
|
8
17
|
@path = File.expand_path(File.dirname(@filename))
|
18
|
+
File.open(@filename, File::TRUNC) if File.exist?(@filename)
|
9
19
|
end
|
10
20
|
|
11
|
-
def
|
12
|
-
true
|
13
|
-
end
|
14
|
-
|
15
|
-
def copy(connection:, table:, columns:, data:, row_numbers:)
|
21
|
+
def copy(table:, columns:, data:, row_numbers:)
|
16
22
|
data_filename = data_filename(table, row_numbers)
|
17
23
|
File.open(data_filename, "w") { |f| f.puts(data) }
|
18
|
-
File.open(
|
24
|
+
File.open(filename, "a") do |file|
|
19
25
|
file.puts("\\COPY #{table} (#{columns}) FROM '#{data_filename}' WITH (FORMAT CSV);")
|
20
26
|
end
|
21
27
|
end
|
22
28
|
|
23
|
-
def insert(
|
24
|
-
|
29
|
+
def insert(command)
|
30
|
+
write_command(command)
|
25
31
|
end
|
26
32
|
|
27
|
-
def
|
28
|
-
File.open(
|
33
|
+
def write_command(command)
|
34
|
+
File.open(filename, "a") { |f| f.puts("#{command.gsub("\n", ' ')};") }
|
29
35
|
end
|
30
36
|
|
31
37
|
private
|
32
38
|
|
39
|
+
attr_reader :filename, :path, :file_basename
|
40
|
+
|
33
41
|
def data_filename(table, row_numbers)
|
34
42
|
File.join(
|
35
|
-
|
36
|
-
"#{
|
43
|
+
path,
|
44
|
+
"#{file_basename}_#{table.gsub(/"/, '')}_rows_#{row_numbers[0]}_to_#{row_numbers[-1]}.csv"
|
37
45
|
)
|
38
46
|
end
|
39
47
|
end
|
@@ -1,76 +1,82 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
require "benchmark"
|
4
|
-
|
5
3
|
module ActiveRecordDataLoader
|
6
4
|
class Loader
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
batch_size:,
|
12
|
-
configuration:
|
13
|
-
)
|
14
|
-
new(
|
15
|
-
logger: configuration.logger,
|
16
|
-
connection_handler: configuration.connection_handler,
|
17
|
-
strategy: strategy_class(configuration.connection_factory).new(
|
18
|
-
data_generator,
|
19
|
-
configuration.output_adapter
|
20
|
-
)
|
21
|
-
).load_data(batch_size, total_rows)
|
22
|
-
end
|
5
|
+
def initialize(configuration, definition)
|
6
|
+
@configuration = configuration
|
7
|
+
@definition = definition
|
8
|
+
end
|
23
9
|
|
24
|
-
|
10
|
+
def load_data
|
11
|
+
ActiveRecordDataLoader::ActiveRecord::PerRowValueCache.clear
|
25
12
|
|
26
|
-
|
27
|
-
|
28
|
-
ActiveRecordDataLoader::CopyStrategy
|
29
|
-
else
|
30
|
-
ActiveRecordDataLoader::BulkInsertStrategy
|
31
|
-
end
|
13
|
+
file_adapter_class.with_output_options(file_adapter_options) do |file_adapter|
|
14
|
+
definition.models.map { |m| load_model(m, file_adapter) }
|
32
15
|
end
|
33
16
|
end
|
34
17
|
|
35
|
-
|
36
|
-
@logger = logger
|
37
|
-
@connection_handler = connection_handler
|
38
|
-
@strategy = strategy
|
39
|
-
end
|
18
|
+
private
|
40
19
|
|
41
|
-
|
42
|
-
batch_count = (total_rows / batch_size.to_f).ceil
|
20
|
+
attr_reader :definition, :configuration
|
43
21
|
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
22
|
+
def load_model(model, file_adapter)
|
23
|
+
ActiveRecordDataLoader::TableLoader.load_data(
|
24
|
+
batch_size: model.batch_size,
|
25
|
+
total_rows: model.row_count,
|
26
|
+
connection_handler: connection_handler,
|
27
|
+
strategy: strategy_class.new(generator(model), file_adapter),
|
28
|
+
logger: configuration.logger
|
48
29
|
)
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
30
|
+
end
|
31
|
+
|
32
|
+
def generator(model)
|
33
|
+
ActiveRecordDataLoader::ActiveRecord::ModelDataGenerator.new(
|
34
|
+
model: model.klass,
|
35
|
+
column_settings: model.columns,
|
36
|
+
polymorphic_settings: model.polymorphic_associations,
|
37
|
+
belongs_to_settings: model.belongs_to_associations,
|
38
|
+
connection_factory: configuration.connection_factory,
|
39
|
+
raise_on_duplicates: model.raise_on_duplicates_flag,
|
40
|
+
max_duplicate_retries: model.max_duplicate_retries,
|
41
|
+
logger: configuration.logger
|
56
42
|
)
|
57
43
|
end
|
58
44
|
|
59
|
-
|
45
|
+
def file_adapter_class
|
46
|
+
if configuration.output.present?
|
47
|
+
ActiveRecordDataLoader::FileOutputAdapter
|
48
|
+
else
|
49
|
+
ActiveRecordDataLoader::NullOutputAdapter
|
50
|
+
end
|
51
|
+
end
|
60
52
|
|
61
|
-
|
53
|
+
def file_adapter_options
|
54
|
+
timeout_commands =
|
55
|
+
if connection_handler.supports_timeout?
|
56
|
+
{
|
57
|
+
pre_command: connection_handler.timeout_set_command,
|
58
|
+
post_command: connection_handler.reset_timeout_command,
|
59
|
+
}
|
60
|
+
else
|
61
|
+
{}
|
62
|
+
end
|
62
63
|
|
63
|
-
|
64
|
-
|
65
|
-
total_rows.times.each_slice(batch_size).with_index do |row_numbers, i|
|
66
|
-
time = Benchmark.realtime { strategy.load_batch(row_numbers, connection) }
|
64
|
+
timeout_commands.merge(filename: configuration.output)
|
65
|
+
end
|
67
66
|
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
67
|
+
def strategy_class
|
68
|
+
@strategy_class ||= if connection_handler.supports_copy?
|
69
|
+
ActiveRecordDataLoader::CopyStrategy
|
70
|
+
else
|
71
|
+
ActiveRecordDataLoader::BulkInsertStrategy
|
72
|
+
end
|
73
|
+
end
|
74
|
+
|
75
|
+
def connection_handler
|
76
|
+
@connection_handler ||= ActiveRecordDataLoader::ConnectionHandler.new(
|
77
|
+
connection_factory: configuration.connection_factory,
|
78
|
+
statement_timeout: configuration.statement_timeout
|
79
|
+
)
|
74
80
|
end
|
75
81
|
end
|
76
82
|
end
|