active_record_data_loader 1.0.0 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/build.yml +51 -0
- data/.github/workflows/gem-push.yml +29 -0
- data/.rubocop.yml +39 -6
- data/CHANGELOG.md +42 -0
- data/Gemfile.lock +68 -66
- data/README.md +91 -8
- data/Rakefile +8 -2
- data/active_record_data_loader.gemspec +8 -5
- data/config/database.yml +9 -0
- data/docker-compose.yml +18 -0
- data/gemfiles/activerecord_6.gemfile +1 -1
- data/lib/active_record_data_loader/active_record/belongs_to_configuration.rb +1 -1
- data/lib/active_record_data_loader/active_record/column_configuration.rb +14 -4
- data/lib/active_record_data_loader/active_record/datetime_value_generator.rb +21 -0
- data/lib/active_record_data_loader/active_record/enum_value_generator.rb +28 -5
- data/lib/active_record_data_loader/active_record/integer_value_generator.rb +2 -2
- data/lib/active_record_data_loader/active_record/model_data_generator.rb +15 -2
- data/lib/active_record_data_loader/active_record/per_row_value_cache.rb +33 -0
- data/lib/active_record_data_loader/active_record/polymorphic_belongs_to_configuration.rb +1 -1
- data/lib/active_record_data_loader/active_record/text_value_generator.rb +1 -1
- data/lib/active_record_data_loader/bulk_insert_strategy.rb +4 -3
- data/lib/active_record_data_loader/configuration.rb +45 -3
- data/lib/active_record_data_loader/connection_handler.rb +74 -0
- data/lib/active_record_data_loader/connection_output_adapter.rb +20 -0
- data/lib/active_record_data_loader/copy_strategy.rb +20 -20
- data/lib/active_record_data_loader/file_output_adapter.rb +40 -0
- data/lib/active_record_data_loader/loader.rb +11 -27
- data/lib/active_record_data_loader/version.rb +1 -1
- data/lib/active_record_data_loader.rb +26 -14
- metadata +60 -11
- data/.travis.yml +0 -23
- data/config/database.yml.travis +0 -7
@@ -9,15 +9,17 @@ module ActiveRecordDataLoader
|
|
9
9
|
integer: IntegerValueGenerator,
|
10
10
|
string: TextValueGenerator,
|
11
11
|
text: TextValueGenerator,
|
12
|
+
datetime: DatetimeValueGenerator,
|
12
13
|
}.freeze
|
13
14
|
|
14
|
-
def config_for(model_class:, ar_column:)
|
15
|
+
def config_for(model_class:, ar_column:, connection_factory:)
|
15
16
|
raise_error_if_not_supported(model_class, ar_column)
|
16
17
|
|
17
18
|
{
|
18
|
-
ar_column.name.to_sym => VALUE_GENERATORS[ar_column
|
19
|
+
ar_column.name.to_sym => VALUE_GENERATORS[column_type(ar_column)].generator_for(
|
19
20
|
model_class: model_class,
|
20
|
-
ar_column: ar_column
|
21
|
+
ar_column: ar_column,
|
22
|
+
connection_factory: connection_factory
|
21
23
|
),
|
22
24
|
}
|
23
25
|
end
|
@@ -25,7 +27,7 @@ module ActiveRecordDataLoader
|
|
25
27
|
def supported?(model_class:, ar_column:)
|
26
28
|
return false if model_class.reflect_on_association(ar_column.name)
|
27
29
|
|
28
|
-
VALUE_GENERATORS.keys.include?(ar_column
|
30
|
+
VALUE_GENERATORS.keys.include?(column_type(ar_column))
|
29
31
|
end
|
30
32
|
|
31
33
|
private
|
@@ -37,6 +39,14 @@ module ActiveRecordDataLoader
|
|
37
39
|
Column '#{ar_column.name}' of type '#{ar_column.type}' in model '#{model_class.name}' not supported"
|
38
40
|
ERROR
|
39
41
|
end
|
42
|
+
|
43
|
+
def column_type(ar_column)
|
44
|
+
if ar_column.type == :string && ar_column.sql_type.to_s.downcase.start_with?("enum")
|
45
|
+
:enum
|
46
|
+
else
|
47
|
+
ar_column.type
|
48
|
+
end
|
49
|
+
end
|
40
50
|
end
|
41
51
|
end
|
42
52
|
end
|
@@ -0,0 +1,21 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module ActiveRecordDataLoader
|
4
|
+
module ActiveRecord
|
5
|
+
class DatetimeValueGenerator
|
6
|
+
class << self
|
7
|
+
def generator_for(model_class:, ar_column:, connection_factory: nil)
|
8
|
+
->(row) { timestamp(model_class, row) }
|
9
|
+
end
|
10
|
+
|
11
|
+
private
|
12
|
+
|
13
|
+
def timestamp(model, row_number)
|
14
|
+
PerRowValueCache[:datetime].get_or_set(model: model, row: row_number) do
|
15
|
+
Time.now.utc
|
16
|
+
end
|
17
|
+
end
|
18
|
+
end
|
19
|
+
end
|
20
|
+
end
|
21
|
+
end
|
@@ -4,21 +4,44 @@ module ActiveRecordDataLoader
|
|
4
4
|
module ActiveRecord
|
5
5
|
class EnumValueGenerator
|
6
6
|
class << self
|
7
|
-
def generator_for(model_class:, ar_column:)
|
8
|
-
values = enum_values_for(
|
7
|
+
def generator_for(model_class:, ar_column:, connection_factory:)
|
8
|
+
values = enum_values_for(ar_column.sql_type, connection_factory)
|
9
9
|
-> { values.sample }
|
10
10
|
end
|
11
11
|
|
12
12
|
private
|
13
13
|
|
14
|
-
def enum_values_for(
|
15
|
-
|
16
|
-
|
14
|
+
def enum_values_for(enum_type, connection_factory)
|
15
|
+
connection = connection_factory.call
|
16
|
+
|
17
|
+
if connection.adapter_name.downcase.to_sym == :postgresql
|
18
|
+
postgres_enum_values_for(connection, enum_type)
|
19
|
+
elsif connection.adapter_name.downcase.to_s.start_with?("mysql")
|
20
|
+
mysql_enum_values_for(enum_type)
|
21
|
+
else
|
22
|
+
[]
|
23
|
+
end
|
24
|
+
ensure
|
25
|
+
connection&.close
|
26
|
+
end
|
27
|
+
|
28
|
+
def postgres_enum_values_for(connection, enum_type)
|
29
|
+
connection
|
17
30
|
.execute("SELECT unnest(enum_range(NULL::#{enum_type}))::text")
|
18
31
|
.map(&:values)
|
19
32
|
.flatten
|
20
33
|
.compact
|
21
34
|
end
|
35
|
+
|
36
|
+
def mysql_enum_values_for(enum_type)
|
37
|
+
enum_type
|
38
|
+
.to_s
|
39
|
+
.downcase
|
40
|
+
.gsub(/\Aenum\(|\)\Z/, "")
|
41
|
+
.split(",")
|
42
|
+
.map(&:strip)
|
43
|
+
.map { |s| s.gsub(/\A'|'\Z/, "") }
|
44
|
+
end
|
22
45
|
end
|
23
46
|
end
|
24
47
|
end
|
@@ -4,8 +4,8 @@ module ActiveRecordDataLoader
|
|
4
4
|
module ActiveRecord
|
5
5
|
class IntegerValueGenerator
|
6
6
|
class << self
|
7
|
-
def generator_for(model_class:, ar_column:)
|
8
|
-
range_limit = [(256**number_of_bytes(ar_column)) / 2 - 1, 1_000_000_000].min
|
7
|
+
def generator_for(model_class:, ar_column:, connection_factory: nil)
|
8
|
+
range_limit = [((256**number_of_bytes(ar_column)) / 2) - 1, 1_000_000_000].min
|
9
9
|
|
10
10
|
-> { rand(0..range_limit) }
|
11
11
|
end
|
@@ -5,12 +5,19 @@ module ActiveRecordDataLoader
|
|
5
5
|
class ModelDataGenerator
|
6
6
|
attr_reader :table
|
7
7
|
|
8
|
-
def initialize(
|
8
|
+
def initialize(
|
9
|
+
model:,
|
10
|
+
column_settings:,
|
11
|
+
connection_factory:,
|
12
|
+
polymorphic_settings: [],
|
13
|
+
belongs_to_settings: []
|
14
|
+
)
|
9
15
|
@model_class = model
|
10
16
|
@table = model.table_name
|
11
17
|
@column_settings = column_settings
|
12
18
|
@polymorphic_settings = polymorphic_settings
|
13
19
|
@belongs_to_settings = belongs_to_settings.map { |s| [s.name, s.query] }.to_h
|
20
|
+
@connection_factory = connection_factory
|
14
21
|
end
|
15
22
|
|
16
23
|
def column_list
|
@@ -50,7 +57,13 @@ module ActiveRecordDataLoader
|
|
50
57
|
.columns_hash
|
51
58
|
.reject { |name| name == @model_class.primary_key }
|
52
59
|
.select { |_, c| ColumnConfiguration.supported?(model_class: @model_class, ar_column: c) }
|
53
|
-
.map
|
60
|
+
.map do |_, c|
|
61
|
+
ColumnConfiguration.config_for(
|
62
|
+
model_class: @model_class,
|
63
|
+
ar_column: c,
|
64
|
+
connection_factory: @connection_factory
|
65
|
+
)
|
66
|
+
end
|
54
67
|
.reduce({}, :merge)
|
55
68
|
end
|
56
69
|
|
@@ -0,0 +1,33 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module ActiveRecordDataLoader
|
4
|
+
module ActiveRecord
|
5
|
+
class PerRowValueCache
|
6
|
+
class << self
|
7
|
+
def [](key)
|
8
|
+
caches[key] ||= new
|
9
|
+
end
|
10
|
+
|
11
|
+
def clear
|
12
|
+
@caches = {}
|
13
|
+
end
|
14
|
+
|
15
|
+
private
|
16
|
+
|
17
|
+
def caches
|
18
|
+
@caches ||= clear
|
19
|
+
end
|
20
|
+
end
|
21
|
+
|
22
|
+
def initialize
|
23
|
+
@row_caches = Hash.new { |hash, key| hash[key] = {} }
|
24
|
+
end
|
25
|
+
|
26
|
+
def get_or_set(model:, row:)
|
27
|
+
@row_caches[model.name].shift if @row_caches[model.name].size > 1
|
28
|
+
|
29
|
+
@row_caches[model.name][row] ||= yield
|
30
|
+
end
|
31
|
+
end
|
32
|
+
end
|
33
|
+
end
|
@@ -12,7 +12,7 @@ module ActiveRecordDataLoader
|
|
12
12
|
}.freeze
|
13
13
|
|
14
14
|
class << self
|
15
|
-
def generator_for(model_class:, ar_column:)
|
15
|
+
def generator_for(model_class:, ar_column:, connection_factory: nil)
|
16
16
|
scenario = GENERATORS.keys.find { |m| send(m, model_class, ar_column) }
|
17
17
|
generator = GENERATORS.fetch(scenario, -> { SecureRandom.uuid })
|
18
18
|
|
@@ -2,12 +2,13 @@
|
|
2
2
|
|
3
3
|
module ActiveRecordDataLoader
|
4
4
|
class BulkInsertStrategy
|
5
|
-
def initialize(data_generator)
|
5
|
+
def initialize(data_generator, output_adapter)
|
6
6
|
@data_generator = data_generator
|
7
|
+
@output_adapter = output_adapter
|
7
8
|
end
|
8
9
|
|
9
10
|
def load_batch(row_numbers, connection)
|
10
|
-
|
11
|
+
output_adapter.insert(connection: connection, command: <<~SQL)
|
11
12
|
INSERT INTO #{quoted_table_name(connection)} (#{column_list(connection)})
|
12
13
|
VALUES #{values(row_numbers, connection)}
|
13
14
|
SQL
|
@@ -23,7 +24,7 @@ module ActiveRecordDataLoader
|
|
23
24
|
|
24
25
|
private
|
25
26
|
|
26
|
-
attr_reader :data_generator
|
27
|
+
attr_reader :data_generator, :output_adapter
|
27
28
|
|
28
29
|
def quoted_table_name(connection)
|
29
30
|
@quoted_table_name ||= connection.quote_table_name(data_generator.table)
|
@@ -2,27 +2,69 @@
|
|
2
2
|
|
3
3
|
module ActiveRecordDataLoader
|
4
4
|
class Configuration
|
5
|
-
attr_accessor :
|
5
|
+
attr_accessor :connection_factory, :default_batch_size, :default_row_count,
|
6
|
+
:logger, :statement_timeout
|
7
|
+
attr_reader :output
|
6
8
|
|
7
9
|
def initialize(
|
8
10
|
default_batch_size: 100_000,
|
9
11
|
default_row_count: 1,
|
10
12
|
logger: nil,
|
11
|
-
statement_timeout: "2min"
|
13
|
+
statement_timeout: "2min",
|
14
|
+
connection_factory: -> { ::ActiveRecord::Base.connection },
|
15
|
+
output: :connection
|
12
16
|
)
|
13
17
|
@default_batch_size = default_batch_size
|
14
18
|
@default_row_count = default_row_count
|
15
19
|
@logger = logger || default_logger
|
16
20
|
@statement_timeout = statement_timeout
|
21
|
+
@connection_factory = connection_factory
|
22
|
+
self.output = output
|
23
|
+
end
|
24
|
+
|
25
|
+
def output=(output)
|
26
|
+
@output = validate_output(output || { type: :connection })
|
27
|
+
end
|
28
|
+
|
29
|
+
def output_adapter
|
30
|
+
if output.fetch(:type) == :file
|
31
|
+
ActiveRecordDataLoader::FileOutputAdapter.new(output)
|
32
|
+
else
|
33
|
+
ActiveRecordDataLoader::ConnectionOutputAdapter.new
|
34
|
+
end
|
35
|
+
end
|
36
|
+
|
37
|
+
def connection_handler
|
38
|
+
ActiveRecordDataLoader::ConnectionHandler.new(
|
39
|
+
connection_factory: connection_factory,
|
40
|
+
statement_timeout: statement_timeout,
|
41
|
+
output_adapter: output_adapter
|
42
|
+
)
|
17
43
|
end
|
18
44
|
|
19
45
|
private
|
20
46
|
|
47
|
+
OUTPUT_OPTIONS_BY_TYPE = { connection: %i[type], file: %i[type filename] }.freeze
|
48
|
+
|
49
|
+
def validate_output(output)
|
50
|
+
if %i[file connection].include?(output)
|
51
|
+
{ type: output }
|
52
|
+
elsif output.is_a?(Hash)
|
53
|
+
raise "The output hash must contain a :type key with either :connection or :file" \
|
54
|
+
unless %i[file connection].include?(output[:type])
|
55
|
+
|
56
|
+
output.slice(*OUTPUT_OPTIONS_BY_TYPE[output[:type]])
|
57
|
+
else
|
58
|
+
raise "The output configuration parameter must be either a symbol for :connection or :file, "\
|
59
|
+
"or a hash with more detailed output options."
|
60
|
+
end
|
61
|
+
end
|
62
|
+
|
21
63
|
def default_logger
|
22
64
|
if defined?(Rails) && Rails.respond_to?(:logger)
|
23
65
|
Rails.logger
|
24
66
|
else
|
25
|
-
Logger.new(
|
67
|
+
Logger.new($stdout, level: :info)
|
26
68
|
end
|
27
69
|
end
|
28
70
|
end
|
@@ -0,0 +1,74 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module ActiveRecordDataLoader
|
4
|
+
class ConnectionHandler
|
5
|
+
def initialize(connection_factory:, statement_timeout:, output_adapter:)
|
6
|
+
@connection_factory = connection_factory
|
7
|
+
@statement_timeout = statement_timeout
|
8
|
+
@output_adapter = output_adapter
|
9
|
+
end
|
10
|
+
|
11
|
+
def with_connection
|
12
|
+
connection = open_connection
|
13
|
+
if postgres?(connection)
|
14
|
+
original_timeout = retrieve_statement_timeout(connection)
|
15
|
+
update_statement_timeout(connection, statement_timeout)
|
16
|
+
yield connection
|
17
|
+
update_statement_timeout(connection, original_timeout)
|
18
|
+
else
|
19
|
+
yield connection
|
20
|
+
end
|
21
|
+
ensure
|
22
|
+
connection&.close
|
23
|
+
end
|
24
|
+
|
25
|
+
# When the output is going to a script file, there are two places to update the
|
26
|
+
# statement_timeout. The connection itself needs to have the timeout updated
|
27
|
+
# because we are reading data from the connection to come up with related data
|
28
|
+
# while generating the data. Also, the final SQL script file needs the timeout
|
29
|
+
# updated so that when those \COPY commands are executed they have the higher
|
30
|
+
# timeout as well.
|
31
|
+
def with_statement_timeout_for_output
|
32
|
+
return yield unless output_adapter.needs_timeout_output?
|
33
|
+
|
34
|
+
original_timeout = begin
|
35
|
+
connection = open_connection
|
36
|
+
retrieve_statement_timeout(connection) if postgres?(connection)
|
37
|
+
ensure
|
38
|
+
connection&.close
|
39
|
+
end
|
40
|
+
|
41
|
+
if original_timeout
|
42
|
+
output_adapter.execute(statement_timeout_set_command(statement_timeout))
|
43
|
+
yield
|
44
|
+
output_adapter.execute(statement_timeout_set_command(original_timeout))
|
45
|
+
else
|
46
|
+
yield
|
47
|
+
end
|
48
|
+
end
|
49
|
+
|
50
|
+
private
|
51
|
+
|
52
|
+
attr_reader :connection_factory, :statement_timeout, :output_adapter
|
53
|
+
|
54
|
+
def retrieve_statement_timeout(connection)
|
55
|
+
connection.execute("SHOW statement_timeout").first["statement_timeout"]
|
56
|
+
end
|
57
|
+
|
58
|
+
def update_statement_timeout(connection, timeout)
|
59
|
+
connection.execute(statement_timeout_set_command(timeout))
|
60
|
+
end
|
61
|
+
|
62
|
+
def statement_timeout_set_command(timeout)
|
63
|
+
"SET statement_timeout = \"#{timeout}\""
|
64
|
+
end
|
65
|
+
|
66
|
+
def open_connection
|
67
|
+
connection_factory.call
|
68
|
+
end
|
69
|
+
|
70
|
+
def postgres?(connection)
|
71
|
+
connection.adapter_name.downcase.to_sym == :postgresql
|
72
|
+
end
|
73
|
+
end
|
74
|
+
end
|
@@ -0,0 +1,20 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module ActiveRecordDataLoader
|
4
|
+
class ConnectionOutputAdapter
|
5
|
+
def needs_timeout_output?
|
6
|
+
false
|
7
|
+
end
|
8
|
+
|
9
|
+
def copy(connection:, table:, columns:, data:, row_numbers:)
|
10
|
+
raw_connection = connection.raw_connection
|
11
|
+
raw_connection.copy_data("COPY #{table} (#{columns}) FROM STDIN WITH (FORMAT CSV)") do
|
12
|
+
raw_connection.put_copy_data(data.join("\n"))
|
13
|
+
end
|
14
|
+
end
|
15
|
+
|
16
|
+
def insert(connection:, command:)
|
17
|
+
connection.insert(command)
|
18
|
+
end
|
19
|
+
end
|
20
|
+
end
|
@@ -2,15 +2,19 @@
|
|
2
2
|
|
3
3
|
module ActiveRecordDataLoader
|
4
4
|
class CopyStrategy
|
5
|
-
def initialize(data_generator)
|
5
|
+
def initialize(data_generator, output_adapter)
|
6
6
|
@data_generator = data_generator
|
7
|
+
@output_adapter = output_adapter
|
7
8
|
end
|
8
9
|
|
9
10
|
def load_batch(row_numbers, connection)
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
11
|
+
output_adapter.copy(
|
12
|
+
connection: connection,
|
13
|
+
table: table_name_for_copy(connection),
|
14
|
+
columns: columns_for_copy(connection),
|
15
|
+
data: csv_rows(row_numbers, connection),
|
16
|
+
row_numbers: row_numbers
|
17
|
+
)
|
14
18
|
end
|
15
19
|
|
16
20
|
def table_name
|
@@ -23,27 +27,23 @@ module ActiveRecordDataLoader
|
|
23
27
|
|
24
28
|
private
|
25
29
|
|
26
|
-
attr_reader :data_generator
|
30
|
+
attr_reader :data_generator, :output_adapter
|
27
31
|
|
28
|
-
def
|
32
|
+
def csv_rows(row_numbers, connection)
|
29
33
|
row_numbers.map do |i|
|
30
34
|
data_generator.generate_row(i).map { |d| quote_data(d, connection) }.join(",")
|
31
|
-
end
|
35
|
+
end
|
32
36
|
end
|
33
37
|
|
34
|
-
def
|
35
|
-
@
|
36
|
-
|
37
|
-
columns = data_generator
|
38
|
-
.column_list
|
39
|
-
.map { |c| connection.quote_column_name(c) }
|
40
|
-
.join(", ")
|
38
|
+
def table_name_for_copy(connection)
|
39
|
+
@table_name_for_copy ||= connection.quote_table_name(data_generator.table)
|
40
|
+
end
|
41
41
|
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
42
|
+
def columns_for_copy(connection)
|
43
|
+
@columns_for_copy ||= data_generator
|
44
|
+
.column_list
|
45
|
+
.map { |c| connection.quote_column_name(c) }
|
46
|
+
.join(", ")
|
47
47
|
end
|
48
48
|
|
49
49
|
def quote_data(data, connection)
|
@@ -0,0 +1,40 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module ActiveRecordDataLoader
|
4
|
+
class FileOutputAdapter
|
5
|
+
def initialize(options)
|
6
|
+
@filename = options.fetch(:filename, "active_record_data_loader_script.sql")
|
7
|
+
@file_basename = File.basename(@filename, File.extname(@filename))
|
8
|
+
@path = File.expand_path(File.dirname(@filename))
|
9
|
+
end
|
10
|
+
|
11
|
+
def needs_timeout_output?
|
12
|
+
true
|
13
|
+
end
|
14
|
+
|
15
|
+
def copy(connection:, table:, columns:, data:, row_numbers:)
|
16
|
+
data_filename = data_filename(table, row_numbers)
|
17
|
+
File.open(data_filename, "w") { |f| f.puts(data) }
|
18
|
+
File.open(@filename, "a") do |file|
|
19
|
+
file.puts("\\COPY #{table} (#{columns}) FROM '#{data_filename}' WITH (FORMAT CSV);")
|
20
|
+
end
|
21
|
+
end
|
22
|
+
|
23
|
+
def insert(connection:, command:)
|
24
|
+
execute(command)
|
25
|
+
end
|
26
|
+
|
27
|
+
def execute(command)
|
28
|
+
File.open(@filename, "a") { |f| f.puts("#{command.gsub("\n", ' ')};") }
|
29
|
+
end
|
30
|
+
|
31
|
+
private
|
32
|
+
|
33
|
+
def data_filename(table, row_numbers)
|
34
|
+
File.join(
|
35
|
+
@path,
|
36
|
+
"#{@file_basename}_#{table.gsub(/"/, '')}_rows_#{row_numbers[0]}_to_#{row_numbers[-1]}.csv"
|
37
|
+
)
|
38
|
+
end
|
39
|
+
end
|
40
|
+
end
|
@@ -13,15 +13,18 @@ module ActiveRecordDataLoader
|
|
13
13
|
)
|
14
14
|
new(
|
15
15
|
logger: configuration.logger,
|
16
|
-
|
17
|
-
strategy: strategy_class.new(
|
16
|
+
connection_handler: configuration.connection_handler,
|
17
|
+
strategy: strategy_class(configuration.connection_factory).new(
|
18
|
+
data_generator,
|
19
|
+
configuration.output_adapter
|
20
|
+
)
|
18
21
|
).load_data(batch_size, total_rows)
|
19
22
|
end
|
20
23
|
|
21
24
|
private
|
22
25
|
|
23
|
-
def strategy_class
|
24
|
-
if
|
26
|
+
def strategy_class(connection_factory)
|
27
|
+
if connection_factory.call.raw_connection.respond_to?(:copy_data)
|
25
28
|
ActiveRecordDataLoader::CopyStrategy
|
26
29
|
else
|
27
30
|
ActiveRecordDataLoader::BulkInsertStrategy
|
@@ -29,10 +32,10 @@ module ActiveRecordDataLoader
|
|
29
32
|
end
|
30
33
|
end
|
31
34
|
|
32
|
-
def initialize(logger:,
|
35
|
+
def initialize(logger:, connection_handler:, strategy:)
|
33
36
|
@logger = logger
|
37
|
+
@connection_handler = connection_handler
|
34
38
|
@strategy = strategy
|
35
|
-
@statement_timeout = statement_timeout
|
36
39
|
end
|
37
40
|
|
38
41
|
def load_data(batch_size, total_rows)
|
@@ -55,10 +58,10 @@ module ActiveRecordDataLoader
|
|
55
58
|
|
56
59
|
private
|
57
60
|
|
58
|
-
attr_reader :strategy, :
|
61
|
+
attr_reader :strategy, :connection_handler, :logger
|
59
62
|
|
60
63
|
def load_in_batches(batch_size, total_rows, batch_count)
|
61
|
-
with_connection do |connection|
|
64
|
+
connection_handler.with_connection do |connection|
|
62
65
|
total_rows.times.each_slice(batch_size).with_index do |row_numbers, i|
|
63
66
|
time = Benchmark.realtime { strategy.load_batch(row_numbers, connection) }
|
64
67
|
|
@@ -69,24 +72,5 @@ module ActiveRecordDataLoader
|
|
69
72
|
end
|
70
73
|
end
|
71
74
|
end
|
72
|
-
|
73
|
-
def with_connection
|
74
|
-
if ::ActiveRecord::Base.connection.adapter_name.downcase.to_sym == :postgresql
|
75
|
-
original_timeout = retrieve_statement_timeout
|
76
|
-
update_statement_timeout(statement_timeout)
|
77
|
-
yield ::ActiveRecord::Base.connection
|
78
|
-
update_statement_timeout(original_timeout)
|
79
|
-
else
|
80
|
-
yield ::ActiveRecord::Base.connection
|
81
|
-
end
|
82
|
-
end
|
83
|
-
|
84
|
-
def retrieve_statement_timeout
|
85
|
-
::ActiveRecord::Base.connection.execute("SHOW statement_timeout").first["statement_timeout"]
|
86
|
-
end
|
87
|
-
|
88
|
-
def update_statement_timeout(timeout)
|
89
|
-
::ActiveRecord::Base.connection.execute("SET statement_timeout = \"#{timeout}\"")
|
90
|
-
end
|
91
75
|
end
|
92
76
|
end
|
@@ -3,10 +3,13 @@
|
|
3
3
|
require "active_record_data_loader/version"
|
4
4
|
require "active_record"
|
5
5
|
require "active_record_data_loader/configuration"
|
6
|
+
require "active_record_data_loader/connection_handler"
|
6
7
|
require "active_record_data_loader/data_faker"
|
8
|
+
require "active_record_data_loader/active_record/per_row_value_cache"
|
7
9
|
require "active_record_data_loader/active_record/integer_value_generator"
|
8
10
|
require "active_record_data_loader/active_record/text_value_generator"
|
9
11
|
require "active_record_data_loader/active_record/enum_value_generator"
|
12
|
+
require "active_record_data_loader/active_record/datetime_value_generator"
|
10
13
|
require "active_record_data_loader/active_record/column_configuration"
|
11
14
|
require "active_record_data_loader/active_record/belongs_to_configuration"
|
12
15
|
require "active_record_data_loader/active_record/polymorphic_belongs_to_configuration"
|
@@ -15,6 +18,8 @@ require "active_record_data_loader/dsl/belongs_to_association"
|
|
15
18
|
require "active_record_data_loader/dsl/polymorphic_association"
|
16
19
|
require "active_record_data_loader/dsl/model"
|
17
20
|
require "active_record_data_loader/dsl/definition"
|
21
|
+
require "active_record_data_loader/connection_output_adapter"
|
22
|
+
require "active_record_data_loader/file_output_adapter"
|
18
23
|
require "active_record_data_loader/copy_strategy"
|
19
24
|
require "active_record_data_loader/bulk_insert_strategy"
|
20
25
|
require "active_record_data_loader/loader"
|
@@ -22,7 +27,7 @@ require "active_record_data_loader/loader"
|
|
22
27
|
module ActiveRecordDataLoader
|
23
28
|
def self.define(config = ActiveRecordDataLoader.configuration, &block)
|
24
29
|
LoaderProxy.new(
|
25
|
-
|
30
|
+
config,
|
26
31
|
ActiveRecordDataLoader::Dsl::Definition.new(config).tap { |l| l.instance_eval(&block) }
|
27
32
|
)
|
28
33
|
end
|
@@ -42,25 +47,32 @@ module ActiveRecordDataLoader
|
|
42
47
|
end
|
43
48
|
|
44
49
|
def load_data
|
45
|
-
|
46
|
-
generator = ActiveRecordDataLoader::ActiveRecord::ModelDataGenerator.new(
|
47
|
-
model: m.klass,
|
48
|
-
column_settings: m.columns,
|
49
|
-
polymorphic_settings: m.polymorphic_associations,
|
50
|
-
belongs_to_settings: m.belongs_to_associations
|
51
|
-
)
|
50
|
+
ActiveRecordDataLoader::ActiveRecord::PerRowValueCache.clear
|
52
51
|
|
53
|
-
|
54
|
-
|
55
|
-
batch_size: m.batch_size,
|
56
|
-
total_rows: m.row_count,
|
57
|
-
configuration: configuration
|
58
|
-
)
|
52
|
+
configuration.connection_handler.with_statement_timeout_for_output do
|
53
|
+
definition.models.map { |m| load_model(m) }
|
59
54
|
end
|
60
55
|
end
|
61
56
|
|
62
57
|
private
|
63
58
|
|
64
59
|
attr_reader :definition, :configuration
|
60
|
+
|
61
|
+
def load_model(model)
|
62
|
+
generator = ActiveRecordDataLoader::ActiveRecord::ModelDataGenerator.new(
|
63
|
+
model: model.klass,
|
64
|
+
column_settings: model.columns,
|
65
|
+
polymorphic_settings: model.polymorphic_associations,
|
66
|
+
belongs_to_settings: model.belongs_to_associations,
|
67
|
+
connection_factory: configuration.connection_factory
|
68
|
+
)
|
69
|
+
|
70
|
+
ActiveRecordDataLoader::Loader.load_data(
|
71
|
+
data_generator: generator,
|
72
|
+
batch_size: model.batch_size,
|
73
|
+
total_rows: model.row_count,
|
74
|
+
configuration: configuration
|
75
|
+
)
|
76
|
+
end
|
65
77
|
end
|
66
78
|
end
|