active_record_data_loader 1.0.0 → 1.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.github/workflows/build.yml +51 -0
- data/.github/workflows/gem-push.yml +29 -0
- data/.rubocop.yml +39 -6
- data/CHANGELOG.md +42 -0
- data/Gemfile.lock +68 -66
- data/README.md +91 -8
- data/Rakefile +8 -2
- data/active_record_data_loader.gemspec +8 -5
- data/config/database.yml +9 -0
- data/docker-compose.yml +18 -0
- data/gemfiles/activerecord_6.gemfile +1 -1
- data/lib/active_record_data_loader/active_record/belongs_to_configuration.rb +1 -1
- data/lib/active_record_data_loader/active_record/column_configuration.rb +14 -4
- data/lib/active_record_data_loader/active_record/datetime_value_generator.rb +21 -0
- data/lib/active_record_data_loader/active_record/enum_value_generator.rb +28 -5
- data/lib/active_record_data_loader/active_record/integer_value_generator.rb +2 -2
- data/lib/active_record_data_loader/active_record/model_data_generator.rb +15 -2
- data/lib/active_record_data_loader/active_record/per_row_value_cache.rb +33 -0
- data/lib/active_record_data_loader/active_record/polymorphic_belongs_to_configuration.rb +1 -1
- data/lib/active_record_data_loader/active_record/text_value_generator.rb +1 -1
- data/lib/active_record_data_loader/bulk_insert_strategy.rb +4 -3
- data/lib/active_record_data_loader/configuration.rb +45 -3
- data/lib/active_record_data_loader/connection_handler.rb +74 -0
- data/lib/active_record_data_loader/connection_output_adapter.rb +20 -0
- data/lib/active_record_data_loader/copy_strategy.rb +20 -20
- data/lib/active_record_data_loader/file_output_adapter.rb +40 -0
- data/lib/active_record_data_loader/loader.rb +11 -27
- data/lib/active_record_data_loader/version.rb +1 -1
- data/lib/active_record_data_loader.rb +26 -14
- metadata +60 -11
- data/.travis.yml +0 -23
- data/config/database.yml.travis +0 -7
@@ -9,15 +9,17 @@ module ActiveRecordDataLoader
|
|
9
9
|
integer: IntegerValueGenerator,
|
10
10
|
string: TextValueGenerator,
|
11
11
|
text: TextValueGenerator,
|
12
|
+
datetime: DatetimeValueGenerator,
|
12
13
|
}.freeze
|
13
14
|
|
14
|
-
def config_for(model_class:, ar_column:)
|
15
|
+
# Builds the generator configuration for a single column.
#
# Raises (via raise_error_if_not_supported) when the column is not supported;
# otherwise returns a one-entry hash mapping the column name (as a symbol) to
# the generator produced by the matching VALUE_GENERATORS entry.
def config_for(model_class:, ar_column:, connection_factory:)
  raise_error_if_not_supported(model_class, ar_column)

  column_key = ar_column.name.to_sym
  generator_class = VALUE_GENERATORS[column_type(ar_column)]
  generator = generator_class.generator_for(
    model_class: model_class,
    ar_column: ar_column,
    connection_factory: connection_factory
  )

  { column_key => generator }
end
|
@@ -25,7 +27,7 @@ module ActiveRecordDataLoader
|
|
25
27
|
# Tells whether a value generator exists for the given column.
#
# Returns false when the model reports an association under the column's name
# (reflect_on_association is truthy); otherwise returns whether VALUE_GENERATORS
# has an entry for the column's resolved type.
def supported?(model_class:, ar_column:)
  return false if model_class.reflect_on_association(ar_column.name)

  # Hash#key? avoids allocating the intermediate key array on every call.
  VALUE_GENERATORS.key?(column_type(ar_column))
end
|
30
32
|
|
31
33
|
private
|
@@ -37,6 +39,14 @@ module ActiveRecordDataLoader
|
|
37
39
|
Column '#{ar_column.name}' of type '#{ar_column.type}' in model '#{model_class.name}' not supported"
|
38
40
|
ERROR
|
39
41
|
end
|
42
|
+
|
43
|
+
# Resolves the generator lookup key for a column.
#
# A column typed :string whose sql_type starts with "enum" (case-insensitive)
# is reported as :enum; every other column reports its own type.
def column_type(ar_column)
  declared_type = ar_column.type
  return declared_type unless declared_type == :string

  enum_backed = ar_column.sql_type.to_s.downcase.start_with?("enum")
  enum_backed ? :enum : declared_type
end
|
40
50
|
end
|
41
51
|
end
|
42
52
|
end
|
@@ -0,0 +1,21 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module ActiveRecordDataLoader
|
4
|
+
module ActiveRecord
|
5
|
+
# Produces generators for datetime columns.
#
# Values are looked up through PerRowValueCache[:datetime], keyed by model and
# row number, so repeated calls for the same model/row reuse the cached time.
class DatetimeValueGenerator
  # Returns a one-argument lambda that maps a row number to a UTC Time.
  # ar_column and connection_factory are accepted but not used here.
  def self.generator_for(model_class:, ar_column:, connection_factory: nil)
    lambda { |row| timestamp(model_class, row) }
  end

  # Fetches the cached time for the model/row pair, or stores the current UTC time.
  def self.timestamp(model, row_number)
    PerRowValueCache[:datetime].get_or_set(model: model, row: row_number) { Time.now.utc }
  end
  private_class_method :timestamp
end
|
20
|
+
end
|
21
|
+
end
|
@@ -4,21 +4,44 @@ module ActiveRecordDataLoader
|
|
4
4
|
module ActiveRecord
|
5
5
|
# Produces generators that pick a random label from a database enum column.
class EnumValueGenerator
  class << self
    # Returns a zero-argument lambda that samples one of the enum's labels.
    #
    # The labels are read once, when the generator is built, using a connection
    # obtained from connection_factory. model_class is accepted but not used here.
    def generator_for(model_class:, ar_column:, connection_factory:)
      values = enum_values_for(ar_column.sql_type, connection_factory)
      -> { values.sample }
    end

    private

    # Looks up the enum labels according to the connection's adapter.
    # Returns an empty array for adapters other than PostgreSQL and MySQL.
    # The connection is always closed afterwards.
    def enum_values_for(enum_type, connection_factory)
      connection = connection_factory.call

      if connection.adapter_name.downcase.to_sym == :postgresql
        postgres_enum_values_for(connection, enum_type)
      elsif connection.adapter_name.downcase.to_s.start_with?("mysql")
        mysql_enum_values_for(enum_type)
      else
        []
      end
    ensure
      connection&.close
    end

    # Asks PostgreSQL for the labels of the named enum type.
    # enum_type is the column's sql_type as passed in by generator_for.
    def postgres_enum_values_for(connection, enum_type)
      connection
        .execute("SELECT unnest(enum_range(NULL::#{enum_type}))::text")
        .map(&:values)
        .flatten
        .compact
    end

    # Extracts the labels from a MySQL type definition such as "enum('a','b')".
    #
    # Only the single-quoted labels are read, so their case is preserved,
    # commas inside a label do not split it, and a doubled quote ('') inside
    # a label is unescaped to a single quote.
    def mysql_enum_values_for(enum_type)
      enum_type
        .to_s
        .scan(/'((?:[^']|'')*)'/)
        .map { |(label)| label.gsub("''", "'") }
    end
  end
end
|
24
47
|
end
|
@@ -4,8 +4,8 @@ module ActiveRecordDataLoader
|
|
4
4
|
module ActiveRecord
|
5
5
|
class IntegerValueGenerator
|
6
6
|
class << self
|
7
|
-
def generator_for(model_class:, ar_column:)
|
8
|
-
range_limit = [(256**number_of_bytes(ar_column)) / 2 - 1, 1_000_000_000].min
|
7
|
+
def generator_for(model_class:, ar_column:, connection_factory: nil)
|
8
|
+
range_limit = [((256**number_of_bytes(ar_column)) / 2) - 1, 1_000_000_000].min
|
9
9
|
|
10
10
|
-> { rand(0..range_limit) }
|
11
11
|
end
|
@@ -5,12 +5,19 @@ module ActiveRecordDataLoader
|
|
5
5
|
class ModelDataGenerator
|
6
6
|
attr_reader :table
|
7
7
|
|
8
|
-
def initialize(
|
8
|
+
def initialize(
|
9
|
+
model:,
|
10
|
+
column_settings:,
|
11
|
+
connection_factory:,
|
12
|
+
polymorphic_settings: [],
|
13
|
+
belongs_to_settings: []
|
14
|
+
)
|
9
15
|
@model_class = model
|
10
16
|
@table = model.table_name
|
11
17
|
@column_settings = column_settings
|
12
18
|
@polymorphic_settings = polymorphic_settings
|
13
19
|
@belongs_to_settings = belongs_to_settings.map { |s| [s.name, s.query] }.to_h
|
20
|
+
@connection_factory = connection_factory
|
14
21
|
end
|
15
22
|
|
16
23
|
def column_list
|
@@ -50,7 +57,13 @@ module ActiveRecordDataLoader
|
|
50
57
|
.columns_hash
|
51
58
|
.reject { |name| name == @model_class.primary_key }
|
52
59
|
.select { |_, c| ColumnConfiguration.supported?(model_class: @model_class, ar_column: c) }
|
53
|
-
.map
|
60
|
+
.map do |_, c|
|
61
|
+
ColumnConfiguration.config_for(
|
62
|
+
model_class: @model_class,
|
63
|
+
ar_column: c,
|
64
|
+
connection_factory: @connection_factory
|
65
|
+
)
|
66
|
+
end
|
54
67
|
.reduce({}, :merge)
|
55
68
|
end
|
56
69
|
|
@@ -0,0 +1,33 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module ActiveRecordDataLoader
|
4
|
+
module ActiveRecord
|
5
|
+
# Small cache for values that should be shared by repeated lookups of the
# same model row. Named caches are held at class level; each one keeps at
# most two rows per model name.
class PerRowValueCache
  class << self
    # Returns the cache registered under key, creating it on first use.
    def [](key)
      caches[key] ||= new
    end

    # Drops every named cache. Returns the new, empty registry hash.
    def clear
      @caches = {}
    end

    private

    def caches
      @caches ||= clear
    end
  end

  def initialize
    # One row => value hash per model name, created on first access.
    @row_caches = Hash.new { |hash, key| hash[key] = {} }
  end

  # Returns the value cached for the model/row pair, or stores and returns
  # the block's result when the row is not cached yet.
  #
  # The lookup happens before any eviction, so a hit never discards the row
  # being asked for, and a cached nil/false is returned without re-running
  # the block. On a miss the oldest row is dropped once two rows are held.
  def get_or_set(model:, row:)
    rows = @row_caches[model.name]
    return rows[row] if rows.key?(row)

    rows.shift if rows.size > 1
    rows[row] = yield
  end
end
|
32
|
+
end
|
33
|
+
end
|
@@ -12,7 +12,7 @@ module ActiveRecordDataLoader
|
|
12
12
|
}.freeze
|
13
13
|
|
14
14
|
class << self
|
15
|
-
def generator_for(model_class:, ar_column:)
|
15
|
+
def generator_for(model_class:, ar_column:, connection_factory: nil)
|
16
16
|
scenario = GENERATORS.keys.find { |m| send(m, model_class, ar_column) }
|
17
17
|
generator = GENERATORS.fetch(scenario, -> { SecureRandom.uuid })
|
18
18
|
|
@@ -2,12 +2,13 @@
|
|
2
2
|
|
3
3
|
module ActiveRecordDataLoader
|
4
4
|
class BulkInsertStrategy
|
5
|
-
def initialize(data_generator)
|
5
|
+
def initialize(data_generator, output_adapter)
|
6
6
|
@data_generator = data_generator
|
7
|
+
@output_adapter = output_adapter
|
7
8
|
end
|
8
9
|
|
9
10
|
def load_batch(row_numbers, connection)
|
10
|
-
|
11
|
+
output_adapter.insert(connection: connection, command: <<~SQL)
|
11
12
|
INSERT INTO #{quoted_table_name(connection)} (#{column_list(connection)})
|
12
13
|
VALUES #{values(row_numbers, connection)}
|
13
14
|
SQL
|
@@ -23,7 +24,7 @@ module ActiveRecordDataLoader
|
|
23
24
|
|
24
25
|
private
|
25
26
|
|
26
|
-
attr_reader :data_generator
|
27
|
+
attr_reader :data_generator, :output_adapter
|
27
28
|
|
28
29
|
def quoted_table_name(connection)
|
29
30
|
@quoted_table_name ||= connection.quote_table_name(data_generator.table)
|
@@ -2,27 +2,69 @@
|
|
2
2
|
|
3
3
|
module ActiveRecordDataLoader
|
4
4
|
# Holds the settings for a data load: batch sizing, logging, the PostgreSQL
# statement timeout, how connections are obtained and where output goes.
class Configuration
  # Keys kept from an output hash, per output type.
  OUTPUT_OPTIONS_BY_TYPE = { connection: %i[type], file: %i[type filename] }.freeze

  attr_accessor :connection_factory, :default_batch_size, :default_row_count,
                :logger, :statement_timeout
  attr_reader :output

  # output may be :connection, :file, or a hash with a :type key; see #output=.
  def initialize(
    default_batch_size: 100_000,
    default_row_count: 1,
    logger: nil,
    statement_timeout: "2min",
    connection_factory: -> { ::ActiveRecord::Base.connection },
    output: :connection
  )
    @default_batch_size = default_batch_size
    @default_row_count = default_row_count
    @logger = logger || default_logger
    @statement_timeout = statement_timeout
    @connection_factory = connection_factory
    self.output = output
  end

  # Normalizes and stores the output setting; nil falls back to { type: :connection }.
  # Raises RuntimeError when the value is neither a known symbol nor a valid hash.
  def output=(output)
    @output = validate_output(output || { type: :connection })
  end

  # Builds a new adapter matching the configured output type.
  def output_adapter
    return ActiveRecordDataLoader::ConnectionOutputAdapter.new unless output.fetch(:type) == :file

    ActiveRecordDataLoader::FileOutputAdapter.new(output)
  end

  # Builds a new connection handler from the current settings.
  def connection_handler
    ActiveRecordDataLoader::ConnectionHandler.new(
      connection_factory: connection_factory,
      statement_timeout: statement_timeout,
      output_adapter: output_adapter
    )
  end

  private

  # Turns a symbol into { type: symbol }, or trims a hash down to the keys
  # allowed for its :type.
  def validate_output(output)
    case output
    when :file, :connection
      { type: output }
    when Hash
      type = output[:type]
      unless OUTPUT_OPTIONS_BY_TYPE.key?(type)
        raise "The output hash must contain a :type key with either :connection or :file"
      end

      output.slice(*OUTPUT_OPTIONS_BY_TYPE[type])
    else
      raise "The output configuration parameter must be either a symbol for :connection or :file, "\
            "or a hash with more detailed output options."
    end
  end

  # Uses Rails.logger when Rails is loaded and exposes one, else logs to $stdout at info level.
  def default_logger
    return Rails.logger if defined?(Rails) && Rails.respond_to?(:logger)

    Logger.new($stdout, level: :info)
  end
end
|
@@ -0,0 +1,74 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module ActiveRecordDataLoader
|
4
|
+
# Opens connections through the configured factory and manages the
# PostgreSQL statement_timeout around data loading.
class ConnectionHandler
  # connection_factory must respond to #call and return a connection;
  # statement_timeout is the value interpolated into SET statement_timeout;
  # output_adapter must respond to #needs_timeout_output? and, when that is
  # true, to #execute.
  def initialize(connection_factory:, statement_timeout:, output_adapter:)
    @connection_factory = connection_factory
    @statement_timeout = statement_timeout
    @output_adapter = output_adapter
  end

  # Yields a freshly opened connection and always closes it afterwards.
  #
  # On PostgreSQL the configured statement_timeout is applied before the
  # block runs and the original value is put back afterwards, including
  # when the block raises, so the connection is not closed with the raised
  # timeout still set.
  def with_connection
    connection = open_connection
    if postgres?(connection)
      with_temporary_statement_timeout(connection) { yield connection }
    else
      yield connection
    end
  ensure
    connection&.close
  end

  # When the output is going to a script file, there are two places to update the
  # statement_timeout. The connection itself needs to have the timeout updated
  # because we are reading data from the connection to come up with related data
  # while generating the data. Also, the final SQL script file needs the timeout
  # updated so that when those \COPY commands are executed they have the higher
  # timeout as well.
  def with_statement_timeout_for_output
    return yield unless output_adapter.needs_timeout_output?

    original_timeout = begin
      connection = open_connection
      retrieve_statement_timeout(connection) if postgres?(connection)
    ensure
      connection&.close
    end

    if original_timeout
      output_adapter.execute(statement_timeout_set_command(statement_timeout))
      yield
      output_adapter.execute(statement_timeout_set_command(original_timeout))
    else
      yield
    end
  end

  private

  attr_reader :connection_factory, :statement_timeout, :output_adapter

  # Applies the configured timeout, runs the block, then restores the
  # original timeout. Returns the result of the restoring SET on success.
  def with_temporary_statement_timeout(connection)
    original_timeout = retrieve_statement_timeout(connection)
    update_statement_timeout(connection, statement_timeout)

    begin
      yield
    rescue StandardError
      restore_statement_timeout_quietly(connection, original_timeout)
      raise
    end

    update_statement_timeout(connection, original_timeout)
  end

  # Best-effort restore used while another error is propagating: a failure
  # here is deliberately ignored so it cannot mask the original exception.
  def restore_statement_timeout_quietly(connection, timeout)
    update_statement_timeout(connection, timeout)
  rescue StandardError
    nil
  end

  def retrieve_statement_timeout(connection)
    connection.execute("SHOW statement_timeout").first["statement_timeout"]
  end

  def update_statement_timeout(connection, timeout)
    connection.execute(statement_timeout_set_command(timeout))
  end

  def statement_timeout_set_command(timeout)
    "SET statement_timeout = \"#{timeout}\""
  end

  def open_connection
    connection_factory.call
  end

  def postgres?(connection)
    connection.adapter_name.downcase.to_sym == :postgresql
  end
end
|
74
|
+
end
|
@@ -0,0 +1,20 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module ActiveRecordDataLoader
|
4
|
+
# Output adapter that sends generated data straight to the database connection.
class ConnectionOutputAdapter
  # This adapter never asks for timeout commands to be written to its output.
  def needs_timeout_output?
    false
  end

  # Streams the rows to the table through the raw connection's COPY support.
  # The rows are joined with newlines; row_numbers is accepted but not used here.
  def copy(connection:, table:, columns:, data:, row_numbers:)
    driver = connection.raw_connection
    copy_statement = "COPY #{table} (#{columns}) FROM STDIN WITH (FORMAT CSV)"

    driver.copy_data(copy_statement) { driver.put_copy_data(data.join("\n")) }
  end

  # Runs the given command through the connection's #insert.
  def insert(connection:, command:)
    connection.insert(command)
  end
end
|
20
|
+
end
|
@@ -2,15 +2,19 @@
|
|
2
2
|
|
3
3
|
module ActiveRecordDataLoader
|
4
4
|
class CopyStrategy
|
5
|
-
def initialize(data_generator)
|
5
|
+
def initialize(data_generator, output_adapter)
|
6
6
|
@data_generator = data_generator
|
7
|
+
@output_adapter = output_adapter
|
7
8
|
end
|
8
9
|
|
9
10
|
def load_batch(row_numbers, connection)
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
11
|
+
output_adapter.copy(
|
12
|
+
connection: connection,
|
13
|
+
table: table_name_for_copy(connection),
|
14
|
+
columns: columns_for_copy(connection),
|
15
|
+
data: csv_rows(row_numbers, connection),
|
16
|
+
row_numbers: row_numbers
|
17
|
+
)
|
14
18
|
end
|
15
19
|
|
16
20
|
def table_name
|
@@ -23,27 +27,23 @@ module ActiveRecordDataLoader
|
|
23
27
|
|
24
28
|
private
|
25
29
|
|
26
|
-
attr_reader :data_generator
|
30
|
+
attr_reader :data_generator, :output_adapter
|
27
31
|
|
28
|
-
def
|
32
|
+
def csv_rows(row_numbers, connection)
|
29
33
|
row_numbers.map do |i|
|
30
34
|
data_generator.generate_row(i).map { |d| quote_data(d, connection) }.join(",")
|
31
|
-
end
|
35
|
+
end
|
32
36
|
end
|
33
37
|
|
34
|
-
def
|
35
|
-
@
|
36
|
-
|
37
|
-
columns = data_generator
|
38
|
-
.column_list
|
39
|
-
.map { |c| connection.quote_column_name(c) }
|
40
|
-
.join(", ")
|
38
|
+
def table_name_for_copy(connection)
|
39
|
+
@table_name_for_copy ||= connection.quote_table_name(data_generator.table)
|
40
|
+
end
|
41
41
|
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
42
|
+
def columns_for_copy(connection)
|
43
|
+
@columns_for_copy ||= data_generator
|
44
|
+
.column_list
|
45
|
+
.map { |c| connection.quote_column_name(c) }
|
46
|
+
.join(", ")
|
47
47
|
end
|
48
48
|
|
49
49
|
def quote_data(data, connection)
|
@@ -0,0 +1,40 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module ActiveRecordDataLoader
|
4
|
+
# Output adapter that writes a SQL script (plus CSV data files for COPY)
# instead of executing anything against the connection.
class FileOutputAdapter
  # options[:filename] names the script file; it defaults to
  # "active_record_data_loader_script.sql". CSV data files are written to the
  # script's expanded directory.
  def initialize(options)
    @filename = options.fetch(:filename, "active_record_data_loader_script.sql")
    extension = File.extname(@filename)
    @file_basename = File.basename(@filename, extension)
    @path = File.expand_path(File.dirname(@filename))
  end

  # This adapter asks for timeout commands to be written to its output.
  def needs_timeout_output?
    true
  end

  # Writes the rows to their own CSV file (overwriting it) and appends a
  # \COPY line referencing that file to the script. connection is not used.
  def copy(connection:, table:, columns:, data:, row_numbers:)
    csv_path = data_filename(table, row_numbers)
    File.open(csv_path, "w") do |csv|
      csv.puts(data)
    end

    append_to_script("\\COPY #{table} (#{columns}) FROM '#{csv_path}' WITH (FORMAT CSV);")
  end

  # Appends the command to the script. connection is not used.
  def insert(connection:, command:)
    execute(command)
  end

  # Appends the command as a single line: newlines become spaces and a
  # semicolon is added at the end.
  def execute(command)
    append_to_script("#{command.tr("\n", ' ')};")
  end

  private

  # Adds one line to the end of the script file.
  def append_to_script(line)
    File.open(@filename, "a") do |script|
      script.puts(line)
    end
  end

  # Path of the CSV file for a batch: script basename, table name with
  # double quotes removed, and the first and last row numbers.
  def data_filename(table, row_numbers)
    unquoted_table = table.delete('"')
    csv_name = "#{@file_basename}_#{unquoted_table}_rows_#{row_numbers.first}_to_#{row_numbers.last}.csv"

    File.join(@path, csv_name)
  end
end
|
40
|
+
end
|
@@ -13,15 +13,18 @@ module ActiveRecordDataLoader
|
|
13
13
|
)
|
14
14
|
new(
|
15
15
|
logger: configuration.logger,
|
16
|
-
|
17
|
-
strategy: strategy_class.new(
|
16
|
+
connection_handler: configuration.connection_handler,
|
17
|
+
strategy: strategy_class(configuration.connection_factory).new(
|
18
|
+
data_generator,
|
19
|
+
configuration.output_adapter
|
20
|
+
)
|
18
21
|
).load_data(batch_size, total_rows)
|
19
22
|
end
|
20
23
|
|
21
24
|
private
|
22
25
|
|
23
|
-
def strategy_class
|
24
|
-
if
|
26
|
+
def strategy_class(connection_factory)
|
27
|
+
if connection_factory.call.raw_connection.respond_to?(:copy_data)
|
25
28
|
ActiveRecordDataLoader::CopyStrategy
|
26
29
|
else
|
27
30
|
ActiveRecordDataLoader::BulkInsertStrategy
|
@@ -29,10 +32,10 @@ module ActiveRecordDataLoader
|
|
29
32
|
end
|
30
33
|
end
|
31
34
|
|
32
|
-
def initialize(logger:,
|
35
|
+
def initialize(logger:, connection_handler:, strategy:)
|
33
36
|
@logger = logger
|
37
|
+
@connection_handler = connection_handler
|
34
38
|
@strategy = strategy
|
35
|
-
@statement_timeout = statement_timeout
|
36
39
|
end
|
37
40
|
|
38
41
|
def load_data(batch_size, total_rows)
|
@@ -55,10 +58,10 @@ module ActiveRecordDataLoader
|
|
55
58
|
|
56
59
|
private
|
57
60
|
|
58
|
-
attr_reader :strategy, :
|
61
|
+
attr_reader :strategy, :connection_handler, :logger
|
59
62
|
|
60
63
|
def load_in_batches(batch_size, total_rows, batch_count)
|
61
|
-
with_connection do |connection|
|
64
|
+
connection_handler.with_connection do |connection|
|
62
65
|
total_rows.times.each_slice(batch_size).with_index do |row_numbers, i|
|
63
66
|
time = Benchmark.realtime { strategy.load_batch(row_numbers, connection) }
|
64
67
|
|
@@ -69,24 +72,5 @@ module ActiveRecordDataLoader
|
|
69
72
|
end
|
70
73
|
end
|
71
74
|
end
|
72
|
-
|
73
|
-
def with_connection
|
74
|
-
if ::ActiveRecord::Base.connection.adapter_name.downcase.to_sym == :postgresql
|
75
|
-
original_timeout = retrieve_statement_timeout
|
76
|
-
update_statement_timeout(statement_timeout)
|
77
|
-
yield ::ActiveRecord::Base.connection
|
78
|
-
update_statement_timeout(original_timeout)
|
79
|
-
else
|
80
|
-
yield ::ActiveRecord::Base.connection
|
81
|
-
end
|
82
|
-
end
|
83
|
-
|
84
|
-
def retrieve_statement_timeout
|
85
|
-
::ActiveRecord::Base.connection.execute("SHOW statement_timeout").first["statement_timeout"]
|
86
|
-
end
|
87
|
-
|
88
|
-
def update_statement_timeout(timeout)
|
89
|
-
::ActiveRecord::Base.connection.execute("SET statement_timeout = \"#{timeout}\"")
|
90
|
-
end
|
91
75
|
end
|
92
76
|
end
|
@@ -3,10 +3,13 @@
|
|
3
3
|
require "active_record_data_loader/version"
|
4
4
|
require "active_record"
|
5
5
|
require "active_record_data_loader/configuration"
|
6
|
+
require "active_record_data_loader/connection_handler"
|
6
7
|
require "active_record_data_loader/data_faker"
|
8
|
+
require "active_record_data_loader/active_record/per_row_value_cache"
|
7
9
|
require "active_record_data_loader/active_record/integer_value_generator"
|
8
10
|
require "active_record_data_loader/active_record/text_value_generator"
|
9
11
|
require "active_record_data_loader/active_record/enum_value_generator"
|
12
|
+
require "active_record_data_loader/active_record/datetime_value_generator"
|
10
13
|
require "active_record_data_loader/active_record/column_configuration"
|
11
14
|
require "active_record_data_loader/active_record/belongs_to_configuration"
|
12
15
|
require "active_record_data_loader/active_record/polymorphic_belongs_to_configuration"
|
@@ -15,6 +18,8 @@ require "active_record_data_loader/dsl/belongs_to_association"
|
|
15
18
|
require "active_record_data_loader/dsl/polymorphic_association"
|
16
19
|
require "active_record_data_loader/dsl/model"
|
17
20
|
require "active_record_data_loader/dsl/definition"
|
21
|
+
require "active_record_data_loader/connection_output_adapter"
|
22
|
+
require "active_record_data_loader/file_output_adapter"
|
18
23
|
require "active_record_data_loader/copy_strategy"
|
19
24
|
require "active_record_data_loader/bulk_insert_strategy"
|
20
25
|
require "active_record_data_loader/loader"
|
@@ -22,7 +27,7 @@ require "active_record_data_loader/loader"
|
|
22
27
|
module ActiveRecordDataLoader
|
23
28
|
def self.define(config = ActiveRecordDataLoader.configuration, &block)
|
24
29
|
LoaderProxy.new(
|
25
|
-
|
30
|
+
config,
|
26
31
|
ActiveRecordDataLoader::Dsl::Definition.new(config).tap { |l| l.instance_eval(&block) }
|
27
32
|
)
|
28
33
|
end
|
@@ -42,25 +47,32 @@ module ActiveRecordDataLoader
|
|
42
47
|
end
|
43
48
|
|
44
49
|
def load_data
|
45
|
-
|
46
|
-
generator = ActiveRecordDataLoader::ActiveRecord::ModelDataGenerator.new(
|
47
|
-
model: m.klass,
|
48
|
-
column_settings: m.columns,
|
49
|
-
polymorphic_settings: m.polymorphic_associations,
|
50
|
-
belongs_to_settings: m.belongs_to_associations
|
51
|
-
)
|
50
|
+
ActiveRecordDataLoader::ActiveRecord::PerRowValueCache.clear
|
52
51
|
|
53
|
-
|
54
|
-
|
55
|
-
batch_size: m.batch_size,
|
56
|
-
total_rows: m.row_count,
|
57
|
-
configuration: configuration
|
58
|
-
)
|
52
|
+
configuration.connection_handler.with_statement_timeout_for_output do
|
53
|
+
definition.models.map { |m| load_model(m) }
|
59
54
|
end
|
60
55
|
end
|
61
56
|
|
62
57
|
private
|
63
58
|
|
64
59
|
attr_reader :definition, :configuration
|
60
|
+
|
61
|
+
def load_model(model)
|
62
|
+
generator = ActiveRecordDataLoader::ActiveRecord::ModelDataGenerator.new(
|
63
|
+
model: model.klass,
|
64
|
+
column_settings: model.columns,
|
65
|
+
polymorphic_settings: model.polymorphic_associations,
|
66
|
+
belongs_to_settings: model.belongs_to_associations,
|
67
|
+
connection_factory: configuration.connection_factory
|
68
|
+
)
|
69
|
+
|
70
|
+
ActiveRecordDataLoader::Loader.load_data(
|
71
|
+
data_generator: generator,
|
72
|
+
batch_size: model.batch_size,
|
73
|
+
total_rows: model.row_count,
|
74
|
+
configuration: configuration
|
75
|
+
)
|
76
|
+
end
|
65
77
|
end
|
66
78
|
end
|