active_record_data_loader 1.2.0 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (29)
  1. checksums.yaml +4 -4
  2. data/.github/workflows/codeql-analysis.yml +70 -0
  3. data/.rubocop.yml +8 -2
  4. data/CHANGELOG.md +9 -0
  5. data/CODE_OF_CONDUCT.md +2 -2
  6. data/Gemfile.lock +24 -24
  7. data/README.md +88 -18
  8. data/active_record_data_loader.gemspec +1 -1
  9. data/lib/active_record_data_loader/active_record/{belongs_to_configuration.rb → belongs_to_data_provider.rb} +7 -6
  10. data/lib/active_record_data_loader/active_record/{column_configuration.rb → column_data_provider.rb} +2 -2
  11. data/lib/active_record_data_loader/active_record/list.rb +35 -0
  12. data/lib/active_record_data_loader/active_record/model_data_generator.rb +60 -5
  13. data/lib/active_record_data_loader/active_record/{polymorphic_belongs_to_configuration.rb → polymorphic_belongs_to_data_provider.rb} +11 -6
  14. data/lib/active_record_data_loader/active_record/unique_index_tracker.rb +67 -0
  15. data/lib/active_record_data_loader/bulk_insert_strategy.rb +16 -9
  16. data/lib/active_record_data_loader/configuration.rb +13 -30
  17. data/lib/active_record_data_loader/connection_handler.rb +23 -45
  18. data/lib/active_record_data_loader/copy_strategy.rb +21 -7
  19. data/lib/active_record_data_loader/data_faker.rb +12 -4
  20. data/lib/active_record_data_loader/dsl/model.rb +19 -2
  21. data/lib/active_record_data_loader/errors.rb +5 -0
  22. data/lib/active_record_data_loader/file_output_adapter.rb +20 -12
  23. data/lib/active_record_data_loader/loader.rb +61 -55
  24. data/lib/active_record_data_loader/null_output_adapter.rb +15 -0
  25. data/lib/active_record_data_loader/table_loader.rb +59 -0
  26. data/lib/active_record_data_loader/version.rb +1 -1
  27. data/lib/active_record_data_loader.rb +9 -41
  28. metadata +12 -7
  29. data/lib/active_record_data_loader/connection_output_adapter.rb +0 -20

data/lib/active_record_data_loader/active_record/{polymorphic_belongs_to_configuration.rb → polymorphic_belongs_to_data_provider.rb}
@@ -2,20 +2,21 @@
 
 module ActiveRecordDataLoader
   module ActiveRecord
-    class PolymorphicBelongsToConfiguration
-      def self.config_for(polymorphic_settings:)
+    class PolymorphicBelongsToDataProvider
+      def self.provider_for(polymorphic_settings:, strategy: :random)
         ar_association = polymorphic_settings.model_class.reflect_on_association(
           polymorphic_settings.name
         )
         raise "#{name} only supports polymorphic associations" unless ar_association.polymorphic?
 
-        new(polymorphic_settings, ar_association).polymorphic_config
+        new(polymorphic_settings, ar_association, strategy).polymorphic_config
       end
 
-      def initialize(settings, ar_association)
+      def initialize(settings, ar_association, strategy)
         @settings = settings
         @ar_association = ar_association
         @model_count = settings.weighted_models.size
+        @strategy = strategy
       end
 
       def polymorphic_config
@@ -32,19 +33,23 @@ module ActiveRecordDataLoader
       end
 
      def foreign_key(row_number)
-        possible_values[row_number % @model_count][1].sample
+        possible_values[row_number % @model_count][1].next
      end
 
      def possible_values
        @possible_values ||= begin
          values = @settings.models.keys.map do |klass|
-            [klass.name, base_query(klass).pluck(klass.primary_key).to_a]
+            [klass.name, values_query(klass)]
          end.to_h
 
          @settings.weighted_models.map { |klass| [klass.name, values[klass.name]] }
        end
      end
 
+      def values_query(klass)
+        List.for(base_query(klass).pluck(klass.primary_key), strategy: @strategy)
+      end
+
      def base_query(klass)
        if @settings.queries[klass].respond_to?(:call)
          @settings.queries[klass].call.all
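
The renamed provider threads a strategy option through to the new List class (added in list.rb, not shown in this hunk), so per-type foreign keys are drawn by List#next instead of Array#sample. A minimal sketch of the new entry point; the settings object would normally be built by the DSL's polymorphic block rather than by hand:

    # Hypothetical call site; `settings` stands in for the DSL's polymorphic settings object.
    provider = ActiveRecordDataLoader::ActiveRecord::PolymorphicBelongsToDataProvider
                 .provider_for(polymorphic_settings: settings, strategy: :random)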

data/lib/active_record_data_loader/active_record/unique_index_tracker.rb (new file)
@@ -0,0 +1,67 @@
+# frozen_string_literal: true
+
+module ActiveRecordDataLoader
+  module ActiveRecord
+    class UniqueIndexTracker
+      Index = Struct.new(:name, :columns, :column_indexes, keyword_init: true)
+
+      def initialize(model:, connection_factory:)
+        @model = model
+        @table = model.table_name
+        @unique_indexes = []
+        @unique_values_used = {}
+        find_unique_indexes(connection_factory)
+      end
+
+      def map_indexed_columns(column_list)
+        @unique_indexes = @raw_unique_indexes.map do |index|
+          @unique_values_used[index.name] = Set.new
+          columns = index.columns.map(&:to_sym)
+          Index.new(
+            name: index.name,
+            columns: columns,
+            column_indexes: columns.map { |c| column_list.find_index(c) }
+          )
+        end
+      end
+
+      def repeating_unique_values?(row)
+        @unique_indexes.map do |index|
+          values = index.column_indexes.map { |i| row[i] }
+          @unique_values_used.fetch(index.name).include?(values)
+        end.any?
+      end
+
+      def capture_unique_values(row)
+        return unless row.present?
+
+        @unique_indexes.each do |index|
+          values = index.column_indexes.map { |i| row[i] }
+          @unique_values_used.fetch(index.name) << values
+        end
+        row
+      end
+
+      def contained_in_index?(ar_column)
+        target_column = if @model.reflect_on_association(ar_column.name)&.belongs_to?
+                          ar_column.join_foreign_key.to_sym
+                        else
+                          ar_column.name.to_sym
+                        end
+
+        @raw_unique_indexes.flat_map { |i| i.columns.map(&:to_sym) }.include?(target_column)
+      end
+
+      private
+
+      attr_reader :table
+
+      def find_unique_indexes(connection_factory)
+        connection = connection_factory.call
+        @raw_unique_indexes = connection.indexes(table).select(&:unique)
+      ensure
+        connection&.close
+      end
+    end
+  end
+end
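
This new tracker is what lets row generation detect values that would violate a unique index. A rough usage sketch, assuming `columns` is the ordered column list the generator emits and `generate_row` is the (hypothetical here) generator call wired up in model_data_generator.rb:

    tracker = ActiveRecordDataLoader::ActiveRecord::UniqueIndexTracker.new(
      model: Order, # hypothetical ActiveRecord model
      connection_factory: -> { ::ActiveRecord::Base.connection }
    )
    tracker.map_indexed_columns(columns) # e.g. [:id, :customer_id, :code]

    row = generate_row(row_number)
    if tracker.repeating_unique_values?(row)
      row = nil # caller can regenerate the row or drop it
    else
      tracker.capture_unique_values(row)
    end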

data/lib/active_record_data_loader/bulk_insert_strategy.rb
@@ -2,16 +2,18 @@
 
 module ActiveRecordDataLoader
   class BulkInsertStrategy
-    def initialize(data_generator, output_adapter)
+    def initialize(data_generator, file_adapter)
       @data_generator = data_generator
-      @output_adapter = output_adapter
+      @file_adapter = file_adapter
     end
 
     def load_batch(row_numbers, connection)
-      output_adapter.insert(connection: connection, command: <<~SQL)
+      command = <<~SQL
         INSERT INTO #{quoted_table_name(connection)} (#{column_list(connection)})
         VALUES #{values(row_numbers, connection)}
       SQL
+      insert(connection: connection, command: command)
+      file_adapter.insert(command)
     end
 
     def table_name
@@ -24,7 +26,11 @@ module ActiveRecordDataLoader
 
     private
 
-    attr_reader :data_generator, :output_adapter
+    attr_reader :data_generator, :file_adapter
+
+    def insert(connection:, command:)
+      connection.insert(command)
+    end
 
     def quoted_table_name(connection)
       @quoted_table_name ||= connection.quote_table_name(data_generator.table)
@@ -39,15 +45,16 @@ module ActiveRecordDataLoader
 
     def values(row_numbers, connection)
       row_numbers
-        .map { |i| "(#{row_values(i, connection)})" }
+        .map { |i| row_values(i, connection) }
+        .compact
        .join(",")
    end
 
    def row_values(row_number, connection)
-      data_generator
-        .generate_row(row_number)
-        .map { |v| connection.quote(v) }
-        .join(",")
+      row = data_generator.generate_row(row_number)
+      return unless row.present?
+
+      "(#{row.map { |v| connection.quote(v) }.join(',')})"
    end
  end
 end
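
Both loading strategies now share the same two-argument constructor and load_batch contract: the connection performs the real work while the file adapter records the same statement when a script file is requested, and rows that would repeat a unique index come back as nil and are compacted away. A condensed sketch, with `generator`, `file_adapter`, and `connection_handler` standing in for the real collaborators:

    strategy = ActiveRecordDataLoader::BulkInsertStrategy.new(generator, file_adapter)

    connection_handler.with_connection do |connection|
      # Builds one multi-row INSERT for the batch and appends it to the script file, if any.
      strategy.load_batch((0...500).to_a, connection)
    end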

data/lib/active_record_data_loader/configuration.rb
@@ -3,7 +3,7 @@
 module ActiveRecordDataLoader
   class Configuration
     attr_accessor :connection_factory, :default_batch_size, :default_row_count,
-                  :logger, :statement_timeout
+                  :logger, :max_duplicate_retries, :raise_on_duplicates, :statement_timeout
     attr_reader :output
 
     def initialize(
@@ -12,51 +12,34 @@ module ActiveRecordDataLoader
       logger: nil,
       statement_timeout: "2min",
       connection_factory: -> { ::ActiveRecord::Base.connection },
-      output: :connection
+      raise_on_duplicates: false,
+      max_duplicate_retries: 5,
+      output: nil
     )
       @default_batch_size = default_batch_size
       @default_row_count = default_row_count
       @logger = logger || default_logger
       @statement_timeout = statement_timeout
       @connection_factory = connection_factory
+      @raise_on_duplicates = raise_on_duplicates
+      @max_duplicate_retries = max_duplicate_retries
       self.output = output
     end
 
     def output=(output)
-      @output = validate_output(output || { type: :connection })
-    end
-
-    def output_adapter
-      if output.fetch(:type) == :file
-        ActiveRecordDataLoader::FileOutputAdapter.new(output)
-      else
-        ActiveRecordDataLoader::ConnectionOutputAdapter.new
-      end
-    end
-
-    def connection_handler
-      ActiveRecordDataLoader::ConnectionHandler.new(
-        connection_factory: connection_factory,
-        statement_timeout: statement_timeout,
-        output_adapter: output_adapter
-      )
+      @output = validate_output(output)
     end
 
     private
 
-    OUTPUT_OPTIONS_BY_TYPE = { connection: %i[type], file: %i[type filename] }.freeze
-
     def validate_output(output)
-      if %i[file connection].include?(output)
-        { type: output }
-      elsif output.is_a?(Hash)
-        raise "The output hash must contain a :type key with either :connection or :file" \
-          unless %i[file connection].include?(output[:type])
-
-        output.slice(*OUTPUT_OPTIONS_BY_TYPE[output[:type]])
+      if output.to_s.blank?
+        nil
+      elsif output.is_a?(String)
+        output
      else
-        raise "The output configuration parameter must be either a symbol for :connection or :file, "\
-          "or a hash with more detailed output options."
+        raise "The output configuration parameter must be a filename meant to be the "\
+          "target for the SQL script"
      end
    end
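
The output option is now simply a filename (or nil), and duplicate handling becomes configurable gem-wide. For example, constructing a configuration directly with the new keywords; the values shown are illustrative:

    config = ActiveRecordDataLoader::Configuration.new(
      default_row_count: 10_000,
      statement_timeout: "5min",
      raise_on_duplicates: true,  # new in 1.3.0: control how duplicate unique-index values are handled
      max_duplicate_retries: 10,
      output: "load_script.sql"   # nil (the default) skips writing a SQL script file
    )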
 

data/lib/active_record_data_loader/connection_handler.rb
@@ -2,19 +2,18 @@
 
 module ActiveRecordDataLoader
   class ConnectionHandler
-    def initialize(connection_factory:, statement_timeout:, output_adapter:)
+    def initialize(connection_factory:, statement_timeout:)
       @connection_factory = connection_factory
       @statement_timeout = statement_timeout
-      @output_adapter = output_adapter
+      cache_facts
     end
 
     def with_connection
-      connection = open_connection
-      if postgres?(connection)
-        original_timeout = retrieve_statement_timeout(connection)
-        update_statement_timeout(connection, statement_timeout)
+      connection = connection_factory.call
+      if supports_timeout?
+        connection.execute(timeout_set_command)
         yield connection
-        update_statement_timeout(connection, original_timeout)
+        connection.execute(reset_timeout_command)
       else
         yield connection
       end
@@ -22,53 +21,32 @@ module ActiveRecordDataLoader
      connection&.close
    end
 
-    # When the output is going to a script file, there are two places to update the
-    # statement_timeout. The connection itself needs to have the timeout updated
-    # because we are reading data from the connection to come up with related data
-    # while generating the data. Also, the final SQL script file needs the timeout
-    # updated so that when those \COPY commands are executed they have the higher
-    # timeout as well.
-    def with_statement_timeout_for_output
-      return yield unless output_adapter.needs_timeout_output?
-
-      original_timeout = begin
-        connection = open_connection
-        retrieve_statement_timeout(connection) if postgres?(connection)
-      ensure
-        connection&.close
-      end
-
-      if original_timeout
-        output_adapter.execute(statement_timeout_set_command(statement_timeout))
-        yield
-        output_adapter.execute(statement_timeout_set_command(original_timeout))
-      else
-        yield
-      end
+    def supports_timeout?
+      @supports_timeout
    end
 
-    private
-
-    attr_reader :connection_factory, :statement_timeout, :output_adapter
-
-    def retrieve_statement_timeout(connection)
-      connection.execute("SHOW statement_timeout").first["statement_timeout"]
+    def supports_copy?
+      @supports_copy
    end
 
-    def update_statement_timeout(connection, timeout)
-      connection.execute(statement_timeout_set_command(timeout))
+    def timeout_set_command
+      "SET statement_timeout = \"#{statement_timeout}\""
    end
 
-    def statement_timeout_set_command(timeout)
-      "SET statement_timeout = \"#{timeout}\""
+    def reset_timeout_command
+      "RESET statement_timeout"
    end
 
-    def open_connection
-      connection_factory.call
-    end
+    private
 
-    def postgres?(connection)
-      connection.adapter_name.downcase.to_sym == :postgresql
+    attr_reader :connection_factory, :statement_timeout
+
+    def cache_facts
+      connection = connection_factory.call
+      @supports_timeout = connection.adapter_name.downcase.to_sym == :postgresql
+      @supports_copy = connection.raw_connection.respond_to?(:copy_data)
+    ensure
+      connection&.close
    end
  end
 end
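
The handler now probes the adapter once at construction time and exposes the results, instead of re-checking per call and routing timeout statements through an output adapter. Roughly:

    handler = ActiveRecordDataLoader::ConnectionHandler.new(
      connection_factory: -> { ::ActiveRecord::Base.connection },
      statement_timeout: "2min"
    )

    handler.supports_copy?    # true when the pg raw connection responds to copy_data
    handler.supports_timeout? # true when the adapter is PostgreSQL

    handler.with_connection do |connection|
      # On PostgreSQL, statement_timeout is raised for this block and reset afterwards.
      connection.execute("SELECT 1")
    end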

data/lib/active_record_data_loader/copy_strategy.rb
@@ -2,17 +2,24 @@
 
 module ActiveRecordDataLoader
   class CopyStrategy
-    def initialize(data_generator, output_adapter)
+    def initialize(data_generator, file_adapter)
       @data_generator = data_generator
-      @output_adapter = output_adapter
+      @file_adapter = file_adapter
     end
 
     def load_batch(row_numbers, connection)
-      output_adapter.copy(
+      data = csv_rows(row_numbers, connection)
+      copy(
         connection: connection,
         table: table_name_for_copy(connection),
         columns: columns_for_copy(connection),
-        data: csv_rows(row_numbers, connection),
+        data: data,
+        row_numbers: row_numbers
+      )
+      file_adapter.copy(
+        table: table_name_for_copy(connection),
+        columns: columns_for_copy(connection),
+        data: data,
        row_numbers: row_numbers
      )
    end
@@ -27,12 +34,19 @@ module ActiveRecordDataLoader
 
    private
 
-    attr_reader :data_generator, :output_adapter
+    attr_reader :data_generator, :file_adapter
+
+    def copy(connection:, table:, columns:, data:, row_numbers:)
+      raw_connection = connection.raw_connection
+      raw_connection.copy_data("COPY #{table} (#{columns}) FROM STDIN WITH (FORMAT CSV)") do
+        raw_connection.put_copy_data(data.join("\n"))
+      end
+    end
 
    def csv_rows(row_numbers, connection)
      row_numbers.map do |i|
-        data_generator.generate_row(i).map { |d| quote_data(d, connection) }.join(",")
-      end
+        data_generator.generate_row(i)&.map { |d| quote_data(d, connection) }&.join(",")
+      end.compact
    end
 
    def table_name_for_copy(connection)

data/lib/active_record_data_loader/data_faker.rb
@@ -13,16 +13,24 @@ module ActiveRecordDataLoader
 
       def adapter
         @adapter ||=
-          if Gem.loaded_specs.key?("ffaker")
-            require "ffaker"
+          if can_use?("ffaker", "2.1.0")
             FFakerGemAdapter.new
-          elsif Gem.loaded_specs.key?("faker")
-            require "faker"
+          elsif can_use?("faker", "1.9.3")
             FakerGemAdapter.new
           else
             NoGemAdapter.new
           end
       end
+
+      def can_use?(gem, min_version)
+        gemspec = Gem.loaded_specs[gem]
+        return false unless gemspec.present? && gemspec.version >= Gem::Version.new(min_version)
+
+        require gem
+        true
+      rescue LoadError
+        false
+      end
     end
 
     class FFakerGemAdapter

data/lib/active_record_data_loader/dsl/model.rb
@@ -3,13 +3,16 @@
 module ActiveRecordDataLoader
   module Dsl
     class Model
-      attr_reader :klass, :columns, :row_count, :polymorphic_associations, :belongs_to_associations
+      attr_reader :klass, :columns, :row_count, :polymorphic_associations, :belongs_to_associations,
+                  :raise_on_duplicates_flag
 
       def initialize(klass:, configuration:)
         @klass = klass
         @columns = {}
         @row_count = configuration.default_row_count
         @batch_size = configuration.default_batch_size
+        @raise_on_duplicates_flag = configuration.raise_on_duplicates
+        @max_duplicate_retries = configuration.max_duplicate_retries
         @polymorphic_associations = []
         @belongs_to_associations = []
       end
@@ -22,6 +25,20 @@ module ActiveRecordDataLoader
         @batch_size = (size || @batch_size)
       end
 
+      def raise_on_duplicates
+        @raise_on_duplicates_flag = true
+      end
+
+      def do_not_raise_on_duplicates
+        @raise_on_duplicates_flag = false
+      end
+
+      def max_duplicate_retries(retries = nil)
+        return @max_duplicate_retries if retries.nil?
+
+        @max_duplicate_retries = retries
+      end
+
       def column(name, func)
         @columns[name.to_sym] = func
       end
@@ -32,7 +49,7 @@ module ActiveRecordDataLoader
         ).tap { |a| block.call(a) }
       end
 
-      def belongs_to(assoc_name, eligible_set:)
+      def belongs_to(assoc_name, eligible_set: nil)
        @belongs_to_associations << BelongsToAssociation.new(@klass, assoc_name, eligible_set)
      end
    end
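
In the DSL these become per-model switches that override the gem-wide configuration. A sketch of how a definition might use them; the surrounding define/model block and the count call follow the gem's README-style DSL and are assumptions here, while the duplicate-handling and belongs_to calls come straight from this diff:

    loader = ActiveRecordDataLoader.define do
      model Order do |m|            # Order is a hypothetical ActiveRecord model
        m.count 100_000
        m.raise_on_duplicates       # fail fast with DuplicateKeyError instead of retrying
        m.max_duplicate_retries 10  # or allow more regeneration attempts per duplicate
        m.belongs_to :customer      # eligible_set: is now optional
      end
    end

    loader.load_data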

data/lib/active_record_data_loader/errors.rb (new file)
@@ -0,0 +1,5 @@
+# frozen_string_literal: true
+
+module ActiveRecordDataLoader
+  class DuplicateKeyError < StandardError; end
+end

data/lib/active_record_data_loader/file_output_adapter.rb
@@ -2,38 +2,46 @@
 
 module ActiveRecordDataLoader
   class FileOutputAdapter
+    def self.with_output_options(options)
+      adapter = new(options)
+      pre_command = options[:pre_command]
+      adapter.write_command(pre_command) if pre_command
+      yield adapter
+      post_command = options[:post_command]
+      adapter.write_command(post_command) if post_command
+    end
+
     def initialize(options)
       @filename = options.fetch(:filename, "active_record_data_loader_script.sql")
       @file_basename = File.basename(@filename, File.extname(@filename))
       @path = File.expand_path(File.dirname(@filename))
+      File.open(@filename, File::TRUNC) if File.exist?(@filename)
     end
 
-    def needs_timeout_output?
-      true
-    end
-
-    def copy(connection:, table:, columns:, data:, row_numbers:)
+    def copy(table:, columns:, data:, row_numbers:)
       data_filename = data_filename(table, row_numbers)
       File.open(data_filename, "w") { |f| f.puts(data) }
-      File.open(@filename, "a") do |file|
+      File.open(filename, "a") do |file|
         file.puts("\\COPY #{table} (#{columns}) FROM '#{data_filename}' WITH (FORMAT CSV);")
       end
     end
 
-    def insert(connection:, command:)
-      execute(command)
+    def insert(command)
+      write_command(command)
    end
 
-    def execute(command)
-      File.open(@filename, "a") { |f| f.puts("#{command.gsub("\n", ' ')};") }
+    def write_command(command)
+      File.open(filename, "a") { |f| f.puts("#{command.gsub("\n", ' ')};") }
    end
 
    private
 
+    attr_reader :filename, :path, :file_basename
+
    def data_filename(table, row_numbers)
      File.join(
-        @path,
-        "#{@file_basename}_#{table.gsub(/"/, '')}_rows_#{row_numbers[0]}_to_#{row_numbers[-1]}.csv"
+        path,
+        "#{file_basename}_#{table.gsub(/"/, '')}_rows_#{row_numbers[0]}_to_#{row_numbers[-1]}.csv"
      )
    end
  end
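
The adapter is now driven through a class-level helper that brackets the whole run with optional pre/post commands (used for the statement_timeout SET/RESET shown above) and truncates any previous script on startup. A minimal sketch using only the options visible in this diff:

    ActiveRecordDataLoader::FileOutputAdapter.with_output_options(
      filename: "load_script.sql",
      pre_command: "SET statement_timeout = \"5min\"",
      post_command: "RESET statement_timeout"
    ) do |adapter|
      adapter.insert("INSERT INTO orders (id) VALUES (1)") # appended to load_script.sql
    end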

data/lib/active_record_data_loader/loader.rb
@@ -1,76 +1,82 @@
 # frozen_string_literal: true
 
-require "benchmark"
-
 module ActiveRecordDataLoader
   class Loader
-    class << self
-      def load_data(
-        data_generator:,
-        total_rows:,
-        batch_size:,
-        configuration:
-      )
-        new(
-          logger: configuration.logger,
-          connection_handler: configuration.connection_handler,
-          strategy: strategy_class(configuration.connection_factory).new(
-            data_generator,
-            configuration.output_adapter
-          )
-        ).load_data(batch_size, total_rows)
-      end
+    def initialize(configuration, definition)
+      @configuration = configuration
+      @definition = definition
+    end
 
-      private
+    def load_data
+      ActiveRecordDataLoader::ActiveRecord::PerRowValueCache.clear
 
-      def strategy_class(connection_factory)
-        if connection_factory.call.raw_connection.respond_to?(:copy_data)
-          ActiveRecordDataLoader::CopyStrategy
-        else
-          ActiveRecordDataLoader::BulkInsertStrategy
-        end
+      file_adapter_class.with_output_options(file_adapter_options) do |file_adapter|
+        definition.models.map { |m| load_model(m, file_adapter) }
       end
     end
 
-    def initialize(logger:, connection_handler:, strategy:)
-      @logger = logger
-      @connection_handler = connection_handler
-      @strategy = strategy
-    end
+    private
 
-    def load_data(batch_size, total_rows)
-      batch_count = (total_rows / batch_size.to_f).ceil
+    attr_reader :definition, :configuration
 
-      logger.info(
-        "[ActiveRecordDataLoader] "\
-        "Loading #{total_rows} row(s) into '#{strategy.table_name}' via #{strategy.name}. "\
-        "#{batch_size} row(s) per batch, #{batch_count} batch(es)."
+    def load_model(model, file_adapter)
+      ActiveRecordDataLoader::TableLoader.load_data(
+        batch_size: model.batch_size,
+        total_rows: model.row_count,
+        connection_handler: connection_handler,
+        strategy: strategy_class.new(generator(model), file_adapter),
+        logger: configuration.logger
      )
-      total_time = Benchmark.realtime do
-        load_in_batches(batch_size, total_rows, batch_count)
-      end
-      logger.info(
-        "[ActiveRecordDataLoader] "\
-        "Completed loading #{total_rows} row(s) into '#{strategy.table_name}' "\
-        "in #{total_time} seconds."
+    end
+
+    def generator(model)
+      ActiveRecordDataLoader::ActiveRecord::ModelDataGenerator.new(
+        model: model.klass,
+        column_settings: model.columns,
+        polymorphic_settings: model.polymorphic_associations,
+        belongs_to_settings: model.belongs_to_associations,
+        connection_factory: configuration.connection_factory,
+        raise_on_duplicates: model.raise_on_duplicates_flag,
+        max_duplicate_retries: model.max_duplicate_retries,
+        logger: configuration.logger
      )
    end
 
-    private
+    def file_adapter_class
+      if configuration.output.present?
+        ActiveRecordDataLoader::FileOutputAdapter
+      else
+        ActiveRecordDataLoader::NullOutputAdapter
+      end
+    end
 
-    attr_reader :strategy, :connection_handler, :logger
+    def file_adapter_options
+      timeout_commands =
+        if connection_handler.supports_timeout?
+          {
+            pre_command: connection_handler.timeout_set_command,
+            post_command: connection_handler.reset_timeout_command,
+          }
+        else
+          {}
+        end
 
-    def load_in_batches(batch_size, total_rows, batch_count)
-      connection_handler.with_connection do |connection|
-        total_rows.times.each_slice(batch_size).with_index do |row_numbers, i|
-          time = Benchmark.realtime { strategy.load_batch(row_numbers, connection) }
+      timeout_commands.merge(filename: configuration.output)
+    end
 
-          logger.debug(
-            "[ActiveRecordDataLoader] "\
-            "Completed batch #{i + 1}/#{batch_count}, #{row_numbers.count} row(s) in #{time} seconds"
-          )
-        end
-      end
+    def strategy_class
+      @strategy_class ||= if connection_handler.supports_copy?
+                            ActiveRecordDataLoader::CopyStrategy
+                          else
+                            ActiveRecordDataLoader::BulkInsertStrategy
+                          end
+    end
+
+    def connection_handler
+      @connection_handler ||= ActiveRecordDataLoader::ConnectionHandler.new(
+        connection_factory: configuration.connection_factory,
+        statement_timeout: configuration.statement_timeout
+      )
    end
  end
 end
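
The top-level entry point thus changes from a class method that loaded one generator at a time to an instance that walks the whole definition, choosing COPY or bulk INSERT once per run and handing every table the same file adapter (the per-batch logging moves into the new TableLoader). In outline:

    # `configuration` and `definition` are the objects built by the gem's configure/define
    # steps; the variable names here are illustrative.
    loader = ActiveRecordDataLoader::Loader.new(configuration, definition)
    loader.load_data # loads every model in the definition, optionally writing the SQL script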