active_record_data_loader 1.2.0 → 1.3.0

Files changed (29)
  1. checksums.yaml +4 -4
  2. data/.github/workflows/codeql-analysis.yml +70 -0
  3. data/.rubocop.yml +8 -2
  4. data/CHANGELOG.md +9 -0
  5. data/CODE_OF_CONDUCT.md +2 -2
  6. data/Gemfile.lock +24 -24
  7. data/README.md +88 -18
  8. data/active_record_data_loader.gemspec +1 -1
  9. data/lib/active_record_data_loader/active_record/{belongs_to_configuration.rb → belongs_to_data_provider.rb} +7 -6
  10. data/lib/active_record_data_loader/active_record/{column_configuration.rb → column_data_provider.rb} +2 -2
  11. data/lib/active_record_data_loader/active_record/list.rb +35 -0
  12. data/lib/active_record_data_loader/active_record/model_data_generator.rb +60 -5
  13. data/lib/active_record_data_loader/active_record/{polymorphic_belongs_to_configuration.rb → polymorphic_belongs_to_data_provider.rb} +11 -6
  14. data/lib/active_record_data_loader/active_record/unique_index_tracker.rb +67 -0
  15. data/lib/active_record_data_loader/bulk_insert_strategy.rb +16 -9
  16. data/lib/active_record_data_loader/configuration.rb +13 -30
  17. data/lib/active_record_data_loader/connection_handler.rb +23 -45
  18. data/lib/active_record_data_loader/copy_strategy.rb +21 -7
  19. data/lib/active_record_data_loader/data_faker.rb +12 -4
  20. data/lib/active_record_data_loader/dsl/model.rb +19 -2
  21. data/lib/active_record_data_loader/errors.rb +5 -0
  22. data/lib/active_record_data_loader/file_output_adapter.rb +20 -12
  23. data/lib/active_record_data_loader/loader.rb +61 -55
  24. data/lib/active_record_data_loader/null_output_adapter.rb +15 -0
  25. data/lib/active_record_data_loader/table_loader.rb +59 -0
  26. data/lib/active_record_data_loader/version.rb +1 -1
  27. data/lib/active_record_data_loader.rb +9 -41
  28. metadata +12 -7
  29. data/lib/active_record_data_loader/connection_output_adapter.rb +0 -20
data/lib/active_record_data_loader/active_record/polymorphic_belongs_to_data_provider.rb (renamed from polymorphic_belongs_to_configuration.rb)
@@ -2,20 +2,21 @@
 
 module ActiveRecordDataLoader
   module ActiveRecord
-    class PolymorphicBelongsToConfiguration
-      def self.config_for(polymorphic_settings:)
+    class PolymorphicBelongsToDataProvider
+      def self.provider_for(polymorphic_settings:, strategy: :random)
         ar_association = polymorphic_settings.model_class.reflect_on_association(
           polymorphic_settings.name
         )
         raise "#{name} only supports polymorphic associations" unless ar_association.polymorphic?
 
-        new(polymorphic_settings, ar_association).polymorphic_config
+        new(polymorphic_settings, ar_association, strategy).polymorphic_config
       end
 
-      def initialize(settings, ar_association)
+      def initialize(settings, ar_association, strategy)
         @settings = settings
         @ar_association = ar_association
         @model_count = settings.weighted_models.size
+        @strategy = strategy
       end
 
       def polymorphic_config
@@ -32,19 +33,23 @@ module ActiveRecordDataLoader
       end
 
       def foreign_key(row_number)
-        possible_values[row_number % @model_count][1].sample
+        possible_values[row_number % @model_count][1].next
      end
 
       def possible_values
         @possible_values ||= begin
           values = @settings.models.keys.map do |klass|
-            [klass.name, base_query(klass).pluck(klass.primary_key).to_a]
+            [klass.name, values_query(klass)]
           end.to_h
 
           @settings.weighted_models.map { |klass| [klass.name, values[klass.name]] }
         end
       end
 
+      def values_query(klass)
+        List.for(base_query(klass).pluck(klass.primary_key), strategy: @strategy)
+      end
+
       def base_query(klass)
         if @settings.queries[klass].respond_to?(:call)
           @settings.queries[klass].call.all
data/lib/active_record_data_loader/active_record/unique_index_tracker.rb (new file)
@@ -0,0 +1,67 @@
+# frozen_string_literal: true
+
+module ActiveRecordDataLoader
+  module ActiveRecord
+    class UniqueIndexTracker
+      Index = Struct.new(:name, :columns, :column_indexes, keyword_init: true)
+
+      def initialize(model:, connection_factory:)
+        @model = model
+        @table = model.table_name
+        @unique_indexes = []
+        @unique_values_used = {}
+        find_unique_indexes(connection_factory)
+      end
+
+      def map_indexed_columns(column_list)
+        @unique_indexes = @raw_unique_indexes.map do |index|
+          @unique_values_used[index.name] = Set.new
+          columns = index.columns.map(&:to_sym)
+          Index.new(
+            name: index.name,
+            columns: columns,
+            column_indexes: columns.map { |c| column_list.find_index(c) }
+          )
+        end
+      end
+
+      def repeating_unique_values?(row)
+        @unique_indexes.map do |index|
+          values = index.column_indexes.map { |i| row[i] }
+          @unique_values_used.fetch(index.name).include?(values)
+        end.any?
+      end
+
+      def capture_unique_values(row)
+        return unless row.present?
+
+        @unique_indexes.each do |index|
+          values = index.column_indexes.map { |i| row[i] }
+          @unique_values_used.fetch(index.name) << values
+        end
+        row
+      end
+
+      def contained_in_index?(ar_column)
+        target_column = if @model.reflect_on_association(ar_column.name)&.belongs_to?
+                          ar_column.join_foreign_key.to_sym
+                        else
+                          ar_column.name.to_sym
+                        end
+
+        @raw_unique_indexes.flat_map { |i| i.columns.map(&:to_sym) }.include?(target_column)
+      end
+
+      private
+
+      attr_reader :table
+
+      def find_unique_indexes(connection_factory)
+        connection = connection_factory.call
+        @raw_unique_indexes = connection.indexes(table).select(&:unique)
+      ensure
+        connection&.close
+      end
+    end
+  end
+end
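
The tracker remembers the value tuples already used for each unique index on a table so the row generator can detect would-be duplicates before they reach the database. A minimal sketch of how a caller might drive it; the Order model, the column list, and the sample row are illustrative, not part of the gem's API:

  tracker = ActiveRecordDataLoader::ActiveRecord::UniqueIndexTracker.new(
    model: Order,
    connection_factory: -> { ::ActiveRecord::Base.connection }
  )

  # Tell the tracker where each indexed column sits within a generated row array.
  tracker.map_indexed_columns(%i[id customer_id order_number created_at])

  row = [1, 42, "ORD-0001", Time.now]
  if tracker.repeating_unique_values?(row)
    # regenerate the row, or raise ActiveRecordDataLoader::DuplicateKeyError
  else
    tracker.capture_unique_values(row) # remember these values for future checks
  end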
data/lib/active_record_data_loader/bulk_insert_strategy.rb
@@ -2,16 +2,18 @@
 
 module ActiveRecordDataLoader
   class BulkInsertStrategy
-    def initialize(data_generator, output_adapter)
+    def initialize(data_generator, file_adapter)
       @data_generator = data_generator
-      @output_adapter = output_adapter
+      @file_adapter = file_adapter
     end
 
     def load_batch(row_numbers, connection)
-      output_adapter.insert(connection: connection, command: <<~SQL)
+      command = <<~SQL
         INSERT INTO #{quoted_table_name(connection)} (#{column_list(connection)})
         VALUES #{values(row_numbers, connection)}
       SQL
+      insert(connection: connection, command: command)
+      file_adapter.insert(command)
     end
 
     def table_name
@@ -24,7 +26,11 @@ module ActiveRecordDataLoader
 
     private
 
-    attr_reader :data_generator, :output_adapter
+    attr_reader :data_generator, :file_adapter
+
+    def insert(connection:, command:)
+      connection.insert(command)
+    end
 
     def quoted_table_name(connection)
       @quoted_table_name ||= connection.quote_table_name(data_generator.table)
@@ -39,15 +45,16 @@ module ActiveRecordDataLoader
 
     def values(row_numbers, connection)
       row_numbers
-        .map { |i| "(#{row_values(i, connection)})" }
+        .map { |i| row_values(i, connection) }
+        .compact
         .join(",")
     end
 
     def row_values(row_number, connection)
-      data_generator
-        .generate_row(row_number)
-        .map { |v| connection.quote(v) }
-        .join(",")
+      row = data_generator.generate_row(row_number)
+      return unless row.present?
+
+      "(#{row.map { |v| connection.quote(v) }.join(',')})"
    end
  end
 end
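
The strategy now runs the INSERT through the connection itself and mirrors the same statement to the file adapter, and rows for which generate_row returns nothing are dropped from the VALUES list. A sketch of the wiring; data_generator, file_adapter, and handler stand in for objects the Loader normally builds:

  strategy = ActiveRecordDataLoader::BulkInsertStrategy.new(data_generator, file_adapter)

  handler.with_connection do |connection|
    # One INSERT ... VALUES statement for rows 0..999, also appended to the script file.
    strategy.load_batch((0...1_000).to_a, connection)
  end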
data/lib/active_record_data_loader/configuration.rb
@@ -3,7 +3,7 @@
 module ActiveRecordDataLoader
   class Configuration
     attr_accessor :connection_factory, :default_batch_size, :default_row_count,
-                  :logger, :statement_timeout
+                  :logger, :max_duplicate_retries, :raise_on_duplicates, :statement_timeout
     attr_reader :output
 
     def initialize(
@@ -12,51 +12,34 @@ module ActiveRecordDataLoader
       logger: nil,
       statement_timeout: "2min",
       connection_factory: -> { ::ActiveRecord::Base.connection },
-      output: :connection
+      raise_on_duplicates: false,
+      max_duplicate_retries: 5,
+      output: nil
     )
       @default_batch_size = default_batch_size
       @default_row_count = default_row_count
       @logger = logger || default_logger
       @statement_timeout = statement_timeout
       @connection_factory = connection_factory
+      @raise_on_duplicates = raise_on_duplicates
+      @max_duplicate_retries = max_duplicate_retries
       self.output = output
     end
 
     def output=(output)
-      @output = validate_output(output || { type: :connection })
-    end
-
-    def output_adapter
-      if output.fetch(:type) == :file
-        ActiveRecordDataLoader::FileOutputAdapter.new(output)
-      else
-        ActiveRecordDataLoader::ConnectionOutputAdapter.new
-      end
-    end
-
-    def connection_handler
-      ActiveRecordDataLoader::ConnectionHandler.new(
-        connection_factory: connection_factory,
-        statement_timeout: statement_timeout,
-        output_adapter: output_adapter
-      )
+      @output = validate_output(output)
     end
 
     private
 
-    OUTPUT_OPTIONS_BY_TYPE = { connection: %i[type], file: %i[type filename] }.freeze
-
     def validate_output(output)
-      if %i[file connection].include?(output)
-        { type: output }
-      elsif output.is_a?(Hash)
-        raise "The output hash must contain a :type key with either :connection or :file" \
-          unless %i[file connection].include?(output[:type])
-
-        output.slice(*OUTPUT_OPTIONS_BY_TYPE[output[:type]])
+      if output.to_s.blank?
+        nil
+      elsif output.is_a?(String)
+        output
       else
-        raise "The output configuration parameter must be either a symbol for :connection or :file, "\
-              "or a hash with more detailed output options."
+        raise "The output configuration parameter must be a filename meant to be the "\
+              "target for the SQL script"
      end
    end
 
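
The output option is now simply a filename (or nil to load only through the connection), and the two new duplicate-handling settings are passed down to ModelDataGenerator (not shown in this diff). A sketch of building the configuration object directly, with illustrative values:

  config = ActiveRecordDataLoader::Configuration.new(
    statement_timeout: "5min",
    raise_on_duplicates: true,   # new in 1.3.0; presumably surfaces DuplicateKeyError instead of skipping the row
    max_duplicate_retries: 10,   # new in 1.3.0; attempts at regenerating a row that repeats a unique key
    output: "load_script.sql"    # new in 1.3.0; a plain filename for the SQL script, or nil for none
  )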
data/lib/active_record_data_loader/connection_handler.rb
@@ -2,19 +2,18 @@
 
 module ActiveRecordDataLoader
   class ConnectionHandler
-    def initialize(connection_factory:, statement_timeout:, output_adapter:)
+    def initialize(connection_factory:, statement_timeout:)
       @connection_factory = connection_factory
       @statement_timeout = statement_timeout
-      @output_adapter = output_adapter
+      cache_facts
     end
 
     def with_connection
-      connection = open_connection
-      if postgres?(connection)
-        original_timeout = retrieve_statement_timeout(connection)
-        update_statement_timeout(connection, statement_timeout)
+      connection = connection_factory.call
+      if supports_timeout?
+        connection.execute(timeout_set_command)
         yield connection
-        update_statement_timeout(connection, original_timeout)
+        connection.execute(reset_timeout_command)
       else
         yield connection
       end
@@ -22,53 +21,32 @@ module ActiveRecordDataLoader
       connection&.close
     end
 
-    # When the output is going to a script file, there are two places to update the
-    # statement_timeout. The connection itself needs to have the timeout updated
-    # because we are reading data from the connection to come up with related data
-    # while generating the data. Also, the final SQL script file needs the timeout
-    # updated so that when those \COPY commands are executed they have the higher
-    # timeout as well.
-    def with_statement_timeout_for_output
-      return yield unless output_adapter.needs_timeout_output?
-
-      original_timeout = begin
-        connection = open_connection
-        retrieve_statement_timeout(connection) if postgres?(connection)
-      ensure
-        connection&.close
-      end
-
-      if original_timeout
-        output_adapter.execute(statement_timeout_set_command(statement_timeout))
-        yield
-        output_adapter.execute(statement_timeout_set_command(original_timeout))
-      else
-        yield
-      end
+    def supports_timeout?
+      @supports_timeout
     end
 
-    private
-
-    attr_reader :connection_factory, :statement_timeout, :output_adapter
-
-    def retrieve_statement_timeout(connection)
-      connection.execute("SHOW statement_timeout").first["statement_timeout"]
+    def supports_copy?
+      @supports_copy
     end
 
-    def update_statement_timeout(connection, timeout)
-      connection.execute(statement_timeout_set_command(timeout))
+    def timeout_set_command
+      "SET statement_timeout = \"#{statement_timeout}\""
     end
 
-    def statement_timeout_set_command(timeout)
-      "SET statement_timeout = \"#{timeout}\""
+    def reset_timeout_command
+      "RESET statement_timeout"
     end
 
-    def open_connection
-      connection_factory.call
-    end
+    private
 
-    def postgres?(connection)
-      connection.adapter_name.downcase.to_sym == :postgresql
+    attr_reader :connection_factory, :statement_timeout
+
+    def cache_facts
+      connection = connection_factory.call
+      @supports_timeout = connection.adapter_name.downcase.to_sym == :postgresql
+      @supports_copy = connection.raw_connection.respond_to?(:copy_data)
+    ensure
+      connection&.close
    end
  end
 end
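
The handler now probes the adapter once at construction time and exposes the results through supports_timeout? and supports_copy?, so callers no longer reopen connections to check. A small sketch of how it is exercised; the SELECT below is only placeholder work:

  handler = ActiveRecordDataLoader::ConnectionHandler.new(
    connection_factory: -> { ::ActiveRecord::Base.connection },
    statement_timeout: "2min"
  )

  # On PostgreSQL the block runs with the elevated statement_timeout and it is
  # reset afterwards; on other adapters the block simply receives a connection.
  handler.with_connection do |connection|
    connection.execute("SELECT 1")
  end

  handler.supports_copy? # => true when the raw connection responds to copy_data (the pg gem)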
data/lib/active_record_data_loader/copy_strategy.rb
@@ -2,17 +2,24 @@
 
 module ActiveRecordDataLoader
   class CopyStrategy
-    def initialize(data_generator, output_adapter)
+    def initialize(data_generator, file_adapter)
       @data_generator = data_generator
-      @output_adapter = output_adapter
+      @file_adapter = file_adapter
     end
 
     def load_batch(row_numbers, connection)
-      output_adapter.copy(
+      data = csv_rows(row_numbers, connection)
+      copy(
         connection: connection,
         table: table_name_for_copy(connection),
         columns: columns_for_copy(connection),
-        data: csv_rows(row_numbers, connection),
+        data: data,
+        row_numbers: row_numbers
+      )
+      file_adapter.copy(
+        table: table_name_for_copy(connection),
+        columns: columns_for_copy(connection),
+        data: data,
         row_numbers: row_numbers
       )
     end
@@ -27,12 +34,19 @@ module ActiveRecordDataLoader
 
     private
 
-    attr_reader :data_generator, :output_adapter
+    attr_reader :data_generator, :file_adapter
+
+    def copy(connection:, table:, columns:, data:, row_numbers:)
+      raw_connection = connection.raw_connection
+      raw_connection.copy_data("COPY #{table} (#{columns}) FROM STDIN WITH (FORMAT CSV)") do
+        raw_connection.put_copy_data(data.join("\n"))
+      end
+    end
 
     def csv_rows(row_numbers, connection)
       row_numbers.map do |i|
-        data_generator.generate_row(i).map { |d| quote_data(d, connection) }.join(",")
-      end
+        data_generator.generate_row(i)&.map { |d| quote_data(d, connection) }&.join(",")
+      end.compact
     end
 
     def table_name_for_copy(connection)
data/lib/active_record_data_loader/data_faker.rb
@@ -13,16 +13,24 @@ module ActiveRecordDataLoader
 
     def adapter
       @adapter ||=
-        if Gem.loaded_specs.key?("ffaker")
-          require "ffaker"
+        if can_use?("ffaker", "2.1.0")
          FFakerGemAdapter.new
-        elsif Gem.loaded_specs.key?("faker")
-          require "faker"
+        elsif can_use?("faker", "1.9.3")
          FakerGemAdapter.new
        else
          NoGemAdapter.new
        end
    end
+
+    def can_use?(gem, min_version)
+      gemspec = Gem.loaded_specs[gem]
+      return false unless gemspec.present? && gemspec.version >= Gem::Version.new(min_version)
+
+      require gem
+      true
+    rescue LoadError
+      false
+    end
  end
 
  class FFakerGemAdapter
data/lib/active_record_data_loader/dsl/model.rb
@@ -3,13 +3,16 @@
 module ActiveRecordDataLoader
   module Dsl
     class Model
-      attr_reader :klass, :columns, :row_count, :polymorphic_associations, :belongs_to_associations
+      attr_reader :klass, :columns, :row_count, :polymorphic_associations, :belongs_to_associations,
+                  :raise_on_duplicates_flag
 
       def initialize(klass:, configuration:)
         @klass = klass
         @columns = {}
         @row_count = configuration.default_row_count
         @batch_size = configuration.default_batch_size
+        @raise_on_duplicates_flag = configuration.raise_on_duplicates
+        @max_duplicate_retries = configuration.max_duplicate_retries
         @polymorphic_associations = []
         @belongs_to_associations = []
       end
@@ -22,6 +25,20 @@ module ActiveRecordDataLoader
         @batch_size = (size || @batch_size)
       end
 
+      def raise_on_duplicates
+        @raise_on_duplicates_flag = true
+      end
+
+      def do_not_raise_on_duplicates
+        @raise_on_duplicates_flag = false
+      end
+
+      def max_duplicate_retries(retries = nil)
+        return @max_duplicate_retries if retries.nil?
+
+        @max_duplicate_retries = retries
+      end
+
       def column(name, func)
         @columns[name.to_sym] = func
       end
@@ -32,7 +49,7 @@ module ActiveRecordDataLoader
        ).tap { |a| block.call(a) }
      end
 
-      def belongs_to(assoc_name, eligible_set:)
+      def belongs_to(assoc_name, eligible_set: nil)
        @belongs_to_associations << BelongsToAssociation.new(@klass, assoc_name, eligible_set)
      end
    end
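
In the DSL these become per-model switches. A sketch of how a model block might look, assuming the block-style define entry point documented in the README; Order and the column lambda are illustrative:

  ActiveRecordDataLoader.define do
    model Order do |m|
      m.column :order_number, -> { "ORD-#{rand(1_000_000)}" }
      m.belongs_to :customer        # eligible_set: is now optional
      m.raise_on_duplicates         # presumably raises DuplicateKeyError instead of skipping the row
      m.max_duplicate_retries 10    # per-model override of the configured retry count
    end
  end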
data/lib/active_record_data_loader/errors.rb (new file)
@@ -0,0 +1,5 @@
+# frozen_string_literal: true
+
+module ActiveRecordDataLoader
+  class DuplicateKeyError < StandardError; end
+end
data/lib/active_record_data_loader/file_output_adapter.rb
@@ -2,38 +2,46 @@
 
 module ActiveRecordDataLoader
   class FileOutputAdapter
+    def self.with_output_options(options)
+      adapter = new(options)
+      pre_command = options[:pre_command]
+      adapter.write_command(pre_command) if pre_command
+      yield adapter
+      post_command = options[:post_command]
+      adapter.write_command(post_command) if post_command
+    end
+
     def initialize(options)
       @filename = options.fetch(:filename, "active_record_data_loader_script.sql")
       @file_basename = File.basename(@filename, File.extname(@filename))
       @path = File.expand_path(File.dirname(@filename))
+      File.open(@filename, File::TRUNC) if File.exist?(@filename)
     end
 
-    def needs_timeout_output?
-      true
-    end
-
-    def copy(connection:, table:, columns:, data:, row_numbers:)
+    def copy(table:, columns:, data:, row_numbers:)
       data_filename = data_filename(table, row_numbers)
       File.open(data_filename, "w") { |f| f.puts(data) }
-      File.open(@filename, "a") do |file|
+      File.open(filename, "a") do |file|
         file.puts("\\COPY #{table} (#{columns}) FROM '#{data_filename}' WITH (FORMAT CSV);")
       end
     end
 
-    def insert(connection:, command:)
-      execute(command)
+    def insert(command)
+      write_command(command)
     end
 
-    def execute(command)
-      File.open(@filename, "a") { |f| f.puts("#{command.gsub("\n", ' ')};") }
+    def write_command(command)
+      File.open(filename, "a") { |f| f.puts("#{command.gsub("\n", ' ')};") }
     end
 
     private
 
+    attr_reader :filename, :path, :file_basename
+
     def data_filename(table, row_numbers)
       File.join(
-        @path,
-        "#{@file_basename}_#{table.gsub(/"/, '')}_rows_#{row_numbers[0]}_to_#{row_numbers[-1]}.csv"
+        path,
+        "#{file_basename}_#{table.gsub(/"/, '')}_rows_#{row_numbers[0]}_to_#{row_numbers[-1]}.csv"
      )
    end
data/lib/active_record_data_loader/loader.rb
@@ -1,76 +1,82 @@
 # frozen_string_literal: true
 
-require "benchmark"
-
 module ActiveRecordDataLoader
   class Loader
-    class << self
-      def load_data(
-        data_generator:,
-        total_rows:,
-        batch_size:,
-        configuration:
-      )
-        new(
-          logger: configuration.logger,
-          connection_handler: configuration.connection_handler,
-          strategy: strategy_class(configuration.connection_factory).new(
-            data_generator,
-            configuration.output_adapter
-          )
-        ).load_data(batch_size, total_rows)
-      end
+    def initialize(configuration, definition)
+      @configuration = configuration
+      @definition = definition
+    end
 
-      private
+    def load_data
+      ActiveRecordDataLoader::ActiveRecord::PerRowValueCache.clear
 
-      def strategy_class(connection_factory)
-        if connection_factory.call.raw_connection.respond_to?(:copy_data)
-          ActiveRecordDataLoader::CopyStrategy
-        else
-          ActiveRecordDataLoader::BulkInsertStrategy
-        end
+      file_adapter_class.with_output_options(file_adapter_options) do |file_adapter|
+        definition.models.map { |m| load_model(m, file_adapter) }
       end
     end
 
-    def initialize(logger:, connection_handler:, strategy:)
-      @logger = logger
-      @connection_handler = connection_handler
-      @strategy = strategy
-    end
+    private
 
-    def load_data(batch_size, total_rows)
-      batch_count = (total_rows / batch_size.to_f).ceil
+    attr_reader :definition, :configuration
 
-      logger.info(
-        "[ActiveRecordDataLoader] "\
-        "Loading #{total_rows} row(s) into '#{strategy.table_name}' via #{strategy.name}. "\
-        "#{batch_size} row(s) per batch, #{batch_count} batch(es)."
+    def load_model(model, file_adapter)
+      ActiveRecordDataLoader::TableLoader.load_data(
+        batch_size: model.batch_size,
+        total_rows: model.row_count,
+        connection_handler: connection_handler,
+        strategy: strategy_class.new(generator(model), file_adapter),
+        logger: configuration.logger
       )
-      total_time = Benchmark.realtime do
-        load_in_batches(batch_size, total_rows, batch_count)
-      end
-      logger.info(
-        "[ActiveRecordDataLoader] "\
-        "Completed loading #{total_rows} row(s) into '#{strategy.table_name}' "\
-        "in #{total_time} seconds."
+    end
+
+    def generator(model)
+      ActiveRecordDataLoader::ActiveRecord::ModelDataGenerator.new(
+        model: model.klass,
+        column_settings: model.columns,
+        polymorphic_settings: model.polymorphic_associations,
+        belongs_to_settings: model.belongs_to_associations,
+        connection_factory: configuration.connection_factory,
+        raise_on_duplicates: model.raise_on_duplicates_flag,
+        max_duplicate_retries: model.max_duplicate_retries,
+        logger: configuration.logger
      )
    end
 
-    private
+    def file_adapter_class
+      if configuration.output.present?
+        ActiveRecordDataLoader::FileOutputAdapter
+      else
+        ActiveRecordDataLoader::NullOutputAdapter
+      end
+    end
 
-    attr_reader :strategy, :connection_handler, :logger
+    def file_adapter_options
+      timeout_commands =
+        if connection_handler.supports_timeout?
+          {
+            pre_command: connection_handler.timeout_set_command,
+            post_command: connection_handler.reset_timeout_command,
+          }
+        else
+          {}
+        end
 
-    def load_in_batches(batch_size, total_rows, batch_count)
-      connection_handler.with_connection do |connection|
-        total_rows.times.each_slice(batch_size).with_index do |row_numbers, i|
-          time = Benchmark.realtime { strategy.load_batch(row_numbers, connection) }
+      timeout_commands.merge(filename: configuration.output)
+    end
 
-          logger.debug(
-            "[ActiveRecordDataLoader] "\
-            "Completed batch #{i + 1}/#{batch_count}, #{row_numbers.count} row(s) in #{time} seconds"
-          )
-        end
-      end
+    def strategy_class
+      @strategy_class ||= if connection_handler.supports_copy?
+                            ActiveRecordDataLoader::CopyStrategy
+                          else
+                            ActiveRecordDataLoader::BulkInsertStrategy
+                          end
+    end
+
+    def connection_handler
+      @connection_handler ||= ActiveRecordDataLoader::ConnectionHandler.new(
+        connection_factory: configuration.connection_factory,
+        statement_timeout: configuration.statement_timeout
+      )
    end
  end
 end
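
Putting it together, Loader now takes the configuration plus the DSL definition and drives one TableLoader per model, choosing COPY or bulk INSERT from the connection handler and using a real file adapter only when an output filename is set. A sketch of the new entry point; configuration and definition stand in for objects built by the gem's top level:

  loader = ActiveRecordDataLoader::Loader.new(configuration, definition)
  loader.load_data  # loads every model; also writes the SQL script when configuration.output is set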