active_record_data_loader 1.0.1 → 1.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/build.yml +51 -0
- data/.github/workflows/codeql-analysis.yml +70 -0
- data/.github/workflows/gem-push.yml +29 -0
- data/.rubocop.yml +46 -7
- data/CHANGELOG.md +37 -1
- data/CODE_OF_CONDUCT.md +2 -2
- data/Gemfile.lock +72 -72
- data/README.md +162 -9
- data/Rakefile +8 -2
- data/active_record_data_loader.gemspec +8 -6
- data/config/database.yml +9 -0
- data/docker-compose.yml +18 -0
- data/gemfiles/activerecord_6.gemfile +1 -1
- data/lib/active_record_data_loader/active_record/{belongs_to_configuration.rb → belongs_to_data_provider.rb} +8 -7
- data/lib/active_record_data_loader/active_record/{column_configuration.rb → column_data_provider.rb} +14 -5
- data/lib/active_record_data_loader/active_record/datetime_value_generator.rb +1 -1
- data/lib/active_record_data_loader/active_record/enum_value_generator.rb +28 -5
- data/lib/active_record_data_loader/active_record/integer_value_generator.rb +2 -2
- data/lib/active_record_data_loader/active_record/list.rb +35 -0
- data/lib/active_record_data_loader/active_record/model_data_generator.rb +74 -6
- data/lib/active_record_data_loader/active_record/{polymorphic_belongs_to_configuration.rb → polymorphic_belongs_to_data_provider.rb} +12 -7
- data/lib/active_record_data_loader/active_record/text_value_generator.rb +1 -1
- data/lib/active_record_data_loader/active_record/unique_index_tracker.rb +67 -0
- data/lib/active_record_data_loader/bulk_insert_strategy.rb +16 -8
- data/lib/active_record_data_loader/configuration.rb +28 -3
- data/lib/active_record_data_loader/connection_handler.rb +52 -0
- data/lib/active_record_data_loader/copy_strategy.rb +38 -24
- data/lib/active_record_data_loader/data_faker.rb +12 -4
- data/lib/active_record_data_loader/dsl/model.rb +19 -2
- data/lib/active_record_data_loader/errors.rb +5 -0
- data/lib/active_record_data_loader/file_output_adapter.rb +48 -0
- data/lib/active_record_data_loader/loader.rb +57 -67
- data/lib/active_record_data_loader/null_output_adapter.rb +15 -0
- data/lib/active_record_data_loader/table_loader.rb +59 -0
- data/lib/active_record_data_loader/version.rb +1 -1
- data/lib/active_record_data_loader.rb +12 -36
- metadata +52 -15
- data/.travis.yml +0 -23
- data/config/database.yml.travis +0 -7
@@ -5,12 +5,27 @@ module ActiveRecordDataLoader
|
|
5
5
|
class ModelDataGenerator
|
6
6
|
attr_reader :table
|
7
7
|
|
8
|
-
def initialize(
|
8
|
+
def initialize(
|
9
|
+
model:,
|
10
|
+
column_settings:,
|
11
|
+
connection_factory:,
|
12
|
+
logger:,
|
13
|
+
raise_on_duplicates:,
|
14
|
+
max_duplicate_retries:,
|
15
|
+
polymorphic_settings: [],
|
16
|
+
belongs_to_settings: []
|
17
|
+
)
|
9
18
|
@model_class = model
|
10
19
|
@table = model.table_name
|
11
20
|
@column_settings = column_settings
|
12
21
|
@polymorphic_settings = polymorphic_settings
|
13
22
|
@belongs_to_settings = belongs_to_settings.map { |s| [s.name, s.query] }.to_h
|
23
|
+
@connection_factory = connection_factory
|
24
|
+
@raise_on_duplicates = raise_on_duplicates
|
25
|
+
@max_duplicate_retries = max_duplicate_retries
|
26
|
+
@logger = logger
|
27
|
+
@index_tracker = UniqueIndexTracker.new(model: model, connection_factory: connection_factory)
|
28
|
+
@index_tracker.map_indexed_columns(column_list)
|
14
29
|
end
|
15
30
|
|
16
31
|
def column_list
|
@@ -18,11 +33,41 @@ module ActiveRecordDataLoader
|
|
18
33
|
end
|
19
34
|
|
20
35
|
def generate_row(row_number)
|
21
|
-
|
36
|
+
@index_tracker.capture_unique_values(generate_row_with_retries(row_number))
|
22
37
|
end
|
23
38
|
|
24
39
|
private
|
25
40
|
|
41
|
+
def generate_row_with_retries(row_number)
|
42
|
+
retries = 0
|
43
|
+
while @index_tracker.repeating_unique_values?(row = generate_candidate_row(row_number))
|
44
|
+
if (retries += 1) > @max_duplicate_retries
|
45
|
+
raise DuplicateKeyError, <<~MSG if @raise_on_duplicates
|
46
|
+
Exhausted retries looking for unique values for row #{row_number} for '#{table}'.
|
47
|
+
Table '#{table}' has unique indexes that would have prevented inserting this row. If you would
|
48
|
+
like to skip non-unique rows instead of raising, configure `raise_on_duplicates` to be `false`.
|
49
|
+
MSG
|
50
|
+
|
51
|
+
@logger.warn(
|
52
|
+
"[ActiveRecordDataLoader] "\
|
53
|
+
"Exhausted retries looking for unique values. Skipping row #{row_number} for '#{table}'."
|
54
|
+
)
|
55
|
+
return nil
|
56
|
+
else
|
57
|
+
@logger.info(
|
58
|
+
"[ActiveRecordDataLoader] "\
|
59
|
+
"Retrying row #{row_number} for '#{table}' looking for unique values compliant with indexes. "\
|
60
|
+
"Retry number #{retries}."
|
61
|
+
)
|
62
|
+
end
|
63
|
+
end
|
64
|
+
row
|
65
|
+
end
|
66
|
+
|
67
|
+
def generate_candidate_row(row_number)
|
68
|
+
column_list.map { |c| column_data(row_number, c) }
|
69
|
+
end
|
70
|
+
|
26
71
|
def column_data(row_number, column)
|
27
72
|
column_value = columns[column]
|
28
73
|
return column_value unless column_value.respond_to?(:call)
|
@@ -49,8 +94,14 @@ module ActiveRecordDataLoader
|
|
49
94
|
@model_class
|
50
95
|
.columns_hash
|
51
96
|
.reject { |name| name == @model_class.primary_key }
|
52
|
-
.select { |_, c|
|
53
|
-
.map
|
97
|
+
.select { |_, c| ColumnDataProvider.supported?(model_class: @model_class, ar_column: c) }
|
98
|
+
.map do |_, c|
|
99
|
+
ColumnDataProvider.provider_for(
|
100
|
+
model_class: @model_class,
|
101
|
+
ar_column: c,
|
102
|
+
connection_factory: @connection_factory
|
103
|
+
)
|
104
|
+
end
|
54
105
|
.reduce({}, :merge)
|
55
106
|
end
|
56
107
|
|
@@ -60,16 +111,33 @@ module ActiveRecordDataLoader
|
|
60
111
|
.select(&:belongs_to?)
|
61
112
|
.reject(&:polymorphic?)
|
62
113
|
.map do |assoc|
|
63
|
-
|
114
|
+
BelongsToDataProvider.provider_for(
|
115
|
+
ar_association: assoc,
|
116
|
+
query: @belongs_to_settings[assoc.name],
|
117
|
+
strategy: column_config_strategy(assoc)
|
118
|
+
)
|
64
119
|
end
|
65
120
|
.reduce({}, :merge)
|
66
121
|
end
|
67
122
|
|
68
123
|
def polymorphic_config
|
69
124
|
@polymorphic_settings
|
70
|
-
.map
|
125
|
+
.map do |s|
|
126
|
+
PolymorphicBelongsToDataProvider.provider_for(
|
127
|
+
polymorphic_settings: s,
|
128
|
+
strategy: column_config_strategy(s.model_class.reflect_on_association(s.name))
|
129
|
+
)
|
130
|
+
end
|
71
131
|
.reduce({}, :merge)
|
72
132
|
end
|
133
|
+
|
134
|
+
def column_config_strategy(column)
|
135
|
+
if @index_tracker.contained_in_index?(column)
|
136
|
+
:cycle
|
137
|
+
else
|
138
|
+
:random
|
139
|
+
end
|
140
|
+
end
|
73
141
|
end
|
74
142
|
end
|
75
143
|
end
|
@@ -2,20 +2,21 @@
|
|
2
2
|
|
3
3
|
module ActiveRecordDataLoader
|
4
4
|
module ActiveRecord
|
5
|
-
class
|
6
|
-
def self.
|
5
|
+
class PolymorphicBelongsToDataProvider
|
6
|
+
def self.provider_for(polymorphic_settings:, strategy: :random)
|
7
7
|
ar_association = polymorphic_settings.model_class.reflect_on_association(
|
8
8
|
polymorphic_settings.name
|
9
9
|
)
|
10
10
|
raise "#{name} only supports polymorphic associations" unless ar_association.polymorphic?
|
11
11
|
|
12
|
-
new(polymorphic_settings, ar_association).polymorphic_config
|
12
|
+
new(polymorphic_settings, ar_association, strategy).polymorphic_config
|
13
13
|
end
|
14
14
|
|
15
|
-
def initialize(settings, ar_association)
|
15
|
+
def initialize(settings, ar_association, strategy)
|
16
16
|
@settings = settings
|
17
17
|
@ar_association = ar_association
|
18
18
|
@model_count = settings.weighted_models.size
|
19
|
+
@strategy = strategy
|
19
20
|
end
|
20
21
|
|
21
22
|
def polymorphic_config
|
@@ -32,21 +33,25 @@ module ActiveRecordDataLoader
|
|
32
33
|
end
|
33
34
|
|
34
35
|
def foreign_key(row_number)
|
35
|
-
possible_values[row_number % @model_count][1].
|
36
|
+
possible_values[row_number % @model_count][1].next
|
36
37
|
end
|
37
38
|
|
38
39
|
def possible_values
|
39
40
|
@possible_values ||= begin
|
40
41
|
values = @settings.models.keys.map do |klass|
|
41
|
-
[klass.name,
|
42
|
+
[klass.name, values_query(klass)]
|
42
43
|
end.to_h
|
43
44
|
|
44
45
|
@settings.weighted_models.map { |klass| [klass.name, values[klass.name]] }
|
45
46
|
end
|
46
47
|
end
|
47
48
|
|
49
|
+
def values_query(klass)
|
50
|
+
List.for(base_query(klass).pluck(klass.primary_key), strategy: @strategy)
|
51
|
+
end
|
52
|
+
|
48
53
|
def base_query(klass)
|
49
|
-
if @settings.queries[klass]
|
54
|
+
if @settings.queries[klass].respond_to?(:call)
|
50
55
|
@settings.queries[klass].call.all
|
51
56
|
else
|
52
57
|
klass.all
|
@@ -12,7 +12,7 @@ module ActiveRecordDataLoader
|
|
12
12
|
}.freeze
|
13
13
|
|
14
14
|
class << self
|
15
|
-
def generator_for(model_class:, ar_column:)
|
15
|
+
def generator_for(model_class:, ar_column:, connection_factory: nil)
|
16
16
|
scenario = GENERATORS.keys.find { |m| send(m, model_class, ar_column) }
|
17
17
|
generator = GENERATORS.fetch(scenario, -> { SecureRandom.uuid })
|
18
18
|
|
@@ -0,0 +1,67 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module ActiveRecordDataLoader
  module ActiveRecord
    # Remembers which value combinations have already been produced for each
    # unique index of a model's table, so callers can detect a generated row
    # that would repeat one of them.
    class UniqueIndexTracker
      Index = Struct.new(:name, :columns, :column_indexes, keyword_init: true)

      # @param model [#table_name, #reflect_on_association] model whose table is inspected
      # @param connection_factory [#call] returns a connection responding to
      #   `indexes(table)` and `close`; it is called once here and the
      #   connection is closed again before returning
      def initialize(model:, connection_factory:)
        @model = model
        @table = model.table_name
        @unique_indexes = []
        @unique_values_used = {}
        find_unique_indexes(connection_factory)
      end

      # Resolves every unique index to the positions its columns occupy in
      # +column_list+ and resets the per-index sets of seen values.
      #
      # An index is only tracked when all of its columns appear in
      # +column_list+. Indexes that reference something else (a column that is
      # not generated, or an expression reported as a single String) cannot be
      # checked against generated rows, so they are left out rather than
      # producing `nil` positions that would fail on `row[nil]` later.
      #
      # @param column_list [Array<Symbol>] column names in row order
      # @return [Array<Index>] the indexes that will be tracked
      def map_indexed_columns(column_list)
        @unique_values_used = {}
        @unique_indexes = @raw_unique_indexes.each_with_object([]) do |index, tracked|
          columns = index_columns(index)
          positions = columns.map { |c| column_list.find_index(c) }
          next if positions.any?(&:nil?)

          @unique_values_used[index.name] = Set.new
          tracked << Index.new(name: index.name, columns: columns, column_indexes: positions)
        end
      end

      # @param row [Array] candidate row, ordered like the mapped column list
      # @return [Boolean] true when the row repeats an already captured value
      #   combination for at least one tracked index
      def repeating_unique_values?(row)
        @unique_indexes.any? do |index|
          @unique_values_used.fetch(index.name).include?(indexed_values(index, row))
        end
      end

      # Records the row's values for every tracked index.
      #
      # @param row [Array, nil] generated row; nil or empty rows are ignored
      # @return [Array, nil] the row itself, or nil when nothing was captured
      def capture_unique_values(row)
        return if row.nil? || row.empty?

        @unique_indexes.each do |index|
          @unique_values_used.fetch(index.name) << indexed_values(index, row)
        end
        row
      end

      # @param ar_column [#name] a column, or an association (which must then
      #   also respond to `join_foreign_key`)
      # @return [Boolean] true when the column (or, for a belongs_to
      #   association, its foreign key) is part of any unique index
      def contained_in_index?(ar_column)
        target_column = if @model.reflect_on_association(ar_column.name)&.belongs_to?
                          ar_column.join_foreign_key.to_sym
                        else
                          ar_column.name.to_sym
                        end

        @raw_unique_indexes.any? { |index| index_columns(index).include?(target_column) }
      end

      private

      attr_reader :table

      # Column names of an index as symbols. `Array()` also covers adapters
      # that report `columns` as a single String instead of an Array.
      def index_columns(index)
        Array(index.columns).map(&:to_sym)
      end

      # Values of +row+ at the positions covered by +index+.
      def indexed_values(index, row)
        index.column_indexes.map { |i| row[i] }
      end

      # Loads the unique index definitions of the table, always closing the
      # connection obtained from the factory.
      def find_unique_indexes(connection_factory)
        connection = connection_factory.call
        @raw_unique_indexes = connection.indexes(table).select(&:unique)
      ensure
        connection&.close
      end
    end
  end
end
|
@@ -2,15 +2,18 @@
|
|
2
2
|
|
3
3
|
module ActiveRecordDataLoader
|
4
4
|
class BulkInsertStrategy
|
5
|
-
def initialize(data_generator)
|
5
|
+
def initialize(data_generator, file_adapter)
|
6
6
|
@data_generator = data_generator
|
7
|
+
@file_adapter = file_adapter
|
7
8
|
end
|
8
9
|
|
9
10
|
def load_batch(row_numbers, connection)
|
10
|
-
|
11
|
+
command = <<~SQL
|
11
12
|
INSERT INTO #{quoted_table_name(connection)} (#{column_list(connection)})
|
12
13
|
VALUES #{values(row_numbers, connection)}
|
13
14
|
SQL
|
15
|
+
insert(connection: connection, command: command)
|
16
|
+
file_adapter.insert(command)
|
14
17
|
end
|
15
18
|
|
16
19
|
def table_name
|
@@ -23,7 +26,11 @@ module ActiveRecordDataLoader
|
|
23
26
|
|
24
27
|
private
|
25
28
|
|
26
|
-
attr_reader :data_generator
|
29
|
+
attr_reader :data_generator, :file_adapter
|
30
|
+
|
31
|
+
def insert(connection:, command:)
|
32
|
+
connection.insert(command)
|
33
|
+
end
|
27
34
|
|
28
35
|
def quoted_table_name(connection)
|
29
36
|
@quoted_table_name ||= connection.quote_table_name(data_generator.table)
|
@@ -38,15 +45,16 @@ module ActiveRecordDataLoader
|
|
38
45
|
|
39
46
|
def values(row_numbers, connection)
|
40
47
|
row_numbers
|
41
|
-
.map { |i|
|
48
|
+
.map { |i| row_values(i, connection) }
|
49
|
+
.compact
|
42
50
|
.join(",")
|
43
51
|
end
|
44
52
|
|
45
53
|
def row_values(row_number, connection)
|
46
|
-
data_generator
|
47
|
-
|
48
|
-
|
49
|
-
|
54
|
+
row = data_generator.generate_row(row_number)
|
55
|
+
return unless row.present?
|
56
|
+
|
57
|
+
"(#{row.map { |v| connection.quote(v) }.join(',')})"
|
50
58
|
end
|
51
59
|
end
|
52
60
|
end
|
@@ -2,27 +2,52 @@
|
|
2
2
|
|
3
3
|
module ActiveRecordDataLoader
|
4
4
|
class Configuration
|
5
|
-
attr_accessor :
|
5
|
+
attr_accessor :connection_factory, :default_batch_size, :default_row_count,
|
6
|
+
:logger, :max_duplicate_retries, :raise_on_duplicates, :statement_timeout
|
7
|
+
attr_reader :output
|
6
8
|
|
7
9
|
def initialize(
|
8
10
|
default_batch_size: 100_000,
|
9
11
|
default_row_count: 1,
|
10
12
|
logger: nil,
|
11
|
-
statement_timeout: "2min"
|
13
|
+
statement_timeout: "2min",
|
14
|
+
connection_factory: -> { ::ActiveRecord::Base.connection },
|
15
|
+
raise_on_duplicates: false,
|
16
|
+
max_duplicate_retries: 5,
|
17
|
+
output: nil
|
12
18
|
)
|
13
19
|
@default_batch_size = default_batch_size
|
14
20
|
@default_row_count = default_row_count
|
15
21
|
@logger = logger || default_logger
|
16
22
|
@statement_timeout = statement_timeout
|
23
|
+
@connection_factory = connection_factory
|
24
|
+
@raise_on_duplicates = raise_on_duplicates
|
25
|
+
@max_duplicate_retries = max_duplicate_retries
|
26
|
+
self.output = output
|
27
|
+
end
|
28
|
+
|
29
|
+
def output=(output)
|
30
|
+
@output = validate_output(output)
|
17
31
|
end
|
18
32
|
|
19
33
|
private
|
20
34
|
|
35
|
+
def validate_output(output)
|
36
|
+
if output.to_s.blank?
|
37
|
+
nil
|
38
|
+
elsif output.is_a?(String)
|
39
|
+
output
|
40
|
+
else
|
41
|
+
raise "The output configuration parameter must be a filename meant to be the "\
|
42
|
+
"target for the SQL script"
|
43
|
+
end
|
44
|
+
end
|
45
|
+
|
21
46
|
def default_logger
|
22
47
|
if defined?(Rails) && Rails.respond_to?(:logger)
|
23
48
|
Rails.logger
|
24
49
|
else
|
25
|
-
Logger.new(
|
50
|
+
Logger.new($stdout, level: :info)
|
26
51
|
end
|
27
52
|
end
|
28
53
|
end
|
@@ -0,0 +1,52 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module ActiveRecordDataLoader
  # Hands out connections from a factory, applying a statement timeout around
  # the caller's block when the adapter is PostgreSQL, and remembers a couple
  # of adapter capabilities probed once at construction time.
  class ConnectionHandler
    # @param connection_factory [#call] returns a connection; invoked once here
    #   to probe capabilities and once per #with_connection call
    # @param statement_timeout [String] value interpolated into the
    #   `SET statement_timeout` command
    def initialize(connection_factory:, statement_timeout:)
      @connection_factory = connection_factory
      @statement_timeout = statement_timeout
      cache_facts
    end

    # Yields a connection obtained from the factory and closes it afterwards.
    #
    # When timeouts are supported the statement timeout is set before the
    # block runs and reset once it is left, including when the block raises,
    # so a connection is never closed with the loader's timeout still applied.
    #
    # @yieldparam connection the connection returned by the factory
    # @return the value of the block
    def with_connection
      connection = connection_factory.call
      return yield(connection) unless supports_timeout?

      connection.execute(timeout_set_command)
      finished = false
      begin
        result = yield connection
        finished = true
        result
      ensure
        # After a normal run a failing reset is a real error and propagates.
        # While the block's own exception is unwinding, the reset is
        # best-effort so it cannot replace that exception.
        finished ? connection.execute(reset_timeout_command) : reset_timeout_quietly(connection)
      end
    ensure
      connection&.close
    end

    # @return [Boolean] true when the adapter name is "postgresql" (case-insensitive)
    def supports_timeout?
      @supports_timeout
    end

    # @return [Boolean] true when the raw connection responds to `copy_data`
    def supports_copy?
      @supports_copy
    end

    # @return [String] SQL that applies the configured statement timeout
    def timeout_set_command
      "SET statement_timeout = \"#{statement_timeout}\""
    end

    # @return [String] SQL that restores the default statement timeout
    def reset_timeout_command
      "RESET statement_timeout"
    end

    private

    attr_reader :connection_factory, :statement_timeout

    # Resets the timeout while another exception is already propagating;
    # errors from the reset itself are dropped so the original one survives.
    def reset_timeout_quietly(connection)
      connection.execute(reset_timeout_command)
    rescue StandardError
      nil
    end

    # Probes the adapter once and caches what it supports, closing the
    # connection used for the probe.
    def cache_facts
      connection = connection_factory.call
      @supports_timeout = connection.adapter_name.downcase.to_sym == :postgresql
      @supports_copy = connection.raw_connection.respond_to?(:copy_data)
    ensure
      connection&.close
    end
  end
end
|
@@ -2,15 +2,26 @@
|
|
2
2
|
|
3
3
|
module ActiveRecordDataLoader
|
4
4
|
class CopyStrategy
|
5
|
-
def initialize(data_generator)
|
5
|
+
def initialize(data_generator, file_adapter)
|
6
6
|
@data_generator = data_generator
|
7
|
+
@file_adapter = file_adapter
|
7
8
|
end
|
8
9
|
|
9
10
|
def load_batch(row_numbers, connection)
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
11
|
+
data = csv_rows(row_numbers, connection)
|
12
|
+
copy(
|
13
|
+
connection: connection,
|
14
|
+
table: table_name_for_copy(connection),
|
15
|
+
columns: columns_for_copy(connection),
|
16
|
+
data: data,
|
17
|
+
row_numbers: row_numbers
|
18
|
+
)
|
19
|
+
file_adapter.copy(
|
20
|
+
table: table_name_for_copy(connection),
|
21
|
+
columns: columns_for_copy(connection),
|
22
|
+
data: data,
|
23
|
+
row_numbers: row_numbers
|
24
|
+
)
|
14
25
|
end
|
15
26
|
|
16
27
|
def table_name
|
@@ -23,29 +34,32 @@ module ActiveRecordDataLoader
|
|
23
34
|
|
24
35
|
private
|
25
36
|
|
26
|
-
attr_reader :data_generator
|
37
|
+
attr_reader :data_generator, :file_adapter
|
27
38
|
|
28
|
-
def
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
end
|
33
|
-
|
34
|
-
def copy_command(connection)
|
35
|
-
@copy_command ||= begin
|
36
|
-
quoted_table_name = connection.quote_table_name(data_generator.table)
|
37
|
-
columns = data_generator
|
38
|
-
.column_list
|
39
|
-
.map { |c| connection.quote_column_name(c) }
|
40
|
-
.join(", ")
|
41
|
-
|
42
|
-
<<~SQL
|
43
|
-
COPY #{quoted_table_name} (#{columns})
|
44
|
-
FROM STDIN WITH (FORMAT CSV)
|
45
|
-
SQL
|
39
|
+
def copy(connection:, table:, columns:, data:, row_numbers:)
|
40
|
+
raw_connection = connection.raw_connection
|
41
|
+
raw_connection.copy_data("COPY #{table} (#{columns}) FROM STDIN WITH (FORMAT CSV)") do
|
42
|
+
raw_connection.put_copy_data(data.join("\n"))
|
46
43
|
end
|
47
44
|
end
|
48
45
|
|
46
|
+
def csv_rows(row_numbers, connection)
|
47
|
+
row_numbers.map do |i|
|
48
|
+
data_generator.generate_row(i)&.map { |d| quote_data(d, connection) }&.join(",")
|
49
|
+
end.compact
|
50
|
+
end
|
51
|
+
|
52
|
+
def table_name_for_copy(connection)
|
53
|
+
@table_name_for_copy ||= connection.quote_table_name(data_generator.table)
|
54
|
+
end
|
55
|
+
|
56
|
+
def columns_for_copy(connection)
|
57
|
+
@columns_for_copy ||= data_generator
|
58
|
+
.column_list
|
59
|
+
.map { |c| connection.quote_column_name(c) }
|
60
|
+
.join(", ")
|
61
|
+
end
|
62
|
+
|
49
63
|
def quote_data(data, connection)
|
50
64
|
return if data.nil?
|
51
65
|
|
@@ -13,16 +13,24 @@ module ActiveRecordDataLoader
|
|
13
13
|
|
14
14
|
def adapter
|
15
15
|
@adapter ||=
|
16
|
-
if
|
17
|
-
require "ffaker"
|
16
|
+
if can_use?("ffaker", "2.1.0")
|
18
17
|
FFakerGemAdapter.new
|
19
|
-
elsif
|
20
|
-
require "faker"
|
18
|
+
elsif can_use?("faker", "1.9.3")
|
21
19
|
FakerGemAdapter.new
|
22
20
|
else
|
23
21
|
NoGemAdapter.new
|
24
22
|
end
|
25
23
|
end
|
24
|
+
|
25
|
+
def can_use?(gem, min_version)
|
26
|
+
gemspec = Gem.loaded_specs[gem]
|
27
|
+
return false unless gemspec.present? && gemspec.version >= Gem::Version.new(min_version)
|
28
|
+
|
29
|
+
require gem
|
30
|
+
true
|
31
|
+
rescue LoadError
|
32
|
+
false
|
33
|
+
end
|
26
34
|
end
|
27
35
|
|
28
36
|
class FFakerGemAdapter
|
@@ -3,13 +3,16 @@
|
|
3
3
|
module ActiveRecordDataLoader
|
4
4
|
module Dsl
|
5
5
|
class Model
|
6
|
-
attr_reader :klass, :columns, :row_count, :polymorphic_associations, :belongs_to_associations
|
6
|
+
attr_reader :klass, :columns, :row_count, :polymorphic_associations, :belongs_to_associations,
|
7
|
+
:raise_on_duplicates_flag
|
7
8
|
|
8
9
|
def initialize(klass:, configuration:)
|
9
10
|
@klass = klass
|
10
11
|
@columns = {}
|
11
12
|
@row_count = configuration.default_row_count
|
12
13
|
@batch_size = configuration.default_batch_size
|
14
|
+
@raise_on_duplicates_flag = configuration.raise_on_duplicates
|
15
|
+
@max_duplicate_retries = configuration.max_duplicate_retries
|
13
16
|
@polymorphic_associations = []
|
14
17
|
@belongs_to_associations = []
|
15
18
|
end
|
@@ -22,6 +25,20 @@ module ActiveRecordDataLoader
|
|
22
25
|
@batch_size = (size || @batch_size)
|
23
26
|
end
|
24
27
|
|
28
|
+
def raise_on_duplicates
|
29
|
+
@raise_on_duplicates_flag = true
|
30
|
+
end
|
31
|
+
|
32
|
+
def do_not_raise_on_duplicates
|
33
|
+
@raise_on_duplicates_flag = false
|
34
|
+
end
|
35
|
+
|
36
|
+
def max_duplicate_retries(retries = nil)
|
37
|
+
return @max_duplicate_retries if retries.nil?
|
38
|
+
|
39
|
+
@max_duplicate_retries = retries
|
40
|
+
end
|
41
|
+
|
25
42
|
def column(name, func)
|
26
43
|
@columns[name.to_sym] = func
|
27
44
|
end
|
@@ -32,7 +49,7 @@ module ActiveRecordDataLoader
|
|
32
49
|
).tap { |a| block.call(a) }
|
33
50
|
end
|
34
51
|
|
35
|
-
def belongs_to(assoc_name, eligible_set:)
|
52
|
+
def belongs_to(assoc_name, eligible_set: nil)
|
36
53
|
@belongs_to_associations << BelongsToAssociation.new(@klass, assoc_name, eligible_set)
|
37
54
|
end
|
38
55
|
end
|
@@ -0,0 +1,48 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module ActiveRecordDataLoader
  # Writes the statements a load would run into a SQL script file, plus one
  # CSV data file per COPY batch placed next to the script.
  class FileOutputAdapter
    # Builds an adapter, writes the optional `:pre_command`, yields the
    # adapter, then writes the optional `:post_command`.
    #
    # @param options [Hash] `:filename`, `:pre_command`, `:post_command`
    # @yieldparam adapter [FileOutputAdapter]
    def self.with_output_options(options)
      adapter = new(options)
      pre_command = options[:pre_command]
      adapter.write_command(pre_command) if pre_command
      yield adapter
      post_command = options[:post_command]
      adapter.write_command(post_command) if post_command
    end

    # @param options [Hash] `:filename` is the script path; it defaults to
    #   "active_record_data_loader_script.sql". An existing file is emptied.
    def initialize(options)
      @filename = options.fetch(:filename, "active_record_data_loader_script.sql")
      @file_basename = File.basename(@filename, File.extname(@filename))
      @path = File.expand_path(File.dirname(@filename))
      # Empty a previous script through a write that is closed immediately,
      # instead of a read-only open that is left dangling.
      File.write(@filename, "") if File.exist?(@filename)
    end

    # Stores the batch as a CSV file and appends a `\COPY` command that loads
    # it to the script.
    #
    # @param table [String] table name as it should appear in the command
    # @param columns [String] comma separated column list
    # @param data [Array<String>] one CSV line per row
    # @param row_numbers [#first, #last] row numbers covered by the batch
    # @return [nil]
    def copy(table:, columns:, data:, row_numbers:)
      # `puts([])` would still emit a blank line, leaving a data file made of
      # one empty record; a batch without rows produces no file and no command.
      return if data.empty?

      csv_file = data_filename(table, row_numbers)
      File.open(csv_file, "w") { |f| f.puts(data) }
      File.open(filename, "a") do |file|
        file.puts("\\COPY #{table} (#{columns}) FROM '#{csv_file}' WITH (FORMAT CSV);")
      end
    end

    # Appends an INSERT statement to the script.
    #
    # @param command [String] SQL statement without trailing semicolon
    def insert(command)
      write_command(command)
    end

    # Appends a statement to the script on a single line, terminated by ";".
    #
    # @param command [String] SQL statement; newlines become spaces
    def write_command(command)
      File.open(filename, "a") { |f| f.puts("#{command.tr("\n", ' ')};") }
    end

    private

    attr_reader :filename, :path, :file_basename

    # Path of the CSV file for a batch: script basename, table name without
    # double quotes, and the first and last row number of the batch.
    def data_filename(table, row_numbers)
      File.join(
        path,
        "#{file_basename}_#{table.delete('"')}_rows_#{row_numbers.first}_to_#{row_numbers.last}.csv"
      )
    end
  end
end
|