active_record_data_loader 1.0.2 → 1.3.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (38) hide show
  1. checksums.yaml +5 -5
  2. data/.github/workflows/build.yml +51 -0
  3. data/.github/workflows/codeql-analysis.yml +70 -0
  4. data/.github/workflows/gem-push.yml +29 -0
  5. data/.rubocop.yml +46 -7
  6. data/CHANGELOG.md +38 -2
  7. data/CODE_OF_CONDUCT.md +2 -2
  8. data/Gemfile.lock +71 -73
  9. data/README.md +162 -9
  10. data/Rakefile +8 -2
  11. data/active_record_data_loader.gemspec +7 -6
  12. data/config/database.yml +2 -0
  13. data/docker-compose.yml +18 -0
  14. data/gemfiles/activerecord_6.gemfile +1 -1
  15. data/lib/active_record_data_loader/active_record/{belongs_to_configuration.rb → belongs_to_data_provider.rb} +8 -7
  16. data/lib/active_record_data_loader/active_record/{column_configuration.rb → column_data_provider.rb} +2 -2
  17. data/lib/active_record_data_loader/active_record/enum_value_generator.rb +9 -8
  18. data/lib/active_record_data_loader/active_record/integer_value_generator.rb +1 -1
  19. data/lib/active_record_data_loader/active_record/list.rb +47 -0
  20. data/lib/active_record_data_loader/active_record/model_data_generator.rb +62 -7
  21. data/lib/active_record_data_loader/active_record/{polymorphic_belongs_to_configuration.rb → polymorphic_belongs_to_data_provider.rb} +12 -7
  22. data/lib/active_record_data_loader/active_record/unique_index_tracker.rb +67 -0
  23. data/lib/active_record_data_loader/bulk_insert_strategy.rb +16 -8
  24. data/lib/active_record_data_loader/configuration.rb +26 -3
  25. data/lib/active_record_data_loader/connection_handler.rb +52 -0
  26. data/lib/active_record_data_loader/copy_strategy.rb +38 -24
  27. data/lib/active_record_data_loader/data_faker.rb +12 -4
  28. data/lib/active_record_data_loader/dsl/model.rb +19 -2
  29. data/lib/active_record_data_loader/errors.rb +5 -0
  30. data/lib/active_record_data_loader/file_output_adapter.rb +48 -0
  31. data/lib/active_record_data_loader/loader.rb +55 -71
  32. data/lib/active_record_data_loader/null_output_adapter.rb +15 -0
  33. data/lib/active_record_data_loader/table_loader.rb +59 -0
  34. data/lib/active_record_data_loader/version.rb +1 -1
  35. data/lib/active_record_data_loader.rb +11 -38
  36. metadata +51 -29
  37. data/.travis.yml +0 -24
  38. data/config/database.yml.travis +0 -12
data/README.md CHANGED
@@ -1,6 +1,6 @@
1
- # ActiveRecord Data Loader
1
+ # active_record_data_loader
2
2
 
3
- [![Build Status](https://travis-ci.org/abeiderman/active_record_data_loader.svg?branch=master)](https://travis-ci.org/abeiderman/active_record_data_loader)
3
+ [![Build Status](https://github.com/abeiderman/active_record_data_loader/actions/workflows/build.yml/badge.svg)](https://github.com/abeiderman/active_record_data_loader/actions/workflows/build.yml)
4
4
  [![Coverage Status](https://coveralls.io/repos/github/abeiderman/active_record_data_loader/badge.svg?branch=master&service=github)](https://coveralls.io/github/abeiderman/active_record_data_loader?branch=master)
5
5
  [![Maintainability](https://api.codeclimate.com/v1/badges/338904b3f7e8d19a3cb1/maintainability)](https://codeclimate.com/github/abeiderman/active_record_data_loader/maintainability)
6
6
 
@@ -10,6 +10,10 @@ Efficiently bulk load data for your ActiveRecord models with a simple DSL.
10
10
 
11
11
  Load, performance, and stress tests often require setting up a realistic amount of data in your database. This gem is intended to help organize that data load and make it more maintainable than having a collection of SQL scripts.
12
12
 
13
+ #### How is this different from using _factory_bot_?
14
+
15
+ This gem is not a replacement for [factory_bot](https://github.com/thoughtbot/factory_bot). It solves a different use case. While _factory_bot_ is great for organizing test data and reducing duplication in your functional tests, _active_record_data_loader_ is focused around bulk loading data for performance tests. The purpose of _active_record_data_loader_ is loading large amounts of data as efficiently as possible while providing a DSL that helps with maintainability.
16
+
13
17
  ## Installation
14
18
 
15
19
  Add this line to your application's Gemfile:
@@ -37,6 +41,7 @@ Polymorphic associations need to be defined explicitly as shown in [Polymorphic
37
41
  ### Basic usage
38
42
 
39
43
  Let's say you have the following models:
44
+
40
45
  ```ruby
41
46
  class Customer < ApplicationRecord
42
47
  end
@@ -47,6 +52,7 @@ end
47
52
  ```
48
53
 
49
54
  The following code will create 10,000 customers and 100,000 orders, and will associate the orders to those customers evenly:
55
+
50
56
  ```ruby
51
57
  data_loader = ActiveRecordDataLoader.define do
52
58
  model Customer do |m|
@@ -63,6 +69,7 @@ data_loader.load_data
63
69
 
64
70
  #### Overriding column values
65
71
  To provide your own values for columns you can provide a lambda or a constant value:
72
+
66
73
  ```ruby
67
74
  data_loader = ActiveRecordDataLoader.define do
68
75
  model Customer do |m|
@@ -87,7 +94,7 @@ In this example, we are creating 25K orders for customers in CAN with a CAD curr
87
94
  data_loader = ActiveRecordDataLoader.define do
88
95
  model Customer do |m|
89
96
  m.count 10_000
90
- m.column :country, -> { %w[CAN MXN USA].sample }
97
+ m.column :country, -> { %w[CAN MEX USA].sample }
91
98
  end
92
99
 
93
100
  model Order do |m|
@@ -95,13 +102,13 @@ data_loader = ActiveRecordDataLoader.define do
95
102
  m.column :currency, "CAD"
96
103
  m.belongs_to :customer, eligible_set: -> { Customer.where(country: "CAN") }
97
104
  end
98
-
105
+
99
106
  model Order do |m|
100
107
  m.count 25_000
101
108
  m.column :currency, "MXN"
102
109
  m.belongs_to :customer, eligible_set: -> { Customer.where(country: "MEX") }
103
110
  end
104
-
111
+
105
112
  model Order do |m|
106
113
  m.count 50_000
107
114
  m.column :currency, "USD"
@@ -117,6 +124,7 @@ data_loader.load_data
117
124
  If you have a polymorphic `belongs_to` association, you will need to define that explicitly for it to be populated.
118
125
 
119
126
  Let's assume the following models where an order could belong to either a person or a business:
127
+
120
128
  ```ruby
121
129
  class Person < ApplicationRecord
122
130
  has_many :orders
@@ -132,6 +140,7 @@ end
132
140
  ```
133
141
 
134
142
  In order to populate the `customer` association in orders, you would specify them like this:
143
+
135
144
  ```ruby
136
145
  data_loader = ActiveRecordDataLoader.define do
137
146
  model Person do |m|
@@ -144,7 +153,7 @@ data_loader = ActiveRecordDataLoader.define do
144
153
 
145
154
  model Order do |m|
146
155
  m.count 100_000
147
-
156
+
148
157
  m.polymorphic :customer do |c|
149
158
  c.model Person
150
159
  c.model Business
@@ -156,6 +165,7 @@ data_loader.load_data
156
165
  ```
157
166
 
158
167
  You can also provide a `weight` to each of the target models if you want to control how they are distributed. If you wanted to have twice as many orders for `Person` as for `Business`, it would look like this:
168
+
159
169
  ```ruby
160
170
  data_loader = ActiveRecordDataLoader.define do
161
171
  model Person do |m|
@@ -168,7 +178,7 @@ data_loader = ActiveRecordDataLoader.define do
168
178
 
169
179
  model Order do |m|
170
180
  m.count 100_000
171
-
181
+
172
182
  m.polymorphic :customer do |c|
173
183
  c.model Person, weight: 2
174
184
  c.model Business, weight: 1
@@ -180,6 +190,7 @@ data_loader.load_data
180
190
  ```
181
191
 
182
192
  Additionally, you can also provide an `eligible_set` to control which records to limit the association to:
193
+
183
194
  ```ruby
184
195
  data_loader = ActiveRecordDataLoader.define do
185
196
  model Person do |m|
@@ -193,7 +204,7 @@ data_loader = ActiveRecordDataLoader.define do
193
204
 
194
205
  model Order do |m|
195
206
  m.count 100_000
196
-
207
+
197
208
  m.polymorphic :customer do |c|
198
209
  c.model Person, weight: 2
199
210
  c.model Business, weight: 1, eligible_set: -> { Business.where(country: "USA") }
@@ -204,6 +215,148 @@ end
204
215
  data_loader.load_data
205
216
  ```
206
217
 
218
+ ### Unique indexes
219
+
220
+ Unique indexes will be detected automatically and the data generator will attempt to generate unique values for each row. The generator keeps track of unique values previously generated and retries rows with repeating values. Because some columns could be generating random values, retrying can eventually be successful.
221
+
222
+ There are a couple of behaviors you can control regarding preventing duplicates. The first is the number of times to retry a given row with duplicate values (that would fail the unique index/constraint). The second is what to do if a unique value cannot be generated after the retries are exhausted.
223
+
224
+ By default, there will be 5 retries per row and the row will be skipped after all retries are unsuccessful. This means fewer rows than requested may end up being populated on that table.
225
+
226
+ Alternatively, you can choose to raise an error if a unique row cannot be generated. You can also set the number of retries to 0 to not retry at all. If the table in question is a primary target for your testing and will be loaded with a lot of data, you will likely not want to have retries since it could potentially slow down data generation significantly.
227
+
228
+ Here is how to adjust these settings. Let's assume that `daily_notes` has a unique index on both `date` and `person_id`:
229
+
230
+ ```ruby
231
+ class Person < ApplicationRecord
232
+ end
233
+
234
+ class DailyNotes < ApplicationRecord
235
+ belongs_to :person
236
+ end
237
+
238
+ data_loader = ActiveRecordDataLoader.define do
239
+ model Person do |m|
240
+ m.count 500
241
+ end
242
+
243
+ model DailyNotes do |m|
244
+ m.count 10_000
245
+ m.max_duplicate_retries 10
246
+ m.do_not_raise_on_duplicates
247
+
248
+ m.column :date, -> { Date.today - rand(20) }
249
+ end
250
+ end
251
+
252
+ data_loader.load_data
253
+ ```
254
+
255
+ In the case above, retrying could be a reasonable choice since the date is generated at random and it's a small number of rows being generated.
256
+
257
+ If you want to disable retrying duplicates altogether and raise an error to fail fast you can specify it like this:
258
+
259
+ ```ruby
260
+ class Person < ApplicationRecord
261
+ end
262
+
263
+ class Skill < ApplicationRecord
264
+ end
265
+
266
+ class SkillRating < ApplicationRecord
267
+ belongs_to :person
268
+ belongs_to :skill
269
+ end
270
+
271
+ data_loader = ActiveRecordDataLoader.define do
272
+ model Person do |m|
273
+ m.count 100_000
274
+ end
275
+
276
+ model Skill do |m|
277
+ m.count 100
278
+ end
279
+
280
+ model SkillRating do |m|
281
+ m.count 10_000_000
282
+ m.max_duplicate_retries 0
283
+ m.raise_on_duplicates
284
+
285
+ m.column :rating, -> { rand(1..10) }
286
+ end
287
+ end
288
+
289
+ data_loader.load_data
290
+ ```
291
+
292
+
293
+ ### Configuration options
294
+
295
+ You can define global configuration options like this:
296
+
297
+ ```ruby
298
+ ActiveRecordDataLoader.configure do |c|
299
+ c.logger = ActiveSupport::Logger.new("my_file.log", level: :debug)
300
+ c.statement_timeout = "5min"
301
+ end
302
+ ```
303
+
304
+ Or you can create a configuration object for the specific data loader instance rather than globally:
305
+
306
+ ```ruby
307
+ config = ActiveRecordDataLoader::Configuration.new(
308
+ logger: ActiveSupport::Logger.new("my_file.log", level: :debug),
309
+ statement_timeout: "5min"
310
+ )
311
+ loader = ActiveRecordDataLoader.define(config) do
312
+ model Company do |m|
313
+ m.count 10
314
+ end
315
+
316
+ # ... more definitions
317
+ end
318
+ ```
319
+
320
+ #### statement_timeout
321
+
322
+ This is currently only used for Postgres connections to adjust the `statement_timeout` value for the connection. The default is `2min`. Depending on the size of the batches you are loading and overall size of the tables you may need to increase this value:
323
+
324
+ ```ruby
325
+ ActiveRecordDataLoader.configure do |c|
326
+ c.statement_timeout = "5min"
327
+ end
328
+ ```
329
+
330
+ #### connection_factory
331
+
332
+ The `connection_factory` option accepts a lambda that should return a connection object whenever executed. If not specified, the default behavior is to retrieve a connection using `ActiveRecord::Base.connection`. You can configure it like this:
333
+
334
+ ```ruby
335
+ ActiveRecordDataLoader.configure do |c|
336
+ c.connection_factory = -> { MyCustomConnectionHandler.open_connection }
337
+ end
338
+ ```
339
+
340
+ #### output
341
+
342
+ The `output` option accepts an optional file name to write a SQL script with the data loading statements. This script file can then be executed manually to load the data. This can be helpful if you need to load the same data multiple times, for example when you are profiling different alternatives in your code and want to see how each performs with a fully loaded database. In that case you would want to have the same data starting point for each alternative you evaluate. Once the script file has been generated, it is significantly faster to load that data over and over by executing the existing script.
343
+
344
+ If `output` is nil or empty, no script file will be written.
345
+
346
+ Example usage:
347
+
348
+ ```ruby
349
+ ActiveRecordDataLoader.configure do |c|
350
+ c.output = "./my_script.sql" # Outputs to the provided file
351
+ end
352
+ ```
353
+
354
+ When using an output script file with Postgres, the resulting script will have `\COPY` commands which reference CSV files that contain the data batches to be copied. The CSV files will be created alongside the SQL script and will have a naming convention of using the table name and the rows range for the given batch. For example `./my_script_customers_1_to_1000.csv`. Each `\COPY` command in the SQL file will reference the corresponding CSV file so all you need to do is execute the SQL file using `psql`:
355
+
356
+ ```bash
357
+ psql -h my-db-host -U my_user -f my_script.sql
358
+ ```
359
+
207
360
  ## Development
208
361
 
209
362
  After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
@@ -220,4 +373,4 @@ The gem is available as open source under the terms of the [MIT License](https:/
220
373
 
221
374
  ## Code of Conduct
222
375
 
223
- Everyone interacting in the ActiveRecord Data Loader project’s codebases, issue trackers, chat rooms and mailing lists is expected to follow the [code of conduct](https://github.com/abeiderman/active_record_data_loader/blob/master/CODE_OF_CONDUCT.md).
376
+ Everyone interacting in the _active_record_data_loader_ project’s codebases, issue trackers, chat rooms and mailing lists is expected to follow the [code of conduct](https://github.com/abeiderman/active_record_data_loader/blob/master/CODE_OF_CONDUCT.md).
data/Rakefile CHANGED
@@ -3,10 +3,16 @@
3
3
  require "bundler/gem_tasks"
4
4
  require "rspec/core/rake_task"
5
5
  require "rubocop/rake_task"
6
- require "coveralls/rake/task"
7
6
 
8
7
  RSpec::Core::RakeTask.new(:spec)
9
8
  RuboCop::RakeTask.new(:rubocop)
10
- Coveralls::RakeTask.new
11
9
 
12
10
  task default: [:spec, :rubocop]
11
+
12
+ task :wait_for_test_db do
13
+ require "active_record_data_loader"
14
+ require "./spec/active_record_helper"
15
+
16
+ ActiveRecordHelper.wait_for_mysql
17
+ ActiveRecordHelper.wait_for_postgres
18
+ end
@@ -8,7 +8,7 @@ Gem::Specification.new do |spec|
8
8
  spec.name = "active_record_data_loader"
9
9
  spec.version = ActiveRecordDataLoader::VERSION
10
10
  spec.authors = ["Alejandro Beiderman"]
11
- spec.email = ["abeiderman@gmail.com"]
11
+ spec.email = ["active_record_data_loader@ossprojects.dev"]
12
12
 
13
13
  spec.summary = "A utility to bulk load test data for performance testing."
14
14
  spec.description = "A utility to bulk load test data for performance testing."
@@ -20,7 +20,7 @@ Gem::Specification.new do |spec|
20
20
  spec.metadata["source_code_uri"] = "https://github.com/abeiderman/active_record_data_loader"
21
21
  else
22
22
  raise "RubyGems 2.0 or newer is required to protect against " \
23
- "public gem pushes."
23
+ "public gem pushes."
24
24
  end
25
25
 
26
26
  spec.files = `git ls-files -z`.split("\x0").reject do |f|
@@ -30,20 +30,21 @@ Gem::Specification.new do |spec|
30
30
  spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
31
31
  spec.require_paths = ["lib"]
32
32
 
33
- spec.required_ruby_version = ">= 2.3.0"
33
+ spec.required_ruby_version = ">= 2.5.0"
34
34
 
35
- spec.add_dependency "activerecord", ">= 4.0"
35
+ spec.add_dependency "activerecord", ">= 5.0"
36
36
 
37
37
  spec.add_development_dependency "appraisal"
38
38
  spec.add_development_dependency "bundler", ">= 1.16"
39
- spec.add_development_dependency "coveralls"
40
39
  spec.add_development_dependency "mysql2"
41
40
  spec.add_development_dependency "pg"
42
41
  spec.add_development_dependency "pry"
43
- spec.add_development_dependency "rake", "~> 12.0"
42
+ spec.add_development_dependency "rake", "~> 13.0"
44
43
  spec.add_development_dependency "rspec", "~> 3.0"
45
44
  spec.add_development_dependency "rspec-collection_matchers"
46
45
  spec.add_development_dependency "rubocop"
46
+ spec.add_development_dependency "simplecov"
47
+ spec.add_development_dependency "simplecov-lcov"
47
48
  spec.add_development_dependency "sqlite3"
48
49
  spec.add_development_dependency "timecop"
49
50
  end
data/config/database.yml CHANGED
@@ -1,6 +1,7 @@
1
1
  postgres:
2
2
  adapter: "postgresql"
3
3
  host: "127.0.0.1"
4
+ port: "2345"
4
5
  database: "test"
5
6
  username: "test"
6
7
  password: "test"
@@ -12,6 +13,7 @@ sqlite3:
12
13
  mysql:
13
14
  adapter: "mysql2"
14
15
  host: "127.0.0.1"
16
+ port: "3306"
15
17
  database: "test"
16
18
  username: "test"
17
19
  password: "test"
@@ -0,0 +1,18 @@
1
+ version: "3.9"
2
+ services:
3
+ postgres:
4
+ image: postgres:11
5
+ ports:
6
+ - "2345:5432"
7
+ environment:
8
+ - POSTGRES_USER=test
9
+ - POSTGRES_PASSWORD=test
10
+ mysql:
11
+ image: mysql:5
12
+ ports:
13
+ - "3306:3306"
14
+ environment:
15
+ - MYSQL_ROOT_PASSWORD=test
16
+ - MYSQL_USER=test
17
+ - MYSQL_PASSWORD=test
18
+ - MYSQL_DATABASE=test
@@ -2,6 +2,6 @@
2
2
 
3
3
  source "https://rubygems.org"
4
4
 
5
- gem "activerecord", "6.0.0.rc1"
5
+ gem "activerecord", "~>6.1"
6
6
 
7
7
  gemspec path: "../"
@@ -2,30 +2,31 @@
2
2
 
3
3
  module ActiveRecordDataLoader
4
4
  module ActiveRecord
5
- class BelongsToConfiguration
6
- def self.config_for(ar_association:, query: nil)
5
+ class BelongsToDataProvider
6
+ def self.provider_for(ar_association:, query: nil, strategy: :random)
7
7
  raise "#{name} does not support polymorphic associations" if ar_association.polymorphic?
8
8
 
9
- { ar_association.join_foreign_key.to_sym => new(ar_association, query).foreign_key_func }
9
+ { ar_association.join_foreign_key.to_sym => new(ar_association, query, strategy).foreign_key_func }
10
10
  end
11
11
 
12
- def initialize(ar_association, query)
12
+ def initialize(ar_association, query, strategy)
13
13
  @ar_association = ar_association
14
14
  @query = query
15
+ @strategy = strategy
15
16
  end
16
17
 
17
18
  def foreign_key_func
18
- -> { possible_values.sample }
19
+ -> { possible_values.next }
19
20
  end
20
21
 
21
22
  private
22
23
 
23
24
  def possible_values
24
- @possible_values ||= base_query.pluck(@ar_association.join_primary_key).to_a
25
+ @possible_values ||= List.for(base_query.pluck(@ar_association.join_primary_key), strategy: @strategy)
25
26
  end
26
27
 
27
28
  def base_query
28
- if @query&.respond_to?(:call)
29
+ if @query.respond_to?(:call)
29
30
  @query.call.all
30
31
  else
31
32
  @ar_association.klass.all
@@ -2,7 +2,7 @@
2
2
 
3
3
  module ActiveRecordDataLoader
4
4
  module ActiveRecord
5
- class ColumnConfiguration
5
+ class ColumnDataProvider
6
6
  class << self
7
7
  VALUE_GENERATORS = {
8
8
  enum: EnumValueGenerator,
@@ -12,7 +12,7 @@ module ActiveRecordDataLoader
12
12
  datetime: DatetimeValueGenerator,
13
13
  }.freeze
14
14
 
15
- def config_for(model_class:, ar_column:, connection_factory:)
15
+ def provider_for(model_class:, ar_column:, connection_factory:)
16
16
  raise_error_if_not_supported(model_class, ar_column)
17
17
 
18
18
  {
@@ -5,34 +5,35 @@ module ActiveRecordDataLoader
5
5
  class EnumValueGenerator
6
6
  class << self
7
7
  def generator_for(model_class:, ar_column:, connection_factory:)
8
- values = enum_values_for(model_class, ar_column.sql_type, connection_factory)
8
+ values = enum_values_for(ar_column.sql_type, connection_factory)
9
9
  -> { values.sample }
10
10
  end
11
11
 
12
12
  private
13
13
 
14
- def enum_values_for(model_class, enum_type, connection_factory)
14
+ def enum_values_for(enum_type, connection_factory)
15
15
  connection = connection_factory.call
16
16
 
17
17
  if connection.adapter_name.downcase.to_sym == :postgresql
18
- postgres_enum_values_for(model_class, enum_type)
18
+ postgres_enum_values_for(connection, enum_type)
19
19
  elsif connection.adapter_name.downcase.to_s.start_with?("mysql")
20
- mysql_enum_values_for(model_class, enum_type)
20
+ mysql_enum_values_for(enum_type)
21
21
  else
22
22
  []
23
23
  end
24
+ ensure
25
+ connection&.close
24
26
  end
25
27
 
26
- def postgres_enum_values_for(model_class, enum_type)
27
- model_class
28
- .connection
28
+ def postgres_enum_values_for(connection, enum_type)
29
+ connection
29
30
  .execute("SELECT unnest(enum_range(NULL::#{enum_type}))::text")
30
31
  .map(&:values)
31
32
  .flatten
32
33
  .compact
33
34
  end
34
35
 
35
- def mysql_enum_values_for(_model_class, enum_type)
36
+ def mysql_enum_values_for(enum_type)
36
37
  enum_type
37
38
  .to_s
38
39
  .downcase
@@ -5,7 +5,7 @@ module ActiveRecordDataLoader
5
5
  class IntegerValueGenerator
6
6
  class << self
7
7
  def generator_for(model_class:, ar_column:, connection_factory: nil)
8
- range_limit = [(256**number_of_bytes(ar_column)) / 2 - 1, 1_000_000_000].min
8
+ range_limit = [((256**number_of_bytes(ar_column)) / 2) - 1, 1_000_000_000].min
9
9
 
10
10
  -> { rand(0..range_limit) }
11
11
  end
@@ -0,0 +1,47 @@
1
+ # frozen_string_literal: true
2
+
3
+ module ActiveRecordDataLoader
4
+ module ActiveRecord
5
+ class List
6
+ def self.for(enumerable, strategy: :random)
7
+ if strategy == :random_cycle
8
+ RandomCycle.new(enumerable)
9
+ else
10
+ Random.new(enumerable)
11
+ end
12
+ end
13
+
14
+ class Random
15
+ def initialize(enumerable)
16
+ @list = enumerable
17
+ end
18
+
19
+ def next
20
+ @list.sample
21
+ end
22
+ end
23
+
24
+ class RandomCycle
25
+ def initialize(enumerable)
26
+ @enumerable = enumerable
27
+ @count = enumerable.count
28
+ reset_list
29
+ end
30
+
31
+ def next
32
+ value = @list.next
33
+ reset_list if (@index += 1) >= @count
34
+ value
35
+ end
36
+
37
+ private
38
+
39
+ def reset_list
40
+ @index = 0
41
+ @enumerable = @enumerable.shuffle
42
+ @list = @enumerable.cycle
43
+ end
44
+ end
45
+ end
46
+ end
47
+ end
@@ -8,9 +8,12 @@ module ActiveRecordDataLoader
8
8
  def initialize(
9
9
  model:,
10
10
  column_settings:,
11
+ connection_factory:,
12
+ logger:,
13
+ raise_on_duplicates:,
14
+ max_duplicate_retries:,
11
15
  polymorphic_settings: [],
12
- belongs_to_settings: [],
13
- connection_factory:
16
+ belongs_to_settings: []
14
17
  )
15
18
  @model_class = model
16
19
  @table = model.table_name
@@ -18,6 +21,11 @@ module ActiveRecordDataLoader
18
21
  @polymorphic_settings = polymorphic_settings
19
22
  @belongs_to_settings = belongs_to_settings.map { |s| [s.name, s.query] }.to_h
20
23
  @connection_factory = connection_factory
24
+ @raise_on_duplicates = raise_on_duplicates
25
+ @max_duplicate_retries = max_duplicate_retries
26
+ @logger = logger
27
+ @index_tracker = UniqueIndexTracker.new(model: model, connection_factory: connection_factory)
28
+ @index_tracker.map_indexed_columns(column_list)
21
29
  end
22
30
 
23
31
  def column_list
@@ -25,11 +33,41 @@ module ActiveRecordDataLoader
25
33
  end
26
34
 
27
35
  def generate_row(row_number)
28
- column_list.map { |c| column_data(row_number, c) }
36
+ @index_tracker.capture_unique_values(generate_row_with_retries(row_number))
29
37
  end
30
38
 
31
39
  private
32
40
 
41
+ def generate_row_with_retries(row_number)
42
+ retries = 0
43
+ while @index_tracker.repeating_unique_values?(row = generate_candidate_row(row_number))
44
+ if (retries += 1) > @max_duplicate_retries
45
+ raise DuplicateKeyError, <<~MSG if @raise_on_duplicates
46
+ Exhausted retries looking for unique values for row #{row_number} for '#{table}'.
47
+ Table '#{table}' has unique indexes that would have prevented inserting this row. If you would
48
+ like to skip non-unique rows instead of raising, configure `raise_on_duplicates` to be `false`.
49
+ MSG
50
+
51
+ @logger.warn(
52
+ "[ActiveRecordDataLoader] "\
53
+ "Exhausted retries looking for unique values. Skipping row #{row_number} for '#{table}'."
54
+ )
55
+ return nil
56
+ else
57
+ @logger.info(
58
+ "[ActiveRecordDataLoader] "\
59
+ "Retrying row #{row_number} for '#{table}' looking for unique values compliant with indexes. "\
60
+ "Retry number #{retries}."
61
+ )
62
+ end
63
+ end
64
+ row
65
+ end
66
+
67
+ def generate_candidate_row(row_number)
68
+ column_list.map { |c| column_data(row_number, c) }
69
+ end
70
+
33
71
  def column_data(row_number, column)
34
72
  column_value = columns[column]
35
73
  return column_value unless column_value.respond_to?(:call)
@@ -56,9 +94,9 @@ module ActiveRecordDataLoader
56
94
  @model_class
57
95
  .columns_hash
58
96
  .reject { |name| name == @model_class.primary_key }
59
- .select { |_, c| ColumnConfiguration.supported?(model_class: @model_class, ar_column: c) }
97
+ .select { |_, c| ColumnDataProvider.supported?(model_class: @model_class, ar_column: c) }
60
98
  .map do |_, c|
61
- ColumnConfiguration.config_for(
99
+ ColumnDataProvider.provider_for(
62
100
  model_class: @model_class,
63
101
  ar_column: c,
64
102
  connection_factory: @connection_factory
@@ -73,16 +111,33 @@ module ActiveRecordDataLoader
73
111
  .select(&:belongs_to?)
74
112
  .reject(&:polymorphic?)
75
113
  .map do |assoc|
76
- BelongsToConfiguration.config_for(ar_association: assoc, query: @belongs_to_settings[assoc.name])
114
+ BelongsToDataProvider.provider_for(
115
+ ar_association: assoc,
116
+ query: @belongs_to_settings[assoc.name],
117
+ strategy: column_config_strategy(assoc)
118
+ )
77
119
  end
78
120
  .reduce({}, :merge)
79
121
  end
80
122
 
81
123
  def polymorphic_config
82
124
  @polymorphic_settings
83
- .map { |s| PolymorphicBelongsToConfiguration.config_for(polymorphic_settings: s) }
125
+ .map do |s|
126
+ PolymorphicBelongsToDataProvider.provider_for(
127
+ polymorphic_settings: s,
128
+ strategy: column_config_strategy(s.model_class.reflect_on_association(s.name))
129
+ )
130
+ end
84
131
  .reduce({}, :merge)
85
132
  end
133
+
134
+ def column_config_strategy(column)
135
+ if @index_tracker.contained_in_index?(column)
136
+ :random_cycle
137
+ else
138
+ :random
139
+ end
140
+ end
86
141
  end
87
142
  end
88
143
  end