active_record_data_loader 1.2.0 → 1.3.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (29) hide show
  1. checksums.yaml +4 -4
  2. data/.github/workflows/codeql-analysis.yml +70 -0
  3. data/.rubocop.yml +8 -2
  4. data/CHANGELOG.md +9 -0
  5. data/CODE_OF_CONDUCT.md +2 -2
  6. data/Gemfile.lock +24 -24
  7. data/README.md +88 -18
  8. data/active_record_data_loader.gemspec +1 -1
  9. data/lib/active_record_data_loader/active_record/{belongs_to_configuration.rb → belongs_to_data_provider.rb} +7 -6
  10. data/lib/active_record_data_loader/active_record/{column_configuration.rb → column_data_provider.rb} +2 -2
  11. data/lib/active_record_data_loader/active_record/list.rb +35 -0
  12. data/lib/active_record_data_loader/active_record/model_data_generator.rb +60 -5
  13. data/lib/active_record_data_loader/active_record/{polymorphic_belongs_to_configuration.rb → polymorphic_belongs_to_data_provider.rb} +11 -6
  14. data/lib/active_record_data_loader/active_record/unique_index_tracker.rb +67 -0
  15. data/lib/active_record_data_loader/bulk_insert_strategy.rb +16 -9
  16. data/lib/active_record_data_loader/configuration.rb +13 -30
  17. data/lib/active_record_data_loader/connection_handler.rb +23 -45
  18. data/lib/active_record_data_loader/copy_strategy.rb +21 -7
  19. data/lib/active_record_data_loader/data_faker.rb +12 -4
  20. data/lib/active_record_data_loader/dsl/model.rb +19 -2
  21. data/lib/active_record_data_loader/errors.rb +5 -0
  22. data/lib/active_record_data_loader/file_output_adapter.rb +20 -12
  23. data/lib/active_record_data_loader/loader.rb +61 -55
  24. data/lib/active_record_data_loader/null_output_adapter.rb +15 -0
  25. data/lib/active_record_data_loader/table_loader.rb +59 -0
  26. data/lib/active_record_data_loader/version.rb +1 -1
  27. data/lib/active_record_data_loader.rb +9 -41
  28. metadata +12 -7
  29. data/lib/active_record_data_loader/connection_output_adapter.rb +0 -20
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 8ae60492b342e71d8a1a2e20aff31794af784f1009346f4d5dc4375dd939bb48
4
- data.tar.gz: ab5be5bdf1cb5af69f5f8a1ac65ea8f64ee1338398c7f6cae6e15e6276d2e954
3
+ metadata.gz: 19d357e3af1a3864a020996df4da4d1d4d710dccd15e50cc10c30f0498acb64b
4
+ data.tar.gz: 7a706dcaa17777c1fa93090212385fcd18742d84099663d9555e54452c046595
5
5
  SHA512:
6
- metadata.gz: 8b860a06433a0661a765428f37df596ac0b3fd46ff447fbf3edfa5cf50f89dcda0f146b25074959ca5aee792dff153a56eb37888075ae10649d6cde1e598c53d
7
- data.tar.gz: dc9b43011f8c1b8c7b663b650b5ba11e43b9dd040a8ef6b7a58b24c7da903b1c963104a06c39f45e5ebaa4f01e815d8d6d565ded2fd9b5a186dcd50ed69b2e7c
6
+ metadata.gz: 6b541ceb75b06c6152184ae4f9d66e6a1268a8a758c23ce959fb608a78a7860c8f17596fd90b5a9bbadf0853c2d2009b8b68304e6f852d4eed2a3e5ddd1b59d1
7
+ data.tar.gz: 509fce080286cae5770bbe6b42ace34b34f1804c8834114d7c0fbbd2e0323b2c785fde4188de6d7221666fd6489eb8229a76c47fea90cddcd96bf3628260c5e8
@@ -0,0 +1,70 @@
1
+ # For most projects, this workflow file will not need changing; you simply need
2
+ # to commit it to your repository.
3
+ #
4
+ # You may wish to alter this file to override the set of languages analyzed,
5
+ # or to provide custom queries or build logic.
6
+ #
7
+ # ******** NOTE ********
8
+ # We have attempted to detect the languages in your repository. Please check
9
+ # the `language` matrix defined below to confirm you have the correct set of
10
+ # supported CodeQL languages.
11
+ #
12
+ name: "CodeQL"
13
+
14
+ on:
15
+ push:
16
+ branches: [ master ]
17
+ pull_request:
18
+ # The branches below must be a subset of the branches above
19
+ branches: [ master ]
20
+ schedule:
21
+ - cron: '26 13 * * 4'
22
+
23
+ jobs:
24
+ analyze:
25
+ name: Analyze
26
+ runs-on: ubuntu-latest
27
+ permissions:
28
+ actions: read
29
+ contents: read
30
+ security-events: write
31
+
32
+ strategy:
33
+ fail-fast: false
34
+ matrix:
35
+ language: [ 'ruby' ]
36
+ # CodeQL supports [ 'cpp', 'csharp', 'go', 'java', 'javascript', 'python', 'ruby' ]
37
+ # Learn more about CodeQL language support at https://git.io/codeql-language-support
38
+
39
+ steps:
40
+ - name: Checkout repository
41
+ uses: actions/checkout@v2
42
+
43
+ # Initializes the CodeQL tools for scanning.
44
+ - name: Initialize CodeQL
45
+ uses: github/codeql-action/init@v1
46
+ with:
47
+ languages: ${{ matrix.language }}
48
+ # If you wish to specify custom queries, you can do so here or in a config file.
49
+ # By default, queries listed here will override any specified in a config file.
50
+ # Prefix the list here with "+" to use these queries and those in the config file.
51
+ # queries: ./path/to/local/query, your-org/your-repo/queries@main
52
+
53
+ # Autobuild attempts to build any compiled languages (C/C++, C#, or Java).
54
+ # If this step fails, then you should remove it and run the build manually (see below)
55
+ - name: Autobuild
56
+ uses: github/codeql-action/autobuild@v1
57
+
58
+ # ℹ️ Command-line programs to run using the OS shell.
59
+ # 📚 https://git.io/JvXDl
60
+
61
+ # ✏️ If the Autobuild fails above, remove it and uncomment the following three lines
62
+ # and modify them (or add more) to build your code if your project
63
+ # uses a compiled language
64
+
65
+ #- run: |
66
+ # make bootstrap
67
+ # make release
68
+
69
+ - name: Perform CodeQL Analysis
70
+ uses: github/codeql-action/analyze@v1
data/.rubocop.yml CHANGED
@@ -3,6 +3,9 @@ AllCops:
3
3
  NewCops: enable
4
4
  SuggestExtensions: false
5
5
 
6
+ Gemspec/RequireMFA:
7
+ Enabled: false
8
+
6
9
  Layout/LineLength:
7
10
  Max: 110
8
11
  Exclude: ["*.gemspec"]
@@ -27,15 +30,18 @@ Metrics/BlockLength:
27
30
  Exclude: ["spec/**/*", "*.gemspec"]
28
31
 
29
32
  Metrics/ClassLength:
33
+ Max: 150
30
34
  Exclude: ["spec/**/*", "*.gemspec"]
31
35
 
32
36
  Metrics/MethodLength:
33
- Max: 15
37
+ Max: 25
34
38
  Exclude: ["spec/**/*"]
35
39
 
36
40
  Metrics/ParameterLists:
37
41
  Max: 5
38
- Exclude: ["lib/active_record_data_loader/configuration.rb"]
42
+ Exclude:
43
+ - "lib/active_record_data_loader/configuration.rb"
44
+ - "lib/active_record_data_loader/active_record/model_data_generator.rb"
39
45
 
40
46
  Style/CaseLikeIf:
41
47
  Enabled: false
data/CHANGELOG.md CHANGED
@@ -1,5 +1,13 @@
1
1
  # Change log
2
2
 
3
+ ## [v1.3.0] - 2021-12-10
4
+
5
+ [Diff](https://github.com/abeiderman/active_record_data_loader/compare/v1.2.0...v1.3.0)
6
+
7
+ ### Changes:
8
+ * Replace the `:file` output option with simply accepting an optional file path as `output`. A SQL script file will be generated in addition to loading the data into the database.
9
+ * Identify and handle unique indexes by attempting to generate unique values. Add configuration options for behavior around duplicate rows.
10
+
3
11
  ## [v1.2.0] - 2021-11-14
4
12
 
5
13
  [Diff](https://github.com/abeiderman/active_record_data_loader/compare/v1.1.0...v1.2.0)
@@ -40,3 +48,4 @@ Initial stable release
40
48
  [v1.0.2]: https://github.com/abeiderman/active_record_data_loader/releases/tag/v1.0.2
41
49
  [v1.1.0]: https://github.com/abeiderman/active_record_data_loader/releases/tag/v1.1.0
42
50
  [v1.2.0]: https://github.com/abeiderman/active_record_data_loader/releases/tag/v1.2.0
51
+ [v1.3.0]: https://github.com/abeiderman/active_record_data_loader/releases/tag/v1.3.0
data/CODE_OF_CONDUCT.md CHANGED
@@ -55,8 +55,8 @@ further defined and clarified by project maintainers.
55
55
  ## Enforcement
56
56
 
57
57
  Instances of abusive, harassing, or otherwise unacceptable behavior may be
58
- reported by contacting the project team at abeiderman@gmail.com. All
59
- complaints will be reviewed and investigated and will result in a response that
58
+ reported by contacting the project team at active_record_data_loader@ossprojects.dev.
59
+ All complaints will be reviewed and investigated and will result in a response that
60
60
  is deemed necessary and appropriate to the circumstances. The project team is
61
61
  obligated to maintain confidentiality with regard to the reporter of an incident.
62
62
  Further details of specific enforcement policies may be posted separately.
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- active_record_data_loader (1.2.0)
4
+ active_record_data_loader (1.3.0)
5
5
  activerecord (>= 5.0)
6
6
 
7
7
  GEM
@@ -18,14 +18,14 @@ GEM
18
18
  minitest (>= 5.1)
19
19
  tzinfo (~> 2.0)
20
20
  zeitwerk (~> 2.3)
21
- appraisal (2.2.0)
21
+ appraisal (2.4.1)
22
22
  bundler
23
23
  rake
24
24
  thor (>= 0.14.0)
25
25
  ast (2.4.2)
26
- coderay (1.1.2)
26
+ coderay (1.1.3)
27
27
  concurrent-ruby (1.1.9)
28
- diff-lcs (1.3)
28
+ diff-lcs (1.4.4)
29
29
  docile (1.4.0)
30
30
  i18n (1.8.11)
31
31
  concurrent-ruby (~> 1.0)
@@ -33,32 +33,32 @@ GEM
33
33
  minitest (5.14.4)
34
34
  mysql2 (0.5.3)
35
35
  parallel (1.21.0)
36
- parser (3.0.2.0)
36
+ parser (3.0.3.2)
37
37
  ast (~> 2.4.1)
38
38
  pg (1.2.3)
39
- pry (0.13.1)
39
+ pry (0.14.1)
40
40
  coderay (~> 1.1)
41
41
  method_source (~> 1.0)
42
42
  rainbow (3.0.0)
43
- rake (13.0.1)
44
- regexp_parser (2.1.1)
43
+ rake (13.0.6)
44
+ regexp_parser (2.2.0)
45
45
  rexml (3.2.5)
46
- rspec (3.9.0)
47
- rspec-core (~> 3.9.0)
48
- rspec-expectations (~> 3.9.0)
49
- rspec-mocks (~> 3.9.0)
46
+ rspec (3.10.0)
47
+ rspec-core (~> 3.10.0)
48
+ rspec-expectations (~> 3.10.0)
49
+ rspec-mocks (~> 3.10.0)
50
50
  rspec-collection_matchers (1.2.0)
51
51
  rspec-expectations (>= 2.99.0.beta1)
52
- rspec-core (3.9.2)
53
- rspec-support (~> 3.9.3)
54
- rspec-expectations (3.9.1)
52
+ rspec-core (3.10.1)
53
+ rspec-support (~> 3.10.0)
54
+ rspec-expectations (3.10.1)
55
55
  diff-lcs (>= 1.2.0, < 2.0)
56
- rspec-support (~> 3.9.0)
57
- rspec-mocks (3.9.1)
56
+ rspec-support (~> 3.10.0)
57
+ rspec-mocks (3.10.2)
58
58
  diff-lcs (>= 1.2.0, < 2.0)
59
- rspec-support (~> 3.9.0)
60
- rspec-support (3.9.3)
61
- rubocop (1.22.3)
59
+ rspec-support (~> 3.10.0)
60
+ rspec-support (3.10.3)
61
+ rubocop (1.23.0)
62
62
  parallel (~> 1.10)
63
63
  parser (>= 3.0.0.0)
64
64
  rainbow (>= 2.2.2, < 4.0)
@@ -67,7 +67,7 @@ GEM
67
67
  rubocop-ast (>= 1.12.0, < 2.0)
68
68
  ruby-progressbar (~> 1.7)
69
69
  unicode-display_width (>= 1.4.0, < 3.0)
70
- rubocop-ast (1.13.0)
70
+ rubocop-ast (1.14.0)
71
71
  parser (>= 3.0.1.1)
72
72
  ruby-progressbar (1.11.0)
73
73
  simplecov (0.21.2)
@@ -77,9 +77,9 @@ GEM
77
77
  simplecov-html (0.12.3)
78
78
  simplecov-lcov (0.8.0)
79
79
  simplecov_json_formatter (0.1.3)
80
- sqlite3 (1.4.1)
81
- thor (0.20.3)
82
- timecop (0.9.1)
80
+ sqlite3 (1.4.2)
81
+ thor (1.1.0)
82
+ timecop (0.9.4)
83
83
  tzinfo (2.0.4)
84
84
  concurrent-ruby (~> 1.0)
85
85
  unicode-display_width (2.1.0)
data/README.md CHANGED
@@ -41,6 +41,7 @@ Polymorphic associations need to be defined explicitly as shown in [Polymorphic
41
41
  ### Basic usage
42
42
 
43
43
  Let's say you have the following models:
44
+
44
45
  ```ruby
45
46
  class Customer < ApplicationRecord
46
47
  end
@@ -51,6 +52,7 @@ end
51
52
  ```
52
53
 
53
54
  The following code will create 10,000 customers and 100,000 orders, and will associate the orders to those customers evenly:
55
+
54
56
  ```ruby
55
57
  data_loader = ActiveRecordDataLoader.define do
56
58
  model Customer do |m|
@@ -67,6 +69,7 @@ data_loader.load_data
67
69
 
68
70
  #### Overriding column values
69
71
  To provide your own values for columns you can provide a lambda or a constant value:
72
+
70
73
  ```ruby
71
74
  data_loader = ActiveRecordDataLoader.define do
72
75
  model Customer do |m|
@@ -91,7 +94,7 @@ In this example, we are creating 25K orders for customers in CAN with a CAD curr
91
94
  data_loader = ActiveRecordDataLoader.define do
92
95
  model Customer do |m|
93
96
  m.count 10_000
94
- m.column :country, -> { %w[CAN MXN USA].sample }
97
+ m.column :country, -> { %w[CAN MEX USA].sample }
95
98
  end
96
99
 
97
100
  model Order do |m|
@@ -121,6 +124,7 @@ data_loader.load_data
121
124
  If you have a polymorphic `belongs_to` association, you will need to define that explicitly for it to be populated.
122
125
 
123
126
  Let's assume the following models where an order could belong to either a person or a business:
127
+
124
128
  ```ruby
125
129
  class Person < ApplicationRecord
126
130
  has_many :orders
@@ -136,6 +140,7 @@ end
136
140
  ```
137
141
 
138
142
  In order to populate the `customer` association in orders, you would specify them like this:
143
+
139
144
  ```ruby
140
145
  data_loader = ActiveRecordDataLoader.define do
141
146
  model Person do |m|
@@ -160,6 +165,7 @@ data_loader.load_data
160
165
  ```
161
166
 
162
167
  You can also provide a `weight` to each of the target models if you want to control how they are distributed. If you wanted to have twice as many orders for `Person` as for `Business`, it would look like this:
168
+
163
169
  ```ruby
164
170
  data_loader = ActiveRecordDataLoader.define do
165
171
  model Person do |m|
@@ -184,6 +190,7 @@ data_loader.load_data
184
190
  ```
185
191
 
186
192
  Additionally, you can also provide an `eligible_set` to control which records to limit the association to:
193
+
187
194
  ```ruby
188
195
  data_loader = ActiveRecordDataLoader.define do
189
196
  model Person do |m|
@@ -208,6 +215,81 @@ end
208
215
  data_loader.load_data
209
216
  ```
210
217
 
218
+ ### Unique indexes
219
+
220
+ Unique indexes will be detected automatically and the data generator will attempt to generate unique values for each row. The generator keeps track of unique values previously generated and retries rows with repeating values. Because some columns could be generating random values, retrying can eventually be successful.
221
+
222
+ There are a couple of behaviors you can control regarding preventing duplicates. The first is the number of times to retry a given row with duplicate values (that would fail the unique index/constraint). The second is what to do if a unique value cannot be generated after the retries are exhausted.
223
+
224
+ By default, there will be 5 retries per row and the row will be skipped after all retries are unsuccessful. This means fewer rows than requested may end up being populated on that table.
225
+
226
+ Alternatively, you can choose to raise an error if a unique row cannot be generated. You can also set the number of retries to 0 to not retry at all. If the table in question is a primary target for your testing and will be loaded with a lot of data, you will likely not want to have retries since it could potentially slow down data generation significantly.
227
+
228
+ Here is how to adjust these settings. Let's assume that `daily_notes` has a unique index on both `date` and `person_id`:
229
+
230
+ ```ruby
231
+ class Person < ApplicationRecord
232
+ end
233
+
234
+ class DailyNotes < ApplicationRecord
235
+ belongs_to :person
236
+ end
237
+
238
+ data_loader = ActiveRecordDataLoader.define do
239
+ model Person do |m|
240
+ m.count 500
241
+ end
242
+
243
+ model DailyNotes do |m|
244
+ m.count 10_000
245
+ m.max_duplicate_retries 10
246
+ m.do_not_raise_on_duplicates
247
+
248
+ m.column :date, -> { Date.today - rand(20) }
249
+ end
250
+ end
251
+
252
+ data_loader.load_data
253
+ ```
254
+
255
+ In the case above, retrying could be a reasonable choice since the date is generated at random and it's a small number of rows being generated.
256
+
257
+ If you want to disable retrying duplicates altogether and raise an error to fail fast you can specify it like this:
258
+
259
+ ```ruby
260
+ class Person < ApplicationRecord
261
+ end
262
+
263
+ class Skill < ApplicationRecord
264
+ end
265
+
266
+ class SkillRating < ApplicationRecord
267
+ belongs_to :person
268
+ belongs_to :skill
269
+ end
270
+
271
+ data_loader = ActiveRecordDataLoader.define do
272
+ model Person do |m|
273
+ m.count 100_000
274
+ end
275
+
276
+ model Skill do |m|
277
+ m.count 100
278
+ end
279
+
280
+ model SkillRating do |m|
281
+ m.count 10_000_000
282
+ m.max_duplicate_retries 0
283
+ m.raise_on_duplicates
284
+
285
+ m.column :rating, -> { rand(1..10) }
286
+ end
287
+ end
288
+
289
+ data_loader.load_data
290
+ ```
291
+
292
+
211
293
  ### Configuration options
212
294
 
213
295
  You can define global configuration options like this:
@@ -257,31 +339,19 @@ end
257
339
 
258
340
  #### output
259
341
 
260
- The `output` option accepts either `:connection` or `:file`. The default behavior is `:connection` which means the data will be loaded into the database using the database connection.
261
-
262
- If `:file` is specified, instead of the data being loaded into the database, a script file will be generated. This script file can then be executed manually to load the data. This can be helpful if you need to load the same data multiple times. For example if you are profiling different alternatives in your code and you want to see how each performs with a fully loaded database. In that case you would want to have the same data starting point for each alternative you evaluate. By generating the script file ahead of time, it would be significantly faster to load that data over and over by executing the existing script.
263
-
264
- Here are some examples on how to use the `output` option:
342
+ The `output` option accepts an optional file name to write a SQL script with the data loading statements. This script file can then be executed manually to load the data. This can be helpful if you need to load the same data multiple times. For example if you are profiling different alternatives in your code and you want to see how each performs with a fully loaded database. In that case you would want to have the same data starting point for each alternative you evaluate. By generating the script file, it would be significantly faster to load that data over and over by executing the existing script.
265
343
 
266
- ```ruby
267
- ActiveRecordDataLoader.configure do |c|
268
- c.output = :connection # This is the default behavior
269
- end
270
- ```
344
+ If `output` is nil or empty, no script file will be written.
271
345
 
272
- ```ruby
273
- ActiveRecordDataLoader.configure do |c|
274
- c.output = :file # Outputs to a file with a default name
275
- end
276
- ```
346
+ Example usage:
277
347
 
278
348
  ```ruby
279
349
  ActiveRecordDataLoader.configure do |c|
280
- c.output = { type: :file, filename: "./my_script.sql" } # Outputs to the provided file
350
+ c.output = "./my_script.sql" # Outputs to the provided file
281
351
  end
282
352
  ```
283
353
 
284
- When using the `:file` type with Postgres, the resulting script will have `\COPY` commands which reference CSV files that contain the data batches to be copied. The CSV files will be created along side the SQL script and will have a naming convention of using the table name and the rows range for the given batch. For example `./my_script_customers_1_to_1000.csv`. Each `\COPY` command in the SQL file will reference the corresponding CSV file so all you need to do is execute the SQL file using `psql`:
354
+ When using an output script file with Postgres, the resulting script will have `\COPY` commands which reference CSV files that contain the data batches to be copied. The CSV files will be created alongside the SQL script and will have a naming convention of using the table name and the rows range for the given batch. For example `./my_script_customers_1_to_1000.csv`. Each `\COPY` command in the SQL file will reference the corresponding CSV file so all you need to do is execute the SQL file using `psql`:
285
355
 
286
356
  ```bash
287
357
  psql -h my-db-host -U my_user -f my_script.sql
@@ -8,7 +8,7 @@ Gem::Specification.new do |spec|
8
8
  spec.name = "active_record_data_loader"
9
9
  spec.version = ActiveRecordDataLoader::VERSION
10
10
  spec.authors = ["Alejandro Beiderman"]
11
- spec.email = ["abeiderman@gmail.com"]
11
+ spec.email = ["active_record_data_loader@ossprojects.dev"]
12
12
 
13
13
  spec.summary = "A utility to bulk load test data for performance testing."
14
14
  spec.description = "A utility to bulk load test data for performance testing."
@@ -2,26 +2,27 @@
2
2
 
3
3
  module ActiveRecordDataLoader
4
4
  module ActiveRecord
5
- class BelongsToConfiguration
6
- def self.config_for(ar_association:, query: nil)
5
+ class BelongsToDataProvider
6
+ def self.provider_for(ar_association:, query: nil, strategy: :random)
7
7
  raise "#{name} does not support polymorphic associations" if ar_association.polymorphic?
8
8
 
9
- { ar_association.join_foreign_key.to_sym => new(ar_association, query).foreign_key_func }
9
+ { ar_association.join_foreign_key.to_sym => new(ar_association, query, strategy).foreign_key_func }
10
10
  end
11
11
 
12
- def initialize(ar_association, query)
12
+ def initialize(ar_association, query, strategy)
13
13
  @ar_association = ar_association
14
14
  @query = query
15
+ @strategy = strategy
15
16
  end
16
17
 
17
18
  def foreign_key_func
18
- -> { possible_values.sample }
19
+ -> { possible_values.next }
19
20
  end
20
21
 
21
22
  private
22
23
 
23
24
  def possible_values
24
- @possible_values ||= base_query.pluck(@ar_association.join_primary_key).to_a
25
+ @possible_values ||= List.for(base_query.pluck(@ar_association.join_primary_key), strategy: @strategy)
25
26
  end
26
27
 
27
28
  def base_query
@@ -2,7 +2,7 @@
2
2
 
3
3
  module ActiveRecordDataLoader
4
4
  module ActiveRecord
5
- class ColumnConfiguration
5
+ class ColumnDataProvider
6
6
  class << self
7
7
  VALUE_GENERATORS = {
8
8
  enum: EnumValueGenerator,
@@ -12,7 +12,7 @@ module ActiveRecordDataLoader
12
12
  datetime: DatetimeValueGenerator,
13
13
  }.freeze
14
14
 
15
- def config_for(model_class:, ar_column:, connection_factory:)
15
+ def provider_for(model_class:, ar_column:, connection_factory:)
16
16
  raise_error_if_not_supported(model_class, ar_column)
17
17
 
18
18
  {
@@ -0,0 +1,35 @@
1
+ # frozen_string_literal: true
2
+
3
+ module ActiveRecordDataLoader
4
+ module ActiveRecord
5
+ class List
6
+ def self.for(enumerable, strategy: :random)
7
+ if strategy == :cycle
8
+ Cycle.new(enumerable)
9
+ else
10
+ Random.new(enumerable)
11
+ end
12
+ end
13
+
14
+ class Random
15
+ def initialize(enumerable)
16
+ @list = enumerable
17
+ end
18
+
19
+ def next
20
+ @list.sample
21
+ end
22
+ end
23
+
24
+ class Cycle
25
+ def initialize(enumerable)
26
+ @list = enumerable.cycle
27
+ end
28
+
29
+ def next
30
+ @list.next
31
+ end
32
+ end
33
+ end
34
+ end
35
+ end
@@ -9,6 +9,9 @@ module ActiveRecordDataLoader
9
9
  model:,
10
10
  column_settings:,
11
11
  connection_factory:,
12
+ logger:,
13
+ raise_on_duplicates:,
14
+ max_duplicate_retries:,
12
15
  polymorphic_settings: [],
13
16
  belongs_to_settings: []
14
17
  )
@@ -18,6 +21,11 @@ module ActiveRecordDataLoader
18
21
  @polymorphic_settings = polymorphic_settings
19
22
  @belongs_to_settings = belongs_to_settings.map { |s| [s.name, s.query] }.to_h
20
23
  @connection_factory = connection_factory
24
+ @raise_on_duplicates = raise_on_duplicates
25
+ @max_duplicate_retries = max_duplicate_retries
26
+ @logger = logger
27
+ @index_tracker = UniqueIndexTracker.new(model: model, connection_factory: connection_factory)
28
+ @index_tracker.map_indexed_columns(column_list)
21
29
  end
22
30
 
23
31
  def column_list
@@ -25,11 +33,41 @@ module ActiveRecordDataLoader
25
33
  end
26
34
 
27
35
  def generate_row(row_number)
28
- column_list.map { |c| column_data(row_number, c) }
36
+ @index_tracker.capture_unique_values(generate_row_with_retries(row_number))
29
37
  end
30
38
 
31
39
  private
32
40
 
41
+ def generate_row_with_retries(row_number)
42
+ retries = 0
43
+ while @index_tracker.repeating_unique_values?(row = generate_candidate_row(row_number))
44
+ if (retries += 1) > @max_duplicate_retries
45
+ raise DuplicateKeyError, <<~MSG if @raise_on_duplicates
46
+ Exhausted retries looking for unique values for row #{row_number} for '#{table}'.
47
+ Table '#{table}' has unique indexes that would have prevented inserting this row. If you would
48
+ like to skip non-unique rows instead of raising, configure `raise_on_duplicates` to be `false`.
49
+ MSG
50
+
51
+ @logger.warn(
52
+ "[ActiveRecordDataLoader] "\
53
+ "Exhausted retries looking for unique values. Skipping row #{row_number} for '#{table}'."
54
+ )
55
+ return nil
56
+ else
57
+ @logger.info(
58
+ "[ActiveRecordDataLoader] "\
59
+ "Retrying row #{row_number} for '#{table}' looking for unique values compliant with indexes. "\
60
+ "Retry number #{retries}."
61
+ )
62
+ end
63
+ end
64
+ row
65
+ end
66
+
67
+ def generate_candidate_row(row_number)
68
+ column_list.map { |c| column_data(row_number, c) }
69
+ end
70
+
33
71
  def column_data(row_number, column)
34
72
  column_value = columns[column]
35
73
  return column_value unless column_value.respond_to?(:call)
@@ -56,9 +94,9 @@ module ActiveRecordDataLoader
56
94
  @model_class
57
95
  .columns_hash
58
96
  .reject { |name| name == @model_class.primary_key }
59
- .select { |_, c| ColumnConfiguration.supported?(model_class: @model_class, ar_column: c) }
97
+ .select { |_, c| ColumnDataProvider.supported?(model_class: @model_class, ar_column: c) }
60
98
  .map do |_, c|
61
- ColumnConfiguration.config_for(
99
+ ColumnDataProvider.provider_for(
62
100
  model_class: @model_class,
63
101
  ar_column: c,
64
102
  connection_factory: @connection_factory
@@ -73,16 +111,33 @@ module ActiveRecordDataLoader
73
111
  .select(&:belongs_to?)
74
112
  .reject(&:polymorphic?)
75
113
  .map do |assoc|
76
- BelongsToConfiguration.config_for(ar_association: assoc, query: @belongs_to_settings[assoc.name])
114
+ BelongsToDataProvider.provider_for(
115
+ ar_association: assoc,
116
+ query: @belongs_to_settings[assoc.name],
117
+ strategy: column_config_strategy(assoc)
118
+ )
77
119
  end
78
120
  .reduce({}, :merge)
79
121
  end
80
122
 
81
123
  def polymorphic_config
82
124
  @polymorphic_settings
83
- .map { |s| PolymorphicBelongsToConfiguration.config_for(polymorphic_settings: s) }
125
+ .map do |s|
126
+ PolymorphicBelongsToDataProvider.provider_for(
127
+ polymorphic_settings: s,
128
+ strategy: column_config_strategy(s.model_class.reflect_on_association(s.name))
129
+ )
130
+ end
84
131
  .reduce({}, :merge)
85
132
  end
133
+
134
+ def column_config_strategy(column)
135
+ if @index_tracker.contained_in_index?(column)
136
+ :cycle
137
+ else
138
+ :random
139
+ end
140
+ end
86
141
  end
87
142
  end
88
143
  end