active_record_data_loader 1.2.0 → 1.3.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.github/workflows/codeql-analysis.yml +70 -0
- data/.rubocop.yml +8 -2
- data/CHANGELOG.md +9 -0
- data/CODE_OF_CONDUCT.md +2 -2
- data/Gemfile.lock +24 -24
- data/README.md +88 -18
- data/active_record_data_loader.gemspec +1 -1
- data/lib/active_record_data_loader/active_record/{belongs_to_configuration.rb → belongs_to_data_provider.rb} +7 -6
- data/lib/active_record_data_loader/active_record/{column_configuration.rb → column_data_provider.rb} +2 -2
- data/lib/active_record_data_loader/active_record/list.rb +35 -0
- data/lib/active_record_data_loader/active_record/model_data_generator.rb +60 -5
- data/lib/active_record_data_loader/active_record/{polymorphic_belongs_to_configuration.rb → polymorphic_belongs_to_data_provider.rb} +11 -6
- data/lib/active_record_data_loader/active_record/unique_index_tracker.rb +67 -0
- data/lib/active_record_data_loader/bulk_insert_strategy.rb +16 -9
- data/lib/active_record_data_loader/configuration.rb +13 -30
- data/lib/active_record_data_loader/connection_handler.rb +23 -45
- data/lib/active_record_data_loader/copy_strategy.rb +21 -7
- data/lib/active_record_data_loader/data_faker.rb +12 -4
- data/lib/active_record_data_loader/dsl/model.rb +19 -2
- data/lib/active_record_data_loader/errors.rb +5 -0
- data/lib/active_record_data_loader/file_output_adapter.rb +20 -12
- data/lib/active_record_data_loader/loader.rb +61 -55
- data/lib/active_record_data_loader/null_output_adapter.rb +15 -0
- data/lib/active_record_data_loader/table_loader.rb +59 -0
- data/lib/active_record_data_loader/version.rb +1 -1
- data/lib/active_record_data_loader.rb +9 -41
- metadata +12 -7
- data/lib/active_record_data_loader/connection_output_adapter.rb +0 -20
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 19d357e3af1a3864a020996df4da4d1d4d710dccd15e50cc10c30f0498acb64b
|
4
|
+
data.tar.gz: 7a706dcaa17777c1fa93090212385fcd18742d84099663d9555e54452c046595
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 6b541ceb75b06c6152184ae4f9d66e6a1268a8a758c23ce959fb608a78a7860c8f17596fd90b5a9bbadf0853c2d2009b8b68304e6f852d4eed2a3e5ddd1b59d1
|
7
|
+
data.tar.gz: 509fce080286cae5770bbe6b42ace34b34f1804c8834114d7c0fbbd2e0323b2c785fde4188de6d7221666fd6489eb8229a76c47fea90cddcd96bf3628260c5e8
|
@@ -0,0 +1,70 @@
|
|
1
|
+
# For most projects, this workflow file will not need changing; you simply need
|
2
|
+
# to commit it to your repository.
|
3
|
+
#
|
4
|
+
# You may wish to alter this file to override the set of languages analyzed,
|
5
|
+
# or to provide custom queries or build logic.
|
6
|
+
#
|
7
|
+
# ******** NOTE ********
|
8
|
+
# We have attempted to detect the languages in your repository. Please check
|
9
|
+
# the `language` matrix defined below to confirm you have the correct set of
|
10
|
+
# supported CodeQL languages.
|
11
|
+
#
|
12
|
+
name: "CodeQL"
|
13
|
+
|
14
|
+
on:
|
15
|
+
push:
|
16
|
+
branches: [ master ]
|
17
|
+
pull_request:
|
18
|
+
# The branches below must be a subset of the branches above
|
19
|
+
branches: [ master ]
|
20
|
+
schedule:
|
21
|
+
- cron: '26 13 * * 4'
|
22
|
+
|
23
|
+
jobs:
|
24
|
+
analyze:
|
25
|
+
name: Analyze
|
26
|
+
runs-on: ubuntu-latest
|
27
|
+
permissions:
|
28
|
+
actions: read
|
29
|
+
contents: read
|
30
|
+
security-events: write
|
31
|
+
|
32
|
+
strategy:
|
33
|
+
fail-fast: false
|
34
|
+
matrix:
|
35
|
+
language: [ 'ruby' ]
|
36
|
+
# CodeQL supports [ 'cpp', 'csharp', 'go', 'java', 'javascript', 'python', 'ruby' ]
|
37
|
+
# Learn more about CodeQL language support at https://git.io/codeql-language-support
|
38
|
+
|
39
|
+
steps:
|
40
|
+
- name: Checkout repository
|
41
|
+
uses: actions/checkout@v2
|
42
|
+
|
43
|
+
# Initializes the CodeQL tools for scanning.
|
44
|
+
- name: Initialize CodeQL
|
45
|
+
uses: github/codeql-action/init@v1
|
46
|
+
with:
|
47
|
+
languages: ${{ matrix.language }}
|
48
|
+
# If you wish to specify custom queries, you can do so here or in a config file.
|
49
|
+
# By default, queries listed here will override any specified in a config file.
|
50
|
+
# Prefix the list here with "+" to use these queries and those in the config file.
|
51
|
+
# queries: ./path/to/local/query, your-org/your-repo/queries@main
|
52
|
+
|
53
|
+
# Autobuild attempts to build any compiled languages (C/C++, C#, or Java).
|
54
|
+
# If this step fails, then you should remove it and run the build manually (see below)
|
55
|
+
- name: Autobuild
|
56
|
+
uses: github/codeql-action/autobuild@v1
|
57
|
+
|
58
|
+
# ℹ️ Command-line programs to run using the OS shell.
|
59
|
+
# 📚 https://git.io/JvXDl
|
60
|
+
|
61
|
+
# ✏️ If the Autobuild fails above, remove it and uncomment the following three lines
|
62
|
+
# and modify them (or add more) to build your code if your project
|
63
|
+
# uses a compiled language
|
64
|
+
|
65
|
+
#- run: |
|
66
|
+
# make bootstrap
|
67
|
+
# make release
|
68
|
+
|
69
|
+
- name: Perform CodeQL Analysis
|
70
|
+
uses: github/codeql-action/analyze@v1
|
data/.rubocop.yml
CHANGED
@@ -3,6 +3,9 @@ AllCops:
|
|
3
3
|
NewCops: enable
|
4
4
|
SuggestExtensions: false
|
5
5
|
|
6
|
+
Gemspec/RequireMFA:
|
7
|
+
Enabled: false
|
8
|
+
|
6
9
|
Layout/LineLength:
|
7
10
|
Max: 110
|
8
11
|
Exclude: ["*.gemspec"]
|
@@ -27,15 +30,18 @@ Metrics/BlockLength:
|
|
27
30
|
Exclude: ["spec/**/*", "*.gemspec"]
|
28
31
|
|
29
32
|
Metrics/ClassLength:
|
33
|
+
Max: 150
|
30
34
|
Exclude: ["spec/**/*", "*.gemspec"]
|
31
35
|
|
32
36
|
Metrics/MethodLength:
|
33
|
-
Max:
|
37
|
+
Max: 25
|
34
38
|
Exclude: ["spec/**/*"]
|
35
39
|
|
36
40
|
Metrics/ParameterLists:
|
37
41
|
Max: 5
|
38
|
-
Exclude:
|
42
|
+
Exclude:
|
43
|
+
- "lib/active_record_data_loader/configuration.rb"
|
44
|
+
- "lib/active_record_data_loader/active_record/model_data_generator.rb"
|
39
45
|
|
40
46
|
Style/CaseLikeIf:
|
41
47
|
Enabled: false
|
data/CHANGELOG.md
CHANGED
@@ -1,5 +1,13 @@
|
|
1
1
|
# Change log
|
2
2
|
|
3
|
+
## [v1.3.0] - 2021-12-10
|
4
|
+
|
5
|
+
[Diff](https://github.com/abeiderman/active_record_data_loader/compare/v1.2.0...v1.3.0)
|
6
|
+
|
7
|
+
### Changes:
|
8
|
+
* Replace the `:file` output option with simply accepting an optional file path as `output`. A SQL script file will be generated in addition to loading the data into the database.
|
9
|
+
* Identify and handle unique indexes by attempting to generate unique values. Add configuration options for behavior around duplicate rows.
|
10
|
+
|
3
11
|
## [v1.2.0] - 2021-11-14
|
4
12
|
|
5
13
|
[Diff](https://github.com/abeiderman/active_record_data_loader/compare/v1.1.0...v1.2.0)
|
@@ -40,3 +48,4 @@ Initial stable release
|
|
40
48
|
[v1.0.2]: https://github.com/abeiderman/active_record_data_loader/releases/tag/v1.0.2
|
41
49
|
[v1.1.0]: https://github.com/abeiderman/active_record_data_loader/releases/tag/v1.1.0
|
42
50
|
[v1.2.0]: https://github.com/abeiderman/active_record_data_loader/releases/tag/v1.2.0
|
51
|
+
[v1.3.0]: https://github.com/abeiderman/active_record_data_loader/releases/tag/v1.3.0
|
data/CODE_OF_CONDUCT.md
CHANGED
@@ -55,8 +55,8 @@ further defined and clarified by project maintainers.
|
|
55
55
|
## Enforcement
|
56
56
|
|
57
57
|
Instances of abusive, harassing, or otherwise unacceptable behavior may be
|
58
|
-
reported by contacting the project team at
|
59
|
-
complaints will be reviewed and investigated and will result in a response that
|
58
|
+
reported by contacting the project team at active_record_data_loader@ossprojects.dev.
|
59
|
+
All complaints will be reviewed and investigated and will result in a response that
|
60
60
|
is deemed necessary and appropriate to the circumstances. The project team is
|
61
61
|
obligated to maintain confidentiality with regard to the reporter of an incident.
|
62
62
|
Further details of specific enforcement policies may be posted separately.
|
data/Gemfile.lock
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
active_record_data_loader (1.
|
4
|
+
active_record_data_loader (1.3.0)
|
5
5
|
activerecord (>= 5.0)
|
6
6
|
|
7
7
|
GEM
|
@@ -18,14 +18,14 @@ GEM
|
|
18
18
|
minitest (>= 5.1)
|
19
19
|
tzinfo (~> 2.0)
|
20
20
|
zeitwerk (~> 2.3)
|
21
|
-
appraisal (2.
|
21
|
+
appraisal (2.4.1)
|
22
22
|
bundler
|
23
23
|
rake
|
24
24
|
thor (>= 0.14.0)
|
25
25
|
ast (2.4.2)
|
26
|
-
coderay (1.1.
|
26
|
+
coderay (1.1.3)
|
27
27
|
concurrent-ruby (1.1.9)
|
28
|
-
diff-lcs (1.
|
28
|
+
diff-lcs (1.4.4)
|
29
29
|
docile (1.4.0)
|
30
30
|
i18n (1.8.11)
|
31
31
|
concurrent-ruby (~> 1.0)
|
@@ -33,32 +33,32 @@ GEM
|
|
33
33
|
minitest (5.14.4)
|
34
34
|
mysql2 (0.5.3)
|
35
35
|
parallel (1.21.0)
|
36
|
-
parser (3.0.2
|
36
|
+
parser (3.0.3.2)
|
37
37
|
ast (~> 2.4.1)
|
38
38
|
pg (1.2.3)
|
39
|
-
pry (0.
|
39
|
+
pry (0.14.1)
|
40
40
|
coderay (~> 1.1)
|
41
41
|
method_source (~> 1.0)
|
42
42
|
rainbow (3.0.0)
|
43
|
-
rake (13.0.
|
44
|
-
regexp_parser (2.
|
43
|
+
rake (13.0.6)
|
44
|
+
regexp_parser (2.2.0)
|
45
45
|
rexml (3.2.5)
|
46
|
-
rspec (3.
|
47
|
-
rspec-core (~> 3.
|
48
|
-
rspec-expectations (~> 3.
|
49
|
-
rspec-mocks (~> 3.
|
46
|
+
rspec (3.10.0)
|
47
|
+
rspec-core (~> 3.10.0)
|
48
|
+
rspec-expectations (~> 3.10.0)
|
49
|
+
rspec-mocks (~> 3.10.0)
|
50
50
|
rspec-collection_matchers (1.2.0)
|
51
51
|
rspec-expectations (>= 2.99.0.beta1)
|
52
|
-
rspec-core (3.
|
53
|
-
rspec-support (~> 3.
|
54
|
-
rspec-expectations (3.
|
52
|
+
rspec-core (3.10.1)
|
53
|
+
rspec-support (~> 3.10.0)
|
54
|
+
rspec-expectations (3.10.1)
|
55
55
|
diff-lcs (>= 1.2.0, < 2.0)
|
56
|
-
rspec-support (~> 3.
|
57
|
-
rspec-mocks (3.
|
56
|
+
rspec-support (~> 3.10.0)
|
57
|
+
rspec-mocks (3.10.2)
|
58
58
|
diff-lcs (>= 1.2.0, < 2.0)
|
59
|
-
rspec-support (~> 3.
|
60
|
-
rspec-support (3.
|
61
|
-
rubocop (1.
|
59
|
+
rspec-support (~> 3.10.0)
|
60
|
+
rspec-support (3.10.3)
|
61
|
+
rubocop (1.23.0)
|
62
62
|
parallel (~> 1.10)
|
63
63
|
parser (>= 3.0.0.0)
|
64
64
|
rainbow (>= 2.2.2, < 4.0)
|
@@ -67,7 +67,7 @@ GEM
|
|
67
67
|
rubocop-ast (>= 1.12.0, < 2.0)
|
68
68
|
ruby-progressbar (~> 1.7)
|
69
69
|
unicode-display_width (>= 1.4.0, < 3.0)
|
70
|
-
rubocop-ast (1.
|
70
|
+
rubocop-ast (1.14.0)
|
71
71
|
parser (>= 3.0.1.1)
|
72
72
|
ruby-progressbar (1.11.0)
|
73
73
|
simplecov (0.21.2)
|
@@ -77,9 +77,9 @@ GEM
|
|
77
77
|
simplecov-html (0.12.3)
|
78
78
|
simplecov-lcov (0.8.0)
|
79
79
|
simplecov_json_formatter (0.1.3)
|
80
|
-
sqlite3 (1.4.
|
81
|
-
thor (
|
82
|
-
timecop (0.9.
|
80
|
+
sqlite3 (1.4.2)
|
81
|
+
thor (1.1.0)
|
82
|
+
timecop (0.9.4)
|
83
83
|
tzinfo (2.0.4)
|
84
84
|
concurrent-ruby (~> 1.0)
|
85
85
|
unicode-display_width (2.1.0)
|
data/README.md
CHANGED
@@ -41,6 +41,7 @@ Polymorphic associations need to be defined explicitly as shown in [Polymorphic
|
|
41
41
|
### Basic usage
|
42
42
|
|
43
43
|
Let's say you have the following models:
|
44
|
+
|
44
45
|
```ruby
|
45
46
|
class Customer < ApplicationRecord
|
46
47
|
end
|
@@ -51,6 +52,7 @@ end
|
|
51
52
|
```
|
52
53
|
|
53
54
|
The following code will create 10,000 customers and 100,000 orders, and will associate the orders to those customers evenly:
|
55
|
+
|
54
56
|
```ruby
|
55
57
|
data_loader = ActiveRecordDataLoader.define do
|
56
58
|
model Customer do |m|
|
@@ -67,6 +69,7 @@ data_loader.load_data
|
|
67
69
|
|
68
70
|
#### Overriding column values
|
69
71
|
To provide your own values for columns you can provide a lambda or a constant value:
|
72
|
+
|
70
73
|
```ruby
|
71
74
|
data_loader = ActiveRecordDataLoader.define do
|
72
75
|
model Customer do |m|
|
@@ -91,7 +94,7 @@ In this example, we are creating 25K orders for customers in CAN with a CAD curr
|
|
91
94
|
data_loader = ActiveRecordDataLoader.define do
|
92
95
|
model Customer do |m|
|
93
96
|
m.count 10_000
|
94
|
-
m.column :country, -> { %w[CAN
|
97
|
+
m.column :country, -> { %w[CAN MEX USA].sample }
|
95
98
|
end
|
96
99
|
|
97
100
|
model Order do |m|
|
@@ -121,6 +124,7 @@ data_loader.load_data
|
|
121
124
|
If you have a polymorphic `belongs_to` association, you will need to define that explicitly for it to be populated.
|
122
125
|
|
123
126
|
Let's assume the following models where an order could belong to either a person or a business:
|
127
|
+
|
124
128
|
```ruby
|
125
129
|
class Person < ApplicationRecord
|
126
130
|
has_many :orders
|
@@ -136,6 +140,7 @@ end
|
|
136
140
|
```
|
137
141
|
|
138
142
|
In order to populate the `customer` association in orders, you would specify them like this:
|
143
|
+
|
139
144
|
```ruby
|
140
145
|
data_loader = ActiveRecordDataLoader.define do
|
141
146
|
model Person do |m|
|
@@ -160,6 +165,7 @@ data_loader.load_data
|
|
160
165
|
```
|
161
166
|
|
162
167
|
You can also provide a `weight` to each of the target models if you want to control how they are distributed. If you wanted to have twice as many orders for `Person` as for `Business`, it would look like this:
|
168
|
+
|
163
169
|
```ruby
|
164
170
|
data_loader = ActiveRecordDataLoader.define do
|
165
171
|
model Person do |m|
|
@@ -184,6 +190,7 @@ data_loader.load_data
|
|
184
190
|
```
|
185
191
|
|
186
192
|
Additionally, you can provide an `eligible_set` to control which records the association is limited to:
|
193
|
+
|
187
194
|
```ruby
|
188
195
|
data_loader = ActiveRecordDataLoader.define do
|
189
196
|
model Person do |m|
|
@@ -208,6 +215,81 @@ end
|
|
208
215
|
data_loader.load_data
|
209
216
|
```
|
210
217
|
|
218
|
+
### Unique indexes
|
219
|
+
|
220
|
+
Unique indexes will be detected automatically and the data generator will attempt to generate unique values for each row. The generator keeps track of unique values previously generated and retries rows with repeating values. Because some columns could be generating random values, retrying can eventually be successful.
|
221
|
+
|
222
|
+
There are a couple of behaviors you can control regarding preventing duplicates. The first is the number of times to retry a given row with duplicate values (that would fail the unique index/constraint). The second is what to do if a unique value cannot be generated after the retries are exhausted.
|
223
|
+
|
224
|
+
By default, there will be 5 retries per row and the row will be skipped after all retries are unsuccessful. This means fewer rows than requested may end up being populated on that table.
|
225
|
+
|
226
|
+
Alternatively, you can choose to raise an error if a unique row cannot be generated. You can also set the number of retries to 0 to not retry at all. If the table in question is a primary target for your testing and will be loaded with a lot of data, you will likely not want to have retries since it could potentially slow down data generation significantly.
|
227
|
+
|
228
|
+
Here is how to adjust these settings. Let's assume that `daily_notes` has a unique index spanning both `date` and `person_id`:
|
229
|
+
|
230
|
+
```ruby
|
231
|
+
class Person < ApplicationRecord
|
232
|
+
end
|
233
|
+
|
234
|
+
class DailyNotes < ApplicationRecord
|
235
|
+
belongs_to :person
|
236
|
+
end
|
237
|
+
|
238
|
+
data_loader = ActiveRecordDataLoader.define do
|
239
|
+
model Person do |m|
|
240
|
+
m.count 500
|
241
|
+
end
|
242
|
+
|
243
|
+
model DailyNotes do |m|
|
244
|
+
m.count 10_000
|
245
|
+
m.max_duplicate_retries 10
|
246
|
+
m.do_not_raise_on_duplicates
|
247
|
+
|
248
|
+
m.column :date, -> { Date.today - rand(20) }
|
249
|
+
end
|
250
|
+
end
|
251
|
+
|
252
|
+
data_loader.load_data
|
253
|
+
```
|
254
|
+
|
255
|
+
In the case above, retrying could be a reasonable choice since the date is generated at random and it's a small number of rows being generated.
|
256
|
+
|
257
|
+
If you want to disable retrying duplicates altogether and raise an error to fail fast you can specify it like this:
|
258
|
+
|
259
|
+
```ruby
|
260
|
+
class Person < ApplicationRecord
|
261
|
+
end
|
262
|
+
|
263
|
+
class Skill < ApplicationRecord
|
264
|
+
end
|
265
|
+
|
266
|
+
class SkillRating < ApplicationRecord
|
267
|
+
belongs_to :person
|
268
|
+
belongs_to :skill
|
269
|
+
end
|
270
|
+
|
271
|
+
data_loader = ActiveRecordDataLoader.define do
|
272
|
+
model Person do |m|
|
273
|
+
m.count 100_000
|
274
|
+
end
|
275
|
+
|
276
|
+
model Skill do |m|
|
277
|
+
m.count 100
|
278
|
+
end
|
279
|
+
|
280
|
+
model SkillRating do |m|
|
281
|
+
m.count 10_000_000
|
282
|
+
m.max_duplicate_retries 0
|
283
|
+
m.raise_on_duplicates
|
284
|
+
|
285
|
+
m.column :rating, -> { rand(1..10) }
|
286
|
+
end
|
287
|
+
end
|
288
|
+
|
289
|
+
data_loader.load_data
|
290
|
+
```
|
291
|
+
|
292
|
+
|
211
293
|
### Configuration options
|
212
294
|
|
213
295
|
You can define global configuration options like this:
|
@@ -257,31 +339,19 @@ end
|
|
257
339
|
|
258
340
|
#### output
|
259
341
|
|
260
|
-
The `output` option accepts
|
261
|
-
|
262
|
-
If `:file` is specified, instead of the data being loaded into the database, a script file will be generated. This script file can then be executed manually to load the data. This can be helpful if you need to load the same data multiple times. For example if you are profiling different alternatives in your code and you want to see how each performs with a fully loaded database. In that case you would want to have the same data starting point for each alternative you evaluate. By generating the script file ahead of time, it would be significantly faster to load that data over and over by executing the existing script.
|
263
|
-
|
264
|
-
Here are some examples on how to use the `output` option:
|
342
|
+
The `output` option accepts an optional file name to write a SQL script with the data loading statements. This script file can then be executed manually to load the data. This can be helpful if you need to load the same data multiple times. For example if you are profiling different alternatives in your code and you want to see how each performs with a fully loaded database. In that case you would want to have the same data starting point for each alternative you evaluate. By generating the script file, it would be significantly faster to load that data over and over by executing the existing script.
|
265
343
|
|
266
|
-
|
267
|
-
ActiveRecordDataLoader.configure do |c|
|
268
|
-
c.output = :connection # This is the default behavior
|
269
|
-
end
|
270
|
-
```
|
344
|
+
If `output` is nil or empty, no script file will be written.
|
271
345
|
|
272
|
-
|
273
|
-
ActiveRecordDataLoader.configure do |c|
|
274
|
-
c.output = :file # Outputs to a file with a default name
|
275
|
-
end
|
276
|
-
```
|
346
|
+
Example usage:
|
277
347
|
|
278
348
|
```ruby
|
279
349
|
ActiveRecordDataLoader.configure do |c|
|
280
|
-
c.output =
|
350
|
+
c.output = "./my_script.sql" # Outputs to the provided file
|
281
351
|
end
|
282
352
|
```
|
283
353
|
|
284
|
-
When using
|
354
|
+
When using an output script file with Postgres, the resulting script will have `\COPY` commands which reference CSV files that contain the data batches to be copied. The CSV files will be created alongside the SQL script and will be named using the table name and the row range for the given batch. For example `./my_script_customers_1_to_1000.csv`. Each `\COPY` command in the SQL file will reference the corresponding CSV file so all you need to do is execute the SQL file using `psql`:
|
285
355
|
|
286
356
|
```bash
|
287
357
|
psql -h my-db-host -U my_user -f my_script.sql
|
@@ -8,7 +8,7 @@ Gem::Specification.new do |spec|
|
|
8
8
|
spec.name = "active_record_data_loader"
|
9
9
|
spec.version = ActiveRecordDataLoader::VERSION
|
10
10
|
spec.authors = ["Alejandro Beiderman"]
|
11
|
-
spec.email = ["
|
11
|
+
spec.email = ["active_record_data_loader@ossprojects.dev"]
|
12
12
|
|
13
13
|
spec.summary = "A utility to bulk load test data for performance testing."
|
14
14
|
spec.description = "A utility to bulk load test data for performance testing."
|
@@ -2,26 +2,27 @@
|
|
2
2
|
|
3
3
|
module ActiveRecordDataLoader
|
4
4
|
module ActiveRecord
|
5
|
-
class
|
6
|
-
def self.
|
5
|
+
class BelongsToDataProvider
|
6
|
+
def self.provider_for(ar_association:, query: nil, strategy: :random)
|
7
7
|
raise "#{name} does not support polymorphic associations" if ar_association.polymorphic?
|
8
8
|
|
9
|
-
{ ar_association.join_foreign_key.to_sym => new(ar_association, query).foreign_key_func }
|
9
|
+
{ ar_association.join_foreign_key.to_sym => new(ar_association, query, strategy).foreign_key_func }
|
10
10
|
end
|
11
11
|
|
12
|
-
def initialize(ar_association, query)
|
12
|
+
def initialize(ar_association, query, strategy)
|
13
13
|
@ar_association = ar_association
|
14
14
|
@query = query
|
15
|
+
@strategy = strategy
|
15
16
|
end
|
16
17
|
|
17
18
|
def foreign_key_func
|
18
|
-
-> { possible_values.
|
19
|
+
-> { possible_values.next }
|
19
20
|
end
|
20
21
|
|
21
22
|
private
|
22
23
|
|
23
24
|
def possible_values
|
24
|
-
@possible_values ||= base_query.pluck(@ar_association.join_primary_key)
|
25
|
+
@possible_values ||= List.for(base_query.pluck(@ar_association.join_primary_key), strategy: @strategy)
|
25
26
|
end
|
26
27
|
|
27
28
|
def base_query
|
data/lib/active_record_data_loader/active_record/{column_configuration.rb → column_data_provider.rb}
RENAMED
@@ -2,7 +2,7 @@
|
|
2
2
|
|
3
3
|
module ActiveRecordDataLoader
|
4
4
|
module ActiveRecord
|
5
|
-
class
|
5
|
+
class ColumnDataProvider
|
6
6
|
class << self
|
7
7
|
VALUE_GENERATORS = {
|
8
8
|
enum: EnumValueGenerator,
|
@@ -12,7 +12,7 @@ module ActiveRecordDataLoader
|
|
12
12
|
datetime: DatetimeValueGenerator,
|
13
13
|
}.freeze
|
14
14
|
|
15
|
-
def
|
15
|
+
def provider_for(model_class:, ar_column:, connection_factory:)
|
16
16
|
raise_error_if_not_supported(model_class, ar_column)
|
17
17
|
|
18
18
|
{
|
@@ -0,0 +1,35 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module ActiveRecordDataLoader
|
4
|
+
module ActiveRecord
|
5
|
+
class List
|
6
|
+
def self.for(enumerable, strategy: :random)
|
7
|
+
if strategy == :cycle
|
8
|
+
Cycle.new(enumerable)
|
9
|
+
else
|
10
|
+
Random.new(enumerable)
|
11
|
+
end
|
12
|
+
end
|
13
|
+
|
14
|
+
class Random
|
15
|
+
def initialize(enumerable)
|
16
|
+
@list = enumerable
|
17
|
+
end
|
18
|
+
|
19
|
+
def next
|
20
|
+
@list.sample
|
21
|
+
end
|
22
|
+
end
|
23
|
+
|
24
|
+
class Cycle
|
25
|
+
def initialize(enumerable)
|
26
|
+
@list = enumerable.cycle
|
27
|
+
end
|
28
|
+
|
29
|
+
def next
|
30
|
+
@list.next
|
31
|
+
end
|
32
|
+
end
|
33
|
+
end
|
34
|
+
end
|
35
|
+
end
|
@@ -9,6 +9,9 @@ module ActiveRecordDataLoader
|
|
9
9
|
model:,
|
10
10
|
column_settings:,
|
11
11
|
connection_factory:,
|
12
|
+
logger:,
|
13
|
+
raise_on_duplicates:,
|
14
|
+
max_duplicate_retries:,
|
12
15
|
polymorphic_settings: [],
|
13
16
|
belongs_to_settings: []
|
14
17
|
)
|
@@ -18,6 +21,11 @@ module ActiveRecordDataLoader
|
|
18
21
|
@polymorphic_settings = polymorphic_settings
|
19
22
|
@belongs_to_settings = belongs_to_settings.map { |s| [s.name, s.query] }.to_h
|
20
23
|
@connection_factory = connection_factory
|
24
|
+
@raise_on_duplicates = raise_on_duplicates
|
25
|
+
@max_duplicate_retries = max_duplicate_retries
|
26
|
+
@logger = logger
|
27
|
+
@index_tracker = UniqueIndexTracker.new(model: model, connection_factory: connection_factory)
|
28
|
+
@index_tracker.map_indexed_columns(column_list)
|
21
29
|
end
|
22
30
|
|
23
31
|
def column_list
|
@@ -25,11 +33,41 @@ module ActiveRecordDataLoader
|
|
25
33
|
end
|
26
34
|
|
27
35
|
def generate_row(row_number)
|
28
|
-
|
36
|
+
@index_tracker.capture_unique_values(generate_row_with_retries(row_number))
|
29
37
|
end
|
30
38
|
|
31
39
|
private
|
32
40
|
|
41
|
+
def generate_row_with_retries(row_number)
|
42
|
+
retries = 0
|
43
|
+
while @index_tracker.repeating_unique_values?(row = generate_candidate_row(row_number))
|
44
|
+
if (retries += 1) > @max_duplicate_retries
|
45
|
+
raise DuplicateKeyError, <<~MSG if @raise_on_duplicates
|
46
|
+
Exhausted retries looking for unique values for row #{row_number} for '#{table}'.
|
47
|
+
Table '#{table}' has unique indexes that would have prevented inserting this row. If you would
|
48
|
+
like to skip non-unique rows instead of raising, configure `raise_on_duplicates` to be `false`.
|
49
|
+
MSG
|
50
|
+
|
51
|
+
@logger.warn(
|
52
|
+
"[ActiveRecordDataLoader] "\
|
53
|
+
"Exhausted retries looking for unique values. Skipping row #{row_number} for '#{table}'."
|
54
|
+
)
|
55
|
+
return nil
|
56
|
+
else
|
57
|
+
@logger.info(
|
58
|
+
"[ActiveRecordDataLoader] "\
|
59
|
+
"Retrying row #{row_number} for '#{table}' looking for unique values compliant with indexes. "\
|
60
|
+
"Retry number #{retries}."
|
61
|
+
)
|
62
|
+
end
|
63
|
+
end
|
64
|
+
row
|
65
|
+
end
|
66
|
+
|
67
|
+
def generate_candidate_row(row_number)
|
68
|
+
column_list.map { |c| column_data(row_number, c) }
|
69
|
+
end
|
70
|
+
|
33
71
|
def column_data(row_number, column)
|
34
72
|
column_value = columns[column]
|
35
73
|
return column_value unless column_value.respond_to?(:call)
|
@@ -56,9 +94,9 @@ module ActiveRecordDataLoader
|
|
56
94
|
@model_class
|
57
95
|
.columns_hash
|
58
96
|
.reject { |name| name == @model_class.primary_key }
|
59
|
-
.select { |_, c|
|
97
|
+
.select { |_, c| ColumnDataProvider.supported?(model_class: @model_class, ar_column: c) }
|
60
98
|
.map do |_, c|
|
61
|
-
|
99
|
+
ColumnDataProvider.provider_for(
|
62
100
|
model_class: @model_class,
|
63
101
|
ar_column: c,
|
64
102
|
connection_factory: @connection_factory
|
@@ -73,16 +111,33 @@ module ActiveRecordDataLoader
|
|
73
111
|
.select(&:belongs_to?)
|
74
112
|
.reject(&:polymorphic?)
|
75
113
|
.map do |assoc|
|
76
|
-
|
114
|
+
BelongsToDataProvider.provider_for(
|
115
|
+
ar_association: assoc,
|
116
|
+
query: @belongs_to_settings[assoc.name],
|
117
|
+
strategy: column_config_strategy(assoc)
|
118
|
+
)
|
77
119
|
end
|
78
120
|
.reduce({}, :merge)
|
79
121
|
end
|
80
122
|
|
81
123
|
def polymorphic_config
|
82
124
|
@polymorphic_settings
|
83
|
-
.map
|
125
|
+
.map do |s|
|
126
|
+
PolymorphicBelongsToDataProvider.provider_for(
|
127
|
+
polymorphic_settings: s,
|
128
|
+
strategy: column_config_strategy(s.model_class.reflect_on_association(s.name))
|
129
|
+
)
|
130
|
+
end
|
84
131
|
.reduce({}, :merge)
|
85
132
|
end
|
133
|
+
|
134
|
+
def column_config_strategy(column)
|
135
|
+
if @index_tracker.contained_in_index?(column)
|
136
|
+
:cycle
|
137
|
+
else
|
138
|
+
:random
|
139
|
+
end
|
140
|
+
end
|
86
141
|
end
|
87
142
|
end
|
88
143
|
end
|