active_record_data_loader 1.0.2 → 1.3.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/.github/workflows/build.yml +51 -0
- data/.github/workflows/codeql-analysis.yml +70 -0
- data/.github/workflows/gem-push.yml +29 -0
- data/.rubocop.yml +46 -7
- data/CHANGELOG.md +38 -2
- data/CODE_OF_CONDUCT.md +2 -2
- data/Gemfile.lock +71 -73
- data/README.md +162 -9
- data/Rakefile +8 -2
- data/active_record_data_loader.gemspec +7 -6
- data/config/database.yml +2 -0
- data/docker-compose.yml +18 -0
- data/gemfiles/activerecord_6.gemfile +1 -1
- data/lib/active_record_data_loader/active_record/{belongs_to_configuration.rb → belongs_to_data_provider.rb} +8 -7
- data/lib/active_record_data_loader/active_record/{column_configuration.rb → column_data_provider.rb} +2 -2
- data/lib/active_record_data_loader/active_record/enum_value_generator.rb +9 -8
- data/lib/active_record_data_loader/active_record/integer_value_generator.rb +1 -1
- data/lib/active_record_data_loader/active_record/list.rb +47 -0
- data/lib/active_record_data_loader/active_record/model_data_generator.rb +62 -7
- data/lib/active_record_data_loader/active_record/{polymorphic_belongs_to_configuration.rb → polymorphic_belongs_to_data_provider.rb} +12 -7
- data/lib/active_record_data_loader/active_record/unique_index_tracker.rb +67 -0
- data/lib/active_record_data_loader/bulk_insert_strategy.rb +16 -8
- data/lib/active_record_data_loader/configuration.rb +26 -3
- data/lib/active_record_data_loader/connection_handler.rb +52 -0
- data/lib/active_record_data_loader/copy_strategy.rb +38 -24
- data/lib/active_record_data_loader/data_faker.rb +12 -4
- data/lib/active_record_data_loader/dsl/model.rb +19 -2
- data/lib/active_record_data_loader/errors.rb +5 -0
- data/lib/active_record_data_loader/file_output_adapter.rb +48 -0
- data/lib/active_record_data_loader/loader.rb +55 -71
- data/lib/active_record_data_loader/null_output_adapter.rb +15 -0
- data/lib/active_record_data_loader/table_loader.rb +59 -0
- data/lib/active_record_data_loader/version.rb +1 -1
- data/lib/active_record_data_loader.rb +11 -38
- metadata +51 -29
- data/.travis.yml +0 -24
- data/config/database.yml.travis +0 -12
data/README.md
CHANGED
@@ -1,6 +1,6 @@
|
|
1
|
-
#
|
1
|
+
# active_record_data_loader
|
2
2
|
|
3
|
-
[![Build Status](https://
|
3
|
+
[![Build Status](https://github.com/abeiderman/active_record_data_loader/actions/workflows/build.yml/badge.svg)](https://github.com/abeiderman/active_record_data_loader/actions/workflows/build.yml)
|
4
4
|
[![Coverage Status](https://coveralls.io/repos/github/abeiderman/active_record_data_loader/badge.svg?branch=master&service=github)](https://coveralls.io/github/abeiderman/active_record_data_loader?branch=master)
|
5
5
|
[![Maintainability](https://api.codeclimate.com/v1/badges/338904b3f7e8d19a3cb1/maintainability)](https://codeclimate.com/github/abeiderman/active_record_data_loader/maintainability)
|
6
6
|
|
@@ -10,6 +10,10 @@ Efficiently bulk load data for your ActiveRecord models with a simple DSL.
|
|
10
10
|
|
11
11
|
Load, performance, and stress tests often require setting up a realistic amount of data in your database. This gem is intended to help organize that data load and make it more maintainable than having a collection of SQL scripts.
|
12
12
|
|
13
|
+
#### How is this different from using _factory_bot_?
|
14
|
+
|
15
|
+
This gem is not a replacement for [factory_bot](https://github.com/thoughtbot/factory_bot). It solves a different use case. While _factory_bot_ is great for organizing test data and reducing duplication in your functional tests, _active_record_data_loader_ is focused around bulk loading data for performance tests. The purpose of _active_record_data_loader_ is loading large amounts of data as efficiently as possible while providing a DSL that helps with maintainability.
|
16
|
+
|
13
17
|
## Installation
|
14
18
|
|
15
19
|
Add this line to your application's Gemfile:
|
@@ -37,6 +41,7 @@ Polymorphic associations need to be defined explicitly as shown in [Polymorphic
|
|
37
41
|
### Basic usage
|
38
42
|
|
39
43
|
Let's say you have the following models:
|
44
|
+
|
40
45
|
```ruby
|
41
46
|
class Customer < ApplicationRecord
|
42
47
|
end
|
@@ -47,6 +52,7 @@ end
|
|
47
52
|
```
|
48
53
|
|
49
54
|
The following code will create 10,000 customers and 100,000 orders, and will associate the orders to those customers evenly:
|
55
|
+
|
50
56
|
```ruby
|
51
57
|
data_loader = ActiveRecordDataLoader.define do
|
52
58
|
model Customer do |m|
|
@@ -63,6 +69,7 @@ data_loader.load_data
|
|
63
69
|
|
64
70
|
#### Overriding column values
|
65
71
|
To provide your own values for columns, you can provide a lambda or a constant value:
|
72
|
+
|
66
73
|
```ruby
|
67
74
|
data_loader = ActiveRecordDataLoader.define do
|
68
75
|
model Customer do |m|
|
@@ -87,7 +94,7 @@ In this example, we are creating 25K orders for customers in CAN with a CAD curr
|
|
87
94
|
data_loader = ActiveRecordDataLoader.define do
|
88
95
|
model Customer do |m|
|
89
96
|
m.count 10_000
|
90
|
-
m.column :country, -> { %w[CAN
|
97
|
+
m.column :country, -> { %w[CAN MEX USA].sample }
|
91
98
|
end
|
92
99
|
|
93
100
|
model Order do |m|
|
@@ -95,13 +102,13 @@ data_loader = ActiveRecordDataLoader.define do
|
|
95
102
|
m.column :currency, "CAD"
|
96
103
|
m.belongs_to :customer, eligible_set: -> { Customer.where(country: "CAN") }
|
97
104
|
end
|
98
|
-
|
105
|
+
|
99
106
|
model Order do |m|
|
100
107
|
m.count 25_000
|
101
108
|
m.column :currency, "MXN"
|
102
109
|
m.belongs_to :customer, eligible_set: -> { Customer.where(country: "MEX") }
|
103
110
|
end
|
104
|
-
|
111
|
+
|
105
112
|
model Order do |m|
|
106
113
|
m.count 50_000
|
107
114
|
m.column :currency, "USD"
|
@@ -117,6 +124,7 @@ data_loader.load_data
|
|
117
124
|
If you have a polymorphic `belongs_to` association, you will need to define that explicitly for it to be populated.
|
118
125
|
|
119
126
|
Let's assume the following models where an order could belong to either a person or a business:
|
127
|
+
|
120
128
|
```ruby
|
121
129
|
class Person < ApplicationRecord
|
122
130
|
has_many :orders
|
@@ -132,6 +140,7 @@ end
|
|
132
140
|
```
|
133
141
|
|
134
142
|
In order to populate the `customer` association in orders, you would specify them like this:
|
143
|
+
|
135
144
|
```ruby
|
136
145
|
data_loader = ActiveRecordDataLoader.define do
|
137
146
|
model Person do |m|
|
@@ -144,7 +153,7 @@ data_loader = ActiveRecordDataLoader.define do
|
|
144
153
|
|
145
154
|
model Order do |m|
|
146
155
|
m.count 100_000
|
147
|
-
|
156
|
+
|
148
157
|
m.polymorphic :customer do |c|
|
149
158
|
c.model Person
|
150
159
|
c.model Business
|
@@ -156,6 +165,7 @@ data_loader.load_data
|
|
156
165
|
```
|
157
166
|
|
158
167
|
You can also provide a `weight` to each of the target models if you want to control how they are distributed. If you wanted to have twice as many orders for `Person` as for `Business`, it would look like this:
|
168
|
+
|
159
169
|
```ruby
|
160
170
|
data_loader = ActiveRecordDataLoader.define do
|
161
171
|
model Person do |m|
|
@@ -168,7 +178,7 @@ data_loader = ActiveRecordDataLoader.define do
|
|
168
178
|
|
169
179
|
model Order do |m|
|
170
180
|
m.count 100_000
|
171
|
-
|
181
|
+
|
172
182
|
m.polymorphic :customer do |c|
|
173
183
|
c.model Person, weight: 2
|
174
184
|
c.model Business, weight: 1
|
@@ -180,6 +190,7 @@ data_loader.load_data
|
|
180
190
|
```
|
181
191
|
|
182
192
|
Additionally, you can also provide an `eligible_set` to control which records to limit the association to:
|
193
|
+
|
183
194
|
```ruby
|
184
195
|
data_loader = ActiveRecordDataLoader.define do
|
185
196
|
model Person do |m|
|
@@ -193,7 +204,7 @@ data_loader = ActiveRecordDataLoader.define do
|
|
193
204
|
|
194
205
|
model Order do |m|
|
195
206
|
m.count 100_000
|
196
|
-
|
207
|
+
|
197
208
|
m.polymorphic :customer do |c|
|
198
209
|
c.model Person, weight: 2
|
199
210
|
c.model Business, weight: 1, eligible_set: -> { Business.where(country: "USA") }
|
@@ -204,6 +215,148 @@ end
|
|
204
215
|
data_loader.load_data
|
205
216
|
```
|
206
217
|
|
218
|
+
### Unique indexes
|
219
|
+
|
220
|
+
Unique indexes will be detected automatically and the data generator will attempt to generate unique values for each row. The generator keeps track of unique values previously generated and retries rows with repeating values. Because some columns could be generating random values, retrying can eventually be successful.
|
221
|
+
|
222
|
+
There are a couple of behaviors you can control regarding preventing duplicates. The first is the number of times to retry a given row with duplicate values (that would fail the unique index/constraint). The second is what to do if a unique value cannot be generated after the retries are exhausted.
|
223
|
+
|
224
|
+
By default, there will be 5 retries per row and the row will be skipped after all retries are unsuccessful. This means fewer rows than requested may end up being populated on that table.
|
225
|
+
|
226
|
+
Alternatively, you can choose to raise an error if a unique row cannot be generated. You can also set the number of retries to 0 to not retry at all. If the table in question is a primary target for your testing and will be loaded with a lot of data, you will likely not want to have retries since it could potentially slow down data generation significantly.
|
227
|
+
|
228
|
+
Here is how to adjust these settings. Let's assume that `daily_notes` has a unique index on both `date` and `person_id`:
|
229
|
+
|
230
|
+
```ruby
|
231
|
+
class Person < ApplicationRecord
|
232
|
+
end
|
233
|
+
|
234
|
+
class DailyNotes < ApplicationRecord
|
235
|
+
belongs_to :person
|
236
|
+
end
|
237
|
+
|
238
|
+
data_loader = ActiveRecordDataLoader.define do
|
239
|
+
model Person do |m|
|
240
|
+
m.count 500
|
241
|
+
end
|
242
|
+
|
243
|
+
model DailyNotes do |m|
|
244
|
+
m.count 10_000
|
245
|
+
m.max_duplicate_retries 10
|
246
|
+
m.do_not_raise_on_duplicates
|
247
|
+
|
248
|
+
m.column :date, -> { Date.today - rand(20) }
|
249
|
+
end
|
250
|
+
end
|
251
|
+
|
252
|
+
data_loader.load_data
|
253
|
+
```
|
254
|
+
|
255
|
+
In the case above, retrying could be a reasonable choice since the date is generated at random and it's a small number of rows being generated.
|
256
|
+
|
257
|
+
If you want to disable retrying duplicates altogether and raise an error to fail fast you can specify it like this:
|
258
|
+
|
259
|
+
```ruby
|
260
|
+
class Person < ApplicationRecord
|
261
|
+
end
|
262
|
+
|
263
|
+
class Skill < ApplicationRecord
|
264
|
+
end
|
265
|
+
|
266
|
+
class SkillRating < ApplicationRecord
|
267
|
+
belongs_to :person
|
268
|
+
belongs_to :skill
|
269
|
+
end
|
270
|
+
|
271
|
+
data_loader = ActiveRecordDataLoader.define do
|
272
|
+
model Person do |m|
|
273
|
+
m.count 100_000
|
274
|
+
end
|
275
|
+
|
276
|
+
model Skill do |m|
|
277
|
+
m.count 100
|
278
|
+
end
|
279
|
+
|
280
|
+
model SkillRating do |m|
|
281
|
+
m.count 10_000_000
|
282
|
+
m.max_duplicate_retries 0
|
283
|
+
m.raise_on_duplicates
|
284
|
+
|
285
|
+
m.column :rating, -> { rand(1..10) }
|
286
|
+
end
|
287
|
+
end
|
288
|
+
|
289
|
+
data_loader.load_data
|
290
|
+
```
|
291
|
+
|
292
|
+
|
293
|
+
### Configuration options
|
294
|
+
|
295
|
+
You can define global configuration options like this:
|
296
|
+
|
297
|
+
```ruby
|
298
|
+
ActiveRecordDataLoader.configure do |c|
|
299
|
+
c.logger = ActiveSupport::Logger.new("my_file.log", level: :debug)
|
300
|
+
c.statement_timeout = "5min"
|
301
|
+
end
|
302
|
+
```
|
303
|
+
|
304
|
+
Or you can create a configuration object for the specific data loader instance rather than globally:
|
305
|
+
|
306
|
+
```ruby
|
307
|
+
config = ActiveRecordDataLoader::Configuration.new(
|
308
|
+
c.logger = ActiveSupport::Logger.new("my_file.log", level: :debug)
|
309
|
+
c.statement_timeout = "5min"
|
310
|
+
)
|
311
|
+
loader = ActiveRecordDataLoader.define(config) do
|
312
|
+
model Company do |m|
|
313
|
+
m.count 10
|
314
|
+
end
|
315
|
+
|
316
|
+
# ... more definitions
|
317
|
+
end
|
318
|
+
```
|
319
|
+
|
320
|
+
#### statement_timeout
|
321
|
+
|
322
|
+
This is currently only used for Postgres connections to adjust the `statement_timeout` value for the connection. The default is `2min`. Depending on the size of the batches you are loading and overall size of the tables you may need to increase this value:
|
323
|
+
|
324
|
+
```ruby
|
325
|
+
ActiveRecordDataLoader.configure do |c|
|
326
|
+
c.statement_timeout = "5min"
|
327
|
+
end
|
328
|
+
```
|
329
|
+
|
330
|
+
#### connection_factory
|
331
|
+
|
332
|
+
The `connection_factory` option accepts a lambda that should return a connection object whenever executed. If not specified, the default behavior is to retrieve a connection using `ActiveRecord::Base.connection`. You can configure it like this:
|
333
|
+
|
334
|
+
```ruby
|
335
|
+
ActiveRecordDataLoader.configure do |c|
|
336
|
+
c.connection_factory = -> { MyCustomConnectionHandler.open_connection }
|
337
|
+
end
|
338
|
+
```
|
339
|
+
|
340
|
+
#### output
|
341
|
+
|
342
|
+
The `output` option accepts an optional file name to write a SQL script with the data loading statements. This script file can then be executed manually to load the data. This can be helpful if you need to load the same data multiple times. For example if you are profiling different alternatives in your code and you want to see how each performs with a fully loaded database. In that case you would want to have the same data starting point for each alternative you evaluate. By generating the script file, it would be significantly faster to load that data over and over by executing the existing script.
|
343
|
+
|
344
|
+
If `output` is nil or empty, no script file will be written.
|
345
|
+
|
346
|
+
Example usage:
|
347
|
+
|
348
|
+
```ruby
|
349
|
+
ActiveRecordDataLoader.configure do |c|
|
350
|
+
c.output = "./my_script.sql" # Outputs to the provided file
|
351
|
+
end
|
352
|
+
```
|
353
|
+
|
354
|
+
When using an output script file with Postgres, the resulting script will have `\COPY` commands which reference CSV files that contain the data batches to be copied. The CSV files will be created alongside the SQL script and will be named using the table name and the row range for the given batch, for example `./my_script_customers_1_to_1000.csv`. Each `\COPY` command in the SQL file will reference the corresponding CSV file, so all you need to do is execute the SQL file using `psql`:
|
355
|
+
|
356
|
+
```bash
|
357
|
+
psql -h my-db-host -U my_user -f my_script.sql
|
358
|
+
```
|
359
|
+
|
207
360
|
## Development
|
208
361
|
|
209
362
|
After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
|
@@ -220,4 +373,4 @@ The gem is available as open source under the terms of the [MIT License](https:/
|
|
220
373
|
|
221
374
|
## Code of Conduct
|
222
375
|
|
223
|
-
Everyone interacting in the
|
376
|
+
Everyone interacting in the _active_record_data_loader_ project’s codebases, issue trackers, chat rooms and mailing lists is expected to follow the [code of conduct](https://github.com/abeiderman/active_record_data_loader/blob/master/CODE_OF_CONDUCT.md).
|
data/Rakefile
CHANGED
@@ -3,10 +3,16 @@
|
|
3
3
|
require "bundler/gem_tasks"
|
4
4
|
require "rspec/core/rake_task"
|
5
5
|
require "rubocop/rake_task"
|
6
|
-
require "coveralls/rake/task"
|
7
6
|
|
8
7
|
RSpec::Core::RakeTask.new(:spec)
|
9
8
|
RuboCop::RakeTask.new(:rubocop)
|
10
|
-
Coveralls::RakeTask.new
|
11
9
|
|
12
10
|
task default: [:spec, :rubocop]
|
11
|
+
|
12
|
+
task :wait_for_test_db do
|
13
|
+
require "active_record_data_loader"
|
14
|
+
require "./spec/active_record_helper"
|
15
|
+
|
16
|
+
ActiveRecordHelper.wait_for_mysql
|
17
|
+
ActiveRecordHelper.wait_for_postgres
|
18
|
+
end
|
@@ -8,7 +8,7 @@ Gem::Specification.new do |spec|
|
|
8
8
|
spec.name = "active_record_data_loader"
|
9
9
|
spec.version = ActiveRecordDataLoader::VERSION
|
10
10
|
spec.authors = ["Alejandro Beiderman"]
|
11
|
-
spec.email = ["
|
11
|
+
spec.email = ["active_record_data_loader@ossprojects.dev"]
|
12
12
|
|
13
13
|
spec.summary = "A utility to bulk load test data for performance testing."
|
14
14
|
spec.description = "A utility to bulk load test data for performance testing."
|
@@ -20,7 +20,7 @@ Gem::Specification.new do |spec|
|
|
20
20
|
spec.metadata["source_code_uri"] = "https://github.com/abeiderman/active_record_data_loader"
|
21
21
|
else
|
22
22
|
raise "RubyGems 2.0 or newer is required to protect against " \
|
23
|
-
|
23
|
+
"public gem pushes."
|
24
24
|
end
|
25
25
|
|
26
26
|
spec.files = `git ls-files -z`.split("\x0").reject do |f|
|
@@ -30,20 +30,21 @@ Gem::Specification.new do |spec|
|
|
30
30
|
spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
|
31
31
|
spec.require_paths = ["lib"]
|
32
32
|
|
33
|
-
spec.required_ruby_version = ">= 2.
|
33
|
+
spec.required_ruby_version = ">= 2.5.0"
|
34
34
|
|
35
|
-
spec.add_dependency "activerecord", ">=
|
35
|
+
spec.add_dependency "activerecord", ">= 5.0"
|
36
36
|
|
37
37
|
spec.add_development_dependency "appraisal"
|
38
38
|
spec.add_development_dependency "bundler", ">= 1.16"
|
39
|
-
spec.add_development_dependency "coveralls"
|
40
39
|
spec.add_development_dependency "mysql2"
|
41
40
|
spec.add_development_dependency "pg"
|
42
41
|
spec.add_development_dependency "pry"
|
43
|
-
spec.add_development_dependency "rake", "~>
|
42
|
+
spec.add_development_dependency "rake", "~> 13.0"
|
44
43
|
spec.add_development_dependency "rspec", "~> 3.0"
|
45
44
|
spec.add_development_dependency "rspec-collection_matchers"
|
46
45
|
spec.add_development_dependency "rubocop"
|
46
|
+
spec.add_development_dependency "simplecov"
|
47
|
+
spec.add_development_dependency "simplecov-lcov"
|
47
48
|
spec.add_development_dependency "sqlite3"
|
48
49
|
spec.add_development_dependency "timecop"
|
49
50
|
end
|
data/config/database.yml
CHANGED
@@ -1,6 +1,7 @@
|
|
1
1
|
postgres:
|
2
2
|
adapter: "postgresql"
|
3
3
|
host: "127.0.0.1"
|
4
|
+
port: "2345"
|
4
5
|
database: "test"
|
5
6
|
username: "test"
|
6
7
|
password: "test"
|
@@ -12,6 +13,7 @@ sqlite3:
|
|
12
13
|
mysql:
|
13
14
|
adapter: "mysql2"
|
14
15
|
host: "127.0.0.1"
|
16
|
+
port: "3306"
|
15
17
|
database: "test"
|
16
18
|
username: "test"
|
17
19
|
password: "test"
|
data/docker-compose.yml
ADDED
@@ -0,0 +1,18 @@
|
|
1
|
+
version: "3.9"
|
2
|
+
services:
|
3
|
+
postgres:
|
4
|
+
image: postgres:11
|
5
|
+
ports:
|
6
|
+
- "2345:5432"
|
7
|
+
environment:
|
8
|
+
- POSTGRES_USER=test
|
9
|
+
- POSTGRES_PASSWORD=test
|
10
|
+
mysql:
|
11
|
+
image: mysql:5
|
12
|
+
ports:
|
13
|
+
- "3306:3306"
|
14
|
+
environment:
|
15
|
+
- MYSQL_ROOT_PASSWORD=test
|
16
|
+
- MYSQL_USER=test
|
17
|
+
- MYSQL_PASSWORD=test
|
18
|
+
- MYSQL_DATABASE=test
|
@@ -2,30 +2,31 @@
|
|
2
2
|
|
3
3
|
module ActiveRecordDataLoader
|
4
4
|
module ActiveRecord
|
5
|
-
class
|
6
|
-
def self.
|
5
|
+
class BelongsToDataProvider
|
6
|
+
def self.provider_for(ar_association:, query: nil, strategy: :random)
|
7
7
|
raise "#{name} does not support polymorphic associations" if ar_association.polymorphic?
|
8
8
|
|
9
|
-
{ ar_association.join_foreign_key.to_sym => new(ar_association, query).foreign_key_func }
|
9
|
+
{ ar_association.join_foreign_key.to_sym => new(ar_association, query, strategy).foreign_key_func }
|
10
10
|
end
|
11
11
|
|
12
|
-
def initialize(ar_association, query)
|
12
|
+
def initialize(ar_association, query, strategy)
|
13
13
|
@ar_association = ar_association
|
14
14
|
@query = query
|
15
|
+
@strategy = strategy
|
15
16
|
end
|
16
17
|
|
17
18
|
def foreign_key_func
|
18
|
-
-> { possible_values.
|
19
|
+
-> { possible_values.next }
|
19
20
|
end
|
20
21
|
|
21
22
|
private
|
22
23
|
|
23
24
|
def possible_values
|
24
|
-
@possible_values ||= base_query.pluck(@ar_association.join_primary_key)
|
25
|
+
@possible_values ||= List.for(base_query.pluck(@ar_association.join_primary_key), strategy: @strategy)
|
25
26
|
end
|
26
27
|
|
27
28
|
def base_query
|
28
|
-
if @query
|
29
|
+
if @query.respond_to?(:call)
|
29
30
|
@query.call.all
|
30
31
|
else
|
31
32
|
@ar_association.klass.all
|
data/lib/active_record_data_loader/active_record/{column_configuration.rb → column_data_provider.rb}
RENAMED
@@ -2,7 +2,7 @@
|
|
2
2
|
|
3
3
|
module ActiveRecordDataLoader
|
4
4
|
module ActiveRecord
|
5
|
-
class
|
5
|
+
class ColumnDataProvider
|
6
6
|
class << self
|
7
7
|
VALUE_GENERATORS = {
|
8
8
|
enum: EnumValueGenerator,
|
@@ -12,7 +12,7 @@ module ActiveRecordDataLoader
|
|
12
12
|
datetime: DatetimeValueGenerator,
|
13
13
|
}.freeze
|
14
14
|
|
15
|
-
def
|
15
|
+
def provider_for(model_class:, ar_column:, connection_factory:)
|
16
16
|
raise_error_if_not_supported(model_class, ar_column)
|
17
17
|
|
18
18
|
{
|
@@ -5,34 +5,35 @@ module ActiveRecordDataLoader
|
|
5
5
|
class EnumValueGenerator
|
6
6
|
class << self
|
7
7
|
def generator_for(model_class:, ar_column:, connection_factory:)
|
8
|
-
values = enum_values_for(
|
8
|
+
values = enum_values_for(ar_column.sql_type, connection_factory)
|
9
9
|
-> { values.sample }
|
10
10
|
end
|
11
11
|
|
12
12
|
private
|
13
13
|
|
14
|
-
def enum_values_for(
|
14
|
+
def enum_values_for(enum_type, connection_factory)
|
15
15
|
connection = connection_factory.call
|
16
16
|
|
17
17
|
if connection.adapter_name.downcase.to_sym == :postgresql
|
18
|
-
postgres_enum_values_for(
|
18
|
+
postgres_enum_values_for(connection, enum_type)
|
19
19
|
elsif connection.adapter_name.downcase.to_s.start_with?("mysql")
|
20
|
-
mysql_enum_values_for(
|
20
|
+
mysql_enum_values_for(enum_type)
|
21
21
|
else
|
22
22
|
[]
|
23
23
|
end
|
24
|
+
ensure
|
25
|
+
connection&.close
|
24
26
|
end
|
25
27
|
|
26
|
-
def postgres_enum_values_for(
|
27
|
-
|
28
|
-
.connection
|
28
|
+
def postgres_enum_values_for(connection, enum_type)
|
29
|
+
connection
|
29
30
|
.execute("SELECT unnest(enum_range(NULL::#{enum_type}))::text")
|
30
31
|
.map(&:values)
|
31
32
|
.flatten
|
32
33
|
.compact
|
33
34
|
end
|
34
35
|
|
35
|
-
def mysql_enum_values_for(
|
36
|
+
def mysql_enum_values_for(enum_type)
|
36
37
|
enum_type
|
37
38
|
.to_s
|
38
39
|
.downcase
|
@@ -5,7 +5,7 @@ module ActiveRecordDataLoader
|
|
5
5
|
class IntegerValueGenerator
|
6
6
|
class << self
|
7
7
|
def generator_for(model_class:, ar_column:, connection_factory: nil)
|
8
|
-
range_limit = [(256**number_of_bytes(ar_column)) / 2 - 1, 1_000_000_000].min
|
8
|
+
range_limit = [((256**number_of_bytes(ar_column)) / 2) - 1, 1_000_000_000].min
|
9
9
|
|
10
10
|
-> { rand(0..range_limit) }
|
11
11
|
end
|
@@ -0,0 +1,47 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module ActiveRecordDataLoader
|
4
|
+
module ActiveRecord
|
5
|
+
class List
|
6
|
+
def self.for(enumerable, strategy: :random)
|
7
|
+
if strategy == :random_cycle
|
8
|
+
RandomCycle.new(enumerable)
|
9
|
+
else
|
10
|
+
Random.new(enumerable)
|
11
|
+
end
|
12
|
+
end
|
13
|
+
|
14
|
+
class Random
|
15
|
+
def initialize(enumerable)
|
16
|
+
@list = enumerable
|
17
|
+
end
|
18
|
+
|
19
|
+
def next
|
20
|
+
@list.sample
|
21
|
+
end
|
22
|
+
end
|
23
|
+
|
24
|
+
class RandomCycle
|
25
|
+
def initialize(enumerable)
|
26
|
+
@enumerable = enumerable
|
27
|
+
@count = enumerable.count
|
28
|
+
reset_list
|
29
|
+
end
|
30
|
+
|
31
|
+
def next
|
32
|
+
value = @list.next
|
33
|
+
reset_list if (@index += 1) >= @count
|
34
|
+
value
|
35
|
+
end
|
36
|
+
|
37
|
+
private
|
38
|
+
|
39
|
+
def reset_list
|
40
|
+
@index = 0
|
41
|
+
@enumerable = @enumerable.shuffle
|
42
|
+
@list = @enumerable.cycle
|
43
|
+
end
|
44
|
+
end
|
45
|
+
end
|
46
|
+
end
|
47
|
+
end
|
@@ -8,9 +8,12 @@ module ActiveRecordDataLoader
|
|
8
8
|
def initialize(
|
9
9
|
model:,
|
10
10
|
column_settings:,
|
11
|
+
connection_factory:,
|
12
|
+
logger:,
|
13
|
+
raise_on_duplicates:,
|
14
|
+
max_duplicate_retries:,
|
11
15
|
polymorphic_settings: [],
|
12
|
-
belongs_to_settings: []
|
13
|
-
connection_factory:
|
16
|
+
belongs_to_settings: []
|
14
17
|
)
|
15
18
|
@model_class = model
|
16
19
|
@table = model.table_name
|
@@ -18,6 +21,11 @@ module ActiveRecordDataLoader
|
|
18
21
|
@polymorphic_settings = polymorphic_settings
|
19
22
|
@belongs_to_settings = belongs_to_settings.map { |s| [s.name, s.query] }.to_h
|
20
23
|
@connection_factory = connection_factory
|
24
|
+
@raise_on_duplicates = raise_on_duplicates
|
25
|
+
@max_duplicate_retries = max_duplicate_retries
|
26
|
+
@logger = logger
|
27
|
+
@index_tracker = UniqueIndexTracker.new(model: model, connection_factory: connection_factory)
|
28
|
+
@index_tracker.map_indexed_columns(column_list)
|
21
29
|
end
|
22
30
|
|
23
31
|
def column_list
|
@@ -25,11 +33,41 @@ module ActiveRecordDataLoader
|
|
25
33
|
end
|
26
34
|
|
27
35
|
def generate_row(row_number)
|
28
|
-
|
36
|
+
@index_tracker.capture_unique_values(generate_row_with_retries(row_number))
|
29
37
|
end
|
30
38
|
|
31
39
|
private
|
32
40
|
|
41
|
+
def generate_row_with_retries(row_number)
|
42
|
+
retries = 0
|
43
|
+
while @index_tracker.repeating_unique_values?(row = generate_candidate_row(row_number))
|
44
|
+
if (retries += 1) > @max_duplicate_retries
|
45
|
+
raise DuplicateKeyError, <<~MSG if @raise_on_duplicates
|
46
|
+
Exhausted retries looking for unique values for row #{row_number} for '#{table}'.
|
47
|
+
Table '#{table}' has unique indexes that would have prevented inserting this row. If you would
|
48
|
+
like to skip non-unique rows instead of raising, configure `raise_on_duplicates` to be `false`.
|
49
|
+
MSG
|
50
|
+
|
51
|
+
@logger.warn(
|
52
|
+
"[ActiveRecordDataLoader] "\
|
53
|
+
"Exhausted retries looking for unique values. Skipping row #{row_number} for '#{table}'."
|
54
|
+
)
|
55
|
+
return nil
|
56
|
+
else
|
57
|
+
@logger.info(
|
58
|
+
"[ActiveRecordDataLoader] "\
|
59
|
+
"Retrying row #{row_number} for '#{table}' looking for unique values compliant with indexes. "\
|
60
|
+
"Retry number #{retries}."
|
61
|
+
)
|
62
|
+
end
|
63
|
+
end
|
64
|
+
row
|
65
|
+
end
|
66
|
+
|
67
|
+
def generate_candidate_row(row_number)
|
68
|
+
column_list.map { |c| column_data(row_number, c) }
|
69
|
+
end
|
70
|
+
|
33
71
|
def column_data(row_number, column)
|
34
72
|
column_value = columns[column]
|
35
73
|
return column_value unless column_value.respond_to?(:call)
|
@@ -56,9 +94,9 @@ module ActiveRecordDataLoader
|
|
56
94
|
@model_class
|
57
95
|
.columns_hash
|
58
96
|
.reject { |name| name == @model_class.primary_key }
|
59
|
-
.select { |_, c|
|
97
|
+
.select { |_, c| ColumnDataProvider.supported?(model_class: @model_class, ar_column: c) }
|
60
98
|
.map do |_, c|
|
61
|
-
|
99
|
+
ColumnDataProvider.provider_for(
|
62
100
|
model_class: @model_class,
|
63
101
|
ar_column: c,
|
64
102
|
connection_factory: @connection_factory
|
@@ -73,16 +111,33 @@ module ActiveRecordDataLoader
|
|
73
111
|
.select(&:belongs_to?)
|
74
112
|
.reject(&:polymorphic?)
|
75
113
|
.map do |assoc|
|
76
|
-
|
114
|
+
BelongsToDataProvider.provider_for(
|
115
|
+
ar_association: assoc,
|
116
|
+
query: @belongs_to_settings[assoc.name],
|
117
|
+
strategy: column_config_strategy(assoc)
|
118
|
+
)
|
77
119
|
end
|
78
120
|
.reduce({}, :merge)
|
79
121
|
end
|
80
122
|
|
81
123
|
def polymorphic_config
|
82
124
|
@polymorphic_settings
|
83
|
-
.map
|
125
|
+
.map do |s|
|
126
|
+
PolymorphicBelongsToDataProvider.provider_for(
|
127
|
+
polymorphic_settings: s,
|
128
|
+
strategy: column_config_strategy(s.model_class.reflect_on_association(s.name))
|
129
|
+
)
|
130
|
+
end
|
84
131
|
.reduce({}, :merge)
|
85
132
|
end
|
133
|
+
|
134
|
+
def column_config_strategy(column)
|
135
|
+
if @index_tracker.contained_in_index?(column)
|
136
|
+
:random_cycle
|
137
|
+
else
|
138
|
+
:random
|
139
|
+
end
|
140
|
+
end
|
86
141
|
end
|
87
142
|
end
|
88
143
|
end
|