active_record_data_loader 0.1.1 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +33 -7
- data/.travis.yml +16 -3
- data/Appraisals +2 -2
- data/CHANGELOG.md +24 -0
- data/Gemfile.lock +56 -46
- data/README.md +182 -4
- data/Rakefile +2 -0
- data/active_record_data_loader.gemspec +5 -3
- data/config/database.yml +9 -0
- data/config/database.yml.travis +5 -0
- data/docker-compose.yml +18 -0
- data/gemfiles/activerecord_6.gemfile +1 -1
- data/gemfiles/rails.gemfile +7 -0
- data/lib/active_record_data_loader.rb +23 -14
- data/lib/active_record_data_loader/active_record/belongs_to_configuration.rb +13 -4
- data/lib/active_record_data_loader/active_record/column_configuration.rb +14 -4
- data/lib/active_record_data_loader/active_record/datetime_value_generator.rb +21 -0
- data/lib/active_record_data_loader/active_record/enum_value_generator.rb +25 -3
- data/lib/active_record_data_loader/active_record/integer_value_generator.rb +1 -1
- data/lib/active_record_data_loader/active_record/model_data_generator.rb +20 -4
- data/lib/active_record_data_loader/active_record/per_row_value_cache.rb +33 -0
- data/lib/active_record_data_loader/active_record/polymorphic_belongs_to_configuration.rb +9 -1
- data/lib/active_record_data_loader/active_record/text_value_generator.rb +1 -1
- data/lib/active_record_data_loader/bulk_insert_strategy.rb +1 -6
- data/lib/active_record_data_loader/configuration.rb +17 -3
- data/lib/active_record_data_loader/copy_strategy.rb +11 -14
- data/lib/active_record_data_loader/dsl/belongs_to_association.rb +15 -0
- data/lib/active_record_data_loader/dsl/model.rb +6 -1
- data/lib/active_record_data_loader/dsl/polymorphic_association.rb +4 -2
- data/lib/active_record_data_loader/loader.rb +44 -12
- data/lib/active_record_data_loader/version.rb +1 -1
- metadata +46 -13
- data/script/ci_build.sh +0 -6
@@ -1,17 +1,12 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
require "csv"
|
4
|
-
require "benchmark"
|
5
|
-
|
6
3
|
module ActiveRecordDataLoader
|
7
4
|
class BulkInsertStrategy
|
8
5
|
def initialize(data_generator)
|
9
6
|
@data_generator = data_generator
|
10
7
|
end
|
11
8
|
|
12
|
-
def load_batch(row_numbers)
|
13
|
-
connection = ::ActiveRecord::Base.connection
|
14
|
-
|
9
|
+
def load_batch(row_numbers, connection)
|
15
10
|
connection.insert(<<~SQL)
|
16
11
|
INSERT INTO #{quoted_table_name(connection)} (#{column_list(connection)})
|
17
12
|
VALUES #{values(row_numbers, connection)}
|
@@ -2,16 +2,30 @@
|
|
2
2
|
|
3
3
|
module ActiveRecordDataLoader
|
4
4
|
class Configuration
|
5
|
-
attr_accessor :default_batch_size, :default_row_count, :logger
|
5
|
+
attr_accessor :default_batch_size, :default_row_count, :logger, :statement_timeout, :connection_factory
|
6
6
|
|
7
7
|
def initialize(
|
8
8
|
default_batch_size: 100_000,
|
9
9
|
default_row_count: 1,
|
10
|
-
logger:
|
10
|
+
logger: nil,
|
11
|
+
statement_timeout: "2min",
|
12
|
+
connection_factory: -> { ::ActiveRecord::Base.connection }
|
11
13
|
)
|
12
14
|
@default_batch_size = default_batch_size
|
13
15
|
@default_row_count = default_row_count
|
14
|
-
@logger = logger
|
16
|
+
@logger = logger || default_logger
|
17
|
+
@statement_timeout = statement_timeout
|
18
|
+
@connection_factory = connection_factory
|
19
|
+
end
|
20
|
+
|
21
|
+
private
|
22
|
+
|
23
|
+
def default_logger
|
24
|
+
if defined?(Rails) && Rails.respond_to?(:logger)
|
25
|
+
Rails.logger
|
26
|
+
else
|
27
|
+
Logger.new($stdout, level: :info)
|
28
|
+
end
|
15
29
|
end
|
16
30
|
end
|
17
31
|
end
|
@@ -1,19 +1,16 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
require "csv"
|
4
|
-
require "benchmark"
|
5
|
-
|
6
3
|
module ActiveRecordDataLoader
|
7
4
|
class CopyStrategy
|
8
5
|
def initialize(data_generator)
|
9
6
|
@data_generator = data_generator
|
10
7
|
end
|
11
8
|
|
12
|
-
def load_batch(row_numbers)
|
13
|
-
csv_data = csv_data_batch(row_numbers)
|
9
|
+
def load_batch(row_numbers, connection)
|
10
|
+
csv_data = csv_data_batch(row_numbers, connection)
|
14
11
|
|
15
|
-
|
16
|
-
|
12
|
+
raw_connection = connection.raw_connection
|
13
|
+
raw_connection.copy_data(copy_command(connection)) { raw_connection.put_copy_data(csv_data) }
|
17
14
|
end
|
18
15
|
|
19
16
|
def table_name
|
@@ -28,18 +25,18 @@ module ActiveRecordDataLoader
|
|
28
25
|
|
29
26
|
attr_reader :data_generator
|
30
27
|
|
31
|
-
def csv_data_batch(row_numbers)
|
28
|
+
def csv_data_batch(row_numbers, connection)
|
32
29
|
row_numbers.map do |i|
|
33
|
-
data_generator.generate_row(i).map { |d| quote_data(d) }.join(",")
|
30
|
+
data_generator.generate_row(i).map { |d| quote_data(d, connection) }.join(",")
|
34
31
|
end.join("\n")
|
35
32
|
end
|
36
33
|
|
37
|
-
def copy_command
|
34
|
+
def copy_command(connection)
|
38
35
|
@copy_command ||= begin
|
39
|
-
quoted_table_name =
|
36
|
+
quoted_table_name = connection.quote_table_name(data_generator.table)
|
40
37
|
columns = data_generator
|
41
38
|
.column_list
|
42
|
-
.map { |c|
|
39
|
+
.map { |c| connection.quote_column_name(c) }
|
43
40
|
.join(", ")
|
44
41
|
|
45
42
|
<<~SQL
|
@@ -49,10 +46,10 @@ module ActiveRecordDataLoader
|
|
49
46
|
end
|
50
47
|
end
|
51
48
|
|
52
|
-
def quote_data(data)
|
49
|
+
def quote_data(data, connection)
|
53
50
|
return if data.nil?
|
54
51
|
|
55
|
-
"\"#{
|
52
|
+
"\"#{connection.quote_string(data.to_s)}\""
|
56
53
|
end
|
57
54
|
end
|
58
55
|
end
|
@@ -0,0 +1,15 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module ActiveRecordDataLoader
|
4
|
+
module Dsl
|
5
|
+
class BelongsToAssociation
|
6
|
+
attr_reader :model_class, :name, :query
|
7
|
+
|
8
|
+
def initialize(model_class, name, query = nil)
|
9
|
+
@model_class = model_class
|
10
|
+
@name = name
|
11
|
+
@query = query
|
12
|
+
end
|
13
|
+
end
|
14
|
+
end
|
15
|
+
end
|
@@ -3,7 +3,7 @@
|
|
3
3
|
module ActiveRecordDataLoader
|
4
4
|
module Dsl
|
5
5
|
class Model
|
6
|
-
attr_reader :klass, :columns, :row_count, :polymorphic_associations
|
6
|
+
attr_reader :klass, :columns, :row_count, :polymorphic_associations, :belongs_to_associations
|
7
7
|
|
8
8
|
def initialize(klass:, configuration:)
|
9
9
|
@klass = klass
|
@@ -11,6 +11,7 @@ module ActiveRecordDataLoader
|
|
11
11
|
@row_count = configuration.default_row_count
|
12
12
|
@batch_size = configuration.default_batch_size
|
13
13
|
@polymorphic_associations = []
|
14
|
+
@belongs_to_associations = []
|
14
15
|
end
|
15
16
|
|
16
17
|
def count(count)
|
@@ -30,6 +31,10 @@ module ActiveRecordDataLoader
|
|
30
31
|
@klass, assoc_name
|
31
32
|
).tap { |a| block.call(a) }
|
32
33
|
end
|
34
|
+
|
35
|
+
def belongs_to(assoc_name, eligible_set:)
|
36
|
+
@belongs_to_associations << BelongsToAssociation.new(@klass, assoc_name, eligible_set)
|
37
|
+
end
|
33
38
|
end
|
34
39
|
end
|
35
40
|
end
|
@@ -3,16 +3,18 @@
|
|
3
3
|
module ActiveRecordDataLoader
|
4
4
|
module Dsl
|
5
5
|
class PolymorphicAssociation
|
6
|
-
attr_reader :model_class, :name, :models
|
6
|
+
attr_reader :model_class, :name, :models, :queries
|
7
7
|
|
8
8
|
def initialize(model_class, name)
|
9
9
|
@model_class = model_class
|
10
10
|
@name = name
|
11
11
|
@models = {}
|
12
|
+
@queries = {}
|
12
13
|
end
|
13
14
|
|
14
|
-
def model(klass, weight: 1)
|
15
|
+
def model(klass, weight: 1, eligible_set: nil)
|
15
16
|
@models[klass] = weight.to_i
|
17
|
+
@queries[klass] = eligible_set if eligible_set
|
16
18
|
end
|
17
19
|
|
18
20
|
def weighted_models
|
@@ -9,18 +9,20 @@ module ActiveRecordDataLoader
|
|
9
9
|
data_generator:,
|
10
10
|
total_rows:,
|
11
11
|
batch_size:,
|
12
|
-
|
12
|
+
configuration:
|
13
13
|
)
|
14
14
|
new(
|
15
|
-
logger: logger,
|
16
|
-
|
15
|
+
logger: configuration.logger,
|
16
|
+
statement_timeout: configuration.statement_timeout,
|
17
|
+
strategy: strategy_class(configuration.connection_factory).new(data_generator),
|
18
|
+
connection_factory: configuration.connection_factory
|
17
19
|
).load_data(batch_size, total_rows)
|
18
20
|
end
|
19
21
|
|
20
22
|
private
|
21
23
|
|
22
|
-
def strategy_class
|
23
|
-
if
|
24
|
+
def strategy_class(connection_factory)
|
25
|
+
if connection_factory.call.raw_connection.respond_to?(:copy_data)
|
24
26
|
ActiveRecordDataLoader::CopyStrategy
|
25
27
|
else
|
26
28
|
ActiveRecordDataLoader::BulkInsertStrategy
|
@@ -28,15 +30,18 @@ module ActiveRecordDataLoader
|
|
28
30
|
end
|
29
31
|
end
|
30
32
|
|
31
|
-
def initialize(logger:, strategy:)
|
33
|
+
def initialize(logger:, statement_timeout:, strategy:, connection_factory:)
|
32
34
|
@logger = logger
|
33
35
|
@strategy = strategy
|
36
|
+
@statement_timeout = statement_timeout
|
37
|
+
@connection_factory = connection_factory
|
34
38
|
end
|
35
39
|
|
36
40
|
def load_data(batch_size, total_rows)
|
37
41
|
batch_count = (total_rows / batch_size.to_f).ceil
|
38
42
|
|
39
43
|
logger.info(
|
44
|
+
"[ActiveRecordDataLoader] "\
|
40
45
|
"Loading #{total_rows} row(s) into '#{strategy.table_name}' via #{strategy.name}. "\
|
41
46
|
"#{batch_size} row(s) per batch, #{batch_count} batch(es)."
|
42
47
|
)
|
@@ -44,6 +49,7 @@ module ActiveRecordDataLoader
|
|
44
49
|
load_in_batches(batch_size, total_rows, batch_count)
|
45
50
|
end
|
46
51
|
logger.info(
|
52
|
+
"[ActiveRecordDataLoader] "\
|
47
53
|
"Completed loading #{total_rows} row(s) into '#{strategy.table_name}' "\
|
48
54
|
"in #{total_time} seconds."
|
49
55
|
)
|
@@ -51,16 +57,42 @@ module ActiveRecordDataLoader
|
|
51
57
|
|
52
58
|
private
|
53
59
|
|
54
|
-
attr_reader :strategy, :logger
|
60
|
+
attr_reader :strategy, :statement_timeout, :logger, :connection_factory
|
55
61
|
|
56
62
|
def load_in_batches(batch_size, total_rows, batch_count)
|
57
|
-
|
58
|
-
|
63
|
+
with_connection do |connection|
|
64
|
+
total_rows.times.each_slice(batch_size).with_index do |row_numbers, i|
|
65
|
+
time = Benchmark.realtime { strategy.load_batch(row_numbers, connection) }
|
59
66
|
|
60
|
-
|
61
|
-
|
62
|
-
|
67
|
+
logger.debug(
|
68
|
+
"[ActiveRecordDataLoader] "\
|
69
|
+
"Completed batch #{i + 1}/#{batch_count}, #{row_numbers.count} row(s) in #{time} seconds"
|
70
|
+
)
|
71
|
+
end
|
63
72
|
end
|
64
73
|
end
|
74
|
+
|
75
|
+
def with_connection
|
76
|
+
if connection.adapter_name.downcase.to_sym == :postgresql
|
77
|
+
original_timeout = retrieve_statement_timeout
|
78
|
+
update_statement_timeout(statement_timeout)
|
79
|
+
yield connection
|
80
|
+
update_statement_timeout(original_timeout)
|
81
|
+
else
|
82
|
+
yield connection
|
83
|
+
end
|
84
|
+
end
|
85
|
+
|
86
|
+
def retrieve_statement_timeout
|
87
|
+
connection.execute("SHOW statement_timeout").first["statement_timeout"]
|
88
|
+
end
|
89
|
+
|
90
|
+
def update_statement_timeout(timeout)
|
91
|
+
connection.execute("SET statement_timeout = \"#{timeout}\"")
|
92
|
+
end
|
93
|
+
|
94
|
+
def connection
|
95
|
+
connection_factory.call
|
96
|
+
end
|
65
97
|
end
|
66
98
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: active_record_data_loader
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.1
|
4
|
+
version: 1.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Alejandro Beiderman
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2021-05-01 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: activerecord
|
@@ -16,14 +16,14 @@ dependencies:
|
|
16
16
|
requirements:
|
17
17
|
- - ">="
|
18
18
|
- !ruby/object:Gem::Version
|
19
|
-
version: '
|
19
|
+
version: '5.0'
|
20
20
|
type: :runtime
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
24
|
- - ">="
|
25
25
|
- !ruby/object:Gem::Version
|
26
|
-
version: '
|
26
|
+
version: '5.0'
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
28
|
name: appraisal
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
@@ -66,6 +66,20 @@ dependencies:
|
|
66
66
|
- - ">="
|
67
67
|
- !ruby/object:Gem::Version
|
68
68
|
version: '0'
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: mysql2
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
73
|
+
- - ">="
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: '0'
|
76
|
+
type: :development
|
77
|
+
prerelease: false
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - ">="
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: '0'
|
69
83
|
- !ruby/object:Gem::Dependency
|
70
84
|
name: pg
|
71
85
|
requirement: !ruby/object:Gem::Requirement
|
@@ -100,14 +114,14 @@ dependencies:
|
|
100
114
|
requirements:
|
101
115
|
- - "~>"
|
102
116
|
- !ruby/object:Gem::Version
|
103
|
-
version: '
|
117
|
+
version: '13.0'
|
104
118
|
type: :development
|
105
119
|
prerelease: false
|
106
120
|
version_requirements: !ruby/object:Gem::Requirement
|
107
121
|
requirements:
|
108
122
|
- - "~>"
|
109
123
|
- !ruby/object:Gem::Version
|
110
|
-
version: '
|
124
|
+
version: '13.0'
|
111
125
|
- !ruby/object:Gem::Dependency
|
112
126
|
name: rspec
|
113
127
|
requirement: !ruby/object:Gem::Requirement
|
@@ -164,6 +178,20 @@ dependencies:
|
|
164
178
|
- - ">="
|
165
179
|
- !ruby/object:Gem::Version
|
166
180
|
version: '0'
|
181
|
+
- !ruby/object:Gem::Dependency
|
182
|
+
name: timecop
|
183
|
+
requirement: !ruby/object:Gem::Requirement
|
184
|
+
requirements:
|
185
|
+
- - ">="
|
186
|
+
- !ruby/object:Gem::Version
|
187
|
+
version: '0'
|
188
|
+
type: :development
|
189
|
+
prerelease: false
|
190
|
+
version_requirements: !ruby/object:Gem::Requirement
|
191
|
+
requirements:
|
192
|
+
- - ">="
|
193
|
+
- !ruby/object:Gem::Version
|
194
|
+
version: '0'
|
167
195
|
description: A utility to bulk load test data for performance testing.
|
168
196
|
email:
|
169
197
|
- abeiderman@gmail.com
|
@@ -178,6 +206,7 @@ files:
|
|
178
206
|
- ".rubocop.yml"
|
179
207
|
- ".travis.yml"
|
180
208
|
- Appraisals
|
209
|
+
- CHANGELOG.md
|
181
210
|
- CODE_OF_CONDUCT.md
|
182
211
|
- Gemfile
|
183
212
|
- Gemfile.lock
|
@@ -189,36 +218,40 @@ files:
|
|
189
218
|
- bin/setup
|
190
219
|
- config/database.yml
|
191
220
|
- config/database.yml.travis
|
221
|
+
- docker-compose.yml
|
192
222
|
- gemfiles/.bundle/config
|
193
223
|
- gemfiles/activerecord_5.gemfile
|
194
224
|
- gemfiles/activerecord_6.gemfile
|
195
225
|
- gemfiles/faker.gemfile
|
196
226
|
- gemfiles/ffaker.gemfile
|
227
|
+
- gemfiles/rails.gemfile
|
197
228
|
- lib/active_record_data_loader.rb
|
198
229
|
- lib/active_record_data_loader/active_record/belongs_to_configuration.rb
|
199
230
|
- lib/active_record_data_loader/active_record/column_configuration.rb
|
231
|
+
- lib/active_record_data_loader/active_record/datetime_value_generator.rb
|
200
232
|
- lib/active_record_data_loader/active_record/enum_value_generator.rb
|
201
233
|
- lib/active_record_data_loader/active_record/integer_value_generator.rb
|
202
234
|
- lib/active_record_data_loader/active_record/model_data_generator.rb
|
235
|
+
- lib/active_record_data_loader/active_record/per_row_value_cache.rb
|
203
236
|
- lib/active_record_data_loader/active_record/polymorphic_belongs_to_configuration.rb
|
204
237
|
- lib/active_record_data_loader/active_record/text_value_generator.rb
|
205
238
|
- lib/active_record_data_loader/bulk_insert_strategy.rb
|
206
239
|
- lib/active_record_data_loader/configuration.rb
|
207
240
|
- lib/active_record_data_loader/copy_strategy.rb
|
208
241
|
- lib/active_record_data_loader/data_faker.rb
|
242
|
+
- lib/active_record_data_loader/dsl/belongs_to_association.rb
|
209
243
|
- lib/active_record_data_loader/dsl/definition.rb
|
210
244
|
- lib/active_record_data_loader/dsl/model.rb
|
211
245
|
- lib/active_record_data_loader/dsl/polymorphic_association.rb
|
212
246
|
- lib/active_record_data_loader/loader.rb
|
213
247
|
- lib/active_record_data_loader/version.rb
|
214
248
|
- log/.keep
|
215
|
-
|
216
|
-
homepage:
|
249
|
+
homepage:
|
217
250
|
licenses:
|
218
251
|
- MIT
|
219
252
|
metadata:
|
220
253
|
source_code_uri: https://github.com/abeiderman/active_record_data_loader
|
221
|
-
post_install_message:
|
254
|
+
post_install_message:
|
222
255
|
rdoc_options: []
|
223
256
|
require_paths:
|
224
257
|
- lib
|
@@ -226,15 +259,15 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
226
259
|
requirements:
|
227
260
|
- - ">="
|
228
261
|
- !ruby/object:Gem::Version
|
229
|
-
version: 2.
|
262
|
+
version: 2.5.0
|
230
263
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
231
264
|
requirements:
|
232
265
|
- - ">="
|
233
266
|
- !ruby/object:Gem::Version
|
234
267
|
version: '0'
|
235
268
|
requirements: []
|
236
|
-
rubygems_version: 3.
|
237
|
-
signing_key:
|
269
|
+
rubygems_version: 3.1.4
|
270
|
+
signing_key:
|
238
271
|
specification_version: 4
|
239
272
|
summary: A utility to bulk load test data for performance testing.
|
240
273
|
test_files: []
|