active_record_data_loader 0.1.1 → 1.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.rubocop.yml +33 -7
- data/.travis.yml +16 -3
- data/Appraisals +2 -2
- data/CHANGELOG.md +24 -0
- data/Gemfile.lock +56 -46
- data/README.md +182 -4
- data/Rakefile +2 -0
- data/active_record_data_loader.gemspec +5 -3
- data/config/database.yml +9 -0
- data/config/database.yml.travis +5 -0
- data/docker-compose.yml +18 -0
- data/gemfiles/activerecord_6.gemfile +1 -1
- data/gemfiles/rails.gemfile +7 -0
- data/lib/active_record_data_loader.rb +23 -14
- data/lib/active_record_data_loader/active_record/belongs_to_configuration.rb +13 -4
- data/lib/active_record_data_loader/active_record/column_configuration.rb +14 -4
- data/lib/active_record_data_loader/active_record/datetime_value_generator.rb +21 -0
- data/lib/active_record_data_loader/active_record/enum_value_generator.rb +25 -3
- data/lib/active_record_data_loader/active_record/integer_value_generator.rb +1 -1
- data/lib/active_record_data_loader/active_record/model_data_generator.rb +20 -4
- data/lib/active_record_data_loader/active_record/per_row_value_cache.rb +33 -0
- data/lib/active_record_data_loader/active_record/polymorphic_belongs_to_configuration.rb +9 -1
- data/lib/active_record_data_loader/active_record/text_value_generator.rb +1 -1
- data/lib/active_record_data_loader/bulk_insert_strategy.rb +1 -6
- data/lib/active_record_data_loader/configuration.rb +17 -3
- data/lib/active_record_data_loader/copy_strategy.rb +11 -14
- data/lib/active_record_data_loader/dsl/belongs_to_association.rb +15 -0
- data/lib/active_record_data_loader/dsl/model.rb +6 -1
- data/lib/active_record_data_loader/dsl/polymorphic_association.rb +4 -2
- data/lib/active_record_data_loader/loader.rb +44 -12
- data/lib/active_record_data_loader/version.rb +1 -1
- metadata +46 -13
- data/script/ci_build.sh +0 -6
@@ -1,17 +1,12 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
require "csv"
|
4
|
-
require "benchmark"
|
5
|
-
|
6
3
|
module ActiveRecordDataLoader
|
7
4
|
class BulkInsertStrategy
|
8
5
|
def initialize(data_generator)
|
9
6
|
@data_generator = data_generator
|
10
7
|
end
|
11
8
|
|
12
|
-
def load_batch(row_numbers)
|
13
|
-
connection = ::ActiveRecord::Base.connection
|
14
|
-
|
9
|
+
def load_batch(row_numbers, connection)
|
15
10
|
connection.insert(<<~SQL)
|
16
11
|
INSERT INTO #{quoted_table_name(connection)} (#{column_list(connection)})
|
17
12
|
VALUES #{values(row_numbers, connection)}
|
@@ -2,16 +2,30 @@
|
|
2
2
|
|
3
3
|
module ActiveRecordDataLoader
|
4
4
|
class Configuration
|
5
|
-
attr_accessor :default_batch_size, :default_row_count, :logger
|
5
|
+
attr_accessor :default_batch_size, :default_row_count, :logger, :statement_timeout, :connection_factory
|
6
6
|
|
7
7
|
def initialize(
|
8
8
|
default_batch_size: 100_000,
|
9
9
|
default_row_count: 1,
|
10
|
-
logger:
|
10
|
+
logger: nil,
|
11
|
+
statement_timeout: "2min",
|
12
|
+
connection_factory: -> { ::ActiveRecord::Base.connection }
|
11
13
|
)
|
12
14
|
@default_batch_size = default_batch_size
|
13
15
|
@default_row_count = default_row_count
|
14
|
-
@logger = logger
|
16
|
+
@logger = logger || default_logger
|
17
|
+
@statement_timeout = statement_timeout
|
18
|
+
@connection_factory = connection_factory
|
19
|
+
end
|
20
|
+
|
21
|
+
private
|
22
|
+
|
23
|
+
def default_logger
|
24
|
+
if defined?(Rails) && Rails.respond_to?(:logger)
|
25
|
+
Rails.logger
|
26
|
+
else
|
27
|
+
Logger.new($stdout, level: :info)
|
28
|
+
end
|
15
29
|
end
|
16
30
|
end
|
17
31
|
end
|
@@ -1,19 +1,16 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
require "csv"
|
4
|
-
require "benchmark"
|
5
|
-
|
6
3
|
module ActiveRecordDataLoader
|
7
4
|
class CopyStrategy
|
8
5
|
def initialize(data_generator)
|
9
6
|
@data_generator = data_generator
|
10
7
|
end
|
11
8
|
|
12
|
-
def load_batch(row_numbers)
|
13
|
-
csv_data = csv_data_batch(row_numbers)
|
9
|
+
def load_batch(row_numbers, connection)
|
10
|
+
csv_data = csv_data_batch(row_numbers, connection)
|
14
11
|
|
15
|
-
|
16
|
-
|
12
|
+
raw_connection = connection.raw_connection
|
13
|
+
raw_connection.copy_data(copy_command(connection)) { raw_connection.put_copy_data(csv_data) }
|
17
14
|
end
|
18
15
|
|
19
16
|
def table_name
|
@@ -28,18 +25,18 @@ module ActiveRecordDataLoader
|
|
28
25
|
|
29
26
|
attr_reader :data_generator
|
30
27
|
|
31
|
-
def csv_data_batch(row_numbers)
|
28
|
+
def csv_data_batch(row_numbers, connection)
|
32
29
|
row_numbers.map do |i|
|
33
|
-
data_generator.generate_row(i).map { |d| quote_data(d) }.join(",")
|
30
|
+
data_generator.generate_row(i).map { |d| quote_data(d, connection) }.join(",")
|
34
31
|
end.join("\n")
|
35
32
|
end
|
36
33
|
|
37
|
-
def copy_command
|
34
|
+
def copy_command(connection)
|
38
35
|
@copy_command ||= begin
|
39
|
-
quoted_table_name =
|
36
|
+
quoted_table_name = connection.quote_table_name(data_generator.table)
|
40
37
|
columns = data_generator
|
41
38
|
.column_list
|
42
|
-
.map { |c|
|
39
|
+
.map { |c| connection.quote_column_name(c) }
|
43
40
|
.join(", ")
|
44
41
|
|
45
42
|
<<~SQL
|
@@ -49,10 +46,10 @@ module ActiveRecordDataLoader
|
|
49
46
|
end
|
50
47
|
end
|
51
48
|
|
52
|
-
def quote_data(data)
|
49
|
+
def quote_data(data, connection)
|
53
50
|
return if data.nil?
|
54
51
|
|
55
|
-
"\"#{
|
52
|
+
"\"#{connection.quote_string(data.to_s)}\""
|
56
53
|
end
|
57
54
|
end
|
58
55
|
end
|
@@ -0,0 +1,15 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module ActiveRecordDataLoader
|
4
|
+
module Dsl
|
5
|
+
class BelongsToAssociation
|
6
|
+
attr_reader :model_class, :name, :query
|
7
|
+
|
8
|
+
def initialize(model_class, name, query = nil)
|
9
|
+
@model_class = model_class
|
10
|
+
@name = name
|
11
|
+
@query = query
|
12
|
+
end
|
13
|
+
end
|
14
|
+
end
|
15
|
+
end
|
@@ -3,7 +3,7 @@
|
|
3
3
|
module ActiveRecordDataLoader
|
4
4
|
module Dsl
|
5
5
|
class Model
|
6
|
-
attr_reader :klass, :columns, :row_count, :polymorphic_associations
|
6
|
+
attr_reader :klass, :columns, :row_count, :polymorphic_associations, :belongs_to_associations
|
7
7
|
|
8
8
|
def initialize(klass:, configuration:)
|
9
9
|
@klass = klass
|
@@ -11,6 +11,7 @@ module ActiveRecordDataLoader
|
|
11
11
|
@row_count = configuration.default_row_count
|
12
12
|
@batch_size = configuration.default_batch_size
|
13
13
|
@polymorphic_associations = []
|
14
|
+
@belongs_to_associations = []
|
14
15
|
end
|
15
16
|
|
16
17
|
def count(count)
|
@@ -30,6 +31,10 @@ module ActiveRecordDataLoader
|
|
30
31
|
@klass, assoc_name
|
31
32
|
).tap { |a| block.call(a) }
|
32
33
|
end
|
34
|
+
|
35
|
+
def belongs_to(assoc_name, eligible_set:)
|
36
|
+
@belongs_to_associations << BelongsToAssociation.new(@klass, assoc_name, eligible_set)
|
37
|
+
end
|
33
38
|
end
|
34
39
|
end
|
35
40
|
end
|
@@ -3,16 +3,18 @@
|
|
3
3
|
module ActiveRecordDataLoader
|
4
4
|
module Dsl
|
5
5
|
class PolymorphicAssociation
|
6
|
-
attr_reader :model_class, :name, :models
|
6
|
+
attr_reader :model_class, :name, :models, :queries
|
7
7
|
|
8
8
|
def initialize(model_class, name)
|
9
9
|
@model_class = model_class
|
10
10
|
@name = name
|
11
11
|
@models = {}
|
12
|
+
@queries = {}
|
12
13
|
end
|
13
14
|
|
14
|
-
def model(klass, weight: 1)
|
15
|
+
def model(klass, weight: 1, eligible_set: nil)
|
15
16
|
@models[klass] = weight.to_i
|
17
|
+
@queries[klass] = eligible_set if eligible_set
|
16
18
|
end
|
17
19
|
|
18
20
|
def weighted_models
|
@@ -9,18 +9,20 @@ module ActiveRecordDataLoader
|
|
9
9
|
data_generator:,
|
10
10
|
total_rows:,
|
11
11
|
batch_size:,
|
12
|
-
|
12
|
+
configuration:
|
13
13
|
)
|
14
14
|
new(
|
15
|
-
logger: logger,
|
16
|
-
|
15
|
+
logger: configuration.logger,
|
16
|
+
statement_timeout: configuration.statement_timeout,
|
17
|
+
strategy: strategy_class(configuration.connection_factory).new(data_generator),
|
18
|
+
connection_factory: configuration.connection_factory
|
17
19
|
).load_data(batch_size, total_rows)
|
18
20
|
end
|
19
21
|
|
20
22
|
private
|
21
23
|
|
22
|
-
def strategy_class
|
23
|
-
if
|
24
|
+
def strategy_class(connection_factory)
|
25
|
+
if connection_factory.call.raw_connection.respond_to?(:copy_data)
|
24
26
|
ActiveRecordDataLoader::CopyStrategy
|
25
27
|
else
|
26
28
|
ActiveRecordDataLoader::BulkInsertStrategy
|
@@ -28,15 +30,18 @@ module ActiveRecordDataLoader
|
|
28
30
|
end
|
29
31
|
end
|
30
32
|
|
31
|
-
def initialize(logger:, strategy:)
|
33
|
+
def initialize(logger:, statement_timeout:, strategy:, connection_factory:)
|
32
34
|
@logger = logger
|
33
35
|
@strategy = strategy
|
36
|
+
@statement_timeout = statement_timeout
|
37
|
+
@connection_factory = connection_factory
|
34
38
|
end
|
35
39
|
|
36
40
|
def load_data(batch_size, total_rows)
|
37
41
|
batch_count = (total_rows / batch_size.to_f).ceil
|
38
42
|
|
39
43
|
logger.info(
|
44
|
+
"[ActiveRecordDataLoader] "\
|
40
45
|
"Loading #{total_rows} row(s) into '#{strategy.table_name}' via #{strategy.name}. "\
|
41
46
|
"#{batch_size} row(s) per batch, #{batch_count} batch(es)."
|
42
47
|
)
|
@@ -44,6 +49,7 @@ module ActiveRecordDataLoader
|
|
44
49
|
load_in_batches(batch_size, total_rows, batch_count)
|
45
50
|
end
|
46
51
|
logger.info(
|
52
|
+
"[ActiveRecordDataLoader] "\
|
47
53
|
"Completed loading #{total_rows} row(s) into '#{strategy.table_name}' "\
|
48
54
|
"in #{total_time} seconds."
|
49
55
|
)
|
@@ -51,16 +57,42 @@ module ActiveRecordDataLoader
|
|
51
57
|
|
52
58
|
private
|
53
59
|
|
54
|
-
attr_reader :strategy, :logger
|
60
|
+
attr_reader :strategy, :statement_timeout, :logger, :connection_factory
|
55
61
|
|
56
62
|
def load_in_batches(batch_size, total_rows, batch_count)
|
57
|
-
|
58
|
-
|
63
|
+
with_connection do |connection|
|
64
|
+
total_rows.times.each_slice(batch_size).with_index do |row_numbers, i|
|
65
|
+
time = Benchmark.realtime { strategy.load_batch(row_numbers, connection) }
|
59
66
|
|
60
|
-
|
61
|
-
|
62
|
-
|
67
|
+
logger.debug(
|
68
|
+
"[ActiveRecordDataLoader] "\
|
69
|
+
"Completed batch #{i + 1}/#{batch_count}, #{row_numbers.count} row(s) in #{time} seconds"
|
70
|
+
)
|
71
|
+
end
|
63
72
|
end
|
64
73
|
end
|
74
|
+
|
75
|
+
def with_connection
|
76
|
+
if connection.adapter_name.downcase.to_sym == :postgresql
|
77
|
+
original_timeout = retrieve_statement_timeout
|
78
|
+
update_statement_timeout(statement_timeout)
|
79
|
+
yield connection
|
80
|
+
update_statement_timeout(original_timeout)
|
81
|
+
else
|
82
|
+
yield connection
|
83
|
+
end
|
84
|
+
end
|
85
|
+
|
86
|
+
def retrieve_statement_timeout
|
87
|
+
connection.execute("SHOW statement_timeout").first["statement_timeout"]
|
88
|
+
end
|
89
|
+
|
90
|
+
def update_statement_timeout(timeout)
|
91
|
+
connection.execute("SET statement_timeout = \"#{timeout}\"")
|
92
|
+
end
|
93
|
+
|
94
|
+
def connection
|
95
|
+
connection_factory.call
|
96
|
+
end
|
65
97
|
end
|
66
98
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: active_record_data_loader
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version:
|
4
|
+
version: 1.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Alejandro Beiderman
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2021-05-01 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: activerecord
|
@@ -16,14 +16,14 @@ dependencies:
|
|
16
16
|
requirements:
|
17
17
|
- - ">="
|
18
18
|
- !ruby/object:Gem::Version
|
19
|
-
version: '
|
19
|
+
version: '5.0'
|
20
20
|
type: :runtime
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
24
|
- - ">="
|
25
25
|
- !ruby/object:Gem::Version
|
26
|
-
version: '
|
26
|
+
version: '5.0'
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
28
|
name: appraisal
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
@@ -66,6 +66,20 @@ dependencies:
|
|
66
66
|
- - ">="
|
67
67
|
- !ruby/object:Gem::Version
|
68
68
|
version: '0'
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: mysql2
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
73
|
+
- - ">="
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: '0'
|
76
|
+
type: :development
|
77
|
+
prerelease: false
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - ">="
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: '0'
|
69
83
|
- !ruby/object:Gem::Dependency
|
70
84
|
name: pg
|
71
85
|
requirement: !ruby/object:Gem::Requirement
|
@@ -100,14 +114,14 @@ dependencies:
|
|
100
114
|
requirements:
|
101
115
|
- - "~>"
|
102
116
|
- !ruby/object:Gem::Version
|
103
|
-
version: '
|
117
|
+
version: '13.0'
|
104
118
|
type: :development
|
105
119
|
prerelease: false
|
106
120
|
version_requirements: !ruby/object:Gem::Requirement
|
107
121
|
requirements:
|
108
122
|
- - "~>"
|
109
123
|
- !ruby/object:Gem::Version
|
110
|
-
version: '
|
124
|
+
version: '13.0'
|
111
125
|
- !ruby/object:Gem::Dependency
|
112
126
|
name: rspec
|
113
127
|
requirement: !ruby/object:Gem::Requirement
|
@@ -164,6 +178,20 @@ dependencies:
|
|
164
178
|
- - ">="
|
165
179
|
- !ruby/object:Gem::Version
|
166
180
|
version: '0'
|
181
|
+
- !ruby/object:Gem::Dependency
|
182
|
+
name: timecop
|
183
|
+
requirement: !ruby/object:Gem::Requirement
|
184
|
+
requirements:
|
185
|
+
- - ">="
|
186
|
+
- !ruby/object:Gem::Version
|
187
|
+
version: '0'
|
188
|
+
type: :development
|
189
|
+
prerelease: false
|
190
|
+
version_requirements: !ruby/object:Gem::Requirement
|
191
|
+
requirements:
|
192
|
+
- - ">="
|
193
|
+
- !ruby/object:Gem::Version
|
194
|
+
version: '0'
|
167
195
|
description: A utility to bulk load test data for performance testing.
|
168
196
|
email:
|
169
197
|
- abeiderman@gmail.com
|
@@ -178,6 +206,7 @@ files:
|
|
178
206
|
- ".rubocop.yml"
|
179
207
|
- ".travis.yml"
|
180
208
|
- Appraisals
|
209
|
+
- CHANGELOG.md
|
181
210
|
- CODE_OF_CONDUCT.md
|
182
211
|
- Gemfile
|
183
212
|
- Gemfile.lock
|
@@ -189,36 +218,40 @@ files:
|
|
189
218
|
- bin/setup
|
190
219
|
- config/database.yml
|
191
220
|
- config/database.yml.travis
|
221
|
+
- docker-compose.yml
|
192
222
|
- gemfiles/.bundle/config
|
193
223
|
- gemfiles/activerecord_5.gemfile
|
194
224
|
- gemfiles/activerecord_6.gemfile
|
195
225
|
- gemfiles/faker.gemfile
|
196
226
|
- gemfiles/ffaker.gemfile
|
227
|
+
- gemfiles/rails.gemfile
|
197
228
|
- lib/active_record_data_loader.rb
|
198
229
|
- lib/active_record_data_loader/active_record/belongs_to_configuration.rb
|
199
230
|
- lib/active_record_data_loader/active_record/column_configuration.rb
|
231
|
+
- lib/active_record_data_loader/active_record/datetime_value_generator.rb
|
200
232
|
- lib/active_record_data_loader/active_record/enum_value_generator.rb
|
201
233
|
- lib/active_record_data_loader/active_record/integer_value_generator.rb
|
202
234
|
- lib/active_record_data_loader/active_record/model_data_generator.rb
|
235
|
+
- lib/active_record_data_loader/active_record/per_row_value_cache.rb
|
203
236
|
- lib/active_record_data_loader/active_record/polymorphic_belongs_to_configuration.rb
|
204
237
|
- lib/active_record_data_loader/active_record/text_value_generator.rb
|
205
238
|
- lib/active_record_data_loader/bulk_insert_strategy.rb
|
206
239
|
- lib/active_record_data_loader/configuration.rb
|
207
240
|
- lib/active_record_data_loader/copy_strategy.rb
|
208
241
|
- lib/active_record_data_loader/data_faker.rb
|
242
|
+
- lib/active_record_data_loader/dsl/belongs_to_association.rb
|
209
243
|
- lib/active_record_data_loader/dsl/definition.rb
|
210
244
|
- lib/active_record_data_loader/dsl/model.rb
|
211
245
|
- lib/active_record_data_loader/dsl/polymorphic_association.rb
|
212
246
|
- lib/active_record_data_loader/loader.rb
|
213
247
|
- lib/active_record_data_loader/version.rb
|
214
248
|
- log/.keep
|
215
|
-
|
216
|
-
homepage:
|
249
|
+
homepage:
|
217
250
|
licenses:
|
218
251
|
- MIT
|
219
252
|
metadata:
|
220
253
|
source_code_uri: https://github.com/abeiderman/active_record_data_loader
|
221
|
-
post_install_message:
|
254
|
+
post_install_message:
|
222
255
|
rdoc_options: []
|
223
256
|
require_paths:
|
224
257
|
- lib
|
@@ -226,15 +259,15 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
226
259
|
requirements:
|
227
260
|
- - ">="
|
228
261
|
- !ruby/object:Gem::Version
|
229
|
-
version: 2.
|
262
|
+
version: 2.5.0
|
230
263
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
231
264
|
requirements:
|
232
265
|
- - ">="
|
233
266
|
- !ruby/object:Gem::Version
|
234
267
|
version: '0'
|
235
268
|
requirements: []
|
236
|
-
rubygems_version: 3.
|
237
|
-
signing_key:
|
269
|
+
rubygems_version: 3.1.4
|
270
|
+
signing_key:
|
238
271
|
specification_version: 4
|
239
272
|
summary: A utility to bulk load test data for performance testing.
|
240
273
|
test_files: []
|