active_record_data_loader 0.1.1 → 1.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (34)
  1. checksums.yaml +4 -4
  2. data/.rubocop.yml +33 -7
  3. data/.travis.yml +16 -3
  4. data/Appraisals +2 -2
  5. data/CHANGELOG.md +24 -0
  6. data/Gemfile.lock +56 -46
  7. data/README.md +182 -4
  8. data/Rakefile +2 -0
  9. data/active_record_data_loader.gemspec +5 -3
  10. data/config/database.yml +9 -0
  11. data/config/database.yml.travis +5 -0
  12. data/docker-compose.yml +18 -0
  13. data/gemfiles/activerecord_6.gemfile +1 -1
  14. data/gemfiles/rails.gemfile +7 -0
  15. data/lib/active_record_data_loader.rb +23 -14
  16. data/lib/active_record_data_loader/active_record/belongs_to_configuration.rb +13 -4
  17. data/lib/active_record_data_loader/active_record/column_configuration.rb +14 -4
  18. data/lib/active_record_data_loader/active_record/datetime_value_generator.rb +21 -0
  19. data/lib/active_record_data_loader/active_record/enum_value_generator.rb +25 -3
  20. data/lib/active_record_data_loader/active_record/integer_value_generator.rb +1 -1
  21. data/lib/active_record_data_loader/active_record/model_data_generator.rb +20 -4
  22. data/lib/active_record_data_loader/active_record/per_row_value_cache.rb +33 -0
  23. data/lib/active_record_data_loader/active_record/polymorphic_belongs_to_configuration.rb +9 -1
  24. data/lib/active_record_data_loader/active_record/text_value_generator.rb +1 -1
  25. data/lib/active_record_data_loader/bulk_insert_strategy.rb +1 -6
  26. data/lib/active_record_data_loader/configuration.rb +17 -3
  27. data/lib/active_record_data_loader/copy_strategy.rb +11 -14
  28. data/lib/active_record_data_loader/dsl/belongs_to_association.rb +15 -0
  29. data/lib/active_record_data_loader/dsl/model.rb +6 -1
  30. data/lib/active_record_data_loader/dsl/polymorphic_association.rb +4 -2
  31. data/lib/active_record_data_loader/loader.rb +44 -12
  32. data/lib/active_record_data_loader/version.rb +1 -1
  33. metadata +46 -13
  34. data/script/ci_build.sh +0 -6
@@ -1,17 +1,12 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require "csv"
4
- require "benchmark"
5
-
6
3
  module ActiveRecordDataLoader
7
4
  class BulkInsertStrategy
8
5
  def initialize(data_generator)
9
6
  @data_generator = data_generator
10
7
  end
11
8
 
12
- def load_batch(row_numbers)
13
- connection = ::ActiveRecord::Base.connection
14
-
9
+ def load_batch(row_numbers, connection)
15
10
  connection.insert(<<~SQL)
16
11
  INSERT INTO #{quoted_table_name(connection)} (#{column_list(connection)})
17
12
  VALUES #{values(row_numbers, connection)}
@@ -2,16 +2,30 @@
2
2
 
3
3
  module ActiveRecordDataLoader
4
4
  class Configuration
5
- attr_accessor :default_batch_size, :default_row_count, :logger
5
+ attr_accessor :default_batch_size, :default_row_count, :logger, :statement_timeout, :connection_factory
6
6
 
7
7
  def initialize(
8
8
  default_batch_size: 100_000,
9
9
  default_row_count: 1,
10
- logger: Logger.new(STDOUT, level: :info)
10
+ logger: nil,
11
+ statement_timeout: "2min",
12
+ connection_factory: -> { ::ActiveRecord::Base.connection }
11
13
  )
12
14
  @default_batch_size = default_batch_size
13
15
  @default_row_count = default_row_count
14
- @logger = logger
16
+ @logger = logger || default_logger
17
+ @statement_timeout = statement_timeout
18
+ @connection_factory = connection_factory
19
+ end
20
+
21
+ private
22
+
23
+ def default_logger
24
+ if defined?(Rails) && Rails.respond_to?(:logger)
25
+ Rails.logger
26
+ else
27
+ Logger.new($stdout, level: :info)
28
+ end
15
29
  end
16
30
  end
17
31
  end
@@ -1,19 +1,16 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require "csv"
4
- require "benchmark"
5
-
6
3
  module ActiveRecordDataLoader
7
4
  class CopyStrategy
8
5
  def initialize(data_generator)
9
6
  @data_generator = data_generator
10
7
  end
11
8
 
12
- def load_batch(row_numbers)
13
- csv_data = csv_data_batch(row_numbers)
9
+ def load_batch(row_numbers, connection)
10
+ csv_data = csv_data_batch(row_numbers, connection)
14
11
 
15
- connection = ::ActiveRecord::Base.connection.raw_connection
16
- connection.copy_data(copy_command) { connection.put_copy_data(csv_data) }
12
+ raw_connection = connection.raw_connection
13
+ raw_connection.copy_data(copy_command(connection)) { raw_connection.put_copy_data(csv_data) }
17
14
  end
18
15
 
19
16
  def table_name
@@ -28,18 +25,18 @@ module ActiveRecordDataLoader
28
25
 
29
26
  attr_reader :data_generator
30
27
 
31
- def csv_data_batch(row_numbers)
28
+ def csv_data_batch(row_numbers, connection)
32
29
  row_numbers.map do |i|
33
- data_generator.generate_row(i).map { |d| quote_data(d) }.join(",")
30
+ data_generator.generate_row(i).map { |d| quote_data(d, connection) }.join(",")
34
31
  end.join("\n")
35
32
  end
36
33
 
37
- def copy_command
34
+ def copy_command(connection)
38
35
  @copy_command ||= begin
39
- quoted_table_name = ::ActiveRecord::Base.connection.quote_table_name(data_generator.table)
36
+ quoted_table_name = connection.quote_table_name(data_generator.table)
40
37
  columns = data_generator
41
38
  .column_list
42
- .map { |c| ::ActiveRecord::Base.connection.quote_column_name(c) }
39
+ .map { |c| connection.quote_column_name(c) }
43
40
  .join(", ")
44
41
 
45
42
  <<~SQL
@@ -49,10 +46,10 @@ module ActiveRecordDataLoader
49
46
  end
50
47
  end
51
48
 
52
- def quote_data(data)
49
+ def quote_data(data, connection)
53
50
  return if data.nil?
54
51
 
55
- "\"#{::ActiveRecord::Base.connection.quote_string(data.to_s)}\""
52
+ "\"#{connection.quote_string(data.to_s)}\""
56
53
  end
57
54
  end
58
55
  end
@@ -0,0 +1,15 @@
1
+ # frozen_string_literal: true
2
+
3
+ module ActiveRecordDataLoader
4
+ module Dsl
5
+ class BelongsToAssociation
6
+ attr_reader :model_class, :name, :query
7
+
8
+ def initialize(model_class, name, query = nil)
9
+ @model_class = model_class
10
+ @name = name
11
+ @query = query
12
+ end
13
+ end
14
+ end
15
+ end
@@ -3,7 +3,7 @@
3
3
  module ActiveRecordDataLoader
4
4
  module Dsl
5
5
  class Model
6
- attr_reader :klass, :columns, :row_count, :polymorphic_associations
6
+ attr_reader :klass, :columns, :row_count, :polymorphic_associations, :belongs_to_associations
7
7
 
8
8
  def initialize(klass:, configuration:)
9
9
  @klass = klass
@@ -11,6 +11,7 @@ module ActiveRecordDataLoader
11
11
  @row_count = configuration.default_row_count
12
12
  @batch_size = configuration.default_batch_size
13
13
  @polymorphic_associations = []
14
+ @belongs_to_associations = []
14
15
  end
15
16
 
16
17
  def count(count)
@@ -30,6 +31,10 @@ module ActiveRecordDataLoader
30
31
  @klass, assoc_name
31
32
  ).tap { |a| block.call(a) }
32
33
  end
34
+
35
+ def belongs_to(assoc_name, eligible_set:)
36
+ @belongs_to_associations << BelongsToAssociation.new(@klass, assoc_name, eligible_set)
37
+ end
33
38
  end
34
39
  end
35
40
  end
@@ -3,16 +3,18 @@
3
3
  module ActiveRecordDataLoader
4
4
  module Dsl
5
5
  class PolymorphicAssociation
6
- attr_reader :model_class, :name, :models
6
+ attr_reader :model_class, :name, :models, :queries
7
7
 
8
8
  def initialize(model_class, name)
9
9
  @model_class = model_class
10
10
  @name = name
11
11
  @models = {}
12
+ @queries = {}
12
13
  end
13
14
 
14
- def model(klass, weight: 1)
15
+ def model(klass, weight: 1, eligible_set: nil)
15
16
  @models[klass] = weight.to_i
17
+ @queries[klass] = eligible_set if eligible_set
16
18
  end
17
19
 
18
20
  def weighted_models
@@ -9,18 +9,20 @@ module ActiveRecordDataLoader
9
9
  data_generator:,
10
10
  total_rows:,
11
11
  batch_size:,
12
- logger:
12
+ configuration:
13
13
  )
14
14
  new(
15
- logger: logger,
16
- strategy: strategy_class.new(data_generator)
15
+ logger: configuration.logger,
16
+ statement_timeout: configuration.statement_timeout,
17
+ strategy: strategy_class(configuration.connection_factory).new(data_generator),
18
+ connection_factory: configuration.connection_factory
17
19
  ).load_data(batch_size, total_rows)
18
20
  end
19
21
 
20
22
  private
21
23
 
22
- def strategy_class
23
- if ::ActiveRecord::Base.connection.raw_connection.respond_to?(:copy_data)
24
+ def strategy_class(connection_factory)
25
+ if connection_factory.call.raw_connection.respond_to?(:copy_data)
24
26
  ActiveRecordDataLoader::CopyStrategy
25
27
  else
26
28
  ActiveRecordDataLoader::BulkInsertStrategy
@@ -28,15 +30,18 @@ module ActiveRecordDataLoader
28
30
  end
29
31
  end
30
32
 
31
- def initialize(logger:, strategy:)
33
+ def initialize(logger:, statement_timeout:, strategy:, connection_factory:)
32
34
  @logger = logger
33
35
  @strategy = strategy
36
+ @statement_timeout = statement_timeout
37
+ @connection_factory = connection_factory
34
38
  end
35
39
 
36
40
  def load_data(batch_size, total_rows)
37
41
  batch_count = (total_rows / batch_size.to_f).ceil
38
42
 
39
43
  logger.info(
44
+ "[ActiveRecordDataLoader] "\
40
45
  "Loading #{total_rows} row(s) into '#{strategy.table_name}' via #{strategy.name}. "\
41
46
  "#{batch_size} row(s) per batch, #{batch_count} batch(es)."
42
47
  )
@@ -44,6 +49,7 @@ module ActiveRecordDataLoader
44
49
  load_in_batches(batch_size, total_rows, batch_count)
45
50
  end
46
51
  logger.info(
52
+ "[ActiveRecordDataLoader] "\
47
53
  "Completed loading #{total_rows} row(s) into '#{strategy.table_name}' "\
48
54
  "in #{total_time} seconds."
49
55
  )
@@ -51,16 +57,42 @@ module ActiveRecordDataLoader
51
57
 
52
58
  private
53
59
 
54
- attr_reader :strategy, :logger
60
+ attr_reader :strategy, :statement_timeout, :logger, :connection_factory
55
61
 
56
62
  def load_in_batches(batch_size, total_rows, batch_count)
57
- total_rows.times.each_slice(batch_size).with_index do |row_numbers, i|
58
- time = Benchmark.realtime { strategy.load_batch(row_numbers) }
63
+ with_connection do |connection|
64
+ total_rows.times.each_slice(batch_size).with_index do |row_numbers, i|
65
+ time = Benchmark.realtime { strategy.load_batch(row_numbers, connection) }
59
66
 
60
- logger.debug(
61
- "Completed batch #{i + 1}/#{batch_count}, #{row_numbers.count} row(s) in #{time} seconds"
62
- )
67
+ logger.debug(
68
+ "[ActiveRecordDataLoader] "\
69
+ "Completed batch #{i + 1}/#{batch_count}, #{row_numbers.count} row(s) in #{time} seconds"
70
+ )
71
+ end
63
72
  end
64
73
  end
74
+
75
+ def with_connection
76
+ if connection.adapter_name.downcase.to_sym == :postgresql
77
+ original_timeout = retrieve_statement_timeout
78
+ update_statement_timeout(statement_timeout)
79
+ yield connection
80
+ update_statement_timeout(original_timeout)
81
+ else
82
+ yield connection
83
+ end
84
+ end
85
+
86
+ def retrieve_statement_timeout
87
+ connection.execute("SHOW statement_timeout").first["statement_timeout"]
88
+ end
89
+
90
+ def update_statement_timeout(timeout)
91
+ connection.execute("SET statement_timeout = \"#{timeout}\"")
92
+ end
93
+
94
+ def connection
95
+ connection_factory.call
96
+ end
65
97
  end
66
98
  end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module ActiveRecordDataLoader
4
- VERSION = "0.1.1"
4
+ VERSION = "1.1.0"
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: active_record_data_loader
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.1
4
+ version: 1.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Alejandro Beiderman
8
- autorequire:
8
+ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2019-06-10 00:00:00.000000000 Z
11
+ date: 2021-05-01 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: activerecord
@@ -16,14 +16,14 @@ dependencies:
16
16
  requirements:
17
17
  - - ">="
18
18
  - !ruby/object:Gem::Version
19
- version: '4.0'
19
+ version: '5.0'
20
20
  type: :runtime
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
24
  - - ">="
25
25
  - !ruby/object:Gem::Version
26
- version: '4.0'
26
+ version: '5.0'
27
27
  - !ruby/object:Gem::Dependency
28
28
  name: appraisal
29
29
  requirement: !ruby/object:Gem::Requirement
@@ -66,6 +66,20 @@ dependencies:
66
66
  - - ">="
67
67
  - !ruby/object:Gem::Version
68
68
  version: '0'
69
+ - !ruby/object:Gem::Dependency
70
+ name: mysql2
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - ">="
74
+ - !ruby/object:Gem::Version
75
+ version: '0'
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - ">="
81
+ - !ruby/object:Gem::Version
82
+ version: '0'
69
83
  - !ruby/object:Gem::Dependency
70
84
  name: pg
71
85
  requirement: !ruby/object:Gem::Requirement
@@ -100,14 +114,14 @@ dependencies:
100
114
  requirements:
101
115
  - - "~>"
102
116
  - !ruby/object:Gem::Version
103
- version: '12.0'
117
+ version: '13.0'
104
118
  type: :development
105
119
  prerelease: false
106
120
  version_requirements: !ruby/object:Gem::Requirement
107
121
  requirements:
108
122
  - - "~>"
109
123
  - !ruby/object:Gem::Version
110
- version: '12.0'
124
+ version: '13.0'
111
125
  - !ruby/object:Gem::Dependency
112
126
  name: rspec
113
127
  requirement: !ruby/object:Gem::Requirement
@@ -164,6 +178,20 @@ dependencies:
164
178
  - - ">="
165
179
  - !ruby/object:Gem::Version
166
180
  version: '0'
181
+ - !ruby/object:Gem::Dependency
182
+ name: timecop
183
+ requirement: !ruby/object:Gem::Requirement
184
+ requirements:
185
+ - - ">="
186
+ - !ruby/object:Gem::Version
187
+ version: '0'
188
+ type: :development
189
+ prerelease: false
190
+ version_requirements: !ruby/object:Gem::Requirement
191
+ requirements:
192
+ - - ">="
193
+ - !ruby/object:Gem::Version
194
+ version: '0'
167
195
  description: A utility to bulk load test data for performance testing.
168
196
  email:
169
197
  - abeiderman@gmail.com
@@ -178,6 +206,7 @@ files:
178
206
  - ".rubocop.yml"
179
207
  - ".travis.yml"
180
208
  - Appraisals
209
+ - CHANGELOG.md
181
210
  - CODE_OF_CONDUCT.md
182
211
  - Gemfile
183
212
  - Gemfile.lock
@@ -189,36 +218,40 @@ files:
189
218
  - bin/setup
190
219
  - config/database.yml
191
220
  - config/database.yml.travis
221
+ - docker-compose.yml
192
222
  - gemfiles/.bundle/config
193
223
  - gemfiles/activerecord_5.gemfile
194
224
  - gemfiles/activerecord_6.gemfile
195
225
  - gemfiles/faker.gemfile
196
226
  - gemfiles/ffaker.gemfile
227
+ - gemfiles/rails.gemfile
197
228
  - lib/active_record_data_loader.rb
198
229
  - lib/active_record_data_loader/active_record/belongs_to_configuration.rb
199
230
  - lib/active_record_data_loader/active_record/column_configuration.rb
231
+ - lib/active_record_data_loader/active_record/datetime_value_generator.rb
200
232
  - lib/active_record_data_loader/active_record/enum_value_generator.rb
201
233
  - lib/active_record_data_loader/active_record/integer_value_generator.rb
202
234
  - lib/active_record_data_loader/active_record/model_data_generator.rb
235
+ - lib/active_record_data_loader/active_record/per_row_value_cache.rb
203
236
  - lib/active_record_data_loader/active_record/polymorphic_belongs_to_configuration.rb
204
237
  - lib/active_record_data_loader/active_record/text_value_generator.rb
205
238
  - lib/active_record_data_loader/bulk_insert_strategy.rb
206
239
  - lib/active_record_data_loader/configuration.rb
207
240
  - lib/active_record_data_loader/copy_strategy.rb
208
241
  - lib/active_record_data_loader/data_faker.rb
242
+ - lib/active_record_data_loader/dsl/belongs_to_association.rb
209
243
  - lib/active_record_data_loader/dsl/definition.rb
210
244
  - lib/active_record_data_loader/dsl/model.rb
211
245
  - lib/active_record_data_loader/dsl/polymorphic_association.rb
212
246
  - lib/active_record_data_loader/loader.rb
213
247
  - lib/active_record_data_loader/version.rb
214
248
  - log/.keep
215
- - script/ci_build.sh
216
- homepage:
249
+ homepage:
217
250
  licenses:
218
251
  - MIT
219
252
  metadata:
220
253
  source_code_uri: https://github.com/abeiderman/active_record_data_loader
221
- post_install_message:
254
+ post_install_message:
222
255
  rdoc_options: []
223
256
  require_paths:
224
257
  - lib
@@ -226,15 +259,15 @@ required_ruby_version: !ruby/object:Gem::Requirement
226
259
  requirements:
227
260
  - - ">="
228
261
  - !ruby/object:Gem::Version
229
- version: 2.3.0
262
+ version: 2.5.0
230
263
  required_rubygems_version: !ruby/object:Gem::Requirement
231
264
  requirements:
232
265
  - - ">="
233
266
  - !ruby/object:Gem::Version
234
267
  version: '0'
235
268
  requirements: []
236
- rubygems_version: 3.0.3
237
- signing_key:
269
+ rubygems_version: 3.1.4
270
+ signing_key:
238
271
  specification_version: 4
239
272
  summary: A utility to bulk load test data for performance testing.
240
273
  test_files: []