active_record_data_loader 0.1.1 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34) hide show
  1. checksums.yaml +4 -4
  2. data/.rubocop.yml +33 -7
  3. data/.travis.yml +16 -3
  4. data/Appraisals +2 -2
  5. data/CHANGELOG.md +24 -0
  6. data/Gemfile.lock +56 -46
  7. data/README.md +182 -4
  8. data/Rakefile +2 -0
  9. data/active_record_data_loader.gemspec +5 -3
  10. data/config/database.yml +9 -0
  11. data/config/database.yml.travis +5 -0
  12. data/docker-compose.yml +18 -0
  13. data/gemfiles/activerecord_6.gemfile +1 -1
  14. data/gemfiles/rails.gemfile +7 -0
  15. data/lib/active_record_data_loader.rb +23 -14
  16. data/lib/active_record_data_loader/active_record/belongs_to_configuration.rb +13 -4
  17. data/lib/active_record_data_loader/active_record/column_configuration.rb +14 -4
  18. data/lib/active_record_data_loader/active_record/datetime_value_generator.rb +21 -0
  19. data/lib/active_record_data_loader/active_record/enum_value_generator.rb +25 -3
  20. data/lib/active_record_data_loader/active_record/integer_value_generator.rb +1 -1
  21. data/lib/active_record_data_loader/active_record/model_data_generator.rb +20 -4
  22. data/lib/active_record_data_loader/active_record/per_row_value_cache.rb +33 -0
  23. data/lib/active_record_data_loader/active_record/polymorphic_belongs_to_configuration.rb +9 -1
  24. data/lib/active_record_data_loader/active_record/text_value_generator.rb +1 -1
  25. data/lib/active_record_data_loader/bulk_insert_strategy.rb +1 -6
  26. data/lib/active_record_data_loader/configuration.rb +17 -3
  27. data/lib/active_record_data_loader/copy_strategy.rb +11 -14
  28. data/lib/active_record_data_loader/dsl/belongs_to_association.rb +15 -0
  29. data/lib/active_record_data_loader/dsl/model.rb +6 -1
  30. data/lib/active_record_data_loader/dsl/polymorphic_association.rb +4 -2
  31. data/lib/active_record_data_loader/loader.rb +44 -12
  32. data/lib/active_record_data_loader/version.rb +1 -1
  33. metadata +46 -13
  34. data/script/ci_build.sh +0 -6
@@ -1,17 +1,12 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require "csv"
4
- require "benchmark"
5
-
6
3
  module ActiveRecordDataLoader
7
4
  class BulkInsertStrategy
8
5
  def initialize(data_generator)
9
6
  @data_generator = data_generator
10
7
  end
11
8
 
12
- def load_batch(row_numbers)
13
- connection = ::ActiveRecord::Base.connection
14
-
9
+ def load_batch(row_numbers, connection)
15
10
  connection.insert(<<~SQL)
16
11
  INSERT INTO #{quoted_table_name(connection)} (#{column_list(connection)})
17
12
  VALUES #{values(row_numbers, connection)}
@@ -2,16 +2,30 @@
2
2
 
3
3
  module ActiveRecordDataLoader
4
4
  class Configuration
5
- attr_accessor :default_batch_size, :default_row_count, :logger
5
+ attr_accessor :default_batch_size, :default_row_count, :logger, :statement_timeout, :connection_factory
6
6
 
7
7
  def initialize(
8
8
  default_batch_size: 100_000,
9
9
  default_row_count: 1,
10
- logger: Logger.new(STDOUT, level: :info)
10
+ logger: nil,
11
+ statement_timeout: "2min",
12
+ connection_factory: -> { ::ActiveRecord::Base.connection }
11
13
  )
12
14
  @default_batch_size = default_batch_size
13
15
  @default_row_count = default_row_count
14
- @logger = logger
16
+ @logger = logger || default_logger
17
+ @statement_timeout = statement_timeout
18
+ @connection_factory = connection_factory
19
+ end
20
+
21
+ private
22
+
23
+ def default_logger
24
+ if defined?(Rails) && Rails.respond_to?(:logger)
25
+ Rails.logger
26
+ else
27
+ Logger.new($stdout, level: :info)
28
+ end
15
29
  end
16
30
  end
17
31
  end
@@ -1,19 +1,16 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require "csv"
4
- require "benchmark"
5
-
6
3
  module ActiveRecordDataLoader
7
4
  class CopyStrategy
8
5
  def initialize(data_generator)
9
6
  @data_generator = data_generator
10
7
  end
11
8
 
12
- def load_batch(row_numbers)
13
- csv_data = csv_data_batch(row_numbers)
9
+ def load_batch(row_numbers, connection)
10
+ csv_data = csv_data_batch(row_numbers, connection)
14
11
 
15
- connection = ::ActiveRecord::Base.connection.raw_connection
16
- connection.copy_data(copy_command) { connection.put_copy_data(csv_data) }
12
+ raw_connection = connection.raw_connection
13
+ raw_connection.copy_data(copy_command(connection)) { raw_connection.put_copy_data(csv_data) }
17
14
  end
18
15
 
19
16
  def table_name
@@ -28,18 +25,18 @@ module ActiveRecordDataLoader
28
25
 
29
26
  attr_reader :data_generator
30
27
 
31
- def csv_data_batch(row_numbers)
28
+ def csv_data_batch(row_numbers, connection)
32
29
  row_numbers.map do |i|
33
- data_generator.generate_row(i).map { |d| quote_data(d) }.join(",")
30
+ data_generator.generate_row(i).map { |d| quote_data(d, connection) }.join(",")
34
31
  end.join("\n")
35
32
  end
36
33
 
37
- def copy_command
34
+ def copy_command(connection)
38
35
  @copy_command ||= begin
39
- quoted_table_name = ::ActiveRecord::Base.connection.quote_table_name(data_generator.table)
36
+ quoted_table_name = connection.quote_table_name(data_generator.table)
40
37
  columns = data_generator
41
38
  .column_list
42
- .map { |c| ::ActiveRecord::Base.connection.quote_column_name(c) }
39
+ .map { |c| connection.quote_column_name(c) }
43
40
  .join(", ")
44
41
 
45
42
  <<~SQL
@@ -49,10 +46,10 @@ module ActiveRecordDataLoader
49
46
  end
50
47
  end
51
48
 
52
- def quote_data(data)
49
+ def quote_data(data, connection)
53
50
  return if data.nil?
54
51
 
55
- "\"#{::ActiveRecord::Base.connection.quote_string(data.to_s)}\""
52
+ "\"#{connection.quote_string(data.to_s)}\""
56
53
  end
57
54
  end
58
55
  end
@@ -0,0 +1,15 @@
1
+ # frozen_string_literal: true
2
+
3
+ module ActiveRecordDataLoader
4
+ module Dsl
5
+ class BelongsToAssociation
6
+ attr_reader :model_class, :name, :query
7
+
8
+ def initialize(model_class, name, query = nil)
9
+ @model_class = model_class
10
+ @name = name
11
+ @query = query
12
+ end
13
+ end
14
+ end
15
+ end
@@ -3,7 +3,7 @@
3
3
  module ActiveRecordDataLoader
4
4
  module Dsl
5
5
  class Model
6
- attr_reader :klass, :columns, :row_count, :polymorphic_associations
6
+ attr_reader :klass, :columns, :row_count, :polymorphic_associations, :belongs_to_associations
7
7
 
8
8
  def initialize(klass:, configuration:)
9
9
  @klass = klass
@@ -11,6 +11,7 @@ module ActiveRecordDataLoader
11
11
  @row_count = configuration.default_row_count
12
12
  @batch_size = configuration.default_batch_size
13
13
  @polymorphic_associations = []
14
+ @belongs_to_associations = []
14
15
  end
15
16
 
16
17
  def count(count)
@@ -30,6 +31,10 @@ module ActiveRecordDataLoader
30
31
  @klass, assoc_name
31
32
  ).tap { |a| block.call(a) }
32
33
  end
34
+
35
+ def belongs_to(assoc_name, eligible_set:)
36
+ @belongs_to_associations << BelongsToAssociation.new(@klass, assoc_name, eligible_set)
37
+ end
33
38
  end
34
39
  end
35
40
  end
@@ -3,16 +3,18 @@
3
3
  module ActiveRecordDataLoader
4
4
  module Dsl
5
5
  class PolymorphicAssociation
6
- attr_reader :model_class, :name, :models
6
+ attr_reader :model_class, :name, :models, :queries
7
7
 
8
8
  def initialize(model_class, name)
9
9
  @model_class = model_class
10
10
  @name = name
11
11
  @models = {}
12
+ @queries = {}
12
13
  end
13
14
 
14
- def model(klass, weight: 1)
15
+ def model(klass, weight: 1, eligible_set: nil)
15
16
  @models[klass] = weight.to_i
17
+ @queries[klass] = eligible_set if eligible_set
16
18
  end
17
19
 
18
20
  def weighted_models
@@ -9,18 +9,20 @@ module ActiveRecordDataLoader
9
9
  data_generator:,
10
10
  total_rows:,
11
11
  batch_size:,
12
- logger:
12
+ configuration:
13
13
  )
14
14
  new(
15
- logger: logger,
16
- strategy: strategy_class.new(data_generator)
15
+ logger: configuration.logger,
16
+ statement_timeout: configuration.statement_timeout,
17
+ strategy: strategy_class(configuration.connection_factory).new(data_generator),
18
+ connection_factory: configuration.connection_factory
17
19
  ).load_data(batch_size, total_rows)
18
20
  end
19
21
 
20
22
  private
21
23
 
22
- def strategy_class
23
- if ::ActiveRecord::Base.connection.raw_connection.respond_to?(:copy_data)
24
+ def strategy_class(connection_factory)
25
+ if connection_factory.call.raw_connection.respond_to?(:copy_data)
24
26
  ActiveRecordDataLoader::CopyStrategy
25
27
  else
26
28
  ActiveRecordDataLoader::BulkInsertStrategy
@@ -28,15 +30,18 @@ module ActiveRecordDataLoader
28
30
  end
29
31
  end
30
32
 
31
- def initialize(logger:, strategy:)
33
+ def initialize(logger:, statement_timeout:, strategy:, connection_factory:)
32
34
  @logger = logger
33
35
  @strategy = strategy
36
+ @statement_timeout = statement_timeout
37
+ @connection_factory = connection_factory
34
38
  end
35
39
 
36
40
  def load_data(batch_size, total_rows)
37
41
  batch_count = (total_rows / batch_size.to_f).ceil
38
42
 
39
43
  logger.info(
44
+ "[ActiveRecordDataLoader] "\
40
45
  "Loading #{total_rows} row(s) into '#{strategy.table_name}' via #{strategy.name}. "\
41
46
  "#{batch_size} row(s) per batch, #{batch_count} batch(es)."
42
47
  )
@@ -44,6 +49,7 @@ module ActiveRecordDataLoader
44
49
  load_in_batches(batch_size, total_rows, batch_count)
45
50
  end
46
51
  logger.info(
52
+ "[ActiveRecordDataLoader] "\
47
53
  "Completed loading #{total_rows} row(s) into '#{strategy.table_name}' "\
48
54
  "in #{total_time} seconds."
49
55
  )
@@ -51,16 +57,42 @@ module ActiveRecordDataLoader
51
57
 
52
58
  private
53
59
 
54
- attr_reader :strategy, :logger
60
+ attr_reader :strategy, :statement_timeout, :logger, :connection_factory
55
61
 
56
62
  def load_in_batches(batch_size, total_rows, batch_count)
57
- total_rows.times.each_slice(batch_size).with_index do |row_numbers, i|
58
- time = Benchmark.realtime { strategy.load_batch(row_numbers) }
63
+ with_connection do |connection|
64
+ total_rows.times.each_slice(batch_size).with_index do |row_numbers, i|
65
+ time = Benchmark.realtime { strategy.load_batch(row_numbers, connection) }
59
66
 
60
- logger.debug(
61
- "Completed batch #{i + 1}/#{batch_count}, #{row_numbers.count} row(s) in #{time} seconds"
62
- )
67
+ logger.debug(
68
+ "[ActiveRecordDataLoader] "\
69
+ "Completed batch #{i + 1}/#{batch_count}, #{row_numbers.count} row(s) in #{time} seconds"
70
+ )
71
+ end
63
72
  end
64
73
  end
74
+
75
+ def with_connection
76
+ if connection.adapter_name.downcase.to_sym == :postgresql
77
+ original_timeout = retrieve_statement_timeout
78
+ update_statement_timeout(statement_timeout)
79
+ yield connection
80
+ update_statement_timeout(original_timeout)
81
+ else
82
+ yield connection
83
+ end
84
+ end
85
+
86
+ def retrieve_statement_timeout
87
+ connection.execute("SHOW statement_timeout").first["statement_timeout"]
88
+ end
89
+
90
+ def update_statement_timeout(timeout)
91
+ connection.execute("SET statement_timeout = \"#{timeout}\"")
92
+ end
93
+
94
+ def connection
95
+ connection_factory.call
96
+ end
65
97
  end
66
98
  end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module ActiveRecordDataLoader
4
- VERSION = "0.1.1"
4
+ VERSION = "1.1.0"
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: active_record_data_loader
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.1
4
+ version: 1.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Alejandro Beiderman
8
- autorequire:
8
+ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2019-06-10 00:00:00.000000000 Z
11
+ date: 2021-05-01 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: activerecord
@@ -16,14 +16,14 @@ dependencies:
16
16
  requirements:
17
17
  - - ">="
18
18
  - !ruby/object:Gem::Version
19
- version: '4.0'
19
+ version: '5.0'
20
20
  type: :runtime
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
24
  - - ">="
25
25
  - !ruby/object:Gem::Version
26
- version: '4.0'
26
+ version: '5.0'
27
27
  - !ruby/object:Gem::Dependency
28
28
  name: appraisal
29
29
  requirement: !ruby/object:Gem::Requirement
@@ -66,6 +66,20 @@ dependencies:
66
66
  - - ">="
67
67
  - !ruby/object:Gem::Version
68
68
  version: '0'
69
+ - !ruby/object:Gem::Dependency
70
+ name: mysql2
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - ">="
74
+ - !ruby/object:Gem::Version
75
+ version: '0'
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - ">="
81
+ - !ruby/object:Gem::Version
82
+ version: '0'
69
83
  - !ruby/object:Gem::Dependency
70
84
  name: pg
71
85
  requirement: !ruby/object:Gem::Requirement
@@ -100,14 +114,14 @@ dependencies:
100
114
  requirements:
101
115
  - - "~>"
102
116
  - !ruby/object:Gem::Version
103
- version: '12.0'
117
+ version: '13.0'
104
118
  type: :development
105
119
  prerelease: false
106
120
  version_requirements: !ruby/object:Gem::Requirement
107
121
  requirements:
108
122
  - - "~>"
109
123
  - !ruby/object:Gem::Version
110
- version: '12.0'
124
+ version: '13.0'
111
125
  - !ruby/object:Gem::Dependency
112
126
  name: rspec
113
127
  requirement: !ruby/object:Gem::Requirement
@@ -164,6 +178,20 @@ dependencies:
164
178
  - - ">="
165
179
  - !ruby/object:Gem::Version
166
180
  version: '0'
181
+ - !ruby/object:Gem::Dependency
182
+ name: timecop
183
+ requirement: !ruby/object:Gem::Requirement
184
+ requirements:
185
+ - - ">="
186
+ - !ruby/object:Gem::Version
187
+ version: '0'
188
+ type: :development
189
+ prerelease: false
190
+ version_requirements: !ruby/object:Gem::Requirement
191
+ requirements:
192
+ - - ">="
193
+ - !ruby/object:Gem::Version
194
+ version: '0'
167
195
  description: A utility to bulk load test data for performance testing.
168
196
  email:
169
197
  - abeiderman@gmail.com
@@ -178,6 +206,7 @@ files:
178
206
  - ".rubocop.yml"
179
207
  - ".travis.yml"
180
208
  - Appraisals
209
+ - CHANGELOG.md
181
210
  - CODE_OF_CONDUCT.md
182
211
  - Gemfile
183
212
  - Gemfile.lock
@@ -189,36 +218,40 @@ files:
189
218
  - bin/setup
190
219
  - config/database.yml
191
220
  - config/database.yml.travis
221
+ - docker-compose.yml
192
222
  - gemfiles/.bundle/config
193
223
  - gemfiles/activerecord_5.gemfile
194
224
  - gemfiles/activerecord_6.gemfile
195
225
  - gemfiles/faker.gemfile
196
226
  - gemfiles/ffaker.gemfile
227
+ - gemfiles/rails.gemfile
197
228
  - lib/active_record_data_loader.rb
198
229
  - lib/active_record_data_loader/active_record/belongs_to_configuration.rb
199
230
  - lib/active_record_data_loader/active_record/column_configuration.rb
231
+ - lib/active_record_data_loader/active_record/datetime_value_generator.rb
200
232
  - lib/active_record_data_loader/active_record/enum_value_generator.rb
201
233
  - lib/active_record_data_loader/active_record/integer_value_generator.rb
202
234
  - lib/active_record_data_loader/active_record/model_data_generator.rb
235
+ - lib/active_record_data_loader/active_record/per_row_value_cache.rb
203
236
  - lib/active_record_data_loader/active_record/polymorphic_belongs_to_configuration.rb
204
237
  - lib/active_record_data_loader/active_record/text_value_generator.rb
205
238
  - lib/active_record_data_loader/bulk_insert_strategy.rb
206
239
  - lib/active_record_data_loader/configuration.rb
207
240
  - lib/active_record_data_loader/copy_strategy.rb
208
241
  - lib/active_record_data_loader/data_faker.rb
242
+ - lib/active_record_data_loader/dsl/belongs_to_association.rb
209
243
  - lib/active_record_data_loader/dsl/definition.rb
210
244
  - lib/active_record_data_loader/dsl/model.rb
211
245
  - lib/active_record_data_loader/dsl/polymorphic_association.rb
212
246
  - lib/active_record_data_loader/loader.rb
213
247
  - lib/active_record_data_loader/version.rb
214
248
  - log/.keep
215
- - script/ci_build.sh
216
- homepage:
249
+ homepage:
217
250
  licenses:
218
251
  - MIT
219
252
  metadata:
220
253
  source_code_uri: https://github.com/abeiderman/active_record_data_loader
221
- post_install_message:
254
+ post_install_message:
222
255
  rdoc_options: []
223
256
  require_paths:
224
257
  - lib
@@ -226,15 +259,15 @@ required_ruby_version: !ruby/object:Gem::Requirement
226
259
  requirements:
227
260
  - - ">="
228
261
  - !ruby/object:Gem::Version
229
- version: 2.3.0
262
+ version: 2.5.0
230
263
  required_rubygems_version: !ruby/object:Gem::Requirement
231
264
  requirements:
232
265
  - - ">="
233
266
  - !ruby/object:Gem::Version
234
267
  version: '0'
235
268
  requirements: []
236
- rubygems_version: 3.0.3
237
- signing_key:
269
+ rubygems_version: 3.1.4
270
+ signing_key:
238
271
  specification_version: 4
239
272
  summary: A utility to bulk load test data for performance testing.
240
273
  test_files: []