active_record_data_loader 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +13 -0
  3. data/.rspec +3 -0
  4. data/.rubocop.yml +38 -0
  5. data/.travis.yml +11 -0
  6. data/Appraisals +17 -0
  7. data/CODE_OF_CONDUCT.md +74 -0
  8. data/Gemfile +8 -0
  9. data/Gemfile.lock +107 -0
  10. data/LICENSE.txt +21 -0
  11. data/README.md +49 -0
  12. data/Rakefile +10 -0
  13. data/active_record_data_loader.gemspec +47 -0
  14. data/bin/console +15 -0
  15. data/bin/setup +8 -0
  16. data/config/database.yml +10 -0
  17. data/config/database.yml.travis +7 -0
  18. data/gemfiles/.bundle/config +2 -0
  19. data/gemfiles/activerecord_5.gemfile +7 -0
  20. data/gemfiles/activerecord_6.gemfile +7 -0
  21. data/gemfiles/faker.gemfile +7 -0
  22. data/gemfiles/ffaker.gemfile +7 -0
  23. data/lib/active_record_data_loader.rb +64 -0
  24. data/lib/active_record_data_loader/active_record/belongs_to_configuration.rb +27 -0
  25. data/lib/active_record_data_loader/active_record/column_configuration.rb +43 -0
  26. data/lib/active_record_data_loader/active_record/enum_value_generator.rb +25 -0
  27. data/lib/active_record_data_loader/active_record/integer_value_generator.rb +21 -0
  28. data/lib/active_record_data_loader/active_record/model_data_generator.rb +72 -0
  29. data/lib/active_record_data_loader/active_record/polymorphic_belongs_to_configuration.rb +49 -0
  30. data/lib/active_record_data_loader/active_record/text_value_generator.rb +72 -0
  31. data/lib/active_record_data_loader/bulk_insert_strategy.rb +57 -0
  32. data/lib/active_record_data_loader/configuration.rb +17 -0
  33. data/lib/active_record_data_loader/copy_strategy.rb +58 -0
  34. data/lib/active_record_data_loader/data_faker.rb +85 -0
  35. data/lib/active_record_data_loader/dsl/definition.rb +25 -0
  36. data/lib/active_record_data_loader/dsl/model.rb +35 -0
  37. data/lib/active_record_data_loader/dsl/polymorphic_association.rb +25 -0
  38. data/lib/active_record_data_loader/loader.rb +66 -0
  39. data/lib/active_record_data_loader/version.rb +5 -0
  40. data/script/ci_build.sh +6 -0
  41. metadata +240 -0
@@ -0,0 +1,57 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "csv"
4
+ require "benchmark"
5
+
6
module ActiveRecordDataLoader
  # Loads generated rows with one multi-row `INSERT ... VALUES` statement per
  # batch, issued through the current ActiveRecord connection.
  class BulkInsertStrategy
    # @param data_generator [#table, #column_list, #generate_row] supplies the
    #   target table name, the column names and the values for a row number
    def initialize(data_generator)
      @data_generator = data_generator
    end

    # Inserts one row per entry of +row_numbers+ in a single statement.
    #
    # @param row_numbers [Enumerable] row numbers passed to the data generator
    # @return [Object, nil] the result of `connection.insert`, or nil when
    #   +row_numbers+ yields no rows and therefore no statement is sent
    def load_batch(row_numbers)
      connection = ::ActiveRecord::Base.connection
      rows_sql = values(row_numbers, connection)
      # `INSERT ... VALUES` with no tuples is not valid SQL; skip the round trip.
      return if rows_sql.empty?

      connection.insert(<<~SQL)
        INSERT INTO #{quoted_table_name(connection)} (#{column_list(connection)})
        VALUES #{rows_sql}
      SQL
    end

    # @return [Object] the table reported by the data generator
    def table_name
      data_generator.table
    end

    # @return [String] label identifying this strategy
    def name
      "BULK INSERT"
    end

    private

    attr_reader :data_generator

    # Quoted table identifier, computed once per strategy instance.
    def quoted_table_name(connection)
      @quoted_table_name ||= connection.quote_table_name(data_generator.table)
    end

    # Comma-separated quoted column identifiers, computed once per instance.
    def column_list(connection)
      @column_list ||= data_generator
                       .column_list
                       .map { |c| connection.quote_column_name(c) }
                       .join(",")
    end

    # Builds the `(..),(..)` tuple list for every requested row number.
    def values(row_numbers, connection)
      row_numbers
        .map { |i| "(#{row_values(i, connection)})" }
        .join(",")
    end

    # Quotes each generated value of one row via the connection.
    def row_values(row_number, connection)
      data_generator
        .generate_row(row_number)
        .map { |v| connection.quote(v) }
        .join(",")
    end
  end
end
@@ -0,0 +1,17 @@
1
+ # frozen_string_literal: true
2
+
3
# The default logger below needs the stdlib Logger class; require it here so
# this file does not depend on another library having loaded it first.
require "logger"

module ActiveRecordDataLoader
  # Holds the defaults applied to model definitions and the logger used to
  # report progress.
  class Configuration
    attr_accessor :default_batch_size, :default_row_count, :logger

    # @param default_batch_size [Integer] default batch size (100_000)
    # @param default_row_count [Integer] default row count (1)
    # @param logger [Object] logger instance; defaults to a new Logger writing
    #   to STDOUT at the :info level
    def initialize(
      default_batch_size: 100_000,
      default_row_count: 1,
      logger: Logger.new(STDOUT, level: :info)
    )
      @default_batch_size = default_batch_size
      @default_row_count = default_row_count
      @logger = logger
    end
  end
end
@@ -0,0 +1,58 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "csv"
4
+ require "benchmark"
5
+
6
module ActiveRecordDataLoader
  # Loads generated rows by streaming CSV text through a
  # `COPY ... FROM STDIN WITH (FORMAT CSV)` command on the raw database
  # connection. The raw connection must respond to `copy_data` and
  # `put_copy_data`.
  class CopyStrategy
    # @param data_generator [#table, #column_list, #generate_row] supplies the
    #   target table name, the column names and the values for a row number
    def initialize(data_generator)
      @data_generator = data_generator
    end

    # Generates the CSV for +row_numbers+ and sends it in one COPY operation.
    #
    # @param row_numbers [Enumerable] row numbers passed to the data generator
    # @return [Object] whatever the raw connection's `copy_data` returns
    def load_batch(row_numbers)
      csv_data = csv_data_batch(row_numbers)

      connection = ::ActiveRecord::Base.connection.raw_connection
      connection.copy_data(copy_command) { connection.put_copy_data(csv_data) }
    end

    # @return [Object] the table reported by the data generator
    def table_name
      data_generator.table
    end

    # @return [String] label identifying this strategy
    def name
      "COPY"
    end

    private

    attr_reader :data_generator

    # One CSV line per row number, joined with newlines (no trailing newline).
    def csv_data_batch(row_numbers)
      row_numbers.map do |i|
        data_generator.generate_row(i).map { |d| quote_data(d) }.join(",")
      end.join("\n")
    end

    # COPY command text, built once per strategy instance.
    def copy_command
      @copy_command ||= begin
        quoted_table_name = ::ActiveRecord::Base.connection.quote_table_name(data_generator.table)
        columns = data_generator
                  .column_list
                  .map { |c| ::ActiveRecord::Base.connection.quote_column_name(c) }
                  .join(", ")

        <<~SQL
          COPY #{quoted_table_name} (#{columns})
          FROM STDIN WITH (FORMAT CSV)
        SQL
      end
    end

    # Renders one value as a CSV field.
    #
    # nil yields nil, which `join` turns into an empty unquoted field.
    # Every other value is wrapped in double quotes, and embedded double
    # quotes are doubled, which is the CSV escaping rule. SQL string-literal
    # escaping must not be used here: it would double single quotes (changing
    # the stored data) and leave embedded double quotes unescaped (breaking
    # the row).
    def quote_data(data)
      return if data.nil?

      "\"#{data.to_s.gsub('"', '""')}\""
    end
  end
end
@@ -0,0 +1,85 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "forwardable"
4
+
5
module ActiveRecordDataLoader
  # Class-level facade for fake names. Calls are forwarded to an adapter that
  # is picked on first use: ffaker if that gem is loaded, otherwise faker if
  # that gem is loaded, otherwise a small built-in fallback.
  class DataFaker
    class << self
      extend Forwardable

      def_delegators :adapter, :person_name, :first_name, :middle_name, :last_name, :company_name

      private

      # Memoized adapter instance shared by every delegated call.
      def adapter
        @adapter ||= build_adapter
      end

      # Chooses the adapter from the gems listed in Gem.loaded_specs. The
      # matching gem is required lazily, only when it is selected.
      def build_adapter
        if Gem.loaded_specs.key?("ffaker")
          require "ffaker"
          return FFakerGemAdapter.new
        end

        if Gem.loaded_specs.key?("faker")
          require "faker"
          return FakerGemAdapter.new
        end

        NoGemAdapter.new
      end
    end

    # Adapter forwarding to FFaker. Middle names reuse FFaker's first names.
    class FFakerGemAdapter
      extend Forwardable

      def_delegator :ffaker_name, :name, :person_name
      def_delegator :ffaker_name, :first_name
      def_delegator :ffaker_name, :first_name, :middle_name
      def_delegator :ffaker_name, :last_name

      def ffaker_name
        FFaker::Name
      end

      def company_name
        FFaker::Company.name
      end
    end

    # Adapter forwarding to Faker.
    class FakerGemAdapter
      extend Forwardable

      def_delegator :faker_name, :name, :person_name
      def_delegator :faker_name, :first_name
      def_delegator :faker_name, :middle_name
      def_delegator :faker_name, :last_name

      def faker_name
        Faker::Name
      end

      def company_name
        Faker::Company.name
      end
    end

    # Fallback used when neither gem is loaded: samples from short fixed lists.
    class NoGemAdapter
      FIRST_NAMES = %w[John Mary].freeze
      MIDDLE_NAMES = %w[Madison Ashley].freeze
      LAST_NAMES = %w[Doe Smith].freeze

      def person_name
        [first_name, middle_name, last_name].join(" ")
      end

      def first_name
        FIRST_NAMES.sample
      end

      def middle_name
        MIDDLE_NAMES.sample
      end

      def last_name
        LAST_NAMES.sample
      end

      def company_name
        "Acme"
      end
    end
  end
end
@@ -0,0 +1,25 @@
1
+ # frozen_string_literal: true
2
+
3
module ActiveRecordDataLoader
  module Dsl
    # Top-level DSL object: accumulates one Model entry per `model` call.
    class Definition
      attr_reader :models

      # @param config [Object] configuration handed to every Model created
      #   here; defaults to ActiveRecordDataLoader.configuration
      def initialize(config = ActiveRecordDataLoader.configuration)
        @config = config
        @models = []
      end

      # Creates a Model for +klass+, passes it to the optional block, then
      # records it in #models.
      #
      # @param klass [Object] the class the Model describes
      # @return [Model] the newly created entry
      def model(klass, &block)
        entry = Model.new(klass: klass, configuration: config)
        block.call(entry) if block
        models.push(entry)
        entry
      end

      private

      attr_reader :config
    end
  end
end
@@ -0,0 +1,35 @@
1
+ # frozen_string_literal: true
2
+
3
module ActiveRecordDataLoader
  module Dsl
    # DSL entry describing how to populate one model class: row count, batch
    # size, per-column value sources and polymorphic associations.
    class Model
      attr_reader :klass, :columns, :row_count, :polymorphic_associations

      # @param klass [Object] the class being described
      # @param configuration [#default_row_count, #default_batch_size]
      #   supplies the initial row count and batch size
      def initialize(klass:, configuration:)
        @klass = klass
        @row_count = configuration.default_row_count
        @batch_size = configuration.default_batch_size
        @columns = {}
        @polymorphic_associations = []
      end

      # Sets the number of rows to load.
      def count(count)
        @row_count = count
      end

      # Combined reader and writer: a truthy +size+ replaces the stored batch
      # size. The current batch size is returned either way.
      def batch_size(size = nil)
        @batch_size = size if size
        @batch_size
      end

      # Registers +func+ as the value source for column +name+, which is
      # stored under its symbol form.
      def column(name, func)
        @columns[name.to_sym] = func
      end

      # Builds a PolymorphicAssociation for +assoc_name+, passes it to the
      # block for configuration, then records it. The block is required.
      #
      # @return [Array] the updated list of polymorphic associations
      def polymorphic(assoc_name, &block)
        association = PolymorphicAssociation.new(@klass, assoc_name)
        block.call(association)
        @polymorphic_associations << association
      end
    end
  end
end
@@ -0,0 +1,25 @@
1
+ # frozen_string_literal: true
2
+
3
module ActiveRecordDataLoader
  module Dsl
    # DSL entry listing the candidate classes for one polymorphic association
    # together with an integer weight for each.
    class PolymorphicAssociation
      attr_reader :model_class, :name, :models

      # @param model_class [Object] the class that owns the association
      # @param name [Object] the association name
      def initialize(model_class, name)
        @model_class = model_class
        @name = name
        @models = {}
      end

      # Registers +klass+ as a candidate. The weight is converted with #to_i.
      #
      # @return [Integer] the stored weight
      def model(klass, weight: 1)
        @models[klass] = weight.to_i
      end

      # Expands the candidates into a flat list in which each class appears
      # proportionally to its weight. Weights are first divided by their
      # greatest common divisor to keep the list short.
      #
      # @return [Array] empty when no candidate is registered or when every
      #   weight is zero (their gcd is then 0, which cannot be divided by)
      def weighted_models
        gcd = models.values.reduce(:gcd)
        return [] if gcd.nil? || gcd.zero?

        models.flat_map { |m, w| [m] * (w / gcd) }
      end
    end
  end
end
@@ -0,0 +1,66 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "benchmark"
4
+
5
module ActiveRecordDataLoader
  # Drives a load: splits the requested row numbers into batches, hands each
  # batch to a strategy and logs timing information.
  class Loader
    class << self
      # Picks a strategy for the current connection and runs the whole load.
      #
      # @param data_generator [Object] passed to the strategy constructor
      # @param total_rows [Integer] number of rows to load
      # @param batch_size [Integer] maximum rows per batch
      # @param logger [#info, #debug] receives progress messages
      def load_data(data_generator:, total_rows:, batch_size:, logger:)
        chosen = strategy_class.new(data_generator)
        new(logger: logger, strategy: chosen).load_data(batch_size, total_rows)
      end

      private

      # CopyStrategy when the raw connection responds to `copy_data`,
      # BulkInsertStrategy otherwise.
      def strategy_class
        raw = ::ActiveRecord::Base.connection.raw_connection
        return ActiveRecordDataLoader::CopyStrategy if raw.respond_to?(:copy_data)

        ActiveRecordDataLoader::BulkInsertStrategy
      end
    end

    # @param logger [#info, #debug] receives progress messages
    # @param strategy [#load_batch, #table_name, #name] performs the writes
    def initialize(logger:, strategy:)
      @strategy = strategy
      @logger = logger
    end

    # Loads +total_rows+ rows in batches of +batch_size+, logging a summary
    # before and after at the info level.
    def load_data(batch_size, total_rows)
      batch_count = (total_rows / batch_size.to_f).ceil

      announcement =
        "Loading #{total_rows} row(s) into '#{strategy.table_name}' via #{strategy.name}. "\
        "#{batch_size} row(s) per batch, #{batch_count} batch(es)."
      logger.info(announcement)

      total_time = Benchmark.realtime { load_in_batches(batch_size, total_rows, batch_count) }

      summary =
        "Completed loading #{total_rows} row(s) into '#{strategy.table_name}' "\
        "in #{total_time} seconds."
      logger.info(summary)
    end

    private

    attr_reader :strategy, :logger

    # Feeds row numbers 0...total_rows to the strategy one slice at a time and
    # logs each batch's duration at the debug level.
    def load_in_batches(batch_size, total_rows, batch_count)
      total_rows.times.each_slice(batch_size).each_with_index do |row_numbers, index|
        elapsed = Benchmark.realtime { strategy.load_batch(row_numbers) }

        logger.debug(
          "Completed batch #{index + 1}/#{batch_count}, #{row_numbers.count} row(s) in #{elapsed} seconds"
        )
      end
    end
  end
end
@@ -0,0 +1,5 @@
1
+ # frozen_string_literal: true
2
+
3
module ActiveRecordDataLoader
  # Version string of this gem release.
  VERSION = "0.1.1"
end
@@ -0,0 +1,6 @@
1
#! /bin/bash
# CI entry point: runs the appraisal steps below, then rake with the default
# Gemfile.

# Stop at the first failing command. Without this, the script's exit status is
# only that of the final `bundle exec rake`, so a failure in any earlier
# appraisal run would go unnoticed by CI.
set -e

bundle exec appraisal
bundle exec appraisal activerecord-5 rake
bundle exec appraisal faker rake
bundle exec appraisal ffaker rake
bundle exec rake
metadata ADDED
@@ -0,0 +1,240 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: active_record_data_loader
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.1
5
+ platform: ruby
6
+ authors:
7
+ - Alejandro Beiderman
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2019-06-10 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: activerecord
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: '4.0'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: '4.0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: appraisal
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: bundler
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ version: '1.16'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: '1.16'
55
+ - !ruby/object:Gem::Dependency
56
+ name: coveralls
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ">="
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ">="
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
69
+ - !ruby/object:Gem::Dependency
70
+ name: pg
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - ">="
74
+ - !ruby/object:Gem::Version
75
+ version: '0'
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - ">="
81
+ - !ruby/object:Gem::Version
82
+ version: '0'
83
+ - !ruby/object:Gem::Dependency
84
+ name: pry
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - ">="
88
+ - !ruby/object:Gem::Version
89
+ version: '0'
90
+ type: :development
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - ">="
95
+ - !ruby/object:Gem::Version
96
+ version: '0'
97
+ - !ruby/object:Gem::Dependency
98
+ name: rake
99
+ requirement: !ruby/object:Gem::Requirement
100
+ requirements:
101
+ - - "~>"
102
+ - !ruby/object:Gem::Version
103
+ version: '12.0'
104
+ type: :development
105
+ prerelease: false
106
+ version_requirements: !ruby/object:Gem::Requirement
107
+ requirements:
108
+ - - "~>"
109
+ - !ruby/object:Gem::Version
110
+ version: '12.0'
111
+ - !ruby/object:Gem::Dependency
112
+ name: rspec
113
+ requirement: !ruby/object:Gem::Requirement
114
+ requirements:
115
+ - - "~>"
116
+ - !ruby/object:Gem::Version
117
+ version: '3.0'
118
+ type: :development
119
+ prerelease: false
120
+ version_requirements: !ruby/object:Gem::Requirement
121
+ requirements:
122
+ - - "~>"
123
+ - !ruby/object:Gem::Version
124
+ version: '3.0'
125
+ - !ruby/object:Gem::Dependency
126
+ name: rspec-collection_matchers
127
+ requirement: !ruby/object:Gem::Requirement
128
+ requirements:
129
+ - - ">="
130
+ - !ruby/object:Gem::Version
131
+ version: '0'
132
+ type: :development
133
+ prerelease: false
134
+ version_requirements: !ruby/object:Gem::Requirement
135
+ requirements:
136
+ - - ">="
137
+ - !ruby/object:Gem::Version
138
+ version: '0'
139
+ - !ruby/object:Gem::Dependency
140
+ name: rubocop
141
+ requirement: !ruby/object:Gem::Requirement
142
+ requirements:
143
+ - - ">="
144
+ - !ruby/object:Gem::Version
145
+ version: '0'
146
+ type: :development
147
+ prerelease: false
148
+ version_requirements: !ruby/object:Gem::Requirement
149
+ requirements:
150
+ - - ">="
151
+ - !ruby/object:Gem::Version
152
+ version: '0'
153
+ - !ruby/object:Gem::Dependency
154
+ name: sqlite3
155
+ requirement: !ruby/object:Gem::Requirement
156
+ requirements:
157
+ - - ">="
158
+ - !ruby/object:Gem::Version
159
+ version: '0'
160
+ type: :development
161
+ prerelease: false
162
+ version_requirements: !ruby/object:Gem::Requirement
163
+ requirements:
164
+ - - ">="
165
+ - !ruby/object:Gem::Version
166
+ version: '0'
167
+ description: A utility to bulk load test data for performance testing.
168
+ email:
169
+ - abeiderman@gmail.com
170
+ executables:
171
+ - console
172
+ - setup
173
+ extensions: []
174
+ extra_rdoc_files: []
175
+ files:
176
+ - ".gitignore"
177
+ - ".rspec"
178
+ - ".rubocop.yml"
179
+ - ".travis.yml"
180
+ - Appraisals
181
+ - CODE_OF_CONDUCT.md
182
+ - Gemfile
183
+ - Gemfile.lock
184
+ - LICENSE.txt
185
+ - README.md
186
+ - Rakefile
187
+ - active_record_data_loader.gemspec
188
+ - bin/console
189
+ - bin/setup
190
+ - config/database.yml
191
+ - config/database.yml.travis
192
+ - gemfiles/.bundle/config
193
+ - gemfiles/activerecord_5.gemfile
194
+ - gemfiles/activerecord_6.gemfile
195
+ - gemfiles/faker.gemfile
196
+ - gemfiles/ffaker.gemfile
197
+ - lib/active_record_data_loader.rb
198
+ - lib/active_record_data_loader/active_record/belongs_to_configuration.rb
199
+ - lib/active_record_data_loader/active_record/column_configuration.rb
200
+ - lib/active_record_data_loader/active_record/enum_value_generator.rb
201
+ - lib/active_record_data_loader/active_record/integer_value_generator.rb
202
+ - lib/active_record_data_loader/active_record/model_data_generator.rb
203
+ - lib/active_record_data_loader/active_record/polymorphic_belongs_to_configuration.rb
204
+ - lib/active_record_data_loader/active_record/text_value_generator.rb
205
+ - lib/active_record_data_loader/bulk_insert_strategy.rb
206
+ - lib/active_record_data_loader/configuration.rb
207
+ - lib/active_record_data_loader/copy_strategy.rb
208
+ - lib/active_record_data_loader/data_faker.rb
209
+ - lib/active_record_data_loader/dsl/definition.rb
210
+ - lib/active_record_data_loader/dsl/model.rb
211
+ - lib/active_record_data_loader/dsl/polymorphic_association.rb
212
+ - lib/active_record_data_loader/loader.rb
213
+ - lib/active_record_data_loader/version.rb
214
+ - log/.keep
215
+ - script/ci_build.sh
216
+ homepage:
217
+ licenses:
218
+ - MIT
219
+ metadata:
220
+ source_code_uri: https://github.com/abeiderman/active_record_data_loader
221
+ post_install_message:
222
+ rdoc_options: []
223
+ require_paths:
224
+ - lib
225
+ required_ruby_version: !ruby/object:Gem::Requirement
226
+ requirements:
227
+ - - ">="
228
+ - !ruby/object:Gem::Version
229
+ version: 2.3.0
230
+ required_rubygems_version: !ruby/object:Gem::Requirement
231
+ requirements:
232
+ - - ">="
233
+ - !ruby/object:Gem::Version
234
+ version: '0'
235
+ requirements: []
236
+ rubygems_version: 3.0.3
237
+ signing_key:
238
+ specification_version: 4
239
+ summary: A utility to bulk load test data for performance testing.
240
+ test_files: []