active_record_data_loader 0.1.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (41) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +13 -0
  3. data/.rspec +3 -0
  4. data/.rubocop.yml +38 -0
  5. data/.travis.yml +11 -0
  6. data/Appraisals +17 -0
  7. data/CODE_OF_CONDUCT.md +74 -0
  8. data/Gemfile +8 -0
  9. data/Gemfile.lock +107 -0
  10. data/LICENSE.txt +21 -0
  11. data/README.md +49 -0
  12. data/Rakefile +10 -0
  13. data/active_record_data_loader.gemspec +47 -0
  14. data/bin/console +15 -0
  15. data/bin/setup +8 -0
  16. data/config/database.yml +10 -0
  17. data/config/database.yml.travis +7 -0
  18. data/gemfiles/.bundle/config +2 -0
  19. data/gemfiles/activerecord_5.gemfile +7 -0
  20. data/gemfiles/activerecord_6.gemfile +7 -0
  21. data/gemfiles/faker.gemfile +7 -0
  22. data/gemfiles/ffaker.gemfile +7 -0
  23. data/lib/active_record_data_loader.rb +64 -0
  24. data/lib/active_record_data_loader/active_record/belongs_to_configuration.rb +27 -0
  25. data/lib/active_record_data_loader/active_record/column_configuration.rb +43 -0
  26. data/lib/active_record_data_loader/active_record/enum_value_generator.rb +25 -0
  27. data/lib/active_record_data_loader/active_record/integer_value_generator.rb +21 -0
  28. data/lib/active_record_data_loader/active_record/model_data_generator.rb +72 -0
  29. data/lib/active_record_data_loader/active_record/polymorphic_belongs_to_configuration.rb +49 -0
  30. data/lib/active_record_data_loader/active_record/text_value_generator.rb +72 -0
  31. data/lib/active_record_data_loader/bulk_insert_strategy.rb +57 -0
  32. data/lib/active_record_data_loader/configuration.rb +17 -0
  33. data/lib/active_record_data_loader/copy_strategy.rb +58 -0
  34. data/lib/active_record_data_loader/data_faker.rb +85 -0
  35. data/lib/active_record_data_loader/dsl/definition.rb +25 -0
  36. data/lib/active_record_data_loader/dsl/model.rb +35 -0
  37. data/lib/active_record_data_loader/dsl/polymorphic_association.rb +25 -0
  38. data/lib/active_record_data_loader/loader.rb +66 -0
  39. data/lib/active_record_data_loader/version.rb +5 -0
  40. data/script/ci_build.sh +6 -0
  41. metadata +240 -0
@@ -0,0 +1,57 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "csv"
4
+ require "benchmark"
5
+
6
module ActiveRecordDataLoader
  # Loads generated rows with a single multi-row INSERT statement per batch.
  #
  # The data generator is duck-typed; this class calls +table+, +column_list+
  # and +generate_row(row_number)+ on it.
  class BulkInsertStrategy
    # @param data_generator [#table, #column_list, #generate_row]
    def initialize(data_generator)
      @data_generator = data_generator
    end

    # Builds and executes one INSERT covering every row number in the batch.
    #
    # @param row_numbers [Enumerable<Integer>] row numbers handed to the generator
    # @return [Object, nil] whatever +connection.insert+ returns, or nil when
    #   the batch is empty (no statement is executed in that case)
    def load_batch(row_numbers)
      connection = ::ActiveRecord::Base.connection
      values_sql = values(row_numbers, connection)
      # An empty batch would otherwise produce "INSERT ... VALUES " with no
      # tuples, which is not valid SQL.
      return if values_sql.empty?

      connection.insert(<<~SQL)
        INSERT INTO #{quoted_table_name(connection)} (#{column_list(connection)})
        VALUES #{values_sql}
      SQL
    end

    # @return [Object] the generator's table, unquoted
    def table_name
      data_generator.table
    end

    # @return [String] human-readable strategy name
    def name
      "BULK INSERT"
    end

    private

    attr_reader :data_generator

    # Memoized on first use; later calls ignore the connection argument.
    def quoted_table_name(connection)
      @quoted_table_name ||= connection.quote_table_name(data_generator.table)
    end

    # Comma-separated, quoted column names. Memoized on first use.
    def column_list(connection)
      @column_list ||= data_generator
                       .column_list
                       .map { |c| connection.quote_column_name(c) }
                       .join(",")
    end

    # Comma-separated "(v1,v2,...)" tuples, one per row number.
    def values(row_numbers, connection)
      row_numbers
        .map { |i| "(#{row_values(i, connection)})" }
        .join(",")
    end

    # Quoted, comma-separated values for a single generated row.
    def row_values(row_number, connection)
      data_generator
        .generate_row(row_number)
        .map { |v| connection.quote(v) }
        .join(",")
    end
  end
end
@@ -0,0 +1,17 @@
1
+ # frozen_string_literal: true
2
+
3
require "logger"

module ActiveRecordDataLoader
  # Holds loader-wide defaults: batch size, row count and the logger.
  class Configuration
    attr_accessor :default_batch_size, :default_row_count, :logger

    # @param default_batch_size [Integer] defaults to 100_000
    # @param default_row_count [Integer] defaults to 1
    # @param logger [Object] defaults to a STDOUT logger at INFO level; an
    #   explicitly passed value (including nil) is stored as given
    def initialize(
      default_batch_size: 100_000,
      default_row_count: 1,
      logger: default_logger
    )
      @default_batch_size = default_batch_size
      @default_row_count = default_row_count
      @logger = logger
    end

    private

    # The level is assigned after construction rather than through the
    # `level:` keyword of Logger.new, so it also takes effect on Ruby versions
    # whose Logger.new does not accept that keyword.
    def default_logger
      Logger.new(STDOUT).tap { |l| l.level = Logger::INFO }
    end
  end
end
@@ -0,0 +1,58 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "csv"
4
+ require "benchmark"
5
+
6
module ActiveRecordDataLoader
  # Loads generated rows by streaming CSV text through a
  # `COPY ... FROM STDIN WITH (FORMAT CSV)` command on the raw connection.
  #
  # The data generator is duck-typed; this class calls +table+, +column_list+
  # and +generate_row(row_number)+ on it. The raw connection must respond to
  # +copy_data+ and +put_copy_data+ (presumably the pg driver - confirm).
  class CopyStrategy
    # @param data_generator [#table, #column_list, #generate_row]
    def initialize(data_generator)
      @data_generator = data_generator
    end

    # Serializes the batch to CSV and sends it in a single COPY operation.
    #
    # @param row_numbers [Enumerable<Integer>] row numbers handed to the generator
    def load_batch(row_numbers)
      csv_data = csv_data_batch(row_numbers)

      connection = ::ActiveRecord::Base.connection.raw_connection
      connection.copy_data(copy_command) { connection.put_copy_data(csv_data) }
    end

    # @return [Object] the generator's table, unquoted
    def table_name
      data_generator.table
    end

    # @return [String] human-readable strategy name
    def name
      "COPY"
    end

    private

    attr_reader :data_generator

    # One CSV line per row number, joined with "\n" (no trailing newline).
    def csv_data_batch(row_numbers)
      row_numbers.map do |i|
        data_generator.generate_row(i).map { |d| quote_data(d) }.join(",")
      end.join("\n")
    end

    # COPY command text with quoted table and column names. Memoized.
    def copy_command
      @copy_command ||= begin
        quoted_table_name = ::ActiveRecord::Base.connection.quote_table_name(data_generator.table)
        columns = data_generator
                  .column_list
                  .map { |c| ::ActiveRecord::Base.connection.quote_column_name(c) }
                  .join(", ")

        <<~SQL
          COPY #{quoted_table_name} (#{columns})
          FROM STDIN WITH (FORMAT CSV)
        SQL
      end
    end

    # Renders one value as a CSV field.
    #
    # nil yields nil, which joins as an empty unquoted field (read as NULL by
    # COPY's CSV format). Everything else is wrapped in double quotes with
    # embedded double quotes doubled. This is CSV quoting, not SQL quoting:
    # SQL string escaping would double single quotes (corrupting the data) and
    # leave double quotes unescaped (breaking the CSV).
    def quote_data(data)
      return if data.nil?

      "\"#{data.to_s.gsub('"', '""')}\""
    end
  end
end
@@ -0,0 +1,85 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "forwardable"
4
+
5
module ActiveRecordDataLoader
  # Facade for generating fake names. The backing adapter is chosen once,
  # on first use: ffaker if that gem is loaded, otherwise faker if loaded,
  # otherwise a tiny built-in word list.
  class DataFaker
    class << self
      extend Forwardable

      %i[person_name first_name middle_name last_name company_name].each do |generator|
        def_delegator :adapter, generator
      end

      private

      # Lazily built adapter instance, cached for the life of the class.
      def adapter
        @adapter ||= build_adapter
      end

      # ffaker takes precedence over faker when both gems are loaded.
      def build_adapter
        if gem_loaded?("ffaker")
          require "ffaker"
          FFakerGemAdapter.new
        elsif gem_loaded?("faker")
          require "faker"
          FakerGemAdapter.new
        else
          NoGemAdapter.new
        end
      end

      def gem_loaded?(gem_name)
        Gem.loaded_specs.key?(gem_name)
      end
    end

    # Adapter backed by FFaker::Name and FFaker::Company.
    class FFakerGemAdapter
      extend Forwardable

      def_delegator :ffaker_name, :name, :person_name
      def_delegator :ffaker_name, :first_name
      # middle_name is served by first_name on this adapter.
      def_delegator :ffaker_name, :first_name, :middle_name
      def_delegator :ffaker_name, :last_name

      def ffaker_name
        FFaker::Name
      end

      def company_name
        FFaker::Company.name
      end
    end

    # Adapter backed by Faker::Name and Faker::Company.
    class FakerGemAdapter
      extend Forwardable

      def_delegator :faker_name, :name, :person_name
      def_delegator :faker_name, :first_name
      def_delegator :faker_name, :middle_name
      def_delegator :faker_name, :last_name

      def faker_name
        Faker::Name
      end

      def company_name
        Faker::Company.name
      end
    end

    # Fallback adapter used when neither gem is loaded.
    class NoGemAdapter
      FIRST_NAMES = %w[John Mary].freeze
      MIDDLE_NAMES = %w[Madison Ashley].freeze
      LAST_NAMES = %w[Doe Smith].freeze

      def company_name
        "Acme"
      end

      def person_name
        [first_name, middle_name, last_name].join(" ")
      end

      def first_name
        FIRST_NAMES.sample
      end

      def middle_name
        MIDDLE_NAMES.sample
      end

      def last_name
        LAST_NAMES.sample
      end
    end
  end
end
@@ -0,0 +1,25 @@
1
+ # frozen_string_literal: true
2
+
3
module ActiveRecordDataLoader
  module Dsl
    # Collects the model definitions declared through the DSL.
    class Definition
      attr_reader :models

      # @param config [Object] configuration handed to every Model created
      #   here; defaults to ActiveRecordDataLoader.configuration
      def initialize(config = ActiveRecordDataLoader.configuration)
        @config = config
        @models = []
      end

      # Creates a Model for +klass+, passes it to the optional block, and then
      # appends it to +models+.
      #
      # @return [Model] the newly created model definition
      def model(klass, &block)
        Model.new(klass: klass, configuration: config).tap do |definition|
          block&.call(definition)
          models << definition
        end
      end

      private

      attr_reader :config
    end
  end
end
@@ -0,0 +1,35 @@
1
+ # frozen_string_literal: true
2
+
3
module ActiveRecordDataLoader
  module Dsl
    # DSL object describing how one model class should be populated.
    class Model
      attr_reader :klass, :columns, :row_count, :polymorphic_associations

      # @param klass [Object] the model class being described
      # @param configuration [#default_row_count, #default_batch_size]
      #   supplies the initial row count and batch size
      def initialize(klass:, configuration:)
        @klass = klass
        @columns = {}
        @row_count = configuration.default_row_count
        @batch_size = configuration.default_batch_size
        @polymorphic_associations = []
      end

      # Sets the number of rows to generate (readable via +row_count+).
      def count(count)
        @row_count = count
      end

      # Combined reader/writer: with an argument, stores it as the batch
      # size; without one (or with nil), leaves it unchanged. Returns the
      # current batch size either way.
      def batch_size(size = nil)
        @batch_size = (size || @batch_size)
      end

      # Registers +func+ as the value source for column +name+
      # (stored under the symbolized name).
      def column(name, func)
        @columns[name.to_sym] = func
      end

      # Declares a polymorphic association named +assoc_name+ and passes the
      # new PolymorphicAssociation to the block for configuration.
      #
      # The block is optional, matching Definition#model; previously calling
      # this without a block raised NoMethodError on nil.
      #
      # @return [Array] the accumulated list of polymorphic associations
      def polymorphic(assoc_name, &block)
        @polymorphic_associations << PolymorphicAssociation.new(
          @klass, assoc_name
        ).tap { |a| block&.call(a) }
      end
    end
  end
end
@@ -0,0 +1,25 @@
1
+ # frozen_string_literal: true
2
+
3
module ActiveRecordDataLoader
  module Dsl
    # DSL object listing the candidate models for one polymorphic
    # association, each with an integer weight.
    class PolymorphicAssociation
      attr_reader :model_class, :name, :models

      # @param model_class [Object] the class that owns the association
      # @param name [Object] the association name
      def initialize(model_class, name)
        @model_class = model_class
        @name = name
        @models = {}
      end

      # Registers +klass+ as a candidate with the given weight
      # (coerced with +to_i+).
      def model(klass, weight: 1)
        @models[klass] = weight.to_i
      end

      # Expands the weights into a flat list in which each model appears
      # weight / gcd(all weights) times, in registration order.
      #
      # Returns an empty array when no models are registered or when every
      # weight is zero; the latter previously raised ZeroDivisionError
      # because the gcd of all-zero weights is 0.
      #
      # @return [Array] models repeated proportionally to their weights
      def weighted_models
        # Seeding with 0 is neutral for gcd (0.gcd(x) == x) and yields 0 for
        # an empty or all-zero weight list.
        divisor = models.values.reduce(0, :gcd)
        return [] if divisor.zero?

        models.flat_map { |m, w| [m] * (w / divisor) }
      end
    end
  end
end
@@ -0,0 +1,66 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "benchmark"
4
+
5
module ActiveRecordDataLoader
  # Drives a loading strategy batch by batch and logs progress and timings.
  class Loader
    # Picks a strategy for the current connection, wraps the generator in it
    # and runs the load.
    def self.load_data(data_generator:, total_rows:, batch_size:, logger:)
      loader = new(
        logger: logger,
        strategy: strategy_class.new(data_generator)
      )
      loader.load_data(batch_size, total_rows)
    end

    # CopyStrategy when the raw connection responds to +copy_data+,
    # BulkInsertStrategy otherwise.
    def self.strategy_class
      raw_connection = ::ActiveRecord::Base.connection.raw_connection
      return ActiveRecordDataLoader::CopyStrategy if raw_connection.respond_to?(:copy_data)

      ActiveRecordDataLoader::BulkInsertStrategy
    end
    private_class_method :strategy_class

    # @param logger [#info, #debug]
    # @param strategy [#load_batch, #table_name, #name]
    def initialize(logger:, strategy:)
      @logger = logger
      @strategy = strategy
    end

    # Loads +total_rows+ rows in slices of at most +batch_size+, logging an
    # info line before and after, with the total elapsed time.
    def load_data(batch_size, total_rows)
      batch_count = (total_rows / batch_size.to_f).ceil

      announcement =
        "Loading #{total_rows} row(s) into '#{strategy.table_name}' via #{strategy.name}. " \
        "#{batch_size} row(s) per batch, #{batch_count} batch(es)."
      logger.info(announcement)

      elapsed = Benchmark.realtime { load_in_batches(batch_size, total_rows, batch_count) }

      summary =
        "Completed loading #{total_rows} row(s) into '#{strategy.table_name}' " \
        "in #{elapsed} seconds."
      logger.info(summary)
    end

    private

    attr_reader :strategy, :logger

    # Feeds row numbers 0...total_rows to the strategy one slice at a time,
    # logging each batch's duration at debug level.
    def load_in_batches(batch_size, total_rows, batch_count)
      total_rows.times.each_slice(batch_size).with_index(1) do |batch, batch_number|
        elapsed = Benchmark.realtime { strategy.load_batch(batch) }

        logger.debug(
          "Completed batch #{batch_number}/#{batch_count}, #{batch.count} row(s) in #{elapsed} seconds"
        )
      end
    end
  end
end
@@ -0,0 +1,5 @@
1
+ # frozen_string_literal: true
2
+
3
module ActiveRecordDataLoader
  # Release version string of the gem.
  VERSION = "0.1.1"
end
@@ -0,0 +1,6 @@
1
#!/bin/bash
# CI entry point: runs `appraisal`, then `rake` under the activerecord-5,
# faker and ffaker appraisals, and finally `rake` with the default bundle.

# Stop at the first failing command. Without this the script's exit status is
# only that of the last command, so an earlier failing run would go unnoticed.
set -e

bundle exec appraisal
bundle exec appraisal activerecord-5 rake
bundle exec appraisal faker rake
bundle exec appraisal ffaker rake
bundle exec rake
metadata ADDED
@@ -0,0 +1,240 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: active_record_data_loader
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.1
5
+ platform: ruby
6
+ authors:
7
+ - Alejandro Beiderman
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2019-06-10 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: activerecord
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: '4.0'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: '4.0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: appraisal
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: bundler
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ version: '1.16'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: '1.16'
55
+ - !ruby/object:Gem::Dependency
56
+ name: coveralls
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ">="
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ">="
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
69
+ - !ruby/object:Gem::Dependency
70
+ name: pg
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - ">="
74
+ - !ruby/object:Gem::Version
75
+ version: '0'
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - ">="
81
+ - !ruby/object:Gem::Version
82
+ version: '0'
83
+ - !ruby/object:Gem::Dependency
84
+ name: pry
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - ">="
88
+ - !ruby/object:Gem::Version
89
+ version: '0'
90
+ type: :development
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - ">="
95
+ - !ruby/object:Gem::Version
96
+ version: '0'
97
+ - !ruby/object:Gem::Dependency
98
+ name: rake
99
+ requirement: !ruby/object:Gem::Requirement
100
+ requirements:
101
+ - - "~>"
102
+ - !ruby/object:Gem::Version
103
+ version: '12.0'
104
+ type: :development
105
+ prerelease: false
106
+ version_requirements: !ruby/object:Gem::Requirement
107
+ requirements:
108
+ - - "~>"
109
+ - !ruby/object:Gem::Version
110
+ version: '12.0'
111
+ - !ruby/object:Gem::Dependency
112
+ name: rspec
113
+ requirement: !ruby/object:Gem::Requirement
114
+ requirements:
115
+ - - "~>"
116
+ - !ruby/object:Gem::Version
117
+ version: '3.0'
118
+ type: :development
119
+ prerelease: false
120
+ version_requirements: !ruby/object:Gem::Requirement
121
+ requirements:
122
+ - - "~>"
123
+ - !ruby/object:Gem::Version
124
+ version: '3.0'
125
+ - !ruby/object:Gem::Dependency
126
+ name: rspec-collection_matchers
127
+ requirement: !ruby/object:Gem::Requirement
128
+ requirements:
129
+ - - ">="
130
+ - !ruby/object:Gem::Version
131
+ version: '0'
132
+ type: :development
133
+ prerelease: false
134
+ version_requirements: !ruby/object:Gem::Requirement
135
+ requirements:
136
+ - - ">="
137
+ - !ruby/object:Gem::Version
138
+ version: '0'
139
+ - !ruby/object:Gem::Dependency
140
+ name: rubocop
141
+ requirement: !ruby/object:Gem::Requirement
142
+ requirements:
143
+ - - ">="
144
+ - !ruby/object:Gem::Version
145
+ version: '0'
146
+ type: :development
147
+ prerelease: false
148
+ version_requirements: !ruby/object:Gem::Requirement
149
+ requirements:
150
+ - - ">="
151
+ - !ruby/object:Gem::Version
152
+ version: '0'
153
+ - !ruby/object:Gem::Dependency
154
+ name: sqlite3
155
+ requirement: !ruby/object:Gem::Requirement
156
+ requirements:
157
+ - - ">="
158
+ - !ruby/object:Gem::Version
159
+ version: '0'
160
+ type: :development
161
+ prerelease: false
162
+ version_requirements: !ruby/object:Gem::Requirement
163
+ requirements:
164
+ - - ">="
165
+ - !ruby/object:Gem::Version
166
+ version: '0'
167
+ description: A utility to bulk load test data for performance testing.
168
+ email:
169
+ - abeiderman@gmail.com
170
+ executables:
171
+ - console
172
+ - setup
173
+ extensions: []
174
+ extra_rdoc_files: []
175
+ files:
176
+ - ".gitignore"
177
+ - ".rspec"
178
+ - ".rubocop.yml"
179
+ - ".travis.yml"
180
+ - Appraisals
181
+ - CODE_OF_CONDUCT.md
182
+ - Gemfile
183
+ - Gemfile.lock
184
+ - LICENSE.txt
185
+ - README.md
186
+ - Rakefile
187
+ - active_record_data_loader.gemspec
188
+ - bin/console
189
+ - bin/setup
190
+ - config/database.yml
191
+ - config/database.yml.travis
192
+ - gemfiles/.bundle/config
193
+ - gemfiles/activerecord_5.gemfile
194
+ - gemfiles/activerecord_6.gemfile
195
+ - gemfiles/faker.gemfile
196
+ - gemfiles/ffaker.gemfile
197
+ - lib/active_record_data_loader.rb
198
+ - lib/active_record_data_loader/active_record/belongs_to_configuration.rb
199
+ - lib/active_record_data_loader/active_record/column_configuration.rb
200
+ - lib/active_record_data_loader/active_record/enum_value_generator.rb
201
+ - lib/active_record_data_loader/active_record/integer_value_generator.rb
202
+ - lib/active_record_data_loader/active_record/model_data_generator.rb
203
+ - lib/active_record_data_loader/active_record/polymorphic_belongs_to_configuration.rb
204
+ - lib/active_record_data_loader/active_record/text_value_generator.rb
205
+ - lib/active_record_data_loader/bulk_insert_strategy.rb
206
+ - lib/active_record_data_loader/configuration.rb
207
+ - lib/active_record_data_loader/copy_strategy.rb
208
+ - lib/active_record_data_loader/data_faker.rb
209
+ - lib/active_record_data_loader/dsl/definition.rb
210
+ - lib/active_record_data_loader/dsl/model.rb
211
+ - lib/active_record_data_loader/dsl/polymorphic_association.rb
212
+ - lib/active_record_data_loader/loader.rb
213
+ - lib/active_record_data_loader/version.rb
214
+ - log/.keep
215
+ - script/ci_build.sh
216
+ homepage:
217
+ licenses:
218
+ - MIT
219
+ metadata:
220
+ source_code_uri: https://github.com/abeiderman/active_record_data_loader
221
+ post_install_message:
222
+ rdoc_options: []
223
+ require_paths:
224
+ - lib
225
+ required_ruby_version: !ruby/object:Gem::Requirement
226
+ requirements:
227
+ - - ">="
228
+ - !ruby/object:Gem::Version
229
+ version: 2.3.0
230
+ required_rubygems_version: !ruby/object:Gem::Requirement
231
+ requirements:
232
+ - - ">="
233
+ - !ruby/object:Gem::Version
234
+ version: '0'
235
+ requirements: []
236
+ rubygems_version: 3.0.3
237
+ signing_key:
238
+ specification_version: 4
239
+ summary: A utility to bulk load test data for performance testing.
240
+ test_files: []