active_record_data_loader 1.1.0 → 1.2.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 33d3769ad2f8c008cf51e75a62b43ba3333dce8e7e45387e01ab3470f2a5bf6c
4
- data.tar.gz: 6f2b812d1e3837d05ff565c050cd6c80110929136601fb95436996b14de4e809
3
+ metadata.gz: 8ae60492b342e71d8a1a2e20aff31794af784f1009346f4d5dc4375dd939bb48
4
+ data.tar.gz: ab5be5bdf1cb5af69f5f8a1ac65ea8f64ee1338398c7f6cae6e15e6276d2e954
5
5
  SHA512:
6
- metadata.gz: 8813fa219e7a710f147b79bb3a81826a5fe23ed7e3bef21a95fca310b3c8f3b09c7fb2c89d0cb3a65c825d01941eca8e394ee42900e13941d050e05fcb2c76d5
7
- data.tar.gz: 43392902671095ffcf3d9f33984b7f77de3a9b2fb95bd99ee65378d046e2d509d34f8b92220de04642e6e33d816926215581caa58d02b0d30ac02011f1956776
6
+ metadata.gz: 8b860a06433a0661a765428f37df596ac0b3fd46ff447fbf3edfa5cf50f89dcda0f146b25074959ca5aee792dff153a56eb37888075ae10649d6cde1e598c53d
7
+ data.tar.gz: dc9b43011f8c1b8c7b663b650b5ba11e43b9dd040a8ef6b7a58b24c7da903b1c963104a06c39f45e5ebaa4f01e815d8d6d565ded2fd9b5a186dcd50ed69b2e7c
@@ -0,0 +1,51 @@
1
+ name: Build
2
+
3
+ on: [push, workflow_dispatch]
4
+
5
+ jobs:
6
+ build:
7
+ name: Build + Test
8
+ runs-on: ubuntu-latest
9
+ services:
10
+ postgres:
11
+ image: postgres:11
12
+ ports:
13
+ - "2345:5432"
14
+ env:
15
+ POSTGRES_USER: test
16
+ POSTGRES_PASSWORD: test
17
+ mysql:
18
+ image: mysql:5
19
+ ports:
20
+ - "3306:3306"
21
+ env:
22
+ MYSQL_ROOT_PASSWORD: test
23
+ MYSQL_USER: test
24
+ MYSQL_PASSWORD: test
25
+ MYSQL_DATABASE: test
26
+ strategy:
27
+ matrix:
28
+ ruby: [2.5.9, 2.6.7, 2.7.3]
29
+ gemfile: [activerecord_5, rails, faker, ffaker]
30
+ env:
31
+ BUNDLE_GEMFILE: ${{ github.workspace }}/gemfiles/${{ matrix.gemfile }}.gemfile
32
+ steps:
33
+ - name: Checkout
34
+ uses: actions/checkout@v2
35
+
36
+ - name: Setup ruby
37
+ uses: ruby/setup-ruby@v1
38
+ with:
39
+ ruby-version: ${{ matrix.ruby }}
40
+ bundler-cache: true
41
+
42
+ - name: Wait for DBs to be ready
43
+ run: bundle exec rake wait_for_test_db
44
+
45
+ - name: Run tests
46
+ run: bundle exec rake
47
+
48
+ - name: Coveralls
49
+ uses: coverallsapp/github-action@master
50
+ with:
51
+ github-token: ${{ secrets.github_token }}
@@ -0,0 +1,29 @@
1
+ name: Ruby Gem
2
+
3
+ on: workflow_dispatch
4
+
5
+ jobs:
6
+ build:
7
+ name: Build + Publish
8
+ runs-on: ubuntu-latest
9
+ permissions:
10
+ contents: read
11
+ packages: write
12
+
13
+ steps:
14
+ - uses: actions/checkout@v2
15
+ - name: Set up Ruby 2.6
16
+ uses: actions/setup-ruby@v1
17
+ with:
18
+ ruby-version: 2.6.x
19
+
20
+ - name: Publish to RubyGems
21
+ run: |
22
+ mkdir -p $HOME/.gem
23
+ touch $HOME/.gem/credentials
24
+ chmod 0600 $HOME/.gem/credentials
25
+ printf -- "---\n:rubygems_api_key: ${GEM_HOST_API_KEY}\n" > $HOME/.gem/credentials
26
+ gem build *.gemspec
27
+ gem push *.gem
28
+ env:
29
+ GEM_HOST_API_KEY: "${{ secrets.RUBYGEMS_API_KEY }}"
data/.rubocop.yml CHANGED
@@ -26,10 +26,17 @@ Metrics/AbcSize:
26
26
  Metrics/BlockLength:
27
27
  Exclude: ["spec/**/*", "*.gemspec"]
28
28
 
29
+ Metrics/ClassLength:
30
+ Exclude: ["spec/**/*", "*.gemspec"]
31
+
29
32
  Metrics/MethodLength:
30
33
  Max: 15
31
34
  Exclude: ["spec/**/*"]
32
35
 
36
+ Metrics/ParameterLists:
37
+ Max: 5
38
+ Exclude: ["lib/active_record_data_loader/configuration.rb"]
39
+
33
40
  Style/CaseLikeIf:
34
41
  Enabled: false
35
42
 
data/CHANGELOG.md CHANGED
@@ -1,10 +1,26 @@
1
1
  # Change log
2
2
 
3
+ ## [v1.2.0] - 2021-11-14
4
+
5
+ [Diff](https://github.com/abeiderman/active_record_data_loader/compare/v1.1.0...v1.2.0)
6
+
7
+ ### Changes:
8
+ * Add `:file` output option for generating a SQL script instead of loading the data into the database.
9
+ * Fix some connection handling issues when a custom connection factory is provided.
10
+
11
+ ## [v1.1.0] - 2021-05-01
12
+
13
+ [Diff](https://github.com/abeiderman/active_record_data_loader/compare/v1.0.2...v1.1.0)
14
+
15
+ ### Changes:
16
+ * Bump ruby version requirement to >= 2.5
17
+ * Bump activerecord requirement to >= 5.0
18
+
3
19
  ## [v1.0.2] - 2019-07-05
4
20
 
5
21
  [Diff](https://github.com/abeiderman/active_record_data_loader/compare/v1.0.1...v1.0.2)
6
22
 
7
- ### Enhancements:
23
+ ### Changes:
8
24
  * Add support for MySQL enums
9
25
  * Accept a connection factory lambda as part of the configuration
10
26
 
@@ -12,7 +28,7 @@
12
28
 
13
29
  [Diff](https://github.com/abeiderman/active_record_data_loader/compare/v1.0.0...v1.0.1)
14
30
 
15
- ### Enhancements:
31
+ ### Changes:
16
32
  * Generate values for datetime column types. This also fixes the fact that `created_at` and `updated_at` were not being populated by default.
17
33
 
18
34
  ## [v1.0.0] - 2019-06-15
@@ -22,3 +38,5 @@ Initial stable release
22
38
  [v1.0.0]: https://github.com/abeiderman/active_record_data_loader/releases/tag/v1.0.0
23
39
  [v1.0.1]: https://github.com/abeiderman/active_record_data_loader/releases/tag/v1.0.1
24
40
  [v1.0.2]: https://github.com/abeiderman/active_record_data_loader/releases/tag/v1.0.2
41
+ [v1.1.0]: https://github.com/abeiderman/active_record_data_loader/releases/tag/v1.1.0
42
+ [v1.2.0]: https://github.com/abeiderman/active_record_data_loader/releases/tag/v1.2.0
data/Gemfile.lock CHANGED
@@ -1,47 +1,39 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- active_record_data_loader (1.1.0)
4
+ active_record_data_loader (1.2.0)
5
5
  activerecord (>= 5.0)
6
6
 
7
7
  GEM
8
8
  remote: https://rubygems.org/
9
9
  specs:
10
- activemodel (5.2.4.5)
11
- activesupport (= 5.2.4.5)
12
- activerecord (5.2.4.5)
13
- activemodel (= 5.2.4.5)
14
- activesupport (= 5.2.4.5)
15
- arel (>= 9.0)
16
- activesupport (5.2.4.5)
10
+ activemodel (6.1.4.1)
11
+ activesupport (= 6.1.4.1)
12
+ activerecord (6.1.4.1)
13
+ activemodel (= 6.1.4.1)
14
+ activesupport (= 6.1.4.1)
15
+ activesupport (6.1.4.1)
17
16
  concurrent-ruby (~> 1.0, >= 1.0.2)
18
- i18n (>= 0.7, < 2)
19
- minitest (~> 5.1)
20
- tzinfo (~> 1.1)
17
+ i18n (>= 1.6, < 2)
18
+ minitest (>= 5.1)
19
+ tzinfo (~> 2.0)
20
+ zeitwerk (~> 2.3)
21
21
  appraisal (2.2.0)
22
22
  bundler
23
23
  rake
24
24
  thor (>= 0.14.0)
25
- arel (9.0.0)
26
- ast (2.4.1)
25
+ ast (2.4.2)
27
26
  coderay (1.1.2)
28
- concurrent-ruby (1.1.8)
29
- coveralls (0.8.23)
30
- json (>= 1.8, < 3)
31
- simplecov (~> 0.16.1)
32
- term-ansicolor (~> 1.3)
33
- thor (>= 0.19.4, < 2.0)
34
- tins (~> 1.6)
27
+ concurrent-ruby (1.1.9)
35
28
  diff-lcs (1.3)
36
- docile (1.3.2)
37
- i18n (1.8.10)
29
+ docile (1.4.0)
30
+ i18n (1.8.11)
38
31
  concurrent-ruby (~> 1.0)
39
- json (2.3.1)
40
32
  method_source (1.0.0)
41
33
  minitest (5.14.4)
42
34
  mysql2 (0.5.3)
43
- parallel (1.20.1)
44
- parser (2.7.2.0)
35
+ parallel (1.21.0)
36
+ parser (3.0.2.0)
45
37
  ast (~> 2.4.1)
46
38
  pg (1.2.3)
47
39
  pry (0.13.1)
@@ -49,7 +41,7 @@ GEM
49
41
  method_source (~> 1.0)
50
42
  rainbow (3.0.0)
51
43
  rake (13.0.1)
52
- regexp_parser (2.0.0)
44
+ regexp_parser (2.1.1)
53
45
  rexml (3.2.5)
54
46
  rspec (3.9.0)
55
47
  rspec-core (~> 3.9.0)
@@ -66,33 +58,32 @@ GEM
66
58
  diff-lcs (>= 1.2.0, < 2.0)
67
59
  rspec-support (~> 3.9.0)
68
60
  rspec-support (3.9.3)
69
- rubocop (1.6.1)
61
+ rubocop (1.22.3)
70
62
  parallel (~> 1.10)
71
- parser (>= 2.7.1.5)
63
+ parser (>= 3.0.0.0)
72
64
  rainbow (>= 2.2.2, < 4.0)
73
65
  regexp_parser (>= 1.8, < 3.0)
74
66
  rexml
75
- rubocop-ast (>= 1.2.0, < 2.0)
67
+ rubocop-ast (>= 1.12.0, < 2.0)
76
68
  ruby-progressbar (~> 1.7)
77
- unicode-display_width (>= 1.4.0, < 2.0)
78
- rubocop-ast (1.3.0)
79
- parser (>= 2.7.1.5)
80
- ruby-progressbar (1.10.1)
81
- simplecov (0.16.1)
69
+ unicode-display_width (>= 1.4.0, < 3.0)
70
+ rubocop-ast (1.13.0)
71
+ parser (>= 3.0.1.1)
72
+ ruby-progressbar (1.11.0)
73
+ simplecov (0.21.2)
82
74
  docile (~> 1.1)
83
- json (>= 1.8, < 3)
84
- simplecov-html (~> 0.10.0)
85
- simplecov-html (0.10.2)
75
+ simplecov-html (~> 0.11)
76
+ simplecov_json_formatter (~> 0.1)
77
+ simplecov-html (0.12.3)
78
+ simplecov-lcov (0.8.0)
79
+ simplecov_json_formatter (0.1.3)
86
80
  sqlite3 (1.4.1)
87
- term-ansicolor (1.7.1)
88
- tins (~> 1.0)
89
81
  thor (0.20.3)
90
- thread_safe (0.3.6)
91
82
  timecop (0.9.1)
92
- tins (1.21.0)
93
- tzinfo (1.2.9)
94
- thread_safe (~> 0.1)
95
- unicode-display_width (1.7.0)
83
+ tzinfo (2.0.4)
84
+ concurrent-ruby (~> 1.0)
85
+ unicode-display_width (2.1.0)
86
+ zeitwerk (2.5.1)
96
87
 
97
88
  PLATFORMS
98
89
  ruby
@@ -102,7 +93,6 @@ DEPENDENCIES
102
93
  active_record_data_loader!
103
94
  appraisal
104
95
  bundler (>= 1.16)
105
- coveralls
106
96
  mysql2
107
97
  pg
108
98
  pry
@@ -110,8 +100,10 @@ DEPENDENCIES
110
100
  rspec (~> 3.0)
111
101
  rspec-collection_matchers
112
102
  rubocop
103
+ simplecov
104
+ simplecov-lcov
113
105
  sqlite3
114
106
  timecop
115
107
 
116
108
  BUNDLED WITH
117
- 2.2.1
109
+ 2.2.31
data/README.md CHANGED
@@ -1,6 +1,6 @@
1
1
  # active_record_data_loader
2
2
 
3
- [![Build Status](https://travis-ci.org/abeiderman/active_record_data_loader.svg?branch=master)](https://travis-ci.org/abeiderman/active_record_data_loader)
3
+ [![Build Status](https://github.com/abeiderman/active_record_data_loader/actions/workflows/build.yml/badge.svg)](https://github.com/abeiderman/active_record_data_loader/actions/workflows/build.yml)
4
4
  [![Coverage Status](https://coveralls.io/repos/github/abeiderman/active_record_data_loader/badge.svg?branch=master&service=github)](https://coveralls.io/github/abeiderman/active_record_data_loader?branch=master)
5
5
  [![Maintainability](https://api.codeclimate.com/v1/badges/338904b3f7e8d19a3cb1/maintainability)](https://codeclimate.com/github/abeiderman/active_record_data_loader/maintainability)
6
6
 
@@ -99,13 +99,13 @@ data_loader = ActiveRecordDataLoader.define do
99
99
  m.column :currency, "CAD"
100
100
  m.belongs_to :customer, eligible_set: -> { Customer.where(country: "CAN") }
101
101
  end
102
-
102
+
103
103
  model Order do |m|
104
104
  m.count 25_000
105
105
  m.column :currency, "MXN"
106
106
  m.belongs_to :customer, eligible_set: -> { Customer.where(country: "MEX") }
107
107
  end
108
-
108
+
109
109
  model Order do |m|
110
110
  m.count 50_000
111
111
  m.column :currency, "USD"
@@ -148,7 +148,7 @@ data_loader = ActiveRecordDataLoader.define do
148
148
 
149
149
  model Order do |m|
150
150
  m.count 100_000
151
-
151
+
152
152
  m.polymorphic :customer do |c|
153
153
  c.model Person
154
154
  c.model Business
@@ -172,7 +172,7 @@ data_loader = ActiveRecordDataLoader.define do
172
172
 
173
173
  model Order do |m|
174
174
  m.count 100_000
175
-
175
+
176
176
  m.polymorphic :customer do |c|
177
177
  c.model Person, weight: 2
178
178
  c.model Business, weight: 1
@@ -197,7 +197,7 @@ data_loader = ActiveRecordDataLoader.define do
197
197
 
198
198
  model Order do |m|
199
199
  m.count 100_000
200
-
200
+
201
201
  m.polymorphic :customer do |c|
202
202
  c.model Person, weight: 2
203
203
  c.model Business, weight: 1, eligible_set: -> { Business.where(country: "USA") }
@@ -208,6 +208,85 @@ end
208
208
  data_loader.load_data
209
209
  ```
210
210
 
211
+ ### Configuration options
212
+
213
+ You can define global configuration options like this:
214
+
215
+ ```ruby
216
+ ActiveRecordDataLoader.configure do |c|
217
+ c.logger = ActiveSupport::Logger.new("my_file.log", level: :debug)
218
+ c.statement_timeout = "5min"
219
+ end
220
+ ```
221
+
222
+ Or you can create a configuration object for the specific data loader instance rather than globally:
223
+
224
+ ```ruby
225
+ config = ActiveRecordDataLoader::Configuration.new(
226
+ logger: ActiveSupport::Logger.new("my_file.log", level: :debug),
227
+ statement_timeout: "5min"
228
+ )
229
+ loader = ActiveRecordDataLoader.define(config) do
230
+ model Company do |m|
231
+ m.count 10
232
+ end
233
+
234
+ # ... more definitions
235
+ end
236
+ ```
237
+
238
+ #### statement_timeout
239
+
240
+ This is currently only used for Postgres connections to adjust the `statement_timeout` value for the connection. The default is `2min`. Depending on the size of the batches you are loading and overall size of the tables you may need to increase this value:
241
+
242
+ ```ruby
243
+ ActiveRecordDataLoader.configure do |c|
244
+ c.statement_timeout = "5min"
245
+ end
246
+ ```
247
+
248
+ #### connection_factory
249
+
250
+ The `connection_factory` option accepts a lambda that should return a connection object whenever executed. If not specified, the default behavior is to retrieve a connection using `ActiveRecord::Base.connection`. You can configure it like this:
251
+
252
+ ```ruby
253
+ ActiveRecordDataLoader.configure do |c|
254
+ c.connection_factory = -> { MyCustomConnectionHandler.open_connection }
255
+ end
256
+ ```
257
+
258
+ #### output
259
+
260
+ The `output` option accepts either `:connection` or `:file`. The default behavior is `:connection` which means the data will be loaded into the database using the database connection.
261
+
262
+ If `:file` is specified, instead of the data being loaded into the database, a script file will be generated. This script file can then be executed manually to load the data. This can be helpful if you need to load the same data multiple times. For example, you may be profiling different alternatives in your code and want to see how each performs with a fully loaded database. In that case, you would want to have the same data starting point for each alternative you evaluate. By generating the script file ahead of time, it would be significantly faster to load that data over and over by executing the existing script.
263
+
264
+ Here are some examples of how to use the `output` option:
265
+
266
+ ```ruby
267
+ ActiveRecordDataLoader.configure do |c|
268
+ c.output = :connection # This is the default behavior
269
+ end
270
+ ```
271
+
272
+ ```ruby
273
+ ActiveRecordDataLoader.configure do |c|
274
+ c.output = :file # Outputs to a file with a default name
275
+ end
276
+ ```
277
+
278
+ ```ruby
279
+ ActiveRecordDataLoader.configure do |c|
280
+ c.output = { type: :file, filename: "./my_script.sql" } # Outputs to the provided file
281
+ end
282
+ ```
283
+
284
+ When using the `:file` type with Postgres, the resulting script will have `\COPY` commands which reference CSV files that contain the data batches to be copied. The CSV files will be created alongside the SQL script and are named using the script's base name, the table name, and the row range of the given batch. For example `./my_script_customers_rows_0_to_999.csv`. Each `\COPY` command in the SQL file will reference the corresponding CSV file so all you need to do is execute the SQL file using `psql`:
285
+
286
+ ```bash
287
+ psql -h my-db-host -U my_user -f my_script.sql
288
+ ```
289
+
211
290
  ## Development
212
291
 
213
292
  After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
data/Rakefile CHANGED
@@ -3,10 +3,16 @@
3
3
  require "bundler/gem_tasks"
4
4
  require "rspec/core/rake_task"
5
5
  require "rubocop/rake_task"
6
- require "coveralls/rake/task"
7
6
 
8
7
  RSpec::Core::RakeTask.new(:spec)
9
8
  RuboCop::RakeTask.new(:rubocop)
10
- Coveralls::RakeTask.new
11
9
 
12
10
  task default: [:spec, :rubocop]
11
+
12
+ task :wait_for_test_db do
13
+ require "active_record_data_loader"
14
+ require "./spec/active_record_helper"
15
+
16
+ ActiveRecordHelper.wait_for_mysql
17
+ ActiveRecordHelper.wait_for_postgres
18
+ end
@@ -20,7 +20,7 @@ Gem::Specification.new do |spec|
20
20
  spec.metadata["source_code_uri"] = "https://github.com/abeiderman/active_record_data_loader"
21
21
  else
22
22
  raise "RubyGems 2.0 or newer is required to protect against " \
23
- "public gem pushes."
23
+ "public gem pushes."
24
24
  end
25
25
 
26
26
  spec.files = `git ls-files -z`.split("\x0").reject do |f|
@@ -36,7 +36,6 @@ Gem::Specification.new do |spec|
36
36
 
37
37
  spec.add_development_dependency "appraisal"
38
38
  spec.add_development_dependency "bundler", ">= 1.16"
39
- spec.add_development_dependency "coveralls"
40
39
  spec.add_development_dependency "mysql2"
41
40
  spec.add_development_dependency "pg"
42
41
  spec.add_development_dependency "pry"
@@ -44,6 +43,8 @@ Gem::Specification.new do |spec|
44
43
  spec.add_development_dependency "rspec", "~> 3.0"
45
44
  spec.add_development_dependency "rspec-collection_matchers"
46
45
  spec.add_development_dependency "rubocop"
46
+ spec.add_development_dependency "simplecov"
47
+ spec.add_development_dependency "simplecov-lcov"
47
48
  spec.add_development_dependency "sqlite3"
48
49
  spec.add_development_dependency "timecop"
49
50
  end
@@ -5,34 +5,35 @@ module ActiveRecordDataLoader
5
5
  class EnumValueGenerator
6
6
  class << self
7
7
  def generator_for(model_class:, ar_column:, connection_factory:)
8
- values = enum_values_for(model_class, ar_column.sql_type, connection_factory)
8
+ values = enum_values_for(ar_column.sql_type, connection_factory)
9
9
  -> { values.sample }
10
10
  end
11
11
 
12
12
  private
13
13
 
14
- def enum_values_for(model_class, enum_type, connection_factory)
14
+ def enum_values_for(enum_type, connection_factory)
15
15
  connection = connection_factory.call
16
16
 
17
17
  if connection.adapter_name.downcase.to_sym == :postgresql
18
- postgres_enum_values_for(model_class, enum_type)
18
+ postgres_enum_values_for(connection, enum_type)
19
19
  elsif connection.adapter_name.downcase.to_s.start_with?("mysql")
20
- mysql_enum_values_for(model_class, enum_type)
20
+ mysql_enum_values_for(enum_type)
21
21
  else
22
22
  []
23
23
  end
24
+ ensure
25
+ connection&.close
24
26
  end
25
27
 
26
- def postgres_enum_values_for(model_class, enum_type)
27
- model_class
28
- .connection
28
+ def postgres_enum_values_for(connection, enum_type)
29
+ connection
29
30
  .execute("SELECT unnest(enum_range(NULL::#{enum_type}))::text")
30
31
  .map(&:values)
31
32
  .flatten
32
33
  .compact
33
34
  end
34
35
 
35
- def mysql_enum_values_for(_model_class, enum_type)
36
+ def mysql_enum_values_for(enum_type)
36
37
  enum_type
37
38
  .to_s
38
39
  .downcase
@@ -5,7 +5,7 @@ module ActiveRecordDataLoader
5
5
  class IntegerValueGenerator
6
6
  class << self
7
7
  def generator_for(model_class:, ar_column:, connection_factory: nil)
8
- range_limit = [(256**number_of_bytes(ar_column)) / 2 - 1, 1_000_000_000].min
8
+ range_limit = [((256**number_of_bytes(ar_column)) / 2) - 1, 1_000_000_000].min
9
9
 
10
10
  -> { rand(0..range_limit) }
11
11
  end
@@ -2,12 +2,13 @@
2
2
 
3
3
  module ActiveRecordDataLoader
4
4
  class BulkInsertStrategy
5
- def initialize(data_generator)
5
+ def initialize(data_generator, output_adapter)
6
6
  @data_generator = data_generator
7
+ @output_adapter = output_adapter
7
8
  end
8
9
 
9
10
  def load_batch(row_numbers, connection)
10
- connection.insert(<<~SQL)
11
+ output_adapter.insert(connection: connection, command: <<~SQL)
11
12
  INSERT INTO #{quoted_table_name(connection)} (#{column_list(connection)})
12
13
  VALUES #{values(row_numbers, connection)}
13
14
  SQL
@@ -23,7 +24,7 @@ module ActiveRecordDataLoader
23
24
 
24
25
  private
25
26
 
26
- attr_reader :data_generator
27
+ attr_reader :data_generator, :output_adapter
27
28
 
28
29
  def quoted_table_name(connection)
29
30
  @quoted_table_name ||= connection.quote_table_name(data_generator.table)
@@ -2,24 +2,64 @@
2
2
 
3
3
  module ActiveRecordDataLoader
4
4
  class Configuration
5
- attr_accessor :default_batch_size, :default_row_count, :logger, :statement_timeout, :connection_factory
5
+ attr_accessor :connection_factory, :default_batch_size, :default_row_count,
6
+ :logger, :statement_timeout
7
+ attr_reader :output
6
8
 
7
9
  def initialize(
8
10
  default_batch_size: 100_000,
9
11
  default_row_count: 1,
10
12
  logger: nil,
11
13
  statement_timeout: "2min",
12
- connection_factory: -> { ::ActiveRecord::Base.connection }
14
+ connection_factory: -> { ::ActiveRecord::Base.connection },
15
+ output: :connection
13
16
  )
14
17
  @default_batch_size = default_batch_size
15
18
  @default_row_count = default_row_count
16
19
  @logger = logger || default_logger
17
20
  @statement_timeout = statement_timeout
18
21
  @connection_factory = connection_factory
22
+ self.output = output
23
+ end
24
+
25
+ def output=(output)
26
+ @output = validate_output(output || { type: :connection })
27
+ end
28
+
29
+ def output_adapter
30
+ if output.fetch(:type) == :file
31
+ ActiveRecordDataLoader::FileOutputAdapter.new(output)
32
+ else
33
+ ActiveRecordDataLoader::ConnectionOutputAdapter.new
34
+ end
35
+ end
36
+
37
+ def connection_handler
38
+ ActiveRecordDataLoader::ConnectionHandler.new(
39
+ connection_factory: connection_factory,
40
+ statement_timeout: statement_timeout,
41
+ output_adapter: output_adapter
42
+ )
19
43
  end
20
44
 
21
45
  private
22
46
 
47
+ OUTPUT_OPTIONS_BY_TYPE = { connection: %i[type], file: %i[type filename] }.freeze
48
+
49
+ def validate_output(output)
50
+ if %i[file connection].include?(output)
51
+ { type: output }
52
+ elsif output.is_a?(Hash)
53
+ raise "The output hash must contain a :type key with either :connection or :file" \
54
+ unless %i[file connection].include?(output[:type])
55
+
56
+ output.slice(*OUTPUT_OPTIONS_BY_TYPE[output[:type]])
57
+ else
58
+ raise "The output configuration parameter must be either a symbol for :connection or :file, "\
59
+ "or a hash with more detailed output options."
60
+ end
61
+ end
62
+
23
63
  def default_logger
24
64
  if defined?(Rails) && Rails.respond_to?(:logger)
25
65
  Rails.logger
@@ -0,0 +1,74 @@
1
+ # frozen_string_literal: true
2
+
3
+ module ActiveRecordDataLoader
4
+ class ConnectionHandler
5
+ def initialize(connection_factory:, statement_timeout:, output_adapter:)
6
+ @connection_factory = connection_factory
7
+ @statement_timeout = statement_timeout
8
+ @output_adapter = output_adapter
9
+ end
10
+
11
+ def with_connection
12
+ connection = open_connection
13
+ if postgres?(connection)
14
+ original_timeout = retrieve_statement_timeout(connection)
15
+ update_statement_timeout(connection, statement_timeout)
16
+ yield connection
17
+ update_statement_timeout(connection, original_timeout)
18
+ else
19
+ yield connection
20
+ end
21
+ ensure
22
+ connection&.close
23
+ end
24
+
25
+ # When the output is going to a script file, there are two places to update the
26
+ # statement_timeout. The connection itself needs to have the timeout updated
27
+ # because we are reading data from the connection to come up with related data
28
+ # while generating the data. Also, the final SQL script file needs the timeout
29
+ # updated so that when those \COPY commands are executed they have the higher
30
+ # timeout as well.
31
+ def with_statement_timeout_for_output
32
+ return yield unless output_adapter.needs_timeout_output?
33
+
34
+ original_timeout = begin
35
+ connection = open_connection
36
+ retrieve_statement_timeout(connection) if postgres?(connection)
37
+ ensure
38
+ connection&.close
39
+ end
40
+
41
+ if original_timeout
42
+ output_adapter.execute(statement_timeout_set_command(statement_timeout))
43
+ yield
44
+ output_adapter.execute(statement_timeout_set_command(original_timeout))
45
+ else
46
+ yield
47
+ end
48
+ end
49
+
50
+ private
51
+
52
+ attr_reader :connection_factory, :statement_timeout, :output_adapter
53
+
54
+ def retrieve_statement_timeout(connection)
55
+ connection.execute("SHOW statement_timeout").first["statement_timeout"]
56
+ end
57
+
58
+ def update_statement_timeout(connection, timeout)
59
+ connection.execute(statement_timeout_set_command(timeout))
60
+ end
61
+
62
+ def statement_timeout_set_command(timeout)
63
+ "SET statement_timeout = \"#{timeout}\""
64
+ end
65
+
66
+ def open_connection
67
+ connection_factory.call
68
+ end
69
+
70
+ def postgres?(connection)
71
+ connection.adapter_name.downcase.to_sym == :postgresql
72
+ end
73
+ end
74
+ end
@@ -0,0 +1,20 @@
1
+ # frozen_string_literal: true
2
+
3
+ module ActiveRecordDataLoader
4
+ class ConnectionOutputAdapter
5
+ def needs_timeout_output?
6
+ false
7
+ end
8
+
9
+ def copy(connection:, table:, columns:, data:, row_numbers:)
10
+ raw_connection = connection.raw_connection
11
+ raw_connection.copy_data("COPY #{table} (#{columns}) FROM STDIN WITH (FORMAT CSV)") do
12
+ raw_connection.put_copy_data(data.join("\n"))
13
+ end
14
+ end
15
+
16
+ def insert(connection:, command:)
17
+ connection.insert(command)
18
+ end
19
+ end
20
+ end
@@ -2,15 +2,19 @@
2
2
 
3
3
  module ActiveRecordDataLoader
4
4
  class CopyStrategy
5
- def initialize(data_generator)
5
+ def initialize(data_generator, output_adapter)
6
6
  @data_generator = data_generator
7
+ @output_adapter = output_adapter
7
8
  end
8
9
 
9
10
  def load_batch(row_numbers, connection)
10
- csv_data = csv_data_batch(row_numbers, connection)
11
-
12
- raw_connection = connection.raw_connection
13
- raw_connection.copy_data(copy_command(connection)) { raw_connection.put_copy_data(csv_data) }
11
+ output_adapter.copy(
12
+ connection: connection,
13
+ table: table_name_for_copy(connection),
14
+ columns: columns_for_copy(connection),
15
+ data: csv_rows(row_numbers, connection),
16
+ row_numbers: row_numbers
17
+ )
14
18
  end
15
19
 
16
20
  def table_name
@@ -23,27 +27,23 @@ module ActiveRecordDataLoader
23
27
 
24
28
  private
25
29
 
26
- attr_reader :data_generator
30
+ attr_reader :data_generator, :output_adapter
27
31
 
28
- def csv_data_batch(row_numbers, connection)
32
+ def csv_rows(row_numbers, connection)
29
33
  row_numbers.map do |i|
30
34
  data_generator.generate_row(i).map { |d| quote_data(d, connection) }.join(",")
31
- end.join("\n")
35
+ end
32
36
  end
33
37
 
34
- def copy_command(connection)
35
- @copy_command ||= begin
36
- quoted_table_name = connection.quote_table_name(data_generator.table)
37
- columns = data_generator
38
- .column_list
39
- .map { |c| connection.quote_column_name(c) }
40
- .join(", ")
38
+ def table_name_for_copy(connection)
39
+ @table_name_for_copy ||= connection.quote_table_name(data_generator.table)
40
+ end
41
41
 
42
- <<~SQL
43
- COPY #{quoted_table_name} (#{columns})
44
- FROM STDIN WITH (FORMAT CSV)
45
- SQL
46
- end
42
+ def columns_for_copy(connection)
43
+ @columns_for_copy ||= data_generator
44
+ .column_list
45
+ .map { |c| connection.quote_column_name(c) }
46
+ .join(", ")
47
47
  end
48
48
 
49
49
  def quote_data(data, connection)
@@ -0,0 +1,40 @@
1
+ # frozen_string_literal: true
2
+
3
+ module ActiveRecordDataLoader
4
+ class FileOutputAdapter
5
+ def initialize(options)
6
+ @filename = options.fetch(:filename, "active_record_data_loader_script.sql")
7
+ @file_basename = File.basename(@filename, File.extname(@filename))
8
+ @path = File.expand_path(File.dirname(@filename))
9
+ end
10
+
11
+ def needs_timeout_output?
12
+ true
13
+ end
14
+
15
+ def copy(connection:, table:, columns:, data:, row_numbers:)
16
+ data_filename = data_filename(table, row_numbers)
17
+ File.open(data_filename, "w") { |f| f.puts(data) }
18
+ File.open(@filename, "a") do |file|
19
+ file.puts("\\COPY #{table} (#{columns}) FROM '#{data_filename}' WITH (FORMAT CSV);")
20
+ end
21
+ end
22
+
23
+ def insert(connection:, command:)
24
+ execute(command)
25
+ end
26
+
27
+ def execute(command)
28
+ File.open(@filename, "a") { |f| f.puts("#{command.gsub("\n", ' ')};") }
29
+ end
30
+
31
+ private
32
+
33
+ def data_filename(table, row_numbers)
34
+ File.join(
35
+ @path,
36
+ "#{@file_basename}_#{table.gsub(/"/, '')}_rows_#{row_numbers[0]}_to_#{row_numbers[-1]}.csv"
37
+ )
38
+ end
39
+ end
40
+ end
@@ -13,9 +13,11 @@ module ActiveRecordDataLoader
13
13
  )
14
14
  new(
15
15
  logger: configuration.logger,
16
- statement_timeout: configuration.statement_timeout,
17
- strategy: strategy_class(configuration.connection_factory).new(data_generator),
18
- connection_factory: configuration.connection_factory
16
+ connection_handler: configuration.connection_handler,
17
+ strategy: strategy_class(configuration.connection_factory).new(
18
+ data_generator,
19
+ configuration.output_adapter
20
+ )
19
21
  ).load_data(batch_size, total_rows)
20
22
  end
21
23
 
@@ -30,11 +32,10 @@ module ActiveRecordDataLoader
30
32
  end
31
33
  end
32
34
 
33
- def initialize(logger:, statement_timeout:, strategy:, connection_factory:)
35
+ def initialize(logger:, connection_handler:, strategy:)
34
36
  @logger = logger
37
+ @connection_handler = connection_handler
35
38
  @strategy = strategy
36
- @statement_timeout = statement_timeout
37
- @connection_factory = connection_factory
38
39
  end
39
40
 
40
41
  def load_data(batch_size, total_rows)
@@ -57,10 +58,10 @@ module ActiveRecordDataLoader
57
58
 
58
59
  private
59
60
 
60
- attr_reader :strategy, :statement_timeout, :logger, :connection_factory
61
+ attr_reader :strategy, :connection_handler, :logger
61
62
 
62
63
  def load_in_batches(batch_size, total_rows, batch_count)
63
- with_connection do |connection|
64
+ connection_handler.with_connection do |connection|
64
65
  total_rows.times.each_slice(batch_size).with_index do |row_numbers, i|
65
66
  time = Benchmark.realtime { strategy.load_batch(row_numbers, connection) }
66
67
 
@@ -71,28 +72,5 @@ module ActiveRecordDataLoader
71
72
  end
72
73
  end
73
74
  end
74
-
75
- def with_connection
76
- if connection.adapter_name.downcase.to_sym == :postgresql
77
- original_timeout = retrieve_statement_timeout
78
- update_statement_timeout(statement_timeout)
79
- yield connection
80
- update_statement_timeout(original_timeout)
81
- else
82
- yield connection
83
- end
84
- end
85
-
86
- def retrieve_statement_timeout
87
- connection.execute("SHOW statement_timeout").first["statement_timeout"]
88
- end
89
-
90
- def update_statement_timeout(timeout)
91
- connection.execute("SET statement_timeout = \"#{timeout}\"")
92
- end
93
-
94
- def connection
95
- connection_factory.call
96
- end
97
75
  end
98
76
  end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module ActiveRecordDataLoader
4
- VERSION = "1.1.0"
4
+ VERSION = "1.2.0"
5
5
  end
@@ -3,6 +3,7 @@
3
3
  require "active_record_data_loader/version"
4
4
  require "active_record"
5
5
  require "active_record_data_loader/configuration"
6
+ require "active_record_data_loader/connection_handler"
6
7
  require "active_record_data_loader/data_faker"
7
8
  require "active_record_data_loader/active_record/per_row_value_cache"
8
9
  require "active_record_data_loader/active_record/integer_value_generator"
@@ -17,6 +18,8 @@ require "active_record_data_loader/dsl/belongs_to_association"
17
18
  require "active_record_data_loader/dsl/polymorphic_association"
18
19
  require "active_record_data_loader/dsl/model"
19
20
  require "active_record_data_loader/dsl/definition"
21
+ require "active_record_data_loader/connection_output_adapter"
22
+ require "active_record_data_loader/file_output_adapter"
20
23
  require "active_record_data_loader/copy_strategy"
21
24
  require "active_record_data_loader/bulk_insert_strategy"
22
25
  require "active_record_data_loader/loader"
@@ -46,7 +49,9 @@ module ActiveRecordDataLoader
46
49
  def load_data
47
50
  ActiveRecordDataLoader::ActiveRecord::PerRowValueCache.clear
48
51
 
49
- definition.models.map { |m| load_model(m) }
52
+ configuration.connection_handler.with_statement_timeout_for_output do
53
+ definition.models.map { |m| load_model(m) }
54
+ end
50
55
  end
51
56
 
52
57
  private
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: active_record_data_loader
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.1.0
4
+ version: 1.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Alejandro Beiderman
8
- autorequire:
8
+ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2021-05-01 00:00:00.000000000 Z
11
+ date: 2021-11-14 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: activerecord
@@ -52,20 +52,6 @@ dependencies:
52
52
  - - ">="
53
53
  - !ruby/object:Gem::Version
54
54
  version: '1.16'
55
- - !ruby/object:Gem::Dependency
56
- name: coveralls
57
- requirement: !ruby/object:Gem::Requirement
58
- requirements:
59
- - - ">="
60
- - !ruby/object:Gem::Version
61
- version: '0'
62
- type: :development
63
- prerelease: false
64
- version_requirements: !ruby/object:Gem::Requirement
65
- requirements:
66
- - - ">="
67
- - !ruby/object:Gem::Version
68
- version: '0'
69
55
  - !ruby/object:Gem::Dependency
70
56
  name: mysql2
71
57
  requirement: !ruby/object:Gem::Requirement
@@ -164,6 +150,34 @@ dependencies:
164
150
  - - ">="
165
151
  - !ruby/object:Gem::Version
166
152
  version: '0'
153
+ - !ruby/object:Gem::Dependency
154
+ name: simplecov
155
+ requirement: !ruby/object:Gem::Requirement
156
+ requirements:
157
+ - - ">="
158
+ - !ruby/object:Gem::Version
159
+ version: '0'
160
+ type: :development
161
+ prerelease: false
162
+ version_requirements: !ruby/object:Gem::Requirement
163
+ requirements:
164
+ - - ">="
165
+ - !ruby/object:Gem::Version
166
+ version: '0'
167
+ - !ruby/object:Gem::Dependency
168
+ name: simplecov-lcov
169
+ requirement: !ruby/object:Gem::Requirement
170
+ requirements:
171
+ - - ">="
172
+ - !ruby/object:Gem::Version
173
+ version: '0'
174
+ type: :development
175
+ prerelease: false
176
+ version_requirements: !ruby/object:Gem::Requirement
177
+ requirements:
178
+ - - ">="
179
+ - !ruby/object:Gem::Version
180
+ version: '0'
167
181
  - !ruby/object:Gem::Dependency
168
182
  name: sqlite3
169
183
  requirement: !ruby/object:Gem::Requirement
@@ -201,10 +215,11 @@ executables:
201
215
  extensions: []
202
216
  extra_rdoc_files: []
203
217
  files:
218
+ - ".github/workflows/build.yml"
219
+ - ".github/workflows/gem-push.yml"
204
220
  - ".gitignore"
205
221
  - ".rspec"
206
222
  - ".rubocop.yml"
207
- - ".travis.yml"
208
223
  - Appraisals
209
224
  - CHANGELOG.md
210
225
  - CODE_OF_CONDUCT.md
@@ -217,7 +232,6 @@ files:
217
232
  - bin/console
218
233
  - bin/setup
219
234
  - config/database.yml
220
- - config/database.yml.travis
221
235
  - docker-compose.yml
222
236
  - gemfiles/.bundle/config
223
237
  - gemfiles/activerecord_5.gemfile
@@ -237,21 +251,24 @@ files:
237
251
  - lib/active_record_data_loader/active_record/text_value_generator.rb
238
252
  - lib/active_record_data_loader/bulk_insert_strategy.rb
239
253
  - lib/active_record_data_loader/configuration.rb
254
+ - lib/active_record_data_loader/connection_handler.rb
255
+ - lib/active_record_data_loader/connection_output_adapter.rb
240
256
  - lib/active_record_data_loader/copy_strategy.rb
241
257
  - lib/active_record_data_loader/data_faker.rb
242
258
  - lib/active_record_data_loader/dsl/belongs_to_association.rb
243
259
  - lib/active_record_data_loader/dsl/definition.rb
244
260
  - lib/active_record_data_loader/dsl/model.rb
245
261
  - lib/active_record_data_loader/dsl/polymorphic_association.rb
262
+ - lib/active_record_data_loader/file_output_adapter.rb
246
263
  - lib/active_record_data_loader/loader.rb
247
264
  - lib/active_record_data_loader/version.rb
248
265
  - log/.keep
249
- homepage:
266
+ homepage:
250
267
  licenses:
251
268
  - MIT
252
269
  metadata:
253
270
  source_code_uri: https://github.com/abeiderman/active_record_data_loader
254
- post_install_message:
271
+ post_install_message:
255
272
  rdoc_options: []
256
273
  require_paths:
257
274
  - lib
@@ -266,8 +283,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
266
283
  - !ruby/object:Gem::Version
267
284
  version: '0'
268
285
  requirements: []
269
- rubygems_version: 3.1.4
270
- signing_key:
286
+ rubygems_version: 3.0.3.1
287
+ signing_key:
271
288
  specification_version: 4
272
289
  summary: A utility to bulk load test data for performance testing.
273
290
  test_files: []
data/.travis.yml DELETED
@@ -1,24 +0,0 @@
1
- sudo: false
2
- language: ruby
3
- env:
4
- - COVERALLS_PARALLEL=true
5
- rvm:
6
- - 2.5.9
7
- - 2.6.7
8
- - 2.7.3
9
- gemfile:
10
- - gemfiles/activerecord_5.gemfile
11
- - gemfiles/rails.gemfile
12
- - gemfiles/faker.gemfile
13
- - gemfiles/ffaker.gemfile
14
- services:
15
- - postgresql
16
- - mysql
17
- notifications:
18
- webhooks: https://coveralls.io/webhook
19
- before_install: "gem update --system && gem install bundler"
20
- before_script:
21
- - psql -c 'create database test;' -U postgres
22
- - mysql -e 'CREATE DATABASE IF NOT EXISTS test;'
23
- - cp config/database.yml.travis config/database.yml
24
- script: "bundle exec rake"
@@ -1,12 +0,0 @@
1
- postgres:
2
- adapter: "postgresql"
3
- database: "test"
4
-
5
- sqlite3:
6
- adapter: "sqlite3"
7
- database: ":memory:"
8
-
9
- mysql:
10
- adapter: "mysql2"
11
- database: "test"
12
- username: "travis"