active_record_data_loader 0.1.1 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Appraisals +0 -4
- data/Gemfile.lock +1 -1
- data/README.md +2 -1
- data/Rakefile +2 -0
- data/lib/active_record_data_loader.rb +4 -2
- data/lib/active_record_data_loader/active_record/belongs_to_configuration.rb +13 -4
- data/lib/active_record_data_loader/active_record/model_data_generator.rb +6 -3
- data/lib/active_record_data_loader/bulk_insert_strategy.rb +1 -6
- data/lib/active_record_data_loader/configuration.rb +4 -2
- data/lib/active_record_data_loader/copy_strategy.rb +11 -14
- data/lib/active_record_data_loader/dsl/belongs_to_association.rb +15 -0
- data/lib/active_record_data_loader/dsl/model.rb +6 -1
- data/lib/active_record_data_loader/loader.rb +32 -9
- data/lib/active_record_data_loader/version.rb +1 -1
- data/script/ci_build.sh +2 -4
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 43867d09da0479be24e9a23687d53cd4137f5ff8b47f769183a9bee19d6f8372
|
4
|
+
data.tar.gz: 71ea6ea8d1cbc9821168a0f87e90c7e6e08f2b2053c48905bb0bcfe95f5d37f0
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 2523c1db84ecd726ba333369646a0fe1e9b6ee8e0e0dc5694e2b8a8ef693fe752c8685914d122c7d9561dd05ff3c1d1905a328445a7407c82596b72cf0f5e9ff
|
7
|
+
data.tar.gz: 3c95553f3b39a535739e3fd998f67ca0c5420e37411d229b9284c0ccc99c9b7690d56a835b8fdaadfddc6f8750604f29852f77198b5a178c4cbab00bf3d80a1f
|
data/Appraisals
CHANGED
data/Gemfile.lock
CHANGED
data/README.md
CHANGED
@@ -1,7 +1,8 @@
|
|
1
1
|
# ActiveRecord Data Loader
|
2
2
|
|
3
|
+
[Gem Version](https://badge.fury.io/rb/active_record_data_loader)
|
3
4
|
[Build Status](https://travis-ci.org/abeiderman/active_record_data_loader)
|
4
|
-
[Coverage Status](https://coveralls.io/github/abeiderman/active_record_data_loader?branch=master)
|
5
|
+
[Coverage Status](https://coveralls.io/github/abeiderman/active_record_data_loader?branch=master)
|
5
6
|
[Maintainability](https://codeclimate.com/github/abeiderman/active_record_data_loader/maintainability)
|
6
7
|
|
7
8
|
Efficiently bulk load data for your ActiveRecord models with a simple DSL.
|
data/Rakefile
CHANGED
data/lib/active_record_data_loader.rb
CHANGED
@@ -11,6 +11,7 @@ require "active_record_data_loader/active_record/column_configuration"
|
|
11
11
|
require "active_record_data_loader/active_record/belongs_to_configuration"
|
12
12
|
require "active_record_data_loader/active_record/polymorphic_belongs_to_configuration"
|
13
13
|
require "active_record_data_loader/active_record/model_data_generator"
|
14
|
+
require "active_record_data_loader/dsl/belongs_to_association"
|
14
15
|
require "active_record_data_loader/dsl/polymorphic_association"
|
15
16
|
require "active_record_data_loader/dsl/model"
|
16
17
|
require "active_record_data_loader/dsl/definition"
|
@@ -45,14 +46,15 @@ module ActiveRecordDataLoader
|
|
45
46
|
generator = ActiveRecordDataLoader::ActiveRecord::ModelDataGenerator.new(
|
46
47
|
model: m.klass,
|
47
48
|
column_settings: m.columns,
|
48
|
-
polymorphic_settings: m.polymorphic_associations
|
49
|
+
polymorphic_settings: m.polymorphic_associations,
|
50
|
+
belongs_to_settings: m.belongs_to_associations
|
49
51
|
)
|
50
52
|
|
51
53
|
ActiveRecordDataLoader::Loader.load_data(
|
52
54
|
data_generator: generator,
|
53
55
|
batch_size: m.batch_size,
|
54
56
|
total_rows: m.row_count,
|
55
|
-
|
57
|
+
configuration: configuration
|
56
58
|
)
|
57
59
|
end
|
58
60
|
end
|
data/lib/active_record_data_loader/active_record/belongs_to_configuration.rb
CHANGED
@@ -3,14 +3,15 @@
|
|
3
3
|
module ActiveRecordDataLoader
|
4
4
|
module ActiveRecord
|
5
5
|
class BelongsToConfiguration
|
6
|
-
def self.config_for(ar_association:)
|
6
|
+
def self.config_for(ar_association:, query: nil)
|
7
7
|
raise "#{name} does not support polymorphic associations" if ar_association.polymorphic?
|
8
8
|
|
9
|
-
{ ar_association.join_foreign_key.to_sym => new(ar_association).foreign_key_func }
|
9
|
+
{ ar_association.join_foreign_key.to_sym => new(ar_association, query).foreign_key_func }
|
10
10
|
end
|
11
11
|
|
12
|
-
def initialize(ar_association)
|
12
|
+
def initialize(ar_association, query)
|
13
13
|
@ar_association = ar_association
|
14
|
+
@query = query
|
14
15
|
end
|
15
16
|
|
16
17
|
def foreign_key_func
|
@@ -20,7 +21,15 @@ module ActiveRecordDataLoader
|
|
20
21
|
private
|
21
22
|
|
22
23
|
def possible_values
|
23
|
-
@possible_values ||=
|
24
|
+
@possible_values ||= base_query.pluck(@ar_association.join_primary_key).to_a
|
25
|
+
end
|
26
|
+
|
27
|
+
def base_query
|
28
|
+
if @query&.respond_to?(:call)
|
29
|
+
@query.call.all
|
30
|
+
else
|
31
|
+
@ar_association.klass.all
|
32
|
+
end
|
24
33
|
end
|
25
34
|
end
|
26
35
|
end
|
data/lib/active_record_data_loader/active_record/model_data_generator.rb
CHANGED
@@ -5,11 +5,12 @@ module ActiveRecordDataLoader
|
|
5
5
|
class ModelDataGenerator
|
6
6
|
attr_reader :table
|
7
7
|
|
8
|
-
def initialize(model:, column_settings:, polymorphic_settings: [])
|
8
|
+
def initialize(model:, column_settings:, polymorphic_settings: [], belongs_to_settings: [])
|
9
9
|
@model_class = model
|
10
10
|
@table = model.table_name
|
11
|
-
@polymorphic_settings = polymorphic_settings
|
12
11
|
@column_settings = column_settings
|
12
|
+
@polymorphic_settings = polymorphic_settings
|
13
|
+
@belongs_to_settings = belongs_to_settings.map { |s| [s.name, s.query] }.to_h
|
13
14
|
end
|
14
15
|
|
15
16
|
def column_list
|
@@ -58,7 +59,9 @@ module ActiveRecordDataLoader
|
|
58
59
|
.reflect_on_all_associations
|
59
60
|
.select(&:belongs_to?)
|
60
61
|
.reject(&:polymorphic?)
|
61
|
-
.map
|
62
|
+
.map do |assoc|
|
63
|
+
BelongsToConfiguration.config_for(ar_association: assoc, query: @belongs_to_settings[assoc.name])
|
64
|
+
end
|
62
65
|
.reduce({}, :merge)
|
63
66
|
end
|
64
67
|
|
data/lib/active_record_data_loader/bulk_insert_strategy.rb
CHANGED
@@ -1,17 +1,12 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
require "csv"
|
4
|
-
require "benchmark"
|
5
|
-
|
6
3
|
module ActiveRecordDataLoader
|
7
4
|
class BulkInsertStrategy
|
8
5
|
def initialize(data_generator)
|
9
6
|
@data_generator = data_generator
|
10
7
|
end
|
11
8
|
|
12
|
-
def load_batch(row_numbers)
|
13
|
-
connection = ::ActiveRecord::Base.connection
|
14
|
-
|
9
|
+
def load_batch(row_numbers, connection)
|
15
10
|
connection.insert(<<~SQL)
|
16
11
|
INSERT INTO #{quoted_table_name(connection)} (#{column_list(connection)})
|
17
12
|
VALUES #{values(row_numbers, connection)}
|
data/lib/active_record_data_loader/configuration.rb
CHANGED
@@ -2,16 +2,18 @@
|
|
2
2
|
|
3
3
|
module ActiveRecordDataLoader
|
4
4
|
class Configuration
|
5
|
-
attr_accessor :default_batch_size, :default_row_count, :logger
|
5
|
+
attr_accessor :default_batch_size, :default_row_count, :logger, :statement_timeout
|
6
6
|
|
7
7
|
def initialize(
|
8
8
|
default_batch_size: 100_000,
|
9
9
|
default_row_count: 1,
|
10
|
-
logger: Logger.new(STDOUT, level: :info)
|
10
|
+
logger: Logger.new(STDOUT, level: :info),
|
11
|
+
statement_timeout: "2min"
|
11
12
|
)
|
12
13
|
@default_batch_size = default_batch_size
|
13
14
|
@default_row_count = default_row_count
|
14
15
|
@logger = logger
|
16
|
+
@statement_timeout = statement_timeout
|
15
17
|
end
|
16
18
|
end
|
17
19
|
end
|
data/lib/active_record_data_loader/copy_strategy.rb
CHANGED
@@ -1,19 +1,16 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
require "csv"
|
4
|
-
require "benchmark"
|
5
|
-
|
6
3
|
module ActiveRecordDataLoader
|
7
4
|
class CopyStrategy
|
8
5
|
def initialize(data_generator)
|
9
6
|
@data_generator = data_generator
|
10
7
|
end
|
11
8
|
|
12
|
-
def load_batch(row_numbers)
|
13
|
-
csv_data = csv_data_batch(row_numbers)
|
9
|
+
def load_batch(row_numbers, connection)
|
10
|
+
csv_data = csv_data_batch(row_numbers, connection)
|
14
11
|
|
15
|
-
|
16
|
-
|
12
|
+
raw_connection = connection.raw_connection
|
13
|
+
raw_connection.copy_data(copy_command(connection)) { raw_connection.put_copy_data(csv_data) }
|
17
14
|
end
|
18
15
|
|
19
16
|
def table_name
|
@@ -28,18 +25,18 @@ module ActiveRecordDataLoader
|
|
28
25
|
|
29
26
|
attr_reader :data_generator
|
30
27
|
|
31
|
-
def csv_data_batch(row_numbers)
|
28
|
+
def csv_data_batch(row_numbers, connection)
|
32
29
|
row_numbers.map do |i|
|
33
|
-
data_generator.generate_row(i).map { |d| quote_data(d) }.join(",")
|
30
|
+
data_generator.generate_row(i).map { |d| quote_data(d, connection) }.join(",")
|
34
31
|
end.join("\n")
|
35
32
|
end
|
36
33
|
|
37
|
-
def copy_command
|
34
|
+
def copy_command(connection)
|
38
35
|
@copy_command ||= begin
|
39
|
-
quoted_table_name =
|
36
|
+
quoted_table_name = connection.quote_table_name(data_generator.table)
|
40
37
|
columns = data_generator
|
41
38
|
.column_list
|
42
|
-
.map { |c|
|
39
|
+
.map { |c| connection.quote_column_name(c) }
|
43
40
|
.join(", ")
|
44
41
|
|
45
42
|
<<~SQL
|
@@ -49,10 +46,10 @@ module ActiveRecordDataLoader
|
|
49
46
|
end
|
50
47
|
end
|
51
48
|
|
52
|
-
def quote_data(data)
|
49
|
+
def quote_data(data, connection)
|
53
50
|
return if data.nil?
|
54
51
|
|
55
|
-
"\"#{
|
52
|
+
"\"#{connection.quote_string(data.to_s)}\""
|
56
53
|
end
|
57
54
|
end
|
58
55
|
end
|
data/lib/active_record_data_loader/dsl/belongs_to_association.rb
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module ActiveRecordDataLoader
|
4
|
+
module Dsl
|
5
|
+
class BelongsToAssociation
|
6
|
+
attr_reader :model_class, :name, :query
|
7
|
+
|
8
|
+
def initialize(model_class, name, query = nil)
|
9
|
+
@model_class = model_class
|
10
|
+
@name = name
|
11
|
+
@query = query
|
12
|
+
end
|
13
|
+
end
|
14
|
+
end
|
15
|
+
end
|
data/lib/active_record_data_loader/dsl/model.rb
CHANGED
@@ -3,7 +3,7 @@
|
|
3
3
|
module ActiveRecordDataLoader
|
4
4
|
module Dsl
|
5
5
|
class Model
|
6
|
-
attr_reader :klass, :columns, :row_count, :polymorphic_associations
|
6
|
+
attr_reader :klass, :columns, :row_count, :polymorphic_associations, :belongs_to_associations
|
7
7
|
|
8
8
|
def initialize(klass:, configuration:)
|
9
9
|
@klass = klass
|
@@ -11,6 +11,7 @@ module ActiveRecordDataLoader
|
|
11
11
|
@row_count = configuration.default_row_count
|
12
12
|
@batch_size = configuration.default_batch_size
|
13
13
|
@polymorphic_associations = []
|
14
|
+
@belongs_to_associations = []
|
14
15
|
end
|
15
16
|
|
16
17
|
def count(count)
|
@@ -30,6 +31,10 @@ module ActiveRecordDataLoader
|
|
30
31
|
@klass, assoc_name
|
31
32
|
).tap { |a| block.call(a) }
|
32
33
|
end
|
34
|
+
|
35
|
+
def belongs_to(assoc_name, eligible_set:)
|
36
|
+
@belongs_to_associations << BelongsToAssociation.new(@klass, assoc_name, eligible_set)
|
37
|
+
end
|
33
38
|
end
|
34
39
|
end
|
35
40
|
end
|
data/lib/active_record_data_loader/loader.rb
CHANGED
@@ -9,10 +9,11 @@ module ActiveRecordDataLoader
|
|
9
9
|
data_generator:,
|
10
10
|
total_rows:,
|
11
11
|
batch_size:,
|
12
|
-
|
12
|
+
configuration:
|
13
13
|
)
|
14
14
|
new(
|
15
|
-
logger: logger,
|
15
|
+
logger: configuration.logger,
|
16
|
+
statement_timeout: configuration.statement_timeout,
|
16
17
|
strategy: strategy_class.new(data_generator)
|
17
18
|
).load_data(batch_size, total_rows)
|
18
19
|
end
|
@@ -28,9 +29,10 @@ module ActiveRecordDataLoader
|
|
28
29
|
end
|
29
30
|
end
|
30
31
|
|
31
|
-
def initialize(logger:, strategy:)
|
32
|
+
def initialize(logger:, statement_timeout:, strategy:)
|
32
33
|
@logger = logger
|
33
34
|
@strategy = strategy
|
35
|
+
@statement_timeout = statement_timeout
|
34
36
|
end
|
35
37
|
|
36
38
|
def load_data(batch_size, total_rows)
|
@@ -51,16 +53,37 @@ module ActiveRecordDataLoader
|
|
51
53
|
|
52
54
|
private
|
53
55
|
|
54
|
-
attr_reader :strategy, :logger
|
56
|
+
attr_reader :strategy, :statement_timeout, :logger
|
55
57
|
|
56
58
|
def load_in_batches(batch_size, total_rows, batch_count)
|
57
|
-
|
58
|
-
|
59
|
+
with_connection do |connection|
|
60
|
+
total_rows.times.each_slice(batch_size).with_index do |row_numbers, i|
|
61
|
+
time = Benchmark.realtime { strategy.load_batch(row_numbers, connection) }
|
59
62
|
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
+
logger.debug(
|
64
|
+
"Completed batch #{i + 1}/#{batch_count}, #{row_numbers.count} row(s) in #{time} seconds"
|
65
|
+
)
|
66
|
+
end
|
67
|
+
end
|
68
|
+
end
|
69
|
+
|
70
|
+
def with_connection
|
71
|
+
if ::ActiveRecord::Base.connection.adapter_name.downcase.to_sym == :postgresql
|
72
|
+
original_timeout = retrieve_statement_timeout
|
73
|
+
update_statement_timeout(statement_timeout)
|
74
|
+
yield ::ActiveRecord::Base.connection
|
75
|
+
update_statement_timeout(original_timeout)
|
76
|
+
else
|
77
|
+
yield ::ActiveRecord::Base.connection
|
63
78
|
end
|
64
79
|
end
|
80
|
+
|
81
|
+
def retrieve_statement_timeout
|
82
|
+
::ActiveRecord::Base.connection.execute("SHOW statement_timeout").first["statement_timeout"]
|
83
|
+
end
|
84
|
+
|
85
|
+
def update_statement_timeout(timeout)
|
86
|
+
::ActiveRecord::Base.connection.execute("SET statement_timeout = \"#{timeout}\"")
|
87
|
+
end
|
65
88
|
end
|
66
89
|
end
|
data/script/ci_build.sh
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: active_record_data_loader
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.1
|
4
|
+
version: 0.1.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Alejandro Beiderman
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2019-06-
|
11
|
+
date: 2019-06-11 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: activerecord
|
@@ -206,6 +206,7 @@ files:
|
|
206
206
|
- lib/active_record_data_loader/configuration.rb
|
207
207
|
- lib/active_record_data_loader/copy_strategy.rb
|
208
208
|
- lib/active_record_data_loader/data_faker.rb
|
209
|
+
- lib/active_record_data_loader/dsl/belongs_to_association.rb
|
209
210
|
- lib/active_record_data_loader/dsl/definition.rb
|
210
211
|
- lib/active_record_data_loader/dsl/model.rb
|
211
212
|
- lib/active_record_data_loader/dsl/polymorphic_association.rb
|