bulk_insert2 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/MIT-LICENSE +20 -0
- data/README.md +199 -0
- data/Rakefile +29 -0
- data/lib/bulk_insert.rb +38 -0
- data/lib/bulk_insert/statement_adapters.rb +22 -0
- data/lib/bulk_insert/statement_adapters/base_adapter.rb +21 -0
- data/lib/bulk_insert/statement_adapters/generic_adapter.rb +19 -0
- data/lib/bulk_insert/statement_adapters/mysql_adapter.rb +24 -0
- data/lib/bulk_insert/statement_adapters/postgresql_adapter.rb +28 -0
- data/lib/bulk_insert/statement_adapters/sqlite_adapter.rb +19 -0
- data/lib/bulk_insert/version.rb +7 -0
- data/lib/bulk_insert/worker.rb +136 -0
- data/test/bulk_insert/worker_test.rb +459 -0
- data/test/bulk_insert_test.rb +52 -0
- data/test/dummy/README.rdoc +28 -0
- data/test/dummy/Rakefile +6 -0
- data/test/dummy/app/assets/javascripts/application.js +13 -0
- data/test/dummy/app/assets/stylesheets/application.css +15 -0
- data/test/dummy/app/controllers/application_controller.rb +5 -0
- data/test/dummy/app/helpers/application_helper.rb +2 -0
- data/test/dummy/app/models/testing.rb +2 -0
- data/test/dummy/app/views/layouts/application.html.erb +14 -0
- data/test/dummy/bin/bundle +3 -0
- data/test/dummy/bin/rails +4 -0
- data/test/dummy/bin/rake +4 -0
- data/test/dummy/bin/setup +29 -0
- data/test/dummy/config.ru +4 -0
- data/test/dummy/config/application.rb +24 -0
- data/test/dummy/config/boot.rb +5 -0
- data/test/dummy/config/database.yml +25 -0
- data/test/dummy/config/environment.rb +5 -0
- data/test/dummy/config/environments/development.rb +41 -0
- data/test/dummy/config/environments/production.rb +79 -0
- data/test/dummy/config/environments/test.rb +42 -0
- data/test/dummy/config/initializers/assets.rb +11 -0
- data/test/dummy/config/initializers/backtrace_silencers.rb +7 -0
- data/test/dummy/config/initializers/cookies_serializer.rb +3 -0
- data/test/dummy/config/initializers/filter_parameter_logging.rb +4 -0
- data/test/dummy/config/initializers/inflections.rb +16 -0
- data/test/dummy/config/initializers/mime_types.rb +4 -0
- data/test/dummy/config/initializers/session_store.rb +3 -0
- data/test/dummy/config/initializers/wrap_parameters.rb +14 -0
- data/test/dummy/config/locales/en.yml +23 -0
- data/test/dummy/config/routes.rb +56 -0
- data/test/dummy/config/secrets.yml +22 -0
- data/test/dummy/db/migrate/20151008181535_create_testings.rb +11 -0
- data/test/dummy/db/migrate/20151028194232_add_default_value.rb +5 -0
- data/test/dummy/db/schema.rb +25 -0
- data/test/dummy/public/404.html +67 -0
- data/test/dummy/public/422.html +67 -0
- data/test/dummy/public/500.html +66 -0
- data/test/dummy/public/favicon.ico +0 -0
- data/test/test_helper.rb +19 -0
- metadata +181 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 3e6a484a5e8f122b794605f2664ca20a5d0c887cf78f05581bf40c2aaaa61970
|
4
|
+
data.tar.gz: 4196d55dfd00c9d571bf8608257807a7fe4d290b4dc90b67fb390f66a58a9303
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 457740006d28c583a70f71ba63c4abf81e4bd875f9fec9dc468f35605a558fc4b610d5a09f921f7e57fbbf9759aece0a008dbc6095a2f7e406ae196580517978
|
7
|
+
data.tar.gz: d338d37cabb673e8417273832229e252988648a8e5d653b428bc77526797ce21789fc2673b519506a159f1319249827d6701d15d13448b558b8f432be55d9ac3
|
data/MIT-LICENSE
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
Copyright 2015 Jamis Buck
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
4
|
+
a copy of this software and associated documentation files (the
|
5
|
+
"Software"), to deal in the Software without restriction, including
|
6
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
7
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
8
|
+
permit persons to whom the Software is furnished to do so, subject to
|
9
|
+
the following conditions:
|
10
|
+
|
11
|
+
The above copyright notice and this permission notice shall be
|
12
|
+
included in all copies or substantial portions of the Software.
|
13
|
+
|
14
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
15
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
16
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
17
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
18
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
19
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
20
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,199 @@
|
|
1
|
+
### Replaced type\_cast\_from\_column with lookup\_cast\_type\_from\_column
|
2
|
+
|
3
|
+
# BulkInsert
|
4
|
+
|
5
|
+
A little ActiveRecord extension for helping to insert lots of rows in a
|
6
|
+
single insert statement.
|
7
|
+
|
8
|
+
## Installation
|
9
|
+
|
10
|
+
Add it to your Gemfile:
|
11
|
+
|
12
|
+
```ruby
|
13
|
+
gem 'bulk_insert2', require: 'bulk_insert'
|
14
|
+
```
|
15
|
+
|
16
|
+
## Usage
|
17
|
+
|
18
|
+
BulkInsert adds a new class method to your ActiveRecord models:
|
19
|
+
|
20
|
+
```ruby
|
21
|
+
class Book < ActiveRecord::Base
|
22
|
+
end
|
23
|
+
|
24
|
+
book_attrs = ... # some array of hashes, for instance
|
25
|
+
Book.bulk_insert do |worker|
|
26
|
+
book_attrs.each do |attrs|
|
27
|
+
worker.add(attrs)
|
28
|
+
end
|
29
|
+
end
|
30
|
+
```
|
31
|
+
|
32
|
+
All of those `#add` calls will be accumulated into a single SQL insert
|
33
|
+
statement, vastly improving the performance of multiple sequential
|
34
|
+
inserts (think data imports and the like).
|
35
|
+
|
36
|
+
If you don't like using a block API, you can also simply pass an array
|
37
|
+
of rows to be inserted:
|
38
|
+
|
39
|
+
```ruby
|
40
|
+
book_attrs = ... # some array of hashes, for instance
|
41
|
+
Book.bulk_insert values: book_attrs
|
42
|
+
```
|
43
|
+
|
44
|
+
By default, the columns to be inserted will be all columns in the table,
|
45
|
+
minus the `id` column, but if you want, you can explicitly enumerate
|
46
|
+
the columns:
|
47
|
+
|
48
|
+
```ruby
|
49
|
+
Book.bulk_insert(:title, :author) do |worker|
|
50
|
+
# specify a row as an array of values...
|
51
|
+
worker.add ["Eye of the World", "Robert Jordan"]
|
52
|
+
|
53
|
+
# or as a hash
|
54
|
+
worker.add title: "Lord of Light", author: "Roger Zelazny"
|
55
|
+
end
|
56
|
+
```
|
57
|
+
|
58
|
+
It will automatically set `created_at`/`updated_at` columns to the current
|
59
|
+
date, as well.
|
60
|
+
|
61
|
+
```ruby
|
62
|
+
Book.bulk_insert(:title, :author, :created_at, :updated_at) do |worker|
|
63
|
+
# specify created_at/updated_at explicitly...
|
64
|
+
worker.add ["The Chosen", "Chaim Potok", Time.now, Time.now]
|
65
|
+
|
66
|
+
# or let BulkInsert set them by default...
|
67
|
+
worker.add ["Hello Ruby", "Linda Liukas"]
|
68
|
+
end
|
69
|
+
```
|
70
|
+
|
71
|
+
Similarly, if a value is omitted, BulkInsert will use whatever default
|
72
|
+
value is defined for that column in the database:
|
73
|
+
|
74
|
+
```ruby
|
75
|
+
# create_table :books do |t|
|
76
|
+
# ...
|
77
|
+
# t.string "medium", default: "paper"
|
78
|
+
# ...
|
79
|
+
# end
|
80
|
+
|
81
|
+
Book.bulk_insert(:title, :author, :medium) do |worker|
|
82
|
+
worker.add title: "Ender's Game", author: "Orson Scott Card"
|
83
|
+
end
|
84
|
+
|
85
|
+
Book.first.medium #-> "paper"
|
86
|
+
```
|
87
|
+
|
88
|
+
By default, the batch is always saved when the block finishes, but you
|
89
|
+
can explicitly save inside the block whenever you want, by calling
|
90
|
+
`#save!` on the worker:
|
91
|
+
|
92
|
+
```ruby
|
93
|
+
Book.bulk_insert do |worker|
|
94
|
+
worker.add(...)
|
95
|
+
worker.add(...)
|
96
|
+
|
97
|
+
worker.save!
|
98
|
+
|
99
|
+
worker.add(...)
|
100
|
+
#...
|
101
|
+
end
|
102
|
+
```
|
103
|
+
|
104
|
+
That will save the batch as it has been defined to that point, and then
|
105
|
+
empty the batch so that you can add more rows to it if you want. Note
|
106
|
+
that all records saved together will have the same created_at/updated_at
|
107
|
+
timestamp (unless one was explicitly set).
|
108
|
+
|
109
|
+
### Batch Set Size
|
110
|
+
|
111
|
+
By default, the size of the insert is limited to 500 rows at a time.
|
112
|
+
This is called the _set size_. If you add another row that causes the
|
113
|
+
set to exceed the set size, the insert statement is automatically built
|
114
|
+
and executed, and the batch is reset.
|
115
|
+
|
116
|
+
If you want a larger (or smaller) set size, you can specify it in
|
117
|
+
two ways:
|
118
|
+
|
119
|
+
```ruby
|
120
|
+
# specify set_size when initializing the bulk insert...
|
121
|
+
Book.bulk_insert(set_size: 100) do |worker|
|
122
|
+
# ...
|
123
|
+
end
|
124
|
+
|
125
|
+
# specify it on the worker directly...
|
126
|
+
Book.bulk_insert do |worker|
|
127
|
+
worker.set_size = 100
|
128
|
+
# ...
|
129
|
+
end
|
130
|
+
```
|
131
|
+
|
132
|
+
### Insert Ignore
|
133
|
+
|
134
|
+
By default, when an insert fails, the whole batch of inserts fails. The
|
135
|
+
_ignore_ option ignores the inserts that would have failed (because of
|
136
|
+
duplicate keys or a null in a column with a NOT NULL constraint) and
|
137
|
+
inserts the rest of the batch.
|
138
|
+
|
139
|
+
This is not the default because no errors are raised for the bad
|
140
|
+
inserts in the batch.
|
141
|
+
|
142
|
+
```ruby
|
143
|
+
destination_columns = [:title, :author]
|
144
|
+
|
145
|
+
# Ignore bad inserts in the batch
|
146
|
+
Book.bulk_insert(*destination_columns, ignore: true) do |worker|
|
147
|
+
worker.add(...)
|
148
|
+
worker.add(...)
|
149
|
+
# ...
|
150
|
+
end
|
151
|
+
```
|
152
|
+
|
153
|
+
### Update Duplicates (MySQL, PostgreSQL)
|
154
|
+
|
155
|
+
If you don't want to ignore duplicate rows but instead want to update them
|
156
|
+
then you can use the _update_duplicates_ option. Set this option to true
|
157
|
+
(MySQL) or list unique column names (PostgreSQL) and when a duplicate row
|
158
|
+
is found the row will be updated with your new values.
|
159
|
+
Default value for this option is false.
|
160
|
+
|
161
|
+
```ruby
|
162
|
+
destination_columns = [:title, :author]
|
163
|
+
|
164
|
+
# Update duplicate rows (MySQL)
|
165
|
+
Book.bulk_insert(*destination_columns, update_duplicates: true) do |worker|
|
166
|
+
worker.add(...)
|
167
|
+
worker.add(...)
|
168
|
+
# ...
|
169
|
+
end
|
170
|
+
|
171
|
+
# Update duplicate rows (PostgreSQL)
|
172
|
+
Book.bulk_insert(*destination_columns, update_duplicates: %w[title]) do |worker|
|
173
|
+
worker.add(...)
|
174
|
+
# ...
|
175
|
+
end
|
176
|
+
```
|
177
|
+
|
178
|
+
### Return Primary Keys (PostgreSQL, PostGIS)
|
179
|
+
|
180
|
+
If you want the worker to store primary keys of inserted records, then you can
|
181
|
+
use the _return_primary_keys_ option. The worker will store a `result_sets`
|
182
|
+
array of `ActiveRecord::Result` objects. Each `ActiveRecord::Result` object
|
183
|
+
will contain the primary keys of a batch of inserted records.
|
184
|
+
|
185
|
+
```ruby
|
186
|
+
worker = Book.bulk_insert(*destination_columns, return_primary_keys: true)
|
187
|
+
# called without a block (or `values:`), bulk_insert returns the worker itself
|
188
|
+
worker.add(...)
|
189
|
+
worker.add(...)
|
190
|
+
# ...
|
191
|
+
worker.save!
|
192
|
+
|
193
|
+
worker.result_sets
|
194
|
+
```
|
195
|
+
|
196
|
+
## License
|
197
|
+
|
198
|
+
BulkInsert is released under the MIT license (see MIT-LICENSE) by
|
199
|
+
Jamis Buck (jamis@jamisbuck.org).
|
data/Rakefile
ADDED
@@ -0,0 +1,29 @@
|
|
1
|
+
# Rake entry point for the gem: defines the `rdoc` documentation task, the
# Bundler gem-packaging tasks, and the `test` task (which is also the default).

begin
  require 'bundler/setup'
rescue LoadError
  puts 'You must `gem install bundler` and `bundle install` to run rake tasks'
end

require 'rdoc/task'

RDoc::Task.new(:rdoc) do |rdoc|
  rdoc.rdoc_dir = 'rdoc'
  rdoc.title    = 'BulkInsert'
  rdoc.options << '--line-numbers'
  # The package ships README.md (there is no top-level README.rdoc), so
  # include the file that actually exists.
  rdoc.rdoc_files.include('README.md')
  rdoc.rdoc_files.include('lib/**/*.rb')
end

Bundler::GemHelper.install_tasks

require 'rake/testtask'

Rake::TestTask.new(:test) do |t|
  t.libs << 'lib'
  t.libs << 'test'
  t.pattern = 'test/**/*_test.rb'
  t.verbose = false
end


task default: :test
|
data/lib/bulk_insert.rb
ADDED
@@ -0,0 +1,38 @@
|
|
1
|
+
require 'bulk_insert/worker'
|
2
|
+
|
3
|
+
# Concern that gives a model class the `bulk_insert` entry point.
module BulkInsert
  extend ActiveSupport::Concern

  module ClassMethods
    # Builds a BulkInsert::Worker for this model and either runs it or hands
    # it back to the caller.
    #
    # columns              - column names to insert; when none are given,
    #                        default_bulk_columns is used.
    # values:              - optional collection of rows; when present, every
    #                        row is added and saved inside one transaction.
    # set_size:, ignore:, update_duplicates:, return_primary_keys:
    #                      - forwarded unchanged to BulkInsert::Worker.new.
    #
    # Returns nil when rows were supplied through `values:` or through a
    # block (the worker is saved before returning); returns the unsaved
    # worker when neither was supplied.
    def bulk_insert(*columns, values: nil, set_size: 500, ignore: false, update_duplicates: false, return_primary_keys: false)
      columns = default_bulk_columns if columns.empty?
      worker = BulkInsert::Worker.new(
        connection, table_name, primary_key, columns,
        set_size, ignore, update_duplicates, return_primary_keys
      )

      rows_given = values.present?
      # Nothing to insert yet: let the caller drive the worker manually.
      return worker unless rows_given || block_given?

      transaction do
        if rows_given
          worker.add_all(values)
        else
          yield worker
        end
        worker.save!
      end
      nil
    end

    # Columns used by bulk_insert when the caller names none: every column
    # of the model except "id".
    def default_bulk_columns
      column_names - ['id']
    end
  end
end
|
35
|
+
|
36
|
+
# Mix BulkInsert into whatever object the :active_record load hook yields,
# once that hook fires.
ActiveSupport.on_load(:active_record) { include BulkInsert }
|
@@ -0,0 +1,22 @@
|
|
1
|
+
require_relative 'statement_adapters/generic_adapter'
|
2
|
+
require_relative 'statement_adapters/mysql_adapter'
|
3
|
+
require_relative 'statement_adapters/postgresql_adapter'
|
4
|
+
require_relative 'statement_adapters/sqlite_adapter'
|
5
|
+
|
6
|
+
module BulkInsert
  # Namespace for the per-database SQL fragment builders.
  module StatementAdapters
    # Chooses the statement adapter that matches a database connection.
    #
    # connection - any object responding to #adapter_name with a String.
    #
    # Returns a new MySQLAdapter, PostgreSQLAdapter or SQLiteAdapter when the
    # adapter name starts with the corresponding prefix (case-insensitive),
    # and a GenericAdapter for every other name.
    def adapter_for(connection)
      case connection.adapter_name
      # \A anchors at the start of the whole string, like the patterns below;
      # ^ would also match "mysql" at the start of any later line.
      when /\Amysql/i
        MySQLAdapter.new
      when /\APost(?:greSQL|GIS)/i
        PostgreSQLAdapter.new
      when /\ASQLite/i
        SQLiteAdapter.new
      else
        GenericAdapter.new
      end
    end
    module_function :adapter_for
  end
end
|
@@ -0,0 +1,21 @@
|
|
1
|
+
module BulkInsert
  module StatementAdapters
    # Abstract parent of the statement adapters. It cannot be instantiated
    # directly, and each hook raises until a subclass overrides it.
    class BaseAdapter
      # Raises RuntimeError when called on BaseAdapter itself; subclasses
      # construct normally.
      def initialize
        return unless instance_of?(BaseAdapter)

        raise RuntimeError, "You cannot initialize base adapter"
      end

      # Hook for the "ignore" keyword fragment of the insert statement.
      # Raises RuntimeError unless overridden.
      def insert_ignore_statement
        raise RuntimeError, "Not implemented"
      end

      # Hook for the conflict-handling fragment of the insert statement.
      # Raises RuntimeError unless overridden.
      def on_conflict_statement(_columns, _ignore, _update_duplicates)
        raise RuntimeError, "Not implemented"
      end

      # Hook for the fragment that returns primary keys.
      # Raises RuntimeError unless overridden.
      def primary_key_return_statement(_primary_key)
        raise RuntimeError, "Not implemented"
      end
    end
  end
end
|
@@ -0,0 +1,19 @@
|
|
1
|
+
require_relative 'base_adapter'
|
2
|
+
|
3
|
+
module BulkInsert
  module StatementAdapters
    # Adapter that contributes no database-specific SQL: every hook returns
    # an empty string.
    class GenericAdapter < BaseAdapter
      # Returns an empty string (no ignore keyword).
      def insert_ignore_statement
        ""
      end

      # Returns an empty string regardless of the arguments.
      def on_conflict_statement(_columns, _ignore, _update_duplicates)
        ""
      end

      # Returns an empty string regardless of the primary key.
      def primary_key_return_statement(_primary_key)
        ""
      end
    end
  end
end
|
@@ -0,0 +1,24 @@
|
|
1
|
+
require_relative 'base_adapter'
|
2
|
+
|
3
|
+
module BulkInsert
  module StatementAdapters
    # SQL fragments for MySQL connections.
    class MySQLAdapter < BaseAdapter
      # Returns the keyword "IGNORE".
      def insert_ignore_statement
        "IGNORE"
      end

      # Builds the " ON DUPLICATE KEY UPDATE ..." suffix.
      #
      # columns           - objects responding to #name; each becomes a
      #                     "`name`=VALUES(`name`)" assignment.
      # _ignore           - unused by this adapter.
      # update_duplicates - when falsy, an empty string is returned.
      def on_conflict_statement(columns, _ignore, update_duplicates)
        return "" unless update_duplicates

        assignments = columns.map { |col| "`#{col.name}`=VALUES(`#{col.name}`)" }
        " ON DUPLICATE KEY UPDATE #{assignments.join(', ')}"
      end

      # Returns an empty string regardless of the primary key.
      def primary_key_return_statement(_primary_key)
        ""
      end
    end
  end
end
|
@@ -0,0 +1,28 @@
|
|
1
|
+
require_relative 'base_adapter'
|
2
|
+
|
3
|
+
module BulkInsert
  module StatementAdapters
    # SQL fragments for PostgreSQL (and PostGIS) connections.
    class PostgreSQLAdapter < BaseAdapter
      # Returns an empty string; ignoring is expressed through
      # on_conflict_statement instead.
      def insert_ignore_statement
        ''
      end

      # Builds the " ON CONFLICT ..." suffix.
      #
      # columns           - objects responding to #name; each becomes a
      #                     "name=EXCLUDED.name" assignment.
      # ignore            - when truthy, " ON CONFLICT DO NOTHING" is returned
      #                     and update_duplicates is not consulted.
      # update_duplicates - the unique column name(s) forming the conflict
      #                     target: an array, or a single name. Falsy means
      #                     no conflict clause.
      #
      # Raises ArgumentError when update_duplicates is literally `true`: that
      # form carries no column names, so no conflict target can be built.
      def on_conflict_statement(columns, ignore, update_duplicates)
        return ' ON CONFLICT DO NOTHING' if ignore
        return '' unless update_duplicates

        # `true` must be told apart from a column list, hence the explicit
        # comparison rather than a plain truthiness check.
        if update_duplicates == true
          raise ArgumentError,
                'update_duplicates must list the unique column names for PostgreSQL (e.g. %w[title]), not true'
        end

        # Array() leaves an array untouched and wraps a single column name.
        conflict_target = Array(update_duplicates).join(', ')
        update_values = columns.map do |column|
          "#{column.name}=EXCLUDED.#{column.name}"
        end.join(', ')
        " ON CONFLICT(#{conflict_target}) DO UPDATE SET #{update_values}"
      end

      # Returns the " RETURNING <primary_key>" suffix.
      def primary_key_return_statement(primary_key)
        " RETURNING #{primary_key}"
      end
    end
  end
end
|
@@ -0,0 +1,19 @@
|
|
1
|
+
require_relative 'base_adapter'
|
2
|
+
|
3
|
+
module BulkInsert
  module StatementAdapters
    # SQL fragments for SQLite connections.
    class SQLiteAdapter < BaseAdapter
      # Returns the keyword pair "OR IGNORE".
      def insert_ignore_statement
        "OR IGNORE"
      end

      # Returns an empty string regardless of the arguments.
      def on_conflict_statement(_columns, _ignore, _update_duplicates)
        ""
      end

      # Returns an empty string regardless of the primary key.
      def primary_key_return_statement(_primary_key)
        ""
      end
    end
  end
end
|