bulk_insert2 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/MIT-LICENSE +20 -0
- data/README.md +199 -0
- data/Rakefile +29 -0
- data/lib/bulk_insert.rb +38 -0
- data/lib/bulk_insert/statement_adapters.rb +22 -0
- data/lib/bulk_insert/statement_adapters/base_adapter.rb +21 -0
- data/lib/bulk_insert/statement_adapters/generic_adapter.rb +19 -0
- data/lib/bulk_insert/statement_adapters/mysql_adapter.rb +24 -0
- data/lib/bulk_insert/statement_adapters/postgresql_adapter.rb +28 -0
- data/lib/bulk_insert/statement_adapters/sqlite_adapter.rb +19 -0
- data/lib/bulk_insert/version.rb +7 -0
- data/lib/bulk_insert/worker.rb +136 -0
- data/test/bulk_insert/worker_test.rb +459 -0
- data/test/bulk_insert_test.rb +52 -0
- data/test/dummy/README.rdoc +28 -0
- data/test/dummy/Rakefile +6 -0
- data/test/dummy/app/assets/javascripts/application.js +13 -0
- data/test/dummy/app/assets/stylesheets/application.css +15 -0
- data/test/dummy/app/controllers/application_controller.rb +5 -0
- data/test/dummy/app/helpers/application_helper.rb +2 -0
- data/test/dummy/app/models/testing.rb +2 -0
- data/test/dummy/app/views/layouts/application.html.erb +14 -0
- data/test/dummy/bin/bundle +3 -0
- data/test/dummy/bin/rails +4 -0
- data/test/dummy/bin/rake +4 -0
- data/test/dummy/bin/setup +29 -0
- data/test/dummy/config.ru +4 -0
- data/test/dummy/config/application.rb +24 -0
- data/test/dummy/config/boot.rb +5 -0
- data/test/dummy/config/database.yml +25 -0
- data/test/dummy/config/environment.rb +5 -0
- data/test/dummy/config/environments/development.rb +41 -0
- data/test/dummy/config/environments/production.rb +79 -0
- data/test/dummy/config/environments/test.rb +42 -0
- data/test/dummy/config/initializers/assets.rb +11 -0
- data/test/dummy/config/initializers/backtrace_silencers.rb +7 -0
- data/test/dummy/config/initializers/cookies_serializer.rb +3 -0
- data/test/dummy/config/initializers/filter_parameter_logging.rb +4 -0
- data/test/dummy/config/initializers/inflections.rb +16 -0
- data/test/dummy/config/initializers/mime_types.rb +4 -0
- data/test/dummy/config/initializers/session_store.rb +3 -0
- data/test/dummy/config/initializers/wrap_parameters.rb +14 -0
- data/test/dummy/config/locales/en.yml +23 -0
- data/test/dummy/config/routes.rb +56 -0
- data/test/dummy/config/secrets.yml +22 -0
- data/test/dummy/db/migrate/20151008181535_create_testings.rb +11 -0
- data/test/dummy/db/migrate/20151028194232_add_default_value.rb +5 -0
- data/test/dummy/db/schema.rb +25 -0
- data/test/dummy/public/404.html +67 -0
- data/test/dummy/public/422.html +67 -0
- data/test/dummy/public/500.html +66 -0
- data/test/dummy/public/favicon.ico +0 -0
- data/test/test_helper.rb +19 -0
- metadata +181 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 3e6a484a5e8f122b794605f2664ca20a5d0c887cf78f05581bf40c2aaaa61970
|
4
|
+
data.tar.gz: 4196d55dfd00c9d571bf8608257807a7fe4d290b4dc90b67fb390f66a58a9303
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 457740006d28c583a70f71ba63c4abf81e4bd875f9fec9dc468f35605a558fc4b610d5a09f921f7e57fbbf9759aece0a008dbc6095a2f7e406ae196580517978
|
7
|
+
data.tar.gz: d338d37cabb673e8417273832229e252988648a8e5d653b428bc77526797ce21789fc2673b519506a159f1319249827d6701d15d13448b558b8f432be55d9ac3
|
data/MIT-LICENSE
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
Copyright 2015 Jamis Buck
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
4
|
+
a copy of this software and associated documentation files (the
|
5
|
+
"Software"), to deal in the Software without restriction, including
|
6
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
7
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
8
|
+
permit persons to whom the Software is furnished to do so, subject to
|
9
|
+
the following conditions:
|
10
|
+
|
11
|
+
The above copyright notice and this permission notice shall be
|
12
|
+
included in all copies or substantial portions of the Software.
|
13
|
+
|
14
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
15
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
16
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
17
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
18
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
19
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
20
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,199 @@
|
|
1
|
+
### Replaced type\_cast\_from\_column with lookup\_cast\_type\_from\_column
|
2
|
+
|
3
|
+
# BulkInsert
|
4
|
+
|
5
|
+
A little ActiveRecord extension for helping to insert lots of rows in a
|
6
|
+
single insert statement.
|
7
|
+
|
8
|
+
## Installation
|
9
|
+
|
10
|
+
Add it to your Gemfile:
|
11
|
+
|
12
|
+
```ruby
|
13
|
+
gem 'bulk_insert2', require: 'bulk_insert'
|
14
|
+
```
|
15
|
+
|
16
|
+
## Usage
|
17
|
+
|
18
|
+
BulkInsert adds a new class method to your ActiveRecord models:
|
19
|
+
|
20
|
+
```ruby
|
21
|
+
class Book < ActiveRecord::Base
|
22
|
+
end
|
23
|
+
|
24
|
+
book_attrs = ... # some array of hashes, for instance
|
25
|
+
Book.bulk_insert do |worker|
|
26
|
+
book_attrs.each do |attrs|
|
27
|
+
worker.add(attrs)
|
28
|
+
end
|
29
|
+
end
|
30
|
+
```
|
31
|
+
|
32
|
+
All of those `#add` calls will be accumulated into a single SQL insert
|
33
|
+
statement, vastly improving the performance of multiple sequential
|
34
|
+
inserts (think data imports and the like).
|
35
|
+
|
36
|
+
If you don't like using a block API, you can also simply pass an array
|
37
|
+
of rows to be inserted:
|
38
|
+
|
39
|
+
```ruby
|
40
|
+
book_attrs = ... # some array of hashes, for instance
|
41
|
+
Book.bulk_insert values: book_attrs
|
42
|
+
```
|
43
|
+
|
44
|
+
By default, the columns to be inserted will be all columns in the table,
|
45
|
+
minus the `id` column, but if you want, you can explicitly enumerate
|
46
|
+
the columns:
|
47
|
+
|
48
|
+
```ruby
|
49
|
+
Book.bulk_insert(:title, :author) do |worker|
|
50
|
+
# specify a row as an array of values...
|
51
|
+
worker.add ["Eye of the World", "Robert Jordan"]
|
52
|
+
|
53
|
+
# or as a hash
|
54
|
+
worker.add title: "Lord of Light", author: "Roger Zelazny"
|
55
|
+
end
|
56
|
+
```
|
57
|
+
|
58
|
+
It will automatically set `created_at`/`updated_at` columns to the current
|
59
|
+
date, as well.
|
60
|
+
|
61
|
+
```ruby
|
62
|
+
Book.bulk_insert(:title, :author, :created_at, :updated_at) do |worker|
|
63
|
+
# specify created_at/updated_at explicitly...
|
64
|
+
worker.add ["The Chosen", "Chaim Potok", Time.now, Time.now]
|
65
|
+
|
66
|
+
# or let BulkInsert set them by default...
|
67
|
+
worker.add ["Hello Ruby", "Linda Liukas"]
|
68
|
+
end
|
69
|
+
```
|
70
|
+
|
71
|
+
Similarly, if a value is omitted, BulkInsert will use whatever default
|
72
|
+
value is defined for that column in the database:
|
73
|
+
|
74
|
+
```ruby
|
75
|
+
# create_table :books do |t|
|
76
|
+
# ...
|
77
|
+
# t.string "medium", default: "paper"
|
78
|
+
# ...
|
79
|
+
# end
|
80
|
+
|
81
|
+
Book.bulk_insert(:title, :author, :medium) do |worker|
|
82
|
+
worker.add title: "Ender's Game", author: "Orson Scott Card"
|
83
|
+
end
|
84
|
+
|
85
|
+
Book.first.medium #-> "paper"
|
86
|
+
```
|
87
|
+
|
88
|
+
By default, the batch is always saved when the block finishes, but you
|
89
|
+
can explicitly save inside the block whenever you want, by calling
|
90
|
+
`#save!` on the worker:
|
91
|
+
|
92
|
+
```ruby
|
93
|
+
Book.bulk_insert do |worker|
|
94
|
+
worker.add(...)
|
95
|
+
worker.add(...)
|
96
|
+
|
97
|
+
worker.save!
|
98
|
+
|
99
|
+
worker.add(...)
|
100
|
+
#...
|
101
|
+
end
|
102
|
+
```
|
103
|
+
|
104
|
+
That will save the batch as it has been defined to that point, and then
|
105
|
+
empty the batch so that you can add more rows to it if you want. Note
|
106
|
+
that all records saved together will have the same created_at/updated_at
|
107
|
+
timestamp (unless one was explicitly set).
|
108
|
+
|
109
|
+
### Batch Set Size
|
110
|
+
|
111
|
+
By default, the size of the insert is limited to 500 rows at a time.
|
112
|
+
This is called the _set size_. If you add another row that causes the
|
113
|
+
set to exceed the set size, the insert statement is automatically built
|
114
|
+
and executed, and the batch is reset.
|
115
|
+
|
116
|
+
If you want a larger (or smaller) set size, you can specify it in
|
117
|
+
two ways:
|
118
|
+
|
119
|
+
```ruby
|
120
|
+
# specify set_size when initializing the bulk insert...
|
121
|
+
Book.bulk_insert(set_size: 100) do |worker|
|
122
|
+
# ...
|
123
|
+
end
|
124
|
+
|
125
|
+
# specify it on the worker directly...
|
126
|
+
Book.bulk_insert do |worker|
|
127
|
+
worker.set_size = 100
|
128
|
+
# ...
|
129
|
+
end
|
130
|
+
```
|
131
|
+
|
132
|
+
### Insert Ignore
|
133
|
+
|
134
|
+
By default, when an insert fails the whole batch of inserts fails. The
|
135
|
+
_ignore_ option ignores the inserts that would have failed (because of
|
136
|
+
duplicate keys or a null in a column with a NOT NULL constraint) and
|
137
|
+
inserts the rest of the batch.
|
138
|
+
|
139
|
+
This is not the default because no errors are raised for the bad
|
140
|
+
inserts in the batch.
|
141
|
+
|
142
|
+
```ruby
|
143
|
+
destination_columns = [:title, :author]
|
144
|
+
|
145
|
+
# Ignore bad inserts in the batch
|
146
|
+
Book.bulk_insert(*destination_columns, ignore: true) do |worker|
|
147
|
+
worker.add(...)
|
148
|
+
worker.add(...)
|
149
|
+
# ...
|
150
|
+
end
|
151
|
+
```
|
152
|
+
|
153
|
+
### Update Duplicates (MySQL, PostgreSQL)
|
154
|
+
|
155
|
+
If you don't want to ignore duplicate rows but instead want to update them
|
156
|
+
then you can use the _update_duplicates_ option. Set this option to true
|
157
|
+
(MySQL) or list unique column names (PostgreSQL) and when a duplicate row
|
158
|
+
is found the row will be updated with your new values.
|
159
|
+
Default value for this option is false.
|
160
|
+
|
161
|
+
```ruby
|
162
|
+
destination_columns = [:title, :author]
|
163
|
+
|
164
|
+
# Update duplicate rows (MySQL)
|
165
|
+
Book.bulk_insert(*destination_columns, update_duplicates: true) do |worker|
|
166
|
+
worker.add(...)
|
167
|
+
worker.add(...)
|
168
|
+
# ...
|
169
|
+
end
|
170
|
+
|
171
|
+
# Update duplicate rows (PostgreSQL)
|
172
|
+
Book.bulk_insert(*destination_columns, update_duplicates: %w[title]) do |worker|
|
173
|
+
worker.add(...)
|
174
|
+
# ...
|
175
|
+
end
|
176
|
+
```
|
177
|
+
|
178
|
+
### Return Primary Keys (PostgreSQL, PostGIS)
|
179
|
+
|
180
|
+
If you want the worker to store primary keys of inserted records, then you can
|
181
|
+
use the _return_primary_keys_ option. The worker will store a `result_sets`
|
182
|
+
array of `ActiveRecord::Result` objects. Each `ActiveRecord::Result` object
|
183
|
+
will contain the primary keys of a batch of inserted records.
|
184
|
+
|
185
|
+
```ruby
|
186
|
+
worker = Book.bulk_insert(*destination_columns, return_primary_keys: true)
|
187
|
+
# called without a block, so bulk_insert returns the worker itself
|
188
|
+
worker.add(...)
|
189
|
+
worker.add(...)
|
190
|
+
# ...
|
191
|
+
worker.save!
|
192
|
+
|
193
|
+
worker.result_sets
|
194
|
+
```
|
195
|
+
|
196
|
+
## License
|
197
|
+
|
198
|
+
BulkInsert is released under the MIT license (see MIT-LICENSE) by
|
199
|
+
Jamis Buck (jamis@jamisbuck.org).
|
data/Rakefile
ADDED
@@ -0,0 +1,29 @@
|
|
1
|
+
# Rake tasks for the BulkInsert gem: documentation, gem helper tasks and tests.

# Load the Bundler environment. If Bundler itself cannot be required, print a
# hint instead of failing on the require.
begin
  require 'bundler/setup'
rescue LoadError
  puts 'You must `gem install bundler` and `bundle install` to run rake tasks'
end

require 'rdoc/task'

# `rake rdoc` writes the generated documentation to the rdoc/ directory.
RDoc::Task.new(:rdoc) do |rdoc|
  rdoc.rdoc_dir = 'rdoc'
  rdoc.title    = 'BulkInsert'
  rdoc.options << '--line-numbers'
  # The gem ships README.md (there is no top-level README.rdoc), so that is
  # the file that has to be pulled into the generated docs.
  rdoc.rdoc_files.include('README.md')
  rdoc.rdoc_files.include('lib/**/*.rb')
end

# Register the tasks provided by Bundler's gem helper.
Bundler::GemHelper.install_tasks

require 'rake/testtask'

# `rake test` runs every test/**/*_test.rb with lib/ and test/ on the load path.
Rake::TestTask.new(:test) do |t|
  t.libs << 'lib'
  t.libs << 'test'
  t.pattern = 'test/**/*_test.rb'
  t.verbose = false
end

# Running bare `rake` runs the test suite.
task default: :test
|
data/lib/bulk_insert.rb
ADDED
@@ -0,0 +1,38 @@
|
|
1
|
+
require 'bulk_insert/worker'
|
2
|
+
|
3
|
+
# Concern that gives a model class the `bulk_insert` entry point.
module BulkInsert
  extend ActiveSupport::Concern

  module ClassMethods
    # Builds a BulkInsert::Worker for this model and either runs it or hands
    # it back, depending on how the method is called.
    #
    # columns             - column names to insert; when none are given,
    #                       default_bulk_columns is used.
    # values              - optional rows; when present they are added with
    #                       worker.add_all and saved inside a transaction.
    # set_size, ignore, update_duplicates, return_primary_keys
    #                     - forwarded to the worker unchanged.
    #
    # With a block (and no values) the worker is yielded and then saved inside
    # a transaction.
    #
    # Returns nil when rows came from `values:` or a block; otherwise returns
    # the worker so the caller can drive it directly.
    def bulk_insert(*columns, values: nil, set_size: 500, ignore: false, update_duplicates: false, return_primary_keys: false)
      columns = default_bulk_columns if columns.empty?
      worker = BulkInsert::Worker.new(
        connection, table_name, primary_key, columns,
        set_size, ignore, update_duplicates, return_primary_keys
      )

      rows_given = values.present?
      # Neither rows nor a block: nothing to run yet, hand the worker back.
      return worker unless rows_given || block_given?

      transaction do
        if rows_given
          worker.add_all(values)
        else
          yield worker
        end
        worker.save!
      end
      nil
    end

    # Columns used when bulk_insert is called without an explicit list:
    # every column of the model except "id".
    def default_bulk_columns
      column_names - ['id']
    end
  end
end
|
35
|
+
|
36
|
+
# Once the :active_record load hook fires, mix BulkInsert into the hook's
# receiver (presumably ActiveRecord::Base - confirm against ActiveSupport docs).
ActiveSupport.on_load(:active_record) { include BulkInsert }
|
@@ -0,0 +1,22 @@
|
|
1
|
+
require_relative 'statement_adapters/generic_adapter'
|
2
|
+
require_relative 'statement_adapters/mysql_adapter'
|
3
|
+
require_relative 'statement_adapters/postgresql_adapter'
|
4
|
+
require_relative 'statement_adapters/sqlite_adapter'
|
5
|
+
|
6
|
+
module BulkInsert
  # Picks the statement adapter that matches a database connection.
  module StatementAdapters
    # Returns a new adapter instance chosen from connection.adapter_name.
    #
    # connection - any object responding to #adapter_name with a String.
    #
    # Names starting with "mysql", "PostgreSQL"/"PostGIS" or "SQLite"
    # (case-insensitive) get their dedicated adapter; anything else gets
    # GenericAdapter.
    def adapter_for(connection)
      case connection.adapter_name
      # \A anchors at the start of the whole string (unlike ^, which also
      # matches after a newline), consistent with the other patterns here.
      when /\Amysql/i then MySQLAdapter.new
      when /\APost(?:greSQL|GIS)/i then PostgreSQLAdapter.new
      when /\ASQLite/i then SQLiteAdapter.new
      else GenericAdapter.new
      end
    end
    module_function :adapter_for
  end
end
|
@@ -0,0 +1,21 @@
|
|
1
|
+
module BulkInsert
  module StatementAdapters
    # Abstract parent of the statement adapters. It cannot be instantiated
    # directly, and each statement builder raises until a subclass overrides it.
    class BaseAdapter
      # Raises RuntimeError when called on BaseAdapter itself; subclasses
      # construct normally.
      def initialize
        return unless instance_of?(BaseAdapter)

        raise RuntimeError, "You cannot initialize base adapter"
      end

      # To be overridden: the "ignore" fragment of the insert statement.
      def insert_ignore_statement
        raise RuntimeError, "Not implemented"
      end

      # To be overridden: the conflict-handling fragment of the statement.
      def on_conflict_statement(_columns, _ignore, _update_duplicates)
        raise RuntimeError, "Not implemented"
      end

      # To be overridden: the fragment that returns primary keys.
      def primary_key_return_statement(_primary_key)
        raise RuntimeError, "Not implemented"
      end
    end
  end
end
|
@@ -0,0 +1,19 @@
|
|
1
|
+
require_relative 'base_adapter'
|
2
|
+
|
3
|
+
module BulkInsert
  module StatementAdapters
    # Fallback adapter: every statement fragment is the empty string.
    class GenericAdapter < BaseAdapter
      # Returns "" - no ignore fragment is produced.
      def insert_ignore_statement
        ""
      end

      # Returns "" regardless of the arguments.
      def on_conflict_statement(_columns, _ignore, _update_duplicates)
        ""
      end

      # Returns "" - no primary-key fragment is produced.
      def primary_key_return_statement(_primary_key)
        ""
      end
    end
  end
end
|
@@ -0,0 +1,24 @@
|
|
1
|
+
require_relative 'base_adapter'
|
2
|
+
|
3
|
+
module BulkInsert
  module StatementAdapters
    # Statement fragments for MySQL connections.
    class MySQLAdapter < BaseAdapter
      # Returns the keyword "IGNORE".
      def insert_ignore_statement
        "IGNORE"
      end

      # Builds the " ON DUPLICATE KEY UPDATE ..." fragment.
      #
      # columns           - objects responding to #name.
      # _ignore           - unused by this adapter.
      # update_duplicates - when falsy, "" is returned.
      #
      # Every column is assigned the value of its own VALUES() reference,
      # with the column name wrapped in backticks.
      def on_conflict_statement(columns, _ignore, update_duplicates)
        if update_duplicates
          assignments = columns.map { |col| "`#{col.name}`=VALUES(`#{col.name}`)" }
          " ON DUPLICATE KEY UPDATE #{assignments.join(', ')}"
        else
          ""
        end
      end

      # Returns "" - no primary-key fragment is produced.
      def primary_key_return_statement(_primary_key)
        ""
      end
    end
  end
end
|
@@ -0,0 +1,28 @@
|
|
1
|
+
require_relative 'base_adapter'
|
2
|
+
|
3
|
+
module BulkInsert
  module StatementAdapters
    # Statement fragments for PostgreSQL (and PostGIS) connections.
    class PostgreSQLAdapter < BaseAdapter
      # Returns "" - ignoring is expressed through on_conflict_statement.
      def insert_ignore_statement
        ''
      end

      # Builds the ON CONFLICT fragment.
      #
      # columns           - objects responding to #name; each one is assigned
      #                     its EXCLUDED value in the DO UPDATE SET list.
      # ignore            - when truthy, " ON CONFLICT DO NOTHING" is returned
      #                     and update_duplicates is not consulted.
      # update_duplicates - falsy for no fragment, otherwise the column
      #                     name(s) forming the conflict target (a single name
      #                     or a list of names).
      #
      # Returns the fragment as a String ("" when neither option is set).
      # Raises ArgumentError when update_duplicates is `true` or an empty
      # list, because no conflict target can be built from either.
      def on_conflict_statement(columns, ignore, update_duplicates)
        return ' ON CONFLICT DO NOTHING' if ignore
        return '' unless update_duplicates

        assignments = columns.map { |column| "#{column.name}=EXCLUDED.#{column.name}" }.join(', ')
        " ON CONFLICT(#{conflict_target(update_duplicates)}) DO UPDATE SET #{assignments}"
      end

      # Returns the " RETURNING <primary_key>" fragment.
      def primary_key_return_statement(primary_key)
        " RETURNING #{primary_key}"
      end

      private

      # Turns the update_duplicates option into the comma-separated conflict
      # target, rejecting values that cannot name any column.
      def conflict_target(update_duplicates)
        # A bare `true` carries no column names; it previously failed with a
        # NoMethodError on #join, so it is reported explicitly here.
        targets = update_duplicates == true ? [] : Array(update_duplicates)
        if targets.empty?
          raise ArgumentError,
                'update_duplicates must list the unique column name(s) on PostgreSQL, ' \
                'e.g. update_duplicates: %w[title]'
        end

        targets.join(', ')
      end
    end
  end
end
|
@@ -0,0 +1,19 @@
|
|
1
|
+
require_relative 'base_adapter'
|
2
|
+
|
3
|
+
module BulkInsert
  module StatementAdapters
    # Statement fragments for SQLite connections.
    class SQLiteAdapter < BaseAdapter
      # Returns the keywords "OR IGNORE".
      def insert_ignore_statement
        "OR IGNORE"
      end

      # Returns "" regardless of the arguments.
      def on_conflict_statement(_columns, _ignore, _update_duplicates)
        ""
      end

      # Returns "" - no primary-key fragment is produced.
      def primary_key_return_statement(_primary_key)
        ""
      end
    end
  end
end
|