bulk_insert2 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55) hide show
  1. checksums.yaml +7 -0
  2. data/MIT-LICENSE +20 -0
  3. data/README.md +199 -0
  4. data/Rakefile +29 -0
  5. data/lib/bulk_insert.rb +38 -0
  6. data/lib/bulk_insert/statement_adapters.rb +22 -0
  7. data/lib/bulk_insert/statement_adapters/base_adapter.rb +21 -0
  8. data/lib/bulk_insert/statement_adapters/generic_adapter.rb +19 -0
  9. data/lib/bulk_insert/statement_adapters/mysql_adapter.rb +24 -0
  10. data/lib/bulk_insert/statement_adapters/postgresql_adapter.rb +28 -0
  11. data/lib/bulk_insert/statement_adapters/sqlite_adapter.rb +19 -0
  12. data/lib/bulk_insert/version.rb +7 -0
  13. data/lib/bulk_insert/worker.rb +136 -0
  14. data/test/bulk_insert/worker_test.rb +459 -0
  15. data/test/bulk_insert_test.rb +52 -0
  16. data/test/dummy/README.rdoc +28 -0
  17. data/test/dummy/Rakefile +6 -0
  18. data/test/dummy/app/assets/javascripts/application.js +13 -0
  19. data/test/dummy/app/assets/stylesheets/application.css +15 -0
  20. data/test/dummy/app/controllers/application_controller.rb +5 -0
  21. data/test/dummy/app/helpers/application_helper.rb +2 -0
  22. data/test/dummy/app/models/testing.rb +2 -0
  23. data/test/dummy/app/views/layouts/application.html.erb +14 -0
  24. data/test/dummy/bin/bundle +3 -0
  25. data/test/dummy/bin/rails +4 -0
  26. data/test/dummy/bin/rake +4 -0
  27. data/test/dummy/bin/setup +29 -0
  28. data/test/dummy/config.ru +4 -0
  29. data/test/dummy/config/application.rb +24 -0
  30. data/test/dummy/config/boot.rb +5 -0
  31. data/test/dummy/config/database.yml +25 -0
  32. data/test/dummy/config/environment.rb +5 -0
  33. data/test/dummy/config/environments/development.rb +41 -0
  34. data/test/dummy/config/environments/production.rb +79 -0
  35. data/test/dummy/config/environments/test.rb +42 -0
  36. data/test/dummy/config/initializers/assets.rb +11 -0
  37. data/test/dummy/config/initializers/backtrace_silencers.rb +7 -0
  38. data/test/dummy/config/initializers/cookies_serializer.rb +3 -0
  39. data/test/dummy/config/initializers/filter_parameter_logging.rb +4 -0
  40. data/test/dummy/config/initializers/inflections.rb +16 -0
  41. data/test/dummy/config/initializers/mime_types.rb +4 -0
  42. data/test/dummy/config/initializers/session_store.rb +3 -0
  43. data/test/dummy/config/initializers/wrap_parameters.rb +14 -0
  44. data/test/dummy/config/locales/en.yml +23 -0
  45. data/test/dummy/config/routes.rb +56 -0
  46. data/test/dummy/config/secrets.yml +22 -0
  47. data/test/dummy/db/migrate/20151008181535_create_testings.rb +11 -0
  48. data/test/dummy/db/migrate/20151028194232_add_default_value.rb +5 -0
  49. data/test/dummy/db/schema.rb +25 -0
  50. data/test/dummy/public/404.html +67 -0
  51. data/test/dummy/public/422.html +67 -0
  52. data/test/dummy/public/500.html +66 -0
  53. data/test/dummy/public/favicon.ico +0 -0
  54. data/test/test_helper.rb +19 -0
  55. metadata +181 -0
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 3e6a484a5e8f122b794605f2664ca20a5d0c887cf78f05581bf40c2aaaa61970
4
+ data.tar.gz: 4196d55dfd00c9d571bf8608257807a7fe4d290b4dc90b67fb390f66a58a9303
5
+ SHA512:
6
+ metadata.gz: 457740006d28c583a70f71ba63c4abf81e4bd875f9fec9dc468f35605a558fc4b610d5a09f921f7e57fbbf9759aece0a008dbc6095a2f7e406ae196580517978
7
+ data.tar.gz: d338d37cabb673e8417273832229e252988648a8e5d653b428bc77526797ce21789fc2673b519506a159f1319249827d6701d15d13448b558b8f432be55d9ac3
@@ -0,0 +1,20 @@
1
+ Copyright 2015 Jamis Buck
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,199 @@
1
+ ### Replaced type\_cast\_from\_column with lookup\_cast\_type\_from\_column
2
+
3
+ # BulkInsert
4
+
5
+ A little ActiveRecord extension for helping to insert lots of rows in a
6
+ single insert statement.
7
+
8
+ ## Installation
9
+
10
+ Add it to your Gemfile:
11
+
12
+ ```ruby
13
+ gem 'bulk_insert'
14
+ ```
15
+
16
+ ## Usage
17
+
18
+ BulkInsert adds a new class method to your ActiveRecord models:
19
+
20
+ ```ruby
21
+ class Book < ActiveRecord::Base
22
+ end
23
+
24
+ book_attrs = ... # some array of hashes, for instance
25
+ Book.bulk_insert do |worker|
26
+ book_attrs.each do |attrs|
27
+ worker.add(attrs)
28
+ end
29
+ end
30
+ ```
31
+
32
+ All of those `#add` calls will be accumulated into a single SQL insert
33
+ statement, vastly improving the performance of multiple sequential
34
+ inserts (think data imports and the like).
35
+
36
+ If you don't like using a block API, you can also simply pass an array
37
+ of rows to be inserted:
38
+
39
+ ```ruby
40
+ book_attrs = ... # some array of hashes, for instance
41
+ Book.bulk_insert values: book_attrs
42
+ ```
43
+
44
+ By default, the columns to be inserted will be all columns in the table,
45
+ minus the `id` column, but if you want, you can explicitly enumerate
46
+ the columns:
47
+
48
+ ```ruby
49
+ Book.bulk_insert(:title, :author) do |worker|
50
+ # specify a row as an array of values...
51
+ worker.add ["Eye of the World", "Robert Jordan"]
52
+
53
+ # or as a hash
54
+ worker.add title: "Lord of Light", author: "Roger Zelazny"
55
+ end
56
+ ```
57
+
58
+ It will automatically set `created_at`/`updated_at` columns to the current
59
+ date, as well.
60
+
61
+ ```ruby
62
+ Book.bulk_insert(:title, :author, :created_at, :updated_at) do |worker|
63
+ # specify created_at/updated_at explicitly...
64
+ worker.add ["The Chosen", "Chaim Potok", Time.now, Time.now]
65
+
66
+ # or let BulkInsert set them by default...
67
+ worker.add ["Hello Ruby", "Linda Liukas"]
68
+ end
69
+ ```
70
+
71
+ Similarly, if a value is omitted, BulkInsert will use whatever default
72
+ value is defined for that column in the database:
73
+
74
+ ```ruby
75
+ # create_table :books do |t|
76
+ # ...
77
+ # t.string "medium", default: "paper"
78
+ # ...
79
+ # end
80
+
81
+ Book.bulk_insert(:title, :author, :medium) do |worker|
82
+ worker.add title: "Ender's Game", author: "Orson Scott Card"
83
+ end
84
+
85
+ Book.first.medium #-> "paper"
86
+ ```
87
+
88
+ By default, the batch is always saved when the block finishes, but you
89
+ can explicitly save inside the block whenever you want, by calling
90
+ `#save!` on the worker:
91
+
92
+ ```ruby
93
+ Book.bulk_insert do |worker|
94
+ worker.add(...)
95
+ worker.add(...)
96
+
97
+ worker.save!
98
+
99
+ worker.add(...)
100
+ #...
101
+ end
102
+ ```
103
+
104
+ That will save the batch as it has been defined to that point, and then
105
+ empty the batch so that you can add more rows to it if you want. Note
106
+ that all records saved together will have the same created_at/updated_at
107
+ timestamp (unless one was explicitly set).
108
+
109
+ ### Batch Set Size
110
+
111
+ By default, the size of the insert is limited to 500 rows at a time.
112
+ This is called the _set size_. If you add another row that causes the
113
+ set to exceed the set size, the insert statement is automatically built
114
+ and executed, and the batch is reset.
115
+
116
+ If you want a larger (or smaller) set size, you can specify it in
117
+ two ways:
118
+
119
+ ```ruby
120
+ # specify set_size when initializing the bulk insert...
121
+ Book.bulk_insert(set_size: 100) do |worker|
122
+ # ...
123
+ end
124
+
125
+ # specify it on the worker directly...
126
+ Book.bulk_insert do |worker|
127
+ worker.set_size = 100
128
+ # ...
129
+ end
130
+ ```
131
+
132
+ ### Insert Ignore
133
+
134
+ By default, when an insert fails the whole batch of inserts fails. The
135
+ _ignore_ option ignores the inserts that would have failed (because of
136
+ duplicate keys or a null in a column with a NOT NULL constraint) and
137
+ inserts the rest of the batch.
138
+
139
+ This is not the default because no errors are raised for the bad
140
+ inserts in the batch.
141
+
142
+ ```ruby
143
+ destination_columns = [:title, :author]
144
+
145
+ # Ignore bad inserts in the batch
146
+ Book.bulk_insert(*destination_columns, ignore: true) do |worker|
147
+ worker.add(...)
148
+ worker.add(...)
149
+ # ...
150
+ end
151
+ ```
152
+
153
+ ### Update Duplicates (MySQL, PostgreSQL)
154
+
155
+ If you don't want to ignore duplicate rows but instead want to update them
156
+ then you can use the _update_duplicates_ option. Set this option to true
157
+ (MySQL) or list unique column names (PostgreSQL) and when a duplicate row
158
+ is found the row will be updated with your new values.
159
+ Default value for this option is false.
160
+
161
+ ```ruby
162
+ destination_columns = [:title, :author]
163
+
164
+ # Update duplicate rows (MySQL)
165
+ Book.bulk_insert(*destination_columns, update_duplicates: true) do |worker|
166
+ worker.add(...)
167
+ worker.add(...)
168
+ # ...
169
+ end
170
+
171
+ # Update duplicate rows (PostgreSQL)
172
+ Book.bulk_insert(*destination_columns, update_duplicates: %w[title]) do |worker|
173
+ worker.add(...)
174
+ # ...
175
+ end
176
+ ```
177
+
178
+ ### Return Primary Keys (PostgreSQL, PostGIS)
179
+
180
+ If you want the worker to store primary keys of inserted records, then you can
181
+ use the _return_primary_keys_ option. The worker will store a `result_sets`
182
+ array of `ActiveRecord::Result` objects. Each `ActiveRecord::Result` object
183
+ will contain the primary keys of a batch of inserted records.
184
+
185
+ ```ruby
186
+ # called without a block, bulk_insert returns the worker (the block form returns nil)
187
+ worker = Book.bulk_insert(*destination_columns, return_primary_keys: true)
188
+ worker.add(...)
189
+ worker.add(...)
190
+ # ...
191
+ worker.save!
192
+
193
+ worker.result_sets
194
+ ```
195
+
196
+ ## License
197
+
198
+ BulkInsert is released under the MIT license (see MIT-LICENSE) by
199
+ Jamis Buck (jamis@jamisbuck.org).
@@ -0,0 +1,29 @@
1
# Rakefile for the gem: defines the rdoc task, Bundler's gem tasks and the
# test task, and makes `test` the default task.
begin
  require 'bundler/setup'
rescue LoadError
  puts 'You must `gem install bundler` and `bundle install` to run rake tasks'
end

require 'rdoc/task'

# `rake rdoc` writes the generated documentation into ./rdoc.
RDoc::Task.new(:rdoc) do |rdoc|
  rdoc.rdoc_dir = 'rdoc'
  rdoc.title    = 'BulkInsert'
  rdoc.options << '--line-numbers'
  # The package ships README.md (there is no top-level README.rdoc), so point
  # RDoc at the file that actually exists.
  rdoc.rdoc_files.include('README.md')
  rdoc.rdoc_files.include('lib/**/*.rb')
end

# Gem packaging tasks provided by Bundler.
Bundler::GemHelper.install_tasks

require 'rake/testtask'

# `rake test` runs every file matching test/**/*_test.rb with lib/ and test/
# on the load path.
Rake::TestTask.new(:test) do |t|
  t.libs << 'lib'
  t.libs << 'test'
  t.pattern = 'test/**/*_test.rb'
  t.verbose = false
end

task default: :test
@@ -0,0 +1,38 @@
1
+ require 'bulk_insert/worker'
2
+
3
# Concern that adds the +bulk_insert+ class method to the including class.
module BulkInsert
  extend ActiveSupport::Concern

  module ClassMethods
    # Builds a BulkInsert::Worker for this class and, depending on the
    # arguments, feeds it rows.
    #
    # columns - names of the columns to insert; when none are given,
    #           +default_bulk_columns+ is used.
    # values  - optional collection of rows. When it is not nil it takes
    #           precedence over any block: non-empty rows are added and saved
    #           inside a transaction, an empty collection is a no-op.
    # set_size, ignore, update_duplicates, return_primary_keys
    #         - handed to BulkInsert::Worker.new unchanged.
    #
    # Yields the worker when a block is given and +values+ is nil; the worker
    # is saved inside the same transaction after the block returns.
    #
    # Returns nil when +values+ or a block was supplied, otherwise the worker
    # so the caller can add rows and save it manually.
    def bulk_insert(*columns, values: nil, set_size: 500, ignore: false, update_duplicates: false, return_primary_keys: false)
      columns = default_bulk_columns if columns.empty?
      worker = BulkInsert::Worker.new(connection, table_name, primary_key, columns, set_size, ignore, update_duplicates, return_primary_keys)

      unless values.nil?
        # An explicitly passed but empty collection used to fall through and
        # hand back the worker; the array form now returns nil consistently.
        if values.present?
          transaction do
            worker.add_all(values)
            worker.save!
          end
        end
        return nil
      end

      return worker unless block_given?

      transaction do
        yield worker
        worker.save!
      end
      nil
    end

    # helper method for preparing the columns before a call to :bulk_insert
    #
    # Returns every column name of the model except "id".
    def default_bulk_columns
      column_names - %w[id]
    end
  end
end
35
+
36
# Include BulkInsert once the :active_record load hook runs.
ActiveSupport.on_load(:active_record) { include BulkInsert }
@@ -0,0 +1,22 @@
1
+ require_relative 'statement_adapters/generic_adapter'
2
+ require_relative 'statement_adapters/mysql_adapter'
3
+ require_relative 'statement_adapters/postgresql_adapter'
4
+ require_relative 'statement_adapters/sqlite_adapter'
5
+
6
module BulkInsert
  # Picks the statement adapter that matches a database connection.
  module StatementAdapters
    module_function

    # Returns a new adapter instance chosen from the connection's adapter name.
    #
    # connection - any object responding to +adapter_name+.
    #
    # Names starting with "mysql", "PostgreSQL"/"PostGIS" or "SQLite"
    # (case-insensitive) map to the matching adapter; every other name gets
    # a GenericAdapter.
    def adapter_for(connection)
      case connection.adapter_name
      # All patterns are anchored with \A so only the start of the whole
      # string matches (the MySQL pattern previously used the line anchor ^).
      when /\Amysql/i
        MySQLAdapter.new
      when /\APost(?:greSQL|GIS)/i
        PostgreSQLAdapter.new
      when /\ASQLite/i
        SQLiteAdapter.new
      else
        GenericAdapter.new
      end
    end
  end
end
@@ -0,0 +1,21 @@
1
module BulkInsert
  module StatementAdapters
    # Parent class of the statement adapters. It cannot be instantiated
    # directly, and each of its statement methods raises until a subclass
    # overrides it.
    class BaseAdapter
      # Raises when called on BaseAdapter itself; subclasses may instantiate.
      def initialize
        raise "You cannot initialize base adapter" if instance_of?(BaseAdapter)
      end

      # To be overridden by subclasses; raises here.
      def insert_ignore_statement
        raise "Not implemented"
      end

      # To be overridden by subclasses; raises here.
      def on_conflict_statement(_columns, _ignore, _update_duplicates)
        raise "Not implemented"
      end

      # To be overridden by subclasses; raises here.
      def primary_key_return_statement(_primary_key)
        raise "Not implemented"
      end
    end
  end
end
@@ -0,0 +1,19 @@
1
+ require_relative 'base_adapter'
2
+
3
module BulkInsert
  module StatementAdapters
    # Adapter that contributes no extra SQL: every statement fragment it
    # returns is the empty string.
    class GenericAdapter < BaseAdapter
      # Returns an empty string.
      def insert_ignore_statement
        ""
      end

      # Returns an empty string regardless of the arguments.
      def on_conflict_statement(_columns, _ignore, _update_duplicates)
        ""
      end

      # Returns an empty string regardless of the argument.
      def primary_key_return_statement(_primary_key)
        ""
      end
    end
  end
end
@@ -0,0 +1,24 @@
1
+ require_relative 'base_adapter'
2
+
3
module BulkInsert
  module StatementAdapters
    # Statement fragments for MySQL connections.
    class MySQLAdapter < BaseAdapter
      # Returns the keyword "IGNORE".
      def insert_ignore_statement
        "IGNORE"
      end

      # Returns an " ON DUPLICATE KEY UPDATE ..." clause that assigns every
      # column from its VALUES(), or an empty string when update_duplicates
      # is falsy.
      #
      # columns - objects responding to +name+.
      def on_conflict_statement(columns, _ignore, update_duplicates)
        if update_duplicates
          assignments = columns.map do |col|
            "`#{col.name}`=VALUES(`#{col.name}`)"
          end
          " ON DUPLICATE KEY UPDATE #{assignments.join(", ")}"
        else
          ""
        end
      end

      # Returns an empty string regardless of the argument.
      def primary_key_return_statement(_primary_key)
        ""
      end
    end
  end
end
@@ -0,0 +1,28 @@
1
+ require_relative 'base_adapter'
2
+
3
module BulkInsert
  module StatementAdapters
    # Statement fragments for PostgreSQL / PostGIS connections.
    class PostgreSQLAdapter < BaseAdapter
      # Returns an empty string; ignoring is expressed through the
      # ON CONFLICT clause built by +on_conflict_statement+.
      def insert_ignore_statement
        ''
      end

      # Builds the ON CONFLICT clause.
      #
      # columns           - objects responding to +name+; each one is assigned
      #                     from EXCLUDED in the DO UPDATE branch.
      # ignore            - when truthy, conflicts are skipped (DO NOTHING);
      #                     this takes precedence over update_duplicates.
      # update_duplicates - falsy for no clause, otherwise the conflict target:
      #                     a column name or a list of column names.
      #
      # Returns the clause (with a leading space) or an empty string.
      # Raises ArgumentError when update_duplicates is +true+ or an empty
      # list, because no conflict target can be derived from it.
      def on_conflict_statement(columns, ignore, update_duplicates)
        return ' ON CONFLICT DO NOTHING' if ignore
        return '' unless update_duplicates

        targets = conflict_targets(update_duplicates)
        assignments = columns.map do |column|
          "#{column.name}=EXCLUDED.#{column.name}"
        end

        " ON CONFLICT(#{targets.join(', ')}) DO UPDATE SET #{assignments.join(', ')}"
      end

      # Returns a " RETURNING <primary_key>" clause.
      def primary_key_return_statement(primary_key)
        " RETURNING #{primary_key}"
      end

      private

      # Normalises update_duplicates into a non-empty list of column names.
      # A bare +true+ previously failed with NoMethodError on `true.join`;
      # it is rejected here with an explanatory error instead.
      def conflict_targets(update_duplicates)
        targets = update_duplicates.equal?(true) ? [] : Array(update_duplicates)
        return targets unless targets.empty?

        raise ArgumentError,
              'PostgreSQL requires update_duplicates to list the unique column names ' \
              '(e.g. update_duplicates: %w[title])'
      end
    end
  end
end
@@ -0,0 +1,19 @@
1
+ require_relative 'base_adapter'
2
+
3
module BulkInsert
  module StatementAdapters
    # Statement fragments for SQLite connections.
    class SQLiteAdapter < BaseAdapter
      # Returns the keywords "OR IGNORE".
      def insert_ignore_statement
        "OR IGNORE"
      end

      # Returns an empty string regardless of the arguments.
      def on_conflict_statement(_columns, _ignore, _update_duplicates)
        ""
      end

      # Returns an empty string regardless of the argument.
      def primary_key_return_statement(_primary_key)
        ""
      end
    end
  end
end
@@ -0,0 +1,7 @@
1
module BulkInsert
  # Individual components of the gem version.
  MAJOR = 1
  MINOR = 0
  TINY  = 0

  # Dotted version string assembled from the components above.
  VERSION = [MAJOR, MINOR, TINY].join('.')
end