multisert 0.0.4 → 0.0.5
Sign up to get free protection for your applications and to get access to all the features.
- data/README.md +24 -10
- data/lib/multisert.rb +5 -0
- data/lib/multisert/version.rb +1 -1
- data/performance/multisert_performance_test.rb +5 -4
- data/spec/multisert_spec.rb +88 -19
- metadata +2 -2
data/README.md
CHANGED
@@ -53,6 +53,23 @@ buffer = Multisert.new connection: dbclient,
|
|
53
53
|
table: 'some_table',
|
54
54
|
fields: ['field_1', 'field_2', 'field_3', 'field_4']
|
55
55
|
|
56
|
+
buffer.with_buffering do |buffer|
|
57
|
+
(0..1_000_000).each do |i|
|
58
|
+
res = some_magical_calculation(i)
|
59
|
+
buffer << res
|
60
|
+
end
|
61
|
+
end
|
62
|
+
```
|
63
|
+
|
64
|
+
We start by creating a new Multisert instance, providing the database
|
65
|
+
connection, database and table, and fields as attributes. Next, we leverage
|
66
|
+
`#with_buffering` to wrap our sample iteration. Within the block, we shovel the
|
67
|
+
results from `some_magical_calculation` into the Multisert instance, which then
|
68
|
+
handles all the heavy lifting in terms of writing to the database.
|
69
|
+
|
70
|
+
As an aside, `#with_buffering` handles the following under the hood:
|
71
|
+
|
72
|
+
```ruby
|
56
73
|
(0..1_000_000).each do |i|
|
57
74
|
res = some_magical_calculation(i)
|
58
75
|
buffer << res
|
@@ -60,16 +77,13 @@ end
|
|
60
77
|
buffer.write!
|
61
78
|
```
|
62
79
|
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
script. This ensures that any pending entries are written to the database table
|
71
|
-
that were not automatically taken care of by the auto-write that will kick in
|
72
|
-
during the iteration.
|
80
|
+
As we iterate through, the Multisert instance will build up the records and
|
81
|
+
then write them to the specified database table once it reaches an internal
|
82
|
+
count (default is 10_000 entries, but this can be adjusted via the
|
83
|
+
`max_buffer_count` attribute). The `buffer.write!` at the end ensures that
|
84
|
+
any entries still pending after the iteration, i.e. those not already
|
85
|
+
flushed by the auto-write that kicks in during the iteration, are written
|
86
|
+
to the database table.
|
73
87
|
|
74
88
|
## Insert Strategies
|
75
89
|
|
data/lib/multisert.rb
CHANGED
data/lib/multisert/version.rb
CHANGED
@@ -38,7 +38,7 @@ def insert_performance_test connection, cleaner, sample_records, destination
|
|
38
38
|
sample_records.each do |record|
|
39
39
|
connection.query %[
|
40
40
|
INSERT INTO #{destination} (#{fields})
|
41
|
-
VALUES (#{record.
|
41
|
+
VALUES (#{record.values.join(', ')})]
|
42
42
|
end
|
43
43
|
runtime = timer.stop!
|
44
44
|
ensure_data_completeness! connection, destination, sample_records.count
|
@@ -57,10 +57,11 @@ def multinsert_performance_test connection, cleaner, sample_records, destination
|
|
57
57
|
cleaner.ensure_clean_database!
|
58
58
|
|
59
59
|
(timer = Timer.new).start!
|
60
|
-
|
61
|
-
|
60
|
+
buffer.with_buffering do |buffer|
|
61
|
+
sample_records.each do |record|
|
62
|
+
buffer << record.values
|
63
|
+
end
|
62
64
|
end
|
63
|
-
buffer.flush!
|
64
65
|
runtime = timer.stop!
|
65
66
|
ensure_data_completeness! connection, destination, sample_records.count
|
66
67
|
puts "multisert w/ buffer of #{buffer.max_buffer_count} took #{runtime.round(2)}s to insert #{sample_records.count} entries"
|
data/spec/multisert_spec.rb
CHANGED
@@ -7,12 +7,22 @@ TEST_DATABASE = 'multisert_test'
|
|
7
7
|
TEST_TABLE = 'test_data'
|
8
8
|
TEST_INDEXED_TABLE = 'test_indexed_data'
|
9
9
|
|
10
|
+
def test_table
|
11
|
+
"#{TEST_DATABASE}.#{TEST_TABLE}"
|
12
|
+
end
|
13
|
+
|
14
|
+
def test_indexed_table
|
15
|
+
"#{TEST_DATABASE}.#{TEST_INDEXED_TABLE}"
|
16
|
+
end
|
17
|
+
|
10
18
|
# TODO: make into yaml config
|
11
19
|
$connection = Mysql2::Client.new(host: 'localhost', username: 'root')
|
12
20
|
|
13
|
-
$cleaner = MultisertSpec::MrClean.new(database: TEST_DATABASE,
|
21
|
+
$cleaner = MultisertSpec::MrClean.new(database: TEST_DATABASE,
|
22
|
+
connection: $connection) do |mgr|
|
23
|
+
|
14
24
|
mgr.create_table_schemas << %[
|
15
|
-
CREATE TABLE IF NOT EXISTS #{
|
25
|
+
CREATE TABLE IF NOT EXISTS #{test_table} (
|
16
26
|
test_field_int_1 int default null,
|
17
27
|
test_field_int_2 int default null,
|
18
28
|
test_field_int_3 int default null,
|
@@ -22,7 +32,7 @@ $cleaner = MultisertSpec::MrClean.new(database: TEST_DATABASE, connection: $conn
|
|
22
32
|
test_field_datetime DATETIME default null)]
|
23
33
|
|
24
34
|
mgr.create_table_schemas << %[
|
25
|
-
CREATE TABLE IF NOT EXISTS #{
|
35
|
+
CREATE TABLE IF NOT EXISTS #{test_indexed_table} (
|
26
36
|
test_id int not null,
|
27
37
|
test_field varchar(15) default null,
|
28
38
|
primary key (test_id))]
|
@@ -54,17 +64,17 @@ describe Multisert do
|
|
54
64
|
end
|
55
65
|
|
56
66
|
it "does not fall over when there are no entries" do
|
57
|
-
connection.query "DELETE FROM #{
|
67
|
+
connection.query "DELETE FROM #{test_table}"
|
58
68
|
|
59
69
|
buffer.write_buffer!
|
60
70
|
|
61
|
-
write_buffer_records = connection.query "SELECT * FROM #{
|
71
|
+
write_buffer_records = connection.query "SELECT * FROM #{test_table}"
|
62
72
|
expect(write_buffer_records.to_a).to eq []
|
63
73
|
expect(buffer.entries).to eq []
|
64
74
|
end
|
65
75
|
|
66
76
|
it "multi-inserts all added entries and clears #entries" do
|
67
|
-
pre_write_buffer_records = connection.query "SELECT * FROM #{
|
77
|
+
pre_write_buffer_records = connection.query "SELECT * FROM #{test_table}"
|
68
78
|
expect(pre_write_buffer_records.to_a).to eq []
|
69
79
|
|
70
80
|
buffer.connection = connection
|
@@ -88,7 +98,7 @@ describe Multisert do
|
|
88
98
|
, test_field_int_2
|
89
99
|
, test_field_int_3
|
90
100
|
, test_field_int_4
|
91
|
-
FROM #{
|
101
|
+
FROM #{test_table}]
|
92
102
|
|
93
103
|
expect(post_write_buffer_records.to_a).to eq [
|
94
104
|
{'test_field_int_1' => 1, 'test_field_int_2' => 3, 'test_field_int_3' => 4, 'test_field_int_4' => 5},
|
@@ -100,7 +110,7 @@ describe Multisert do
|
|
100
110
|
end
|
101
111
|
|
102
112
|
it "works with strings" do
|
103
|
-
pre_write_buffer_records = connection.query "SELECT * FROM #{
|
113
|
+
pre_write_buffer_records = connection.query "SELECT * FROM #{test_table}"
|
104
114
|
expect(pre_write_buffer_records.to_a).to eq []
|
105
115
|
|
106
116
|
buffer.connection = connection
|
@@ -115,7 +125,7 @@ describe Multisert do
|
|
115
125
|
|
116
126
|
buffer.write_buffer!
|
117
127
|
|
118
|
-
post_write_buffer_records = connection.query %[SELECT test_field_varchar FROM #{
|
128
|
+
post_write_buffer_records = connection.query %[SELECT test_field_varchar FROM #{test_table}]
|
119
129
|
expect(post_write_buffer_records.to_a).to eq [
|
120
130
|
{'test_field_varchar' => 'a'},
|
121
131
|
{'test_field_varchar' => 'b'},
|
@@ -128,7 +138,7 @@ describe Multisert do
|
|
128
138
|
it "works with strings that have illegal characters"
|
129
139
|
|
130
140
|
it "works with dates" do
|
131
|
-
pre_write_buffer_records = connection.query "SELECT * FROM #{
|
141
|
+
pre_write_buffer_records = connection.query "SELECT * FROM #{test_table}"
|
132
142
|
expect(pre_write_buffer_records.to_a).to eq []
|
133
143
|
|
134
144
|
buffer.connection = connection
|
@@ -143,7 +153,7 @@ describe Multisert do
|
|
143
153
|
|
144
154
|
buffer.write_buffer!
|
145
155
|
|
146
|
-
post_write_buffer_records = connection.query %[SELECT test_field_date FROM #{
|
156
|
+
post_write_buffer_records = connection.query %[SELECT test_field_date FROM #{test_table}]
|
147
157
|
|
148
158
|
expect(post_write_buffer_records.to_a).to eq [
|
149
159
|
{'test_field_date' => Date.parse('2013-01-15')},
|
@@ -155,7 +165,7 @@ describe Multisert do
|
|
155
165
|
end
|
156
166
|
|
157
167
|
it "works with times" do
|
158
|
-
pre_write_buffer_records = connection.query "SELECT * FROM #{
|
168
|
+
pre_write_buffer_records = connection.query "SELECT * FROM #{test_table}"
|
159
169
|
expect(pre_write_buffer_records.to_a).to eq []
|
160
170
|
|
161
171
|
buffer.connection = connection
|
@@ -170,7 +180,7 @@ describe Multisert do
|
|
170
180
|
|
171
181
|
buffer.write_buffer!
|
172
182
|
|
173
|
-
post_write_buffer_records = connection.query %[SELECT test_field_datetime FROM #{
|
183
|
+
post_write_buffer_records = connection.query %[SELECT test_field_datetime FROM #{test_table}]
|
174
184
|
|
175
185
|
expect(post_write_buffer_records.to_a).to eq [
|
176
186
|
{'test_field_datetime' => Time.new(2013, 1, 15, 1, 5, 11)},
|
@@ -208,7 +218,7 @@ describe Multisert do
|
|
208
218
|
|
209
219
|
context "set to replace" do
|
210
220
|
it "writes over an existing record with the same primary / unique key" do
|
211
|
-
connection.query %[INSERT INTO #{
|
221
|
+
connection.query %[INSERT INTO #{test_indexed_table} (test_id, test_field)
|
212
222
|
VALUES (1, 'ONE'), (2, 'TWO')]
|
213
223
|
|
214
224
|
buffer.connection = connection
|
@@ -218,6 +228,7 @@ describe Multisert do
|
|
218
228
|
buffer.insert_strategy = :replace
|
219
229
|
|
220
230
|
buffer << [1, 'SOMETHING NEW']
|
231
|
+
buffer << [3, 'ALSO NEW']
|
221
232
|
|
222
233
|
buffer.write_buffer!
|
223
234
|
|
@@ -225,17 +236,18 @@ describe Multisert do
|
|
225
236
|
SELECT
|
226
237
|
test_id
|
227
238
|
, test_field
|
228
|
-
FROM #{
|
239
|
+
FROM #{test_indexed_table}]
|
229
240
|
|
230
241
|
expect(post_write_buffer_records.to_a).to eq [
|
231
242
|
{'test_id' => 1, 'test_field' => 'SOMETHING NEW'},
|
232
|
-
{'test_id' => 2, 'test_field' => 'TWO'}
|
243
|
+
{'test_id' => 2, 'test_field' => 'TWO'},
|
244
|
+
{'test_id' => 3, 'test_field' => 'ALSO NEW'}]
|
233
245
|
end
|
234
246
|
end
|
235
247
|
|
236
248
|
context "set to ignore" do
|
237
249
|
before do
|
238
|
-
connection.query %[INSERT INTO #{
|
250
|
+
connection.query %[INSERT INTO #{test_indexed_table} (test_id, test_field)
|
239
251
|
VALUES (1, 'ONE'), (2, 'TWO')]
|
240
252
|
|
241
253
|
buffer.connection = connection
|
@@ -245,6 +257,7 @@ describe Multisert do
|
|
245
257
|
buffer.insert_strategy = :ignore
|
246
258
|
|
247
259
|
buffer << [1, 'NEW']
|
260
|
+
buffer << [3, 'ALSO NEW']
|
248
261
|
end
|
249
262
|
|
250
263
|
it "does not raise an error" do
|
@@ -258,11 +271,12 @@ describe Multisert do
|
|
258
271
|
SELECT
|
259
272
|
test_id
|
260
273
|
, test_field
|
261
|
-
FROM #{
|
274
|
+
FROM #{test_indexed_table}]
|
262
275
|
|
263
276
|
expect(post_write_buffer_records.to_a).to eq [
|
264
277
|
{'test_id' => 1, 'test_field' => 'ONE'},
|
265
|
-
{'test_id' => 2, 'test_field' => 'TWO'}
|
278
|
+
{'test_id' => 2, 'test_field' => 'TWO'},
|
279
|
+
{'test_id' => 3, 'test_field' => 'ALSO NEW'}]
|
266
280
|
end
|
267
281
|
end
|
268
282
|
|
@@ -280,4 +294,59 @@ describe Multisert do
|
|
280
294
|
end
|
281
295
|
end
|
282
296
|
end
|
297
|
+
|
298
|
+
describe "#with_buffering" do
|
299
|
+
let(:connection) { $connection }
|
300
|
+
let(:buffer) { described_class.new }
|
301
|
+
|
302
|
+
before do
|
303
|
+
$cleaner.ensure_clean_database! teardown_tables: (!!ENV['TEARDOWN'] || false)
|
304
|
+
end
|
305
|
+
|
306
|
+
it "ensures all records are inserted" do
|
307
|
+
pre_write_buffer_records = connection.query "SELECT * FROM #{test_table}"
|
308
|
+
expect(pre_write_buffer_records.to_a).to eq []
|
309
|
+
|
310
|
+
buffer.connection = connection
|
311
|
+
buffer.database = TEST_DATABASE
|
312
|
+
buffer.table = TEST_TABLE
|
313
|
+
buffer.fields = ['test_field_int_1',
|
314
|
+
'test_field_int_2',
|
315
|
+
'test_field_int_3',
|
316
|
+
'test_field_int_4']
|
317
|
+
|
318
|
+
|
319
|
+
|
320
|
+
sample_entries = [[ 1, 3, 4, 5],
|
321
|
+
[ 6, 7, 8, 9],
|
322
|
+
[10, 11, 12, 13],
|
323
|
+
[14, 15, 16, 17]]
|
324
|
+
|
325
|
+
# set this to 1 less than the total number of entries we want to buffer.
|
326
|
+
# test that all are written w/o explicitly calling #write!
|
327
|
+
buffer.max_buffer_count = sample_entries.length - 1
|
328
|
+
|
329
|
+
buffer.with_buffering do |buffer|
|
330
|
+
sample_entries.each do |entry|
|
331
|
+
buffer << entry.to_a
|
332
|
+
end
|
333
|
+
end
|
334
|
+
|
335
|
+
post_write_buffer_records = connection.query %[
|
336
|
+
SELECT
|
337
|
+
test_field_int_1
|
338
|
+
, test_field_int_2
|
339
|
+
, test_field_int_3
|
340
|
+
, test_field_int_4
|
341
|
+
FROM #{test_table}]
|
342
|
+
|
343
|
+
expect(post_write_buffer_records.to_a).to eq [
|
344
|
+
{'test_field_int_1' => 1, 'test_field_int_2' => 3, 'test_field_int_3' => 4, 'test_field_int_4' => 5},
|
345
|
+
{'test_field_int_1' => 6, 'test_field_int_2' => 7, 'test_field_int_3' => 8, 'test_field_int_4' => 9},
|
346
|
+
{'test_field_int_1' => 10, 'test_field_int_2' => 11, 'test_field_int_3' => 12, 'test_field_int_4' => 13},
|
347
|
+
{'test_field_int_1' => 14, 'test_field_int_2' => 15, 'test_field_int_3' => 16, 'test_field_int_4' => 17}]
|
348
|
+
|
349
|
+
expect(buffer.entries).to eq []
|
350
|
+
end
|
351
|
+
end
|
283
352
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: multisert
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.5
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2013-
|
12
|
+
date: 2013-04-07 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: mysql2
|