multisert 0.0.4 → 0.0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.md +24 -10
- data/lib/multisert.rb +5 -0
- data/lib/multisert/version.rb +1 -1
- data/performance/multisert_performance_test.rb +5 -4
- data/spec/multisert_spec.rb +88 -19
- metadata +2 -2
data/README.md
CHANGED
@@ -53,6 +53,23 @@ buffer = Multisert.new connection: dbclient,
|
|
53
53
|
table: 'some_table',
|
54
54
|
fields: ['field_1', 'field_2', 'field_3', 'field_4']
|
55
55
|
|
56
|
+
buffer.with_buffering do |buffer|
|
57
|
+
(0..1_000_000).each do |i|
|
58
|
+
res = some_magical_calculation(i)
|
59
|
+
buffer << res
|
60
|
+
end
|
61
|
+
end
|
62
|
+
```
|
63
|
+
|
64
|
+
We start by creating a new Multisert instance, providing the database
|
65
|
+
connection, database and table, and fields as attributes. Next, we leverage
|
66
|
+
`#with_buffering` to wrap our sample iteration. Within the block, we shovel the
|
67
|
+
results from `some_magical_calculation` into the Multisert instance, which then
|
68
|
+
handles all the heavy lifting in terms of writing to the database.
|
69
|
+
|
70
|
+
As an aside, `#with_buffering` handles the following under the hood:
|
71
|
+
|
72
|
+
```ruby
|
56
73
|
(0..1_000_000).each do |i|
|
57
74
|
res = some_magical_calculation(i)
|
58
75
|
buffer << res
|
@@ -60,16 +77,13 @@ end
|
|
60
77
|
buffer.write!
|
61
78
|
```
|
62
79
|
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
script. This ensures that any pending entries are written to the database table
|
71
|
-
that were not automatically taken care of by the auto-write that will kick in
|
72
|
-
during the iteration.
|
80
|
+
As we iterate through, the Multisert instance will build up the records and
|
81
|
+
then write them to the specified database table when it hits an internal
|
82
|
+
count (default is 10_000 entries, but this can be adjusted via the
|
83
|
+
`max_buffer_count` attribute). The `buffer.write!` at the end ensures that
|
84
|
+
any pending entries that were not automatically taken care of by the
|
85
|
+
auto-write that kicks in during the iteration are still written to the
|
86
|
+
database table.
|
73
87
|
|
74
88
|
## Insert Strategies
|
75
89
|
|
data/lib/multisert.rb
CHANGED
data/lib/multisert/version.rb
CHANGED
@@ -38,7 +38,7 @@ def insert_performance_test connection, cleaner, sample_records, destination
|
|
38
38
|
sample_records.each do |record|
|
39
39
|
connection.query %[
|
40
40
|
INSERT INTO #{destination} (#{fields})
|
41
|
-
VALUES (#{record.
|
41
|
+
VALUES (#{record.values.join(', ')})]
|
42
42
|
end
|
43
43
|
runtime = timer.stop!
|
44
44
|
ensure_data_completeness! connection, destination, sample_records.count
|
@@ -57,10 +57,11 @@ def multinsert_performance_test connection, cleaner, sample_records, destination
|
|
57
57
|
cleaner.ensure_clean_database!
|
58
58
|
|
59
59
|
(timer = Timer.new).start!
|
60
|
-
|
61
|
-
|
60
|
+
buffer.with_buffering do |buffer|
|
61
|
+
sample_records.each do |record|
|
62
|
+
buffer << record.values
|
63
|
+
end
|
62
64
|
end
|
63
|
-
buffer.flush!
|
64
65
|
runtime = timer.stop!
|
65
66
|
ensure_data_completeness! connection, destination, sample_records.count
|
66
67
|
puts "multisert w/ buffer of #{buffer.max_buffer_count} took #{runtime.round(2)}s to insert #{sample_records.count} entries"
|
data/spec/multisert_spec.rb
CHANGED
@@ -7,12 +7,22 @@ TEST_DATABASE = 'multisert_test'
|
|
7
7
|
TEST_TABLE = 'test_data'
|
8
8
|
TEST_INDEXED_TABLE = 'test_indexed_data'
|
9
9
|
|
10
|
+
def test_table
|
11
|
+
"#{TEST_DATABASE}.#{TEST_TABLE}"
|
12
|
+
end
|
13
|
+
|
14
|
+
def test_indexed_table
|
15
|
+
"#{TEST_DATABASE}.#{TEST_INDEXED_TABLE}"
|
16
|
+
end
|
17
|
+
|
10
18
|
# TODO: make into yaml config
|
11
19
|
$connection = Mysql2::Client.new(host: 'localhost', username: 'root')
|
12
20
|
|
13
|
-
$cleaner = MultisertSpec::MrClean.new(database: TEST_DATABASE,
|
21
|
+
$cleaner = MultisertSpec::MrClean.new(database: TEST_DATABASE,
|
22
|
+
connection: $connection) do |mgr|
|
23
|
+
|
14
24
|
mgr.create_table_schemas << %[
|
15
|
-
CREATE TABLE IF NOT EXISTS #{
|
25
|
+
CREATE TABLE IF NOT EXISTS #{test_table} (
|
16
26
|
test_field_int_1 int default null,
|
17
27
|
test_field_int_2 int default null,
|
18
28
|
test_field_int_3 int default null,
|
@@ -22,7 +32,7 @@ $cleaner = MultisertSpec::MrClean.new(database: TEST_DATABASE, connection: $conn
|
|
22
32
|
test_field_datetime DATETIME default null)]
|
23
33
|
|
24
34
|
mgr.create_table_schemas << %[
|
25
|
-
CREATE TABLE IF NOT EXISTS #{
|
35
|
+
CREATE TABLE IF NOT EXISTS #{test_indexed_table} (
|
26
36
|
test_id int not null,
|
27
37
|
test_field varchar(15) default null,
|
28
38
|
primary key (test_id))]
|
@@ -54,17 +64,17 @@ describe Multisert do
|
|
54
64
|
end
|
55
65
|
|
56
66
|
it "does not fall over when there are no entries" do
|
57
|
-
connection.query "DELETE FROM #{
|
67
|
+
connection.query "DELETE FROM #{test_table}"
|
58
68
|
|
59
69
|
buffer.write_buffer!
|
60
70
|
|
61
|
-
write_buffer_records = connection.query "SELECT * FROM #{
|
71
|
+
write_buffer_records = connection.query "SELECT * FROM #{test_table}"
|
62
72
|
expect(write_buffer_records.to_a).to eq []
|
63
73
|
expect(buffer.entries).to eq []
|
64
74
|
end
|
65
75
|
|
66
76
|
it "multi-inserts all added entries and clears #entries" do
|
67
|
-
pre_write_buffer_records = connection.query "SELECT * FROM #{
|
77
|
+
pre_write_buffer_records = connection.query "SELECT * FROM #{test_table}"
|
68
78
|
expect(pre_write_buffer_records.to_a).to eq []
|
69
79
|
|
70
80
|
buffer.connection = connection
|
@@ -88,7 +98,7 @@ describe Multisert do
|
|
88
98
|
, test_field_int_2
|
89
99
|
, test_field_int_3
|
90
100
|
, test_field_int_4
|
91
|
-
FROM #{
|
101
|
+
FROM #{test_table}]
|
92
102
|
|
93
103
|
expect(post_write_buffer_records.to_a).to eq [
|
94
104
|
{'test_field_int_1' => 1, 'test_field_int_2' => 3, 'test_field_int_3' => 4, 'test_field_int_4' => 5},
|
@@ -100,7 +110,7 @@ describe Multisert do
|
|
100
110
|
end
|
101
111
|
|
102
112
|
it "works with strings" do
|
103
|
-
pre_write_buffer_records = connection.query "SELECT * FROM #{
|
113
|
+
pre_write_buffer_records = connection.query "SELECT * FROM #{test_table}"
|
104
114
|
expect(pre_write_buffer_records.to_a).to eq []
|
105
115
|
|
106
116
|
buffer.connection = connection
|
@@ -115,7 +125,7 @@ describe Multisert do
|
|
115
125
|
|
116
126
|
buffer.write_buffer!
|
117
127
|
|
118
|
-
post_write_buffer_records = connection.query %[SELECT test_field_varchar FROM #{
|
128
|
+
post_write_buffer_records = connection.query %[SELECT test_field_varchar FROM #{test_table}]
|
119
129
|
expect(post_write_buffer_records.to_a).to eq [
|
120
130
|
{'test_field_varchar' => 'a'},
|
121
131
|
{'test_field_varchar' => 'b'},
|
@@ -128,7 +138,7 @@ describe Multisert do
|
|
128
138
|
it "works with strings that have illegal characters"
|
129
139
|
|
130
140
|
it "works with dates" do
|
131
|
-
pre_write_buffer_records = connection.query "SELECT * FROM #{
|
141
|
+
pre_write_buffer_records = connection.query "SELECT * FROM #{test_table}"
|
132
142
|
expect(pre_write_buffer_records.to_a).to eq []
|
133
143
|
|
134
144
|
buffer.connection = connection
|
@@ -143,7 +153,7 @@ describe Multisert do
|
|
143
153
|
|
144
154
|
buffer.write_buffer!
|
145
155
|
|
146
|
-
post_write_buffer_records = connection.query %[SELECT test_field_date FROM #{
|
156
|
+
post_write_buffer_records = connection.query %[SELECT test_field_date FROM #{test_table}]
|
147
157
|
|
148
158
|
expect(post_write_buffer_records.to_a).to eq [
|
149
159
|
{'test_field_date' => Date.parse('2013-01-15')},
|
@@ -155,7 +165,7 @@ describe Multisert do
|
|
155
165
|
end
|
156
166
|
|
157
167
|
it "works with times" do
|
158
|
-
pre_write_buffer_records = connection.query "SELECT * FROM #{
|
168
|
+
pre_write_buffer_records = connection.query "SELECT * FROM #{test_table}"
|
159
169
|
expect(pre_write_buffer_records.to_a).to eq []
|
160
170
|
|
161
171
|
buffer.connection = connection
|
@@ -170,7 +180,7 @@ describe Multisert do
|
|
170
180
|
|
171
181
|
buffer.write_buffer!
|
172
182
|
|
173
|
-
post_write_buffer_records = connection.query %[SELECT test_field_datetime FROM #{
|
183
|
+
post_write_buffer_records = connection.query %[SELECT test_field_datetime FROM #{test_table}]
|
174
184
|
|
175
185
|
expect(post_write_buffer_records.to_a).to eq [
|
176
186
|
{'test_field_datetime' => Time.new(2013, 1, 15, 1, 5, 11)},
|
@@ -208,7 +218,7 @@ describe Multisert do
|
|
208
218
|
|
209
219
|
context "set to replace" do
|
210
220
|
it "writes over an existing record with the same primary / unique key" do
|
211
|
-
connection.query %[INSERT INTO #{
|
221
|
+
connection.query %[INSERT INTO #{test_indexed_table} (test_id, test_field)
|
212
222
|
VALUES (1, 'ONE'), (2, 'TWO')]
|
213
223
|
|
214
224
|
buffer.connection = connection
|
@@ -218,6 +228,7 @@ describe Multisert do
|
|
218
228
|
buffer.insert_strategy = :replace
|
219
229
|
|
220
230
|
buffer << [1, 'SOMETHING NEW']
|
231
|
+
buffer << [3, 'ALSO NEW']
|
221
232
|
|
222
233
|
buffer.write_buffer!
|
223
234
|
|
@@ -225,17 +236,18 @@ describe Multisert do
|
|
225
236
|
SELECT
|
226
237
|
test_id
|
227
238
|
, test_field
|
228
|
-
FROM #{
|
239
|
+
FROM #{test_indexed_table}]
|
229
240
|
|
230
241
|
expect(post_write_buffer_records.to_a).to eq [
|
231
242
|
{'test_id' => 1, 'test_field' => 'SOMETHING NEW'},
|
232
|
-
{'test_id' => 2, 'test_field' => 'TWO'}
|
243
|
+
{'test_id' => 2, 'test_field' => 'TWO'},
|
244
|
+
{'test_id' => 3, 'test_field' => 'ALSO NEW'}]
|
233
245
|
end
|
234
246
|
end
|
235
247
|
|
236
248
|
context "set to ignore" do
|
237
249
|
before do
|
238
|
-
connection.query %[INSERT INTO #{
|
250
|
+
connection.query %[INSERT INTO #{test_indexed_table} (test_id, test_field)
|
239
251
|
VALUES (1, 'ONE'), (2, 'TWO')]
|
240
252
|
|
241
253
|
buffer.connection = connection
|
@@ -245,6 +257,7 @@ describe Multisert do
|
|
245
257
|
buffer.insert_strategy = :ignore
|
246
258
|
|
247
259
|
buffer << [1, 'NEW']
|
260
|
+
buffer << [3, 'ALSO NEW']
|
248
261
|
end
|
249
262
|
|
250
263
|
it "does not raise an error" do
|
@@ -258,11 +271,12 @@ describe Multisert do
|
|
258
271
|
SELECT
|
259
272
|
test_id
|
260
273
|
, test_field
|
261
|
-
FROM #{
|
274
|
+
FROM #{test_indexed_table}]
|
262
275
|
|
263
276
|
expect(post_write_buffer_records.to_a).to eq [
|
264
277
|
{'test_id' => 1, 'test_field' => 'ONE'},
|
265
|
-
{'test_id' => 2, 'test_field' => 'TWO'}
|
278
|
+
{'test_id' => 2, 'test_field' => 'TWO'},
|
279
|
+
{'test_id' => 3, 'test_field' => 'ALSO NEW'}]
|
266
280
|
end
|
267
281
|
end
|
268
282
|
|
@@ -280,4 +294,59 @@ describe Multisert do
|
|
280
294
|
end
|
281
295
|
end
|
282
296
|
end
|
297
|
+
|
298
|
+
describe "#with_buffering" do
|
299
|
+
let(:connection) { $connection }
|
300
|
+
let(:buffer) { described_class.new }
|
301
|
+
|
302
|
+
before do
|
303
|
+
$cleaner.ensure_clean_database! teardown_tables: (!!ENV['TEARDOWN'] || false)
|
304
|
+
end
|
305
|
+
|
306
|
+
it "ensures all records are inserted" do
|
307
|
+
pre_write_buffer_records = connection.query "SELECT * FROM #{test_table}"
|
308
|
+
expect(pre_write_buffer_records.to_a).to eq []
|
309
|
+
|
310
|
+
buffer.connection = connection
|
311
|
+
buffer.database = TEST_DATABASE
|
312
|
+
buffer.table = TEST_TABLE
|
313
|
+
buffer.fields = ['test_field_int_1',
|
314
|
+
'test_field_int_2',
|
315
|
+
'test_field_int_3',
|
316
|
+
'test_field_int_4']
|
317
|
+
|
318
|
+
|
319
|
+
|
320
|
+
sample_entries = [[ 1, 3, 4, 5],
|
321
|
+
[ 6, 7, 8, 9],
|
322
|
+
[10, 11, 12, 13],
|
323
|
+
[14, 15, 16, 17]]
|
324
|
+
|
325
|
+
# set this to 1 less than the total number of entries we want to buffer.
|
326
|
+
# test that all are written without explicitly calling #write!
|
327
|
+
buffer.max_buffer_count = sample_entries.length - 1
|
328
|
+
|
329
|
+
buffer.with_buffering do |buffer|
|
330
|
+
sample_entries.each do |entry|
|
331
|
+
buffer << entry.to_a
|
332
|
+
end
|
333
|
+
end
|
334
|
+
|
335
|
+
post_write_buffer_records = connection.query %[
|
336
|
+
SELECT
|
337
|
+
test_field_int_1
|
338
|
+
, test_field_int_2
|
339
|
+
, test_field_int_3
|
340
|
+
, test_field_int_4
|
341
|
+
FROM #{test_table}]
|
342
|
+
|
343
|
+
expect(post_write_buffer_records.to_a).to eq [
|
344
|
+
{'test_field_int_1' => 1, 'test_field_int_2' => 3, 'test_field_int_3' => 4, 'test_field_int_4' => 5},
|
345
|
+
{'test_field_int_1' => 6, 'test_field_int_2' => 7, 'test_field_int_3' => 8, 'test_field_int_4' => 9},
|
346
|
+
{'test_field_int_1' => 10, 'test_field_int_2' => 11, 'test_field_int_3' => 12, 'test_field_int_4' => 13},
|
347
|
+
{'test_field_int_1' => 14, 'test_field_int_2' => 15, 'test_field_int_3' => 16, 'test_field_int_4' => 17}]
|
348
|
+
|
349
|
+
expect(buffer.entries).to eq []
|
350
|
+
end
|
351
|
+
end
|
283
352
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: multisert
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.5
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2013-
|
12
|
+
date: 2013-04-07 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: mysql2
|