multisert 0.0.4 → 0.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/README.md CHANGED
@@ -53,6 +53,23 @@ buffer = Multisert.new connection: dbclient,
53
53
  table: 'some_table',
54
54
  fields: ['field_1', 'field_2', 'field_3', 'field_4']
55
55
 
56
+ buffer.with_buffering do |buffer|
57
+ (0..1_000_000).each do |i|
58
+ res = some_magical_calculation(i)
59
+ buffer << res
60
+ end
61
+ end
62
+ ```
63
+
64
+ We start by creating a new Multisert instance, providing the database
65
+ connection, database and table, and fields as attributes. Next, we leverage
66
+ `#with_buffering` to wrap our sample iteration. Within the block, we shovel the
67
+ results from `some_magical_calculation` into the Multisert instance, which then
68
+ handles all the heavy lifting in terms of writing to the database.
69
+
70
+ As an aside, `#with_buffering` does the following under the hood:
71
+
72
+ ```ruby
56
73
  (0..1_000_000).each do |i|
57
74
  res = some_magical_calculation(i)
58
75
  buffer << res
@@ -60,16 +77,13 @@ end
60
77
  buffer.write!
61
78
  ```
62
79
 
63
- We start by creating a new Multisert instance, providing the database
64
- connection, database and table, and fields as attributes. Next, as we get the
65
- results from `some_magical_calculation`, we shovel each into the Multisert
66
- instance. As we iterate through, the Multisert instance will build up the
67
- records and then write itself to the specified database table when it hits an
68
- internal count (default is 10_000, but can be set via the `max_buffer_count`
69
- attribute). One last thing to note is the `buffer.write!` at the end of the
70
- script. This ensures that any pending entries are written to the database table
71
- that were not automatically taken care of by the auto-write that will kick in
72
- during the iteration.
80
+ As we iterate through, the Multisert instance will build up the records and
81
+ then write itself to the specified database table when it hits an internal
82
+ count (default is 10_000 entries, but this can be adjusted via the
83
+ `max_buffer_count` attribute). The `buffer.write!` at the end ensures that
84
+ any entries still pending in the buffer (those not already flushed by the
85
+ automatic writes that kick in during the iteration) are written to the
86
+ database table.
73
87
 
74
88
  ## Insert Strategies
75
89
 
@@ -43,6 +43,11 @@ class Multisert
43
43
  @max_buffer_count || MAX_BUFFER_COUNT_DEFAULT
44
44
  end
45
45
 
46
+ def with_buffering &block
47
+ yield self
48
+ write_buffer!
49
+ end
50
+
46
51
  private
47
52
 
48
53
  def insert_strategy?
@@ -1,3 +1,3 @@
1
1
  class Multisert
2
- VERSION = "0.0.4"
2
+ VERSION = "0.0.5"
3
3
  end
@@ -38,7 +38,7 @@ def insert_performance_test connection, cleaner, sample_records, destination
38
38
  sample_records.each do |record|
39
39
  connection.query %[
40
40
  INSERT INTO #{destination} (#{fields})
41
- VALUES (#{record.map { |k,v| v }.join(', ')})]
41
+ VALUES (#{record.values.join(', ')})]
42
42
  end
43
43
  runtime = timer.stop!
44
44
  ensure_data_completeness! connection, destination, sample_records.count
@@ -57,10 +57,11 @@ def multinsert_performance_test connection, cleaner, sample_records, destination
57
57
  cleaner.ensure_clean_database!
58
58
 
59
59
  (timer = Timer.new).start!
60
- sample_records.each do |record|
61
- buffer << record.map { |k, v| v }
60
+ buffer.with_buffering do |buffer|
61
+ sample_records.each do |record|
62
+ buffer << record.values
63
+ end
62
64
  end
63
- buffer.flush!
64
65
  runtime = timer.stop!
65
66
  ensure_data_completeness! connection, destination, sample_records.count
66
67
  puts "multisert w/ buffer of #{buffer.max_buffer_count} took #{runtime.round(2)}s to insert #{sample_records.count} entries"
@@ -7,12 +7,22 @@ TEST_DATABASE = 'multisert_test'
7
7
  TEST_TABLE = 'test_data'
8
8
  TEST_INDEXED_TABLE = 'test_indexed_data'
9
9
 
10
+ def test_table
11
+ "#{TEST_DATABASE}.#{TEST_TABLE}"
12
+ end
13
+
14
+ def test_indexed_table
15
+ "#{TEST_DATABASE}.#{TEST_INDEXED_TABLE}"
16
+ end
17
+
10
18
  # TODO: make into yaml config
11
19
  $connection = Mysql2::Client.new(host: 'localhost', username: 'root')
12
20
 
13
- $cleaner = MultisertSpec::MrClean.new(database: TEST_DATABASE, connection: $connection) do |mgr|
21
+ $cleaner = MultisertSpec::MrClean.new(database: TEST_DATABASE,
22
+ connection: $connection) do |mgr|
23
+
14
24
  mgr.create_table_schemas << %[
15
- CREATE TABLE IF NOT EXISTS #{mgr.database}.#{TEST_TABLE} (
25
+ CREATE TABLE IF NOT EXISTS #{test_table} (
16
26
  test_field_int_1 int default null,
17
27
  test_field_int_2 int default null,
18
28
  test_field_int_3 int default null,
@@ -22,7 +32,7 @@ $cleaner = MultisertSpec::MrClean.new(database: TEST_DATABASE, connection: $conn
22
32
  test_field_datetime DATETIME default null)]
23
33
 
24
34
  mgr.create_table_schemas << %[
25
- CREATE TABLE IF NOT EXISTS #{mgr.database}.#{TEST_INDEXED_TABLE} (
35
+ CREATE TABLE IF NOT EXISTS #{test_indexed_table} (
26
36
  test_id int not null,
27
37
  test_field varchar(15) default null,
28
38
  primary key (test_id))]
@@ -54,17 +64,17 @@ describe Multisert do
54
64
  end
55
65
 
56
66
  it "does not fall over when there are no entries" do
57
- connection.query "DELETE FROM #{TEST_DATABASE}.#{TEST_TABLE}"
67
+ connection.query "DELETE FROM #{test_table}"
58
68
 
59
69
  buffer.write_buffer!
60
70
 
61
- write_buffer_records = connection.query "SELECT * FROM #{TEST_DATABASE}.#{TEST_TABLE}"
71
+ write_buffer_records = connection.query "SELECT * FROM #{test_table}"
62
72
  expect(write_buffer_records.to_a).to eq []
63
73
  expect(buffer.entries).to eq []
64
74
  end
65
75
 
66
76
  it "multi-inserts all added entries and clears #entries" do
67
- pre_write_buffer_records = connection.query "SELECT * FROM #{TEST_DATABASE}.#{TEST_TABLE}"
77
+ pre_write_buffer_records = connection.query "SELECT * FROM #{test_table}"
68
78
  expect(pre_write_buffer_records.to_a).to eq []
69
79
 
70
80
  buffer.connection = connection
@@ -88,7 +98,7 @@ describe Multisert do
88
98
  , test_field_int_2
89
99
  , test_field_int_3
90
100
  , test_field_int_4
91
- FROM #{TEST_DATABASE}.#{TEST_TABLE}]
101
+ FROM #{test_table}]
92
102
 
93
103
  expect(post_write_buffer_records.to_a).to eq [
94
104
  {'test_field_int_1' => 1, 'test_field_int_2' => 3, 'test_field_int_3' => 4, 'test_field_int_4' => 5},
@@ -100,7 +110,7 @@ describe Multisert do
100
110
  end
101
111
 
102
112
  it "works with strings" do
103
- pre_write_buffer_records = connection.query "SELECT * FROM #{TEST_DATABASE}.#{TEST_TABLE}"
113
+ pre_write_buffer_records = connection.query "SELECT * FROM #{test_table}"
104
114
  expect(pre_write_buffer_records.to_a).to eq []
105
115
 
106
116
  buffer.connection = connection
@@ -115,7 +125,7 @@ describe Multisert do
115
125
 
116
126
  buffer.write_buffer!
117
127
 
118
- post_write_buffer_records = connection.query %[SELECT test_field_varchar FROM #{TEST_DATABASE}.#{TEST_TABLE}]
128
+ post_write_buffer_records = connection.query %[SELECT test_field_varchar FROM #{test_table}]
119
129
  expect(post_write_buffer_records.to_a).to eq [
120
130
  {'test_field_varchar' => 'a'},
121
131
  {'test_field_varchar' => 'b'},
@@ -128,7 +138,7 @@ describe Multisert do
128
138
  it "works with strings that have illegal characters"
129
139
 
130
140
  it "works with dates" do
131
- pre_write_buffer_records = connection.query "SELECT * FROM #{TEST_DATABASE}.#{TEST_TABLE}"
141
+ pre_write_buffer_records = connection.query "SELECT * FROM #{test_table}"
132
142
  expect(pre_write_buffer_records.to_a).to eq []
133
143
 
134
144
  buffer.connection = connection
@@ -143,7 +153,7 @@ describe Multisert do
143
153
 
144
154
  buffer.write_buffer!
145
155
 
146
- post_write_buffer_records = connection.query %[SELECT test_field_date FROM #{TEST_DATABASE}.#{TEST_TABLE}]
156
+ post_write_buffer_records = connection.query %[SELECT test_field_date FROM #{test_table}]
147
157
 
148
158
  expect(post_write_buffer_records.to_a).to eq [
149
159
  {'test_field_date' => Date.parse('2013-01-15')},
@@ -155,7 +165,7 @@ describe Multisert do
155
165
  end
156
166
 
157
167
  it "works with times" do
158
- pre_write_buffer_records = connection.query "SELECT * FROM #{TEST_DATABASE}.#{TEST_TABLE}"
168
+ pre_write_buffer_records = connection.query "SELECT * FROM #{test_table}"
159
169
  expect(pre_write_buffer_records.to_a).to eq []
160
170
 
161
171
  buffer.connection = connection
@@ -170,7 +180,7 @@ describe Multisert do
170
180
 
171
181
  buffer.write_buffer!
172
182
 
173
- post_write_buffer_records = connection.query %[SELECT test_field_datetime FROM #{TEST_DATABASE}.#{TEST_TABLE}]
183
+ post_write_buffer_records = connection.query %[SELECT test_field_datetime FROM #{test_table}]
174
184
 
175
185
  expect(post_write_buffer_records.to_a).to eq [
176
186
  {'test_field_datetime' => Time.new(2013, 1, 15, 1, 5, 11)},
@@ -208,7 +218,7 @@ describe Multisert do
208
218
 
209
219
  context "set to replace" do
210
220
  it "writes over an existing record with the same primary / unique key" do
211
- connection.query %[INSERT INTO #{TEST_DATABASE}.#{TEST_INDEXED_TABLE} (test_id, test_field)
221
+ connection.query %[INSERT INTO #{test_indexed_table} (test_id, test_field)
212
222
  VALUES (1, 'ONE'), (2, 'TWO')]
213
223
 
214
224
  buffer.connection = connection
@@ -218,6 +228,7 @@ describe Multisert do
218
228
  buffer.insert_strategy = :replace
219
229
 
220
230
  buffer << [1, 'SOMETHING NEW']
231
+ buffer << [3, 'ALSO NEW']
221
232
 
222
233
  buffer.write_buffer!
223
234
 
@@ -225,17 +236,18 @@ describe Multisert do
225
236
  SELECT
226
237
  test_id
227
238
  , test_field
228
- FROM #{TEST_DATABASE}.#{TEST_INDEXED_TABLE}]
239
+ FROM #{test_indexed_table}]
229
240
 
230
241
  expect(post_write_buffer_records.to_a).to eq [
231
242
  {'test_id' => 1, 'test_field' => 'SOMETHING NEW'},
232
- {'test_id' => 2, 'test_field' => 'TWO'}]
243
+ {'test_id' => 2, 'test_field' => 'TWO'},
244
+ {'test_id' => 3, 'test_field' => 'ALSO NEW'}]
233
245
  end
234
246
  end
235
247
 
236
248
  context "set to ignore" do
237
249
  before do
238
- connection.query %[INSERT INTO #{TEST_DATABASE}.#{TEST_INDEXED_TABLE} (test_id, test_field)
250
+ connection.query %[INSERT INTO #{test_indexed_table} (test_id, test_field)
239
251
  VALUES (1, 'ONE'), (2, 'TWO')]
240
252
 
241
253
  buffer.connection = connection
@@ -245,6 +257,7 @@ describe Multisert do
245
257
  buffer.insert_strategy = :ignore
246
258
 
247
259
  buffer << [1, 'NEW']
260
+ buffer << [3, 'ALSO NEW']
248
261
  end
249
262
 
250
263
  it "does not raise an error" do
@@ -258,11 +271,12 @@ describe Multisert do
258
271
  SELECT
259
272
  test_id
260
273
  , test_field
261
- FROM #{TEST_DATABASE}.#{TEST_INDEXED_TABLE}]
274
+ FROM #{test_indexed_table}]
262
275
 
263
276
  expect(post_write_buffer_records.to_a).to eq [
264
277
  {'test_id' => 1, 'test_field' => 'ONE'},
265
- {'test_id' => 2, 'test_field' => 'TWO'}]
278
+ {'test_id' => 2, 'test_field' => 'TWO'},
279
+ {'test_id' => 3, 'test_field' => 'ALSO NEW'}]
266
280
  end
267
281
  end
268
282
 
@@ -280,4 +294,59 @@ describe Multisert do
280
294
  end
281
295
  end
282
296
  end
297
+
298
+ describe "#with_buffering" do
299
+ let(:connection) { $connection }
300
+ let(:buffer) { described_class.new }
301
+
302
+ before do
303
+ $cleaner.ensure_clean_database! teardown_tables: (!!ENV['TEARDOWN'] || false)
304
+ end
305
+
306
+ it "ensures all records are inserted" do
307
+ pre_write_buffer_records = connection.query "SELECT * FROM #{test_table}"
308
+ expect(pre_write_buffer_records.to_a).to eq []
309
+
310
+ buffer.connection = connection
311
+ buffer.database = TEST_DATABASE
312
+ buffer.table = TEST_TABLE
313
+ buffer.fields = ['test_field_int_1',
314
+ 'test_field_int_2',
315
+ 'test_field_int_3',
316
+ 'test_field_int_4']
317
+
318
+
319
+
320
+ sample_entries = [[ 1, 3, 4, 5],
321
+ [ 6, 7, 8, 9],
322
+ [10, 11, 12, 13],
323
+ [14, 15, 16, 17]]
324
+
325
+ # set this to 1 less than the total number of entries we want to buffer.
326
+ # verify that all entries are written without an explicit call to #write!
327
+ buffer.max_buffer_count = sample_entries.length - 1
328
+
329
+ buffer.with_buffering do |buffer|
330
+ sample_entries.each do |entry|
331
+ buffer << entry.to_a
332
+ end
333
+ end
334
+
335
+ post_write_buffer_records = connection.query %[
336
+ SELECT
337
+ test_field_int_1
338
+ , test_field_int_2
339
+ , test_field_int_3
340
+ , test_field_int_4
341
+ FROM #{test_table}]
342
+
343
+ expect(post_write_buffer_records.to_a).to eq [
344
+ {'test_field_int_1' => 1, 'test_field_int_2' => 3, 'test_field_int_3' => 4, 'test_field_int_4' => 5},
345
+ {'test_field_int_1' => 6, 'test_field_int_2' => 7, 'test_field_int_3' => 8, 'test_field_int_4' => 9},
346
+ {'test_field_int_1' => 10, 'test_field_int_2' => 11, 'test_field_int_3' => 12, 'test_field_int_4' => 13},
347
+ {'test_field_int_1' => 14, 'test_field_int_2' => 15, 'test_field_int_3' => 16, 'test_field_int_4' => 17}]
348
+
349
+ expect(buffer.entries).to eq []
350
+ end
351
+ end
283
352
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: multisert
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.4
4
+ version: 0.0.5
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2013-03-27 00:00:00.000000000 Z
12
+ date: 2013-04-07 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: mysql2