multisert 0.0.4 → 0.0.5

Sign up to get free protection for your applications and to get access to all the features.
data/README.md CHANGED
@@ -53,6 +53,23 @@ buffer = Multisert.new connection: dbclient,
53
53
  table: 'some_table',
54
54
  fields: ['field_1', 'field_2', 'field_3', 'field_4']
55
55
 
56
+ buffer.with_buffering do |buffer|
57
+ (0..1_000_000).each do |i|
58
+ res = some_magical_calculation(i)
59
+ buffer << res
60
+ end
61
+ end
62
+ ```
63
+
64
+ We start by creating a new Multisert instance, providing the database
65
+ connection, database and table, and fields as attributes. Next, we leverage
66
+ `#with_buffering` to wrap our sample iteration. Within the block, we shovel the
67
+ results from `some_magical_calculation` into the Multisert instance, which then
68
+ handles all the heavy lifting in terms of writing to the database.
69
+
70
+ As an aside, `#with_buffering` handles the following under the hood:
71
+
72
+ ```ruby
56
73
  (0..1_000_000).each do |i|
57
74
  res = some_magical_calculation(i)
58
75
  buffer << res
@@ -60,16 +77,13 @@ end
60
77
  buffer.write!
61
78
  ```
62
79
 
63
- We start by creating a new Multisert instance, providing the database
64
- connection, database and table, and fields as attributes. Next, as we get the
65
- results from `some_magical_calculation`, we shovel each into the Multisert
66
- instance. As we iterate through, the Multisert instance will build up the
67
- records and then write itself to the specified database table when it hits an
68
- internal count (default is 10_000, but can be set via the `max_buffer_count`
69
- attribute). One last thing to note is the `buffer.write!` at the end of the
70
- script. This ensures that any pending entries are written to the database table
71
- that were not automatically taken care of by the auto-write that will kick in
72
- during the iteration.
80
+ As we iterate through, the Multisert instance will build up the records and
81
+ then write itself to the specified database table when it hits an internal
82
+ count (default is 10_000 entries, but this can be adjusted via the
83
+ `max_buffer_count` attribute). The `buffer.write!` at the end ensures that
84
+ any pending entries that were not automatically taken care of by the
85
+ auto-write that kicks in during the iteration are written to the database
86
+ table.
73
87
 
74
88
  ## Insert Strategies
75
89
 
@@ -43,6 +43,11 @@ class Multisert
43
43
  @max_buffer_count || MAX_BUFFER_COUNT_DEFAULT
44
44
  end
45
45
 
46
+ def with_buffering &block
47
+ yield self
48
+ write_buffer!
49
+ end
50
+
46
51
  private
47
52
 
48
53
  def insert_strategy?
@@ -1,3 +1,3 @@
1
1
  class Multisert
2
- VERSION = "0.0.4"
2
+ VERSION = "0.0.5"
3
3
  end
@@ -38,7 +38,7 @@ def insert_performance_test connection, cleaner, sample_records, destination
38
38
  sample_records.each do |record|
39
39
  connection.query %[
40
40
  INSERT INTO #{destination} (#{fields})
41
- VALUES (#{record.map { |k,v| v }.join(', ')})]
41
+ VALUES (#{record.values.join(', ')})]
42
42
  end
43
43
  runtime = timer.stop!
44
44
  ensure_data_completeness! connection, destination, sample_records.count
@@ -57,10 +57,11 @@ def multinsert_performance_test connection, cleaner, sample_records, destination
57
57
  cleaner.ensure_clean_database!
58
58
 
59
59
  (timer = Timer.new).start!
60
- sample_records.each do |record|
61
- buffer << record.map { |k, v| v }
60
+ buffer.with_buffering do |buffer|
61
+ sample_records.each do |record|
62
+ buffer << record.values
63
+ end
62
64
  end
63
- buffer.flush!
64
65
  runtime = timer.stop!
65
66
  ensure_data_completeness! connection, destination, sample_records.count
66
67
  puts "multisert w/ buffer of #{buffer.max_buffer_count} took #{runtime.round(2)}s to insert #{sample_records.count} entries"
@@ -7,12 +7,22 @@ TEST_DATABASE = 'multisert_test'
7
7
  TEST_TABLE = 'test_data'
8
8
  TEST_INDEXED_TABLE = 'test_indexed_data'
9
9
 
10
+ def test_table
11
+ "#{TEST_DATABASE}.#{TEST_TABLE}"
12
+ end
13
+
14
+ def test_indexed_table
15
+ "#{TEST_DATABASE}.#{TEST_INDEXED_TABLE}"
16
+ end
17
+
10
18
  # TODO: make into yaml config
11
19
  $connection = Mysql2::Client.new(host: 'localhost', username: 'root')
12
20
 
13
- $cleaner = MultisertSpec::MrClean.new(database: TEST_DATABASE, connection: $connection) do |mgr|
21
+ $cleaner = MultisertSpec::MrClean.new(database: TEST_DATABASE,
22
+ connection: $connection) do |mgr|
23
+
14
24
  mgr.create_table_schemas << %[
15
- CREATE TABLE IF NOT EXISTS #{mgr.database}.#{TEST_TABLE} (
25
+ CREATE TABLE IF NOT EXISTS #{test_table} (
16
26
  test_field_int_1 int default null,
17
27
  test_field_int_2 int default null,
18
28
  test_field_int_3 int default null,
@@ -22,7 +32,7 @@ $cleaner = MultisertSpec::MrClean.new(database: TEST_DATABASE, connection: $conn
22
32
  test_field_datetime DATETIME default null)]
23
33
 
24
34
  mgr.create_table_schemas << %[
25
- CREATE TABLE IF NOT EXISTS #{mgr.database}.#{TEST_INDEXED_TABLE} (
35
+ CREATE TABLE IF NOT EXISTS #{test_indexed_table} (
26
36
  test_id int not null,
27
37
  test_field varchar(15) default null,
28
38
  primary key (test_id))]
@@ -54,17 +64,17 @@ describe Multisert do
54
64
  end
55
65
 
56
66
  it "does not fall over when there are no entries" do
57
- connection.query "DELETE FROM #{TEST_DATABASE}.#{TEST_TABLE}"
67
+ connection.query "DELETE FROM #{test_table}"
58
68
 
59
69
  buffer.write_buffer!
60
70
 
61
- write_buffer_records = connection.query "SELECT * FROM #{TEST_DATABASE}.#{TEST_TABLE}"
71
+ write_buffer_records = connection.query "SELECT * FROM #{test_table}"
62
72
  expect(write_buffer_records.to_a).to eq []
63
73
  expect(buffer.entries).to eq []
64
74
  end
65
75
 
66
76
  it "multi-inserts all added entries and clears #entries" do
67
- pre_write_buffer_records = connection.query "SELECT * FROM #{TEST_DATABASE}.#{TEST_TABLE}"
77
+ pre_write_buffer_records = connection.query "SELECT * FROM #{test_table}"
68
78
  expect(pre_write_buffer_records.to_a).to eq []
69
79
 
70
80
  buffer.connection = connection
@@ -88,7 +98,7 @@ describe Multisert do
88
98
  , test_field_int_2
89
99
  , test_field_int_3
90
100
  , test_field_int_4
91
- FROM #{TEST_DATABASE}.#{TEST_TABLE}]
101
+ FROM #{test_table}]
92
102
 
93
103
  expect(post_write_buffer_records.to_a).to eq [
94
104
  {'test_field_int_1' => 1, 'test_field_int_2' => 3, 'test_field_int_3' => 4, 'test_field_int_4' => 5},
@@ -100,7 +110,7 @@ describe Multisert do
100
110
  end
101
111
 
102
112
  it "works with strings" do
103
- pre_write_buffer_records = connection.query "SELECT * FROM #{TEST_DATABASE}.#{TEST_TABLE}"
113
+ pre_write_buffer_records = connection.query "SELECT * FROM #{test_table}"
104
114
  expect(pre_write_buffer_records.to_a).to eq []
105
115
 
106
116
  buffer.connection = connection
@@ -115,7 +125,7 @@ describe Multisert do
115
125
 
116
126
  buffer.write_buffer!
117
127
 
118
- post_write_buffer_records = connection.query %[SELECT test_field_varchar FROM #{TEST_DATABASE}.#{TEST_TABLE}]
128
+ post_write_buffer_records = connection.query %[SELECT test_field_varchar FROM #{test_table}]
119
129
  expect(post_write_buffer_records.to_a).to eq [
120
130
  {'test_field_varchar' => 'a'},
121
131
  {'test_field_varchar' => 'b'},
@@ -128,7 +138,7 @@ describe Multisert do
128
138
  it "works with strings that have illegal characters"
129
139
 
130
140
  it "works with dates" do
131
- pre_write_buffer_records = connection.query "SELECT * FROM #{TEST_DATABASE}.#{TEST_TABLE}"
141
+ pre_write_buffer_records = connection.query "SELECT * FROM #{test_table}"
132
142
  expect(pre_write_buffer_records.to_a).to eq []
133
143
 
134
144
  buffer.connection = connection
@@ -143,7 +153,7 @@ describe Multisert do
143
153
 
144
154
  buffer.write_buffer!
145
155
 
146
- post_write_buffer_records = connection.query %[SELECT test_field_date FROM #{TEST_DATABASE}.#{TEST_TABLE}]
156
+ post_write_buffer_records = connection.query %[SELECT test_field_date FROM #{test_table}]
147
157
 
148
158
  expect(post_write_buffer_records.to_a).to eq [
149
159
  {'test_field_date' => Date.parse('2013-01-15')},
@@ -155,7 +165,7 @@ describe Multisert do
155
165
  end
156
166
 
157
167
  it "works with times" do
158
- pre_write_buffer_records = connection.query "SELECT * FROM #{TEST_DATABASE}.#{TEST_TABLE}"
168
+ pre_write_buffer_records = connection.query "SELECT * FROM #{test_table}"
159
169
  expect(pre_write_buffer_records.to_a).to eq []
160
170
 
161
171
  buffer.connection = connection
@@ -170,7 +180,7 @@ describe Multisert do
170
180
 
171
181
  buffer.write_buffer!
172
182
 
173
- post_write_buffer_records = connection.query %[SELECT test_field_datetime FROM #{TEST_DATABASE}.#{TEST_TABLE}]
183
+ post_write_buffer_records = connection.query %[SELECT test_field_datetime FROM #{test_table}]
174
184
 
175
185
  expect(post_write_buffer_records.to_a).to eq [
176
186
  {'test_field_datetime' => Time.new(2013, 1, 15, 1, 5, 11)},
@@ -208,7 +218,7 @@ describe Multisert do
208
218
 
209
219
  context "set to replace" do
210
220
  it "writes over an existing record with the same primary / unique key" do
211
- connection.query %[INSERT INTO #{TEST_DATABASE}.#{TEST_INDEXED_TABLE} (test_id, test_field)
221
+ connection.query %[INSERT INTO #{test_indexed_table} (test_id, test_field)
212
222
  VALUES (1, 'ONE'), (2, 'TWO')]
213
223
 
214
224
  buffer.connection = connection
@@ -218,6 +228,7 @@ describe Multisert do
218
228
  buffer.insert_strategy = :replace
219
229
 
220
230
  buffer << [1, 'SOMETHING NEW']
231
+ buffer << [3, 'ALSO NEW']
221
232
 
222
233
  buffer.write_buffer!
223
234
 
@@ -225,17 +236,18 @@ describe Multisert do
225
236
  SELECT
226
237
  test_id
227
238
  , test_field
228
- FROM #{TEST_DATABASE}.#{TEST_INDEXED_TABLE}]
239
+ FROM #{test_indexed_table}]
229
240
 
230
241
  expect(post_write_buffer_records.to_a).to eq [
231
242
  {'test_id' => 1, 'test_field' => 'SOMETHING NEW'},
232
- {'test_id' => 2, 'test_field' => 'TWO'}]
243
+ {'test_id' => 2, 'test_field' => 'TWO'},
244
+ {'test_id' => 3, 'test_field' => 'ALSO NEW'}]
233
245
  end
234
246
  end
235
247
 
236
248
  context "set to ignore" do
237
249
  before do
238
- connection.query %[INSERT INTO #{TEST_DATABASE}.#{TEST_INDEXED_TABLE} (test_id, test_field)
250
+ connection.query %[INSERT INTO #{test_indexed_table} (test_id, test_field)
239
251
  VALUES (1, 'ONE'), (2, 'TWO')]
240
252
 
241
253
  buffer.connection = connection
@@ -245,6 +257,7 @@ describe Multisert do
245
257
  buffer.insert_strategy = :ignore
246
258
 
247
259
  buffer << [1, 'NEW']
260
+ buffer << [3, 'ALSO NEW']
248
261
  end
249
262
 
250
263
  it "does not raise an error" do
@@ -258,11 +271,12 @@ describe Multisert do
258
271
  SELECT
259
272
  test_id
260
273
  , test_field
261
- FROM #{TEST_DATABASE}.#{TEST_INDEXED_TABLE}]
274
+ FROM #{test_indexed_table}]
262
275
 
263
276
  expect(post_write_buffer_records.to_a).to eq [
264
277
  {'test_id' => 1, 'test_field' => 'ONE'},
265
- {'test_id' => 2, 'test_field' => 'TWO'}]
278
+ {'test_id' => 2, 'test_field' => 'TWO'},
279
+ {'test_id' => 3, 'test_field' => 'ALSO NEW'}]
266
280
  end
267
281
  end
268
282
 
@@ -280,4 +294,59 @@ describe Multisert do
280
294
  end
281
295
  end
282
296
  end
297
+
298
+ describe "#with_buffering" do
299
+ let(:connection) { $connection }
300
+ let(:buffer) { described_class.new }
301
+
302
+ before do
303
+ $cleaner.ensure_clean_database! teardown_tables: (!!ENV['TEARDOWN'] || false)
304
+ end
305
+
306
+ it "ensures all records are inserted" do
307
+ pre_write_buffer_records = connection.query "SELECT * FROM #{test_table}"
308
+ expect(pre_write_buffer_records.to_a).to eq []
309
+
310
+ buffer.connection = connection
311
+ buffer.database = TEST_DATABASE
312
+ buffer.table = TEST_TABLE
313
+ buffer.fields = ['test_field_int_1',
314
+ 'test_field_int_2',
315
+ 'test_field_int_3',
316
+ 'test_field_int_4']
317
+
318
+
319
+
320
+ sample_entries = [[ 1, 3, 4, 5],
321
+ [ 6, 7, 8, 9],
322
+ [10, 11, 12, 13],
323
+ [14, 15, 16, 17]]
324
+
325
+ # set this to 1 less than the total number of entries we want to buffer.
326
+ # test that all are written without explicitly calling #write!
327
+ buffer.max_buffer_count = sample_entries.length - 1
328
+
329
+ buffer.with_buffering do |buffer|
330
+ sample_entries.each do |entry|
331
+ buffer << entry.to_a
332
+ end
333
+ end
334
+
335
+ post_write_buffer_records = connection.query %[
336
+ SELECT
337
+ test_field_int_1
338
+ , test_field_int_2
339
+ , test_field_int_3
340
+ , test_field_int_4
341
+ FROM #{test_table}]
342
+
343
+ expect(post_write_buffer_records.to_a).to eq [
344
+ {'test_field_int_1' => 1, 'test_field_int_2' => 3, 'test_field_int_3' => 4, 'test_field_int_4' => 5},
345
+ {'test_field_int_1' => 6, 'test_field_int_2' => 7, 'test_field_int_3' => 8, 'test_field_int_4' => 9},
346
+ {'test_field_int_1' => 10, 'test_field_int_2' => 11, 'test_field_int_3' => 12, 'test_field_int_4' => 13},
347
+ {'test_field_int_1' => 14, 'test_field_int_2' => 15, 'test_field_int_3' => 16, 'test_field_int_4' => 17}]
348
+
349
+ expect(buffer.entries).to eq []
350
+ end
351
+ end
283
352
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: multisert
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.4
4
+ version: 0.0.5
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2013-03-27 00:00:00.000000000 Z
12
+ date: 2013-04-07 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: mysql2