multisert 0.0.2 → 0.0.3
Sign up to get free protection for your applications and to get access to all the features.
- data/README.md +37 -5
- data/lib/multisert.rb +18 -11
- data/lib/multisert/version.rb +1 -1
- data/multisert.gemspec +1 -1
- data/performance/multisert_performance_test.rb +9 -2
- data/spec/multisert_spec.rb +55 -36
- metadata +2 -2
data/README.md
CHANGED
@@ -27,7 +27,7 @@ CREATE TABLE IF NOT EXISTS some_database.some_table (
|
|
27
27
|
field_2 int default null,
|
28
28
|
field_3 int default null,
|
29
29
|
field_4 int default null
|
30
|
-
);
|
30
|
+
) ENGINE=InnoDB DEFAULT CHARSET=latin1;
|
31
31
|
```
|
32
32
|
|
33
33
|
Now let's say we want to insert 1,000,000 records after running the
|
@@ -57,22 +57,24 @@ buffer = Multisert.new connection: dbclient,
|
|
57
57
|
res = some_magical_calculation(i)
|
58
58
|
buffer << res
|
59
59
|
end
|
60
|
-
buffer.
|
60
|
+
buffer.write!
|
61
61
|
```
|
62
62
|
|
63
63
|
We start by creating a new Multisert instance, providing the database
|
64
64
|
connection, database and table, and fields as attributes. Next, as we get the
|
65
65
|
results from `some_magical_calculation`, we shovel each into the Multisert
|
66
66
|
instance. As we iterate through, the Multisert instance will build up the
|
67
|
-
records and then
|
67
|
+
records and then write itself to the specified database table when it hits an
|
68
68
|
internal count (default is 10_000, but can be set via the `max_buffer_count`
|
69
|
-
attribute). One last thing to note is the `buffer.
|
69
|
+
attribute). One last thing to note is the `buffer.write!` at the end of the
|
70
70
|
script. This ensures that any pending entries are written to the database table
|
71
|
-
that were not automatically taken care of by the auto-
|
71
|
+
that were not automatically taken care of by the auto-write that will kick in
|
72
72
|
during the iteration.
|
73
73
|
|
74
74
|
## Performance
|
75
75
|
|
76
|
+
### Individual vs Buffer
|
77
|
+
|
76
78
|
The gem has a quick performance test built in that can be run via:
|
77
79
|
```bash
|
78
80
|
$ ruby ./performance/multisert_performance_test.rb
|
@@ -117,6 +119,36 @@ The performance test was run on a computer with the following specs:
|
|
117
119
|
L3 Cache: 3 MB
|
118
120
|
Memory: 4 GB
|
119
121
|
|
122
|
+
All data was written to a mysql instance on localhost.
|
123
|
+
|
124
|
+
### Buffer Sizes
|
125
|
+
|
126
|
+
Let's take a look at how buffer size comes into play.
|
127
|
+
|
128
|
+
We ran 3 separate and independent tests on the same computer as above.
|
129
|
+
Also note that buffer sizes of 0 and 1 perform basically identically.
|
130
|
+
|
131
|
+
If we look at using a buffer size ranging from 0 - 10, we see the following
|
132
|
+
performance:
|
133
|
+
|
134
|
+
<img src="https://raw.github.com/jeffreyiacono/images/master/multisert/multisert-performance-test-0-10.png" width="900" alt="Buffer size: 0 - 10" />
|
135
|
+
|
136
|
+
If we take a step back and look at buffer sizes ranging from 0 - 100, we see the
|
137
|
+
following performance:
|
138
|
+
|
139
|
+
<img src="https://raw.github.com/jeffreyiacono/images/master/multisert/multisert-performance-test-0-100.png" width="900" alt="Buffer size: 0 - 100" />
|
140
|
+
|
141
|
+
Finally, if we look at buffer sizes ranging from 0 - 1,000 and 0 - 10,000 we see
|
142
|
+
the following performance (spoiler alert: not much difference, just more data
|
143
|
+
points!):
|
144
|
+
|
145
|
+
<img src="https://raw.github.com/jeffreyiacono/images/master/multisert/multisert-performance-test-0-1000.png" width="900" alt="Buffer size: 0 - 1,000" />
|
146
|
+
|
147
|
+
<img src="https://raw.github.com/jeffreyiacono/images/master/multisert/multisert-performance-test-0-10000.png" width="900" alt="Buffer size: 0 - 10,000" />
|
148
|
+
|
149
|
+
As can be seen, performance improves vastly as we grow the buffer size
|
150
|
+
from 0 to 100, but then levels off thereafter.
|
151
|
+
|
120
152
|
## FAQ
|
121
153
|
|
122
154
|
### Packet Too Large / Connection Lost Errors
|
data/lib/multisert.rb
CHANGED
@@ -18,37 +18,44 @@ class Multisert
|
|
18
18
|
end
|
19
19
|
|
20
20
|
def entries
|
21
|
-
|
21
|
+
buffer
|
22
22
|
end
|
23
23
|
|
24
24
|
def << entry
|
25
25
|
entries << entry
|
26
|
-
|
26
|
+
write_buffer! if write_buffer?
|
27
27
|
entry
|
28
28
|
end
|
29
29
|
|
30
|
-
def
|
30
|
+
def write_buffer!
|
31
31
|
return if buffer_empty?
|
32
32
|
@connection.query multisert_sql
|
33
|
-
|
33
|
+
reset_buffer!
|
34
34
|
end
|
35
35
|
|
36
|
+
alias_method :write!, :write_buffer!
|
37
|
+
alias_method :flush!, :write_buffer!
|
38
|
+
|
36
39
|
def max_buffer_count
|
37
40
|
@max_buffer_count || MAX_BUFFER_COUNT_DEFAULT
|
38
41
|
end
|
39
42
|
|
40
43
|
private
|
41
44
|
|
42
|
-
def
|
43
|
-
|
45
|
+
def buffer
|
46
|
+
@buffer ||= []
|
44
47
|
end
|
45
48
|
|
46
|
-
def
|
47
|
-
|
49
|
+
def reset_buffer!
|
50
|
+
@buffer = []
|
51
|
+
end
|
52
|
+
|
53
|
+
def buffer_empty?
|
54
|
+
buffer.empty?
|
48
55
|
end
|
49
56
|
|
50
|
-
def
|
51
|
-
|
57
|
+
def write_buffer?
|
58
|
+
buffer.count >= max_buffer_count
|
52
59
|
end
|
53
60
|
|
54
61
|
def multisert_sql
|
@@ -60,7 +67,7 @@ private
|
|
60
67
|
end
|
61
68
|
|
62
69
|
def multisert_values
|
63
|
-
@
|
70
|
+
@buffer.reduce([]) { |memo, entries|
|
64
71
|
memo << "(#{entries.map { |e| cast e }.join(',')})"
|
65
72
|
memo
|
66
73
|
}.join(",")
|
data/lib/multisert/version.rb
CHANGED
data/multisert.gemspec
CHANGED
@@ -8,7 +8,7 @@ Gem::Specification.new do |gem|
|
|
8
8
|
gem.summary = %q{Buffer to handle bulk INSERTs}
|
9
9
|
gem.homepage = "https://github.com/jeffreyiacono/multisert"
|
10
10
|
|
11
|
-
gem.files = `git ls-files`.split($\)
|
11
|
+
gem.files = `git ls-files`.split($\).delete_if { |f| f =~ /^data\// }
|
12
12
|
gem.executables = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
|
13
13
|
gem.test_files = gem.files.grep(%r{^(test|spec|features)/})
|
14
14
|
gem.name = "multisert"
|
@@ -79,7 +79,14 @@ sample_records = generate_records
|
|
79
79
|
|
80
80
|
puts_with_time "starting performance test: using #{sample_records.count} random entries, writing to #{PERFORMANCE_DESTINATION}"
|
81
81
|
|
82
|
-
#
|
83
|
-
|
82
|
+
# individual insert vs multisert
|
83
|
+
insert_performance_test CONNECTION, cleaner, sample_records, PERFORMANCE_DESTINATION
|
84
|
+
multinsert_performance_test CONNECTION, cleaner, sample_records, PERFORMANCE_DESTINATION, 10_000
|
85
|
+
|
86
|
+
mini_steps = (0..9)
|
87
|
+
big_steps = (10..10_000).step(10)
|
88
|
+
|
89
|
+
# buffer size performance test
|
90
|
+
[*mini_steps, *big_steps].each do |i|
|
84
91
|
multinsert_performance_test CONNECTION, cleaner, sample_records, PERFORMANCE_DESTINATION, i
|
85
92
|
end
|
data/spec/multisert_spec.rb
CHANGED
@@ -28,18 +28,18 @@ describe Multisert do
|
|
28
28
|
|
29
29
|
it "addes to the entries" do
|
30
30
|
buffer << [1, 2, 3]
|
31
|
-
buffer.entries.
|
31
|
+
expect(buffer.entries).to eq [[1, 2, 3]]
|
32
32
|
end
|
33
33
|
|
34
34
|
it "calls #flush! when the number of entries equals (or exceeds) max buffer count" do
|
35
35
|
buffer.max_buffer_count = 2
|
36
|
-
buffer.should_receive(:
|
36
|
+
buffer.should_receive(:write_buffer!)
|
37
37
|
buffer << [1, 2, 3]
|
38
38
|
buffer << [1, 2, 3]
|
39
39
|
end
|
40
40
|
end
|
41
41
|
|
42
|
-
describe "#
|
42
|
+
describe "#write_buffer!" do
|
43
43
|
let(:connection) { $connection }
|
44
44
|
let(:buffer) { described_class.new }
|
45
45
|
|
@@ -48,19 +48,19 @@ describe Multisert do
|
|
48
48
|
end
|
49
49
|
|
50
50
|
it "does not fall over when there are no entries" do
|
51
|
-
|
52
|
-
|
51
|
+
write_buffer_records = connection.query "DELETE FROM #{TEST_DATABASE}.#{TEST_TABLE}"
|
52
|
+
expect(write_buffer_records.to_a).to eq []
|
53
53
|
|
54
|
-
buffer.
|
54
|
+
buffer.write_buffer!
|
55
55
|
|
56
|
-
|
57
|
-
|
58
|
-
buffer.entries.
|
56
|
+
write_buffer_records = connection.query "SELECT * FROM #{TEST_DATABASE}.#{TEST_TABLE}"
|
57
|
+
expect(write_buffer_records.to_a).to eq []
|
58
|
+
expect(buffer.entries).to eq []
|
59
59
|
end
|
60
60
|
|
61
61
|
it "multi-inserts all added entries" do
|
62
|
-
|
63
|
-
|
62
|
+
pre_write_buffer_records = connection.query "SELECT * FROM #{TEST_DATABASE}.#{TEST_TABLE}"
|
63
|
+
expect(pre_write_buffer_records.to_a).to eq []
|
64
64
|
|
65
65
|
buffer.connection = connection
|
66
66
|
buffer.database = TEST_DATABASE
|
@@ -75,9 +75,9 @@ describe Multisert do
|
|
75
75
|
buffer << [10, 11, 12, 13]
|
76
76
|
buffer << [14, 15, 16, 17]
|
77
77
|
|
78
|
-
buffer.
|
78
|
+
buffer.write_buffer!
|
79
79
|
|
80
|
-
|
80
|
+
post_write_buffer_records = connection.query %[
|
81
81
|
SELECT
|
82
82
|
test_field_int_1
|
83
83
|
, test_field_int_2
|
@@ -85,18 +85,19 @@ describe Multisert do
|
|
85
85
|
, test_field_int_4
|
86
86
|
FROM #{TEST_DATABASE}.#{TEST_TABLE}]
|
87
87
|
|
88
|
-
|
88
|
+
expect(post_write_buffer_records.to_a).to eq [
|
89
89
|
{'test_field_int_1' => 1, 'test_field_int_2' => 3, 'test_field_int_3' => 4, 'test_field_int_4' => 5},
|
90
90
|
{'test_field_int_1' => 6, 'test_field_int_2' => 7, 'test_field_int_3' => 8, 'test_field_int_4' => 9},
|
91
91
|
{'test_field_int_1' => 10, 'test_field_int_2' => 11, 'test_field_int_3' => 12, 'test_field_int_4' => 13},
|
92
|
-
{'test_field_int_1' => 14, 'test_field_int_2' => 15, 'test_field_int_3' => 16, 'test_field_int_4' => 17}
|
92
|
+
{'test_field_int_1' => 14, 'test_field_int_2' => 15, 'test_field_int_3' => 16, 'test_field_int_4' => 17}
|
93
|
+
]
|
93
94
|
|
94
|
-
buffer.entries.
|
95
|
+
expect(buffer.entries).to eq []
|
95
96
|
end
|
96
97
|
|
97
98
|
it "works with strings" do
|
98
|
-
|
99
|
-
|
99
|
+
pre_write_buffer_records = connection.query "SELECT * FROM #{TEST_DATABASE}.#{TEST_TABLE}"
|
100
|
+
expect(pre_write_buffer_records.to_a).to eq []
|
100
101
|
|
101
102
|
buffer.connection = connection
|
102
103
|
buffer.database = TEST_DATABASE
|
@@ -108,23 +109,24 @@ describe Multisert do
|
|
108
109
|
buffer << ['c']
|
109
110
|
buffer << ['d']
|
110
111
|
|
111
|
-
buffer.
|
112
|
+
buffer.write_buffer!
|
112
113
|
|
113
|
-
|
114
|
-
|
114
|
+
post_write_buffer_records = connection.query %[SELECT test_field_varchar FROM #{TEST_DATABASE}.#{TEST_TABLE}]
|
115
|
+
expect(post_write_buffer_records.to_a).to eq [
|
115
116
|
{'test_field_varchar' => 'a'},
|
116
117
|
{'test_field_varchar' => 'b'},
|
117
118
|
{'test_field_varchar' => 'c'},
|
118
|
-
{'test_field_varchar' => 'd'}
|
119
|
+
{'test_field_varchar' => 'd'}
|
120
|
+
]
|
119
121
|
|
120
|
-
buffer.entries.
|
122
|
+
expect(buffer.entries).to eq []
|
121
123
|
end
|
122
124
|
|
123
125
|
it "works with strings that have illegal characters"
|
124
126
|
|
125
127
|
it "works with dates" do
|
126
|
-
|
127
|
-
|
128
|
+
pre_write_buffer_records = connection.query "SELECT * FROM #{TEST_DATABASE}.#{TEST_TABLE}"
|
129
|
+
expect(pre_write_buffer_records.to_a).to eq []
|
128
130
|
|
129
131
|
buffer.connection = connection
|
130
132
|
buffer.database = TEST_DATABASE
|
@@ -136,22 +138,23 @@ describe Multisert do
|
|
136
138
|
buffer << [Date.new(2013, 1, 17)]
|
137
139
|
buffer << [Date.new(2013, 1, 18)]
|
138
140
|
|
139
|
-
buffer.
|
141
|
+
buffer.write_buffer!
|
140
142
|
|
141
|
-
|
143
|
+
post_write_buffer_records = connection.query %[SELECT test_field_date FROM #{TEST_DATABASE}.#{TEST_TABLE}]
|
142
144
|
|
143
|
-
|
145
|
+
expect(post_write_buffer_records.to_a).to eq [
|
144
146
|
{'test_field_date' => Date.parse('2013-01-15')},
|
145
147
|
{'test_field_date' => Date.parse('2013-01-16')},
|
146
148
|
{'test_field_date' => Date.parse('2013-01-17')},
|
147
|
-
{'test_field_date' => Date.parse('2013-01-18')}
|
149
|
+
{'test_field_date' => Date.parse('2013-01-18')}
|
150
|
+
]
|
148
151
|
|
149
|
-
buffer.entries.
|
152
|
+
expect(buffer.entries).to eq []
|
150
153
|
end
|
151
154
|
|
152
155
|
it "works with times" do
|
153
|
-
|
154
|
-
|
156
|
+
pre_write_buffer_records = connection.query "SELECT * FROM #{TEST_DATABASE}.#{TEST_TABLE}"
|
157
|
+
expect(pre_write_buffer_records.to_a).to eq []
|
155
158
|
|
156
159
|
buffer.connection = connection
|
157
160
|
buffer.database = TEST_DATABASE
|
@@ -163,17 +166,33 @@ describe Multisert do
|
|
163
166
|
buffer << [Time.new(2013, 1, 17, 3, 7, 33)]
|
164
167
|
buffer << [Time.new(2013, 1, 18, 4, 8, 44)]
|
165
168
|
|
166
|
-
buffer.
|
169
|
+
buffer.write_buffer!
|
167
170
|
|
168
|
-
|
171
|
+
post_write_buffer_records = connection.query %[SELECT test_field_datetime FROM #{TEST_DATABASE}.#{TEST_TABLE}]
|
169
172
|
|
170
|
-
|
173
|
+
expect(post_write_buffer_records.to_a).to eq [
|
171
174
|
{'test_field_datetime' => Time.new(2013, 1, 15, 1, 5, 11)},
|
172
175
|
{'test_field_datetime' => Time.new(2013, 1, 16, 2, 6, 22)},
|
173
176
|
{'test_field_datetime' => Time.new(2013, 1, 17, 3, 7, 33)},
|
174
177
|
{'test_field_datetime' => Time.new(2013, 1, 18, 4, 8, 44)}]
|
175
178
|
|
176
|
-
buffer.entries.
|
179
|
+
expect(buffer.entries).to eq []
|
180
|
+
end
|
181
|
+
end
|
182
|
+
|
183
|
+
describe "#flush!" do
|
184
|
+
it "aliases #write_buffer!" do
|
185
|
+
instance = described_class.new
|
186
|
+
flush_method = instance.method(:flush!)
|
187
|
+
expect(flush_method).to eq instance.method(:write_buffer!)
|
188
|
+
end
|
189
|
+
end
|
190
|
+
|
191
|
+
describe "#write!" do
|
192
|
+
it "aliases #write_buffer!" do
|
193
|
+
instance = described_class.new
|
194
|
+
flush_method = instance.method(:write!)
|
195
|
+
expect(flush_method).to eq instance.method(:write_buffer!)
|
177
196
|
end
|
178
197
|
end
|
179
198
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: multisert
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.3
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2013-03-
|
12
|
+
date: 2013-03-13 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: mysql2
|