upsert 0.1.0 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/LICENSE +1 -1
- data/README.md +132 -35
- data/lib/upsert.rb +30 -20
- data/lib/upsert/buffer.rb +10 -2
- data/lib/upsert/buffer/mysql2_client.rb +53 -31
- data/lib/upsert/buffer/pg_connection.rb +1 -1
- data/lib/upsert/buffer/pg_connection/column_definition.rb +14 -14
- data/lib/upsert/buffer/sqlite3_database.rb +2 -5
- data/lib/upsert/row.rb +2 -2
- data/lib/upsert/version.rb +1 -1
- data/test/helper.rb +18 -13
- data/test/shared/binary.rb +12 -14
- data/test/shared/correctness.rb +52 -31
- data/test/shared/database.rb +16 -37
- data/test/shared/multibyte.rb +24 -13
- data/test/shared/speed.rb +51 -53
- data/test/shared/threaded.rb +31 -0
- data/test/shared/timezones.rb +19 -21
- data/test/test_mysql2.rb +3 -1
- data/test/test_pg.rb +3 -1
- data/test/test_sqlite.rb +3 -1
- metadata +4 -2
data/LICENSE
CHANGED
data/README.md
CHANGED
|
@@ -1,59 +1,89 @@
|
|
|
1
1
|
# Upsert
|
|
2
2
|
|
|
3
|
-
Finally, all those SQL MERGE tricks codified.
|
|
3
|
+
Finally, all those SQL MERGE tricks codified so that you can do "upsert" on MySQL, PostgreSQL, and SQLite.
|
|
4
4
|
|
|
5
5
|
## Usage
|
|
6
6
|
|
|
7
|
+
Let's say you have...
|
|
8
|
+
|
|
9
|
+
class Pet < ActiveRecord::Base
|
|
10
|
+
# col :name
|
|
11
|
+
# col :breed
|
|
12
|
+
end
|
|
13
|
+
|
|
7
14
|
### One at a time
|
|
8
15
|
|
|
9
16
|
upsert = Upsert.new Pet.connection, Pet.table_name
|
|
10
|
-
|
|
11
|
-
|
|
17
|
+
selector = {:name => 'Jerry'}
|
|
18
|
+
document = {:breed => 'beagle'}
|
|
19
|
+
upsert.row selector, document
|
|
12
20
|
|
|
13
|
-
### Multiple upserts
|
|
21
|
+
### Multiple upserts bundled together for speed
|
|
14
22
|
|
|
15
|
-
Upsert.
|
|
23
|
+
Upsert.stream(Pet.connection, Pet.table_name) do |upsert|
|
|
24
|
+
# [...]
|
|
16
25
|
upsert.row({:name => 'Jerry'}, :breed => 'beagle')
|
|
26
|
+
# [...]
|
|
17
27
|
upsert.row({:name => 'Pierre'}, :breed => 'tabby')
|
|
28
|
+
# [...]
|
|
18
29
|
end
|
|
19
30
|
|
|
20
|
-
|
|
31
|
+
Rows are buffered in memory until it's efficient to send them to the database.
|
|
32
|
+
|
|
33
|
+
## Real-world usage
|
|
21
34
|
|
|
22
|
-
|
|
35
|
+
<p><a href="http://brighterplanet.com"><img src="https://s3.amazonaws.com/static.brighterplanet.com/assets/logos/flush-left/inline/green/rasterized/brighter_planet-160-transparent.png" alt="Brighter Planet logo"/></a></p>
|
|
23
36
|
|
|
24
|
-
|
|
37
|
+
We use `upsert` for [big data processing at Brighter Planet](http://brighterplanet.com/research) and in production at
|
|
38
|
+
|
|
39
|
+
* [Brighter Planet's impact estimate web service](http://impact.brighterplanet.com)
|
|
40
|
+
* [Brighter Planet's reference data web service](http://data.brighterplanet.com)
|
|
41
|
+
|
|
42
|
+
Originally written to speed up the [`data_miner`](https://github.com/seamusabshere/data_miner) data mining library.
|
|
43
|
+
|
|
44
|
+
## Supported databases
|
|
25
45
|
|
|
26
46
|
### MySQL
|
|
27
47
|
|
|
28
|
-
|
|
48
|
+
Using the [mysql2](https://rubygems.org/gems/mysql2) driver.
|
|
49
|
+
|
|
50
|
+
Upsert.new Mysql2::Client.new([...]), :pets
|
|
51
|
+
|
|
52
|
+
#### Speed
|
|
53
|
+
|
|
54
|
+
From the tests:
|
|
29
55
|
|
|
30
|
-
Upsert was 47% faster than faking upserts with activerecord-import
|
|
31
56
|
Upsert was 77% faster than find + new/set/save
|
|
32
57
|
Upsert was 84% faster than create + rescue/find/update
|
|
33
58
|
Upsert was 82% faster than find_or_create + update_attributes
|
|
59
|
+
Upsert was 47% faster than faking upserts with activerecord-import
|
|
34
60
|
|
|
35
|
-
|
|
61
|
+
#### SQL MERGE trick
|
|
36
62
|
|
|
37
|
-
|
|
38
|
-
Upsert was 84% faster than find_or_create + update_attributes
|
|
39
|
-
Upsert was 87% faster than create + rescue/find/update
|
|
63
|
+
"ON DUPLICATE KEY UPDATE" where we just set everything to the value of the insert.
|
|
40
64
|
|
|
41
|
-
|
|
65
|
+
# http://dev.mysql.com/doc/refman/5.0/en/insert-on-duplicate.html
|
|
66
|
+
INSERT INTO table (a,b,c) VALUES (1,2,3), (4,5,6)
|
|
67
|
+
ON DUPLICATE KEY UPDATE a=VALUES(a),b=VALUES(b),c=VALUES(c);
|
|
42
68
|
|
|
43
|
-
|
|
44
|
-
2. [sqlite3](https://rubygems.org/gems/sqlite3)
|
|
45
|
-
3. [pg](https://rubygems.org/gems/pg)
|
|
46
|
-
4. Any of these wrapped in an ActiveRecord connection adapter (e.g. `Upsert.new(Pet.connection, Pet.table_name)`)
|
|
69
|
+
Since this is an upsert helper library, not a general-use ON DUPLICATE KEY UPDATE wrapper, you **can't** do things like `c=c+1`.
|
|
47
70
|
|
|
48
|
-
|
|
71
|
+
### PostgreSQL
|
|
49
72
|
|
|
50
|
-
|
|
73
|
+
Using the [pg](https://rubygems.org/gems/pg) driver.
|
|
51
74
|
|
|
52
|
-
|
|
53
|
-
INSERT INTO table (a,b,c) VALUES (1,2,3)
|
|
54
|
-
ON DUPLICATE KEY UPDATE c=c+1;
|
|
75
|
+
Upsert.new PG.connect([...]), :pets
|
|
55
76
|
|
|
56
|
-
|
|
77
|
+
#### Speed
|
|
78
|
+
|
|
79
|
+
From the tests:
|
|
80
|
+
|
|
81
|
+
Upsert was 73% faster than find + new/set/save
|
|
82
|
+
Upsert was 84% faster than find_or_create + update_attributes
|
|
83
|
+
Upsert was 87% faster than create + rescue/find/update
|
|
84
|
+
# (can't compare to activerecord-import because you can't fake it on pg)
|
|
85
|
+
|
|
86
|
+
#### SQL MERGE trick
|
|
57
87
|
|
|
58
88
|
# http://www.postgresql.org/docs/current/interactive/plpgsql-control-structures.html#PLPGSQL-ERROR-TRAPPING
|
|
59
89
|
CREATE TABLE db (a INT PRIMARY KEY, b TEXT);
|
|
@@ -82,15 +112,7 @@ Finally, all those SQL MERGE tricks codified.
|
|
|
82
112
|
SELECT merge_db(1, 'david');
|
|
83
113
|
SELECT merge_db(1, 'dennis');
|
|
84
114
|
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
# http://stackoverflow.com/questions/2717590/sqlite-upsert-on-duplicate-key-update
|
|
88
|
-
INSERT OR IGNORE INTO visits VALUES ($ip, 0);
|
|
89
|
-
UPDATE visits SET hits = hits + 1 WHERE ip LIKE $ip;
|
|
90
|
-
|
|
91
|
-
### Unused alternatives
|
|
92
|
-
|
|
93
|
-
#### PostgreSQL
|
|
115
|
+
The decision was made **not** to use the following because it's not straight from the manual:
|
|
94
116
|
|
|
95
117
|
# http://stackoverflow.com/questions/1109061/insert-on-duplicate-update-postgresql
|
|
96
118
|
UPDATE table SET field='C', field2='Z' WHERE id=3;
|
|
@@ -98,6 +120,8 @@ Finally, all those SQL MERGE tricks codified.
|
|
|
98
120
|
SELECT 3, 'C', 'Z'
|
|
99
121
|
WHERE NOT EXISTS (SELECT 1 FROM table WHERE id=3);
|
|
100
122
|
|
|
123
|
+
This was also rejected because there's something we can use in the manual:
|
|
124
|
+
|
|
101
125
|
# http://stackoverflow.com/questions/5269590/why-doesnt-this-rule-prevent-duplicate-key-violations
|
|
102
126
|
BEGIN;
|
|
103
127
|
CREATE TEMP TABLE stage_data(key_column, data_columns...) ON COMMIT DROP;
|
|
@@ -110,4 +134,77 @@ Finally, all those SQL MERGE tricks codified.
|
|
|
110
134
|
FROM stage_data
|
|
111
135
|
WHERE NOT EXISTS (SELECT 1 FROM target_data
|
|
112
136
|
WHERE target_data.key_column = stage_data.key_column)
|
|
113
|
-
END;
|
|
137
|
+
END;
|
|
138
|
+
|
|
139
|
+
### Sqlite
|
|
140
|
+
|
|
141
|
+
Using the [sqlite3](https://rubygems.org/gems/sqlite3) driver.
|
|
142
|
+
|
|
143
|
+
Upsert.new SQLite3::Database.open([...]), :pets
|
|
144
|
+
|
|
145
|
+
#### Speed
|
|
146
|
+
|
|
147
|
+
FIXME tests are segfaulting. Pull request would be lovely.
|
|
148
|
+
|
|
149
|
+
#### SQL MERGE trick
|
|
150
|
+
|
|
151
|
+
# http://stackoverflow.com/questions/2717590/sqlite-upsert-on-duplicate-key-update
|
|
152
|
+
# bad example because we're not doing on-duplicate-key update
|
|
153
|
+
INSERT OR IGNORE INTO visits VALUES (127.0.0.1, 1);
|
|
154
|
+
UPDATE visits SET visits = 1 WHERE ip LIKE 127.0.0.1;
|
|
155
|
+
|
|
156
|
+
### Rails / ActiveRecord
|
|
157
|
+
|
|
158
|
+
(assuming that one of the other three supported drivers is being used under the covers)
|
|
159
|
+
|
|
160
|
+
Upsert.new Pet.connection, Pet.table_name
|
|
161
|
+
|
|
162
|
+
#### Speed
|
|
163
|
+
|
|
164
|
+
Depends on the driver being used!
|
|
165
|
+
|
|
166
|
+
#### SQL MERGE trick
|
|
167
|
+
|
|
168
|
+
Depends on the driver being used!
|
|
169
|
+
|
|
170
|
+
## Features
|
|
171
|
+
|
|
172
|
+
### Tested to be fast and portable
|
|
173
|
+
|
|
174
|
+
In addition to correctness, the library's tests check that it is
|
|
175
|
+
|
|
176
|
+
1. Faster than comparable upsert techniques
|
|
177
|
+
2. Compatible with supported databases
|
|
178
|
+
|
|
179
|
+
### Not dependent on ActiveRecord
|
|
180
|
+
|
|
181
|
+
As shown below, all you need is a raw database connection like a `Mysql2::Client`, `PG::Connection` or a `SQLite3::Database`. These are equivalent:
|
|
182
|
+
|
|
183
|
+
# with activerecord
|
|
184
|
+
Upsert.new ActiveRecord::Base.connection, :pets
|
|
185
|
+
# with activerecord, prettier
|
|
186
|
+
Upsert.new Pet.connection, Pet.table_name
|
|
187
|
+
# without activerecord
|
|
188
|
+
Upsert.new Mysql2::Client.new([...]), :pets
|
|
189
|
+
|
|
190
|
+
### For a specific use case, faster and more portable than `activerecord-import`
|
|
191
|
+
|
|
192
|
+
You could also use [activerecord-import](https://github.com/zdennis/activerecord-import) to upsert:
|
|
193
|
+
|
|
194
|
+
Pet.import columns, all_values, :timestamps => false, :on_duplicate_key_update => columns
|
|
195
|
+
|
|
196
|
+
This, however, only works on MySQL and requires ActiveRecord—and if all you are doing is upserts, `upsert` is tested to be 40% faster. And you don't have to put all of the rows to be upserted into a single huge array - you can stream them using `Upsert.stream`.
|
|
197
|
+
|
|
198
|
+
### Loosely based on mongo-ruby-driver's upsert functionality
|
|
199
|
+
|
|
200
|
+
The `selector` and `document` arguments are inspired by the upsert functionality of the [mongo-ruby-driver's update method](http://api.mongodb.org/ruby/1.6.4/Mongo/Collection.html#update-instance_method).
|
|
201
|
+
|
|
202
|
+
## Wishlist
|
|
203
|
+
|
|
204
|
+
1. `Pet.upsert`... duh
|
|
205
|
+
2. Don't need a separate buffer class... just extend an instance of Upsert with the appropriate database driver module.
|
|
206
|
+
|
|
207
|
+
## Copyright
|
|
208
|
+
|
|
209
|
+
Copyright 2012 Brighter Planet, Inc.
|
|
210
|
+
|
data/lib/upsert.rb
CHANGED
|
@@ -17,6 +17,30 @@ class Upsert
|
|
|
17
17
|
def binary(v)
|
|
18
18
|
Binary.new v
|
|
19
19
|
end
|
|
20
|
+
|
|
21
|
+
# @yield [Upsert] An +Upsert+ object in streaming mode. You can call #row on it multiple times and it will try to optimize on speed.
|
|
22
|
+
#
|
|
23
|
+
# @note Rows are buffered in memory until it's efficient to send a packet to the server.
|
|
24
|
+
#
|
|
25
|
+
# @raise [Upsert::TooBig] If any row is too big to fit inside a single packet.
|
|
26
|
+
#
|
|
27
|
+
# @return [nil]
|
|
28
|
+
#
|
|
29
|
+
# @example Many at once
|
|
30
|
+
# Upsert.stream(Pet.connection, Pet.table_name) do |upsert|
|
|
31
|
+
# upsert.row({:name => 'Jerry'}, :breed => 'beagle')
|
|
32
|
+
# upsert.row({:name => 'Pierre'}, :breed => 'tabby')
|
|
33
|
+
# end
|
|
34
|
+
def stream(connection, table_name)
|
|
35
|
+
upsert = new connection, table_name
|
|
36
|
+
upsert.buffer.async!
|
|
37
|
+
yield upsert
|
|
38
|
+
upsert.buffer.sync!
|
|
39
|
+
end
|
|
40
|
+
end
|
|
41
|
+
|
|
42
|
+
# Raised if a query would be too large to send in a single packet.
|
|
43
|
+
class TooBig < RuntimeError
|
|
20
44
|
end
|
|
21
45
|
|
|
22
46
|
# @private
|
|
@@ -25,13 +49,18 @@ class Upsert
|
|
|
25
49
|
# @param [Mysql2::Client,Sqlite3::Database,PG::Connection,#raw_connection] connection A supported database connection.
|
|
26
50
|
# @param [String,Symbol] table_name The name of the table into which you will be upserting.
|
|
27
51
|
def initialize(connection, table_name)
|
|
28
|
-
@multi_mutex = Mutex.new
|
|
29
52
|
@buffer = Buffer.for connection, table_name
|
|
30
53
|
end
|
|
31
54
|
|
|
55
|
+
# Upsert a row given a selector and a document.
|
|
56
|
+
#
|
|
57
|
+
# @see http://api.mongodb.org/ruby/1.6.4/Mongo/Collection.html#update-instance_method Loosely based on the upsert functionality of the mongo-ruby-driver #update method
|
|
58
|
+
#
|
|
32
59
|
# @param [Hash] selector Key-value pairs that will be used to find or create a row.
|
|
33
60
|
# @param [Hash] document Key-value pairs that will be set on the row, whether it previously existed or not.
|
|
34
61
|
#
|
|
62
|
+
# @raise [Upsert::TooBig] If any row is too big to fit inside a single packet.
|
|
63
|
+
#
|
|
35
64
|
# @return [nil]
|
|
36
65
|
#
|
|
37
66
|
# @example One at a time
|
|
@@ -42,23 +71,4 @@ class Upsert
|
|
|
42
71
|
buffer.add selector, document
|
|
43
72
|
nil
|
|
44
73
|
end
|
|
45
|
-
|
|
46
|
-
# @yield [Upsert] An +Upsert+ object in "async" mode. You can call #row on it multiple times and it will try to optimize on speed.
|
|
47
|
-
#
|
|
48
|
-
# @return [nil]
|
|
49
|
-
#
|
|
50
|
-
# @example Many at once
|
|
51
|
-
# Upsert.new(Pet.connection, Pet.table_name).multi do |upsert|
|
|
52
|
-
# upsert.row({:name => 'Jerry'}, :breed => 'beagle')
|
|
53
|
-
# upsert.row({:name => 'Pierre'}, :breed => 'tabby')
|
|
54
|
-
# end
|
|
55
|
-
def multi
|
|
56
|
-
@multi_mutex.synchronize do
|
|
57
|
-
buffer.async = true
|
|
58
|
-
yield self
|
|
59
|
-
buffer.async = false
|
|
60
|
-
buffer.clear
|
|
61
|
-
end
|
|
62
|
-
nil
|
|
63
|
-
end
|
|
64
74
|
end
|
data/lib/upsert/buffer.rb
CHANGED
|
@@ -17,12 +17,11 @@ class Upsert
|
|
|
17
17
|
E_AND_SINGLE_QUOTE = %{E'}
|
|
18
18
|
X_AND_SINGLE_QUOTE = %{x'}
|
|
19
19
|
USEC_SPRINTF = '%06d'
|
|
20
|
-
ISO8601_DATETIME = '%Y-%m-%d %H:%M:%S'
|
|
20
|
+
ISO8601_DATETIME = '%Y-%m-%d %H:%M:%S'
|
|
21
21
|
|
|
22
22
|
attr_reader :connection
|
|
23
23
|
attr_reader :table_name
|
|
24
24
|
attr_reader :rows
|
|
25
|
-
attr_writer :async
|
|
26
25
|
|
|
27
26
|
def initialize(connection, table_name)
|
|
28
27
|
@connection = connection
|
|
@@ -34,6 +33,15 @@ class Upsert
|
|
|
34
33
|
!!@async
|
|
35
34
|
end
|
|
36
35
|
|
|
36
|
+
def async!
|
|
37
|
+
@async = true
|
|
38
|
+
end
|
|
39
|
+
|
|
40
|
+
def sync!
|
|
41
|
+
@async = false
|
|
42
|
+
clear
|
|
43
|
+
end
|
|
44
|
+
|
|
37
45
|
def add(selector, document)
|
|
38
46
|
rows << Row.new(self, selector, document)
|
|
39
47
|
if sql = chunk
|
|
@@ -5,29 +5,43 @@ class Upsert
|
|
|
5
5
|
include Quoter
|
|
6
6
|
|
|
7
7
|
def chunk
|
|
8
|
-
return
|
|
9
|
-
|
|
10
|
-
|
|
8
|
+
return if rows.empty?
|
|
9
|
+
all = rows.length
|
|
10
|
+
take = all
|
|
11
|
+
while take > 1 and probably_oversize?(take)
|
|
11
12
|
take -= 1
|
|
12
13
|
end
|
|
13
|
-
if async? and
|
|
14
|
-
return
|
|
14
|
+
if async? and take == all
|
|
15
|
+
return
|
|
15
16
|
end
|
|
16
|
-
|
|
17
|
+
while take > 1 and oversize?(take)
|
|
18
|
+
$stderr.puts " Length prediction via sampling failed, shrinking" if ENV['UPSERT_DEBUG'] == 'true'
|
|
19
|
+
take -= 1
|
|
20
|
+
end
|
|
21
|
+
chunk = sql take
|
|
22
|
+
while take > 1 and chunk.bytesize > max_sql_bytesize
|
|
23
|
+
$stderr.puts " Supposedly exact bytesize guess failed, shrinking" if ENV['UPSERT_DEBUG'] == 'true'
|
|
24
|
+
take -= 1
|
|
25
|
+
chunk = sql take
|
|
26
|
+
end
|
|
27
|
+
if chunk.bytesize > max_sql_bytesize
|
|
28
|
+
raise TooBig
|
|
29
|
+
end
|
|
30
|
+
$stderr.puts " Chunk (#{take}/#{chunk.bytesize}) was #{(chunk.bytesize / max_sql_bytesize.to_f * 100).round}% of the max" if ENV['UPSERT_DEBUG'] == 'true'
|
|
17
31
|
@rows = rows.drop(take)
|
|
18
|
-
|
|
32
|
+
chunk
|
|
19
33
|
end
|
|
20
34
|
|
|
21
35
|
def execute(sql)
|
|
22
36
|
connection.query sql
|
|
23
37
|
end
|
|
24
38
|
|
|
25
|
-
def
|
|
26
|
-
|
|
39
|
+
def probably_oversize?(take)
|
|
40
|
+
estimate_sql_bytesize(take) > max_sql_bytesize
|
|
27
41
|
end
|
|
28
42
|
|
|
29
|
-
def
|
|
30
|
-
|
|
43
|
+
def oversize?(take)
|
|
44
|
+
sql_bytesize(take) > max_sql_bytesize
|
|
31
45
|
end
|
|
32
46
|
|
|
33
47
|
def columns
|
|
@@ -49,17 +63,26 @@ class Upsert
|
|
|
49
63
|
end
|
|
50
64
|
|
|
51
65
|
# where 2 is the parens
|
|
52
|
-
def
|
|
53
|
-
@
|
|
66
|
+
def static_sql_bytesize
|
|
67
|
+
@static_sql_bytesize ||= insert_part.bytesize + update_part.bytesize + 2
|
|
54
68
|
end
|
|
55
69
|
|
|
56
70
|
# where 3 is parens and comma
|
|
57
|
-
def
|
|
58
|
-
rows.first(take).inject(0) { |sum, row| sum + row.
|
|
71
|
+
def variable_sql_bytesize(take)
|
|
72
|
+
rows.first(take).inject(0) { |sum, row| sum + row.values_sql_bytesize + 3 }
|
|
59
73
|
end
|
|
60
74
|
|
|
61
|
-
def
|
|
62
|
-
|
|
75
|
+
def estimate_variable_sql_bytesize(take)
|
|
76
|
+
p = (take / 10.0).ceil
|
|
77
|
+
10.0 * rows.sample(p).inject(0) { |sum, row| sum + row.values_sql_bytesize + 3 }
|
|
78
|
+
end
|
|
79
|
+
|
|
80
|
+
def sql_bytesize(take)
|
|
81
|
+
static_sql_bytesize + variable_sql_bytesize(take)
|
|
82
|
+
end
|
|
83
|
+
|
|
84
|
+
def estimate_sql_bytesize(take)
|
|
85
|
+
static_sql_bytesize + estimate_variable_sql_bytesize(take)
|
|
63
86
|
end
|
|
64
87
|
|
|
65
88
|
def sql(take)
|
|
@@ -67,11 +90,11 @@ class Upsert
|
|
|
67
90
|
[ insert_part, '(', all_value_sql.join('),('), ')', update_part ].join
|
|
68
91
|
end
|
|
69
92
|
|
|
70
|
-
def
|
|
71
|
-
@
|
|
93
|
+
def max_sql_bytesize
|
|
94
|
+
@max_sql_bytesize ||= connection.query("SHOW VARIABLES LIKE 'max_allowed_packet'", :as => :hash).first['Value'].to_i
|
|
72
95
|
end
|
|
73
96
|
|
|
74
|
-
def
|
|
97
|
+
def quoted_value_bytesize(v)
|
|
75
98
|
case v
|
|
76
99
|
when NilClass
|
|
77
100
|
4
|
|
@@ -80,15 +103,13 @@ class Upsert
|
|
|
80
103
|
when FalseClass
|
|
81
104
|
5
|
|
82
105
|
when BigDecimal
|
|
83
|
-
v.to_s('F').
|
|
106
|
+
v.to_s('F').bytesize
|
|
84
107
|
when Upsert::Binary
|
|
85
|
-
|
|
86
|
-
v.length * 2 + 3
|
|
108
|
+
v.bytesize * 2 + 3
|
|
87
109
|
when Numeric
|
|
88
|
-
v.to_s.
|
|
110
|
+
v.to_s.bytesize
|
|
89
111
|
when String
|
|
90
|
-
|
|
91
|
-
v.length * 2 + 2
|
|
112
|
+
v.bytesize + 2
|
|
92
113
|
when Time, DateTime
|
|
93
114
|
24 + 2
|
|
94
115
|
when Date
|
|
@@ -106,13 +127,14 @@ class Upsert
|
|
|
106
127
|
SINGLE_QUOTE + connection.escape(v) + SINGLE_QUOTE
|
|
107
128
|
end
|
|
108
129
|
|
|
109
|
-
#
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
130
|
+
# This doubles the size of the representation.
|
|
131
|
+
def quote_binary(v)
|
|
132
|
+
X_AND_SINGLE_QUOTE + v.unpack("H*")[0] + SINGLE_QUOTE
|
|
133
|
+
end
|
|
113
134
|
|
|
114
135
|
# put raw binary straight into sql
|
|
115
|
-
|
|
136
|
+
# might work if we could get the encoding issues fixed when joining together the values for the sql
|
|
137
|
+
# alias_method :quote_binary, :quote_string
|
|
116
138
|
|
|
117
139
|
def quote_time(v)
|
|
118
140
|
quote_string v.strftime(ISO8601_DATETIME)
|
|
@@ -7,20 +7,20 @@ class Upsert
|
|
|
7
7
|
class << self
|
|
8
8
|
def auto_increment_primary_key(connection, table_name)
|
|
9
9
|
res = connection.exec <<-EOS
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
10
|
+
SELECT attr.attname, seq.relname
|
|
11
|
+
FROM pg_class seq,
|
|
12
|
+
pg_attribute attr,
|
|
13
|
+
pg_depend dep,
|
|
14
|
+
pg_namespace name,
|
|
15
|
+
pg_constraint cons
|
|
16
|
+
WHERE seq.oid = dep.objid
|
|
17
|
+
AND seq.relkind = 'S'
|
|
18
|
+
AND attr.attrelid = dep.refobjid
|
|
19
|
+
AND attr.attnum = dep.refobjsubid
|
|
20
|
+
AND attr.attrelid = cons.conrelid
|
|
21
|
+
AND attr.attnum = cons.conkey[1]
|
|
22
|
+
AND cons.contype = 'p'
|
|
23
|
+
AND dep.refobjid = '#{connection.quote_ident(table_name.to_s)}'::regclass
|
|
24
24
|
EOS
|
|
25
25
|
if hit = res.first
|
|
26
26
|
hit['attname']
|
|
@@ -5,12 +5,9 @@ class Upsert
|
|
|
5
5
|
include Quoter
|
|
6
6
|
|
|
7
7
|
def chunk
|
|
8
|
-
return
|
|
8
|
+
return if rows.empty?
|
|
9
9
|
row = rows.shift
|
|
10
|
-
%{
|
|
11
|
-
INSERT OR IGNORE INTO "#{table_name}" (#{row.columns_sql}) VALUES (#{row.values_sql});
|
|
12
|
-
UPDATE "#{table_name}" SET #{row.set_sql} WHERE #{row.where_sql}
|
|
13
|
-
}
|
|
10
|
+
%{INSERT OR IGNORE INTO "#{table_name}" (#{row.columns_sql}) VALUES (#{row.values_sql});UPDATE "#{table_name}" SET #{row.set_sql} WHERE #{row.where_sql}}
|
|
14
11
|
end
|
|
15
12
|
|
|
16
13
|
def execute(sql)
|
data/lib/upsert/row.rb
CHANGED
|
@@ -15,8 +15,8 @@ class Upsert
|
|
|
15
15
|
@columns ||= (selector.keys + document.keys).uniq
|
|
16
16
|
end
|
|
17
17
|
|
|
18
|
-
def
|
|
19
|
-
@
|
|
18
|
+
def values_sql_bytesize
|
|
19
|
+
@values_sql_bytesize ||= pairs.inject(0) { |sum, (_, v)| sum + buffer.quoted_value_bytesize(v) }
|
|
20
20
|
end
|
|
21
21
|
|
|
22
22
|
def values_sql
|
data/lib/upsert/version.rb
CHANGED
data/test/helper.rb
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
require 'rubygems'
|
|
2
2
|
require 'bundler/setup'
|
|
3
|
+
require 'securerandom'
|
|
3
4
|
require 'zlib'
|
|
4
5
|
require 'benchmark'
|
|
5
6
|
require 'faker'
|
|
@@ -20,6 +21,7 @@ require 'active_record_inline_schema'
|
|
|
20
21
|
class Pet < ActiveRecord::Base
|
|
21
22
|
col :name
|
|
22
23
|
col :gender
|
|
24
|
+
col :spiel
|
|
23
25
|
col :good, :type => :boolean
|
|
24
26
|
col :lovability, :type => :float
|
|
25
27
|
col :morning_walk_time, :type => :datetime
|
|
@@ -30,6 +32,8 @@ class Pet < ActiveRecord::Base
|
|
|
30
32
|
add_index :name, :unique => true
|
|
31
33
|
end
|
|
32
34
|
|
|
35
|
+
# ENV['UPSERT_DEBUG'] = 'true'
|
|
36
|
+
|
|
33
37
|
require 'upsert'
|
|
34
38
|
|
|
35
39
|
MiniTest::Spec.class_eval do
|
|
@@ -41,20 +45,21 @@ MiniTest::Spec.class_eval do
|
|
|
41
45
|
@records ||= begin
|
|
42
46
|
memo = []
|
|
43
47
|
names = []
|
|
44
|
-
|
|
48
|
+
333.times do
|
|
45
49
|
names << Faker::Name.name
|
|
46
50
|
end
|
|
47
|
-
|
|
51
|
+
2000.times do
|
|
48
52
|
selector = ActiveSupport::OrderedHash.new
|
|
49
53
|
selector[:name] = names.sample(1).first
|
|
50
54
|
document = {
|
|
51
55
|
:lovability => BigDecimal.new(rand(1e11), 2),
|
|
52
56
|
:tag_number => rand(1e8),
|
|
57
|
+
:spiel => SecureRandom.hex(rand(127)),
|
|
53
58
|
:good => true,
|
|
54
59
|
:birthday => Time.at(rand * Time.now.to_i).to_date,
|
|
55
60
|
:morning_walk_time => Time.at(rand * Time.now.to_i),
|
|
56
|
-
:home_address =>
|
|
57
|
-
:zipped_biography => Upsert.binary(Zlib::Deflate.deflate(
|
|
61
|
+
:home_address => SecureRandom.hex(rand(1000)),
|
|
62
|
+
:zipped_biography => Upsert.binary(Zlib::Deflate.deflate(SecureRandom.hex(rand(1000)), Zlib::BEST_SPEED))
|
|
58
63
|
}
|
|
59
64
|
memo << [selector, document]
|
|
60
65
|
end
|
|
@@ -68,10 +73,9 @@ MiniTest::Spec.class_eval do
|
|
|
68
73
|
|
|
69
74
|
Pet.delete_all
|
|
70
75
|
|
|
71
|
-
|
|
72
|
-
upsert.multi do |xxx|
|
|
76
|
+
Upsert.stream(connection, :pets) do |upsert|
|
|
73
77
|
records.each do |selector, document|
|
|
74
|
-
|
|
78
|
+
upsert.row(selector, document)
|
|
75
79
|
end
|
|
76
80
|
end
|
|
77
81
|
ref2 = Pet.order(:name).all.map { |pet| pet.attributes.except('id') }
|
|
@@ -101,10 +105,9 @@ MiniTest::Spec.class_eval do
|
|
|
101
105
|
sleep 1
|
|
102
106
|
|
|
103
107
|
upsert_time = Benchmark.realtime do
|
|
104
|
-
|
|
105
|
-
upsert.multi do |xxx|
|
|
108
|
+
Upsert.stream(connection, :pets) do |upsert|
|
|
106
109
|
records.each do |selector, document|
|
|
107
|
-
|
|
110
|
+
upsert.row(selector, document)
|
|
108
111
|
end
|
|
109
112
|
end
|
|
110
113
|
end
|
|
@@ -119,9 +122,11 @@ module MiniTest::Spec::SharedExamples
|
|
|
119
122
|
end
|
|
120
123
|
|
|
121
124
|
def it_also(desc)
|
|
122
|
-
self.instance_eval do
|
|
123
|
-
|
|
124
|
-
|
|
125
|
+
self.instance_eval(&MiniTest::Spec.shared_examples[desc])# do
|
|
126
|
+
# describe desc do
|
|
127
|
+
# .call
|
|
128
|
+
# end
|
|
129
|
+
# end
|
|
125
130
|
end
|
|
126
131
|
end
|
|
127
132
|
|
data/test/shared/binary.rb
CHANGED
|
@@ -1,20 +1,18 @@
|
|
|
1
1
|
shared_examples_for 'supports binary upserts' do
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
10.
|
|
6
|
-
@fakes << [Faker::Name.name, Faker::Lorem.paragraphs(10).join("\n\n")]
|
|
7
|
-
end
|
|
2
|
+
before do
|
|
3
|
+
@fakes = []
|
|
4
|
+
10.times do
|
|
5
|
+
@fakes << [Faker::Name.name, Faker::Lorem.paragraphs(10).join("\n\n")]
|
|
8
6
|
end
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
Zlib::Inflate.inflate(Pet.find_by_name(name).zipped_biography).must_equal biography
|
|
7
|
+
end
|
|
8
|
+
it "saves binary one by one" do
|
|
9
|
+
@fakes.each do |name, biography|
|
|
10
|
+
zipped_biography = Zlib::Deflate.deflate biography
|
|
11
|
+
upsert = Upsert.new connection, :pets
|
|
12
|
+
assert_creates(Pet, [{:name => name, :zipped_biography => zipped_biography}]) do
|
|
13
|
+
upsert.row({:name => name}, {:zipped_biography => Upsert.binary(zipped_biography)})
|
|
17
14
|
end
|
|
15
|
+
Zlib::Inflate.inflate(Pet.find_by_name(name).zipped_biography).must_equal biography
|
|
18
16
|
end
|
|
19
17
|
end
|
|
20
18
|
end
|
data/test/shared/correctness.rb
CHANGED
|
@@ -1,47 +1,68 @@
|
|
|
1
1
|
shared_examples_for 'is just as correct as other ways' do
|
|
2
|
-
describe
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
pet.send "#{k}=", v
|
|
16
|
-
end
|
|
17
|
-
pet.save!
|
|
2
|
+
describe 'compared to native ActiveRecord' do
|
|
3
|
+
it "is as correct as than new/set/save" do
|
|
4
|
+
assert_same_result lotsa_records do |records|
|
|
5
|
+
records.each do |selector, document|
|
|
6
|
+
if pet = Pet.where(selector).first
|
|
7
|
+
pet.update_attributes document, :without_protection => true
|
|
8
|
+
else
|
|
9
|
+
pet = Pet.new
|
|
10
|
+
selector.each do |k, v|
|
|
11
|
+
pet.send "#{k}=", v
|
|
12
|
+
end
|
|
13
|
+
document.each do |k, v|
|
|
14
|
+
pet.send "#{k}=", v
|
|
18
15
|
end
|
|
16
|
+
pet.save!
|
|
19
17
|
end
|
|
20
18
|
end
|
|
21
19
|
end
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
20
|
+
end
|
|
21
|
+
it "is as correct as than find_or_create + update_attributes" do
|
|
22
|
+
assert_same_result lotsa_records do |records|
|
|
23
|
+
dynamic_method = nil
|
|
24
|
+
records.each do |selector, document|
|
|
25
|
+
dynamic_method ||= "find_or_create_by_#{selector.keys.join('_or_')}"
|
|
26
|
+
pet = Pet.send(dynamic_method, *selector.values)
|
|
27
|
+
pet.update_attributes document, :without_protection => true
|
|
28
|
+
end
|
|
29
|
+
end
|
|
30
|
+
end
|
|
31
|
+
it "is as correct as than create + rescue/find/update" do
|
|
32
|
+
assert_same_result lotsa_records do |records|
|
|
33
|
+
dynamic_method = nil
|
|
34
|
+
records.each do |selector, document|
|
|
35
|
+
dynamic_method ||= "find_or_create_by_#{selector.keys.join('_or_')}"
|
|
36
|
+
begin
|
|
37
|
+
Pet.create selector.merge(document), :without_protection => true
|
|
38
|
+
rescue
|
|
27
39
|
pet = Pet.send(dynamic_method, *selector.values)
|
|
28
40
|
pet.update_attributes document, :without_protection => true
|
|
29
41
|
end
|
|
30
42
|
end
|
|
31
43
|
end
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
44
|
+
end
|
|
45
|
+
end
|
|
46
|
+
describe 'compared to activerecord-import' do
|
|
47
|
+
it "is as correct as faking upserts with activerecord-import" do
|
|
48
|
+
unless Pet.connection.respond_to?(:sql_for_on_duplicate_key_update)
|
|
49
|
+
flunk "#{Pet.connection} does not support activerecord-import's :on_duplicate_key_update"
|
|
50
|
+
end
|
|
51
|
+
assert_same_result lotsa_records do |records|
|
|
52
|
+
columns = nil
|
|
53
|
+
all_values = []
|
|
54
|
+
records.each do |selector, document|
|
|
55
|
+
columns ||= (selector.keys + document.keys).uniq
|
|
56
|
+
all_values << columns.map do |k|
|
|
57
|
+
if document.has_key?(k)
|
|
58
|
+
# prefer the document so that you can change rows
|
|
59
|
+
document[k]
|
|
60
|
+
else
|
|
61
|
+
selector[k]
|
|
42
62
|
end
|
|
43
63
|
end
|
|
44
64
|
end
|
|
65
|
+
Pet.import columns, all_values, :timestamps => false, :on_duplicate_key_update => columns
|
|
45
66
|
end
|
|
46
67
|
end
|
|
47
68
|
end
|
data/test/shared/database.rb
CHANGED
|
@@ -6,6 +6,13 @@ shared_examples_for 'is a database with an upsert trick' do
|
|
|
6
6
|
upsert.row({:name => 'Jerry'}, {:gender => 'male'})
|
|
7
7
|
end
|
|
8
8
|
end
|
|
9
|
+
it "works for complex selectors" do
|
|
10
|
+
upsert = Upsert.new connection, :pets
|
|
11
|
+
assert_creates(Pet, [{:name => 'Jerry', :gender => 'male', :tag_number => 4}]) do
|
|
12
|
+
upsert.row({:name => 'Jerry', :gender => 'male'}, {:tag_number => 1})
|
|
13
|
+
upsert.row({:name => 'Jerry', :gender => 'male'}, {:tag_number => 4})
|
|
14
|
+
end
|
|
15
|
+
end
|
|
9
16
|
it "works for a single row (not changing anything)" do
|
|
10
17
|
upsert = Upsert.new connection, :pets
|
|
11
18
|
assert_creates(Pet, [{:name => 'Jerry', :gender => 'male'}]) do
|
|
@@ -36,59 +43,31 @@ shared_examples_for 'is a database with an upsert trick' do
|
|
|
36
43
|
upsert.row({:name => 'Inky'}, {:gender => nil})
|
|
37
44
|
end
|
|
38
45
|
end
|
|
39
|
-
# it "works for a single row upserted many times" do
|
|
40
|
-
# assert_creates(Pet, [{:name => 'Jerry', :gender => 'male'}]) do
|
|
41
|
-
# ts = (0..5).map do
|
|
42
|
-
# Thread.new do
|
|
43
|
-
# upsert = Upsert.new new_connection, :pets
|
|
44
|
-
# upsert.row({:name => 'Jerry'}, {:gender => 'male'})
|
|
45
|
-
# end
|
|
46
|
-
# end
|
|
47
|
-
# ts.each { |t| t.join }
|
|
48
|
-
# end
|
|
49
|
-
# end
|
|
50
46
|
end
|
|
51
|
-
describe :
|
|
47
|
+
describe :stream do
|
|
52
48
|
it "works for multiple rows (base case)" do
|
|
53
|
-
upsert = Upsert.new connection, :pets
|
|
54
49
|
assert_creates(Pet, [{:name => 'Jerry', :gender => 'male'}]) do
|
|
55
|
-
|
|
56
|
-
|
|
50
|
+
Upsert.stream(connection, :pets) do |upsert|
|
|
51
|
+
upsert.row({:name => 'Jerry'}, :gender => 'male')
|
|
57
52
|
end
|
|
58
53
|
end
|
|
59
54
|
end
|
|
60
55
|
it "works for multiple rows (not changing anything)" do
|
|
61
|
-
upsert = Upsert.new connection, :pets
|
|
62
56
|
assert_creates(Pet, [{:name => 'Jerry', :gender => 'male'}]) do
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
57
|
+
Upsert.stream(connection, :pets) do |upsert|
|
|
58
|
+
upsert.row({:name => 'Jerry'}, :gender => 'male')
|
|
59
|
+
upsert.row({:name => 'Jerry'}, :gender => 'male')
|
|
66
60
|
end
|
|
67
61
|
end
|
|
68
62
|
end
|
|
69
63
|
it "works for multiple rows (changing something)" do
|
|
70
|
-
upsert = Upsert.new connection, :pets
|
|
71
64
|
assert_creates(Pet, [{:name => 'Jerry', :gender => 'neutered'}]) do
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
65
|
+
Upsert.stream(connection, :pets) do |upsert|
|
|
66
|
+
upsert.row({:name => 'Jerry'}, :gender => 'male')
|
|
67
|
+
upsert.row({:name => 'Jerry'}, :gender => 'neutered')
|
|
75
68
|
end
|
|
76
69
|
end
|
|
77
70
|
Pet.where(:gender => 'male').count.must_equal 0
|
|
78
71
|
end
|
|
79
|
-
# it "works for multiple rows upserted many times" do
|
|
80
|
-
# assert_creates(Pet, [{:name => 'Jerry', :gender => 'male'}]) do
|
|
81
|
-
# ts = (0..5).map do
|
|
82
|
-
# Thread.new do
|
|
83
|
-
# upsert = Upsert.new new_connection, :pets
|
|
84
|
-
# upsert.multi do
|
|
85
|
-
# row({:name => 'Jerry'}, :gender => 'male')
|
|
86
|
-
# row({:name => 'Jerry'}, :gender => 'male')
|
|
87
|
-
# end
|
|
88
|
-
# end
|
|
89
|
-
# end
|
|
90
|
-
# ts.each { |t| t.join }
|
|
91
|
-
# end
|
|
92
|
-
# end
|
|
93
72
|
end
|
|
94
73
|
end
|
data/test/shared/multibyte.rb
CHANGED
|
@@ -1,26 +1,37 @@
|
|
|
1
1
|
# -*- encoding: utf-8 -*-
|
|
2
2
|
shared_examples_for "supports multibyte" do
|
|
3
|
-
|
|
4
|
-
|
|
3
|
+
it "works one-by-one" do
|
|
4
|
+
assert_creates(Pet, [{:name => 'I♥NY', :gender => 'périferôl'}]) do
|
|
5
5
|
upsert = Upsert.new connection, :pets
|
|
6
|
-
|
|
7
|
-
upsert.row({:name => 'I♥NY'}, {:gender => 'périferôl'})
|
|
8
|
-
end
|
|
6
|
+
upsert.row({:name => 'I♥NY'}, {:gender => 'périferôl'})
|
|
9
7
|
end
|
|
10
|
-
|
|
8
|
+
end
|
|
9
|
+
it "works serially" do
|
|
10
|
+
assert_creates(Pet, [{:name => 'I♥NY', :gender => 'jÚrgen'}]) do
|
|
11
11
|
upsert = Upsert.new connection, :pets
|
|
12
|
-
|
|
12
|
+
upsert.row({:name => 'I♥NY'}, {:gender => 'périferôl'})
|
|
13
|
+
upsert.row({:name => 'I♥NY'}, {:gender => 'jÚrgen'})
|
|
14
|
+
end
|
|
15
|
+
end
|
|
16
|
+
it "works streaming" do
|
|
17
|
+
assert_creates(Pet, [{:name => 'I♥NY', :gender => 'jÚrgen'}]) do
|
|
18
|
+
Upsert.stream(connection, :pets) do |upsert|
|
|
13
19
|
upsert.row({:name => 'I♥NY'}, {:gender => 'périferôl'})
|
|
14
20
|
upsert.row({:name => 'I♥NY'}, {:gender => 'jÚrgen'})
|
|
15
21
|
end
|
|
16
22
|
end
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
23
|
+
end
|
|
24
|
+
it "won't overflow" do
|
|
25
|
+
upsert = Upsert.new connection, :pets
|
|
26
|
+
if upsert.buffer.respond_to?(:max_sql_bytesize)
|
|
27
|
+
max = upsert.buffer.send(:max_sql_bytesize)
|
|
28
|
+
ticks = max / 3 - 2
|
|
29
|
+
lambda do
|
|
30
|
+
loop do
|
|
31
|
+
upsert.row({:name => 'Jerry'}, :home_address => ("日" * ticks))
|
|
32
|
+
ticks += 1
|
|
22
33
|
end
|
|
23
|
-
end
|
|
34
|
+
end.must_raise Upsert::TooBig
|
|
24
35
|
end
|
|
25
36
|
end
|
|
26
37
|
end
|
data/test/shared/speed.rb
CHANGED
|
@@ -1,71 +1,69 @@
|
|
|
1
1
|
shared_examples_for 'can be speeded up with upserting' do
|
|
2
|
-
describe
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
pet.send "#{k}=", v
|
|
13
|
-
end
|
|
14
|
-
document.each do |k, v|
|
|
15
|
-
pet.send "#{k}=", v
|
|
16
|
-
end
|
|
17
|
-
pet.save!
|
|
2
|
+
describe 'compared to native ActiveRecord' do
|
|
3
|
+
it "is faster than new/set/save" do
|
|
4
|
+
assert_faster_than 'find + new/set/save', lotsa_records do |records|
|
|
5
|
+
records.each do |selector, document|
|
|
6
|
+
if pet = Pet.where(selector).first
|
|
7
|
+
pet.update_attributes document, :without_protection => true
|
|
8
|
+
else
|
|
9
|
+
pet = Pet.new
|
|
10
|
+
selector.each do |k, v|
|
|
11
|
+
pet.send "#{k}=", v
|
|
18
12
|
end
|
|
13
|
+
document.each do |k, v|
|
|
14
|
+
pet.send "#{k}=", v
|
|
15
|
+
end
|
|
16
|
+
pet.save!
|
|
19
17
|
end
|
|
20
18
|
end
|
|
21
19
|
end
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
20
|
+
end
|
|
21
|
+
it "is faster than find_or_create + update_attributes" do
|
|
22
|
+
assert_faster_than 'find_or_create + update_attributes', lotsa_records do |records|
|
|
23
|
+
dynamic_method = nil
|
|
24
|
+
records.each do |selector, document|
|
|
25
|
+
dynamic_method ||= "find_or_create_by_#{selector.keys.join('_or_')}"
|
|
26
|
+
pet = Pet.send(dynamic_method, *selector.values)
|
|
27
|
+
pet.update_attributes document, :without_protection => true
|
|
30
28
|
end
|
|
31
29
|
end
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
30
|
+
end
|
|
31
|
+
it "is faster than create + rescue/find/update" do
|
|
32
|
+
assert_faster_than 'create + rescue/find/update', lotsa_records do |records|
|
|
33
|
+
dynamic_method = nil
|
|
34
|
+
records.each do |selector, document|
|
|
35
|
+
dynamic_method ||= "find_or_create_by_#{selector.keys.join('_or_')}"
|
|
36
|
+
begin
|
|
37
|
+
Pet.create selector.merge(document), :without_protection => true
|
|
38
|
+
rescue
|
|
39
|
+
pet = Pet.send(dynamic_method, *selector.values)
|
|
40
|
+
pet.update_attributes document, :without_protection => true
|
|
43
41
|
end
|
|
44
42
|
end
|
|
45
43
|
end
|
|
46
44
|
end
|
|
45
|
+
end
|
|
47
46
|
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
end
|
|
47
|
+
describe 'compared to activerecord-import' do
|
|
48
|
+
it "is faster than faking upserts with activerecord-import" do
|
|
49
|
+
unless Pet.connection.respond_to?(:sql_for_on_duplicate_key_update)
|
|
50
|
+
flunk "#{Pet.connection} does not support activerecord-import's :on_duplicate_key_update"
|
|
51
|
+
end
|
|
52
|
+
assert_faster_than 'faking upserts with activerecord-import', lotsa_records do |records|
|
|
53
|
+
columns = nil
|
|
54
|
+
all_values = []
|
|
55
|
+
records.each do |selector, document|
|
|
56
|
+
columns ||= (selector.keys + document.keys).uniq
|
|
57
|
+
all_values << columns.map do |k|
|
|
58
|
+
if document.has_key?(k)
|
|
59
|
+
# prefer the document so that you can change rows
|
|
60
|
+
document[k]
|
|
61
|
+
else
|
|
62
|
+
selector[k]
|
|
65
63
|
end
|
|
66
64
|
end
|
|
67
|
-
Pet.import columns, all_values, :timestamps => false, :on_duplicate_key_update => columns
|
|
68
65
|
end
|
|
66
|
+
Pet.import columns, all_values, :timestamps => false, :on_duplicate_key_update => columns
|
|
69
67
|
end
|
|
70
68
|
end
|
|
71
69
|
end
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
shared_examples_for 'is thread-safe' do
|
|
2
|
+
it "is safe to use one-by-one" do
|
|
3
|
+
upsert = Upsert.new connection, :pets
|
|
4
|
+
assert_creates(Pet, [{:name => 'Jerry', :gender => 'neutered'}]) do
|
|
5
|
+
ts = []
|
|
6
|
+
10.times do
|
|
7
|
+
ts << Thread.new do
|
|
8
|
+
sleep 0.2
|
|
9
|
+
upsert.row({:name => 'Jerry'}, :gender => 'male')
|
|
10
|
+
upsert.row({:name => 'Jerry'}, :gender => 'neutered')
|
|
11
|
+
end
|
|
12
|
+
ts.each { |t| t.join }
|
|
13
|
+
end
|
|
14
|
+
end
|
|
15
|
+
end
|
|
16
|
+
it "is safe to use streaming" do
|
|
17
|
+
assert_creates(Pet, [{:name => 'Jerry', :gender => 'neutered'}]) do
|
|
18
|
+
Upsert.stream(connection, :pets) do |upsert|
|
|
19
|
+
ts = []
|
|
20
|
+
10.times do
|
|
21
|
+
ts << Thread.new do
|
|
22
|
+
sleep 0.2
|
|
23
|
+
upsert.row({:name => 'Jerry'}, :gender => 'male')
|
|
24
|
+
upsert.row({:name => 'Jerry'}, :gender => 'neutered')
|
|
25
|
+
end
|
|
26
|
+
ts.each { |t| t.join }
|
|
27
|
+
end
|
|
28
|
+
end
|
|
29
|
+
end
|
|
30
|
+
end
|
|
31
|
+
end
|
data/test/shared/timezones.rb
CHANGED
|
@@ -1,27 +1,25 @@
|
|
|
1
1
|
shared_examples_for "doesn't mess with timezones" do
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
end
|
|
2
|
+
before do
|
|
3
|
+
@old_default_tz = ActiveRecord::Base.default_timezone
|
|
4
|
+
end
|
|
5
|
+
after do
|
|
6
|
+
ActiveRecord::Base.default_timezone = @old_default_tz
|
|
7
|
+
end
|
|
9
8
|
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
end
|
|
9
|
+
it "deals fine with UTC" do
|
|
10
|
+
ActiveRecord::Base.default_timezone = :utc
|
|
11
|
+
time = Time.now.utc
|
|
12
|
+
upsert = Upsert.new connection, :pets
|
|
13
|
+
assert_creates(Pet, [{:name => 'Jerry', :morning_walk_time => time}]) do
|
|
14
|
+
upsert.row({:name => 'Jerry'}, {:morning_walk_time => time})
|
|
17
15
|
end
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
16
|
+
end
|
|
17
|
+
it "won't mess with UTC" do
|
|
18
|
+
ActiveRecord::Base.default_timezone = :local
|
|
19
|
+
time = Time.now
|
|
20
|
+
upsert = Upsert.new connection, :pets
|
|
21
|
+
assert_creates(Pet, [{:name => 'Jerry', :morning_walk_time => time}]) do
|
|
22
|
+
upsert.row({:name => 'Jerry'}, {:morning_walk_time => time})
|
|
25
23
|
end
|
|
26
24
|
end
|
|
27
25
|
end
|
data/test/test_mysql2.rb
CHANGED
data/test/test_pg.rb
CHANGED
data/test/test_sqlite.rb
CHANGED
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: upsert
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.1.0
|
|
4
|
+
version: 0.1.1
|
|
5
5
|
prerelease:
|
|
6
6
|
platform: ruby
|
|
7
7
|
authors:
|
|
@@ -9,7 +9,7 @@ authors:
|
|
|
9
9
|
autorequire:
|
|
10
10
|
bindir: bin
|
|
11
11
|
cert_chain: []
|
|
12
|
-
date: 2012-06-
|
|
12
|
+
date: 2012-06-19 00:00:00.000000000 Z
|
|
13
13
|
dependencies:
|
|
14
14
|
- !ruby/object:Gem::Dependency
|
|
15
15
|
name: sqlite3
|
|
@@ -202,6 +202,7 @@ files:
|
|
|
202
202
|
- test/shared/database.rb
|
|
203
203
|
- test/shared/multibyte.rb
|
|
204
204
|
- test/shared/speed.rb
|
|
205
|
+
- test/shared/threaded.rb
|
|
205
206
|
- test/shared/timezones.rb
|
|
206
207
|
- test/test_active_record_connection_adapter.rb
|
|
207
208
|
- test/test_mysql2.rb
|
|
@@ -240,6 +241,7 @@ test_files:
|
|
|
240
241
|
- test/shared/database.rb
|
|
241
242
|
- test/shared/multibyte.rb
|
|
242
243
|
- test/shared/speed.rb
|
|
244
|
+
- test/shared/threaded.rb
|
|
243
245
|
- test/shared/timezones.rb
|
|
244
246
|
- test/test_active_record_connection_adapter.rb
|
|
245
247
|
- test/test_mysql2.rb
|