upsert 0.0.1 → 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/.yardopts +2 -0
- data/README.md +54 -11
- data/Rakefile +1 -1
- data/lib/upsert.rb +39 -16
- data/lib/upsert/binary.rb +7 -0
- data/lib/upsert/buffer.rb +14 -22
- data/lib/upsert/buffer/mysql2_client.rb +103 -20
- data/lib/upsert/buffer/pg_connection.rb +33 -40
- data/lib/upsert/buffer/pg_connection/column_definition.rb +28 -2
- data/lib/upsert/buffer/sqlite3_database.rb +25 -23
- data/lib/upsert/quoter.rb +29 -1
- data/lib/upsert/row.rb +29 -13
- data/lib/upsert/version.rb +1 -1
- data/test/helper.rb +80 -3
- data/test/shared/binary.rb +20 -0
- data/test/shared/correctness.rb +48 -0
- data/test/{shared_examples.rb → shared/database.rb} +9 -9
- data/test/shared/multibyte.rb +26 -0
- data/test/shared/speed.rb +72 -0
- data/test/shared/timezones.rb +27 -0
- data/test/test_active_record_connection_adapter.rb +36 -0
- data/test/test_mysql2.rb +11 -2
- data/test/test_pg.rb +11 -2
- data/test/test_sqlite.rb +20 -4
- data/upsert.gemspec +2 -0
- metadata +50 -6
- data/test/test_upsert.rb +0 -7
data/.yardopts
ADDED
data/README.md
CHANGED
@@ -2,7 +2,50 @@
|
|
2
2
|
|
3
3
|
Finally, all those SQL MERGE tricks codified.
|
4
4
|
|
5
|
-
##
|
5
|
+
## Usage
|
6
|
+
|
7
|
+
### One at a time
|
8
|
+
|
9
|
+
upsert = Upsert.new Pet.connection, Pet.table_name
|
10
|
+
upsert.row({:name => 'Jerry'}, :breed => 'beagle')
|
11
|
+
upsert.row({:name => 'Pierre'}, :breed => 'tabby')
|
12
|
+
|
13
|
+
### Multiple upserts at once
|
14
|
+
|
15
|
+
Upsert.new(Pet.connection, Pet.table_name).multi do |upsert|
|
16
|
+
upsert.row({:name => 'Jerry'}, :breed => 'beagle')
|
17
|
+
upsert.row({:name => 'Pierre'}, :breed => 'tabby')
|
18
|
+
end
|
19
|
+
|
20
|
+
## Wishlist
|
21
|
+
|
22
|
+
1. Allow raw SQL expressions such as `c=c+1` as values, e.g. via something like `Upsert.sql('c=c+1')`
|
23
|
+
|
24
|
+
## Speed
|
25
|
+
|
26
|
+
### MySQL
|
27
|
+
|
28
|
+
(results from the speed tests in the test suite)
|
29
|
+
|
30
|
+
Upsert was 47% faster than faking upserts with activerecord-import
|
31
|
+
Upsert was 77% faster than find + new/set/save
|
32
|
+
Upsert was 84% faster than create + rescue/find/update
|
33
|
+
Upsert was 82% faster than find_or_create + update_attributes
|
34
|
+
|
35
|
+
### PostgreSQL
|
36
|
+
|
37
|
+
Upsert was 73% faster than find + new/set/save
|
38
|
+
Upsert was 84% faster than find_or_create + update_attributes
|
39
|
+
Upsert was 87% faster than create + rescue/find/update
|
40
|
+
|
41
|
+
## Supported database drivers
|
42
|
+
|
43
|
+
1. [mysql2](https://rubygems.org/gems/mysql2) (e.g. `Upsert.new(Mysql2::Client.new([...]), :pets)`)
|
44
|
+
2. [sqlite3](https://rubygems.org/gems/sqlite3)
|
45
|
+
3. [pg](https://rubygems.org/gems/pg)
|
46
|
+
4. Any of these wrapped in an ActiveRecord connection adapter (e.g. `Upsert.new(Pet.connection, Pet.table_name)`)
|
47
|
+
|
48
|
+
## SQL merge tricks in use
|
6
49
|
|
7
50
|
### MySQL
|
8
51
|
|
@@ -12,8 +55,6 @@ Finally, all those SQL MERGE tricks codified.
|
|
12
55
|
|
13
56
|
### PostgreSQL
|
14
57
|
|
15
|
-
#### Used
|
16
|
-
|
17
58
|
# http://www.postgresql.org/docs/current/interactive/plpgsql-control-structures.html#PLPGSQL-ERROR-TRAPPING
|
18
59
|
CREATE TABLE db (a INT PRIMARY KEY, b TEXT);
|
19
60
|
CREATE FUNCTION merge_db(key INT, data TEXT) RETURNS VOID AS
|
@@ -41,7 +82,15 @@ Finally, all those SQL MERGE tricks codified.
|
|
41
82
|
SELECT merge_db(1, 'david');
|
42
83
|
SELECT merge_db(1, 'dennis');
|
43
84
|
|
44
|
-
|
85
|
+
### Sqlite
|
86
|
+
|
87
|
+
# http://stackoverflow.com/questions/2717590/sqlite-upsert-on-duplicate-key-update
|
88
|
+
INSERT OR IGNORE INTO visits VALUES ($ip, 0);
|
89
|
+
UPDATE visits SET hits = hits + 1 WHERE ip LIKE $ip;
|
90
|
+
|
91
|
+
### Unused alternatives
|
92
|
+
|
93
|
+
#### PostgreSQL
|
45
94
|
|
46
95
|
# http://stackoverflow.com/questions/1109061/insert-on-duplicate-update-postgresql
|
47
96
|
UPDATE table SET field='C', field2='Z' WHERE id=3;
|
@@ -61,10 +110,4 @@ Finally, all those SQL MERGE tricks codified.
|
|
61
110
|
FROM stage_data
|
62
111
|
WHERE NOT EXISTS (SELECT 1 FROM target_data
|
63
112
|
WHERE target_data.key_column = stage_data.key_column)
|
64
|
-
END;
|
65
|
-
|
66
|
-
### Sqlite
|
67
|
-
|
68
|
-
# http://stackoverflow.com/questions/2717590/sqlite-upsert-on-duplicate-key-update
|
69
|
-
INSERT OR IGNORE INTO visits VALUES ($ip, 0);
|
70
|
-
UPDATE visits SET hits = hits + 1 WHERE ip LIKE $ip;
|
113
|
+
END;
|
data/Rakefile
CHANGED
@@ -10,7 +10,7 @@ Rake::TestTask.new(:_test) do |test|
|
|
10
10
|
end
|
11
11
|
|
12
12
|
task :test_each_db_adapter do
|
13
|
-
%w{ mysql2 sqlite pg }.each do |database|
|
13
|
+
%w{ mysql2 sqlite pg active_record_connection_adapter }.each do |database|
|
14
14
|
puts
|
15
15
|
puts "#{'*'*10} Running #{database} tests"
|
16
16
|
puts
|
data/lib/upsert.rb
CHANGED
@@ -1,4 +1,7 @@
|
|
1
|
+
require 'bigdecimal'
|
2
|
+
|
1
3
|
require 'upsert/version'
|
4
|
+
require 'upsert/binary'
|
2
5
|
require 'upsert/buffer'
|
3
6
|
require 'upsert/quoter'
|
4
7
|
require 'upsert/row'
|
@@ -7,35 +10,55 @@ require 'upsert/buffer/pg_connection'
|
|
7
10
|
require 'upsert/buffer/sqlite3_database'
|
8
11
|
|
9
12
|
class Upsert
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
13
|
+
class << self
|
14
|
+
# @param [String] v A string containing binary data that should be inserted/escaped as such.
|
15
|
+
#
|
16
|
+
# @return [Upsert::Binary]
|
17
|
+
def binary(v)
|
18
|
+
Binary.new v
|
19
|
+
end
|
20
|
+
end
|
14
21
|
|
22
|
+
# @private
|
15
23
|
attr_reader :buffer
|
16
24
|
|
25
|
+
# @param [Mysql2::Client,Sqlite3::Database,PG::Connection,#raw_connection] connection A supported database connection.
|
26
|
+
# @param [String,Symbol] table_name The name of the table into which you will be upserting.
|
17
27
|
def initialize(connection, table_name)
|
18
28
|
@multi_mutex = Mutex.new
|
19
29
|
@buffer = Buffer.for connection, table_name
|
20
30
|
end
|
21
31
|
|
32
|
+
# @param [Hash] selector Key-value pairs that will be used to find or create a row.
|
33
|
+
# @param [Hash] document Key-value pairs that will be set on the row, whether it previously existed or not.
|
34
|
+
#
|
35
|
+
# @return [nil]
|
36
|
+
#
|
37
|
+
# @example One at a time
|
38
|
+
# upsert = Upsert.new Pet.connection, Pet.table_name
|
39
|
+
# upsert.row({:name => 'Jerry'}, :breed => 'beagle')
|
40
|
+
# upsert.row({:name => 'Pierre'}, :breed => 'tabby')
|
22
41
|
def row(selector, document)
|
23
42
|
buffer.add selector, document
|
43
|
+
nil
|
24
44
|
end
|
25
45
|
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
46
|
+
# @yield [Upsert] An +Upsert+ object in "async" mode. You can call #row on it multiple times and it will try to optimize on speed.
|
47
|
+
#
|
48
|
+
# @return [nil]
|
49
|
+
#
|
50
|
+
# @example Many at once
|
51
|
+
# Upsert.new(Pet.connection, Pet.table_name).multi do |upsert|
|
52
|
+
# upsert.row({:name => 'Jerry'}, :breed => 'beagle')
|
53
|
+
# upsert.row({:name => 'Pierre'}, :breed => 'tabby')
|
54
|
+
# end
|
55
|
+
def multi
|
31
56
|
@multi_mutex.synchronize do
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
ensure
|
37
|
-
buffer.async = nil
|
38
|
-
end
|
57
|
+
buffer.async = true
|
58
|
+
yield self
|
59
|
+
buffer.async = false
|
60
|
+
buffer.clear
|
39
61
|
end
|
62
|
+
nil
|
40
63
|
end
|
41
64
|
end
|
data/lib/upsert/buffer.rb
CHANGED
@@ -1,11 +1,24 @@
|
|
1
1
|
class Upsert
|
2
|
+
# @private
|
2
3
|
class Buffer
|
3
4
|
class << self
|
4
5
|
def for(connection, table_name)
|
6
|
+
if connection.respond_to?(:raw_connection)
|
7
|
+
# deal with ActiveRecord::Base.connection or ActiveRecord::Base.connection_pool.checkout
|
8
|
+
connection = connection.raw_connection
|
9
|
+
end
|
5
10
|
const_get(connection.class.name.gsub(/\W+/, '_')).new connection, table_name
|
6
11
|
end
|
7
12
|
end
|
8
13
|
|
14
|
+
SINGLE_QUOTE = %{'}
|
15
|
+
DOUBLE_QUOTE = %{"}
|
16
|
+
BACKTICK = %{`}
|
17
|
+
E_AND_SINGLE_QUOTE = %{E'}
|
18
|
+
X_AND_SINGLE_QUOTE = %{x'}
|
19
|
+
USEC_SPRINTF = '%06d'
|
20
|
+
ISO8601_DATETIME = '%Y-%m-%d %H:%M:%S' #FIXME ignores timezones i think
|
21
|
+
|
9
22
|
attr_reader :connection
|
10
23
|
attr_reader :table_name
|
11
24
|
attr_reader :rows
|
@@ -22,7 +35,7 @@ class Upsert
|
|
22
35
|
end
|
23
36
|
|
24
37
|
def add(selector, document)
|
25
|
-
rows << Row.new(selector, document)
|
38
|
+
rows << Row.new(self, selector, document)
|
26
39
|
if sql = chunk
|
27
40
|
execute sql
|
28
41
|
end
|
@@ -33,26 +46,5 @@ class Upsert
|
|
33
46
|
execute sql
|
34
47
|
end
|
35
48
|
end
|
36
|
-
|
37
|
-
def chunk
|
38
|
-
return if rows.empty?
|
39
|
-
targets = []
|
40
|
-
sql = nil
|
41
|
-
begin
|
42
|
-
targets << rows.pop
|
43
|
-
last_sql = sql
|
44
|
-
sql = compose(targets)
|
45
|
-
end until rows.empty? or targets.length >= max_targets or sql.length > max_length
|
46
|
-
if sql.length > max_length
|
47
|
-
raise if last_sql.nil?
|
48
|
-
sql = last_sql
|
49
|
-
rows << targets.pop
|
50
|
-
end
|
51
|
-
sql
|
52
|
-
end
|
53
|
-
|
54
|
-
def cleanup
|
55
|
-
clear
|
56
|
-
end
|
57
49
|
end
|
58
50
|
end
|
@@ -1,16 +1,20 @@
|
|
1
1
|
class Upsert
|
2
2
|
class Buffer
|
3
|
+
# @private
|
3
4
|
class Mysql2_Client < Buffer
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
5
|
+
include Quoter
|
6
|
+
|
7
|
+
def chunk
|
8
|
+
return false if rows.empty?
|
9
|
+
take = rows.length
|
10
|
+
until take == 1 or fits_in_single_query?(take)
|
11
|
+
take -= 1
|
9
12
|
end
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
13
|
+
if async? and not maximal?(take)
|
14
|
+
return false
|
15
|
+
end
|
16
|
+
sql = sql take
|
17
|
+
@rows = rows.drop(take)
|
14
18
|
sql
|
15
19
|
end
|
16
20
|
|
@@ -18,30 +22,109 @@ EOS
|
|
18
22
|
connection.query sql
|
19
23
|
end
|
20
24
|
|
21
|
-
def
|
22
|
-
|
25
|
+
def fits_in_single_query?(take)
|
26
|
+
sql_length(take) <= max_sql_length
|
23
27
|
end
|
24
28
|
|
25
|
-
def
|
26
|
-
|
29
|
+
def maximal?(take)
|
30
|
+
sql_length(take) >= max_sql_length
|
27
31
|
end
|
28
32
|
|
29
|
-
|
33
|
+
def columns
|
34
|
+
@columns ||= rows.first.columns
|
35
|
+
end
|
36
|
+
|
37
|
+
def insert_part
|
38
|
+
@insert_part ||= %{INSERT INTO "#{table_name}" (#{quote_idents(columns)}) VALUES }
|
39
|
+
end
|
40
|
+
|
41
|
+
def update_part
|
42
|
+
@update_part ||= begin
|
43
|
+
updaters = columns.map do |k|
|
44
|
+
qk = quote_ident k
|
45
|
+
[ qk, "VALUES(#{qk})" ].join('=')
|
46
|
+
end.join(',')
|
47
|
+
%{ ON DUPLICATE KEY UPDATE #{updaters}}
|
48
|
+
end
|
49
|
+
end
|
50
|
+
|
51
|
+
# where 2 is the parens
|
52
|
+
def static_sql_length
|
53
|
+
@static_sql_length ||= insert_part.length + update_part.length + 2
|
54
|
+
end
|
55
|
+
|
56
|
+
# where 3 is parens and comma
|
57
|
+
def variable_sql_length(take)
|
58
|
+
rows.first(take).inject(0) { |sum, row| sum + row.values_sql_length + 3 }
|
59
|
+
end
|
60
|
+
|
61
|
+
def sql_length(take)
|
62
|
+
static_sql_length + variable_sql_length(take)
|
63
|
+
end
|
30
64
|
|
31
|
-
def
|
65
|
+
def sql(take)
|
66
|
+
all_value_sql = rows.first(take).map { |row| row.values_sql }
|
67
|
+
[ insert_part, '(', all_value_sql.join('),('), ')', update_part ].join
|
68
|
+
end
|
69
|
+
|
70
|
+
def max_sql_length
|
71
|
+
@max_sql_length ||= connection.query("SHOW VARIABLES LIKE 'max_allowed_packet'", :as => :hash).first['Value'].to_i
|
72
|
+
end
|
73
|
+
|
74
|
+
def quoted_value_length(v)
|
32
75
|
case v
|
33
76
|
when NilClass
|
34
|
-
|
35
|
-
when
|
36
|
-
|
77
|
+
4
|
78
|
+
when TrueClass
|
79
|
+
4
|
80
|
+
when FalseClass
|
81
|
+
5
|
82
|
+
when BigDecimal
|
83
|
+
v.to_s('F').length
|
84
|
+
when Upsert::Binary
|
85
|
+
# conservative
|
86
|
+
v.length * 2 + 3
|
87
|
+
when Numeric
|
88
|
+
v.to_s.length
|
89
|
+
when String
|
90
|
+
# conservative
|
91
|
+
v.length * 2 + 2
|
92
|
+
when Time, DateTime
|
93
|
+
24 + 2
|
94
|
+
when Date
|
95
|
+
10 + 2
|
37
96
|
else
|
38
|
-
v
|
97
|
+
raise "not sure how to get quoted length of #{v.class}: #{v.inspect}"
|
39
98
|
end
|
40
99
|
end
|
41
|
-
|
100
|
+
|
101
|
+
def quote_boolean(v)
|
102
|
+
v ? 'TRUE' : 'FALSE'
|
103
|
+
end
|
104
|
+
|
105
|
+
def quote_string(v)
|
106
|
+
SINGLE_QUOTE + connection.escape(v) + SINGLE_QUOTE
|
107
|
+
end
|
108
|
+
|
109
|
+
# We **could** do this, but I don't think it's necessary.
|
110
|
+
# def quote_binary(v)
|
111
|
+
# X_AND_SINGLE_QUOTE + v.unpack("H*")[0] + SINGLE_QUOTE
|
112
|
+
# end
|
113
|
+
|
114
|
+
# put raw binary straight into sql
|
115
|
+
alias_method :quote_binary, :quote_string
|
116
|
+
|
117
|
+
def quote_time(v)
|
118
|
+
quote_string v.strftime(ISO8601_DATETIME)
|
119
|
+
end
|
120
|
+
|
42
121
|
def quote_ident(k)
|
43
122
|
BACKTICK + connection.escape(k.to_s) + BACKTICK
|
44
123
|
end
|
124
|
+
|
125
|
+
def quote_big_decimal(v)
|
126
|
+
v.to_s('F')
|
127
|
+
end
|
45
128
|
end
|
46
129
|
end
|
47
130
|
end
|
@@ -2,69 +2,63 @@ require 'upsert/buffer/pg_connection/column_definition'
|
|
2
2
|
|
3
3
|
class Upsert
|
4
4
|
class Buffer
|
5
|
+
# @private
|
5
6
|
class PG_Connection < Buffer
|
6
|
-
|
7
|
+
include Quoter
|
8
|
+
|
9
|
+
attr_reader :merge_function
|
7
10
|
|
8
|
-
def
|
9
|
-
|
10
|
-
|
11
|
-
|
11
|
+
def chunk
|
12
|
+
return false if rows.empty?
|
13
|
+
row = rows.shift
|
14
|
+
unless merge_function
|
15
|
+
create_merge_function row
|
12
16
|
end
|
13
|
-
hsh =
|
17
|
+
hsh = row.to_hash
|
14
18
|
ordered_args = column_definitions.map do |c|
|
15
|
-
|
16
|
-
hsh[c.name]
|
17
|
-
else
|
18
|
-
nil
|
19
|
-
end
|
19
|
+
hsh[c.name]
|
20
20
|
end
|
21
|
-
%{
|
21
|
+
%{SELECT #{merge_function}(#{quote_values(ordered_args)})}
|
22
22
|
end
|
23
23
|
|
24
24
|
def execute(sql)
|
25
25
|
connection.exec sql
|
26
26
|
end
|
27
27
|
|
28
|
-
def
|
29
|
-
|
28
|
+
def quote_string(v)
|
29
|
+
SINGLE_QUOTE + connection.escape_string(v) + SINGLE_QUOTE
|
30
30
|
end
|
31
31
|
|
32
|
-
def
|
33
|
-
|
32
|
+
def quote_binary(v)
|
33
|
+
E_AND_SINGLE_QUOTE + connection.escape_bytea(v) + SINGLE_QUOTE
|
34
34
|
end
|
35
35
|
|
36
|
-
|
37
|
-
|
38
|
-
def quote_ident(k)
|
39
|
-
SINGLE_QUOTE + connection.quote_ident(k) + SINGLE_QUOTE
|
36
|
+
def quote_time(v)
|
37
|
+
quote_string [v.strftime(ISO8601_DATETIME), sprintf(USEC_SPRINTF, v.usec)].join('.')
|
40
38
|
end
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
case v
|
45
|
-
when NilClass
|
46
|
-
'NULL'
|
47
|
-
when String, Symbol
|
48
|
-
SINGLE_QUOTE + connection.escape_string(v.to_s) + SINGLE_QUOTE
|
49
|
-
else
|
50
|
-
v
|
51
|
-
end
|
39
|
+
|
40
|
+
def quote_big_decimal(v)
|
41
|
+
v.to_s('F')
|
52
42
|
end
|
53
|
-
|
43
|
+
|
44
|
+
def quote_boolean(v)
|
45
|
+
v ? 'TRUE' : 'FALSE'
|
46
|
+
end
|
47
|
+
|
48
|
+
def quote_ident(k)
|
49
|
+
DOUBLE_QUOTE + connection.quote_ident(k.to_s) + DOUBLE_QUOTE
|
50
|
+
end
|
51
|
+
|
54
52
|
def column_definitions
|
55
53
|
@column_definitions ||= ColumnDefinition.all(connection, table_name)
|
56
54
|
end
|
57
55
|
|
58
56
|
private
|
59
57
|
|
60
|
-
def
|
61
|
-
|
62
|
-
end
|
63
|
-
|
64
|
-
def create_db_function(example_row)
|
65
|
-
@db_function_name = "pg_temp.merge_#{table_name}_#{Kernel.rand(1e11)}"
|
58
|
+
def create_merge_function(example_row)
|
59
|
+
@merge_function = "pg_temp.merge_#{table_name}_#{Kernel.rand(1e11)}"
|
66
60
|
execute <<-EOS
|
67
|
-
CREATE FUNCTION #{
|
61
|
+
CREATE FUNCTION #{merge_function}(#{column_definitions.map { |c| "#{c.name}_input #{c.sql_type} DEFAULT #{c.default || 'NULL'}" }.join(',') }) RETURNS VOID AS
|
68
62
|
$$
|
69
63
|
BEGIN
|
70
64
|
LOOP
|
@@ -87,7 +81,6 @@ END;
|
|
87
81
|
$$
|
88
82
|
LANGUAGE plpgsql;
|
89
83
|
EOS
|
90
|
-
@created_db_function_query = true
|
91
84
|
end
|
92
85
|
end
|
93
86
|
end
|