upsert 0.0.1 → 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.yardopts +2 -0
- data/README.md +54 -11
- data/Rakefile +1 -1
- data/lib/upsert.rb +39 -16
- data/lib/upsert/binary.rb +7 -0
- data/lib/upsert/buffer.rb +14 -22
- data/lib/upsert/buffer/mysql2_client.rb +103 -20
- data/lib/upsert/buffer/pg_connection.rb +33 -40
- data/lib/upsert/buffer/pg_connection/column_definition.rb +28 -2
- data/lib/upsert/buffer/sqlite3_database.rb +25 -23
- data/lib/upsert/quoter.rb +29 -1
- data/lib/upsert/row.rb +29 -13
- data/lib/upsert/version.rb +1 -1
- data/test/helper.rb +80 -3
- data/test/shared/binary.rb +20 -0
- data/test/shared/correctness.rb +48 -0
- data/test/{shared_examples.rb → shared/database.rb} +9 -9
- data/test/shared/multibyte.rb +26 -0
- data/test/shared/speed.rb +72 -0
- data/test/shared/timezones.rb +27 -0
- data/test/test_active_record_connection_adapter.rb +36 -0
- data/test/test_mysql2.rb +11 -2
- data/test/test_pg.rb +11 -2
- data/test/test_sqlite.rb +20 -4
- data/upsert.gemspec +2 -0
- metadata +50 -6
- data/test/test_upsert.rb +0 -7
data/.yardopts
ADDED
data/README.md
CHANGED
@@ -2,7 +2,50 @@
|
|
2
2
|
|
3
3
|
Finally, all those SQL MERGE tricks codified.
|
4
4
|
|
5
|
-
##
|
5
|
+
## Usage
|
6
|
+
|
7
|
+
### One at a time
|
8
|
+
|
9
|
+
upsert = Upsert.new Pet.connection, Pet.table_name
|
10
|
+
upsert.row({:name => 'Jerry'}, :breed => 'beagle')
|
11
|
+
upsert.row({:name => 'Pierre'}, :breed => 'tabby')
|
12
|
+
|
13
|
+
### Multiple upserts at once
|
14
|
+
|
15
|
+
Upsert.new(Pet.connection, Pet.table_name).multi do |upsert|
|
16
|
+
upsert.row({:name => 'Jerry'}, :breed => 'beagle')
|
17
|
+
upsert.row({:name => 'Pierre'}, :breed => 'tabby')
|
18
|
+
end
|
19
|
+
|
20
|
+
## Wishlist
|
21
|
+
|
22
|
+
1. Make `c=c+1` stuff possible with `Upsert.sql('c=c+1')` or something
|
23
|
+
|
24
|
+
## Speed
|
25
|
+
|
26
|
+
### MySQL
|
27
|
+
|
28
|
+
(from the tests)
|
29
|
+
|
30
|
+
Upsert was 47% faster than faking upserts with activerecord-import
|
31
|
+
Upsert was 77% faster than find + new/set/save
|
32
|
+
Upsert was 84% faster than create + rescue/find/update
|
33
|
+
Upsert was 82% faster than find_or_create + update_attributes
|
34
|
+
|
35
|
+
### PostgreSQL
|
36
|
+
|
37
|
+
Upsert was 73% faster than find + new/set/save
|
38
|
+
Upsert was 84% faster than find_or_create + update_attributes
|
39
|
+
Upsert was 87% faster than create + rescue/find/update
|
40
|
+
|
41
|
+
## Supported database drivers
|
42
|
+
|
43
|
+
1. [mysql2](https://rubygems.org/gems/mysql2) (e.g. `Upsert.new(Mysql2::Client.new([...]), :pets)`)
|
44
|
+
2. [sqlite3](https://rubygems.org/gems/sqlite3)
|
45
|
+
3. [pg](https://rubygems.org/gems/pg)
|
46
|
+
4. Any of these wrapped in an ActiveRecord connection adapter (e.g. `Upsert.new(Pet.connection, Pet.table_name)`)
|
47
|
+
|
48
|
+
## SQL merge tricks in use
|
6
49
|
|
7
50
|
### MySQL
|
8
51
|
|
@@ -12,8 +55,6 @@ Finally, all those SQL MERGE tricks codified.
|
|
12
55
|
|
13
56
|
### PostgreSQL
|
14
57
|
|
15
|
-
#### Used
|
16
|
-
|
17
58
|
# http://www.postgresql.org/docs/current/interactive/plpgsql-control-structures.html#PLPGSQL-ERROR-TRAPPING
|
18
59
|
CREATE TABLE db (a INT PRIMARY KEY, b TEXT);
|
19
60
|
CREATE FUNCTION merge_db(key INT, data TEXT) RETURNS VOID AS
|
@@ -41,7 +82,15 @@ Finally, all those SQL MERGE tricks codified.
|
|
41
82
|
SELECT merge_db(1, 'david');
|
42
83
|
SELECT merge_db(1, 'dennis');
|
43
84
|
|
44
|
-
|
85
|
+
### Sqlite
|
86
|
+
|
87
|
+
# http://stackoverflow.com/questions/2717590/sqlite-upsert-on-duplicate-key-update
|
88
|
+
INSERT OR IGNORE INTO visits VALUES ($ip, 0);
|
89
|
+
UPDATE visits SET hits = hits + 1 WHERE ip LIKE $ip;
|
90
|
+
|
91
|
+
### Unused alternatives
|
92
|
+
|
93
|
+
#### PostgreSQL
|
45
94
|
|
46
95
|
# http://stackoverflow.com/questions/1109061/insert-on-duplicate-update-postgresql
|
47
96
|
UPDATE table SET field='C', field2='Z' WHERE id=3;
|
@@ -61,10 +110,4 @@ Finally, all those SQL MERGE tricks codified.
|
|
61
110
|
FROM stage_data
|
62
111
|
WHERE NOT EXISTS (SELECT 1 FROM target_data
|
63
112
|
WHERE target_data.key_column = stage_data.key_column)
|
64
|
-
END;
|
65
|
-
|
66
|
-
### Sqlite
|
67
|
-
|
68
|
-
# http://stackoverflow.com/questions/2717590/sqlite-upsert-on-duplicate-key-update
|
69
|
-
INSERT OR IGNORE INTO visits VALUES ($ip, 0);
|
70
|
-
UPDATE visits SET hits = hits + 1 WHERE ip LIKE $ip;
|
113
|
+
END;
|
data/Rakefile
CHANGED
@@ -10,7 +10,7 @@ Rake::TestTask.new(:_test) do |test|
|
|
10
10
|
end
|
11
11
|
|
12
12
|
task :test_each_db_adapter do
|
13
|
-
%w{ mysql2 sqlite pg }.each do |database|
|
13
|
+
%w{ mysql2 sqlite pg active_record_connection_adapter }.each do |database|
|
14
14
|
puts
|
15
15
|
puts "#{'*'*10} Running #{database} tests"
|
16
16
|
puts
|
data/lib/upsert.rb
CHANGED
@@ -1,4 +1,7 @@
|
|
1
|
+
require 'bigdecimal'
|
2
|
+
|
1
3
|
require 'upsert/version'
|
4
|
+
require 'upsert/binary'
|
2
5
|
require 'upsert/buffer'
|
3
6
|
require 'upsert/quoter'
|
4
7
|
require 'upsert/row'
|
@@ -7,35 +10,55 @@ require 'upsert/buffer/pg_connection'
|
|
7
10
|
require 'upsert/buffer/sqlite3_database'
|
8
11
|
|
9
12
|
class Upsert
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
13
|
+
class << self
|
14
|
+
# @param [String] v A string containing binary data that should be inserted/escaped as such.
|
15
|
+
#
|
16
|
+
# @return [Upsert::Binary]
|
17
|
+
def binary(v)
|
18
|
+
Binary.new v
|
19
|
+
end
|
20
|
+
end
|
14
21
|
|
22
|
+
# @private
|
15
23
|
attr_reader :buffer
|
16
24
|
|
25
|
+
# @param [Mysql2::Client,SQLite3::Database,PG::Connection,#raw_connection] connection A supported database connection.
|
26
|
+
# @param [String,Symbol] table_name The name of the table into which you will be upserting.
|
17
27
|
def initialize(connection, table_name)
|
18
28
|
@multi_mutex = Mutex.new
|
19
29
|
@buffer = Buffer.for connection, table_name
|
20
30
|
end
|
21
31
|
|
32
|
+
# @param [Hash] selector Key-value pairs that will be used to find or create a row.
|
33
|
+
# @param [Hash] document Key-value pairs that will be set on the row, whether it previously existed or not.
|
34
|
+
#
|
35
|
+
# @return [nil]
|
36
|
+
#
|
37
|
+
# @example One at a time
|
38
|
+
# upsert = Upsert.new Pet.connection, Pet.table_name
|
39
|
+
# upsert.row({:name => 'Jerry'}, :breed => 'beagle')
|
40
|
+
# upsert.row({:name => 'Pierre'}, :breed => 'tabby')
|
22
41
|
def row(selector, document)
|
23
42
|
buffer.add selector, document
|
43
|
+
nil
|
24
44
|
end
|
25
45
|
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
46
|
+
# @yield [Upsert] An +Upsert+ object in "async" mode. You can call #row on it multiple times and it will try to optimize on speed.
|
47
|
+
#
|
48
|
+
# @return [nil]
|
49
|
+
#
|
50
|
+
# @example Many at once
|
51
|
+
# Upsert.new(Pet.connection, Pet.table_name).multi do |upsert|
|
52
|
+
# upsert.row({:name => 'Jerry'}, :breed => 'beagle')
|
53
|
+
# upsert.row({:name => 'Pierre'}, :breed => 'tabby')
|
54
|
+
# end
|
55
|
+
def multi
|
31
56
|
@multi_mutex.synchronize do
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
ensure
|
37
|
-
buffer.async = nil
|
38
|
-
end
|
57
|
+
buffer.async = true
|
58
|
+
yield self
|
59
|
+
buffer.async = false
|
60
|
+
buffer.clear
|
39
61
|
end
|
62
|
+
nil
|
40
63
|
end
|
41
64
|
end
|
data/lib/upsert/buffer.rb
CHANGED
@@ -1,11 +1,24 @@
|
|
1
1
|
class Upsert
|
2
|
+
# @private
|
2
3
|
class Buffer
|
3
4
|
class << self
|
4
5
|
def for(connection, table_name)
|
6
|
+
if connection.respond_to?(:raw_connection)
|
7
|
+
# deal with ActiveRecord::Base.connection or ActiveRecord::Base.connection_pool.checkout
|
8
|
+
connection = connection.raw_connection
|
9
|
+
end
|
5
10
|
const_get(connection.class.name.gsub(/\W+/, '_')).new connection, table_name
|
6
11
|
end
|
7
12
|
end
|
8
13
|
|
14
|
+
SINGLE_QUOTE = %{'}
|
15
|
+
DOUBLE_QUOTE = %{"}
|
16
|
+
BACKTICK = %{`}
|
17
|
+
E_AND_SINGLE_QUOTE = %{E'}
|
18
|
+
X_AND_SINGLE_QUOTE = %{x'}
|
19
|
+
USEC_SPRINTF = '%06d'
|
20
|
+
ISO8601_DATETIME = '%Y-%m-%d %H:%M:%S' #FIXME ignores timezones i think
|
21
|
+
|
9
22
|
attr_reader :connection
|
10
23
|
attr_reader :table_name
|
11
24
|
attr_reader :rows
|
@@ -22,7 +35,7 @@ class Upsert
|
|
22
35
|
end
|
23
36
|
|
24
37
|
def add(selector, document)
|
25
|
-
rows << Row.new(selector, document)
|
38
|
+
rows << Row.new(self, selector, document)
|
26
39
|
if sql = chunk
|
27
40
|
execute sql
|
28
41
|
end
|
@@ -33,26 +46,5 @@ class Upsert
|
|
33
46
|
execute sql
|
34
47
|
end
|
35
48
|
end
|
36
|
-
|
37
|
-
def chunk
|
38
|
-
return if rows.empty?
|
39
|
-
targets = []
|
40
|
-
sql = nil
|
41
|
-
begin
|
42
|
-
targets << rows.pop
|
43
|
-
last_sql = sql
|
44
|
-
sql = compose(targets)
|
45
|
-
end until rows.empty? or targets.length >= max_targets or sql.length > max_length
|
46
|
-
if sql.length > max_length
|
47
|
-
raise if last_sql.nil?
|
48
|
-
sql = last_sql
|
49
|
-
rows << targets.pop
|
50
|
-
end
|
51
|
-
sql
|
52
|
-
end
|
53
|
-
|
54
|
-
def cleanup
|
55
|
-
clear
|
56
|
-
end
|
57
49
|
end
|
58
50
|
end
|
@@ -1,16 +1,20 @@
|
|
1
1
|
class Upsert
|
2
2
|
class Buffer
|
3
|
+
# @private
|
3
4
|
class Mysql2_Client < Buffer
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
5
|
+
include Quoter
|
6
|
+
|
7
|
+
def chunk
|
8
|
+
return false if rows.empty?
|
9
|
+
take = rows.length
|
10
|
+
until take == 1 or fits_in_single_query?(take)
|
11
|
+
take -= 1
|
9
12
|
end
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
13
|
+
if async? and not maximal?(take)
|
14
|
+
return false
|
15
|
+
end
|
16
|
+
sql = sql take
|
17
|
+
@rows = rows.drop(take)
|
14
18
|
sql
|
15
19
|
end
|
16
20
|
|
@@ -18,30 +22,109 @@ EOS
|
|
18
22
|
connection.query sql
|
19
23
|
end
|
20
24
|
|
21
|
-
def
|
22
|
-
|
25
|
+
def fits_in_single_query?(take)
|
26
|
+
sql_length(take) <= max_sql_length
|
23
27
|
end
|
24
28
|
|
25
|
-
def
|
26
|
-
|
29
|
+
def maximal?(take)
|
30
|
+
sql_length(take) >= max_sql_length
|
27
31
|
end
|
28
32
|
|
29
|
-
|
33
|
+
def columns
|
34
|
+
@columns ||= rows.first.columns
|
35
|
+
end
|
36
|
+
|
37
|
+
def insert_part
|
38
|
+
@insert_part ||= %{INSERT INTO "#{table_name}" (#{quote_idents(columns)}) VALUES }
|
39
|
+
end
|
40
|
+
|
41
|
+
def update_part
|
42
|
+
@update_part ||= begin
|
43
|
+
updaters = columns.map do |k|
|
44
|
+
qk = quote_ident k
|
45
|
+
[ qk, "VALUES(#{qk})" ].join('=')
|
46
|
+
end.join(',')
|
47
|
+
%{ ON DUPLICATE KEY UPDATE #{updaters}}
|
48
|
+
end
|
49
|
+
end
|
50
|
+
|
51
|
+
# where 2 is the parens
|
52
|
+
def static_sql_length
|
53
|
+
@static_sql_length ||= insert_part.length + update_part.length + 2
|
54
|
+
end
|
55
|
+
|
56
|
+
# where 3 is parens and comma
|
57
|
+
def variable_sql_length(take)
|
58
|
+
rows.first(take).inject(0) { |sum, row| sum + row.values_sql_length + 3 }
|
59
|
+
end
|
60
|
+
|
61
|
+
def sql_length(take)
|
62
|
+
static_sql_length + variable_sql_length(take)
|
63
|
+
end
|
30
64
|
|
31
|
-
def
|
65
|
+
def sql(take)
|
66
|
+
all_value_sql = rows.first(take).map { |row| row.values_sql }
|
67
|
+
[ insert_part, '(', all_value_sql.join('),('), ')', update_part ].join
|
68
|
+
end
|
69
|
+
|
70
|
+
def max_sql_length
|
71
|
+
@max_sql_length ||= connection.query("SHOW VARIABLES LIKE 'max_allowed_packet'", :as => :hash).first['Value'].to_i
|
72
|
+
end
|
73
|
+
|
74
|
+
def quoted_value_length(v)
|
32
75
|
case v
|
33
76
|
when NilClass
|
34
|
-
|
35
|
-
when
|
36
|
-
|
77
|
+
4
|
78
|
+
when TrueClass
|
79
|
+
4
|
80
|
+
when FalseClass
|
81
|
+
5
|
82
|
+
when BigDecimal
|
83
|
+
v.to_s('F').length
|
84
|
+
when Upsert::Binary
|
85
|
+
# conservative
|
86
|
+
v.length * 2 + 3
|
87
|
+
when Numeric
|
88
|
+
v.to_s.length
|
89
|
+
when String
|
90
|
+
# conservative
|
91
|
+
v.length * 2 + 2
|
92
|
+
when Time, DateTime
|
93
|
+
24 + 2
|
94
|
+
when Date
|
95
|
+
10 + 2
|
37
96
|
else
|
38
|
-
v
|
97
|
+
raise "not sure how to get quoted length of #{v.class}: #{v.inspect}"
|
39
98
|
end
|
40
99
|
end
|
41
|
-
|
100
|
+
|
101
|
+
def quote_boolean(v)
|
102
|
+
v ? 'TRUE' : 'FALSE'
|
103
|
+
end
|
104
|
+
|
105
|
+
def quote_string(v)
|
106
|
+
SINGLE_QUOTE + connection.escape(v) + SINGLE_QUOTE
|
107
|
+
end
|
108
|
+
|
109
|
+
# We **could** do this, but I don't think it's necessary.
|
110
|
+
# def quote_binary(v)
|
111
|
+
# X_AND_SINGLE_QUOTE + v.unpack("H*")[0] + SINGLE_QUOTE
|
112
|
+
# end
|
113
|
+
|
114
|
+
# put raw binary straight into sql
|
115
|
+
alias_method :quote_binary, :quote_string
|
116
|
+
|
117
|
+
def quote_time(v)
|
118
|
+
quote_string v.strftime(ISO8601_DATETIME)
|
119
|
+
end
|
120
|
+
|
42
121
|
def quote_ident(k)
|
43
122
|
BACKTICK + connection.escape(k.to_s) + BACKTICK
|
44
123
|
end
|
124
|
+
|
125
|
+
def quote_big_decimal(v)
|
126
|
+
v.to_s('F')
|
127
|
+
end
|
45
128
|
end
|
46
129
|
end
|
47
130
|
end
|
@@ -2,69 +2,63 @@ require 'upsert/buffer/pg_connection/column_definition'
|
|
2
2
|
|
3
3
|
class Upsert
|
4
4
|
class Buffer
|
5
|
+
# @private
|
5
6
|
class PG_Connection < Buffer
|
6
|
-
|
7
|
+
include Quoter
|
8
|
+
|
9
|
+
attr_reader :merge_function
|
7
10
|
|
8
|
-
def
|
9
|
-
|
10
|
-
|
11
|
-
|
11
|
+
def chunk
|
12
|
+
return false if rows.empty?
|
13
|
+
row = rows.shift
|
14
|
+
unless merge_function
|
15
|
+
create_merge_function row
|
12
16
|
end
|
13
|
-
hsh =
|
17
|
+
hsh = row.to_hash
|
14
18
|
ordered_args = column_definitions.map do |c|
|
15
|
-
|
16
|
-
hsh[c.name]
|
17
|
-
else
|
18
|
-
nil
|
19
|
-
end
|
19
|
+
hsh[c.name]
|
20
20
|
end
|
21
|
-
%{
|
21
|
+
%{SELECT #{merge_function}(#{quote_values(ordered_args)})}
|
22
22
|
end
|
23
23
|
|
24
24
|
def execute(sql)
|
25
25
|
connection.exec sql
|
26
26
|
end
|
27
27
|
|
28
|
-
def
|
29
|
-
|
28
|
+
def quote_string(v)
|
29
|
+
SINGLE_QUOTE + connection.escape_string(v) + SINGLE_QUOTE
|
30
30
|
end
|
31
31
|
|
32
|
-
def
|
33
|
-
|
32
|
+
def quote_binary(v)
|
33
|
+
E_AND_SINGLE_QUOTE + connection.escape_bytea(v) + SINGLE_QUOTE
|
34
34
|
end
|
35
35
|
|
36
|
-
|
37
|
-
|
38
|
-
def quote_ident(k)
|
39
|
-
SINGLE_QUOTE + connection.quote_ident(k) + SINGLE_QUOTE
|
36
|
+
def quote_time(v)
|
37
|
+
quote_string [v.strftime(ISO8601_DATETIME), sprintf(USEC_SPRINTF, v.usec)].join('.')
|
40
38
|
end
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
case v
|
45
|
-
when NilClass
|
46
|
-
'NULL'
|
47
|
-
when String, Symbol
|
48
|
-
SINGLE_QUOTE + connection.escape_string(v.to_s) + SINGLE_QUOTE
|
49
|
-
else
|
50
|
-
v
|
51
|
-
end
|
39
|
+
|
40
|
+
def quote_big_decimal(v)
|
41
|
+
v.to_s('F')
|
52
42
|
end
|
53
|
-
|
43
|
+
|
44
|
+
def quote_boolean(v)
|
45
|
+
v ? 'TRUE' : 'FALSE'
|
46
|
+
end
|
47
|
+
|
48
|
+
def quote_ident(k)
|
49
|
+
DOUBLE_QUOTE + connection.quote_ident(k.to_s) + DOUBLE_QUOTE
|
50
|
+
end
|
51
|
+
|
54
52
|
def column_definitions
|
55
53
|
@column_definitions ||= ColumnDefinition.all(connection, table_name)
|
56
54
|
end
|
57
55
|
|
58
56
|
private
|
59
57
|
|
60
|
-
def
|
61
|
-
|
62
|
-
end
|
63
|
-
|
64
|
-
def create_db_function(example_row)
|
65
|
-
@db_function_name = "pg_temp.merge_#{table_name}_#{Kernel.rand(1e11)}"
|
58
|
+
def create_merge_function(example_row)
|
59
|
+
@merge_function = "pg_temp.merge_#{table_name}_#{Kernel.rand(1e11)}"
|
66
60
|
execute <<-EOS
|
67
|
-
CREATE FUNCTION #{
|
61
|
+
CREATE FUNCTION #{merge_function}(#{column_definitions.map { |c| "#{c.name}_input #{c.sql_type} DEFAULT #{c.default || 'NULL'}" }.join(',') }) RETURNS VOID AS
|
68
62
|
$$
|
69
63
|
BEGIN
|
70
64
|
LOOP
|
@@ -87,7 +81,6 @@ END;
|
|
87
81
|
$$
|
88
82
|
LANGUAGE plpgsql;
|
89
83
|
EOS
|
90
|
-
@created_db_function_query = true
|
91
84
|
end
|
92
85
|
end
|
93
86
|
end
|