upsert 0.4.0 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG +23 -1
- data/README.md +31 -18
- data/Rakefile +1 -0
- data/lib/upsert.rb +41 -10
- data/lib/upsert/active_record_upsert.rb +2 -2
- data/lib/upsert/binary.rb +1 -2
- data/lib/upsert/buffer/mysql2_client.rb +26 -13
- data/lib/upsert/buffer/pg_connection.rb +3 -38
- data/lib/upsert/buffer/pg_connection/column_definition.rb +59 -0
- data/lib/upsert/buffer/pg_connection/merge_function.rb +107 -66
- data/lib/upsert/buffer/sqlite3_database.rb +10 -2
- data/lib/upsert/cell.rb +9 -0
- data/lib/upsert/cell/mysql2_client.rb +16 -0
- data/lib/upsert/cell/pg_connection.rb +28 -0
- data/lib/upsert/cell/sqlite3_database.rb +36 -0
- data/lib/upsert/connection.rb +1 -1
- data/lib/upsert/connection/pg_connection.rb +8 -27
- data/lib/upsert/connection/sqlite3_database.rb +8 -24
- data/lib/upsert/row.rb +33 -47
- data/lib/upsert/row/mysql2_client.rb +21 -0
- data/lib/upsert/row/pg_connection.rb +7 -0
- data/lib/upsert/row/sqlite3_database.rb +7 -0
- data/lib/upsert/version.rb +1 -1
- data/spec/binary_spec.rb +2 -0
- data/spec/correctness_spec.rb +46 -13
- data/spec/database_functions_spec.rb +2 -2
- data/spec/database_spec.rb +2 -2
- data/spec/logger_spec.rb +1 -1
- data/spec/reserved_words_spec.rb +3 -3
- data/spec/spec_helper.rb +18 -12
- data/spec/speed_spec.rb +13 -13
- data/upsert.gemspec +6 -2
- metadata +12 -4
@@ -1,118 +1,154 @@
|
|
1
1
|
require 'digest/md5'
|
2
2
|
|
3
3
|
class Upsert
|
4
|
-
# @private
|
5
4
|
class Buffer
|
6
5
|
class PG_Connection < Buffer
|
6
|
+
# @private
|
7
7
|
class MergeFunction
|
8
8
|
class << self
|
9
9
|
def execute(buffer, row)
|
10
|
-
first_try = true
|
11
|
-
begin
|
12
|
-
buffer.parent.connection.execute sql(buffer, row)
|
13
|
-
rescue PG::Error => pg_error
|
14
|
-
if first_try and pg_error.message =~ /function upsert_(.+) does not exist/
|
15
|
-
Upsert.logger.info %{[upsert] Function #{"upsert_#{$1}".inspect} went missing, trying to recreate}
|
16
|
-
first_try = false
|
17
|
-
@lookup.clear
|
18
|
-
retry
|
19
|
-
else
|
20
|
-
raise pg_error
|
21
|
-
end
|
22
|
-
end
|
23
|
-
end
|
24
|
-
|
25
|
-
def sql(buffer, row)
|
26
10
|
merge_function = lookup buffer, row
|
27
|
-
|
11
|
+
merge_function.execute row
|
28
12
|
end
|
29
13
|
|
30
|
-
def
|
31
|
-
[
|
14
|
+
def unique_name(table_name, selector, setter)
|
15
|
+
parts = [
|
16
|
+
'upsert',
|
32
17
|
table_name,
|
33
|
-
|
34
|
-
|
35
|
-
|
18
|
+
'SEL',
|
19
|
+
selector.join('_A_'),
|
20
|
+
'SET',
|
21
|
+
setter.join('_A_')
|
22
|
+
].join('_')
|
23
|
+
# maybe i should md5 instead
|
24
|
+
crc32 = Zlib.crc32(parts).to_s
|
25
|
+
[ parts.first(MAX_NAME_LENGTH-11), crc32 ].join
|
36
26
|
end
|
37
27
|
|
38
28
|
def lookup(buffer, row)
|
39
29
|
@lookup ||= {}
|
40
|
-
|
41
|
-
|
42
|
-
|
30
|
+
selector = row.selector.keys
|
31
|
+
setter = row.setter.keys
|
32
|
+
key = [buffer.parent.table_name, selector, setter]
|
33
|
+
@lookup[key] ||= new(buffer, selector, setter)
|
34
|
+
end
|
35
|
+
|
36
|
+
def clear(buffer)
|
37
|
+
connection = buffer.parent.connection
|
38
|
+
# http://stackoverflow.com/questions/7622908/postgresql-drop-function-without-knowing-the-number-type-of-parameters
|
39
|
+
connection.execute <<-EOS
|
40
|
+
CREATE OR REPLACE FUNCTION pg_temp.upsert_delfunc(text)
|
41
|
+
RETURNS void AS
|
42
|
+
$BODY$
|
43
|
+
DECLARE
|
44
|
+
_sql text;
|
45
|
+
BEGIN
|
46
|
+
|
47
|
+
FOR _sql IN
|
48
|
+
SELECT 'DROP FUNCTION ' || quote_ident(n.nspname)
|
49
|
+
|| '.' || quote_ident(p.proname)
|
50
|
+
|| '(' || pg_catalog.pg_get_function_identity_arguments(p.oid) || ');'
|
51
|
+
FROM pg_catalog.pg_proc p
|
52
|
+
LEFT JOIN pg_catalog.pg_namespace n ON n.oid = p.pronamespace
|
53
|
+
WHERE p.proname = $1
|
54
|
+
AND pg_catalog.pg_function_is_visible(p.oid) -- you may or may not want this
|
55
|
+
LOOP
|
56
|
+
EXECUTE _sql;
|
57
|
+
END LOOP;
|
58
|
+
|
59
|
+
END;
|
60
|
+
$BODY$
|
61
|
+
LANGUAGE plpgsql;
|
62
|
+
EOS
|
63
|
+
res = connection.execute(%{SELECT proname FROM pg_proc WHERE proname LIKE 'upsert_%'})
|
64
|
+
res.each do |row|
|
65
|
+
k = row['proname']
|
66
|
+
next if k == 'upsert_delfunc'
|
67
|
+
Upsert.logger.info %{[upsert] Dropping function #{k.inspect}}
|
68
|
+
connection.execute %{SELECT pg_temp.upsert_delfunc('#{k}')}
|
69
|
+
end
|
43
70
|
end
|
44
71
|
end
|
45
72
|
|
73
|
+
MAX_NAME_LENGTH = 63
|
74
|
+
|
46
75
|
attr_reader :buffer
|
47
76
|
attr_reader :selector
|
48
|
-
attr_reader :
|
77
|
+
attr_reader :setter
|
49
78
|
|
50
|
-
def initialize(buffer, selector,
|
79
|
+
def initialize(buffer, selector, setter)
|
51
80
|
@buffer = buffer
|
52
81
|
@selector = selector
|
53
|
-
@
|
82
|
+
@setter = setter
|
54
83
|
create!
|
55
84
|
end
|
56
85
|
|
57
86
|
def name
|
58
|
-
@name ||=
|
87
|
+
@name ||= MergeFunction.unique_name table_name, selector, setter
|
59
88
|
end
|
60
89
|
|
61
|
-
def
|
62
|
-
|
63
|
-
|
64
|
-
|
90
|
+
def execute(row)
|
91
|
+
first_try = true
|
92
|
+
bind_selector_values = row.selector.values.map(&:bind_value)
|
93
|
+
bind_setter_values = row.setter.values.map(&:bind_value)
|
94
|
+
begin
|
95
|
+
connection.execute sql, (bind_selector_values + bind_setter_values)
|
96
|
+
rescue PG::Error => pg_error
|
97
|
+
if pg_error.message =~ /function #{name}.* does not exist/i
|
98
|
+
if first_try
|
99
|
+
Upsert.logger.info %{[upsert] Function #{name.inspect} went missing, trying to recreate}
|
100
|
+
first_try = false
|
101
|
+
create!
|
102
|
+
retry
|
103
|
+
else
|
104
|
+
Upsert.logger.info %{[upsert] Failed to create function #{name.inspect} for some reason}
|
105
|
+
raise pg_error
|
106
|
+
end
|
107
|
+
else
|
108
|
+
raise pg_error
|
109
|
+
end
|
110
|
+
end
|
65
111
|
end
|
66
112
|
|
67
113
|
private
|
68
114
|
|
69
|
-
def
|
70
|
-
@
|
115
|
+
def sql
|
116
|
+
@sql ||= begin
|
117
|
+
bind_params = []
|
118
|
+
1.upto(selector.length + setter.length) { |i| bind_params << "$#{i}" }
|
119
|
+
%{SELECT #{name}(#{bind_params.join(', ')})}
|
120
|
+
end
|
71
121
|
end
|
72
122
|
|
73
123
|
def connection
|
74
124
|
buffer.parent.connection
|
75
125
|
end
|
76
126
|
|
77
|
-
def
|
78
|
-
buffer.parent.
|
127
|
+
def table_name
|
128
|
+
buffer.parent.table_name
|
79
129
|
end
|
80
130
|
|
81
|
-
|
82
|
-
|
83
|
-
# activerecord-3.2.5/lib/active_record/connection_adapters/postgresql_adapter.rb#column_definitions
|
84
|
-
def get_column_definitions
|
85
|
-
res = connection.execute <<-EOS
|
86
|
-
SELECT a.attname AS name, format_type(a.atttypid, a.atttypmod) AS sql_type, d.adsrc AS default
|
87
|
-
FROM pg_attribute a LEFT JOIN pg_attrdef d
|
88
|
-
ON a.attrelid = d.adrelid AND a.attnum = d.adnum
|
89
|
-
WHERE a.attrelid = '#{quoted_table_name}'::regclass
|
90
|
-
AND a.attnum > 0 AND NOT a.attisdropped
|
91
|
-
EOS
|
92
|
-
unsorted = res.select do |row|
|
93
|
-
columns.include? row['name']
|
94
|
-
end.inject({}) do |memo, row|
|
95
|
-
k = row['name']
|
96
|
-
memo[k] = ColumnDefinition.new connection.quote_ident(k), connection.quote_ident("#{k}_input"), row['sql_type'], row['default']
|
97
|
-
memo
|
98
|
-
end
|
99
|
-
columns.map do |k|
|
100
|
-
unsorted[k]
|
101
|
-
end
|
131
|
+
def quoted_table_name
|
132
|
+
buffer.parent.quoted_table_name
|
102
133
|
end
|
103
134
|
|
104
135
|
# the "canonical example" from http://www.postgresql.org/docs/9.1/static/plpgsql-control-structures.html#PLPGSQL-UPSERT-EXAMPLE
|
136
|
+
# differentiate between selector and setter
|
105
137
|
def create!
|
106
|
-
Upsert.logger.info "[upsert] Creating or replacing database function #{name.inspect} on table #{
|
107
|
-
column_definitions =
|
138
|
+
Upsert.logger.info "[upsert] Creating or replacing database function #{name.inspect} on table #{table_name.inspect} for selector #{selector.map(&:inspect).join(', ')} and setter #{setter.map(&:inspect).join(', ')}"
|
139
|
+
column_definitions = ColumnDefinition.all buffer, table_name
|
140
|
+
selector_column_definitions = column_definitions.select { |cd| selector.include?(cd.name) }
|
141
|
+
setter_column_definitions = column_definitions.select { |cd| setter.include?(cd.name) }
|
108
142
|
connection.execute <<-EOS
|
109
|
-
CREATE OR REPLACE FUNCTION #{name}(#{
|
143
|
+
CREATE OR REPLACE FUNCTION #{name}(#{(selector_column_definitions.map(&:to_selector_arg) + setter_column_definitions.map(&:to_setter_arg)).join(', ')}) RETURNS VOID AS
|
110
144
|
$$
|
145
|
+
DECLARE
|
146
|
+
first_try INTEGER := 1;
|
111
147
|
BEGIN
|
112
148
|
LOOP
|
113
149
|
-- first try to update the key
|
114
|
-
UPDATE #{quoted_table_name} SET #{
|
115
|
-
WHERE #{
|
150
|
+
UPDATE #{quoted_table_name} SET #{setter_column_definitions.map(&:to_setter).join(', ')}
|
151
|
+
WHERE #{selector_column_definitions.map(&:to_selector).join(' AND ') };
|
116
152
|
IF found THEN
|
117
153
|
RETURN;
|
118
154
|
END IF;
|
@@ -120,9 +156,15 @@ BEGIN
|
|
120
156
|
-- if someone else inserts the same key concurrently,
|
121
157
|
-- we could get a unique-key failure
|
122
158
|
BEGIN
|
123
|
-
INSERT INTO #{quoted_table_name}(#{
|
159
|
+
INSERT INTO #{quoted_table_name}(#{setter_column_definitions.map(&:quoted_name).join(', ')}) VALUES (#{setter_column_definitions.map(&:quoted_setter_name).join(', ')});
|
124
160
|
RETURN;
|
125
161
|
EXCEPTION WHEN unique_violation THEN
|
162
|
+
-- seamusabshere 9/20/12 only retry once
|
163
|
+
IF (first_try = 1) THEN
|
164
|
+
first_try := 0;
|
165
|
+
ELSE
|
166
|
+
RETURN;
|
167
|
+
END IF;
|
126
168
|
-- Do nothing, and loop to try the UPDATE again.
|
127
169
|
END;
|
128
170
|
END LOOP;
|
@@ -131,7 +173,6 @@ $$
|
|
131
173
|
LANGUAGE plpgsql;
|
132
174
|
EOS
|
133
175
|
end
|
134
|
-
|
135
176
|
end
|
136
177
|
end
|
137
178
|
end
|
@@ -5,8 +5,16 @@ class Upsert
|
|
5
5
|
def ready
|
6
6
|
return if rows.empty?
|
7
7
|
row = rows.shift
|
8
|
-
|
9
|
-
|
8
|
+
connection = parent.connection
|
9
|
+
bind_setter_values = row.setter.values.map(&:bind_value)
|
10
|
+
quoted_setter_names = row.setter.values.map(&:quoted_name)
|
11
|
+
quoted_selector_names = row.selector.values.map(&:quoted_name)
|
12
|
+
|
13
|
+
insert_or_ignore_sql = %{INSERT OR IGNORE INTO #{parent.quoted_table_name} (#{quoted_setter_names.join(',')}) VALUES (#{Array.new(bind_setter_values.length, '?').join(',')})}
|
14
|
+
connection.execute insert_or_ignore_sql, bind_setter_values
|
15
|
+
|
16
|
+
update_sql = %{UPDATE #{parent.quoted_table_name} SET #{quoted_setter_names.map { |qk| "#{qk}=?" }.join(',')} WHERE #{quoted_selector_names.map { |qk| "#{qk}=?" }.join(' AND ')}}
|
17
|
+
connection.execute update_sql, (bind_setter_values + row.selector.values.map(&:bind_value))
|
10
18
|
end
|
11
19
|
end
|
12
20
|
end
|
data/lib/upsert/cell.rb
ADDED
@@ -0,0 +1,16 @@
|
|
1
|
+
class Upsert
|
2
|
+
class Cell
|
3
|
+
# @private
|
4
|
+
class Mysql2_Client < Cell
|
5
|
+
attr_reader :name
|
6
|
+
attr_reader :value
|
7
|
+
attr_reader :quoted_value
|
8
|
+
|
9
|
+
def initialize(connection, name, value)
|
10
|
+
@name = name
|
11
|
+
@value = value
|
12
|
+
@quoted_value = connection.quote_value value
|
13
|
+
end
|
14
|
+
end
|
15
|
+
end
|
16
|
+
end
|
@@ -0,0 +1,28 @@
|
|
1
|
+
class Upsert
|
2
|
+
class Cell
|
3
|
+
# @private
|
4
|
+
class PG_Connection < Cell
|
5
|
+
attr_reader :name
|
6
|
+
attr_reader :value
|
7
|
+
attr_reader :quoted_name
|
8
|
+
|
9
|
+
def initialize(connection, name, value)
|
10
|
+
@name = name
|
11
|
+
@value = value
|
12
|
+
@quoted_name = connection.quote_ident name
|
13
|
+
end
|
14
|
+
|
15
|
+
def bind_value
|
16
|
+
return @bind_value if defined?(@bind_value)
|
17
|
+
@bind_value = case value
|
18
|
+
when Upsert::Binary
|
19
|
+
{ :value => value.value, :format => 1 }
|
20
|
+
when Time, DateTime
|
21
|
+
[value.strftime(ISO8601_DATETIME), sprintf(USEC_SPRINTF, value.usec)].join('.')
|
22
|
+
else
|
23
|
+
value
|
24
|
+
end
|
25
|
+
end
|
26
|
+
end
|
27
|
+
end
|
28
|
+
end
|
@@ -0,0 +1,36 @@
|
|
1
|
+
class Upsert
|
2
|
+
class Cell
|
3
|
+
# @private
|
4
|
+
class SQLite3_Database < Cell
|
5
|
+
attr_reader :name
|
6
|
+
attr_reader :value
|
7
|
+
attr_reader :quoted_name
|
8
|
+
|
9
|
+
def initialize(connection, name, value)
|
10
|
+
@name = name
|
11
|
+
@value = value
|
12
|
+
@quoted_name = connection.quote_ident name
|
13
|
+
end
|
14
|
+
|
15
|
+
def bind_value
|
16
|
+
return @bind_value if defined?(@bind_value)
|
17
|
+
@bind_value = case value
|
18
|
+
when Upsert::Binary
|
19
|
+
SQLite3::Blob.new value.value
|
20
|
+
when BigDecimal
|
21
|
+
value.to_s('F')
|
22
|
+
when TrueClass
|
23
|
+
't'
|
24
|
+
when FalseClass
|
25
|
+
'f'
|
26
|
+
when Time, DateTime
|
27
|
+
[value.strftime(ISO8601_DATETIME), sprintf(USEC_SPRINTF, value.usec)].join('.')
|
28
|
+
when Date
|
29
|
+
value.strftime ISO8601_DATE
|
30
|
+
else
|
31
|
+
value
|
32
|
+
end
|
33
|
+
end
|
34
|
+
end
|
35
|
+
end
|
36
|
+
end
|
data/lib/upsert/connection.rb
CHANGED
@@ -2,38 +2,19 @@ class Upsert
|
|
2
2
|
class Connection
|
3
3
|
# @private
|
4
4
|
class PG_Connection < Connection
|
5
|
-
def execute(sql)
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
def quote_binary(v)
|
15
|
-
E_AND_SINGLE_QUOTE + raw_connection.escape_bytea(v) + SINGLE_QUOTE
|
16
|
-
end
|
17
|
-
|
18
|
-
def quote_time(v)
|
19
|
-
quote_string [v.strftime(ISO8601_DATETIME), sprintf(USEC_SPRINTF, v.usec)].join('.')
|
20
|
-
end
|
21
|
-
|
22
|
-
def quote_big_decimal(v)
|
23
|
-
v.to_s('F')
|
24
|
-
end
|
25
|
-
|
26
|
-
def quote_boolean(v)
|
27
|
-
v ? 'TRUE' : 'FALSE'
|
5
|
+
def execute(sql, params = nil)
|
6
|
+
if params
|
7
|
+
Upsert.logger.debug { %{[upsert] #{sql} with #{params.inspect}} }
|
8
|
+
raw_connection.exec sql, params
|
9
|
+
else
|
10
|
+
Upsert.logger.debug { %{[upsert] #{sql}} }
|
11
|
+
raw_connection.exec sql
|
12
|
+
end
|
28
13
|
end
|
29
14
|
|
30
15
|
def quote_ident(k)
|
31
16
|
raw_connection.quote_ident k.to_s
|
32
17
|
end
|
33
18
|
end
|
34
|
-
|
35
|
-
# @private
|
36
|
-
# backwards compatibility - https://github.com/seamusabshere/upsert/issues/2
|
37
|
-
PGconn = PG_Connection
|
38
19
|
end
|
39
20
|
end
|
@@ -2,35 +2,19 @@ class Upsert
|
|
2
2
|
class Connection
|
3
3
|
# @private
|
4
4
|
class SQLite3_Database < Connection
|
5
|
-
def execute(sql)
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
def quote_binary(v)
|
15
|
-
X_AND_SINGLE_QUOTE + v.unpack("H*")[0] + SINGLE_QUOTE
|
16
|
-
end
|
17
|
-
|
18
|
-
def quote_time(v)
|
19
|
-
quote_string [v.strftime(ISO8601_DATETIME), sprintf(USEC_SPRINTF, v.usec)].join('.')
|
5
|
+
def execute(sql, params = nil)
|
6
|
+
if params
|
7
|
+
Upsert.logger.debug { %{[upsert] #{sql} with #{params.inspect}} }
|
8
|
+
raw_connection.execute sql, params
|
9
|
+
else
|
10
|
+
Upsert.logger.debug { %{[upsert] #{sql}} }
|
11
|
+
raw_connection.execute sql
|
12
|
+
end
|
20
13
|
end
|
21
14
|
|
22
15
|
def quote_ident(k)
|
23
16
|
DOUBLE_QUOTE + SQLite3::Database.quote(k.to_s) + DOUBLE_QUOTE
|
24
17
|
end
|
25
|
-
|
26
|
-
def quote_boolean(v)
|
27
|
-
s = v ? 't' : 'f'
|
28
|
-
quote_string s
|
29
|
-
end
|
30
|
-
|
31
|
-
def quote_big_decimal(v)
|
32
|
-
v.to_f
|
33
|
-
end
|
34
18
|
end
|
35
19
|
end
|
36
20
|
end
|