upsert 0.5.0 → 1.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37) hide show
  1. data/CHANGELOG +29 -0
  2. data/README.md +165 -105
  3. data/lib/upsert.rb +32 -17
  4. data/lib/upsert/cell.rb +0 -4
  5. data/lib/upsert/cell/{mysql2_client.rb → Mysql2_Client.rb} +0 -0
  6. data/lib/upsert/cell/{pg_connection.rb → PG_Connection.rb} +0 -0
  7. data/lib/upsert/cell/{sqlite3_database.rb → SQLite3_Database.rb} +0 -0
  8. data/lib/upsert/column_definition.rb +43 -0
  9. data/lib/upsert/column_definition/Mysql2_Client.rb +24 -0
  10. data/lib/upsert/column_definition/PG_Connection.rb +24 -0
  11. data/lib/upsert/column_definition/SQLite3_Database.rb +7 -0
  12. data/lib/upsert/connection.rb +3 -7
  13. data/lib/upsert/connection/{mysql2_client.rb → Mysql2_Client.rb} +0 -0
  14. data/lib/upsert/connection/{pg_connection.rb → PG_Connection.rb} +0 -0
  15. data/lib/upsert/connection/{sqlite3_database.rb → SQLite3_Database.rb} +0 -0
  16. data/lib/upsert/merge_function.rb +72 -0
  17. data/lib/upsert/merge_function/Mysql2_Client.rb +89 -0
  18. data/lib/upsert/merge_function/PG_Connection.rb +114 -0
  19. data/lib/upsert/merge_function/SQLite3_Database.rb +29 -0
  20. data/lib/upsert/row.rb +3 -7
  21. data/lib/upsert/row/{mysql2_client.rb → Mysql2_Client.rb} +1 -1
  22. data/lib/upsert/row/{pg_connection.rb → PG_Connection.rb} +0 -0
  23. data/lib/upsert/row/{sqlite3_database.rb → SQLite3_Database.rb} +0 -0
  24. data/lib/upsert/version.rb +1 -1
  25. data/spec/correctness_spec.rb +15 -1
  26. data/spec/database_functions_spec.rb +32 -26
  27. data/spec/logger_spec.rb +8 -8
  28. data/spec/spec_helper.rb +11 -5
  29. data/spec/type_safety_spec.rb +11 -0
  30. data/upsert.gemspec +4 -2
  31. metadata +41 -22
  32. data/lib/upsert/buffer.rb +0 -36
  33. data/lib/upsert/buffer/mysql2_client.rb +0 -80
  34. data/lib/upsert/buffer/pg_connection.rb +0 -19
  35. data/lib/upsert/buffer/pg_connection/column_definition.rb +0 -59
  36. data/lib/upsert/buffer/pg_connection/merge_function.rb +0 -179
  37. data/lib/upsert/buffer/sqlite3_database.rb +0 -21
data/lib/upsert/cell.rb CHANGED
@@ -1,7 +1,3 @@
1
- require 'upsert/cell/mysql2_client'
2
- require 'upsert/cell/pg_connection'
3
- require 'upsert/cell/sqlite3_database'
4
-
5
1
  class Upsert
6
2
  # @private
7
3
  class Cell
@@ -0,0 +1,43 @@
1
class Upsert
  # @private
  #
  # One column of a table, together with the quoted identifiers that the
  # merge functions use for the column itself and for its "selector" and
  # "setter" arguments.
  class ColumnDefinition
    # activerecord-3.2.X/lib/active_record/connection_adapters/XXXXXXXXX_adapter.rb#column_definitions
    #
    # The base implementation always raises; adapter-specific subclasses
    # provide their own version.
    def self.all(connection, table_name)
      raise "not impl"
    end

    attr_reader :name, :sql_type, :default
    attr_reader :quoted_name, :quoted_selector_name, :quoted_setter_name

    # Stores the raw column attributes and asks +connection+ to quote the
    # column name plus its "_sel" and "_set" variants.
    def initialize(connection, name, sql_type, default)
      @name, @sql_type, @default = name, sql_type, default
      @quoted_name          = connection.quote_ident(name)
      @quoted_selector_name = connection.quote_ident("#{name}_sel")
      @quoted_setter_name   = connection.quote_ident("#{name}_set")
    end

    # Argument declaration for the selector parameter: "<quoted sel name> <sql type>".
    def to_selector_arg
      [quoted_selector_name, sql_type].join(' ')
    end

    # Argument declaration for the setter parameter: "<quoted set name> <sql type>".
    def to_setter_arg
      [quoted_setter_name, sql_type].join(' ')
    end

    # Assignment fragment: "<quoted column> = <quoted set name>".
    def to_setter
      [quoted_name, quoted_setter_name].join(' = ')
    end

    # Comparison fragment: "<quoted column> = <quoted sel name>".
    def to_selector
      [quoted_name, quoted_selector_name].join(' = ')
    end
  end
end
@@ -0,0 +1,24 @@
1
class Upsert
  class ColumnDefinition
    # @private
    class Mysql2_Client < ColumnDefinition
      # Runs SHOW COLUMNS for +table_name+ and returns one definition per
      # reported column, ordered by column name.
      def self.all(connection, table_name)
        rows = connection.execute("SHOW COLUMNS FROM #{connection.quote_ident(table_name)}")
        definitions = rows.map do |row|
          # you don't know if mysql2 is going to give you an array or a hash... and you shouldn't specify, because it's sticky
          field, sql_type, default_value =
            if row.is_a?(Array)
              # ["name", "varchar(255)", "YES", "UNI", nil, ""]
              row.values_at(0, 1, 4)
            else
              # {"Field"=>"name", "Type"=>"varchar(255)", "Null"=>"NO", "Key"=>"PRI", "Default"=>nil, "Extra"=>""}
              [row['Field'], row['Type'], row['Default']]
            end
          new(connection, field, sql_type, default_value)
        end
        definitions.sort_by(&:name)
      end
    end
  end
end
@@ -0,0 +1,24 @@
1
class Upsert
  class ColumnDefinition
    # @private
    class PG_Connection < ColumnDefinition
      class << self
        # activerecord-3.2.5/lib/active_record/connection_adapters/postgresql_adapter.rb#column_definitions
        #
        # Queries the system catalogs for the live (attnum > 0, not dropped)
        # columns of +table_name+ and returns one definition per column,
        # ordered by column name.
        #
        # The default expression is read with pg_get_expr(adbin, adrelid)
        # rather than from pg_attrdef.adsrc, because the adsrc column was
        # removed in PostgreSQL 12.
        def all(connection, table_name)
          res = connection.execute <<-EOS
            SELECT a.attname AS name, format_type(a.atttypid, a.atttypmod) AS sql_type, pg_get_expr(d.adbin, d.adrelid) AS default
            FROM pg_attribute a LEFT JOIN pg_attrdef d
              ON a.attrelid = d.adrelid AND a.attnum = d.adnum
            WHERE a.attrelid = '#{connection.quote_ident(table_name)}'::regclass
              AND a.attnum > 0 AND NOT a.attisdropped
          EOS
          res.map do |row|
            new connection, row['name'], row['sql_type'], row['default']
          end.sort_by(&:name)
        end
      end
    end
  end
end
@@ -0,0 +1,7 @@
1
class Upsert
  class ColumnDefinition
    # @private
    #
    # SQLite3 variant; adds nothing to ColumnDefinition.
    class SQLite3_Database < ColumnDefinition; end
  end
end
@@ -1,15 +1,11 @@
1
- require 'upsert/connection/mysql2_client'
2
- require 'upsert/connection/pg_connection'
3
- require 'upsert/connection/sqlite3_database'
4
-
5
1
  class Upsert
6
2
  # @private
7
3
  class Connection
8
- attr_reader :parent
4
+ attr_reader :controller
9
5
  attr_reader :raw_connection
10
6
 
11
- def initialize(parent, raw_connection)
12
- @parent = parent
7
+ def initialize(controller, raw_connection)
8
+ @controller = controller
13
9
  @raw_connection = raw_connection
14
10
  end
15
11
 
@@ -0,0 +1,72 @@
1
+ require 'zlib'
2
+
3
class Upsert
  # @private
  #
  # Base class for the per-adapter "merge function" that performs an upsert
  # for one combination of table, selector keys and setter keys. Subclasses
  # supply #create! and #execute.
  class MergeFunction
    # Longest function name this class will generate.
    MAX_NAME_LENGTH = 63

    class << self
      # Finds (or builds) the merge function matching +row+ and runs it.
      def execute(controller, row)
        lookup(controller, row).execute(row)
      end

      # Builds the function name from the table name and the key lists.
      #
      # Names longer than MAX_NAME_LENGTH are cut to MAX_NAME_LENGTH - 11
      # characters and suffixed with the decimal CRC32 of the full name
      # (at most 10 digits), so the result never exceeds MAX_NAME_LENGTH.
      #
      # @param table_name [String]
      # @param selector_keys [Array<String>]
      # @param setter_keys [Array<String>]
      # @return [String]
      def unique_name(table_name, selector_keys, setter_keys)
        full_name = [
          'upsert',
          table_name,
          'SEL',
          selector_keys.join('_A_'),
          'SET',
          setter_keys.join('_A_')
        ].join('_')
        return full_name if full_name.length <= MAX_NAME_LENGTH

        # maybe i should md5 instead
        crc32 = Zlib.crc32(full_name).to_s
        # String#[] instead of String#first: the latter only exists when
        # ActiveSupport is loaded.
        "#{full_name[0, MAX_NAME_LENGTH - 11]}#{crc32}"
      end

      # Returns the cached merge function for this table / selector keys /
      # setter keys combination, creating it on first use.
      def lookup(controller, row)
        @lookup ||= {}
        selector_keys = row.selector.keys
        setter_keys = row.setter.keys
        key = [controller.table_name, selector_keys, setter_keys]
        @lookup[key] ||= new(controller, selector_keys, setter_keys)
      end
    end

    attr_reader :controller
    attr_reader :selector_keys
    attr_reader :setter_keys

    # Remembers the keys and immediately calls #create!, which each
    # subclass implements.
    def initialize(controller, selector_keys, setter_keys)
      @controller = controller
      @selector_keys = selector_keys
      @setter_keys = setter_keys
      create!
    end

    # Memoized function name for this table and key combination.
    def name
      @name ||= MergeFunction.unique_name table_name, selector_keys, setter_keys
    end

    # The following simply delegate to the controller.

    def connection
      controller.connection
    end

    def table_name
      controller.table_name
    end

    def quoted_table_name
      controller.quoted_table_name
    end

    def column_definitions
      controller.column_definitions
    end
  end
end
@@ -0,0 +1,89 @@
1
+ require 'digest/md5'
2
+
3
class Upsert
  class MergeFunction
    # @private
    #
    # MySQL merge function, implemented as a stored procedure that is
    # invoked with CALL.
    class Mysql2_Client < MergeFunction
      MAX_NAME_LENGTH = 63

      class << self
        # http://stackoverflow.com/questions/733349/list-of-stored-procedures-functions-mysql-command-line
        #
        # Drops every stored procedure in the current database whose name
        # starts with "upsert_".
        def clear(connection)
          rows = connection.execute("SHOW PROCEDURE STATUS WHERE Db = DATABASE() AND Name LIKE 'upsert_%'")
          rows.each do |row|
            # mysql2 may hand back arrays or hashes; in array form the
            # columns are Db, Name, ... so the name is at index 1.
            name = row.is_a?(Array) ? row[1] : row['Name']
            connection.execute "DROP PROCEDURE IF EXISTS #{connection.quote_ident(name)}"
          end
        end
      end

      # Calls the stored procedure for +row+. If MySQL reports that the
      # procedure does not exist, it is recreated and the call retried
      # once; any other error, or a second failure, is re-raised.
      def execute(row)
        first_try = true
        begin
          connection.execute sql(row)
        rescue Mysql2::Error => e
          raise unless e.message =~ /PROCEDURE.*does not exist/i

          unless first_try
            Upsert.logger.info %{[upsert] Failed to create function #{name.inspect} for some reason}
            raise
          end
          Upsert.logger.info %{[upsert] Function #{name.inspect} went missing, trying to recreate}
          first_try = false
          create!
          retry
        end
      end

      # Builds the CALL statement with the selector values followed by the
      # setter values. The procedure name is quoted the same way #create!
      # quotes it.
      def sql(row)
        quoted_params = (row.selector.values + row.setter.values).map(&:quoted_value)
        %{CALL #{connection.quote_ident(name)}(#{quoted_params.join(', ')})}
      end

      # http://stackoverflow.com/questions/11371479/how-to-translate-postgresql-merge-db-aka-upsert-function-into-mysql/
      #
      # Drops and recreates the stored procedure. Its arguments are the
      # selector columns followed by the setter columns, both in the order
      # given by column_definitions.
      def create!
        Upsert.logger.info "[upsert] Creating or replacing database function #{name.inspect} on table #{table_name.inspect} for selector #{selector_keys.map(&:inspect).join(', ')} and setter #{setter_keys.map(&:inspect).join(', ')}"
        selector_column_definitions = column_definitions.select { |cd| selector_keys.include?(cd.name) }
        setter_column_definitions = column_definitions.select { |cd| setter_keys.include?(cd.name) }
        quoted_name = connection.quote_ident name
        connection.execute "DROP PROCEDURE IF EXISTS #{quoted_name}"
        connection.execute(%{
          CREATE PROCEDURE #{quoted_name}(#{(selector_column_definitions.map(&:to_selector_arg) + setter_column_definitions.map(&:to_setter_arg)).join(', ')})
          BEGIN
            DECLARE done BOOLEAN;
            REPEAT
              BEGIN
                -- If there is a unique key constraint error then
                -- someone made a concurrent insert. Reset the sentinel
                -- and try again.
                DECLARE ER_DUP_UNIQUE CONDITION FOR 23000;
                DECLARE ER_INTEG CONDITION FOR 1062;
                DECLARE CONTINUE HANDLER FOR ER_DUP_UNIQUE BEGIN
                  SET done = FALSE;
                END;

                DECLARE CONTINUE HANDLER FOR ER_INTEG BEGIN
                  SET done = TRUE;
                END;

                SET done = TRUE;
                SELECT COUNT(*) INTO @count FROM #{quoted_table_name} WHERE #{selector_column_definitions.map(&:to_selector).join(' AND ')};
                -- Race condition here. If a concurrent INSERT is made after
                -- the SELECT but before the INSERT below we'll get a duplicate
                -- key error. But the handler above will take care of that.
                IF @count > 0 THEN
                  -- UPDATE table_name SET b = b_SET WHERE a = a_SEL;
                  UPDATE #{quoted_table_name} SET #{setter_column_definitions.map(&:to_setter).join(', ')} WHERE #{selector_column_definitions.map(&:to_selector).join(' AND ')};
                ELSE
                  -- INSERT INTO table_name (a, b) VALUES (k, data);
                  INSERT INTO #{quoted_table_name} (#{setter_column_definitions.map(&:quoted_name).join(', ')}) VALUES (#{setter_column_definitions.map(&:quoted_setter_name).join(', ')});
                END IF;
              END;
            UNTIL done END REPEAT;
          END
        })
      end
    end
  end
end
@@ -0,0 +1,114 @@
1
class Upsert
  class MergeFunction
    # @private
    #
    # PostgreSQL merge function, implemented as a plpgsql function that is
    # invoked with SELECT and bind parameters.
    class PG_Connection < MergeFunction
      MAX_NAME_LENGTH = 63

      class << self
        # Drops every function whose name starts with "upsert_" (other than
        # the temporary helper itself), whatever its argument list.
        def clear(connection)
          # http://stackoverflow.com/questions/7622908/postgresql-drop-function-without-knowing-the-number-type-of-parameters
          connection.execute(%{
            CREATE OR REPLACE FUNCTION pg_temp.upsert_delfunc(text)
              RETURNS void AS
            $BODY$
            DECLARE
              _sql text;
            BEGIN
              FOR _sql IN
                SELECT 'DROP FUNCTION ' || quote_ident(n.nspname)
                                 || '.' || quote_ident(p.proname)
                                 || '(' || pg_catalog.pg_get_function_identity_arguments(p.oid) || ');'
                FROM   pg_catalog.pg_proc p
                LEFT   JOIN pg_catalog.pg_namespace n ON n.oid = p.pronamespace
                WHERE  p.proname = $1
                AND    pg_catalog.pg_function_is_visible(p.oid) -- you may or may not want this
              LOOP
                EXECUTE _sql;
              END LOOP;
            END;
            $BODY$
              LANGUAGE plpgsql;
          })
          connection.execute(%{SELECT proname FROM pg_proc WHERE proname LIKE 'upsert_%'}).each do |row|
            k = row['proname']
            next if k == 'upsert_delfunc'
            Upsert.logger.info %{[upsert] Dropping function #{k.inspect}}
            connection.execute %{SELECT pg_temp.upsert_delfunc('#{k}')}
          end
        end
      end

      # Runs the function with the selector bind values followed by the
      # setter bind values. If PostgreSQL reports that the function does
      # not exist, it is recreated and the call retried once; any other
      # error, or a second failure, is re-raised.
      def execute(row)
        first_try = true
        bind_selector_values = row.selector.values.map(&:bind_value)
        bind_setter_values = row.setter.values.map(&:bind_value)
        begin
          connection.execute sql, (bind_selector_values + bind_setter_values)
        rescue PG::Error => pg_error
          # The name is escaped so that regex metacharacters in it are
          # matched literally.
          raise unless pg_error.message =~ /function #{Regexp.escape(name)}.* does not exist/i

          unless first_try
            Upsert.logger.info %{[upsert] Failed to create function #{name.inspect} for some reason}
            raise
          end
          Upsert.logger.info %{[upsert] Function #{name.inspect} went missing, trying to recreate}
          first_try = false
          create!
          retry
        end
      end

      # Memoized "SELECT name($1, $2, ...)" with one placeholder per
      # selector key and per setter key.
      def sql
        @sql ||= begin
          bind_params = (1..(selector_keys.length + setter_keys.length)).map { |i| "$#{i}" }
          %{SELECT #{name}(#{bind_params.join(', ')})}
        end
      end

      # the "canonical example" from http://www.postgresql.org/docs/9.1/static/plpgsql-control-structures.html#PLPGSQL-UPSERT-EXAMPLE
      # differentiate between selector and setter
      #
      # Creates or replaces the function. Its arguments are the selector
      # columns followed by the setter columns, both in the order given by
      # column_definitions.
      def create!
        Upsert.logger.info "[upsert] Creating or replacing database function #{name.inspect} on table #{table_name.inspect} for selector #{selector_keys.map(&:inspect).join(', ')} and setter #{setter_keys.map(&:inspect).join(', ')}"
        selector_column_definitions = column_definitions.select { |cd| selector_keys.include?(cd.name) }
        setter_column_definitions = column_definitions.select { |cd| setter_keys.include?(cd.name) }
        connection.execute(%{
          CREATE OR REPLACE FUNCTION #{name}(#{(selector_column_definitions.map(&:to_selector_arg) + setter_column_definitions.map(&:to_setter_arg)).join(', ')}) RETURNS VOID AS
          $$
          DECLARE
            first_try INTEGER := 1;
          BEGIN
            LOOP
              -- first try to update the key
              UPDATE #{quoted_table_name} SET #{setter_column_definitions.map(&:to_setter).join(', ')}
                WHERE #{selector_column_definitions.map(&:to_selector).join(' AND ') };
              IF found THEN
                RETURN;
              END IF;
              -- not there, so try to insert the key
              -- if someone else inserts the same key concurrently,
              -- we could get a unique-key failure
              BEGIN
                INSERT INTO #{quoted_table_name}(#{setter_column_definitions.map(&:quoted_name).join(', ')}) VALUES (#{setter_column_definitions.map(&:quoted_setter_name).join(', ')});
                RETURN;
              EXCEPTION WHEN unique_violation THEN
                -- seamusabshere 9/20/12 only retry once
                IF (first_try = 1) THEN
                  first_try := 0;
                ELSE
                  RETURN;
                END IF;
                -- Do nothing, and loop to try the UPDATE again.
              END;
            END LOOP;
          END;
          $$
          LANGUAGE plpgsql;
        })
      end
    end
  end
end
@@ -0,0 +1,29 @@
1
class Upsert
  class MergeFunction
    # @private
    #
    # SQLite3 merge "function": nothing is created in the database; each
    # upsert is an INSERT OR IGNORE followed by an UPDATE.
    class SQLite3_Database < MergeFunction
      attr_reader :quoted_setter_names, :quoted_selector_names

      # Runs the parent initializer, then caches the quoted setter and
      # selector column names.
      def initialize(*)
        super
        @quoted_setter_names = setter_keys.map { |key| connection.quote_ident(key) }
        @quoted_selector_names = selector_keys.map { |key| connection.quote_ident(key) }
      end

      def create!
        # not necessary
      end

      # Inserts the setter values (ignored on conflict), then updates the
      # row(s) matching the selector with the same setter values. Returns
      # whatever the UPDATE's connection.execute call returns.
      def execute(row)
        setter_binds = row.setter.values.map(&:bind_value)

        columns = quoted_setter_names.join(',')
        placeholders = (['?'] * setter_binds.length).join(',')
        connection.execute(
          %{INSERT OR IGNORE INTO #{quoted_table_name} (#{columns}) VALUES (#{placeholders})},
          setter_binds
        )

        assignments = quoted_setter_names.map { |quoted| "#{quoted}=?" }.join(',')
        conditions = quoted_selector_names.map { |quoted| "#{quoted}=?" }.join(' AND ')
        selector_binds = row.selector.values.map(&:bind_value)
        connection.execute(
          %{UPDATE #{quoted_table_name} SET #{assignments} WHERE #{conditions}},
          setter_binds + selector_binds
        )
      end
    end
  end
end