upsert 0.5.0 → 1.0.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (37) hide show
  1. data/CHANGELOG +29 -0
  2. data/README.md +165 -105
  3. data/lib/upsert.rb +32 -17
  4. data/lib/upsert/cell.rb +0 -4
  5. data/lib/upsert/cell/{mysql2_client.rb → Mysql2_Client.rb} +0 -0
  6. data/lib/upsert/cell/{pg_connection.rb → PG_Connection.rb} +0 -0
  7. data/lib/upsert/cell/{sqlite3_database.rb → SQLite3_Database.rb} +0 -0
  8. data/lib/upsert/column_definition.rb +43 -0
  9. data/lib/upsert/column_definition/Mysql2_Client.rb +24 -0
  10. data/lib/upsert/column_definition/PG_Connection.rb +24 -0
  11. data/lib/upsert/column_definition/SQLite3_Database.rb +7 -0
  12. data/lib/upsert/connection.rb +3 -7
  13. data/lib/upsert/connection/{mysql2_client.rb → Mysql2_Client.rb} +0 -0
  14. data/lib/upsert/connection/{pg_connection.rb → PG_Connection.rb} +0 -0
  15. data/lib/upsert/connection/{sqlite3_database.rb → SQLite3_Database.rb} +0 -0
  16. data/lib/upsert/merge_function.rb +72 -0
  17. data/lib/upsert/merge_function/Mysql2_Client.rb +89 -0
  18. data/lib/upsert/merge_function/PG_Connection.rb +114 -0
  19. data/lib/upsert/merge_function/SQLite3_Database.rb +29 -0
  20. data/lib/upsert/row.rb +3 -7
  21. data/lib/upsert/row/{mysql2_client.rb → Mysql2_Client.rb} +1 -1
  22. data/lib/upsert/row/{pg_connection.rb → PG_Connection.rb} +0 -0
  23. data/lib/upsert/row/{sqlite3_database.rb → SQLite3_Database.rb} +0 -0
  24. data/lib/upsert/version.rb +1 -1
  25. data/spec/correctness_spec.rb +15 -1
  26. data/spec/database_functions_spec.rb +32 -26
  27. data/spec/logger_spec.rb +8 -8
  28. data/spec/spec_helper.rb +11 -5
  29. data/spec/type_safety_spec.rb +11 -0
  30. data/upsert.gemspec +4 -2
  31. metadata +41 -22
  32. data/lib/upsert/buffer.rb +0 -36
  33. data/lib/upsert/buffer/mysql2_client.rb +0 -80
  34. data/lib/upsert/buffer/pg_connection.rb +0 -19
  35. data/lib/upsert/buffer/pg_connection/column_definition.rb +0 -59
  36. data/lib/upsert/buffer/pg_connection/merge_function.rb +0 -179
  37. data/lib/upsert/buffer/sqlite3_database.rb +0 -21
data/lib/upsert/cell.rb CHANGED
@@ -1,7 +1,3 @@
1
- require 'upsert/cell/mysql2_client'
2
- require 'upsert/cell/pg_connection'
3
- require 'upsert/cell/sqlite3_database'
4
-
5
1
  class Upsert
6
2
  # @private
7
3
  class Cell
@@ -0,0 +1,43 @@
1
class Upsert
  # @private
  # Describes a single table column (name, SQL type, default) together with
  # the quoted identifiers used when generating merge-function SQL.
  class ColumnDefinition
    # activerecord-3.2.X/lib/active_record/connection_adapters/XXXXXXXXX_adapter.rb#column_definitions
    #
    # Placeholder on the base class; adapter-specific subclasses in this
    # library define their own version.
    def self.all(connection, table_name)
      raise "not impl"
    end

    attr_reader :name, :sql_type, :default
    attr_reader :quoted_name, :quoted_selector_name, :quoted_setter_name

    # The three quoted identifiers are computed once, up front, by asking the
    # connection to quote the column name and its "_sel" / "_set" variants.
    def initialize(connection, name, sql_type, default)
      @name, @sql_type, @default = name, sql_type, default
      @quoted_name = connection.quote_ident(name)
      @quoted_selector_name = connection.quote_ident("#{name}_sel")
      @quoted_setter_name = connection.quote_ident("#{name}_set")
    end

    # "<quoted selector name> <sql type>"
    def to_selector_arg
      format('%s %s', quoted_selector_name, sql_type)
    end

    # "<quoted setter name> <sql type>"
    def to_setter_arg
      format('%s %s', quoted_setter_name, sql_type)
    end

    # "<quoted column> = <quoted setter name>"
    def to_setter
      format('%s = %s', quoted_name, quoted_setter_name)
    end

    # "<quoted column> = <quoted selector name>"
    def to_selector
      format('%s = %s', quoted_name, quoted_selector_name)
    end
  end
end
@@ -0,0 +1,24 @@
1
class Upsert
  class ColumnDefinition
    # @private
    # Reads column definitions via SHOW COLUMNS.
    class Mysql2_Client < ColumnDefinition
      # Returns one definition per row of SHOW COLUMNS for the table,
      # ordered by column name.
      def self.all(connection, table_name)
        rows = connection.execute("SHOW COLUMNS FROM #{connection.quote_ident(table_name)}")
        definitions = rows.map do |row|
          # you don't know if mysql2 is going to give you an array or a hash... and you shouldn't specify, because it's sticky
          field, sql_type, default =
            case row
            when Array
              # ["name", "varchar(255)", "YES", "UNI", nil, ""]
              row.values_at(0, 1, 4)
            else
              # {"Field"=>"name", "Type"=>"varchar(255)", "Null"=>"NO", "Key"=>"PRI", "Default"=>nil, "Extra"=>""}
              [row['Field'], row['Type'], row['Default']]
            end
          new(connection, field, sql_type, default)
        end
        definitions.sort_by(&:name)
      end
    end
  end
end
@@ -0,0 +1,24 @@
1
class Upsert
  class ColumnDefinition
    # @private
    # Reads column definitions from the PostgreSQL system catalogs.
    class PG_Connection < ColumnDefinition
      class << self
        # activerecord-3.2.5/lib/active_record/connection_adapters/postgresql_adapter.rb#column_definitions
        #
        # Returns one definition per live (attnum > 0, not dropped) column of
        # the table, ordered by column name.
        #
        # The default expression is obtained with pg_get_expr(adbin, adrelid)
        # rather than pg_attrdef.adsrc: the adsrc column was removed in
        # PostgreSQL 12, so selecting it makes this query fail there.
        def all(connection, table_name)
          column_sql = <<-EOS
            SELECT a.attname AS name, format_type(a.atttypid, a.atttypmod) AS sql_type, pg_get_expr(d.adbin, d.adrelid) AS default
            FROM pg_attribute a LEFT JOIN pg_attrdef d
            ON a.attrelid = d.adrelid AND a.attnum = d.adnum
            WHERE a.attrelid = '#{connection.quote_ident(table_name)}'::regclass
            AND a.attnum > 0 AND NOT a.attisdropped
          EOS
          res = connection.execute column_sql
          res.map do |row|
            new connection, row['name'], row['sql_type'], row['default']
          end.sort_by do |cd|
            cd.name
          end
        end
      end
    end
  end
end
@@ -0,0 +1,7 @@
1
class Upsert
  class ColumnDefinition
    # @private
    # SQLite3 flavor: adds nothing of its own and inherits everything from
    # ColumnDefinition.
    class SQLite3_Database < ColumnDefinition; end
  end
end
@@ -1,15 +1,11 @@
1
- require 'upsert/connection/mysql2_client'
2
- require 'upsert/connection/pg_connection'
3
- require 'upsert/connection/sqlite3_database'
4
-
5
1
  class Upsert
6
2
  # @private
7
3
  class Connection
8
- attr_reader :parent
4
+ attr_reader :controller
9
5
  attr_reader :raw_connection
10
6
 
11
- def initialize(parent, raw_connection)
12
- @parent = parent
7
+ def initialize(controller, raw_connection)
8
+ @controller = controller
13
9
  @raw_connection = raw_connection
14
10
  end
15
11
 
@@ -0,0 +1,72 @@
1
+ require 'zlib'
2
+
3
+ class Upsert
4
+ # @private
5
+ class MergeFunction
6
+ MAX_NAME_LENGTH = 63
7
+
8
+ class << self
9
+ def execute(controller, row)
10
+ merge_function = lookup controller, row
11
+ merge_function.execute row
12
+ end
13
+
14
+ def unique_name(table_name, selector_keys, setter_keys)
15
+ parts = [
16
+ 'upsert',
17
+ table_name,
18
+ 'SEL',
19
+ selector_keys.join('_A_'),
20
+ 'SET',
21
+ setter_keys.join('_A_')
22
+ ].join('_')
23
+ if parts.length > MAX_NAME_LENGTH
24
+ # maybe i should md5 instead
25
+ crc32 = Zlib.crc32(parts).to_s
26
+ [ parts.first(MAX_NAME_LENGTH-11), crc32 ].join
27
+ else
28
+ parts
29
+ end
30
+ end
31
+
32
+ def lookup(controller, row)
33
+ @lookup ||= {}
34
+ selector_keys = row.selector.keys
35
+ setter_keys = row.setter.keys
36
+ key = [controller.table_name, selector_keys, setter_keys]
37
+ @lookup[key] ||= new(controller, selector_keys, setter_keys)
38
+ end
39
+ end
40
+
41
+ attr_reader :controller
42
+ attr_reader :selector_keys
43
+ attr_reader :setter_keys
44
+
45
+ def initialize(controller, selector_keys, setter_keys)
46
+ @controller = controller
47
+ @selector_keys = selector_keys
48
+ @setter_keys = setter_keys
49
+ create!
50
+ end
51
+
52
+ def name
53
+ @name ||= MergeFunction.unique_name table_name, selector_keys, setter_keys
54
+ end
55
+
56
+ def connection
57
+ controller.connection
58
+ end
59
+
60
+ def table_name
61
+ controller.table_name
62
+ end
63
+
64
+ def quoted_table_name
65
+ controller.quoted_table_name
66
+ end
67
+
68
+ def column_definitions
69
+ controller.column_definitions
70
+ end
71
+ end
72
+ end
@@ -0,0 +1,89 @@
1
+ require 'digest/md5'
2
+
3
class Upsert
  class MergeFunction
    # @private
    # MySQL flavor: the merge function is a stored procedure that is dropped
    # and recreated by #create! and invoked with CALL by #execute.
    class Mysql2_Client < MergeFunction
      MAX_NAME_LENGTH = 63

      class << self
        # http://stackoverflow.com/questions/733349/list-of-stored-procedures-functions-mysql-command-line
        #
        # Drops every stored procedure in the current database whose name
        # matches LIKE 'upsert_%'.
        def clear(connection)
          rows = connection.execute("SHOW PROCEDURE STATUS WHERE Db = DATABASE() AND Name LIKE 'upsert_%'")
          procedure_names = rows.map do |row|
            # mysql2 may hand back rows as arrays or as hashes (see the
            # matching note in ColumnDefinition::Mysql2_Client); in array
            # form Name is the second column, right after Db.
            row.is_a?(Array) ? row[1] : row['Name']
          end
          procedure_names.each do |name|
            connection.execute "DROP PROCEDURE IF EXISTS #{connection.quote_ident(name)}"
          end
        end
      end

      # Calls the stored procedure for +row+. If MySQL reports that the
      # procedure does not exist, it is recreated and the call retried once;
      # any other Mysql2::Error, or a second "does not exist", is re-raised.
      def execute(row)
        first_try = true
        begin
          connection.execute sql(row)
        rescue Mysql2::Error => e
          raise unless e.message =~ /PROCEDURE.*does not exist/i
          unless first_try
            Upsert.logger.info %{[upsert] Failed to create function #{name.inspect} for some reason}
            raise
          end
          Upsert.logger.info %{[upsert] Function #{name.inspect} went missing, trying to recreate}
          first_try = false
          create!
          retry
        end
      end

      # Builds the CALL statement: selector values first, then setter values.
      # The procedure name is quoted exactly as #create! quotes it, so a name
      # that needs quoting is still callable.
      def sql(row)
        quoted_params = (row.selector.values + row.setter.values).map(&:quoted_value)
        %{CALL #{connection.quote_ident(name)}(#{quoted_params.join(', ')})}
      end

      # http://stackoverflow.com/questions/11371479/how-to-translate-postgresql-merge-db-aka-upsert-function-into-mysql/
      #
      # Drops the procedure if present and creates it again. Arguments are the
      # selector columns followed by the setter columns, each in the order
      # they appear in column_definitions.
      def create!
        Upsert.logger.info "[upsert] Creating or replacing database function #{name.inspect} on table #{table_name.inspect} for selector #{selector_keys.map(&:inspect).join(', ')} and setter #{setter_keys.map(&:inspect).join(', ')}"
        selector_column_definitions = column_definitions.select { |cd| selector_keys.include?(cd.name) }
        setter_column_definitions = column_definitions.select { |cd| setter_keys.include?(cd.name) }
        quoted_name = connection.quote_ident name
        connection.execute "DROP PROCEDURE IF EXISTS #{quoted_name}"
        connection.execute(%{
          CREATE PROCEDURE #{quoted_name}(#{(selector_column_definitions.map(&:to_selector_arg) + setter_column_definitions.map(&:to_setter_arg)).join(', ')})
          BEGIN
            DECLARE done BOOLEAN;
            REPEAT
              BEGIN
                -- If there is a unique key constraint error then
                -- someone made a concurrent insert. Reset the sentinel
                -- and try again.
                DECLARE ER_DUP_UNIQUE CONDITION FOR 23000;
                DECLARE ER_INTEG CONDITION FOR 1062;
                DECLARE CONTINUE HANDLER FOR ER_DUP_UNIQUE BEGIN
                  SET done = FALSE;
                END;

                DECLARE CONTINUE HANDLER FOR ER_INTEG BEGIN
                  SET done = TRUE;
                END;

                SET done = TRUE;
                SELECT COUNT(*) INTO @count FROM #{quoted_table_name} WHERE #{selector_column_definitions.map(&:to_selector).join(' AND ')};
                -- Race condition here. If a concurrent INSERT is made after
                -- the SELECT but before the INSERT below we'll get a duplicate
                -- key error. But the handler above will take care of that.
                IF @count > 0 THEN
                  -- UPDATE table_name SET b = b_SET WHERE a = a_SEL;
                  UPDATE #{quoted_table_name} SET #{setter_column_definitions.map(&:to_setter).join(', ')} WHERE #{selector_column_definitions.map(&:to_selector).join(' AND ')};
                ELSE
                  -- INSERT INTO table_name (a, b) VALUES (k, data);
                  INSERT INTO #{quoted_table_name} (#{setter_column_definitions.map(&:quoted_name).join(', ')}) VALUES (#{setter_column_definitions.map(&:quoted_setter_name).join(', ')});
                END IF;
              END;
            UNTIL done END REPEAT;
          END
        })
      end
    end
  end
end
@@ -0,0 +1,114 @@
1
class Upsert
  class MergeFunction
    # @private
    # PostgreSQL flavor: the merge function is a plpgsql function created by
    # #create! and invoked with SELECT and bind parameters by #execute.
    class PG_Connection < MergeFunction
      MAX_NAME_LENGTH = 63

      class << self
        # Drops every function whose name matches LIKE 'upsert_%', using a
        # temporary helper function (pg_temp.upsert_delfunc) that drops all
        # visible overloads of a given name.
        def clear(connection)
          # http://stackoverflow.com/questions/7622908/postgresql-drop-function-without-knowing-the-number-type-of-parameters
          connection.execute(%{
            CREATE OR REPLACE FUNCTION pg_temp.upsert_delfunc(text)
              RETURNS void AS
            $BODY$
            DECLARE
              _sql text;
            BEGIN
              FOR _sql IN
                SELECT 'DROP FUNCTION ' || quote_ident(n.nspname)
                  || '.' || quote_ident(p.proname)
                  || '(' || pg_catalog.pg_get_function_identity_arguments(p.oid) || ');'
                FROM pg_catalog.pg_proc p
                LEFT JOIN pg_catalog.pg_namespace n ON n.oid = p.pronamespace
                WHERE p.proname = $1
                AND pg_catalog.pg_function_is_visible(p.oid) -- you may or may not want this
              LOOP
                EXECUTE _sql;
              END LOOP;
            END;
            $BODY$
              LANGUAGE plpgsql;
          })
          connection.execute(%{SELECT proname FROM pg_proc WHERE proname LIKE 'upsert_%'}).each do |row|
            k = row['proname']
            # the helper itself also matches the LIKE pattern
            next if k == 'upsert_delfunc'
            Upsert.logger.info %{[upsert] Dropping function #{k.inspect}}
            # Pass the name as a bind parameter instead of splicing it into
            # the SQL text, so a name containing a quote cannot break (or
            # alter) the statement.
            connection.execute %{SELECT pg_temp.upsert_delfunc($1)}, [k]
          end
        end
      end

      # Runs the merge function for +row+, binding selector values first and
      # setter values second. If PostgreSQL reports that the function does
      # not exist, it is recreated and the call retried once; any other
      # PG::Error, or a second "does not exist", is re-raised.
      def execute(row)
        first_try = true
        bind_selector_values = row.selector.values.map(&:bind_value)
        bind_setter_values = row.setter.values.map(&:bind_value)
        begin
          connection.execute sql, (bind_selector_values + bind_setter_values)
        rescue PG::Error => pg_error
          # Escape the name: identifiers may legally contain "$", which would
          # otherwise act as a regexp anchor and prevent the match.
          raise unless pg_error.message =~ /function #{Regexp.escape(name)}.* does not exist/i
          unless first_try
            Upsert.logger.info %{[upsert] Failed to create function #{name.inspect} for some reason}
            raise
          end
          Upsert.logger.info %{[upsert] Function #{name.inspect} went missing, trying to recreate}
          first_try = false
          create!
          retry
        end
      end

      # Memoized "SELECT name($1, $2, ...)" with one placeholder per selector
      # key followed by one per setter key.
      def sql
        @sql ||= begin
          bind_params = []
          1.upto(selector_keys.length + setter_keys.length) { |i| bind_params << "$#{i}" }
          %{SELECT #{name}(#{bind_params.join(', ')})}
        end
      end

      # the "canonical example" from http://www.postgresql.org/docs/9.1/static/plpgsql-control-structures.html#PLPGSQL-UPSERT-EXAMPLE
      # differentiate between selector and setter
      #
      # Arguments are the selector columns followed by the setter columns,
      # each in the order they appear in column_definitions.
      def create!
        Upsert.logger.info "[upsert] Creating or replacing database function #{name.inspect} on table #{table_name.inspect} for selector #{selector_keys.map(&:inspect).join(', ')} and setter #{setter_keys.map(&:inspect).join(', ')}"
        selector_column_definitions = column_definitions.select { |cd| selector_keys.include?(cd.name) }
        setter_column_definitions = column_definitions.select { |cd| setter_keys.include?(cd.name) }
        connection.execute(%{
          CREATE OR REPLACE FUNCTION #{name}(#{(selector_column_definitions.map(&:to_selector_arg) + setter_column_definitions.map(&:to_setter_arg)).join(', ')}) RETURNS VOID AS
          $$
          DECLARE
            first_try INTEGER := 1;
          BEGIN
            LOOP
              -- first try to update the key
              UPDATE #{quoted_table_name} SET #{setter_column_definitions.map(&:to_setter).join(', ')}
                WHERE #{selector_column_definitions.map(&:to_selector).join(' AND ') };
              IF found THEN
                RETURN;
              END IF;
              -- not there, so try to insert the key
              -- if someone else inserts the same key concurrently,
              -- we could get a unique-key failure
              BEGIN
                INSERT INTO #{quoted_table_name}(#{setter_column_definitions.map(&:quoted_name).join(', ')}) VALUES (#{setter_column_definitions.map(&:quoted_setter_name).join(', ')});
                RETURN;
              EXCEPTION WHEN unique_violation THEN
                -- seamusabshere 9/20/12 only retry once
                IF (first_try = 1) THEN
                  first_try := 0;
                ELSE
                  RETURN;
                END IF;
                -- Do nothing, and loop to try the UPDATE again.
              END;
            END LOOP;
          END;
          $$
          LANGUAGE plpgsql;
        })
      end
    end
  end
end
@@ -0,0 +1,29 @@
1
class Upsert
  class MergeFunction
    # @private
    # SQLite3 flavor: no database-side function is created; each upsert is an
    # INSERT OR IGNORE followed by an UPDATE.
    class SQLite3_Database < MergeFunction
      attr_reader :quoted_setter_names, :quoted_selector_names

      # Forwards all arguments to the superclass, then pre-quotes the setter
      # and selector column names.
      def initialize(*)
        super
        @quoted_setter_names = setter_keys.map { |key| connection.quote_ident(key) }
        @quoted_selector_names = selector_keys.map { |key| connection.quote_ident(key) }
      end

      def create!
        # not necessary
      end

      # Step 1 inserts the setter values (ignored if the insert fails a
      # constraint); step 2 updates the setter columns on the rows matching
      # the selector. Returns whatever the UPDATE's connection.execute returns.
      def execute(row)
        setter_binds = row.setter.values.map(&:bind_value)

        placeholders = (['?'] * setter_binds.length).join(',')
        insert_sql = %{INSERT OR IGNORE INTO #{quoted_table_name} (#{quoted_setter_names.join(',')}) VALUES (#{placeholders})}
        connection.execute(insert_sql, setter_binds)

        assignments = quoted_setter_names.map { |quoted| "#{quoted}=?" }.join(',')
        conditions = quoted_selector_names.map { |quoted| "#{quoted}=?" }.join(' AND ')
        update_sql = %{UPDATE #{quoted_table_name} SET #{assignments} WHERE #{conditions}}
        selector_binds = row.selector.values.map(&:bind_value)
        connection.execute(update_sql, setter_binds + selector_binds)
      end
    end
  end
end