upsert 0.0.1 → 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
data/.yardopts ADDED
@@ -0,0 +1,2 @@
1
+ --no-private
2
+ --readme README.md
data/README.md CHANGED
@@ -2,7 +2,50 @@
2
2
 
3
3
  Finally, all those SQL MERGE tricks codified.
4
4
 
5
- ## Supported databases
5
+ ## Usage
6
+
7
+ ### One at a time
8
+
9
+ upsert = Upsert.new Pet.connection, Pet.table_name
10
+ upsert.row({:name => 'Jerry'}, :breed => 'beagle')
11
+ upsert.row({:name => 'Pierre'}, :breed => 'tabby')
12
+
13
+ ### Multiple upserts at once
14
+
15
+ Upsert.new(Pet.connection, Pet.table_name).multi do |upsert|
16
+ upsert.row({:name => 'Jerry'}, :breed => 'beagle')
17
+ upsert.row({:name => 'Pierre'}, :breed => 'tabby')
18
+ end
19
+
20
+ ## Wishlist
21
+
22
+ 1. Make SQL expressions like `c=c+1` possible, e.g. with something like `Upsert.sql('c=c+1')`
23
+
24
+ ## Speed
25
+
26
+ ### MySQL
27
+
28
+ (from the tests)
29
+
30
+ Upsert was 47% faster than faking upserts with activerecord-import
31
+ Upsert was 77% faster than find + new/set/save
32
+ Upsert was 84% faster than create + rescue/find/update
33
+ Upsert was 82% faster than find_or_create + update_attributes
34
+
35
+ ### PostgreSQL
36
+
37
+ Upsert was 73% faster than find + new/set/save
38
+ Upsert was 84% faster than find_or_create + update_attributes
39
+ Upsert was 87% faster than create + rescue/find/update
40
+
41
+ ## Supported database drivers
42
+
43
+ 1. [mysql2](https://rubygems.org/gems/mysql2) (e.g. `Upsert.new(Mysql2::Connection.new([...]), :pets)`)
44
+ 2. [sqlite3](https://rubygems.org/gems/sqlite3)
45
+ 3. [pg](https://rubygems.org/gems/pg)
46
+ 4. Any of these wrapped in an ActiveRecord connection adapter (e.g. `Upsert.new(Pet.connection, Pet.table_name)`)
47
+
48
+ ## SQL merge tricks in use
6
49
 
7
50
  ### MySQL
8
51
 
@@ -12,8 +55,6 @@ Finally, all those SQL MERGE tricks codified.
12
55
 
13
56
  ### PostgreSQL
14
57
 
15
- #### Used
16
-
17
58
  # http://www.postgresql.org/docs/current/interactive/plpgsql-control-structures.html#PLPGSQL-ERROR-TRAPPING
18
59
  CREATE TABLE db (a INT PRIMARY KEY, b TEXT);
19
60
  CREATE FUNCTION merge_db(key INT, data TEXT) RETURNS VOID AS
@@ -41,7 +82,15 @@ Finally, all those SQL MERGE tricks codified.
41
82
  SELECT merge_db(1, 'david');
42
83
  SELECT merge_db(1, 'dennis');
43
84
 
44
- #### Alternatives (not used)
85
+ ### Sqlite
86
+
87
+ # http://stackoverflow.com/questions/2717590/sqlite-upsert-on-duplicate-key-update
88
+ INSERT OR IGNORE INTO visits VALUES ($ip, 0);
89
+ UPDATE visits SET hits = hits + 1 WHERE ip LIKE $ip;
90
+
91
+ ### Unused alternatives
92
+
93
+ #### PostgreSQL
45
94
 
46
95
  # http://stackoverflow.com/questions/1109061/insert-on-duplicate-update-postgresql
47
96
  UPDATE table SET field='C', field2='Z' WHERE id=3;
@@ -61,10 +110,4 @@ Finally, all those SQL MERGE tricks codified.
61
110
  FROM stage_data
62
111
  WHERE NOT EXISTS (SELECT 1 FROM target_data
63
112
  WHERE target_data.key_column = stage_data.key_column)
64
- END;
65
-
66
- ### Sqlite
67
-
68
- # http://stackoverflow.com/questions/2717590/sqlite-upsert-on-duplicate-key-update
69
- INSERT OR IGNORE INTO visits VALUES ($ip, 0);
70
- UPDATE visits SET hits = hits + 1 WHERE ip LIKE $ip;
113
+ END;
data/Rakefile CHANGED
@@ -10,7 +10,7 @@ Rake::TestTask.new(:_test) do |test|
10
10
  end
11
11
 
12
12
  task :test_each_db_adapter do
13
- %w{ mysql2 sqlite pg }.each do |database|
13
+ %w{ mysql2 sqlite pg active_record_connection_adapter }.each do |database|
14
14
  puts
15
15
  puts "#{'*'*10} Running #{database} tests"
16
16
  puts
data/lib/upsert.rb CHANGED
@@ -1,4 +1,7 @@
1
+ require 'bigdecimal'
2
+
1
3
  require 'upsert/version'
4
+ require 'upsert/binary'
2
5
  require 'upsert/buffer'
3
6
  require 'upsert/quoter'
4
7
  require 'upsert/row'
@@ -7,35 +10,55 @@ require 'upsert/buffer/pg_connection'
7
10
  require 'upsert/buffer/sqlite3_database'
8
11
 
9
12
  class Upsert
10
- INFINITY = 1.0/0
11
- SINGLE_QUOTE = %{'}
12
- DOUBLE_QUOTE = %{"}
13
- BACKTICK = %{`}
13
+ class << self
14
+ # @param [String] v A string containing binary data that should be inserted/escaped as such.
15
+ #
16
+ # @return [Upsert::Binary]
17
+ def binary(v)
18
+ Binary.new v
19
+ end
20
+ end
14
21
 
22
+ # @private
15
23
  attr_reader :buffer
16
24
 
25
+ # @param [Mysql2::Client,Sqlite3::Database,PG::Connection,#raw_connection] connection A supported database connection.
26
+ # @param [String,Symbol] table_name The name of the table into which you will be upserting.
17
27
  def initialize(connection, table_name)
18
28
  @multi_mutex = Mutex.new
19
29
  @buffer = Buffer.for connection, table_name
20
30
  end
21
31
 
32
+ # @param [Hash] selector Key-value pairs that will be used to find or create a row.
33
+ # @param [Hash] document Key-value pairs that will be set on the row, whether it previously existed or not.
34
+ #
35
+ # @return [nil]
36
+ #
37
+ # @example One at a time
38
+ # upsert = Upsert.new Pet.connection, Pet.table_name
39
+ # upsert.row({:name => 'Jerry'}, :breed => 'beagle')
40
+ # upsert.row({:name => 'Pierre'}, :breed => 'tabby')
22
41
  def row(selector, document)
23
42
  buffer.add selector, document
43
+ nil
24
44
  end
25
45
 
26
- def cleanup
27
- buffer.cleanup
28
- end
29
-
30
- def multi(&blk)
46
+ # @yield [Upsert] An +Upsert+ object in "async" mode. You can call #row on it multiple times and it will try to optimize on speed.
47
+ #
48
+ # @return [nil]
49
+ #
50
+ # @example Many at once
51
+ # Upsert.new(Pet.connection, Pet.table_name).multi do |upsert|
52
+ # upsert.row({:name => 'Jerry'}, :breed => 'beagle')
53
+ # upsert.row({:name => 'Pierre'}, :breed => 'tabby')
54
+ # end
55
+ def multi
31
56
  @multi_mutex.synchronize do
32
- begin
33
- buffer.async = true
34
- instance_eval(&blk)
35
- buffer.cleanup
36
- ensure
37
- buffer.async = nil
38
- end
57
+ buffer.async = true
58
+ yield self
59
+ buffer.async = false
60
+ buffer.clear
39
61
  end
62
+ nil
40
63
  end
41
64
  end
@@ -0,0 +1,7 @@
1
+ class Upsert
2
+ # A wrapper class for binary strings so that Upsert knows to escape them as such.
3
+ #
4
+ # Create them with +Upsert.binary(x)+
5
+ class Binary < ::String
6
+ end
7
+ end
data/lib/upsert/buffer.rb CHANGED
@@ -1,11 +1,24 @@
1
1
  class Upsert
2
+ # @private
2
3
  class Buffer
3
4
  class << self
4
5
  def for(connection, table_name)
6
+ if connection.respond_to?(:raw_connection)
7
+ # unwrap ActiveRecord adapters (e.g. ActiveRecord::Base.connection or ActiveRecord::Base.connection_pool.checkout) to reach the raw driver connection
8
+ connection = connection.raw_connection
9
+ end
5
10
  const_get(connection.class.name.gsub(/\W+/, '_')).new connection, table_name
6
11
  end
7
12
  end
8
13
 
14
+ SINGLE_QUOTE = %{'}
15
+ DOUBLE_QUOTE = %{"}
16
+ BACKTICK = %{`}
17
+ E_AND_SINGLE_QUOTE = %{E'}
18
+ X_AND_SINGLE_QUOTE = %{x'}
19
+ USEC_SPRINTF = '%06d'
20
+ ISO8601_DATETIME = '%Y-%m-%d %H:%M:%S' # FIXME: this format carries no zone offset, so timezones are probably ignored
21
+
9
22
  attr_reader :connection
10
23
  attr_reader :table_name
11
24
  attr_reader :rows
@@ -22,7 +35,7 @@ class Upsert
22
35
  end
23
36
 
24
37
  def add(selector, document)
25
- rows << Row.new(selector, document)
38
+ rows << Row.new(self, selector, document)
26
39
  if sql = chunk
27
40
  execute sql
28
41
  end
@@ -33,26 +46,5 @@ class Upsert
33
46
  execute sql
34
47
  end
35
48
  end
36
-
37
- def chunk
38
- return if rows.empty?
39
- targets = []
40
- sql = nil
41
- begin
42
- targets << rows.pop
43
- last_sql = sql
44
- sql = compose(targets)
45
- end until rows.empty? or targets.length >= max_targets or sql.length > max_length
46
- if sql.length > max_length
47
- raise if last_sql.nil?
48
- sql = last_sql
49
- rows << targets.pop
50
- end
51
- sql
52
- end
53
-
54
- def cleanup
55
- clear
56
- end
57
49
  end
58
50
  end
@@ -1,16 +1,20 @@
1
1
  class Upsert
2
2
  class Buffer
3
+ # @private
3
4
  class Mysql2_Client < Buffer
4
- def compose(targets)
5
- columns = targets.first.columns
6
- row_inserts = targets.map { |row| row.inserts }
7
- column_tautologies = columns.map do |k|
8
- [ quote_ident(k), "VALUES(#{quote_ident(k)})" ].join('=')
5
+ include Quoter
6
+
7
+ def chunk
8
+ return false if rows.empty?
9
+ take = rows.length
10
+ until take == 1 or fits_in_single_query?(take)
11
+ take -= 1
9
12
  end
10
- sql = <<-EOS
11
- INSERT INTO "#{table_name}" (#{quote_idents(columns)}) VALUES (#{row_inserts.map { |row_insert| quote_values(row_insert) }.join('),(') })
12
- ON DUPLICATE KEY UPDATE #{column_tautologies.join(',')};
13
- EOS
13
+ if async? and not maximal?(take)
14
+ return false
15
+ end
16
+ sql = sql take
17
+ @rows = rows.drop(take)
14
18
  sql
15
19
  end
16
20
 
@@ -18,30 +22,109 @@ EOS
18
22
  connection.query sql
19
23
  end
20
24
 
21
- def max_targets
22
- INFINITY
25
+ def fits_in_single_query?(take)
26
+ sql_length(take) <= max_sql_length
23
27
  end
24
28
 
25
- def max_length
26
- @max_length ||= connection.query("SHOW VARIABLES LIKE 'max_allowed_packet'", :as => :hash).first['Value'].to_i
29
+ def maximal?(take)
30
+ sql_length(take) >= max_sql_length
27
31
  end
28
32
 
29
- include Quoter
33
+ def columns
34
+ @columns ||= rows.first.columns
35
+ end
36
+
37
+ def insert_part
38
+ @insert_part ||= %{INSERT INTO "#{table_name}" (#{quote_idents(columns)}) VALUES }
39
+ end
40
+
41
+ def update_part
42
+ @update_part ||= begin
43
+ updaters = columns.map do |k|
44
+ qk = quote_ident k
45
+ [ qk, "VALUES(#{qk})" ].join('=')
46
+ end.join(',')
47
+ %{ ON DUPLICATE KEY UPDATE #{updaters}}
48
+ end
49
+ end
50
+
51
+ # where 2 is the opening and closing parens wrapping the whole values list
52
+ def static_sql_length
53
+ @static_sql_length ||= insert_part.length + update_part.length + 2
54
+ end
55
+
56
+ # where 3 is the "),(" separator joining each row's values (close paren, comma, open paren)
57
+ def variable_sql_length(take)
58
+ rows.first(take).inject(0) { |sum, row| sum + row.values_sql_length + 3 }
59
+ end
60
+
61
+ def sql_length(take)
62
+ static_sql_length + variable_sql_length(take)
63
+ end
30
64
 
31
- def quote_value(v)
65
+ def sql(take)
66
+ all_value_sql = rows.first(take).map { |row| row.values_sql }
67
+ [ insert_part, '(', all_value_sql.join('),('), ')', update_part ].join
68
+ end
69
+
70
+ def max_sql_length
71
+ @max_sql_length ||= connection.query("SHOW VARIABLES LIKE 'max_allowed_packet'", :as => :hash).first['Value'].to_i
72
+ end
73
+
74
+ def quoted_value_length(v)
32
75
  case v
33
76
  when NilClass
34
- 'NULL'
35
- when String, Symbol
36
- SINGLE_QUOTE + connection.escape(v.to_s) + SINGLE_QUOTE
77
+ 4
78
+ when TrueClass
79
+ 4
80
+ when FalseClass
81
+ 5
82
+ when BigDecimal
83
+ v.to_s('F').length
84
+ when Upsert::Binary
85
+ # conservative
86
+ v.length * 2 + 3
87
+ when Numeric
88
+ v.to_s.length
89
+ when String
90
+ # conservative
91
+ v.length * 2 + 2
92
+ when Time, DateTime
93
+ 24 + 2
94
+ when Date
95
+ 10 + 2
37
96
  else
38
- v
97
+ raise "not sure how to get quoted length of #{v.class}: #{v.inspect}"
39
98
  end
40
99
  end
41
-
100
+
101
+ def quote_boolean(v)
102
+ v ? 'TRUE' : 'FALSE'
103
+ end
104
+
105
+ def quote_string(v)
106
+ SINGLE_QUOTE + connection.escape(v) + SINGLE_QUOTE
107
+ end
108
+
109
+ # We **could** do this, but I don't think it's necessary.
110
+ # def quote_binary(v)
111
+ # X_AND_SINGLE_QUOTE + v.unpack("H*")[0] + SINGLE_QUOTE
112
+ # end
113
+
114
+ # put raw binary straight into sql
115
+ alias_method :quote_binary, :quote_string
116
+
117
+ def quote_time(v)
118
+ quote_string v.strftime(ISO8601_DATETIME)
119
+ end
120
+
42
121
  def quote_ident(k)
43
122
  BACKTICK + connection.escape(k.to_s) + BACKTICK
44
123
  end
124
+
125
+ def quote_big_decimal(v)
126
+ v.to_s('F')
127
+ end
45
128
  end
46
129
  end
47
130
  end
@@ -2,69 +2,63 @@ require 'upsert/buffer/pg_connection/column_definition'
2
2
 
3
3
  class Upsert
4
4
  class Buffer
5
+ # @private
5
6
  class PG_Connection < Buffer
6
- attr_reader :db_function_name
7
+ include Quoter
8
+
9
+ attr_reader :merge_function
7
10
 
8
- def compose(targets)
9
- target = targets.first
10
- unless created_db_function?
11
- create_db_function target
11
+ def chunk
12
+ return false if rows.empty?
13
+ row = rows.shift
14
+ unless merge_function
15
+ create_merge_function row
12
16
  end
13
- hsh = target.to_hash
17
+ hsh = row.to_hash
14
18
  ordered_args = column_definitions.map do |c|
15
- if hsh.has_key? c.name
16
- hsh[c.name]
17
- else
18
- nil
19
- end
19
+ hsh[c.name]
20
20
  end
21
- %{ SELECT #{db_function_name}(#{quote_values(ordered_args)}) }
21
+ %{SELECT #{merge_function}(#{quote_values(ordered_args)})}
22
22
  end
23
23
 
24
24
  def execute(sql)
25
25
  connection.exec sql
26
26
  end
27
27
 
28
- def max_length
29
- INFINITY
28
+ def quote_string(v)
29
+ SINGLE_QUOTE + connection.escape_string(v) + SINGLE_QUOTE
30
30
  end
31
31
 
32
- def max_targets
33
- 1
32
+ def quote_binary(v)
33
+ E_AND_SINGLE_QUOTE + connection.escape_bytea(v) + SINGLE_QUOTE
34
34
  end
35
35
 
36
- include Quoter
37
-
38
- def quote_ident(k)
39
- SINGLE_QUOTE + connection.quote_ident(k) + SINGLE_QUOTE
36
+ def quote_time(v)
37
+ quote_string [v.strftime(ISO8601_DATETIME), sprintf(USEC_SPRINTF, v.usec)].join('.')
40
38
  end
41
-
42
- # FIXME escape_bytea with (v, k = nil)
43
- def quote_value(v)
44
- case v
45
- when NilClass
46
- 'NULL'
47
- when String, Symbol
48
- SINGLE_QUOTE + connection.escape_string(v.to_s) + SINGLE_QUOTE
49
- else
50
- v
51
- end
39
+
40
+ def quote_big_decimal(v)
41
+ v.to_s('F')
52
42
  end
53
-
43
+
44
+ def quote_boolean(v)
45
+ v ? 'TRUE' : 'FALSE'
46
+ end
47
+
48
+ def quote_ident(k)
49
+ DOUBLE_QUOTE + connection.quote_ident(k.to_s) + DOUBLE_QUOTE
50
+ end
51
+
54
52
  def column_definitions
55
53
  @column_definitions ||= ColumnDefinition.all(connection, table_name)
56
54
  end
57
55
 
58
56
  private
59
57
 
60
- def created_db_function?
61
- !!@created_db_function_query
62
- end
63
-
64
- def create_db_function(example_row)
65
- @db_function_name = "pg_temp.merge_#{table_name}_#{Kernel.rand(1e11)}"
58
+ def create_merge_function(example_row)
59
+ @merge_function = "pg_temp.merge_#{table_name}_#{Kernel.rand(1e11)}"
66
60
  execute <<-EOS
67
- CREATE FUNCTION #{db_function_name}(#{column_definitions.map { |c| "#{c.name}_input #{c.sql_type} DEFAULT #{c.default || 'NULL'}" }.join(',') }) RETURNS VOID AS
61
+ CREATE FUNCTION #{merge_function}(#{column_definitions.map { |c| "#{c.name}_input #{c.sql_type} DEFAULT #{c.default || 'NULL'}" }.join(',') }) RETURNS VOID AS
68
62
  $$
69
63
  BEGIN
70
64
  LOOP
@@ -87,7 +81,6 @@ END;
87
81
  $$
88
82
  LANGUAGE plpgsql;
89
83
  EOS
90
- @created_db_function_query = true
91
84
  end
92
85
  end
93
86
  end