upsert 0.0.1 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/.yardopts ADDED
@@ -0,0 +1,2 @@
1
+ --no-private
2
+ --readme README.md
data/README.md CHANGED
@@ -2,7 +2,50 @@
2
2
 
3
3
  Finally, all those SQL MERGE tricks codified.
4
4
 
5
- ## Supported databases
5
+ ## Usage
6
+
7
+ ### One at a time
8
+
9
+ upsert = Upsert.new Pet.connection, Pet.table_name
10
+ upsert.row({:name => 'Jerry'}, :breed => 'beagle')
11
+ upsert.row({:name => 'Pierre'}, :breed => 'tabby')
12
+
13
+ ### Multiple upserts at once
14
+
15
+ Upsert.new(Pet.connection, Pet.table_name).multi do |upsert|
16
+ upsert.row({:name => 'Jerry'}, :breed => 'beagle')
17
+ upsert.row({:name => 'Pierre'}, :breed => 'tabby')
18
+ end
19
+
20
+ ## Wishlist
21
+
22
+ 1. Make raw SQL expressions like `c=c+1` possible, e.g. with something like `Upsert.sql('c=c+1')`
23
+
24
+ ## Speed
25
+
26
+ ### MySQL
27
+
28
+ (from the tests)
29
+
30
+ Upsert was 47% faster than faking upserts with activerecord-import
31
+ Upsert was 77% faster than find + new/set/save
32
+ Upsert was 84% faster than create + rescue/find/update
33
+ Upsert was 82% faster than find_or_create + update_attributes
34
+
35
+ ### PostgreSQL
36
+
37
+ Upsert was 73% faster than find + new/set/save
38
+ Upsert was 84% faster than find_or_create + update_attributes
39
+ Upsert was 87% faster than create + rescue/find/update
40
+
41
+ ## Supported database drivers
42
+
43
+ 1. [mysql2](https://rubygems.org/gems/mysql2) (e.g. `Upsert.new(Mysql2::Connection.new([...]), :pets)`)
44
+ 2. [sqlite3](https://rubygems.org/gems/sqlite3)
45
+ 3. [pg](https://rubygems.org/gems/pg)
46
+ 4. Any of these wrapped in an ActiveRecord connection adapter (e.g. `Upsert.new(Pet.connection, Pet.table_name)`)
47
+
48
+ ## SQL merge tricks in use
6
49
 
7
50
  ### MySQL
8
51
 
@@ -12,8 +55,6 @@ Finally, all those SQL MERGE tricks codified.
12
55
 
13
56
  ### PostgreSQL
14
57
 
15
- #### Used
16
-
17
58
  # http://www.postgresql.org/docs/current/interactive/plpgsql-control-structures.html#PLPGSQL-ERROR-TRAPPING
18
59
  CREATE TABLE db (a INT PRIMARY KEY, b TEXT);
19
60
  CREATE FUNCTION merge_db(key INT, data TEXT) RETURNS VOID AS
@@ -41,7 +82,15 @@ Finally, all those SQL MERGE tricks codified.
41
82
  SELECT merge_db(1, 'david');
42
83
  SELECT merge_db(1, 'dennis');
43
84
 
44
- #### Alternatives (not used)
85
+ ### Sqlite
86
+
87
+ # http://stackoverflow.com/questions/2717590/sqlite-upsert-on-duplicate-key-update
88
+ INSERT OR IGNORE INTO visits VALUES ($ip, 0);
89
+ UPDATE visits SET hits = hits + 1 WHERE ip LIKE $ip;
90
+
91
+ ### Unused alternatives
92
+
93
+ #### PostgreSQL
45
94
 
46
95
  # http://stackoverflow.com/questions/1109061/insert-on-duplicate-update-postgresql
47
96
  UPDATE table SET field='C', field2='Z' WHERE id=3;
@@ -61,10 +110,4 @@ Finally, all those SQL MERGE tricks codified.
61
110
  FROM stage_data
62
111
  WHERE NOT EXISTS (SELECT 1 FROM target_data
63
112
  WHERE target_data.key_column = stage_data.key_column)
64
- END;
65
-
66
- ### Sqlite
67
-
68
- # http://stackoverflow.com/questions/2717590/sqlite-upsert-on-duplicate-key-update
69
- INSERT OR IGNORE INTO visits VALUES ($ip, 0);
70
- UPDATE visits SET hits = hits + 1 WHERE ip LIKE $ip;
113
+ END;
data/Rakefile CHANGED
@@ -10,7 +10,7 @@ Rake::TestTask.new(:_test) do |test|
10
10
  end
11
11
 
12
12
  task :test_each_db_adapter do
13
- %w{ mysql2 sqlite pg }.each do |database|
13
+ %w{ mysql2 sqlite pg active_record_connection_adapter }.each do |database|
14
14
  puts
15
15
  puts "#{'*'*10} Running #{database} tests"
16
16
  puts
data/lib/upsert.rb CHANGED
@@ -1,4 +1,7 @@
1
+ require 'bigdecimal'
2
+
1
3
  require 'upsert/version'
4
+ require 'upsert/binary'
2
5
  require 'upsert/buffer'
3
6
  require 'upsert/quoter'
4
7
  require 'upsert/row'
@@ -7,35 +10,55 @@ require 'upsert/buffer/pg_connection'
7
10
  require 'upsert/buffer/sqlite3_database'
8
11
 
9
12
  class Upsert
10
- INFINITY = 1.0/0
11
- SINGLE_QUOTE = %{'}
12
- DOUBLE_QUOTE = %{"}
13
- BACKTICK = %{`}
13
+ class << self
14
+ # @param [String] v A string containing binary data that should be inserted/escaped as such.
15
+ #
16
+ # @return [Upsert::Binary]
17
+ def binary(v)
18
+ Binary.new v
19
+ end
20
+ end
14
21
 
22
+ # @private
15
23
  attr_reader :buffer
16
24
 
25
+ # @param [Mysql2::Client,Sqlite3::Database,PG::Connection,#raw_connection] connection A supported database connection.
26
+ # @param [String,Symbol] table_name The name of the table into which you will be upserting.
17
27
  def initialize(connection, table_name)
18
28
  @multi_mutex = Mutex.new
19
29
  @buffer = Buffer.for connection, table_name
20
30
  end
21
31
 
32
+ # @param [Hash] selector Key-value pairs that will be used to find or create a row.
33
+ # @param [Hash] document Key-value pairs that will be set on the row, whether it previously existed or not.
34
+ #
35
+ # @return [nil]
36
+ #
37
+ # @example One at a time
38
+ # upsert = Upsert.new Pet.connection, Pet.table_name
39
+ # upsert.row({:name => 'Jerry'}, :breed => 'beagle')
40
+ # upsert.row({:name => 'Pierre'}, :breed => 'tabby')
22
41
  def row(selector, document)
23
42
  buffer.add selector, document
43
+ nil
24
44
  end
25
45
 
26
- def cleanup
27
- buffer.cleanup
28
- end
29
-
30
- def multi(&blk)
46
+ # @yield [Upsert] An +Upsert+ object in "async" mode. You can call #row on it multiple times and it will try to optimize on speed.
47
+ #
48
+ # @return [nil]
49
+ #
50
+ # @example Many at once
51
+ # Upsert.new(Pet.connection, Pet.table_name).multi do |upsert|
52
+ # upsert.row({:name => 'Jerry'}, :breed => 'beagle')
53
+ # upsert.row({:name => 'Pierre'}, :breed => 'tabby')
54
+ # end
55
+ def multi
31
56
  @multi_mutex.synchronize do
32
- begin
33
- buffer.async = true
34
- instance_eval(&blk)
35
- buffer.cleanup
36
- ensure
37
- buffer.async = nil
38
- end
57
+ buffer.async = true
58
+ yield self
59
+ buffer.async = false
60
+ buffer.clear
39
61
  end
62
+ nil
40
63
  end
41
64
  end
@@ -0,0 +1,7 @@
1
+ class Upsert
2
+ # A wrapper class for binary strings so that Upsert knows to escape them as such.
3
+ #
4
+ # Create them with +Upsert.binary(x)+
5
+ class Binary < ::String
6
+ end
7
+ end
data/lib/upsert/buffer.rb CHANGED
@@ -1,11 +1,24 @@
1
1
  class Upsert
2
+ # @private
2
3
  class Buffer
3
4
  class << self
4
5
  def for(connection, table_name)
6
+ if connection.respond_to?(:raw_connection)
7
+ # deal with ActiveRecord::Base.connection or ActiveRecord::Base.connection_pool.checkout
8
+ connection = connection.raw_connection
9
+ end
5
10
  const_get(connection.class.name.gsub(/\W+/, '_')).new connection, table_name
6
11
  end
7
12
  end
8
13
 
14
+ SINGLE_QUOTE = %{'}
15
+ DOUBLE_QUOTE = %{"}
16
+ BACKTICK = %{`}
17
+ E_AND_SINGLE_QUOTE = %{E'}
18
+ X_AND_SINGLE_QUOTE = %{x'}
19
+ USEC_SPRINTF = '%06d'
20
+ ISO8601_DATETIME = '%Y-%m-%d %H:%M:%S' #FIXME ignores timezones i think
21
+
9
22
  attr_reader :connection
10
23
  attr_reader :table_name
11
24
  attr_reader :rows
@@ -22,7 +35,7 @@ class Upsert
22
35
  end
23
36
 
24
37
  def add(selector, document)
25
- rows << Row.new(selector, document)
38
+ rows << Row.new(self, selector, document)
26
39
  if sql = chunk
27
40
  execute sql
28
41
  end
@@ -33,26 +46,5 @@ class Upsert
33
46
  execute sql
34
47
  end
35
48
  end
36
-
37
- def chunk
38
- return if rows.empty?
39
- targets = []
40
- sql = nil
41
- begin
42
- targets << rows.pop
43
- last_sql = sql
44
- sql = compose(targets)
45
- end until rows.empty? or targets.length >= max_targets or sql.length > max_length
46
- if sql.length > max_length
47
- raise if last_sql.nil?
48
- sql = last_sql
49
- rows << targets.pop
50
- end
51
- sql
52
- end
53
-
54
- def cleanup
55
- clear
56
- end
57
49
  end
58
50
  end
@@ -1,16 +1,20 @@
1
1
  class Upsert
2
2
  class Buffer
3
+ # @private
3
4
  class Mysql2_Client < Buffer
4
- def compose(targets)
5
- columns = targets.first.columns
6
- row_inserts = targets.map { |row| row.inserts }
7
- column_tautologies = columns.map do |k|
8
- [ quote_ident(k), "VALUES(#{quote_ident(k)})" ].join('=')
5
+ include Quoter
6
+
7
+ def chunk
8
+ return false if rows.empty?
9
+ take = rows.length
10
+ until take == 1 or fits_in_single_query?(take)
11
+ take -= 1
9
12
  end
10
- sql = <<-EOS
11
- INSERT INTO "#{table_name}" (#{quote_idents(columns)}) VALUES (#{row_inserts.map { |row_insert| quote_values(row_insert) }.join('),(') })
12
- ON DUPLICATE KEY UPDATE #{column_tautologies.join(',')};
13
- EOS
13
+ if async? and not maximal?(take)
14
+ return false
15
+ end
16
+ sql = sql take
17
+ @rows = rows.drop(take)
14
18
  sql
15
19
  end
16
20
 
@@ -18,30 +22,109 @@ EOS
18
22
  connection.query sql
19
23
  end
20
24
 
21
- def max_targets
22
- INFINITY
25
+ def fits_in_single_query?(take)
26
+ sql_length(take) <= max_sql_length
23
27
  end
24
28
 
25
- def max_length
26
- @max_length ||= connection.query("SHOW VARIABLES LIKE 'max_allowed_packet'", :as => :hash).first['Value'].to_i
29
+ def maximal?(take)
30
+ sql_length(take) >= max_sql_length
27
31
  end
28
32
 
29
- include Quoter
33
+ def columns
34
+ @columns ||= rows.first.columns
35
+ end
36
+
37
+ def insert_part
38
+ @insert_part ||= %{INSERT INTO "#{table_name}" (#{quote_idents(columns)}) VALUES }
39
+ end
40
+
41
+ def update_part
42
+ @update_part ||= begin
43
+ updaters = columns.map do |k|
44
+ qk = quote_ident k
45
+ [ qk, "VALUES(#{qk})" ].join('=')
46
+ end.join(',')
47
+ %{ ON DUPLICATE KEY UPDATE #{updaters}}
48
+ end
49
+ end
50
+
51
+ # The extra 2 accounts for the opening and closing parentheses that wrap the joined value tuples.
52
+ def static_sql_length
53
+ @static_sql_length ||= insert_part.length + update_part.length + 2
54
+ end
55
+
56
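+ # The extra 3 per row accounts for the "),(" separator between value tuples (slightly conservative: the last row has no separator).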
57
+ def variable_sql_length(take)
58
+ rows.first(take).inject(0) { |sum, row| sum + row.values_sql_length + 3 }
59
+ end
60
+
61
+ def sql_length(take)
62
+ static_sql_length + variable_sql_length(take)
63
+ end
30
64
 
31
- def quote_value(v)
65
+ def sql(take)
66
+ all_value_sql = rows.first(take).map { |row| row.values_sql }
67
+ [ insert_part, '(', all_value_sql.join('),('), ')', update_part ].join
68
+ end
69
+
70
+ def max_sql_length
71
+ @max_sql_length ||= connection.query("SHOW VARIABLES LIKE 'max_allowed_packet'", :as => :hash).first['Value'].to_i
72
+ end
73
+
74
+ def quoted_value_length(v)
32
75
  case v
33
76
  when NilClass
34
- 'NULL'
35
- when String, Symbol
36
- SINGLE_QUOTE + connection.escape(v.to_s) + SINGLE_QUOTE
77
+ 4
78
+ when TrueClass
79
+ 4
80
+ when FalseClass
81
+ 5
82
+ when BigDecimal
83
+ v.to_s('F').length
84
+ when Upsert::Binary
85
+ # conservative
86
+ v.length * 2 + 3
87
+ when Numeric
88
+ v.to_s.length
89
+ when String
90
+ # conservative
91
+ v.length * 2 + 2
92
+ when Time, DateTime
93
+ 24 + 2
94
+ when Date
95
+ 10 + 2
37
96
  else
38
- v
97
+ raise "not sure how to get quoted length of #{v.class}: #{v.inspect}"
39
98
  end
40
99
  end
41
-
100
+
101
+ def quote_boolean(v)
102
+ v ? 'TRUE' : 'FALSE'
103
+ end
104
+
105
+ def quote_string(v)
106
+ SINGLE_QUOTE + connection.escape(v) + SINGLE_QUOTE
107
+ end
108
+
109
+ # We **could** hex-encode binary values as an x'...' literal like this, but I don't think it's necessary.
110
+ # def quote_binary(v)
111
+ # X_AND_SINGLE_QUOTE + v.unpack("H*")[0] + SINGLE_QUOTE
112
+ # end
113
+
114
+ # Binary values are escaped the same way as strings and put straight into the SQL.
115
+ alias_method :quote_binary, :quote_string
116
+
117
+ def quote_time(v)
118
+ quote_string v.strftime(ISO8601_DATETIME)
119
+ end
120
+
42
121
  def quote_ident(k)
43
122
  BACKTICK + connection.escape(k.to_s) + BACKTICK
44
123
  end
124
+
125
+ def quote_big_decimal(v)
126
+ v.to_s('F')
127
+ end
45
128
  end
46
129
  end
47
130
  end
@@ -2,69 +2,63 @@ require 'upsert/buffer/pg_connection/column_definition'
2
2
 
3
3
  class Upsert
4
4
  class Buffer
5
+ # @private
5
6
  class PG_Connection < Buffer
6
- attr_reader :db_function_name
7
+ include Quoter
8
+
9
+ attr_reader :merge_function
7
10
 
8
- def compose(targets)
9
- target = targets.first
10
- unless created_db_function?
11
- create_db_function target
11
+ def chunk
12
+ return false if rows.empty?
13
+ row = rows.shift
14
+ unless merge_function
15
+ create_merge_function row
12
16
  end
13
- hsh = target.to_hash
17
+ hsh = row.to_hash
14
18
  ordered_args = column_definitions.map do |c|
15
- if hsh.has_key? c.name
16
- hsh[c.name]
17
- else
18
- nil
19
- end
19
+ hsh[c.name]
20
20
  end
21
- %{ SELECT #{db_function_name}(#{quote_values(ordered_args)}) }
21
+ %{SELECT #{merge_function}(#{quote_values(ordered_args)})}
22
22
  end
23
23
 
24
24
  def execute(sql)
25
25
  connection.exec sql
26
26
  end
27
27
 
28
- def max_length
29
- INFINITY
28
+ def quote_string(v)
29
+ SINGLE_QUOTE + connection.escape_string(v) + SINGLE_QUOTE
30
30
  end
31
31
 
32
- def max_targets
33
- 1
32
+ def quote_binary(v)
33
+ E_AND_SINGLE_QUOTE + connection.escape_bytea(v) + SINGLE_QUOTE
34
34
  end
35
35
 
36
- include Quoter
37
-
38
- def quote_ident(k)
39
- SINGLE_QUOTE + connection.quote_ident(k) + SINGLE_QUOTE
36
+ def quote_time(v)
37
+ quote_string [v.strftime(ISO8601_DATETIME), sprintf(USEC_SPRINTF, v.usec)].join('.')
40
38
  end
41
-
42
- # FIXME escape_bytea with (v, k = nil)
43
- def quote_value(v)
44
- case v
45
- when NilClass
46
- 'NULL'
47
- when String, Symbol
48
- SINGLE_QUOTE + connection.escape_string(v.to_s) + SINGLE_QUOTE
49
- else
50
- v
51
- end
39
+
40
+ def quote_big_decimal(v)
41
+ v.to_s('F')
52
42
  end
53
-
43
+
44
+ def quote_boolean(v)
45
+ v ? 'TRUE' : 'FALSE'
46
+ end
47
+
48
+ def quote_ident(k)
49
+ DOUBLE_QUOTE + connection.quote_ident(k.to_s) + DOUBLE_QUOTE
50
+ end
51
+
54
52
  def column_definitions
55
53
  @column_definitions ||= ColumnDefinition.all(connection, table_name)
56
54
  end
57
55
 
58
56
  private
59
57
 
60
- def created_db_function?
61
- !!@created_db_function_query
62
- end
63
-
64
- def create_db_function(example_row)
65
- @db_function_name = "pg_temp.merge_#{table_name}_#{Kernel.rand(1e11)}"
58
+ def create_merge_function(example_row)
59
+ @merge_function = "pg_temp.merge_#{table_name}_#{Kernel.rand(1e11)}"
66
60
  execute <<-EOS
67
- CREATE FUNCTION #{db_function_name}(#{column_definitions.map { |c| "#{c.name}_input #{c.sql_type} DEFAULT #{c.default || 'NULL'}" }.join(',') }) RETURNS VOID AS
61
+ CREATE FUNCTION #{merge_function}(#{column_definitions.map { |c| "#{c.name}_input #{c.sql_type} DEFAULT #{c.default || 'NULL'}" }.join(',') }) RETURNS VOID AS
68
62
  $$
69
63
  BEGIN
70
64
  LOOP
@@ -87,7 +81,6 @@ END;
87
81
  $$
88
82
  LANGUAGE plpgsql;
89
83
  EOS
90
- @created_db_function_query = true
91
84
  end
92
85
  end
93
86
  end