sequel-pg_bulk_upsert 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: dae595e4106d5d417ee04cc133ec517bb262a258
4
+ data.tar.gz: 8487d7990c2ddc1b532d9ccf73f6b5a6d49d9635
5
+ SHA512:
6
+ metadata.gz: 0b5a2f42be945d76e6dc3a0ba36c0674203c9b4415e6b1860d8d9f6c075ec311bd7afbbce1bd5ba7787d4c2fb45f7c68c674483b29e0f88489ad43e2aa676bf8
7
+ data.tar.gz: 489f39454ff4bfe11acad6816454d0e693821b9454d912a2a9930e6b61597bed98850d12256bab88deec651991fe9410c1a1d56daa2f7c585480f6179e9026ee
@@ -0,0 +1,17 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ Gemfile.lock
7
+ InstalledFiles
8
+ _yardoc
9
+ coverage
10
+ doc/
11
+ lib/bundler/man
12
+ pkg
13
+ rdoc
14
+ spec/reports
15
+ test/tmp
16
+ test/version_tmp
17
+ tmp
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in sequel-pg_bulk_upsert.gemspec
4
+ gemspec
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2014 Juan Manuel Barreneche
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,83 @@
1
+ # Sequel::PgBulkUpsert
2
+
3
+ This Sequel extension implements `on_duplicate_key_update(*...).multi_insert(*...)` for PostgreSQL. The
4
+ syntax is 100% compatible with the one that Sequel provides for MySQL, with a couple of caveats.
5
+
6
+ There are different ways of implementing "upsert" in PostgreSQL, some of which are discussed in:
7
+
8
+ http://johtopg.blogspot.com.ar/2014/04/upsertisms-in-postgres.html
9
+
10
+ Because we aim to do bulk upserts, this extension uses the one described here:
11
+
12
+ http://tapoueh.org/blog/2013/03/15-batch-update
13
+
14
+ It consists of 3 steps:
15
+
16
+ 1. Create a temp table with the target table structure
17
+
18
+ 2. Fill the temp table with the rows that are going to be "upserted"
19
+
20
+ 3. Update the records that already exist in the target table with the temp table information; then, by doing an
21
+ "anti-join" between the temp table and the target table, insert the records that didn't exist.
22
+
23
+ The resulting SQL looks like this:
24
+
25
+ ```SQL
26
+ WITH "update_cte" AS
27
+ (UPDATE "<target_table>"
28
+ SET "updatable_column" = "<temp_table>"."updatable_column"
29
+ FROM "<temp_table>"
30
+ WHERE ("<target_table>"."id" = "<temp_table>"."id")
31
+ RETURNING "<target_table>"."id")
32
+ INSERT INTO "<target_table>" ("updatable_column", "insertable_column")
33
+ SELECT "updatable_column", "insertable_column"
34
+ FROM "<temp_table>" LEFT JOIN "update_cte" USING ("id")
35
+ WHERE ("update_cte"."id" IS NULL)
36
+ RETURNING "<target_table>"."id"
37
+ ```
38
+
39
+ ## Caveats
40
+
41
+ 1. The target table **must** have a primary key (this is the key used to decide whether a record "exists"); it
42
+ won't work with any other unique constraint on the target table.
43
+
44
+ 2. This strategy for upsert isn't suitable for concurrent upserts on the same table: the concurrent
45
+ upserts will see the same table contents, so neither will update the other's new records, which will result in
46
+ a duplicate key error.
47
+
48
+
49
+ ## Installation
50
+
51
+ Add this line to your application's Gemfile:
52
+
53
+ gem 'sequel-pg_bulk_upsert'
54
+
55
+ And then execute:
56
+
57
+ $ bundle
58
+
59
+ Or install it yourself as:
60
+
61
+ $ gem install sequel-pg_bulk_upsert
62
+
63
+ ## Usage
64
+
65
+ ```ruby
66
+ require 'sequel-pg_bulk_upsert'
67
+
68
+ # This can also be done for a specific dataset
69
+ DB.extension(:pg_bulk_upsert)
70
+
71
+ DB[:target].on_duplicate_key_update(:column1, :column2).multi_insert([
72
+ {column1: '1', column2: '2', column3: '3'},
73
+ {column1: '4', column2: '4', column3: '4'}
74
+ ])
75
+ ```
76
+
77
+ ## Contributing
78
+
79
+ 1. Fork it
80
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
81
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
82
+ 4. Push to the branch (`git push origin my-new-feature`)
83
+ 5. Create new Pull Request
@@ -0,0 +1,11 @@
1
+ require "bundler/gem_tasks"
2
+
3
+ require 'rake/testtask'
4
+
5
# Define the `test` task: put test/ on the load path and run every file
# matching test/**/*_test.rb.
Rake::TestTask.new :test do |t|
  t.libs << "test"
  t.pattern = "test/**/*_test.rb"
end

# Running plain `rake` executes the test suite.
task default: %w[test]
@@ -0,0 +1 @@
1
+ require 'sequel-pg_bulk_upsert/version'
@@ -0,0 +1,5 @@
1
module Sequel
  module PgBulkUpsert
    # Version of the gem; read by the gemspec. Frozen so the shared string
    # cannot be mutated in place at runtime.
    VERSION = "0.1.0".freeze
  end
end
@@ -0,0 +1,77 @@
1
+ require "sequel"
2
+
3
module Sequel
  # Register the custom dataset option in Sequel's list of non-SQL options.
  # Guarded so that loading this file more than once does not append the
  # symbol repeatedly.
  unless Dataset::NON_SQL_OPTIONS.include?(:on_duplicate_key_update)
    Dataset::NON_SQL_OPTIONS << :on_duplicate_key_update
  end

  # Dataset extension that emulates
  # `on_duplicate_key_update(...).multi_insert(...)` on PostgreSQL.
  #
  # The rows are first loaded into a temporary copy of the target table and
  # then merged with a single statement: a CTE updates the rows whose primary
  # key already exists, and the rows that were not updated are inserted.
  module PgBulkUpsert
    # Returns a clone of the dataset flagged for bulk upsert.
    #
    # @param args [Array<Symbol>] columns to overwrite on rows that already
    #   exist. When no columns are given, every inserted column except the
    #   primary key is overwritten.
    # @return [Sequel::Dataset] the cloned dataset
    def on_duplicate_key_update(*args)
      clone(on_duplicate_key_update: args)
    end

    # Builds the SQL statements for a multi-row insert.
    #
    # Without the `on_duplicate_key_update` option this defers to the regular
    # implementation. With it, the result is the list of statements that
    # create the temp table, load it, and upsert from it into the target.
    #
    # @param columns [Array<Symbol>] columns being inserted
    # @param values [Array<Array>] one array of values per row
    # @return [Array<String>] SQL statements, in execution order
    # @raise [RuntimeError] if the target table has no primary key
    def multi_insert_sql(columns, values)
      update_columns = @opts[:on_duplicate_key_update]
      return super unless update_columns

      # TODO: only the first source table is considered; nothing ensures the
      # dataset has exactly one.
      from_table_name = @opts[:from].first
      temp_table_name = "#{from_table_name}_tmp_#{Time.now.strftime("%s%L")}"

      primary_key = @db.primary_key(from_table_name)
      raise "missing primary_key for #{from_table_name}" unless primary_key
      join_on = primary_key.to_sym

      # An empty column list would generate `UPDATE ... SET` with nothing to
      # set. Default to every inserted column except the key; if that leaves
      # nothing, re-assign the key to itself so the UPDATE is still valid and
      # still reports which rows already exist.
      if update_columns.empty?
        update_columns = columns - [join_on]
        update_columns = [join_on] if update_columns.empty?
      end

      [
        create_temp_table_from_existing_sql(from_table_name, temp_table_name),
        multi_insert_without_duplicates_sqls(temp_table_name, columns, values),
        upsert_from_to_sql(temp_table_name, from_table_name, join_on, update_columns, columns)
      ].flatten
    end

    private

    # Plain multi-insert statements that load the rows into +target_table+.
    # The upsert option is cleared so this does not recurse into the upsert
    # branch of #multi_insert_sql.
    def multi_insert_without_duplicates_sqls(target_table, columns, values)
      clone(on_duplicate_key_update: nil).
        from(target_table).
        multi_insert_sql(columns, values)
    end

    # Builds the single upsert statement.
    #
    # The `update_cte` CTE updates +update_columns+ on the target rows whose
    # +join_on+ value matches a row in the source table and returns the
    # matched keys. The outer INSERT then selects +insert_columns+ from the
    # source rows that have no match in `update_cte` (anti-join).
    def upsert_from_to_sql(source_name, target_name, join_on, update_columns, insert_columns)
      target = @db.from(target_name)
      source = @db.from(source_name)
      source_ds = source.
        select(*insert_columns).
        left_join(:update_cte, [join_on]).
        where(Sequel.qualify(:update_cte, join_on) => nil)

      # column => "<source>"."column" for every column to overwrite
      update_hash = update_columns.each_with_object({}) do |column, hash|
        hash[column] = Sequel.qualify(source_name, column)
      end

      target.with(:update_cte,
        target.from(target_name, source_name).
          where(Sequel.qualify(target_name, join_on) => Sequel.qualify(source_name, join_on)).
          returning(Sequel.qualify(target_name, join_on)).with_sql(:update_sql, update_hash)).
        returning(Sequel.qualify(target_name, join_on)).insert_sql(insert_columns, source_ds)
    end

    # SQL that creates a temporary table (dropped on commit) with the same
    # column names, database types and primary key flags that `@db.schema`
    # reports for +base_table+.
    def create_temp_table_from_existing_sql(base_table, temp_table_name)
      columns_information = @db.schema(base_table)

      generator = @db.create_table_generator do
        columns_information.each do |col, data|
          column col, data[:db_type], primary_key: data[:primary]
        end
      end

      # create_table_sql is not public on the database, hence the send.
      @db.send :create_table_sql, temp_table_name, generator, temp: true, on_commit: :drop
    end

  end

  # Database-level extension: loading it into a database makes every dataset
  # created from that database include PgBulkUpsert.
  module PgDatabaseBulkUpsert
    def self.extended(db)
      db.extend_datasets(PgBulkUpsert)
    end
  end

  Dataset.register_extension :pg_bulk_upsert, PgBulkUpsert
  Database.register_extension :pg_bulk_upsert, PgDatabaseBulkUpsert
end
77
+
@@ -0,0 +1,23 @@
1
+ # -*- encoding: utf-8 -*-
2
+ lib = File.expand_path('../lib', __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require 'sequel-pg_bulk_upsert/version'
5
+
6
# Gem specification for sequel-pg_bulk_upsert. The packaged files are taken
# from `git ls-files`, so building the gem requires a git checkout.
Gem::Specification.new do |gem|
  gem.name          = "sequel-pg_bulk_upsert"
  gem.version       = Sequel::PgBulkUpsert::VERSION
  gem.authors       = ["Juan Manuel Barreneche"]
  gem.email         = ["jbarreneche@restorando.com"]
  gem.description   = %q{Implementation `on_duplicate_key_update(*...).multi_insert(*...)` for postgresql}
  gem.summary       = %q{Implementation `on_duplicate_key_update(*...).multi_insert(*...)` for postgresql}
  gem.homepage      = "https://github.com/restorando/sequel-pg_bulk_upsert"
  # LICENSE.txt ships the MIT license; declare it so the packaged metadata
  # does not end up with an empty license list.
  gem.license       = "MIT"

  gem.files         = `git ls-files`.split($/)
  gem.executables   = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
  gem.test_files    = gem.files.grep(%r{^(test|spec|features)/})
  gem.add_dependency "sequel", ">= 4.15.0"
  gem.add_dependency "pg"
  gem.add_development_dependency "minitest"
  gem.add_development_dependency "pry"
  gem.require_paths = ["lib"]
end
@@ -0,0 +1,95 @@
1
+ require 'test_helper'
2
+
3
# Exercises the pg_bulk_upsert extension against a mock postgres connection
# and asserts on the SQL statements recorded by the database.
#
# A single upsert is expected to record, in order:
#   sqls[0] BEGIN, sqls[1] CREATE TEMPORARY TABLE, sqls[2] INSERT into the
#   temp table, sqls[3] the upsert statement, sqls[4] COMMIT.
class DatasetExtensionTest < Minitest::Test

  def setup
    @db = Sequel.
      connect('mock://postgres').
      extension(:pg_bulk_upsert)

    # Stub the schema lookup for :target; the extension reads it to build
    # the temp table definition.
    def @db.schema(table)
      if table == :target
        [
          [:id, { primary: true, db_type: :serial }],
          [:updatable_column, { db_type: :text }],
          [:insertable_column, { db_type: :text }]
        ]
      end
    end

    @db.create_table(:target) do
      primary_key :id
      String :updatable_column
      String :insertable_column
    end

    @ds = @db.dataset.from(:target)

    @upsert_columns = [:updatable_column, :insertable_column]
    @upsert_data    = [%w[foo bar]]

    # Clear setup sqls
    @db.sqls
  end

  def test_temp_table_creation
    sqls = do_upsert
    temp_table_name = extract_temp_table_name(sqls[1])

    temp_table_creation = sqls[1]
    assert_equal one_line_sql(<<-SQL), temp_table_creation
      CREATE TEMPORARY TABLE "#{temp_table_name}"
      ("id" serial PRIMARY KEY, "updatable_column" text, "insertable_column" text) ON COMMIT DROP
    SQL
  end

  def test_temp_table_batch_loading
    sqls = do_upsert
    temp_table_name   = extract_temp_table_name(sqls[1])
    temp_table_insert = @db.from(temp_table_name).multi_insert_sql(@upsert_columns, @upsert_data)

    assert_equal temp_table_insert, [sqls[2]]
  end

  def test_upsert_from_temp
    sqls = do_upsert
    temp_table_name   = extract_temp_table_name(sqls[1])
    temp_table_insert = one_line_sql(<<-SQL)
      WITH "update_cte" AS
      (UPDATE "target" SET "updatable_column" = "#{temp_table_name}"."updatable_column"
      FROM "#{temp_table_name}" WHERE ("target"."id" = "#{temp_table_name}"."id") RETURNING "target"."id")
      INSERT INTO "target" ("updatable_column", "insertable_column")
      SELECT "updatable_column", "insertable_column"
      FROM "#{temp_table_name}" LEFT JOIN "update_cte" USING ("id")
      WHERE ("update_cte"."id" IS NULL)
      RETURNING "target"."id"
    SQL

    assert_equal temp_table_insert, sqls[3]
  end

  def test_upsert_inside_transaction
    sqls = do_upsert

    assert_equal "BEGIN", sqls[0]
    assert_equal "COMMIT", sqls[4]
  end

  private

  # Runs an upsert that overwrites :updatable_column and returns the SQL
  # statements recorded by the database since the last call to @db.sqls.
  def do_upsert(columns = @upsert_columns, data = @upsert_data)
    @ds.on_duplicate_key_update(:updatable_column).import(columns, data)

    @db.sqls
  end

  # Pulls the generated temp table name out of the CREATE statement
  # (nil when the statement does not match).
  def extract_temp_table_name(create_sql)
    create_sql[/CREATE TEMPORARY TABLE "(\w+)"/i, 1]
  end

  # Collapses a multi-line SQL heredoc into one line: each line is trimmed
  # and the non-empty lines are joined with a single space, so the expected
  # string does not depend on how the heredoc is indented.
  def one_line_sql(heredoc)
    heredoc.lines.map(&:strip).reject(&:empty?).join(" ")
  end

end
@@ -0,0 +1,5 @@
1
+ require 'bundler/setup'
2
+ require 'minitest/autorun'
3
+
4
+ require 'sequel'
5
+ require 'sequel-pg_bulk_upsert'
metadata ADDED
@@ -0,0 +1,113 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: sequel-pg_bulk_upsert
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Juan Manuel Barreneche
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2014-11-28 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: sequel
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: 4.15.0
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: 4.15.0
27
+ - !ruby/object:Gem::Dependency
28
+ name: pg
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: minitest
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: pry
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ">="
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ">="
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
69
+ description: Implementation `on_duplicate_key_update(*...).multi_insert(*...)` for
70
+ postgresql
71
+ email:
72
+ - jbarreneche@restorando.com
73
+ executables: []
74
+ extensions: []
75
+ extra_rdoc_files: []
76
+ files:
77
+ - ".gitignore"
78
+ - Gemfile
79
+ - LICENSE.txt
80
+ - README.md
81
+ - Rakefile
82
+ - lib/sequel-pg_bulk_upsert.rb
83
+ - lib/sequel-pg_bulk_upsert/version.rb
84
+ - lib/sequel/extensions/pg_bulk_upsert.rb
85
+ - sequel-pg_bulk_upsert.gemspec
86
+ - test/lib/dataset_extension_test.rb
87
+ - test/test_helper.rb
88
+ homepage: https://github.com/restorando/sequel-pg_bulk_upsert
89
+ licenses: []
90
+ metadata: {}
91
+ post_install_message:
92
+ rdoc_options: []
93
+ require_paths:
94
+ - lib
95
+ required_ruby_version: !ruby/object:Gem::Requirement
96
+ requirements:
97
+ - - ">="
98
+ - !ruby/object:Gem::Version
99
+ version: '0'
100
+ required_rubygems_version: !ruby/object:Gem::Requirement
101
+ requirements:
102
+ - - ">="
103
+ - !ruby/object:Gem::Version
104
+ version: '0'
105
+ requirements: []
106
+ rubyforge_project:
107
+ rubygems_version: 2.2.2
108
+ signing_key:
109
+ specification_version: 4
110
+ summary: Implementation `on_duplicate_key_update(*...).multi_insert(*...)` for postgresql
111
+ test_files:
112
+ - test/lib/dataset_extension_test.rb
113
+ - test/test_helper.rb