sequel-pg_bulk_upsert 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: dae595e4106d5d417ee04cc133ec517bb262a258
4
+ data.tar.gz: 8487d7990c2ddc1b532d9ccf73f6b5a6d49d9635
5
+ SHA512:
6
+ metadata.gz: 0b5a2f42be945d76e6dc3a0ba36c0674203c9b4415e6b1860d8d9f6c075ec311bd7afbbce1bd5ba7787d4c2fb45f7c68c674483b29e0f88489ad43e2aa676bf8
7
+ data.tar.gz: 489f39454ff4bfe11acad6816454d0e693821b9454d912a2a9930e6b61597bed98850d12256bab88deec651991fe9410c1a1d56daa2f7c585480f6179e9026ee
@@ -0,0 +1,17 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ Gemfile.lock
7
+ InstalledFiles
8
+ _yardoc
9
+ coverage
10
+ doc/
11
+ lib/bundler/man
12
+ pkg
13
+ rdoc
14
+ spec/reports
15
+ test/tmp
16
+ test/version_tmp
17
+ tmp
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in sequel-pg_bulk_upsert.gemspec
4
+ gemspec
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2014 Juan Manuel Barreneche
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,83 @@
1
+ # Sequel::PgBulkUpsert
2
+
3
+ This Sequel extension implements `on_duplicate_key_update(*...).multi_insert(*...)` for postgresql. The
4
+ syntax is 100% compatible with the one that Sequel provides for MySQL, with a couple of caveats.
5
+
6
+ There are different ways for implementing "upsert" in postgresql, some of them discussed in:
7
+
8
+ http://johtopg.blogspot.com.ar/2014/04/upsertisms-in-postgres.html
9
+
10
+ Because we aim to do bulk upserts, this extension uses the one described here:
11
+
12
+ http://tapoueh.org/blog/2013/03/15-batch-update
13
+
14
+ It consists of 3 parts:
15
+
16
+ 1. Create a temp table with the target table structure
17
+
18
+ 2. Fill the temp table with the rows that are going to be "upserted"
19
+
20
+ 3. Update the records that exist in the target table with the temp table information, then, by doing an
21
+ "anti-join" between the temp table and the target table, the records that didn't exist are inserted.
22
+
23
+ The resulting SQL looks like this:
24
+
25
+ ```SQL
26
+ WITH "update_cte" AS
27
+ (UPDATE "<target_table>"
28
+ SET "updatable_column" = "<temp_table>"."updatable_column"
29
+ FROM "<temp_table>"
30
+ WHERE ("<target_table>"."id" = "<temp_table>"."id")
31
+ RETURNING "<target_table>"."id")
32
+ INSERT INTO "<target_table>" ("updatable_column", "insertable_column")
33
+ SELECT "updatable_column", "insertable_column"
34
+ FROM "<temp_table>" LEFT JOIN "update_cte" USING ("id")
35
+ WHERE ("update_cte"."id" IS NULL)
36
+ RETURNING "<target_table>"."id"
37
+ ```
38
+
39
+ ## Caveats
40
+
41
+ 1. The target table **must** have a primary key (this is the key used to decide when a record "exists"); it
42
+ won't work for any other unique constraint in the target table.
43
+
44
+ 2. This strategy for upsert isn't suitable for concurrent upserts on the same table: concurrent upserts
45
+ will each see the same table contents, so neither will update the other's records and both will try to
46
+ insert them, which results in a duplicate key error.
47
+
48
+
49
+ ## Installation
50
+
51
+ Add this line to your application's Gemfile:
52
+
53
+ gem 'sequel-pg_bulk_upsert'
54
+
55
+ And then execute:
56
+
57
+ $ bundle
58
+
59
+ Or install it yourself as:
60
+
61
+ $ gem install sequel-pg_bulk_upsert
62
+
63
+ ## Usage
64
+
65
+ ```ruby
66
+ require 'sequel-pg_bulk_upsert'
67
+
68
+ # This can also be done for a specific dataset
69
+ DB.extension(:pg_bulk_upsert)
70
+
71
+ DB[:target].on_duplicate_key_update(:column1, :column2).multi_insert([
72
+ {column1: '1', column2: '2', column3: '3'},
73
+ {column1: '4', column2: '4', column3: '4'}
74
+ ])
75
+ ```
76
+
77
+ ## Contributing
78
+
79
+ 1. Fork it
80
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
81
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
82
+ 4. Push to the branch (`git push origin my-new-feature`)
83
+ 5. Create new Pull Request
@@ -0,0 +1,11 @@
1
+ require "bundler/gem_tasks"
2
+
3
+ require 'rake/testtask'
4
+
5
# Defines the `test` task: puts the test/ directory on the load path (the
# test files do `require 'test_helper'`) and runs every file under test/
# whose name ends in _test.rb.
Rake::TestTask.new(:test) do |test_task|
  test_task.libs.push("test")
  test_task.pattern = "test/**/*_test.rb"
end

# Running `rake` with no arguments runs the test task.
task default: %w[test]
@@ -0,0 +1 @@
1
+ require 'sequel-pg_bulk_upsert/version'
@@ -0,0 +1,5 @@
1
module Sequel
  module PgBulkUpsert
    # Version of the gem; read by the gemspec. Frozen so the shared constant
    # string cannot be mutated in place.
    VERSION = "0.1.0".freeze
  end
end
@@ -0,0 +1,77 @@
1
+ require "sequel"
2
+
3
module Sequel
  # Add :on_duplicate_key_update to Sequel's list of non-SQL dataset options.
  # The guards make loading idempotent (no duplicate entry when the file is
  # loaded twice) and keep it from raising at load time if the running Sequel
  # version freezes or no longer defines the constant.
  # NOTE(review): confirm how the Sequel versions you support expose this list.
  if defined?(Dataset::NON_SQL_OPTIONS) &&
     !Dataset::NON_SQL_OPTIONS.frozen? &&
     !Dataset::NON_SQL_OPTIONS.include?(:on_duplicate_key_update)
    Dataset::NON_SQL_OPTIONS << :on_duplicate_key_update
  end

  # Dataset extension that turns
  # `on_duplicate_key_update(*cols).multi_insert(rows)` into a bulk upsert
  # built from a temporary table plus an UPDATE ... / INSERT ... statement.
  module PgBulkUpsert
    # Returns a clone of the dataset that stores the given columns under the
    # :on_duplicate_key_update option. These are the columns that get updated
    # on rows that already exist in the target table.
    def on_duplicate_key_update(*args)
      clone(on_duplicate_key_update: args)
    end

    # When :on_duplicate_key_update is set, returns a flat array of SQL
    # statements: create the temp table, insert the rows into it, then upsert
    # from the temp table into the target table. Without that option it
    # defers to the default implementation.
    #
    # Raises Sequel::InvalidOperation when the dataset does not have exactly
    # one FROM source, and RuntimeError when the target table has no primary
    # key.
    def multi_insert_sql(columns, values)
      duplicate_keys = @opts[:on_duplicate_key_update]
      return super unless duplicate_keys

      from_table_name = single_target_table
      temp_table_name = unique_temp_table_name(from_table_name)
      upsert_on = @db.primary_key(from_table_name)
      raise "missing primary_key for #{from_table_name}" unless upsert_on

      [
        create_temp_table_from_existing_sql(from_table_name, temp_table_name),
        multi_insert_without_duplicates_sqls(temp_table_name, columns, values),
        upsert_from_to_sql(temp_table_name, from_table_name, upsert_on.to_sym, duplicate_keys, columns)
      ].flatten
    end

    private

    # Returns the dataset's only FROM source. An upsert needs one unambiguous
    # target, so a dataset with zero or several sources is rejected instead of
    # silently using the first one.
    def single_target_table
      sources = Array(@opts[:from])
      unless sources.size == 1
        raise Sequel::InvalidOperation,
              "bulk upsert requires exactly one target table, got #{sources.size}"
      end

      sources.first
    end

    # Builds the temp table name from the target name, the current time in
    # epoch seconds plus milliseconds, and a short random hex suffix. The
    # suffix keeps two upserts on the same table within the same millisecond
    # from producing the same name.
    def unique_temp_table_name(base_table)
      timestamp = Time.now.strftime("%s%L")
      suffix = Kernel.format("%04x", Kernel.rand(0x10000))
      "#{base_table}_tmp_#{timestamp}_#{suffix}"
    end

    # SQL for loading the rows into target_table. The upsert option is cleared
    # on the clone so the nested multi_insert_sql call takes the default path
    # instead of recursing into the upsert branch.
    def multi_insert_without_duplicates_sqls(target_table, columns, values)
      clone(on_duplicate_key_update: nil).
        from(target_table).
        multi_insert_sql(columns, values)
    end

    # Builds the single upsert statement:
    #
    # * an "update_cte" CTE that updates update_columns on the target rows
    #   whose join_on value matches a source row, returning those keys;
    # * an INSERT into the target of the source rows that have no match in
    #   "update_cte" (left join filtered on a NULL key).
    def upsert_from_to_sql(source_name, target_name, join_on, update_columns, insert_columns)
      target = @db.from(target_name)
      source = @db.from(source_name)

      # Source rows that were not touched by the update.
      source_ds = source.
        select(*insert_columns).
        left_join(:update_cte, [join_on]).
        where(Sequel.qualify(:update_cte, join_on) => nil)

      # column => source_table.column, for the UPDATE's SET clause.
      update_hash = update_columns.each_with_object({}) do |column, hash|
        hash[column] = Sequel.qualify(source_name, column)
      end

      target.with(:update_cte,
        target.from(target_name, source_name).
          where(Sequel.qualify(target_name, join_on) => Sequel.qualify(source_name, join_on)).
          returning(Sequel.qualify(target_name, join_on)).with_sql(:update_sql, update_hash)).
        returning(Sequel.qualify(target_name, join_on)).insert_sql(insert_columns, source_ds)
    end

    # SQL that creates a temporary table (dropped on commit) with the same
    # column names, database types and primary key flags that @db.schema
    # reports for base_table.
    def create_temp_table_from_existing_sql(base_table, temp_table_name)
      columns_information = @db.schema(base_table)

      generator = @db.create_table_generator do
        columns_information.each do |col, data|
          column col, data[:db_type], primary_key: data[:primary]
        end
      end

      # create_table_sql is invoked through send, as in the original code.
      @db.send :create_table_sql, temp_table_name, generator, temp: true, on_commit: :drop
    end

  end

  # Database extension: extends every dataset created by the database with
  # PgBulkUpsert.
  module PgDatabaseBulkUpsert
    def self.extended(db)
      db.extend_datasets(PgBulkUpsert)
    end
  end

  Dataset.register_extension :pg_bulk_upsert, PgBulkUpsert
  Database.register_extension :pg_bulk_upsert, PgDatabaseBulkUpsert
end
77
+
@@ -0,0 +1,23 @@
1
+ # -*- encoding: utf-8 -*-
2
+ lib = File.expand_path('../lib', __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require 'sequel-pg_bulk_upsert/version'
5
+
6
# Gem specification for sequel-pg_bulk_upsert.
Gem::Specification.new do |gem|
  gem.name          = "sequel-pg_bulk_upsert"
  gem.version       = Sequel::PgBulkUpsert::VERSION
  gem.authors       = ["Juan Manuel Barreneche"]
  gem.email         = ["jbarreneche@restorando.com"]
  gem.description   = %q{Implementation `on_duplicate_key_update(*...).multi_insert(*...)` for postgresql}
  gem.summary       = %q{Implementation `on_duplicate_key_update(*...).multi_insert(*...)` for postgresql}
  gem.homepage      = "https://github.com/restorando/sequel-pg_bulk_upsert"
  # The package ships the MIT license text; declare it so the built gem
  # metadata does not end up with an empty licenses list.
  gem.license       = "MIT"

  # Package every file tracked by git.
  gem.files         = `git ls-files`.split($/)
  gem.executables   = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
  gem.test_files    = gem.files.grep(%r{^(test|spec|features)/})
  gem.add_dependency "sequel", ">= 4.15.0"
  gem.add_dependency "pg"
  gem.add_development_dependency "minitest"
  gem.add_development_dependency "pry"
  gem.require_paths = ["lib"]
end
@@ -0,0 +1,95 @@
1
+ require 'test_helper'
2
+
3
# Checks the SQL statements issued for a bulk upsert, using Sequel's mock
# postgres adapter. `@db.sqls` returns (and clears) the statements recorded
# so far; for one upsert they are BEGIN, the temp table creation, the temp
# table insert, the upsert statement and COMMIT.
class DatasetExtensionTest < Minitest::Test

  def setup
    @db = Sequel.
      connect('mock://postgres').
      extension(:pg_bulk_upsert)

    # Stub schema lookup for the :target table.
    def @db.schema(table)
      if table == :target
        [
          [:id, { primary: true, db_type: :serial }],
          [:updatable_column, { db_type: :text }],
          [:insertable_column, { db_type: :text }]
        ]
      end
    end

    @db.create_table(:target) do
      primary_key :id
      String :updatable_column
      String :insertable_column
    end

    @ds = @db.dataset.from(:target)

    @upsert_columns = [:updatable_column, :insertable_column]
    @upsert_data    = [%w[foo bar]]

    # Clear setup sqls
    @db.sqls
  end

  def test_temp_table_creation
    sqls = do_upsert
    temp_table_creation = sqls[1]
    temp_table_name = extract_temp_table_name(temp_table_creation)

    assert_equal single_line_sql(<<-SQL), temp_table_creation
      CREATE TEMPORARY TABLE "#{temp_table_name}"
      ("id" serial PRIMARY KEY, "updatable_column" text, "insertable_column" text) ON COMMIT DROP
    SQL
  end

  def test_temp_table_batch_loading
    sqls = do_upsert
    temp_table_name = extract_temp_table_name(sqls[1])
    temp_table_insert = @db.from(temp_table_name).multi_insert_sql(@upsert_columns, @upsert_data)

    assert_equal temp_table_insert, [sqls[2]]
  end

  def test_upsert_from_temp
    sqls = do_upsert
    temp_table_name = extract_temp_table_name(sqls[1])
    temp_table_insert = single_line_sql(<<-SQL)
      WITH "update_cte" AS
      (UPDATE "target" SET "updatable_column" = "#{temp_table_name}"."updatable_column"
      FROM "#{temp_table_name}" WHERE ("target"."id" = "#{temp_table_name}"."id") RETURNING "target"."id")
      INSERT INTO "target" ("updatable_column", "insertable_column")
      SELECT "updatable_column", "insertable_column"
      FROM "#{temp_table_name}" LEFT JOIN "update_cte" USING ("id")
      WHERE ("update_cte"."id" IS NULL)
      RETURNING "target"."id"
    SQL

    assert_equal temp_table_insert, sqls[3]
  end

  def test_upsert_inside_transaction
    sqls = do_upsert

    assert_equal "BEGIN", sqls[0]
    assert_equal "COMMIT", sqls[4]
  end

  private

  # Runs one upsert against the mock database and returns the SQL it issued.
  def do_upsert(columns = @upsert_columns, data = @upsert_data)
    @ds.on_duplicate_key_update(:updatable_column).import(columns, data)

    @db.sqls
  end

  # Pulls the generated temp table name out of the CREATE statement.
  def extract_temp_table_name(create_sql)
    create_sql[/CREATE TEMPORARY TABLE "(\w+)"/i, 1]
  end

  # Collapses a multi-line heredoc into one line: each line is stripped and
  # the non-empty lines are joined with a single space, so the expected SQL
  # does not depend on the heredoc's indentation or trailing whitespace.
  def single_line_sql(heredoc)
    heredoc.lines.map(&:strip).reject(&:empty?).join(" ")
  end

end
@@ -0,0 +1,5 @@
1
+ require 'bundler/setup'
2
+ require 'minitest/autorun'
3
+
4
+ require 'sequel'
5
+ require 'sequel-pg_bulk_upsert'
metadata ADDED
@@ -0,0 +1,113 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: sequel-pg_bulk_upsert
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Juan Manuel Barreneche
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2014-11-28 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: sequel
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: 4.15.0
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: 4.15.0
27
+ - !ruby/object:Gem::Dependency
28
+ name: pg
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: minitest
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: pry
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ">="
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ">="
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
69
+ description: Implementation `on_duplicate_key_update(*...).multi_insert(*...)` for
70
+ postgresql
71
+ email:
72
+ - jbarreneche@restorando.com
73
+ executables: []
74
+ extensions: []
75
+ extra_rdoc_files: []
76
+ files:
77
+ - ".gitignore"
78
+ - Gemfile
79
+ - LICENSE.txt
80
+ - README.md
81
+ - Rakefile
82
+ - lib/sequel-pg_bulk_upsert.rb
83
+ - lib/sequel-pg_bulk_upsert/version.rb
84
+ - lib/sequel/extensions/pg_bulk_upsert.rb
85
+ - sequel-pg_bulk_upsert.gemspec
86
+ - test/lib/dataset_extension_test.rb
87
+ - test/test_helper.rb
88
+ homepage: https://github.com/restorando/sequel-pg_bulk_upsert
89
+ licenses: []
90
+ metadata: {}
91
+ post_install_message:
92
+ rdoc_options: []
93
+ require_paths:
94
+ - lib
95
+ required_ruby_version: !ruby/object:Gem::Requirement
96
+ requirements:
97
+ - - ">="
98
+ - !ruby/object:Gem::Version
99
+ version: '0'
100
+ required_rubygems_version: !ruby/object:Gem::Requirement
101
+ requirements:
102
+ - - ">="
103
+ - !ruby/object:Gem::Version
104
+ version: '0'
105
+ requirements: []
106
+ rubyforge_project:
107
+ rubygems_version: 2.2.2
108
+ signing_key:
109
+ specification_version: 4
110
+ summary: Implementation `on_duplicate_key_update(*...).multi_insert(*...)` for postgresql
111
+ test_files:
112
+ - test/lib/dataset_extension_test.rb
113
+ - test/test_helper.rb