janko 0.0.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +17 -0
- data/Gemfile +2 -0
- data/Guardfile +7 -0
- data/LICENSE.txt +202 -0
- data/README.md +186 -0
- data/Rakefile +14 -0
- data/assets/insert-performance-graph.png +0 -0
- data/assets/merge-performance-graph.png +0 -0
- data/config/environment.rb +7 -0
- data/janko.gemspec +32 -0
- data/lib/janko.rb +5 -0
- data/lib/janko/bulk_merge.rb +57 -0
- data/lib/janko/column_list.rb +193 -0
- data/lib/janko/connection.rb +107 -0
- data/lib/janko/constants.rb +13 -0
- data/lib/janko/copy_importer.rb +36 -0
- data/lib/janko/flag.rb +26 -0
- data/lib/janko/import.rb +80 -0
- data/lib/janko/insert_importer.rb +32 -0
- data/lib/janko/merge.rb +167 -0
- data/lib/janko/merge_result.rb +37 -0
- data/lib/janko/single_merge.rb +33 -0
- data/lib/janko/tagged_column.rb +133 -0
- data/lib/janko/upsert.rb +193 -0
- data/lib/janko/version.rb +3 -0
- data/spec/column_list_spec.rb +164 -0
- data/spec/connection_spec.rb +27 -0
- data/spec/flag_spec.rb +16 -0
- data/spec/import_spec.rb +112 -0
- data/spec/merge_spec.rb +400 -0
- data/spec/spec_helper.rb +32 -0
- data/spec/tagged_column_spec.rb +56 -0
- metadata +261 -0
Binary file
|
data/janko.gemspec
ADDED
@@ -0,0 +1,32 @@
|
|
1
|
+
# coding: utf-8

# Gem specification for janko.
#
# The gem's lib/ directory is put on the load path first so that the
# version constant can be read from janko/version.
lib_dir = File.expand_path("../lib", __FILE__)
$LOAD_PATH.unshift(lib_dir) unless $LOAD_PATH.include?(lib_dir)

require "janko/version"

Gem::Specification.new do |gem|
  gem.name = "janko"
  gem.version = Janko::VERSION
  gem.authors = ["Don Werve"]
  gem.email = ["don@werve.net"]
  gem.summary = "High-performance import, merge, and upsert for PostgreSQL."
  gem.description = "Because sometimes you just need to feed PostgreSQL a lot of data."
  gem.homepage = "https://github.com/matadon/janko"
  gem.license = "Apache-2.0"

  # The packaged file list is whatever git tracks (NUL-separated output so
  # that unusual file names survive the split).
  gem.files = `git ls-files -z`.split("\x0")
  gem.executables = gem.files.grep(%r{^bin/}) { |path| File.basename(path) }
  gem.test_files = gem.files.grep(%r{^(test|spec|features)/})
  gem.require_paths = ["lib"]

  # Each entry is a gem name followed by its version requirements.
  runtime_dependencies = [
    ["pg", "~> 0.17", "> 0.17"],
    ["agrippa", "~> 0.0.1", ">= 0.0.1"]
  ]

  development_dependencies = [
    ["bundler", "~> 1.6"],
    ["rake", "~> 10.0"],
    ["rspec", "~> 3.0", ">= 3.0.0"],
    ["guard", "~> 2.8"],
    ["guard-rspec", "~> 4.3"],
    ["ruby_gntp", "~> 0"],
    ["simplecov", "~> 0"],
    ["pry", "~> 0"],
    ["activerecord", "~> 4.1", "> 4.1"]
  ]

  runtime_dependencies.each do |dependency|
    gem.add_runtime_dependency(*dependency)
  end

  development_dependencies.each do |dependency|
    gem.add_development_dependency(*dependency)
  end
end
|
data/lib/janko/bulk_merge.rb
ADDED
@@ -0,0 +1,57 @@
|
|
1
|
+
require "securerandom"

require "janko/merge_result"
require "janko/import"
require "janko/upsert"
|
4
|
+
|
5
|
+
module Janko
  # Merges a batch of rows into a table by way of a temporary staging table.
  #
  # Rows handed to #push are streamed by an Import (using the CopyImporter
  # strategy) into a uniquely named temp table. #stop then lets an Upsert,
  # configured with that temp table as its :from_table, process the rows
  # before the temp table is dropped.
  #
  # Typical call order: #start, any number of #push calls, #stop, #result.
  class BulkMerge
    # @param options [Hash] settings shared with the Upsert; this class
    #   itself reads :connection, :table and :columns from it.
    def initialize(options = {})
      @options = options

      # Random suffix keeps concurrent merges from colliding on the temp
      # table name. SecureRandom lives in the stdlib "securerandom" library,
      # which has to be loaded by the file.
      @target = "merge_#{SecureRandom.hex(8)}"

      @upsert = Upsert.new(options.merge(from_table: @target))
      @importer = Import.new(
        strategy: Janko::CopyImporter,
        table: @target,
        connection: connection,
        columns: options[:columns]
      )
    end

    # Creates the staging table and starts the importer.
    #
    # @return [BulkMerge] self, for chaining
    def start
      create_copy_target
      @importer.start
      self
    end

    # Forwards the given values to the importer unchanged.
    #
    # @return [BulkMerge] self, for chaining
    def push(*values)
      @importer.push(*values)
      self
    end

    # Stops the importer, runs the upsert (process, then cleanup) and drops
    # the staging table.
    #
    # @return [BulkMerge] self, for chaining
    def stop
      @importer.stop
      @upsert.process.cleanup
      drop_copy_target
      self
    end

    # @return whatever the underlying Upsert reports as its result
    def result
      @upsert.result
    end

    # @return the :connection entry of the options given to #initialize
    def connection
      @options[:connection]
    end

    private

    # Creates an empty temp table with the same columns as the table named
    # by the :table option. ON COMMIT DROP ties its lifetime to the
    # surrounding transaction.
    def create_copy_target
      connection.exec(<<-END)
        CREATE TEMP TABLE #{@target} WITHOUT OIDS ON COMMIT DROP
        AS (SELECT * FROM #{@options[:table]}) WITH NO DATA;
      END
      self
    end

    # Drops the staging table explicitly rather than waiting for commit.
    def drop_copy_target
      connection.exec("DROP TABLE #{@target}")
      self
    end
  end
end
|
@@ -0,0 +1,193 @@
|
|
1
|
+
require "janko/tagged_column"
|
2
|
+
require "janko/constants"
|
3
|
+
require "agrippa/mutable_hash"
|
4
|
+
|
5
|
+
module Janko
  # An insertion-ordered collection of TaggedColumn objects, keyed by the
  # column name as a String and held in @state[:columns].
  #
  # Column names passed to #add, #remove, #tag, #untag and #alter may be
  # plain names, nested arrays of names, Janko::ALL, Janko::DEFAULT, or a
  # Hash of the form { except: names }. The last three are expanded against
  # the column list reported by the parent's connection for the parent's
  # table.
  class ColumnList
    include Enumerable

    include Agrippa::MutableHash

    # Returns +source+ untouched when it already is a ColumnList, otherwise
    # a new list holding the column names in +source+.
    def self.build(source)
      return source if source.is_a?(ColumnList)
      ColumnList.new.add(*source)
    end

    # Initial state: no columns.
    # NOTE(review): presumably a hook consumed by Agrippa::MutableHash --
    # confirm against that library.
    def default_state
      { columns: {} }
    end

    # NOTE(review): presumably a hook consumed by Agrippa::MutableHash --
    # confirm against that library.
    def builder
      self
    end

    # Adds the named columns (existing ones are left as they are).
    #
    # @return [ColumnList] self
    def add(*names)
      each_column(names) { |name| add_column(name) }
    end

    # Removes the named columns.
    #
    # @return [ColumnList] self
    def remove(*names)
      each_column(names) { |name| remove_column(name) }
    end

    # Makes +tag+ apply to exactly the named columns: it is first removed
    # from every column in the list, then the named columns are added (if
    # missing) and tagged.
    #
    # @return [ColumnList] self
    def tag(tag, *names)
      each { |_, column| column.untag(tag) }
      each_column(names) { |name| add_column(name).tag(tag) }
    end

    # Removes +tag+ from the named columns; names not in the list are
    # ignored.
    #
    # @return [ColumnList] self
    def untag(tag, *names)
      each_column(names) { |_, column| column && column.untag(tag) }
    end

    # New list of the columns carrying +tag+. With no argument, columns for
    # which TaggedColumn#tagged? is true are included as well.
    def tagged(tag = nil)
      filter_columns do |_, column|
        (tag.nil? && column.tagged?) || column.has_tag?(tag)
      end
    end

    # New list of the columns that do not carry +tag+.
    def not_tagged(tag)
      filter_columns { |_, column| !column.has_tag?(tag) }
    end

    # True when no column in the list carries +tag+.
    def none_tagged?(tag)
      @state[:columns].none? { |_, column| column.has_tag?(tag) }
    end

    # Yields each named column for modification.
    #
    # @raise [RuntimeError] when a name is not in the list
    # @return [ColumnList] self
    def alter(*names)
      each_column(names) do |name, column|
        raise("Unknown column: #{name}") unless column
        yield(column)
      end
    end

    # Passes +state+ to #chain (supplied by the included Agrippa module).
    def set(state)
      chain(state)
    end

    # Converts a Hash of column name => value (String or Symbol keys) into
    # an Array of values in list order; absent columns become nil.
    #
    # @raise [ArgumentError] when +values+ has keys that are not columns
    def pack(values)
      pack_hash(stringify_keys(values))
    end

    # Comma-separated quoted column names.
    def to_list
      map_and_join { |_, column| column.quoted }
    end

    # Per-column conditions joined with " AND ".
    def to_conditions(left = nil, right = nil)
      map_and_join(" AND ") { |_, column| column.to_condition(left, right) }
    end

    # Comma-separated per-column setters.
    def to_setters(left = nil, right = nil)
      map_and_join { |_, column| column.to_setter(left, right) }
    end

    # Comma-separated TaggedColumn#to_value results.
    def to_list_with_defaults
      map_and_join { |_, column| column.to_value }
    end

    # Comma-separated bind placeholders, numbered from 1 in list order.
    def to_binds
      map_and_join_with_index { |_, column, index| column.to_bind(index + 1) }
    end

    # Comma-separated typecast bind placeholders, numbered from 1.
    def to_typecast_binds
      map_and_join_with_index do |_, column, index|
        column.to_typecast_bind(index + 1)
      end
    end

    # Iterates over (name, column) pairs in insertion order.
    def each(&block)
      @state[:columns].each(&block)
    end

    # Maps over (name, column) pairs, returning an Array.
    def map(&block)
      @state[:columns].map(&block)
    end

    # Maps over (name, column) pairs and joins the results.
    def map_and_join(separator = ",", &block)
      map(&block).join(separator)
    end

    # Like #map_and_join, but the block also receives the zero-based index.
    def map_and_join_with_index(separator = ",")
      pieces = each_with_index.map { |pair, index| yield(*pair, index) }
      pieces.join(separator)
    end

    def empty?
      @state[:columns].empty?
    end

    # @return [Array<String>] column names in insertion order
    def columns
      @state[:columns].keys
    end

    # Connection of the parent object stored in the state.
    def connection
      @state[:parent].connection
    end

    # Table of the parent object stored in the state.
    def table
      @state[:parent].table
    end

    def inspect
      children = @state[:columns].map do |name, column|
        "#{name}(#{column.inspect})"
      end
      "#<#{self.class}:0x#{__id__.to_s(16)} #{children.join(" ")}>"
    end

    # Number of columns. Computed on every call: the list is mutated in
    # place by #add and #remove, so a memoized count would go stale.
    def length
      @state[:columns].length
    end

    # Converts a String-keyed Hash into an Array of values in list order.
    # Works on a copy so the caller's Hash is left intact.
    #
    # @raise [ArgumentError] when +values+ has keys that are not columns
    def pack_hash(values)
      remaining = values.dup
      packed = columns.map { |column| remaining.delete(column) }
      return packed if remaining.empty?
      raise(ArgumentError, "Unknown columns: #{remaining.keys.join(" ")}")
    end

    private

    # Returns the column stored under +name+, creating it when absent.
    def add_column(name)
      @state[:columns][name.to_s] ||= TaggedColumn.new(name: name,
        parent: self)
    end

    def remove_column(name)
      @state[:columns].delete(name.to_s)
      self
    end

    # Builds a new list sharing this list's state except for :columns,
    # which holds only the pairs the block accepts.
    def filter_columns
      kept = each_with_object({}) do |(name, column), selected|
        selected[name] = column if yield(name, column)
      end
      self.class.new(@state.merge(columns: kept))
    end

    def stringify_keys(hash)
      hash.each_with_object({}) do |(key, value), output|
        output[key.to_s] = value
      end
    end

    # Expands the caller-supplied names into a flat Array of Strings.
    #
    # @raise [RuntimeError] on a nil or empty name
    def matching_names(names)
      expanded = []
      names.flatten.each do |name|
        if name == Janko::ALL
          expanded.concat(connection.column_list(table))
        elsif name == Janko::DEFAULT
          # Every column of the table apart from "id".
          expanded.concat(connection.column_list(table) - ["id"])
        elsif name.is_a?(Hash) && name.has_key?(:except)
          excluded = [name[:except]].flatten.map(&:to_s)
          expanded.concat(connection.column_list(table) - excluded)
        elsif name.nil? || name == ""
          raise("Blank or nil column names are not allowed.")
        else
          expanded.push(name.to_s)
        end
      end
      expanded
    end

    # Yields (name, column) for every expanded name; column is nil when the
    # name is not in the list.
    #
    # @return [ColumnList] self
    def each_column(names)
      matching_names(names).each do |name|
        yield(name, @state[:columns][name.to_s])
      end
      self
    end
  end
end
|
@@ -0,0 +1,107 @@
|
|
1
|
+
require "agrippa/maybe"
|
2
|
+
require "agrippa/delegation"
|
3
|
+
|
4
|
+
module Janko
  # Thin wrapper around a raw PG::Connection that also exposes the column
  # catalog (names, types and default expressions) of the visible tables.
  class Connection
    include Agrippa::Delegation

    # Wraps +backend+ in a Connection.
    #
    # @param backend nil (use the default), an existing Connection
    #   (returned as is), or anything #initialize accepts
    def self.build(backend)
      return default if backend.nil?
      return backend if backend.is_a?(Connection)
      new(backend)
    end

    # Connection built from ActiveRecord::Base when that constant is
    # defined.
    #
    # @raise [RuntimeError] when ActiveRecord::Base is not defined
    def self.default
      unless Kernel.const_defined?("ActiveRecord::Base")
        raise("No default connection available.")
      end
      new(Kernel.const_get("ActiveRecord::Base"))
    end

    # Returns the cached catalog, running the block to build it on first
    # use. The cache is held on the class, so it is shared by every
    # Connection instance until .reset_cached_catalog is called.
    def self.cache_catalog
      @catalog ||= yield
    end

    # Discards the cached catalog so the next lookup queries it again.
    def self.reset_cached_catalog
      @catalog = nil
      self
    end

    attr_reader :backend

    delegate(*%w(exec prepare exec_prepared async_exec put_copy_data
      put_copy_end get_last_result), to: "backend")

    # @param backend a PG::Connection, or an object from which one can be
    #   extracted (see #extract_raw_connection)
    def initialize(backend)
      @backend = extract_raw_connection(backend)
    end

    # True for any transaction status other than idle.
    def in_transaction?
      backend.transaction_status > PG::PQTRANS_IDLE
    end

    # True when the transaction status is "in error" or beyond.
    def failed?
      backend.transaction_status >= PG::PQTRANS_INERROR
    end

    # Table name => { column name => { type:, default: } } for the ordinary
    # tables visible on the search path, excluding dropped and system
    # columns.
    #
    # http://dba.stackexchange.com/questions/22362/
    # http://www.postgresql.org/docs/9.3/static/catalog-pg-attribute.html
    # http://www.postgresql.org/docs/9.3/static/catalog-pg-attrdef.html
    def catalog
      Connection.cache_catalog do
        result = backend.exec(<<-END)
          SELECT relname, attname, typname, pg_get_expr(adbin, 0)
          FROM pg_class
          LEFT JOIN pg_namespace ON (
          pg_class.relnamespace = pg_namespace.oid)
          LEFT JOIN pg_attribute ON (
          pg_class.oid = pg_attribute.attrelid)
          LEFT JOIN pg_attrdef ON (
          pg_attribute.attrelid = pg_attrdef.adrelid
          AND pg_attribute.attnum = pg_attrdef.adnum)
          LEFT JOIN pg_type ON (
          pg_attribute.atttypid = pg_type.oid)
          WHERE pg_class.relkind IN ('r','')
          AND pg_namespace.nspname
          NOT IN ('pg_catalog', 'pg_toast')
          AND pg_table_is_visible(pg_class.oid)
          AND attnum > 0
          AND NOT attisdropped;
        END

        tables = {}
        result.each_row do |relname, attname, typname, default|
          tables[relname] ||= {}
          tables[relname][attname] = { type: typname, default: default }
        end
        tables
      end
    end

    # @param table [String, Symbol]
    # @return [Array<String>] column names of +table+
    # @raise [RuntimeError] when the table is not in the catalog
    def column_list(table)
      table_info(table).keys
    end

    # @return [String] type name of the column
    # @raise [RuntimeError] when the table or column is not in the catalog
    def column_type(table, column)
      column_info(table, column)[:type]
    end

    # @return [String, nil] default expression of the column, if any
    # @raise [RuntimeError] when the table or column is not in the catalog
    def column_default(table, column)
      column_info(table, column)[:default]
    end

    private

    # Catalog entry for +table+. Names are normalised with to_s because the
    # catalog is keyed by Strings.
    def table_info(table)
      catalog[table.to_s] || raise("Unknown table: #{table}")
    end

    # Catalog entry for a single column of +table+.
    def column_info(table, column)
      table_info(table)[column.to_s] ||
        raise("Unknown column: #{table}.#{column}")
    end

    def maybe(*args)
      Agrippa::Maybe.new(*args)
    end

    # Returns +backend+ itself when it is a PG::Connection; otherwise tries
    # backend.raw_connection, then backend.connection.raw_connection.
    #
    # @raise [RuntimeError] when neither yields a connection
    def extract_raw_connection(backend)
      return backend if backend.is_a?(PG::Connection)
      maybe(backend).raw_connection._ ||
        maybe(backend).connection.raw_connection._ ||
        raise("Unable to extract a connection from: #{backend}")
    end
  end
end
|
@@ -0,0 +1,36 @@
|
|
1
|
+
require "csv"
|
2
|
+
require "agrippa/mutable"
|
3
|
+
|
4
|
+
module Janko
  # Import strategy that streams rows to PostgreSQL with COPY, sending each
  # row as one CSV line.
  #
  # Call order: #start, any number of #push calls, #stop.
  class CopyImporter
    include Agrippa::Mutable

    state_reader :connection, :table, :columns

    # Puts the connection into COPY mode for the configured table and
    # columns. STDIN is the documented source keyword for data supplied by
    # the client.
    #
    # @return [CopyImporter] self
    def start
      statement = sprintf("COPY %s(%s) FROM STDIN CSV", table,
        columns.to_list)
      connection.async_exec(statement)
      self
    end

    # Packs +values+ into column order and sends them as one CSV line.
    # On any StandardError the COPY is ended via #stop before the error is
    # re-raised.
    #
    # @return [CopyImporter] self
    def push(values)
      line = CSV.generate_line(columns.pack(values))
      connection.put_copy_data(line)
      self
    rescue
      stop
      raise
    end

    # Ends the COPY and checks the final result status.
    #
    # @return [CopyImporter] self
    # @raise [PG::Error] when the status is neither PGRES_COMMAND_OK nor
    #   PGRES_COPY_IN
    def stop
      connection.put_copy_end
      result = connection.get_last_result
      accepted = [PG::PGRES_COMMAND_OK, PG::PGRES_COPY_IN]
      return self if accepted.include?(result.result_status)
      raise(PG::Error, result.error_message)
    end
  end
end
|