janko 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +17 -0
- data/Gemfile +2 -0
- data/Guardfile +7 -0
- data/LICENSE.txt +202 -0
- data/README.md +186 -0
- data/Rakefile +14 -0
- data/assets/insert-performance-graph.png +0 -0
- data/assets/merge-performance-graph.png +0 -0
- data/config/environment.rb +7 -0
- data/janko.gemspec +32 -0
- data/lib/janko.rb +5 -0
- data/lib/janko/bulk_merge.rb +57 -0
- data/lib/janko/column_list.rb +193 -0
- data/lib/janko/connection.rb +107 -0
- data/lib/janko/constants.rb +13 -0
- data/lib/janko/copy_importer.rb +36 -0
- data/lib/janko/flag.rb +26 -0
- data/lib/janko/import.rb +80 -0
- data/lib/janko/insert_importer.rb +32 -0
- data/lib/janko/merge.rb +167 -0
- data/lib/janko/merge_result.rb +37 -0
- data/lib/janko/single_merge.rb +33 -0
- data/lib/janko/tagged_column.rb +133 -0
- data/lib/janko/upsert.rb +193 -0
- data/lib/janko/version.rb +3 -0
- data/spec/column_list_spec.rb +164 -0
- data/spec/connection_spec.rb +27 -0
- data/spec/flag_spec.rb +16 -0
- data/spec/import_spec.rb +112 -0
- data/spec/merge_spec.rb +400 -0
- data/spec/spec_helper.rb +32 -0
- data/spec/tagged_column_spec.rb +56 -0
- metadata +261 -0
Binary file
|
data/janko.gemspec
ADDED
@@ -0,0 +1,32 @@
|
|
1
|
+
# coding: utf-8

# Put the gem's own lib/ directory on the load path so that the version
# constant can be required straight from the working tree.
lib_dir = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib_dir) if !$LOAD_PATH.include?(lib_dir)

require 'janko/version'

# Gem specification for janko. The block is the last expression in the file
# so that the built specification is what the file evaluates to.
Gem::Specification.new do |gem|
  gem.name = 'janko'
  gem.version = Janko::VERSION
  gem.authors = ['Don Werve']
  gem.email = ['don@werve.net']
  gem.summary = 'High-performance import, merge, and upsert for PostgreSQL.'
  gem.description = 'Because sometimes you just need to feed PostgreSQL a lot of data.'
  gem.homepage = 'https://github.com/matadon/janko'
  gem.license = 'Apache-2.0'

  # Package exactly the files tracked by git (NUL-separated listing).
  gem.files = `git ls-files -z`.split("\x0")
  gem.executables = gem.files.grep(%r{^bin/}) { |path| File.basename(path) }
  gem.test_files = gem.files.grep(%r{^(test|spec|features)/})
  gem.require_paths = ['lib']

  # Runtime dependencies, declared in their original order.
  [
    ['pg', '~> 0.17', '> 0.17'],
    ['agrippa', '~> 0.0.1', '>= 0.0.1']
  ].each do |name, *requirements|
    gem.add_runtime_dependency(name, *requirements)
  end

  # Development-only dependencies, declared in their original order.
  [
    ['bundler', '~> 1.6'],
    ['rake', '~> 10.0'],
    ['rspec', '~> 3.0', '>= 3.0.0'],
    ['guard', '~> 2.8'],
    ['guard-rspec', '~> 4.3'],
    ['ruby_gntp', '~> 0'],
    ['simplecov', '~> 0'],
    ['pry', '~> 0'],
    ['activerecord', '~> 4.1', '> 4.1']
  ].each do |name, *requirements|
    gem.add_development_dependency(name, *requirements)
  end
end
|
data/lib/janko.rb
ADDED
@@ -0,0 +1,57 @@
|
|
1
|
+
require "janko/merge_result"
|
2
|
+
require "janko/import"
|
3
|
+
require "janko/upsert"
|
4
|
+
|
5
|
+
# SecureRandom is used below to name the staging table; it lives in the
# standard library but is not loaded by default.
require "securerandom"

module Janko
  # Merges many rows into a table in one pass: rows are first streamed into
  # a temporary staging table through an Import configured with the
  # CopyImporter strategy, then an Upsert reads from that staging table.
  #
  # Typical call order is #start, any number of #push calls, then #stop;
  # #result exposes the outcome reported by the Upsert.
  class BulkMerge
    # options - Hash of settings. This class reads :connection, :table and
    #           :columns itself; the whole Hash (plus :from_table) is also
    #           handed to Upsert.
    def initialize(options = {})
      @options = options
      # Random name so staging tables from separate merges do not collide.
      @target = "merge_#{SecureRandom.hex(8)}"
      @upsert = Upsert.new(options.merge(from_table: @target))
      @importer = Import.new(strategy: Janko::CopyImporter,
        table: @target, connection: connection,
        columns: options[:columns])
    end

    # Creates the staging table and starts the importer. Returns self.
    def start
      create_copy_target
      @importer.start
      self
    end

    # Forwards the given values to the importer. Returns self.
    def push(*values)
      @importer.push(*values)
      self
    end

    # Stops the importer, runs the upsert from the staging table, asks the
    # upsert to clean up, and drops the staging table. Returns self.
    def stop
      @importer.stop
      @upsert.process.cleanup
      drop_copy_target
      self
    end

    # Returns whatever the underlying Upsert reports as its result.
    def result
      @upsert.result
    end

    # Returns the connection supplied in the options Hash.
    def connection
      @options[:connection]
    end

    private

    # Creates an empty temporary table with the same columns as the
    # destination table.
    # NOTE(review): ON COMMIT DROP suggests callers are expected to run the
    # whole merge inside one transaction -- confirm against callers.
    def create_copy_target
      connection.exec(<<-END)
        CREATE TEMP TABLE #{@target} WITHOUT OIDS ON COMMIT DROP
        AS (SELECT * FROM #{@options[:table]}) WITH NO DATA;
      END
      self
    end

    # Drops the staging table explicitly once the merge is finished.
    def drop_copy_target
      connection.exec("DROP TABLE #{@target}")
      self
    end
  end
end
|
@@ -0,0 +1,193 @@
|
|
1
|
+
require "janko/tagged_column"
|
2
|
+
require "janko/constants"
|
3
|
+
require "agrippa/mutable_hash"
|
4
|
+
|
5
|
+
module Janko
  # A name-keyed collection of TaggedColumn objects that can be filtered by
  # tag and joined into comma- or AND-separated strings built from each
  # column.
  #
  # The columns live in @state[:columns], a Hash keyed by the column name as
  # a String. @state itself is presumably set up by Agrippa::MutableHash
  # from #default_state -- confirm against that library.
  class ColumnList
    include Enumerable

    include Agrippa::MutableHash

    # Returns +source+ itself when it already is a ColumnList; otherwise
    # builds a fresh list and adds every entry of +source+ to it.
    def ColumnList.build(source)
      source.is_a?(ColumnList) ? source : ColumnList.new.add(*source)
    end

    # Initial state: no columns.
    def default_state
      { columns: {} }
    end

    # Returns self.
    def builder
      self
    end

    # Adds each named column that is not already present. Returns self.
    # Names may include the Janko::ALL / Janko::DEFAULT markers or an
    # { except: ... } Hash (see #matching_names).
    def add(*names)
      each_column(names) { |name| add_column(name) }
    end

    # Removes each named column. Returns self.
    def remove(*names)
      each_column(names) { |name| remove_column(name) }
    end

    # Moves +tag+ onto exactly the named columns: the tag is first removed
    # from every column in the list, then each named column is added if
    # missing and tagged. Returns self.
    def tag(tag, *names)
      each { |_, column| column.untag(tag) }
      each_column(names) { |name| add_column(name).tag(tag) }
    end

    # Removes +tag+ from the named columns; names that are not in the list
    # are skipped. Returns self.
    def untag(tag, *names)
      each_column(names) { |_, column| column and column.untag(tag) }
    end

    # Returns a new list holding the columns that carry +tag+, or, when
    # +tag+ is nil, the columns for which tagged? is true.
    def tagged(tag = nil)
      filter_columns do |name, column|
        (tag.nil? and column.tagged?) or column.has_tag?(tag)
      end
    end

    # Returns a new list holding the columns that do not carry +tag+.
    def not_tagged(tag)
      filter_columns { |name, column| not column.has_tag?(tag) }
    end

    # True when no column in the list carries +tag+.
    def none_tagged?(tag)
      @state[:columns].none? { |_, column| column.has_tag?(tag) }
    end

    # Yields each named column so the caller can modify it. Raises a
    # RuntimeError for a name that is not in the list. Returns self.
    def alter(*names)
      each_column(names) do |name, column|
        raise("Unknown column: #{name}") unless column
        yield(column)
      end
    end

    # Delegates to chain(state), which is not defined in this class
    # (presumably provided by Agrippa::MutableHash -- confirm).
    def set(state)
      chain(state)
    end

    # Converts a Hash of values (String or Symbol keys) into an Array
    # ordered like #columns. Raises ArgumentError for keys that do not name
    # a column in the list.
    def pack(values)
      pack_hash(stringify_keys(values))
    end

    # Comma-joined result of calling quoted on every column.
    def to_list
      map_and_join { |_, column| column.quoted }
    end

    # " AND "-joined result of calling to_condition(left, right) on every
    # column.
    def to_conditions(left = nil, right = nil)
      map_and_join(" AND ") { |_, c| c.to_condition(left, right) }
    end

    # Comma-joined result of calling to_setter(left, right) on every column.
    def to_setters(left = nil, right = nil)
      map_and_join { |_, c| c.to_setter(left, right) }
    end

    # Comma-joined result of calling to_value on every column.
    def to_list_with_defaults
      map_and_join { |_, c| c.to_value }
    end

    # Comma-joined result of calling to_bind on every column with its
    # 1-based position.
    def to_binds
      map_and_join_with_index { |_, c, i| c.to_bind(i + 1) }
    end

    # Comma-joined result of calling to_typecast_bind on every column with
    # its 1-based position.
    def to_typecast_binds
      map_and_join_with_index { |_, c, i| c.to_typecast_bind(i + 1) }
    end

    # Iterates over (name, column) pairs.
    def each(&block)
      @state[:columns].each(&block)
    end

    # Maps over (name, column) pairs.
    def map(&block)
      @state[:columns].map(&block)
    end

    # Maps over (name, column) pairs and joins the results.
    def map_and_join(separator = ",", &block)
      map(&block).join(separator)
    end

    # Like #map_and_join, but the block also receives the 0-based index as
    # a third argument.
    def map_and_join_with_index(separator = ",")
      output = each_with_index.map { |pair, index| yield(*pair, index) }
      output.join(separator)
    end

    # True when the list holds no columns.
    def empty?
      @state[:columns].empty?
    end

    # Column names (Strings), in the order they are stored.
    def columns
      @state[:columns].keys
    end

    # Connection of the parent object stored in @state[:parent].
    def connection
      @state[:parent].connection
    end

    # Table of the parent object stored in @state[:parent].
    def table
      @state[:parent].table
    end

    def inspect
      children = @state[:columns].map { |name, column|
        "#{name}(#{column.inspect})" }
      "#<#{self.class}:0x#{self.__id__.to_s(16)} #{children.join(" ")}>"
    end

    # Number of columns currently in the list. Deliberately not memoised:
    # the list is mutable (#add / #remove), so a cached count would go
    # stale after the first call.
    def length
      @state[:columns].length
    end

    # Converts a String-keyed Hash into an Array of values ordered like
    # #columns (nil for columns without a value). Raises ArgumentError when
    # +values+ has keys that do not name a column. The caller's Hash is
    # left untouched.
    def pack_hash(values)
      unknown = values.keys - columns
      unless unknown.empty?
        raise(ArgumentError, "Unknown columns: #{unknown.join(" ")}")
      end
      columns.map { |column| values.fetch(column, nil) }
    end

    private

    # Returns the existing column for +name+, creating it when missing.
    def add_column(name)
      @state[:columns][name.to_s] ||= TaggedColumn.new(name: name,
        parent: self)
    end

    def remove_column(name)
      @state[:columns].delete(name.to_s)
      self
    end

    # Builds a new list of the same class containing only the columns for
    # which the block returns a truthy value; the rest of the state is
    # carried over unchanged.
    def filter_columns
      result = {}
      each { |n, c| result[n] = c if yield(n, c) }
      self.class.new(@state.merge(columns: result))
    end

    # Copy of +hash+ with every key converted to a String.
    def stringify_keys(hash)
      output = {}
      hash.each { |k, v| output[k.to_s] = v }
      output
    end

    # Expands a (possibly nested) list of names into plain String names:
    #   Janko::ALL         - every column of the table
    #   Janko::DEFAULT     - every column of the table except "id"
    #   { except: names }  - every column of the table except those named
    #   anything else      - the name itself, as a String
    # Raises a RuntimeError for nil or blank names.
    def matching_names(names)
      result = []
      names.flatten.each do |name|
        if(name == Janko::ALL)
          result.concat(connection.column_list(table))
        elsif(name == Janko::DEFAULT)
          result.concat(connection.column_list(table) - [ "id" ])
        elsif(name.is_a?(Hash) and name.has_key?(:except))
          all_columns = connection.column_list(table)
          exceptions = [ name[:except] ].flatten.map(&:to_s)
          result.concat(all_columns - exceptions)
        elsif(name.nil? or name == "")
          raise("Blank or nil column names are not allowed.")
        else
          result.push(name.to_s)
        end
      end
      result
    end

    # Yields (name, column) for each expanded name; column is nil when the
    # name is not in the list. Returns self.
    def each_column(names)
      matching_names(names).each do |name|
        yield(name, @state[:columns][name.to_s])
      end
      self
    end
  end
end
|
@@ -0,0 +1,107 @@
|
|
1
|
+
require "agrippa/maybe"
|
2
|
+
require "agrippa/delegation"
|
3
|
+
|
4
|
+
module Janko
  # Thin wrapper around a raw PG::Connection that adds transaction-state
  # helpers and a cached catalog of table columns (type and default).
  class Connection
    include Agrippa::Delegation

    # Returns a Connection for +backend+: the default connection when
    # +backend+ is nil, +backend+ itself when it already is a Connection,
    # otherwise a new wrapper around it.
    def Connection.build(backend)
      return(default) if backend.nil?
      return(backend) if backend.is_a?(Connection)
      new(backend)
    end

    # Wraps ActiveRecord::Base when that constant is defined; raises a
    # RuntimeError otherwise.
    def Connection.default
      if Kernel.const_defined?("ActiveRecord::Base")
        new(Kernel.const_get("ActiveRecord::Base"))
      else
        raise("No default connection available.")
      end
    end

    # Returns the cached catalog, computing it with the block on first use.
    # The cache is stored on the class, so it is shared by every Connection
    # instance until .reset_cached_catalog is called.
    def Connection.cache_catalog
      @catalog ||= yield
    end

    # Clears the class-level catalog cache. Returns the class.
    def Connection.reset_cached_catalog
      @catalog = nil
      self
    end

    attr_reader :backend

    delegate(*%w(exec prepare exec_prepared async_exec put_copy_data
      put_copy_end get_last_result), to: "backend")

    # backend - a PG::Connection, or an object from which one can be
    #           extracted (see #extract_raw_connection).
    def initialize(backend)
      @backend = extract_raw_connection(backend)
    end

    # True when the backend reports any transaction status other than 0.
    # (In libpq's PQtransactionStatus enum, 0 is "idle".)
    def in_transaction?
      backend.transaction_status > 0
    end

    # True when the backend reports a transaction status of 3 or above.
    # (In libpq's PQtransactionStatus enum, 3 is "in error" and 4 is
    # "unknown".)
    def failed?
      backend.transaction_status >= 3
    end

    # Returns a nested Hash describing the visible tables:
    #   { table_name => { column_name => { type: ..., default: ... } } }
    # All keys are Strings as returned by the query. The result is cached
    # through Connection.cache_catalog.
    #
    # http://dba.stackexchange.com/questions/22362/
    # http://www.postgresql.org/docs/9.3/static/catalog-pg-attribute.html
    # http://www.postgresql.org/docs/9.3/static/catalog-pg-attrdef.html
    def catalog
      Connection.cache_catalog do
        result = backend.exec(<<-END)
          SELECT relname, attname, typname, pg_get_expr(adbin, 0)
          FROM pg_class
          LEFT JOIN pg_namespace ON (
          pg_class.relnamespace = pg_namespace.oid)
          LEFT JOIN pg_attribute ON (
          pg_class.oid = pg_attribute.attrelid)
          LEFT JOIN pg_attrdef ON (
          pg_attribute.attrelid = pg_attrdef.adrelid
          AND pg_attribute.attnum = pg_attrdef.adnum)
          LEFT JOIN pg_type ON (
          pg_attribute.atttypid = pg_type.oid)
          WHERE pg_class.relkind IN ('r','')
          AND pg_namespace.nspname
          NOT IN ('pg_catalog', 'pg_toast')
          AND pg_table_is_visible(pg_class.oid)
          AND attnum > 0
          AND NOT attisdropped;
        END

        output = {}
        result.each_row do |row|
          output[row[0]] ||= {}
          output[row[0]][row[1]] = { type: row[2], default: row[3] }
        end
        output
      end
    end

    # Names of the columns of +table+ (String or Symbol). Raises a
    # RuntimeError when the table is not in the catalog.
    def column_list(table)
      table_columns(table).keys
    end

    # Type name of +column+ in +table+. Raises a RuntimeError when the
    # table or column is not in the catalog.
    def column_type(table, column)
      column_info(table, column)[:type]
    end

    # Default expression of +column+ in +table+, or nil when the catalog
    # holds none. Raises a RuntimeError when the table or column is not in
    # the catalog.
    def column_default(table, column)
      column_info(table, column)[:default]
    end

    private

    # Catalog entry for +table+, raising a descriptive error instead of
    # letting a nil lookup surface later as a NoMethodError.
    def table_columns(table)
      catalog[table.to_s] || raise("Unknown table: #{table}")
    end

    # Catalog entry for +column+ of +table+, with the same error policy as
    # #table_columns.
    def column_info(table, column)
      table_columns(table)[column.to_s] \
        || raise("Unknown column: #{table}.#{column}")
    end

    def maybe(*args)
      Agrippa::Maybe.new(*args)
    end

    # Returns +backend+ when it is a PG::Connection; otherwise tries
    # backend.raw_connection, then backend.connection.raw_connection, via
    # Agrippa::Maybe. Raises a RuntimeError when none of these yields a
    # value.
    def extract_raw_connection(backend)
      return(backend) if backend.is_a?(PG::Connection)
      maybe(backend).raw_connection._ \
        or maybe(backend).connection.raw_connection._ \
        or raise("Unable to extract a connection from: #{backend}")
    end
  end
end
|
@@ -0,0 +1,36 @@
|
|
1
|
+
require "csv"
|
2
|
+
require "agrippa/mutable"
|
3
|
+
|
4
|
+
module Janko
  # Import strategy that streams rows to the server as CSV through a COPY
  # statement.
  #
  # connection, table and columns are read through state_reader
  # (presumably provided by Agrippa::Mutable -- confirm against that
  # library).
  class CopyImporter
    include Agrippa::Mutable

    state_reader :connection, :table, :columns

    # Issues the COPY statement that opens the data stream. Returns self.
    # The data is supplied by the client via #push, which is spelled
    # "FROM STDIN" in PostgreSQL's documented COPY syntax.
    def start
      connection.async_exec(sprintf("COPY %s(%s) FROM STDIN CSV",
        table, columns.to_list))
      self
    end

    # Encodes one row as a CSV line and sends it to the server. Returns
    # self. On any error the copy is ended via #stop and the error is
    # re-raised.
    # NOTE(review): if #stop itself raises here, that error replaces the
    # original one -- confirm this is acceptable.
    def push(values)
      begin
        line = CSV.generate_line(columns.pack(values))
        connection.put_copy_data(line)
      rescue
        stop
        raise
      end
      self
    end

    # Ends the data stream and checks the final result. Returns self when
    # the result status is PGRES_COMMAND_OK or PGRES_COPY_IN; raises
    # PG::Error with the server's message otherwise.
    def stop
      connection.put_copy_end
      result = connection.get_last_result
      return(self) if (result.result_status == PG::PGRES_COMMAND_OK)
      return(self) if (result.result_status == PG::PGRES_COPY_IN)
      raise(PG::Error, result.error_message)
    end
  end
end
|