janko 0.0.2

Sign up to get free protection for your applications and to get access to all the features.
Binary file
@@ -0,0 +1,7 @@
1
# Bootstrap script: points ActiveRecord's logger at tmp/test.log and
# connects ActiveRecord to the "development" entry of config/database.yml.
require "active_record"
require "pg"
require "logger"
# YAML is used below, so require it explicitly instead of relying on
# another library having loaded it as a side effect.
require "yaml"

# Send ActiveRecord's log output to a file.
ActiveRecord::Base.logger = Logger.new("tmp/test.log")

# Parse the database settings and connect with the "development" section.
config = YAML.load(File.read("config/database.yml"))
ActiveRecord::Base.establish_connection(config["development"])
data/janko.gemspec ADDED
@@ -0,0 +1,32 @@
1
# coding: utf-8

# Make this gem's lib/ directory loadable so the version constant can be
# required below.
lib_dir = File.expand_path("../lib", __FILE__)
$LOAD_PATH.unshift(lib_dir) unless $LOAD_PATH.include?(lib_dir)

require "janko/version"

Gem::Specification.new do |gemspec|
  # Identity and descriptive metadata.
  gemspec.name        = "janko"
  gemspec.version     = Janko::VERSION
  gemspec.authors     = ["Don Werve"]
  gemspec.email       = ["don@werve.net"]
  gemspec.summary     = "High-performance import, merge, and upsert for PostgreSQL."
  gemspec.description = "Because sometimes you just need to feed PostgreSQL a lot of data."
  gemspec.homepage    = "https://github.com/matadon/janko"
  gemspec.license     = "Apache-2.0"

  # Package contents: everything tracked by git, with executables and test
  # files derived from that list by path prefix.
  gemspec.files         = `git ls-files -z`.split("\x0")
  gemspec.executables   = gemspec.files.grep(%r{^bin/}).map { |path| File.basename(path) }
  gemspec.test_files    = gemspec.files.grep(%r{^(test|spec|features)/})
  gemspec.require_paths = ["lib"]

  # Runtime dependencies.
  gemspec.add_runtime_dependency("pg", "~> 0.17", "> 0.17")
  gemspec.add_runtime_dependency("agrippa", "~> 0.0.1", ">= 0.0.1")

  # Development-only dependencies.
  gemspec.add_development_dependency("bundler", "~> 1.6")
  gemspec.add_development_dependency("rake", "~> 10.0")
  gemspec.add_development_dependency("rspec", "~> 3.0", ">= 3.0.0")
  gemspec.add_development_dependency("guard", "~> 2.8")
  gemspec.add_development_dependency("guard-rspec", "~> 4.3")
  gemspec.add_development_dependency("ruby_gntp", "~> 0")
  gemspec.add_development_dependency("simplecov", "~> 0")
  gemspec.add_development_dependency("pry", "~> 0")
  gemspec.add_development_dependency("activerecord", "~> 4.1", "> 4.1")
end
data/lib/janko.rb ADDED
@@ -0,0 +1,5 @@
1
+ require "janko/version"
2
+
3
+ module Janko
4
+ # Top-level namespace for the janko gem; intentionally empty in this file.
5
+ end
@@ -0,0 +1,57 @@
1
+ require "janko/merge_result"
2
+ require "janko/import"
3
+ require "janko/upsert"
4
+
5
# SecureRandom is used to name the staging table; require it explicitly.
require "securerandom"

module Janko
  # Merges pushed rows into a table by way of a temporary staging table.
  #
  # Rows handed to #push go to an Import configured with the CopyImporter
  # strategy, which targets a uniquely named temp table. On #stop the
  # Upsert (built with from_table: the temp table) is processed and the
  # temp table is dropped.
  #
  # Options read directly by this class: :connection, :table and :columns.
  # The complete options hash is also passed on to Upsert.
  #
  # NOTE(review): the staging table is created with ON COMMIT DROP, so
  # start/push/stop presumably have to run inside a single transaction;
  # confirm against callers.
  class BulkMerge
    # options - Hash of settings; see the class comment for the keys used.
    def initialize(options = {})
      @options = options
      # Random suffix keeps concurrent merges from colliding on the name.
      @target = "merge_#{SecureRandom.hex(8)}"
      @upsert = Upsert.new(options.merge(from_table: @target))
      @importer = Import.new(strategy: Janko::CopyImporter,
        table: @target, connection: connection,
        columns: options[:columns])
    end

    # Creates the staging table and starts the importer. Returns self.
    def start
      create_copy_target
      @importer.start
      self
    end

    # Forwards the given values to the importer. Returns self.
    def push(*values)
      @importer.push(*values)
      self
    end

    # Stops the importer, runs the upsert (followed by its cleanup), and
    # drops the staging table. Returns self.
    def stop
      @importer.stop
      @upsert.process.cleanup
      drop_copy_target
      self
    end

    # Returns whatever the upsert reports as its result.
    def result
      @upsert.result
    end

    # Returns the connection supplied in the options.
    def connection
      @options[:connection]
    end

    private

    # Creates an empty temp table with the same columns as the destination
    # table (SELECT * ... WITH NO DATA copies structure only).
    # NOTE(review): table names are interpolated unquoted into the SQL;
    # they must come from trusted input.
    def create_copy_target
      connection.exec(<<-END)
        CREATE TEMP TABLE #{@target} WITHOUT OIDS ON COMMIT DROP
        AS (SELECT * FROM #{@options[:table]}) WITH NO DATA;
      END
      self
    end

    # Drops the staging table. Returns self.
    def drop_copy_target
      connection.exec("DROP TABLE #{@target}")
      self
    end
  end
end
@@ -0,0 +1,193 @@
1
+ require "janko/tagged_column"
2
+ require "janko/constants"
3
+ require "agrippa/mutable_hash"
4
+
5
module Janko
  # A name-keyed collection of TaggedColumn objects with helpers for
  # tagging columns and rendering them as SQL fragments.
  #
  # Columns are stored in @state[:columns], a Hash keyed by the column
  # name as a String. @state, the constructor and #chain are not defined
  # here; presumably they come from Agrippa::MutableHash.
  class ColumnList
    include Enumerable

    include Agrippa::MutableHash

    # Returns +source+ itself when it already is a ColumnList; otherwise
    # builds a new list and adds the name(s) in +source+ to it.
    def self.build(source)
      return(source) if source.is_a?(ColumnList)
      ColumnList.new.add(*source)
    end

    # Initial state: no columns.
    def default_state
      { columns: {} }
    end

    def builder
      self
    end

    # Adds the named columns (already-present columns are kept as they
    # are). Accepts the same name forms as #matching_names. Returns self.
    def add(*names)
      each_column(names) { |name| add_column(name) }
    end

    # Removes the named columns. Returns self.
    def remove(*names)
      each_column(names) { |name| remove_column(name) }
    end

    # Makes +names+ the only columns carrying +tag+: the tag is first
    # removed from every column, then applied to the named columns,
    # adding any that are missing. Returns self.
    def tag(tag, *names)
      each { |_, column| column.untag(tag) }
      each_column(names) { |name| add_column(name).tag(tag) }
    end

    # Removes +tag+ from the named columns; names that are not in the
    # list are ignored. Returns self.
    def untag(tag, *names)
      each_column(names) { |_, column| column && column.untag(tag) }
    end

    # Returns a new list holding the columns that have +tag+; with no
    # argument, the columns that have any tag.
    def tagged(tag = nil)
      filter_columns do |_, column|
        (tag.nil? && column.tagged?) || column.has_tag?(tag)
      end
    end

    # Returns a new list holding the columns that do not have +tag+.
    def not_tagged(tag)
      filter_columns { |_, column| !column.has_tag?(tag) }
    end

    # True when no column has +tag+.
    def none_tagged?(tag)
      @state[:columns].none? { |_, column| column.has_tag?(tag) }
    end

    # Yields each named column to the block. Raises RuntimeError for a
    # name that is not in the list. Returns self.
    def alter(*names)
      each_column(names) do |name, column|
        raise("Unknown column: #{name}") unless column
        yield(column)
      end
    end

    def set(state)
      chain(state)
    end

    # Turns a Hash of column name (String or Symbol) => value into an
    # Array of values in column order; columns without a value yield nil.
    # Raises ArgumentError when +values+ has keys that are not columns.
    def pack(values)
      pack_hash(stringify_keys(values))
    end

    # Comma-separated quoted column names.
    def to_list
      map_and_join { |_, column| column.quoted }
    end

    # Per-column conditions joined with " AND ".
    def to_conditions(left = nil, right = nil)
      map_and_join(" AND ") { |_, column| column.to_condition(left, right) }
    end

    # Comma-separated per-column setters.
    def to_setters(left = nil, right = nil)
      map_and_join { |_, column| column.to_setter(left, right) }
    end

    def to_list_with_defaults
      map_and_join { |_, column| column.to_value }
    end

    # Comma-separated binds; each column receives its 1-based position.
    def to_binds
      map_and_join_with_index { |_, column, index| column.to_bind(index + 1) }
    end

    # Like #to_binds, but using each column's typecast bind.
    def to_typecast_binds
      map_and_join_with_index do |_, column, index|
        column.to_typecast_bind(index + 1)
      end
    end

    # Iterates over (name, column) pairs.
    def each(&block)
      @state[:columns].each(&block)
    end

    def map(&block)
      @state[:columns].map(&block)
    end

    # Maps every (name, column) pair through the block and joins the
    # results with +separator+.
    def map_and_join(separator = ",", &block)
      map(&block).join(separator)
    end

    # Same as #map_and_join, but the block also receives a 0-based index.
    def map_and_join_with_index(separator = ",")
      pieces = each_with_index.map { |pair, index| yield(*pair, index) }
      pieces.join(separator)
    end

    def empty?
      @state[:columns].empty?
    end

    # Column names (Strings).
    def columns
      @state[:columns].keys
    end

    # Connection and table are looked up on the parent held in the state.
    def connection
      @state[:parent].connection
    end

    def table
      @state[:parent].table
    end

    def inspect
      children = @state[:columns].map do |name, column|
        "#{name}(#{column.inspect})"
      end
      "#<#{self.class}:0x#{self.__id__.to_s(16)} #{children.join(" ")}>"
    end

    # Number of columns. Deliberately not memoized: columns can be added
    # or removed after the first call, which would leave a cached value
    # out of date.
    def length
      @state[:columns].length
    end

    # Returns the values of +values+ (String-keyed Hash) in column order,
    # nil for columns without an entry. Raises ArgumentError when +values+
    # contains keys that are not columns. The caller's hash is left
    # untouched.
    def pack_hash(values)
      known = columns
      # fetch with an explicit nil fallback ignores any Hash default.
      packed = known.map { |column| values.fetch(column, nil) }
      unknown = values.keys - known
      return(packed) if unknown.empty?
      raise(ArgumentError, "Unknown columns: #{unknown.join(" ")}")
    end

    private

    # Returns the existing column for +name+, creating it when absent.
    def add_column(name)
      @state[:columns][name.to_s] ||= TaggedColumn.new(name: name,
        parent: self)
    end

    def remove_column(name)
      @state[:columns].delete(name.to_s)
      self
    end

    # Builds a new list of the same class containing only the columns for
    # which the block is truthy; the rest of the state is carried over.
    def filter_columns
      kept = {}
      each { |name, column| kept[name] = column if yield(name, column) }
      self.class.new(@state.merge(columns: kept))
    end

    def stringify_keys(hash)
      hash.each_with_object({}) { |(key, value), out| out[key.to_s] = value }
    end

    # Expands the given names into a flat Array of column-name Strings.
    #   Janko::ALL         - every column of the table
    #   Janko::DEFAULT     - every column of the table except "id"
    #   { except: names }  - every column of the table except those named
    #   anything else      - the name itself, as a String
    # Raises RuntimeError for a nil or empty name.
    def matching_names(names)
      result = []
      names.flatten.each do |name|
        if name == Janko::ALL
          result.concat(connection.column_list(table))
        elsif name == Janko::DEFAULT
          result.concat(connection.column_list(table) - [ "id" ])
        elsif name.is_a?(Hash) && name.has_key?(:except)
          all_columns = connection.column_list(table)
          exceptions = [ name[:except] ].flatten.map(&:to_s)
          result.concat(all_columns - exceptions)
        elsif name.nil? || name == ""
          raise("Blank or nil column names are not allowed.")
        else
          result.push(name.to_s)
        end
      end
      result
    end

    # Yields (name, column) for every expanded name; column is nil when
    # the name is not in the list. Returns self.
    def each_column(names)
      matching_names(names).each do |name|
        yield(name, @state[:columns][name.to_s])
      end
      self
    end
  end
end
@@ -0,0 +1,107 @@
1
+ require "agrippa/maybe"
2
+ require "agrippa/delegation"
3
+
4
module Janko
  # Thin wrapper around a raw PG::Connection that also exposes a cached
  # view of the table/column catalog.
  class Connection
    include Agrippa::Delegation

    # Coerces +backend+ into a Connection: nil gives the default
    # connection, an existing Connection is returned unchanged, anything
    # else is wrapped.
    def self.build(backend)
      return(default) if backend.nil?
      return(backend) if backend.is_a?(Connection)
      new(backend)
    end

    # Wraps ActiveRecord::Base when that constant is defined; raises
    # RuntimeError otherwise.
    def self.default
      if Kernel.const_defined?("ActiveRecord::Base")
        new(Kernel.const_get("ActiveRecord::Base"))
      else
        raise("No default connection available.")
      end
    end

    # Returns the cached catalog, running the block to fill it on first
    # use. The cache lives on the class, so it is shared by every
    # Connection instance until reset_cached_catalog is called.
    def self.cache_catalog
      @catalog ||= yield
    end

    # Clears the cached catalog. Returns the class.
    def self.reset_cached_catalog
      @catalog = nil
      self
    end

    attr_reader :backend

    delegate(*%w(exec prepare exec_prepared async_exec put_copy_data
      put_copy_end get_last_result), to: "backend")

    # backend - a PG::Connection, or an object a raw connection can be
    #           extracted from (see #extract_raw_connection).
    def initialize(backend)
      @backend = extract_raw_connection(backend)
    end

    # True for any transaction status other than 0; per libpq, status 0
    # is idle and 1..4 are active, in transaction, in failed transaction,
    # and unknown.
    def in_transaction?
      backend.transaction_status > 0
    end

    # True for status 3 (in failed transaction) or 4 (unknown).
    def failed?
      backend.transaction_status >= 3
    end

    # Returns { table name => { column name => { type:, default: } } }
    # for the relations selected by the query below; the result is cached
    # through Connection.cache_catalog.
    #
    # http://dba.stackexchange.com/questions/22362/
    # http://www.postgresql.org/docs/9.3/static/catalog-pg-attribute.html
    # http://www.postgresql.org/docs/9.3/static/catalog-pg-attrdef.html
    def catalog
      Connection.cache_catalog do
        result = backend.exec(<<-END)
          SELECT relname, attname, typname, pg_get_expr(adbin, 0)
          FROM pg_class
          LEFT JOIN pg_namespace ON (
          pg_class.relnamespace = pg_namespace.oid)
          LEFT JOIN pg_attribute ON (
          pg_class.oid = pg_attribute.attrelid)
          LEFT JOIN pg_attrdef ON (
          pg_attribute.attrelid = pg_attrdef.adrelid
          AND pg_attribute.attnum = pg_attrdef.adnum)
          LEFT JOIN pg_type ON (
          pg_attribute.atttypid = pg_type.oid)
          WHERE pg_class.relkind IN ('r','')
          AND pg_namespace.nspname
          NOT IN ('pg_catalog', 'pg_toast')
          AND pg_table_is_visible(pg_class.oid)
          AND attnum > 0
          AND NOT attisdropped;
        END

        output = {}
        result.each_row do |row|
          output[row[0]] ||= {}
          output[row[0]][row[1]] = { type: row[2], default: row[3] }
        end
        output
      end
    end

    # Column names of +table+ (String or Symbol).
    # Raises ArgumentError when the table is not in the catalog.
    def column_list(table)
      table_columns(table).keys
    end

    # Type name of +column+ in +table+.
    # Raises ArgumentError when the table or column is not in the catalog.
    def column_type(table, column)
      column_info(table, column)[:type]
    end

    # Default expression of +column+ in +table+ (nil when it has none).
    # Raises ArgumentError when the table or column is not in the catalog.
    def column_default(table, column)
      column_info(table, column)[:default]
    end

    private

    # Catalog entry for +table+. Names are looked up as Strings because
    # the catalog is keyed by the strings returned from the query.
    def table_columns(table)
      catalog[table.to_s] || raise(ArgumentError, "Unknown table: #{table}")
    end

    # Catalog entry for +column+ of +table+.
    def column_info(table, column)
      table_columns(table)[column.to_s] ||
        raise(ArgumentError, "Unknown column: #{table}.#{column}")
    end

    def maybe(*args)
      Agrippa::Maybe.new(*args)
    end

    # Returns +backend+ itself when it is a PG::Connection; otherwise
    # tries backend.raw_connection, then backend.connection.raw_connection
    # (through Agrippa::Maybe; presumably a missing link yields nil
    # rather than raising). Raises RuntimeError when neither gives a
    # connection.
    def extract_raw_connection(backend)
      return(backend) if backend.is_a?(PG::Connection)
      maybe(backend).raw_connection._ ||
        maybe(backend).connection.raw_connection._ ||
        raise("Unable to extract a connection from: #{backend}")
    end
  end
end
@@ -0,0 +1,13 @@
1
+ require "janko/flag"
2
+
3
module Janko
  # Sentinel constants. NULL is plain nil; the others are Flag instances,
  # each built from a distinct power of two (1, 2 and 4).
  module Constants
    NULL    = nil
    DEFAULT = Flag.new(1 << 0)
    ALL     = Flag.new(1 << 1)
    KEEP    = Flag.new(1 << 2)
  end
end
@@ -0,0 +1,36 @@
1
+ require "csv"
2
+ require "agrippa/mutable"
3
+
4
module Janko
  # Import strategy that streams rows into a table using PostgreSQL's
  # COPY in CSV format.
  #
  # State read by this class: connection, table and columns.
  class CopyImporter
    include Agrippa::Mutable

    state_reader :connection, :table, :columns

    # Issues the COPY statement that opens the inbound data stream.
    # STDIN is the documented source keyword for client-supplied data.
    # Returns self.
    def start
      connection.async_exec(sprintf("COPY %s(%s) FROM STDIN CSV",
        table, columns.to_list))
      self
    end

    # Packs +values+ into column order, encodes them as one CSV line and
    # sends that line to the server. If anything fails, the copy is ended
    # via #stop and the original error is re-raised. Returns self.
    def push(values)
      connection.put_copy_data(CSV.generate_line(columns.pack(values)))
      self
    rescue
      stop
      raise
    end

    # Ends the copy stream and checks the final result. Returns self when
    # the status is PGRES_COMMAND_OK or PGRES_COPY_IN; raises PG::Error
    # with the server's message otherwise.
    def stop
      connection.put_copy_end
      result = connection.get_last_result
      status = result.result_status
      return(self) if status == PG::PGRES_COMMAND_OK
      return(self) if status == PG::PGRES_COPY_IN
      raise(PG::Error, result.error_message)
    end
  end
end