janko 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Binary file
@@ -0,0 +1,7 @@
1
# Bootstrap script: opens an ActiveRecord connection using the "development"
# entry of config/database.yml and sends ActiveRecord's log output to
# tmp/test.log.
require "active_record"
require "pg"
require "logger"
# YAML is used below; require it explicitly instead of relying on another
# library having loaded it as a side effect.
require "yaml"

ActiveRecord::Base.logger = Logger.new("tmp/test.log")

# Both paths are relative to the current working directory.
config = YAML.load(File.read("config/database.yml"))
ActiveRecord::Base.establish_connection(config["development"])
data/janko.gemspec ADDED
@@ -0,0 +1,32 @@
1
# coding: utf-8

# Gem specification for janko. The version number is read from
# lib/janko/version.rb, so lib/ is put on the load path first.
lib = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)

require 'janko/version'

Gem::Specification.new do |spec|
  spec.name = "janko"
  spec.version = Janko::VERSION
  spec.authors = ["Don Werve"]
  spec.email = ["don@werve.net"]
  spec.summary = %q{High-performance import, merge, and upsert for PostgreSQL.}
  spec.description = %q{Because sometimes you just need to feed PostgreSQL a lot of data.}
  spec.homepage = "https://github.com/matadon/janko"
  spec.license = "Apache-2.0"

  # The packaged file list comes from git, so building the gem requires a
  # git checkout; -z / "\x0" keeps unusual file names intact.
  spec.files = `git ls-files -z`.split("\x0")
  spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
  spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
  spec.require_paths = ["lib"]

  # Runtime dependencies.
  # NOTE(review): '~> 0.17' combined with '> 0.17' excludes 0.17.0 itself;
  # confirm that is intended rather than '>= 0.17'.
  spec.add_runtime_dependency 'pg', '~> 0.17', '> 0.17'
  spec.add_runtime_dependency 'agrippa', '~> 0.0.1', '>= 0.0.1'

  # Development and test dependencies.
  spec.add_development_dependency "bundler", "~> 1.6"
  spec.add_development_dependency "rake", "~> 10.0"
  spec.add_development_dependency 'rspec', '~> 3.0', '>= 3.0.0'
  spec.add_development_dependency 'guard', '~> 2.8'
  spec.add_development_dependency 'guard-rspec', '~> 4.3'
  spec.add_development_dependency 'ruby_gntp', '~> 0'
  spec.add_development_dependency 'simplecov', '~> 0'
  spec.add_development_dependency 'pry', '~> 0'
  # NOTE(review): as with pg above, '> 4.1' excludes 4.1.0 -- confirm.
  spec.add_development_dependency 'activerecord', '~> 4.1', '> 4.1'
end
data/lib/janko.rb ADDED
@@ -0,0 +1,5 @@
1
+ require "janko/version"
2
+
3
# Root namespace for the janko gem. This file defines nothing inside it;
# the gem's classes are defined in their own files under lib/janko/.
module Janko
end
@@ -0,0 +1,57 @@
1
+ require "janko/merge_result"
2
+ require "janko/import"
3
+ require "janko/upsert"
4
+
5
# SecureRandom is part of the standard library but is not loaded by default;
# the temp-table name generated in BulkMerge#initialize depends on it.
require "securerandom"

module Janko
  # Merges rows into a table by way of a temporary staging table: rows are
  # pushed through an Import (using the CopyImporter strategy) into a temp
  # table with the same structure as the destination table, and an Upsert
  # configured with that temp table as its :from_table is then processed.
  class BulkMerge
    # @param options [Hash] forwarded to Upsert with :from_table added. This
    #   class itself reads :connection, :table and :columns from it.
    def initialize(options = {})
      @options = options
      # Random suffix so that separate merges use distinct staging tables.
      @target = "merge_#{SecureRandom.hex(8)}"
      @upsert = Upsert.new(options.merge(from_table: @target))
      @importer = Import.new(strategy: Janko::CopyImporter,
        table: @target, connection: connection,
        columns: options[:columns])
    end

    # Creates the staging table and starts the importer.
    #
    # @return [BulkMerge] self, for chaining
    def start
      create_copy_target
      @importer.start
      self
    end

    # Hands the given values to the importer unchanged.
    #
    # @return [BulkMerge] self, for chaining
    def push(*values)
      @importer.push(*values)
      self
    end

    # Stops the importer, runs the upsert and its cleanup, then drops the
    # staging table. If any step raises, the later steps are not run.
    #
    # @return [BulkMerge] self, for chaining
    def stop
      @importer.stop
      @upsert.process.cleanup
      drop_copy_target
      self
    end

    # @return whatever the underlying Upsert reports as its result
    def result
      @upsert.result
    end

    # @return the :connection entry of the options given to #initialize
    def connection
      @options[:connection]
    end

    private

    # Creates an empty temp table with the same columns as the destination
    # table. It is declared ON COMMIT DROP in addition to the explicit drop
    # performed by #stop.
    def create_copy_target
      connection.exec(<<-END)
        CREATE TEMP TABLE #{@target} WITHOUT OIDS ON COMMIT DROP
        AS (SELECT * FROM #{@options[:table]}) WITH NO DATA;
      END
      self
    end

    def drop_copy_target
      connection.exec("DROP TABLE #{@target}")
      self
    end
  end
end
@@ -0,0 +1,193 @@
1
+ require "janko/tagged_column"
2
+ require "janko/constants"
3
+ require "agrippa/mutable_hash"
4
+
5
module Janko
  # A collection of TaggedColumn objects stored in @state[:columns], a Hash
  # keyed by column name (always a String). Columns can be added, removed,
  # tagged and filtered, and the list can be rendered into comma-separated
  # fragments by delegating to each column's to_* methods.
  #
  # @state, chain and the state-accepting constructor are not defined in
  # this class; presumably they come from Agrippa::MutableHash -- confirm
  # against that library.
  class ColumnList
    include Enumerable

    include Agrippa::MutableHash

    # Returns +source+ untouched when it already is a ColumnList; otherwise
    # builds a new list and adds +source+ (a name or collection of names).
    def self.build(source)
      source.is_a?(ColumnList) ? source : ColumnList.new.add(*source)
    end

    # Initial state: no columns.
    def default_state
      { columns: {} }
    end

    def builder
      self
    end

    # Adds the named columns; names already present are left untouched.
    #
    # @return [ColumnList] self
    def add(*names)
      each_column(names) { |name| add_column(name) }
    end

    # Removes the named columns; unknown names are ignored.
    #
    # @return [ColumnList] self
    def remove(*names)
      each_column(names) { |name| remove_column(name) }
    end

    # Makes +tag+ apply to exactly the named columns: it is first removed
    # from every column, then set on each named column (adding the column
    # if it is not in the list yet).
    #
    # @return [ColumnList] self
    def tag(tag, *names)
      each { |_, column| column.untag(tag) }
      each_column(names) { |name| add_column(name).tag(tag) }
    end

    # Removes +tag+ from the named columns; names not in the list are
    # skipped.
    #
    # @return [ColumnList] self
    def untag(tag, *names)
      each_column(names) { |_, column| column && column.untag(tag) }
    end

    # New list holding the columns that carry +tag+. Without an argument,
    # holds every column for which TaggedColumn#tagged? is true, plus any
    # column for which has_tag?(nil) is true.
    def tagged(tag = nil)
      filter_columns do |_, column|
        (tag.nil? && column.tagged?) || column.has_tag?(tag)
      end
    end

    # New list holding the columns that do not carry +tag+.
    def not_tagged(tag)
      filter_columns { |_, column| !column.has_tag?(tag) }
    end

    # True when no column carries +tag+.
    def none_tagged?(tag)
      @state[:columns].none? { |_, column| column.has_tag?(tag) }
    end

    # Yields each named column to the block.
    #
    # @raise [RuntimeError] when a name is not in the list
    # @return [ColumnList] self
    def alter(*names)
      each_column(names) do |name, column|
        raise("Unknown column: #{name}") unless column
        yield(column)
      end
    end

    def set(state)
      chain(state)
    end

    # Converts a hash of column => value (String or Symbol keys) into an
    # array of values in this list's column order.
    #
    # @raise [ArgumentError] when +values+ has keys that are not columns
    def pack(values)
      pack_hash(stringify_keys(values))
    end

    def to_list
      map_and_join { |_, column| column.quoted }
    end

    def to_conditions(left = nil, right = nil)
      map_and_join(" AND ") { |_, c| c.to_condition(left, right) }
    end

    def to_setters(left = nil, right = nil)
      map_and_join { |_, c| c.to_setter(left, right) }
    end

    def to_list_with_defaults
      map_and_join { |_, c| c.to_value }
    end

    # Bind indexes passed to the columns start at 1, not 0.
    def to_binds
      map_and_join_with_index { |_, c, i| c.to_bind(i + 1) }
    end

    def to_typecast_binds
      map_and_join_with_index { |_, c, i| c.to_typecast_bind(i + 1) }
    end

    # Iterates over (name, column) pairs.
    def each(&block)
      @state[:columns].each(&block)
    end

    def map(&block)
      @state[:columns].map(&block)
    end

    def map_and_join(separator = ",", &block)
      map(&block).join(separator)
    end

    # Like map_and_join, but the block also receives the zero-based
    # position as its third argument.
    def map_and_join_with_index(separator = ",")
      output = each_with_index.map { |pair, index| yield(*pair, index) }
      output.join(separator)
    end

    def empty?
      @state[:columns].empty?
    end

    # @return [Array<String>] the column names
    def columns
      @state[:columns].keys
    end

    def connection
      @state[:parent].connection
    end

    def table
      @state[:parent].table
    end

    def inspect
      children = @state[:columns].map { |name, column|
        "#{name}(#{column.inspect})" }
      "#<#{self.class}:0x#{self.__id__.to_s(16)} #{children.join(" ")}>"
    end

    # Number of columns. Deliberately not memoized: the column hash is
    # changed in place by #add and #remove, so a cached count goes stale.
    def length
      @state[:columns].length
    end

    # Returns the values of a String-keyed hash in column order, nil for
    # columns the hash does not mention.
    #
    # @raise [ArgumentError] when +values+ has keys that are not columns
    def pack_hash(values)
      # Work on a copy so the caller's hash is left intact.
      remaining = values.dup
      result = columns.map { |column| remaining.delete(column) }
      return result if remaining.empty?
      raise(ArgumentError, "Unknown columns: #{remaining.keys.join(" ")}")
    end

    private

    def add_column(name)
      @state[:columns][name.to_s] ||= TaggedColumn.new(name: name,
        parent: self)
    end

    def remove_column(name)
      @state[:columns].delete(name.to_s)
      self
    end

    # Builds a new list of the same class from the pairs the block accepts;
    # all other state entries are carried over.
    def filter_columns
      result = {}
      each { |n, c| result[n] = c if yield(n, c) }
      self.class.new(@state.merge(columns: result))
    end

    def stringify_keys(hash)
      output = {}
      hash.each { |k, v| output[k.to_s] = v }
      output
    end

    # Expands a (possibly nested) list of column specifiers into names:
    #   Janko::ALL         -> every column of the table
    #   Janko::DEFAULT     -> every column of the table except "id"
    #   { except: names }  -> every column of the table minus +names+
    #   anything else      -> its to_s
    #
    # @raise [RuntimeError] for nil or empty-string names
    def matching_names(names)
      result = []
      names.flatten.each do |name|
        if name == Janko::ALL
          result.concat(connection.column_list(table))
        elsif name == Janko::DEFAULT
          result.concat(connection.column_list(table) - [ "id" ])
        elsif name.is_a?(Hash) && name.has_key?(:except)
          all_columns = connection.column_list(table)
          exceptions = [ name[:except] ].flatten.map(&:to_s)
          result.concat(all_columns - exceptions)
        elsif name.nil? || name == ""
          raise("Blank or nil column names are not allowed.")
        else
          result.push(name.to_s)
        end
      end
      result
    end

    # Yields (name, column) for every expanded name; column is nil when the
    # name is not in the list yet.
    #
    # @return [ColumnList] self
    def each_column(names)
      matching_names(names).each do |name|
        yield(name, @state[:columns][name.to_s])
      end
      self
    end
  end
end
@@ -0,0 +1,107 @@
1
+ require "agrippa/maybe"
2
+ require "agrippa/delegation"
3
+
4
module Janko
  # Wrapper around a raw PG::Connection. Forwards a fixed set of query and
  # COPY methods to it and answers column metadata questions from a catalog
  # of table columns that is loaded once and cached.
  class Connection
    include Agrippa::Delegation

    # Coerces +backend+ into a Connection: nil yields the default
    # connection, an existing Connection is returned as-is, and anything
    # else is wrapped.
    def self.build(backend)
      return default if backend.nil?
      return backend if backend.is_a?(Connection)
      new(backend)
    end

    # Connection built from ActiveRecord::Base when that constant is
    # defined.
    #
    # @raise [RuntimeError] when ActiveRecord::Base is not defined
    def self.default
      unless defined?(::ActiveRecord::Base)
        raise("No default connection available.")
      end
      new(::ActiveRecord::Base)
    end

    # Returns the cached catalog, computing it with the block on first use.
    # NOTE(review): the cache is stored on the class, so it is shared by
    # every Connection instance regardless of which backend it wraps.
    def self.cache_catalog
      @catalog ||= yield
    end

    # Discards the cached catalog so the next lookup queries again.
    def self.reset_cached_catalog
      @catalog = nil
      self
    end

    attr_reader :backend

    # Parenthesised so the leading splat cannot be misparsed as a binary
    # operator.
    delegate(*%w(exec prepare exec_prepared async_exec put_copy_data
      put_copy_end get_last_result), to: "backend")

    # @param backend a PG::Connection, or an object from which one can be
    #   extracted (see #extract_raw_connection)
    def initialize(backend)
      @backend = extract_raw_connection(backend)
    end

    # True for any transaction status above 0. In libpq's numbering 0 is
    # PQTRANS_IDLE -- see the PQtransactionStatus documentation.
    def in_transaction?
      backend.transaction_status > 0
    end

    # True for transaction status 3 and above (3 is PQTRANS_INERROR and 4 is
    # PQTRANS_UNKNOWN in libpq's numbering).
    def failed?
      backend.transaction_status >= 3
    end

    # Column catalog, shaped as
    #   { table_name => { column_name => { type: ..., default: ... } } }
    # built from the system catalogs and cached at class level.
    #
    # http://dba.stackexchange.com/questions/22362/
    # http://www.postgresql.org/docs/9.3/static/catalog-pg-attribute.html
    # http://www.postgresql.org/docs/9.3/static/catalog-pg-attrdef.html
    def catalog
      Connection.cache_catalog do
        result = backend.exec(<<-END)
          SELECT relname, attname, typname, pg_get_expr(adbin, 0)
          FROM pg_class
          LEFT JOIN pg_namespace ON (
          pg_class.relnamespace = pg_namespace.oid)
          LEFT JOIN pg_attribute ON (
          pg_class.oid = pg_attribute.attrelid)
          LEFT JOIN pg_attrdef ON (
          pg_attribute.attrelid = pg_attrdef.adrelid
          AND pg_attribute.attnum = pg_attrdef.adnum)
          LEFT JOIN pg_type ON (
          pg_attribute.atttypid = pg_type.oid)
          WHERE pg_class.relkind IN ('r','')
          AND pg_namespace.nspname
          NOT IN ('pg_catalog', 'pg_toast')
          AND pg_table_is_visible(pg_class.oid)
          AND attnum > 0
          AND NOT attisdropped;
        END

        output = {}
        result.each_row do |row|
          output[row[0]] ||= {}
          output[row[0]][row[1]] = { type: row[2], default: row[3] }
        end
        output
      end
    end

    # @return [Array<String>] the column names of +table+
    # @raise [RuntimeError] when +table+ is not in the catalog
    def column_list(table)
      columns_of(table).keys
    end

    # @return the catalog's type name for +column+ of +table+
    # @raise [RuntimeError] when the table or column is not in the catalog
    def column_type(table, column)
      column_entry(table, column)[:type]
    end

    # @return the catalog's default expression for +column+ of +table+
    #   (nil when the catalog query returned none)
    # @raise [RuntimeError] when the table or column is not in the catalog
    def column_default(table, column)
      column_entry(table, column)[:default]
    end

    private

    # Catalog entry for one table. Raises a descriptive error for an
    # unknown table rather than failing later with NoMethodError on nil.
    def columns_of(table)
      catalog[table.to_s] || raise("Unknown table: #{table}")
    end

    # Catalog entry for one column of one table.
    def column_entry(table, column)
      columns_of(table)[column.to_s] \
        || raise("Unknown column: #{table}.#{column}")
    end

    def maybe(*args)
      Agrippa::Maybe.new(*args)
    end

    # Accepts a PG::Connection directly; otherwise tries
    # backend.raw_connection and then backend.connection.raw_connection
    # through Agrippa::Maybe.
    #
    # @raise [RuntimeError] when neither attempt yields a connection
    def extract_raw_connection(backend)
      return backend if backend.is_a?(PG::Connection)
      maybe(backend).raw_connection._ \
        || maybe(backend).connection.raw_connection._ \
        || raise("Unable to extract a connection from: #{backend}")
    end
  end
end
@@ -0,0 +1,13 @@
1
+ require "janko/flag"
2
+
3
module Janko
  # Sentinel values. Each Flag is constructed from a distinct power of two
  # (1, 2 and 4).
  module Constants
    NULL = nil

    # Compared against in ColumnList#matching_names, where it expands to
    # every column of the table except "id".
    DEFAULT = Flag.new(2 ** 0)

    # Compared against in ColumnList#matching_names, where it expands to
    # every column of the table.
    ALL = Flag.new(2 ** 1)

    KEEP = Flag.new(2 ** 2)
  end
end
@@ -0,0 +1,36 @@
1
+ require "csv"
2
+ require "agrippa/mutable"
3
+
4
module Janko
  # Import strategy that streams rows to PostgreSQL as CSV over a COPY
  # command. Reads :connection, :table and :columns from its state.
  class CopyImporter
    include Agrippa::Mutable

    state_reader :connection, :table, :columns

    # Issues the COPY command for the table and column list.
    #
    # @return [CopyImporter] self
    def start
      # STDIN is the documented source keyword for COPY ... FROM.
      connection.async_exec(
        "COPY #{table}(#{columns.to_list}) FROM STDIN CSV")
      self
    end

    # Packs +values+ into column order, encodes them as one CSV line and
    # sends that line as COPY data. On any StandardError the copy is ended
    # via #stop and the error is re-raised.
    #
    # @param values [Hash] column name => value
    # @return [CopyImporter] self
    def push(values)
      line = CSV.generate_line(columns.pack(values))
      connection.put_copy_data(line)
      self
    rescue
      stop
      raise
    end

    # Ends the COPY and inspects the final result.
    #
    # @return [CopyImporter] self
    # @raise [PG::Error] when the final result status is neither
    #   PGRES_COMMAND_OK nor PGRES_COPY_IN
    def stop
      connection.put_copy_end
      result = connection.get_last_result
      status = result.result_status
      return self if status == PG::PGRES_COMMAND_OK
      return self if status == PG::PGRES_COPY_IN
      raise(PG::Error, result.error_message)
    end
  end
end