activerecord-redshiftbulk-adapter 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,230 @@
1
module ActiverecordRedshiftbulk
  # Creates and drops a work table whose structure is copied from an existing
  # "exemplar" table. Column definitions come from pg_table_def, the
  # distribution style from pg_class, primary/unique keys from pg_constraint
  # and column defaults from pg_attrdef.
  #
  # All SQL is sent through the connection object given to the constructor,
  # which must respond to +execute(sql)+ and return rows keyed by column name.
  class TableManager
    attr_reader :default_options

    DEFAULT_OPTIONS = { :exemplar_table_name => nil, :add_identity => false, :temporary => true }.freeze

    # connection      - object used to run SQL (see class comment).
    # default_options - Hash; recognised keys:
    #   :partitioned_model   - when present, its +table_name+ becomes the exemplar
    #                          table and its +configurator.schema_name+ the schema.
    #   :exemplar_table_name - table to copy the structure from.
    #   :schema_name         - schema prefix used by #table_name.
    #   :table_name          - name of the managed table; when blank a name of the
    #                          form "temporary_events_<backend pid>" is generated.
    #   :add_identity        - prepend an "_identity bigint identity" column.
    #   :temporary           - create the table as a temporary table.
    def initialize(connection, default_options = {})
      @connection = connection
      # Work on a copy so the caller's hash is never modified.
      supplied = default_options.dup

      model = supplied[:partitioned_model]
      if model
        supplied[:exemplar_table_name] = model.table_name
        supplied[:schema_name] = model.configurator.schema_name
      end

      # A blank (nil / "") :table_name must not win over the generated name.
      if supplied[:table_name].blank?
        connection_pid = @connection.execute("select pg_backend_pid() as pid").first['pid'].to_i
        supplied[:table_name] = "temporary_events_#{connection_pid}"
      end

      @default_options = DEFAULT_OPTIONS.merge(supplied)
    end

    # The model passed as :partitioned_model, if any.
    def partitioned_model
      @default_options[:partitioned_model]
    end

    # Schema prefix applied by #table_name (may be nil).
    def schema_name
      @default_options[:schema_name]
    end

    # Name of the table whose structure is copied.
    def exemplar_table_name
      @default_options[:exemplar_table_name]
    end

    # Whether an "_identity" column is added to the duplicate.
    def add_identity
      @default_options[:add_identity]
    end

    # Whether the duplicate is created as a temporary table.
    def temporary
      @default_options[:temporary]
    end

    # Unqualified name of the managed table.
    def base_table_name
      @default_options[:table_name]
    end

    # Name of the managed table, prefixed with "<schema>." when a schema is set.
    def table_name
      qualify(schema_name, base_table_name)
    end

    # Drops the managed table.
    def drop_table
      @connection.execute("drop table #{table_name}")
    end

    # Creates a table with the same columns, keys, defaults, distribution style
    # and sort keys as the exemplar table.
    #
    # options - per-call overrides merged over #default_options. :table_name and
    #           :schema_name select the table to create; :exemplar_table_name
    #           ("table" or "schema.table", schema defaults to "public") selects
    #           the source.
    #
    # Returns whatever the connection returns for the CREATE TABLE statement.
    # Raises RuntimeError when a table name is missing or the exemplar schema /
    # table cannot be found in the catalog.
    def duplicate_table(options = {})
      current_options = @default_options.merge(options)
      target_table_name = current_options[:table_name]
      raise "target_table_name not set" if target_table_name.blank?
      exemplar_table_name = current_options[:exemplar_table_name]
      raise "exemplar_table_name not set" if exemplar_table_name.blank?

      exemplar_parts = exemplar_table_name.split('.')
      exemplar_parts.unshift("public") if exemplar_parts.length == 1
      exemplar_schema = exemplar_parts[0]
      parent_table_name = exemplar_parts[1]

      schema_oid, table_oid, even_diststyle = lookup_exemplar(exemplar_schema, parent_table_name)
      primary_key, uniques = key_constraints(schema_oid, table_oid)
      column_names = key_column_names(table_oid, primary_key, uniques)
      column_defaults = column_defaults_for(table_oid)

      # Honour per-call :table_name / :schema_name overrides instead of always
      # creating the default table.
      target = qualify(current_options[:schema_name], target_table_name)

      # pg_table_def is queried with the exemplar's schema on the search path.
      with_search_path([exemplar_schema]) do
        sortkeys = []
        sql_columns = []
        sql_columns << "_identity bigint identity" if current_options[:add_identity]

        sql = "select * from pg_table_def where tablename = '#{parent_table_name}' and schemaname = '#{exemplar_schema}'"
        @connection.execute(sql).each do |row|
          # sortkey holds the 1-based position of the column in the sort key.
          sortkeys[row['sortkey'].to_i - 1] = row['column'] if row['sortkey'] != "0"
          sql_columns << column_definition(row, column_defaults)
        end

        unless primary_key.blank?
          sql_columns << "primary key (#{primary_key.map { |pk| column_names[pk] }.join(',')})"
        end

        uniques.each do |unique|
          sql_columns << "unique (#{unique.map { |uk| column_names[uk] }.join(',')})"
        end

        sql_sortkeys = sortkeys.blank? ? "" : " sortkey (#{sortkeys.join(',')})"
        sql = <<-SQL
          create #{"temporary " if current_options[:temporary]}table #{target}
          (
            #{sql_columns.join(', ')}
          ) #{"diststyle even " if even_diststyle}#{sql_sortkeys}
        SQL
        @connection.execute(sql)
      end
    end

    # Returns the pg_table_def rows for +table_name+ as an Array.
    # "schema.table" searches that schema; a bare name searches "public".
    def table_def(table_name)
      table_parts = table_name.split('.')
      name = table_parts.last
      search_path = [table_parts.length == 1 ? "public" : table_parts.first]

      with_search_path(search_path) do
        @connection.execute("select * from pg_table_def where tablename = '#{name}'").to_a
      end
    end

    # Runs the given block with a modified search_path and restores the previous
    # one afterwards, even if the block raises.
    #
    # search_path - Array of schema names.
    # mode        - :prefix (put before the current path), :suffix (put after it)
    #               or :replace (use only +search_path+).
    #
    # Returns the block's value. Raises RuntimeError for a non-Array path or an
    # unknown mode.
    def with_search_path(search_path, mode = :replace, &block)
      raise "search_path must be an Array" unless search_path.is_a? Array

      old_search_path = get_search_path
      new_search_path =
        case mode
        when :prefix  then search_path + old_search_path
        when :suffix  then old_search_path + search_path
        when :replace then search_path
        else raise "mode must be :prefix, :suffix, :replace"
        end

      set_search_path(new_search_path)
      begin
        yield
      ensure
        set_search_path(old_search_path)
      end
    end

    # Current search_path as an Array of schema names (double quotes and spaces
    # removed from each entry).
    def get_search_path
      raw_path = @connection.execute("show search_path").to_a.first["search_path"]
      raw_path.split(',').map { |entry| entry.delete('" ') }
    end

    # Sets the search_path to the given Array of schema names.
    def set_search_path(search_path)
      raise "search_path must be an Array" unless search_path.is_a? Array

      quoted_search_path = search_path.map { |sp| "'#{sp}'" }.join(',')
      @connection.execute("set search_path = #{quoted_search_path}")
    end

    private

    # Joins schema and table into "schema.table", or returns the bare table name
    # when no schema is given.
    def qualify(schema, base)
      schema.blank? ? base : "#{schema}.#{base}"
    end

    # Looks up the exemplar in the catalog.
    # Returns [schema oid, table oid, true when reldiststyle is 0 (even)].
    def lookup_exemplar(exemplar_schema, parent_table_name)
      sql = "select oid from pg_namespace where nspname = '#{exemplar_schema}' limit 1"
      namespace_row = @connection.execute(sql).first
      raise "schema '#{exemplar_schema}' not found" if namespace_row.nil?
      schema_oid = namespace_row['oid'].to_i

      # reldiststyle: 0 = even, 1 = distributed on some column
      sql = "select oid,reldiststyle from pg_class where relnamespace = #{schema_oid} and relname = '#{parent_table_name}' limit 1"
      pg_class_row = @connection.execute(sql).first
      raise "exemplar table '#{exemplar_schema}.#{parent_table_name}' not found" if pg_class_row.nil?

      [schema_oid, pg_class_row['oid'].to_i, pg_class_row['reldiststyle'].to_i == 0]
    end

    # Reads primary-key ('p') and unique ('u') constraints from pg_constraint.
    # Returns [primary_key, uniques]; each key is an Array of attnum strings,
    # primary_key is nil when the table has none.
    def key_constraints(schema_oid, table_oid)
      primary_key = nil
      uniques = []
      sql = "select contype,conkey from pg_constraint where connamespace = #{schema_oid} and conrelid = #{table_oid}"
      @connection.execute(sql).each do |row|
        # conkey arrives as "{1,2}"; strip the braces before splitting.
        attnums = row['conkey'][1..-2].split(',')
        case row['contype']
        when 'p' then primary_key = attnums
        when 'u' then uniques << attnums
        end
      end
      [primary_key, uniques]
    end

    # Maps the attnums used by the key constraints to their column names.
    def key_column_names(table_oid, primary_key, uniques)
      attnums = uniques.clone
      attnums << primary_key unless primary_key.blank?
      attnums = attnums.flatten.uniq

      column_names = {}
      return column_names if attnums.empty?

      sql = "select attname,attnum from pg_attribute where attrelid = #{table_oid} and attnum in (#{attnums.join(',')})"
      @connection.execute(sql).each do |row|
        # Keys are strings so they match the attnums parsed out of conkey.
        column_names[row['attnum'].to_s] = row['attname']
      end
      column_names
    end

    # Maps column name => default expression source for the table.
    def column_defaults_for(table_oid)
      column_defaults = {}
      sql = "select a.attname,d.adsrc from pg_attribute as a,pg_attrdef as d where a.attrelid = d.adrelid and d.adnum = a.attnum and a.attrelid = #{table_oid}"
      @connection.execute(sql).each do |row|
        column_defaults[row['attname']] = row['adsrc']
      end
      column_defaults
    end

    # Builds the column clause for one pg_table_def row:
    # name, type, then optional "not null", "distkey", "encode ..." and "default ...".
    def column_definition(row, column_defaults)
      column_name = row['column']
      column_info = [column_name, row['type']]
      column_info << "not null" if row['notnull'] == "t"
      column_info << "distkey" if row['distkey'] == "t"
      column_info << "encode #{row['encoding']}" if row['encoding'] != 'none'
      unless column_defaults[column_name].blank?
        column_info << "default #{column_defaults[column_name]}"
      end
      column_info.join(" ")
    end
  end
end
@@ -0,0 +1,4 @@
1
+ require 'activerecord_redshiftbulk_adapter/version'
2
+ require 'activerecord_redshiftbulk/table_manager'
3
+ require 'monkeypatch_activerecord'
4
+ require 'monkeypatch_arel'
@@ -0,0 +1,4 @@
1
module ActiverecordRedshiftbulkAdapter
  # The current version of this gem. Frozen so the shared constant cannot be
  # modified in place.
  VERSION = "0.0.1".freeze
end
@@ -0,0 +1,195 @@
1
module ActiveRecord
  module Querying
    # Forward +unload+ and +copy+ to the object returned by +scoped+.
    delegate(:unload, :copy, :to => :scoped)
  end
end
6
+
7
module ActiveRecord
  module QueryMethods
    # Turns the switches and options given to +copy+ / +unload+ into the list of
    # SQL option clauses appended to the statement.
    module CopyUnloadParser
      # Keys assembled into the "key=value;key=value" credentials string when no
      # explicit :credentials string is given, in the order they are emitted.
      CREDENTIAL_KEYS = [:aws_access_key_id, :aws_secret_access_key, :token, :master_symmetric_key].freeze

      # options                - positional arguments given by the caller (switch symbols,
      #                          optionally followed by the options Hash).
      # options_hash           - Hash of valued options and credentials.
      # valid_switches         - switch names emitted upper-cased when present in +options+.
      # valid_options          - option names emitted as "NAME AS <quoted value>".
      # valid_unquoted_options - option names emitted as "NAME value"; the value is
      #                          inserted verbatim, so it must not come from untrusted input.
      # valid_special_options  - accepted for interface compatibility; not consulted.
      # connection             - object responding to +quote(value)+; when nil,
      #                          ActiveRecord::Base.connection is used the first time a
      #                          value needs quoting.
      #
      # Returns [credentials, option_list] where option_list is an Array of SQL fragments.
      def self.parse_options(options, options_hash, valid_switches, valid_options, valid_unquoted_options, valid_special_options, connection = nil)
        # Resolved lazily so calls that quote nothing never need a connection.
        quote = lambda do |value|
          (connection || ::ActiveRecord::Base.connection).quote(value)
        end

        # credentials first
        if options_hash.has_key?(:credentials)
          credentials = options_hash[:credentials]
        else
          given_keys = CREDENTIAL_KEYS.select { |key| options_hash.has_key?(key) }
          credentials = given_keys.map { |key| "#{key}=#{options_hash[key]}" }.join(';')
        end

        option_list = []
        unless credentials.to_s.strip.empty?
          option_list << "WITH CREDENTIALS AS #{quote.call(credentials)}"
        end

        valid_switches.each do |switch_name|
          option_list << switch_name.to_s.upcase if options.include?(switch_name)
        end

        valid_options.each do |option_name|
          next unless options_hash.has_key?(option_name)
          option_list << "#{option_name.to_s.upcase} AS #{quote.call(options_hash[option_name])}"
        end

        valid_unquoted_options.each do |option_name|
          next unless options_hash.has_key?(option_name)
          option_list << "#{option_name.to_s.upcase} #{options_hash[option_name]}"
        end

        return credentials, option_list
      end
    end
  end
end
48
+
49
module ActiveRecord
  module QueryMethods
    # UNLOAD ('select_statement')
    # TO 's3_path'
    # [ WITH ] CREDENTIALS [AS] 'aws_access_credentials'
    # [ option [ ... ] ]
    #
    # where option is
    #
    # { DELIMITER [ AS ] 'delimiter_char'
    # | FIXEDWIDTH [ AS ] 'fixedwidth_spec' }
    # | ENCRYPTED
    # | GZIP
    # | ADDQUOTES
    # | NULL [ AS ] 'null_string'
    # | ESCAPE
    # | ALLOWOVERWRITE

    # Switches (value-less options) accepted by #unload.
    VALID_UNLOAD_SWITCHES = [
      :encrypted,
      :gzip,
      :addquotes,
      :escape,
      :allowoverwrite
    ].freeze
    # Options emitted by #unload as "NAME AS 'value'".
    VALID_UNLOAD_OPTIONS = [
      :delimiter,
      :fixedwidth,
      :null
    ].freeze
    # Options emitted by #unload as "NAME value" (none for UNLOAD).
    VALID_UNQUOTED_UNLOAD_OPTIONS = [].freeze
    # Credential-related keys recognised in the #unload options Hash.
    VALID_SPECIAL_UNLOAD_OPTIONS = [
      :credentials,
      :aws_access_key_id,
      :aws_secret_access_key,
      :master_symmetric_key,
      :token
    ].freeze

    # Builds (but does not execute) an UNLOAD statement node for this relation.
    #
    # to_s3_filename - destination given to the TO clause.
    # options        - switch symbols (see VALID_UNLOAD_SWITCHES), optionally
    #                  followed by a Hash of valued options and credentials.
    #
    # Returns an Arel::Nodes::UnloadStatement wrapping a clone of the receiver.
    def unload(to_s3_filename, *options)
      options_hash = options.last.is_a?(Hash) ? options.last : {}

      _credentials, unload_options =
        ::ActiveRecord::QueryMethods::CopyUnloadParser.parse_options(options, options_hash,
          VALID_UNLOAD_SWITCHES, VALID_UNLOAD_OPTIONS, VALID_UNQUOTED_UNLOAD_OPTIONS, VALID_SPECIAL_UNLOAD_OPTIONS)

      unload_node = Arel::Nodes::Unload.new(Arel::Nodes::Relation.new(clone), to_s3_filename)
      Arel::Nodes::UnloadStatement.new(unload_node, unload_options.join(" "))
    end

    # Switches (value-less options) accepted by #copy.
    VALID_COPY_SWITCHES = [
      :encrypted,
      :gzip,
      :removequotes,
      :explicit_ids,
      :escape,
      :acceptanydate,
      :ignoreblanklines,
      :truncatecolumns,
      :fillrecord,
      :trimblanks,
      :noload,
      :emptyasnull,
      :blanksasnull,
      :roundec
    ].freeze
    # Options emitted by #copy as "NAME AS 'value'".
    VALID_COPY_OPTIONS = [
      :delimiter,
      :fixedwidth,
      :csv,
      :acceptinvchars,
      :dateformat,
      :timeformat,
      :null
    ].freeze

    # Options emitted by #copy as "NAME value" without quoting.
    VALID_UNQUOTED_COPY_OPTIONS = [
      :maxerror,
      :ignoreheader,
      :comprows,
      :compupdate,
      :statupdate
    ].freeze

    # Credential-related keys recognised in the #copy options Hash.
    VALID_SPECIAL_COPY_OPTIONS = [
      :credentials,
      :aws_access_key_id,
      :aws_secret_access_key,
      :master_symmetric_key,
      :token
    ].freeze

    # COPY table_name [ (column1 [,column2, ...]) ]
    # FROM 's3://objectpath'
    # [ WITH ] CREDENTIALS [AS] 'aws_access_credentials'
    # [ option [ ... ] ]

    # where option is

    # { FIXEDWIDTH 'fixedwidth_spec'
    # | [DELIMITER [ AS ] 'delimiter_char']
    # [CSV [QUOTE [ AS ] 'quote_character']}

    # | ENCRYPTED
    # | GZIP
    # | REMOVEQUOTES
    # | EXPLICIT_IDS

    # | ACCEPTINVCHARS [ AS ] ['replacement_char']
    # | MAXERROR [ AS ] error_count
    # | DATEFORMAT [ AS ] { 'dateformat_string' | 'auto' }
    # | TIMEFORMAT [ AS ] { 'timeformat_string' | 'auto' | 'epochsecs' | 'epochmillisecs' }
    # | IGNOREHEADER [ AS ] number_rows
    # | ACCEPTANYDATE
    # | IGNOREBLANKLINES
    # | TRUNCATECOLUMNS
    # | FILLRECORD
    # | TRIMBLANKS
    # | NOLOAD
    # | NULL [ AS ] 'null_string'
    # | EMPTYASNULL
    # | BLANKSASNULL
    # | COMPROWS numrows
    # | COMPUPDATE [ { ON | TRUE } | { OFF | FALSE } ]
    # | STATUPDATE [ { ON | TRUE } | { OFF | FALSE } ]
    # | ESCAPE
    # | ROUNDEC

    # Builds a COPY statement for this relation's table and executes it on the
    # relation's connection.
    #
    # to_s3_filename - despite its name, this is the source given to the FROM clause.
    # options        - switch symbols (see VALID_COPY_SWITCHES), optionally
    #                  followed by a Hash of valued options and credentials.
    #
    # Returns the result of +connection.execute+.
    def copy(to_s3_filename, *options)
      options_hash = options.last.is_a?(Hash) ? options.last : {}

      _credentials, copy_options =
        ::ActiveRecord::QueryMethods::CopyUnloadParser.parse_options(options, options_hash,
          VALID_COPY_SWITCHES, VALID_COPY_OPTIONS, VALID_UNQUOTED_COPY_OPTIONS, VALID_SPECIAL_COPY_OPTIONS)

      copy_node = Arel::Nodes::Copy.new(table_name, to_s3_filename)
      statement = Arel::Nodes::CopyStatement.new(copy_node, copy_options.join(" "))
      connection.execute(statement.to_sql)
    end
  end
end
@@ -0,0 +1,96 @@
1
module Arel
  module Nodes
    # Unary node wrapping the relation whose SQL becomes the UNLOAD query.
    class Relation < Arel::Nodes::Unary
    end

    # "UNLOAD (<statement>) TO <to>" — left is the statement, right the target.
    class Unload < Arel::Nodes::Binary
      alias_method :statement, :left
      alias_method :statement=, :left=
      alias_method :to, :right
      alias_method :to=, :right=

      def initialize(statement = nil, to = nil)
        super(statement, to)
      end

      # Copies get their own clone of the right-hand operand.
      def initialize_copy(other)
        super
        @right = @right.clone
      end
    end

    # An Unload node (left) paired with its options (right).
    class UnloadStatement < Arel::Nodes::Binary
      alias_method :relation, :left
      alias_method :relation=, :left=
      alias_method :options, :right
      alias_method :options=, :right=

      def initialize(relation = nil, options = [])
        super(relation, options)
      end

      # Copies get their own clone of the right-hand operand.
      def initialize_copy(other)
        super
        @right = @right.clone
      end
    end

    # "COPY <statement> FROM <from>" — left is the statement, right the source.
    class Copy < Arel::Nodes::Binary
      alias_method :statement, :left
      alias_method :statement=, :left=
      alias_method :from, :right
      alias_method :from=, :right=

      def initialize(statement = nil, from = nil)
        super(statement, from)
      end

      # Copies get their own clone of the right-hand operand.
      def initialize_copy(other)
        super
        @right = @right.clone
      end
    end

    # A Copy node (left) paired with its options (right).
    class CopyStatement < Arel::Nodes::Binary
      alias_method :relation, :left
      alias_method :relation=, :left=
      alias_method :options, :right
      alias_method :options=, :right=

      def initialize(relation = nil, options = [])
        super(relation, options)
      end

      # Copies get their own clone of the right-hand operand.
      def initialize_copy(other)
        super
        @right = @right.clone
      end
    end
  end
end
70
+
71
module Arel
  module Visitors
    # Adds SQL rendering for the UNLOAD / COPY nodes to the ToSql visitor.
    class ToSql < Arel::Visitors::Visitor

      # Renders the wrapped Unload node followed by its options.
      def visit_Arel_Nodes_UnloadStatement o
        "#{visit o.relation} #{redshiftbulk_options_sql(o.options)}"
      end

      # Renders "UNLOAD (<statement>) TO <to>"; both operands are visited.
      def visit_Arel_Nodes_Unload o
        "UNLOAD (#{visit o.statement}) TO #{visit o.to}"
      end

      # Renders the wrapped Copy node followed by its options.
      def visit_Arel_Nodes_CopyStatement o
        "#{visit o.relation} #{redshiftbulk_options_sql(o.options)}"
      end

      # Renders "COPY <statement> FROM <from>". The statement (the table name
      # passed by the caller) is inserted verbatim; only +from+ is visited.
      def visit_Arel_Nodes_Copy o
        "COPY #{o.statement} FROM #{visit o.from}"
      end

      # Visits the SQL text of the wrapped expression rather than the expression
      # itself. NOTE(review): presumably so the query is emitted as a quoted
      # string literal inside UNLOAD (...) — confirm against the visitor's
      # String handling.
      def visit_Arel_Nodes_Relation o
        visit o.expr.to_sql
      end

      private

      # Options may be a pre-joined String or a list of fragments (the statement
      # nodes default to []); a list is joined with spaces instead of being
      # interpolated as its inspect form.
      def redshiftbulk_options_sql(options)
        options.respond_to?(:join) ? options.join(' ') : options.to_s
      end
    end
  end
end