activerecord-redshiftbulk-adapter 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,230 @@
1
module ActiverecordRedshiftbulk
  # Creates, duplicates and drops work tables through a database connection.
  #
  # The connection only needs to respond to +execute(sql)+ and return rows
  # that can be read as hashes keyed by column name.
  class TableManager
    attr_reader :default_options

    DEFAULT_OPTIONS = { :exemplar_table_name => nil, :add_identity => false, :temporary => true }

    # connection::      object used to run every SQL statement.
    # default_options:: hash of defaults; recognised keys are
    #                   :partitioned_model, :table_name, :schema_name,
    #                   :exemplar_table_name, :add_identity and :temporary.
    #
    # When :partitioned_model is given, the exemplar table name and schema
    # name are taken from the model. When no table name is given, one is
    # derived from the backend pid of the connection.
    def initialize(connection, default_options = {})
      @connection = connection
      # Work on a copy so the caller's hash is never modified.
      options = (default_options || {}).dup

      model = options[:partitioned_model]
      if model
        options[:exemplar_table_name] = model.table_name
        options[:schema_name] = model.configurator.schema_name
      end

      # Assign the generated name directly so that an explicit blank
      # :table_name key cannot override it in the merge below.
      if options[:table_name].blank?
        options[:table_name] = "temporary_events_#{backend_pid}"
      end

      @default_options = DEFAULT_OPTIONS.merge(options)
    end

    def partitioned_model
      @default_options[:partitioned_model]
    end

    def schema_name
      @default_options[:schema_name]
    end

    def exemplar_table_name
      @default_options[:exemplar_table_name]
    end

    def add_identity
      @default_options[:add_identity]
    end

    def temporary
      @default_options[:temporary]
    end

    def base_table_name
      @default_options[:table_name]
    end

    # The managed table name, prefixed with the schema name when one is set.
    def table_name
      qualified_table_name(schema_name, base_table_name)
    end

    def drop_table
      @connection.execute("drop table #{table_name}")
    end

    # Creates a new table with the same columns, encodings, defaults,
    # sort keys, dist key/style and primary/unique constraints as the
    # exemplar table.
    #
    # options:: per-call overrides merged over the default options. The
    #           created table is named from the merged :schema_name and
    #           :table_name, so overrides passed here are honoured.
    #
    # Raises RuntimeError when the target or exemplar name is blank, or
    # when the exemplar schema/table cannot be found in the catalog.
    def duplicate_table(options = {})
      current_options = @default_options.merge(options)
      target_table_name = current_options[:table_name]
      raise "target_table_name not set" if target_table_name.blank?
      exemplar_table_name = current_options[:exemplar_table_name]
      raise "exemplar_table_name not set" if exemplar_table_name.blank?

      exemplar_schema, parent_table_name = split_exemplar_name(exemplar_table_name)

      # first find the diststyle (0 = even, anything else = some column)
      schema_oid = fetch_schema_oid(exemplar_schema)
      pg_class_row = fetch_class_row(schema_oid, exemplar_schema, parent_table_name)
      even_diststyle = (pg_class_row['reldiststyle'].to_i == 0)
      table_oid = pg_class_row['oid'].to_i

      primary_key, uniques = fetch_key_constraints(schema_oid, table_oid)
      attnums = (uniques + [primary_key]).flatten.compact.uniq
      column_names = fetch_column_names(table_oid, attnums)
      column_defaults = fetch_column_defaults(table_oid)

      with_search_path([exemplar_schema]) do
        sql_columns = []
        sql_columns << "_identity bigint identity" if current_options[:add_identity]

        column_definitions, sortkeys = describe_columns(exemplar_schema, parent_table_name, column_defaults)
        sql_columns.concat(column_definitions)

        unless primary_key.blank?
          sql_columns << "primary key (#{primary_key.map { |pk| column_names[pk] }.join(',')})"
        end

        uniques.each do |unique|
          sql_columns << "unique (#{unique.map { |uk| column_names[uk] }.join(',')})"
        end

        sql_sortkeys = sortkeys.blank? ? "" : " sortkey (#{sortkeys.join(',')})"
        new_table_name = qualified_table_name(current_options[:schema_name], target_table_name)

        sql = <<-SQL
          create #{"temporary " if current_options[:temporary]}table #{new_table_name}
          (
            #{sql_columns.join(', ')}
          ) #{"diststyle even " if even_diststyle}#{sql_sortkeys}
        SQL
        @connection.execute(sql)
      end
    end

    # Returns the pg_table_def rows for +table_name+ as an array. An
    # unqualified name is looked up with the search path set to "public".
    def table_def(table_name)
      table_parts = table_name.split('.')
      if table_parts.length == 1
        name = table_parts.first
        search_path = ["public"]
      else
        name = table_parts.last
        search_path = [table_parts.first]
      end

      with_search_path(search_path) do
        @connection.execute("select * from pg_table_def where tablename = '#{name}'").to_a
      end
    end

    # Runs the given block with a modified search path and restores the
    # previous search path afterwards, even if the block raises.
    #
    # search_path:: Array of schema names.
    # mode::        :prefix, :suffix or :replace (relative to the current path).
    #
    # Returns the value of the block.
    def with_search_path(search_path, mode = :replace, &block)
      raise "search_path must be an Array" unless search_path.is_a? Array

      old_search_path = get_search_path
      new_search_path =
        case mode
        when :prefix then search_path + old_search_path
        when :suffix then old_search_path + search_path
        when :replace then search_path
        else raise "mode must be :prefix, :suffix, :replace"
        end

      set_search_path(new_search_path)
      begin
        yield
      ensure
        set_search_path(old_search_path)
      end
    end

    # Returns the current search path as an array of names with double
    # quotes and spaces removed.
    def get_search_path
      row = @connection.execute("show search_path").to_a.first
      row["search_path"].split(',').map { |entry| entry.delete('" ') }
    end

    # Sets the search path to the given Array of schema names.
    def set_search_path(search_path)
      raise "search_path must be an Array" unless search_path.is_a? Array

      quoted_search_path = search_path.map { |sp| "'#{sp}'" }.join(',')
      @connection.execute("set search_path = #{quoted_search_path}")
    end

    private

    # Joins schema and table with a dot; returns the bare table when the schema is blank.
    def qualified_table_name(schema, base)
      schema.blank? ? base : "#{schema}.#{base}"
    end

    # Asks the connection for its backend pid.
    def backend_pid
      @connection.execute("select pg_backend_pid() as pid").first['pid'].to_i
    end

    # Splits "schema.table" into [schema, table], assuming "public" for a bare name.
    def split_exemplar_name(name)
      elements = name.split('.')
      elements.unshift("public") if elements.length == 1
      [elements[0], elements[1]]
    end

    # Looks up the oid of a schema in pg_namespace.
    def fetch_schema_oid(schema)
      sql = "select oid from pg_namespace where nspname = '#{schema}' limit 1"
      row = @connection.execute(sql).first
      raise "schema '#{schema}' not found" if row.nil?
      row['oid'].to_i
    end

    # Fetches the pg_class row (oid, reldiststyle) of the exemplar table.
    def fetch_class_row(schema_oid, schema, table)
      sql = "select oid,reldiststyle from pg_class where relnamespace = #{schema_oid} and relname = '#{table}' limit 1"
      row = @connection.execute(sql).first
      raise "exemplar table '#{schema}.#{table}' not found" if row.nil?
      row
    end

    # Reads primary key and unique constraints; returns [primary_key, uniques]
    # where each key is an array of attribute numbers as strings.
    def fetch_key_constraints(schema_oid, table_oid)
      sql = "select contype,conkey from pg_constraint where connamespace = #{schema_oid} and conrelid = #{table_oid}"
      primary_key = nil
      uniques = []
      @connection.execute(sql).each do |row|
        # conkey is read as text of the form "{1,2}"; strip the braces.
        attnums = row['conkey'][1..-2].split(',')
        case row['contype']
        when 'p' then primary_key = attnums
        when 'u' then uniques << attnums
        end
      end
      [primary_key, uniques]
    end

    # Maps attribute numbers (as strings) to column names for the given table.
    def fetch_column_names(table_oid, attnums)
      column_names = {}
      return column_names if attnums.empty?

      sql = "select attname,attnum from pg_attribute where attrelid = #{table_oid} and attnum in (#{attnums.join(',')})"
      @connection.execute(sql).each do |row|
        # Keyed by string so lookups match the attnums parsed from conkey.
        column_names[row['attnum'].to_s] = row['attname']
      end
      column_names
    end

    # Maps column names to their default expressions for the given table.
    def fetch_column_defaults(table_oid)
      column_defaults = {}
      sql = "select a.attname,d.adsrc from pg_attribute as a,pg_attrdef as d where a.attrelid = d.adrelid and d.adnum = a.attnum and a.attrelid = #{table_oid}"
      @connection.execute(sql).each do |row|
        column_defaults[row['attname']] = row['adsrc']
      end
      column_defaults
    end

    # Builds one column definition per pg_table_def row and collects the
    # sort key columns in sort key order; returns [definitions, sortkeys].
    def describe_columns(schema, table, column_defaults)
      definitions = []
      sortkeys = []
      sql = "select * from pg_table_def where tablename = '#{table}' and schemaname = '#{schema}'"
      @connection.execute(sql).each do |row|
        column_name = row['column']
        column_info = [column_name, row['type']]
        column_info << "not null" if row['notnull'] == "t"
        column_info << "distkey" if row['distkey'] == "t"
        column_info << "encode #{row['encoding']}" if row['encoding'] != 'none'

        # to_i accepts the position whether it arrives as "1" or 1.
        sortkey_position = row['sortkey'].to_i
        sortkeys[sortkey_position - 1] = column_name if sortkey_position != 0

        unless column_defaults[column_name].blank?
          column_info << "default #{column_defaults[column_name]}"
        end

        definitions << column_info.join(" ")
      end
      [definitions, sortkeys.compact]
    end
  end
end
@@ -0,0 +1,4 @@
1
+ require 'activerecord_redshiftbulk_adapter/version'
2
+ require 'activerecord_redshiftbulk/table_manager'
3
+ require 'monkeypatch_activerecord'
4
+ require 'monkeypatch_arel'
@@ -0,0 +1,4 @@
1
# Namespace that carries the gem's release information.
module ActiverecordRedshiftbulkAdapter
  # Version string of this gem release.
  VERSION = '0.0.1'
end
@@ -0,0 +1,195 @@
1
module ActiveRecord
  module Querying
    # Forward the bulk +unload+ and +copy+ calls to the object returned by +scoped+.
    delegate :unload, :to => :scoped
    delegate :copy, :to => :scoped
  end
end
6
+
7
module ActiveRecord
  module QueryMethods
    # Translates the Ruby arguments given to +copy+ / +unload+ into the
    # credential string and the list of SQL option fragments.
    module CopyUnloadParser
      # options::                the raw argument list (switch symbols, optionally ending in a Hash).
      # options_hash::           the Hash of keyed options (may be empty).
      # valid_switches::         symbols emitted as bare upper-case keywords when present in +options+.
      # valid_options::          keys emitted as "NAME AS <quoted value>".
      # valid_unquoted_options:: keys emitted as "NAME value" without quoting.
      # valid_special_options::  accepted for interface compatibility; not read here.
      # connection::             object whose +quote+ is used for quoting values. Optional; when
      #                          omitted, ActiveRecord::Base.connection is used the first time a
      #                          value actually needs quoting.
      #
      # Credentials come from options_hash[:credentials] when that key is
      # present, otherwise they are assembled as "key=value" pairs joined by ';'.
      #
      # Returns [credentials, option_list].
      def self.parse_options(options, options_hash, valid_switches, valid_options, valid_unquoted_options, valid_special_options, connection = nil)
        # This is a module-level method, so there is no implicit +connection+
        # in scope; resolve one lazily and only when something must be quoted.
        quote = lambda do |value|
          connection ||= ::ActiveRecord::Base.connection
          connection.quote(value)
        end

        # credentials first
        if options_hash.has_key?(:credentials)
          credentials = options_hash[:credentials]
        else
          credential_keys = [:aws_access_key_id, :aws_secret_access_key, :token, :master_symmetric_key]
          present_keys = credential_keys.select { |key| options_hash.has_key?(key) }
          credentials = present_keys.map { |key| "#{key}=#{options_hash[key]}" }.join(';')
        end

        option_list = []
        option_list << "WITH CREDENTIALS AS #{quote.call(credentials)}" unless credentials.blank?

        valid_switches.each do |switch_name|
          option_list << switch_name.to_s.upcase if options.include?(switch_name)
        end

        valid_options.each do |option_name|
          next unless options_hash.has_key?(option_name)
          option_list << "#{option_name.to_s.upcase} AS #{quote.call(options_hash[option_name])}"
        end

        valid_unquoted_options.each do |option_name|
          next unless options_hash.has_key?(option_name)
          option_list << "#{option_name.to_s.upcase} #{options_hash[option_name]}"
        end

        [credentials, option_list]
      end
    end
  end
end
48
+
49
module ActiveRecord
  module QueryMethods
    # UNLOAD ('select_statement')
    # TO 's3_path'
    # [ WITH ] CREDENTIALS [AS] 'aws_access_credentials'
    # [ option [ ... ] ]
    #
    # where option is
    #
    # { DELIMITER [ AS ] 'delimiter_char'
    # | FIXEDWIDTH [ AS ] 'fixedwidth_spec' }
    # | ENCRYPTED
    # | GZIP
    # | ADDQUOTES
    # | NULL [ AS ] 'null_string'
    # | ESCAPE
    # | ALLOWOVERWRITE
    VALID_UNLOAD_SWITCHES = [
      :encrypted, # listed in the grammar above, so it must be accepted here too
      :gzip,
      :addquotes,
      :escape,
      :allowoverwrite
    ]
    VALID_UNLOAD_OPTIONS = [
      :delimiter,
      :fixedwidth,
      :null
    ]
    VALID_UNQUOTED_UNLOAD_OPTIONS = [ ]
    VALID_SPECIAL_UNLOAD_OPTIONS = [
      :credentials,
      :aws_access_key_id,
      :aws_secret_access_key,
      :master_symmetric_key,
      :token
    ]

    # Builds an UNLOAD statement node for the current relation.
    #
    # to_s3_filename:: destination path passed to the Unload node.
    # options::        switch symbols (see VALID_UNLOAD_SWITCHES), optionally
    #                  followed by a Hash of keyed options and credentials.
    #
    # Returns an Arel::Nodes::UnloadStatement; nothing is executed here.
    def unload(to_s3_filename, *options)
      options_hash = options.last.is_a?(Hash) ? options.last : {}

      _credentials, unload_options =
        ActiveRecord::QueryMethods::CopyUnloadParser.parse_options(options, options_hash,
          VALID_UNLOAD_SWITCHES, VALID_UNLOAD_OPTIONS, VALID_UNQUOTED_UNLOAD_OPTIONS, VALID_SPECIAL_UNLOAD_OPTIONS)

      unload_node = Arel::Nodes::Unload.new(Arel::Nodes::Relation.new(clone), to_s3_filename)
      Arel::Nodes::UnloadStatement.new(unload_node, unload_options.join(" "))
    end

    # Each switch appears once: a duplicated entry would be emitted twice
    # in the generated statement.
    VALID_COPY_SWITCHES = [
      :encrypted,
      :gzip,
      :removequotes,
      :explicit_ids,
      :escape,
      :acceptanydate,
      :ignoreblanklines,
      :truncatecolumns,
      :fillrecord,
      :trimblanks,
      :noload,
      :emptyasnull,
      :blanksasnull,
      :roundec
    ]
    VALID_COPY_OPTIONS = [
      :delimiter,
      :fixedwidth,
      :csv,
      :acceptinvchars,
      :dateformat,
      :timeformat,
      :null
    ]

    VALID_UNQUOTED_COPY_OPTIONS = [
      :maxerror,
      :ignoreheader,
      :comprows,
      :compupdate,
      :statupdate
    ]

    VALID_SPECIAL_COPY_OPTIONS = [
      :credentials,
      :aws_access_key_id,
      :aws_secret_access_key,
      :master_symmetric_key,
      :token
    ]

    # COPY table_name [ (column1 [,column2, ...]) ]
    # FROM 's3://objectpath'
    # [ WITH ] CREDENTIALS [AS] 'aws_access_credentials'
    # [ option [ ... ] ]

    # where option is

    # { FIXEDWIDTH 'fixedwidth_spec'
    # | [DELIMITER [ AS ] 'delimiter_char']
    # [CSV [QUOTE [ AS ] 'quote_character']}

    # | ENCRYPTED
    # | GZIP
    # | REMOVEQUOTES
    # | EXPLICIT_IDS

    # | ACCEPTINVCHARS [ AS ] ['replacement_char']
    # | MAXERROR [ AS ] error_count
    # | DATEFORMAT [ AS ] { 'dateformat_string' | 'auto' }
    # | TIMEFORMAT [ AS ] { 'timeformat_string' | 'auto' | 'epochsecs' | 'epochmillisecs' }
    # | IGNOREHEADER [ AS ] number_rows
    # | ACCEPTANYDATE
    # | IGNOREBLANKLINES
    # | TRUNCATECOLUMNS
    # | FILLRECORD
    # | TRIMBLANKS
    # | NOLOAD
    # | NULL [ AS ] 'null_string'
    # | EMPTYASNULL
    # | BLANKSASNULL
    # | COMPROWS numrows
    # | COMPUPDATE [ { ON | TRUE } | { OFF | FALSE } ]
    # | STATUPDATE [ { ON | TRUE } | { OFF | FALSE } ]
    # | ESCAPE
    # | ROUNDEC
    #
    # Builds a COPY statement for +table_name+ and executes it on +connection+.
    #
    # to_s3_filename:: despite its name, this is the path used in the FROM
    #                  clause (it is passed as the Copy node's +from+).
    # options::        switch symbols (see VALID_COPY_SWITCHES), optionally
    #                  followed by a Hash of keyed options and credentials.
    #
    # Returns whatever +connection.execute+ returns.
    def copy(to_s3_filename, *options)
      options_hash = options.last.is_a?(Hash) ? options.last : {}

      _credentials, copy_options =
        ::ActiveRecord::QueryMethods::CopyUnloadParser.parse_options(options, options_hash,
          VALID_COPY_SWITCHES, VALID_COPY_OPTIONS, VALID_UNQUOTED_COPY_OPTIONS, VALID_SPECIAL_COPY_OPTIONS)

      copy_node = Arel::Nodes::Copy.new(table_name, to_s3_filename)
      statement = Arel::Nodes::CopyStatement.new(copy_node, copy_options.join(" "))
      connection.execute(statement.to_sql)
    end
  end
end
@@ -0,0 +1,96 @@
1
module Arel
  module Nodes
    # Unary node used to wrap the relation that an UNLOAD statement selects from.
    class Relation < Arel::Nodes::Unary
    end

    # Binary node pairing a statement (left) with its destination (right).
    class Unload < Arel::Nodes::Binary
      alias_method :statement, :left
      alias_method :statement=, :left=
      alias_method :to, :right
      alias_method :to=, :right=

      def initialize(statement = nil, to = nil)
        super(statement, to)
      end

      # Gives the copy its own clone of the right-hand side.
      def initialize_copy(other)
        super(other)
        @right = @right.clone
      end
    end

    # Binary node pairing an Unload relation (left) with its options (right).
    class UnloadStatement < Arel::Nodes::Binary
      alias_method :relation, :left
      alias_method :relation=, :left=
      alias_method :options, :right
      alias_method :options=, :right=

      def initialize(relation = nil, options = [])
        super(relation, options)
      end

      # Gives the copy its own clone of the right-hand side.
      def initialize_copy(other)
        super(other)
        @right = @right.clone
      end
    end

    # Binary node pairing a statement (left) with its source (right).
    class Copy < Arel::Nodes::Binary
      alias_method :statement, :left
      alias_method :statement=, :left=
      alias_method :from, :right
      alias_method :from=, :right=

      def initialize(statement = nil, from = nil)
        super(statement, from)
      end

      # Gives the copy its own clone of the right-hand side.
      def initialize_copy(other)
        super(other)
        @right = @right.clone
      end
    end

    # Binary node pairing a Copy relation (left) with its options (right).
    class CopyStatement < Arel::Nodes::Binary
      alias_method :relation, :left
      alias_method :relation=, :left=
      alias_method :options, :right
      alias_method :options=, :right=

      def initialize(relation = nil, options = [])
        super(relation, options)
      end

      # Gives the copy its own clone of the right-hand side.
      def initialize_copy(other)
        super(other)
        @right = @right.clone
      end
    end
  end
end
70
+
71
module Arel
  module Visitors
    # SQL rendering for the UNLOAD / COPY nodes defined in Arel::Nodes.
    class ToSql < Arel::Visitors::Visitor
      # Renders the wrapped Unload node followed by its option string.
      def visit_Arel_Nodes_UnloadStatement(o)
        unload_sql = visit(o.relation)
        "#{unload_sql} #{o.options}"
      end

      # Renders "UNLOAD (<statement>) TO <destination>", visiting both sides.
      def visit_Arel_Nodes_Unload(o)
        statement_sql = visit(o.statement)
        destination_sql = visit(o.to)
        "UNLOAD (#{statement_sql}) TO #{destination_sql}"
      end

      # Renders the wrapped Copy node followed by its option string.
      def visit_Arel_Nodes_CopyStatement(o)
        copy_sql = visit(o.relation)
        "#{copy_sql} #{o.options}"
      end

      # Renders "COPY <statement> FROM <source>"; only the source is visited,
      # the statement is interpolated as-is.
      def visit_Arel_Nodes_Copy(o)
        source_sql = visit(o.from)
        "COPY #{o.statement} FROM #{source_sql}"
      end

      # Converts the wrapped expression to SQL text and visits that text.
      # NOTE(review): presumably this relies on the visitor's string handling
      # to quote the SQL for use inside UNLOAD (...) - confirm.
      def visit_Arel_Nodes_Relation(o)
        sql_text = o.expr.to_sql
        visit(sql_text)
      end
    end
  end
end