jredshift 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 8bd2e7cbd469a48f42d1182e83af7391620ad338
4
+ data.tar.gz: 411d575cae9571cd92df1d3bc015372b0cdee43e
5
+ SHA512:
6
+ metadata.gz: 71e88500c05c1517ed3f6cee631daae94c28f1e7001ff4d1bf6a29308acaf2d632a1ad0246f21da0e8b33a37c8761ff5d9a4a3581deb6a0baeab2dc9f1b91a5f
7
+ data.tar.gz: ceeab03d691c5d93453aa0b484a0b5638a4cc50def92c3df780532896b883fad37db34501b155cf026b0f47dbebbb8fb24a6cab3fab2cb688c0a6545fd7f9f66
data/.gitignore ADDED
@@ -0,0 +1,22 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ Gemfile.lock
7
+ InstalledFiles
8
+ _yardoc
9
+ coverage
10
+ doc/
11
+ lib/bundler/man
12
+ pkg
13
+ rdoc
14
+ spec/reports
15
+ test/tmp
16
+ test/version_tmp
17
+ tmp
18
+ *.bundle
19
+ *.so
20
+ *.o
21
+ *.a
22
+ mkmf.log
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in jredshift.gemspec
4
+ gemspec
data/LICENSE.txt ADDED
@@ -0,0 +1,24 @@
1
+ This is free and unencumbered software released into the public domain.
2
+
3
+ Anyone is free to copy, modify, publish, use, compile, sell, or
4
+ distribute this software, either in source code form or as a compiled
5
+ binary, for any purpose, commercial or non-commercial, and by any
6
+ means.
7
+
8
+ In jurisdictions that recognize copyright laws, the author or authors
9
+ of this software dedicate any and all copyright interest in the
10
+ software to the public domain. We make this dedication for the benefit
11
+ of the public at large and to the detriment of our heirs and
12
+ successors. We intend this dedication to be an overt act of
13
+ relinquishment in perpetuity of all present and future rights to this
14
+ software under copyright law.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
19
+ IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
20
+ OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21
+ ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22
+ OTHER DEALINGS IN THE SOFTWARE.
23
+
24
+ For more information, please refer to <http://unlicense.org/>
data/README.md ADDED
@@ -0,0 +1,29 @@
1
+ # Jredshift
2
+
3
+ Convenience wrapper for using Redshift with JRuby.
4
+
5
+ ## Installation
6
+
7
+ Add this line to your application's Gemfile:
8
+
9
+ gem 'jredshift'
10
+
11
+ And then execute:
12
+
13
+ $ bundle
14
+
15
+ Or install it yourself as:
16
+
17
+ $ gem install jredshift
18
+
19
+ ## Usage
20
+
21
+ TODO: Write usage instructions here
22
+
23
+ ## Contributing
24
+
25
+ 1. Fork it ( https://github.com/deanmorin/jredshift/fork )
26
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
27
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
28
+ 4. Push to the branch (`git push origin my-new-feature`)
29
+ 5. Create a new Pull Request
data/Rakefile ADDED
@@ -0,0 +1,2 @@
1
+ require "bundler/gem_tasks"
2
+
data/jredshift.gemspec ADDED
@@ -0,0 +1,27 @@
1
+ # coding: utf-8
2
+ lib = File.expand_path('../lib', __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require 'jredshift/version'
5
+
6
+
7
# Gem specification for jredshift. The version is read from
# Jredshift::VERSION (lib/jredshift/version.rb) and the packaged file list is
# taken from the files tracked by git.
Gem::Specification.new do |spec|
  spec.name          = 'jredshift'
  spec.version       = Jredshift::VERSION
  spec.authors       = ['Dean Morin']
  spec.email         = ['morin.dean@gmail.com']
  spec.summary       = 'Redshift wrapper for JRuby.'
  # Plain string instead of an indented heredoc so the published description
  # carries no leading indentation or trailing newline.
  spec.description   = 'Convenience wrapper for using Redshift with JRuby.'
  spec.homepage      = 'http://github.com/deanmorin/jredshift'
  spec.license       = 'The Unlicense'

  # NUL-separated listing so file names containing whitespace survive the split.
  spec.files         = `git ls-files -z`.split("\x0")
  spec.executables   = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
  spec.test_files    = spec.files.grep(%r{^(test|spec|features)/})
  spec.require_paths = ['lib']

  spec.add_dependency 'jdbc-postgres', '~> 9.3'
  spec.add_development_dependency 'bundler', '~> 1.6'
  spec.add_development_dependency 'rake', '~> 0'
end
@@ -0,0 +1,85 @@
1
+ require 'bigdecimal'
2
+
3
+
4
module Jredshift
  # Eagerly reads every row of a JDBC ResultSet and exposes it as an Array of
  # Hashes (column name => converted Ruby value) through #result.
  class ArrayOfHashesResult
    attr_reader :result

    # Type codes as defined by java.sql.Types:
    # http://docs.oracle.com/javase/6/docs/api/java/sql/Types.html
    COL_TYPE_BIGINT    = -5
    COL_TYPE_BIT       = -7
    COL_TYPE_CHAR      = 1
    COL_TYPE_DATE      = 91 # java.sql.Types.DATE is 91 (TIME = 92, TIMESTAMP = 93)
    COL_TYPE_DECIMAL   = 3
    COL_TYPE_DOUBLE    = 8
    COL_TYPE_FLOAT     = 6
    COL_TYPE_INTEGER   = 4
    COL_TYPE_NCHAR     = -15
    COL_TYPE_NUMERIC   = 2
    COL_TYPE_NVARCHAR  = -9
    COL_TYPE_SMALLINT  = 5
    COL_TYPE_TIME      = 92
    COL_TYPE_TIMESTAMP = 93
    COL_TYPE_TINYINT   = -6
    COL_TYPE_VARCHAR   = 12

    # java_result_set - object responding to the ResultSet methods used below
    #                   (meta_data, next, get_string, get_int, ...). It is
    #                   fully iterated here; it is not closed by this class.
    def initialize(java_result_set)
      @java_result_set = java_result_set
      @result = result_set_to_array_of_hashes
    end

    private

    # Walks the result set row by row and returns an Array with one Hash per
    # row, keyed by column name.
    def result_set_to_array_of_hashes
      meta = @java_result_set.meta_data
      rows = []

      while @java_result_set.next
        row = {}

        # JDBC column indexes are 1-based.
        (1..meta.column_count).each do |i|
          name = meta.column_name(i)
          row[name] = get_field_by_type(meta, i)
        end

        rows << row
      end
      rows
    end

    # Converts column i of the current row according to its java.sql.Types
    # code. Returns nil when the column's string value is nil; column types
    # that are not listed fall back to the string representation.
    def get_field_by_type(meta, i)
      return nil if @java_result_set.get_string(i).nil?

      case meta.column_type(i)

      when COL_TYPE_TINYINT, COL_TYPE_SMALLINT, COL_TYPE_INTEGER
        @java_result_set.get_int(i).to_i
      when COL_TYPE_BIGINT
        @java_result_set.get_long(i).to_i
      when COL_TYPE_DATE
        @java_result_set.get_date(i)
      when COL_TYPE_TIME
        @java_result_set.get_time(i).to_i
      when COL_TYPE_TIMESTAMP
        datetime_str = @java_result_set.get_timestamp(i).to_s
        DateTime.strptime(datetime_str, '%Y-%m-%d %H:%M:%S')
      when COL_TYPE_NUMERIC, COL_TYPE_DECIMAL, COL_TYPE_FLOAT, COL_TYPE_DOUBLE
        # Kernel#BigDecimal rather than BigDecimal.new, which newer bigdecimal
        # releases no longer provide.
        BigDecimal(@java_result_set.get_string(i).to_s)
      when COL_TYPE_CHAR, COL_TYPE_NCHAR, COL_TYPE_NVARCHAR, COL_TYPE_VARCHAR
        @java_result_set.get_string(i).to_s
      when COL_TYPE_BIT
        # 't' / 'f' become booleans; any other value is passed through as-is.
        str = @java_result_set.get_string(i)
        if str == 't'
          true
        elsif str == 'f'
          false
        else
          str
        end
      else
        @java_result_set.get_string(i).to_s
      end
    end
  end
end
@@ -0,0 +1,192 @@
1
+ require 'java'
2
+ require 'jdbc/postgres'
3
+ require 'jredshift/array_of_hashes_result'
4
+ require 'jredshift/logger'
5
+
6
+
7
module Jredshift
  # Wrapper around one JDBC connection and a shared Statement. Adds logging,
  # optional "log and continue" error handling, bounded retries for errors a
  # subclass declares recoverable, and simple SQL script execution.
  #
  # Subclasses are expected to override #sql_exception_class and may override
  # #recoverable_error?, #sleep_time_for_error and #substitute_variables.
  class JdbcDb
    attr_reader :conn

    # Java uses an int for the updateCount field in ResultHandler. If the
    # update count overflows an int, you'll get this error message (but the
    # records will be updated/inserted correctly).
    ERROR_UPDATE_COUNT_OVERFLOW_REGEX =
      /Unable to interpret the update count in command completion tag: /i

    # Matches statements that START with a transaction keyword. Anchored so
    # that SQL merely containing "begin"/"end" (e.g. a table named "pending")
    # is not mistaken for a transaction statement.
    TRANSACTION_STATEMENT_REGEX =
      /\A\s*(?:begin|start\s+transaction|end|commit|rollback|abort)\b/i

    # jdbc_url, user, password - passed to DriverManager.get_connection.
    # options:
    #   :abort_on_error - re-raise unhandled errors (default true); when false
    #                     they are logged and recorded (see #error_occurred?).
    #   :quiet          - default for suppressing SQL logging (default false).
    #   :secrets        - strings handed to the Logger for masking.
    def initialize(jdbc_url, user, password, options={})
      @abort_on_error = options.fetch(:abort_on_error, true)
      @quiet = options.fetch(:quiet, false)
      @jdbc_url = jdbc_url
      @user = user
      @password = password
      @error_occurred = false
      @retry_count = 0

      # The logger must exist before connecting: a failed connection attempt
      # is reported through #log.
      secrets = options.fetch(:secrets, [])
      @logger = Logger.new(:secrets => secrets)

      init_connection
    end

    # Runs a statement through Statement#execute_update.
    #
    # options:
    #   :errors_to_ignore - exact error messages to swallow.
    #   :quiet            - suppress logging for this call.
    #
    # Returns the affected row count, or whatever #with_error_handling yields
    # when an error was handled (nil when it was logged but not raised).
    def execute(sql, options={})
      errors_to_ignore = options.fetch(:errors_to_ignore, [])
      quiet = options.fetch(:quiet, @quiet)

      log "\n#{sql}\n" unless quiet

      affected_rows = with_error_handling(:errors_to_ignore => errors_to_ignore) do
        @statement.execute_update(sql)
      end

      unless transaction_statement?(sql) || quiet || affected_rows.nil?
        log "Affected #{affected_rows} row(s)."
      end

      affected_rows
    end

    # Returns an empty array for no results, or nil if an error occurred and
    # @abort_on_error = false
    def query(sql, options={})
      quiet = options.fetch(:quiet, @quiet)

      log "\n#{sql}\n" unless quiet
      result_set = with_error_handling { @statement.execute_query(sql) }
      result_set ? ArrayOfHashesResult.new(result_set).result : nil
    end

    # Returns a ResultSet (instead of an Array of Hashes). Requires caller to
    # close ResultSet and set autocommit back to true
    #
    # options:
    #   :quiet      - suppress logging for this call.
    #   :fetch_size - rows fetched per round trip (default 100000).
    def big_query(sql, options={})
      quiet = options.fetch(:quiet, @quiet)
      fetch_size = options.fetch(:fetch_size, 100000)

      log "\n#{sql}\n" unless quiet

      @conn.set_auto_commit(false)
      stmt = @conn.create_statement
      stmt.set_fetch_size(fetch_size)

      # The query is executed exactly once, inside the error handler.
      with_error_handling { stmt.execute_query(sql) }
    end

    # Reads a SQL file, drops lines that start with "--", applies
    # #substitute_variables and executes each statement in order. options are
    # forwarded to #execute.
    def execute_script(filename, options={})
      File.open(filename) do |fh|
        sql = fh.read
        sql = remove_comments(sql)
        sql = substitute_variables(sql)
        execute_each_statement(sql, options)
      end
    end

    # True once an error has been logged by the error handler (only
    # observable when :abort_on_error is false, or after rescuing the raise).
    def error_occurred?
      @error_occurred
    end

    def clear_error_state
      @error_occurred = false
    end

    private

    # Loads the PostgreSQL JDBC driver, opens the connection and creates the
    # shared Statement.
    def init_connection
      Jdbc::Postgres.load_driver
      with_error_handling do
        @conn = java.sql::DriverManager.get_connection(@jdbc_url, @user, @password)
      end
      # @conn stays nil when connecting failed and :abort_on_error is false;
      # the failure has already been logged and recorded.
      @statement = @conn.create_statement if @conn
    end

    # Yields, translating failures:
    #   * message listed in :errors_to_ignore -> swallowed
    #   * interpretable error                 -> value recovered from message
    #   * recoverable error (max 3 retries)   -> sleep, then run the block again
    #   * anything else                       -> logged, flagged, and re-raised
    #                                            when @abort_on_error is set
    #
    # errors_to_ignore should only be used where no return value is expected
    # (that branch evaluates to 0, the value of the counter reset).
    def with_error_handling(options={})
      errors_to_ignore = options.fetch(:errors_to_ignore, [])
      return_value = yield
      @retry_count = 0
      return_value

    # NOTE(review): Exception (not StandardError) is rescued on purpose in the
    # original; presumably so Java exceptions surfaced by JDBC under JRuby are
    # caught -- confirm before narrowing this.
    rescue Exception => e
      if errors_to_ignore.include?(e.message)
        @retry_count = 0

      elsif interpretable_error?(e.message)
        @retry_count = 0
        interpret_error(e.message)

      elsif recoverable_error?(e.message) && @retry_count < 3
        @retry_count += 1
        sleep_time = sleep_time_for_error(e.message)
        log "Failed with recoverable error: #{e.message}"
        log "Retry attempt #{@retry_count} will occur after #{sleep_time} seconds"
        sleep sleep_time
        retry

      else
        @retry_count = 0
        @error_occurred = true
        log e.class.to_s
        log e.message
        log e.backtrace.join("\n") unless e.class == sql_exception_class

        raise e if @abort_on_error
      end
    end

    # True when the error message carries a usable result (see
    # #interpret_error).
    def interpretable_error?(err_msg)
      err_msg =~ ERROR_UPDATE_COUNT_OVERFLOW_REGEX
    end

    # Extracts the result encoded in an interpretable error message.
    # Raises ArgumentError for any other message.
    def interpret_error(err_msg)
      if err_msg =~ ERROR_UPDATE_COUNT_OVERFLOW_REGEX
        get_update_count_from_error(err_msg)
      else
        fail ArgumentError, 'Unexpected interpretable_error'
      end
    end

    # Takes the last whitespace-separated token of the message, drops the
    # trailing period and returns it as an Integer so #execute returns the
    # same type as on the normal path.
    def get_update_count_from_error(err_msg)
      err_msg.split(' ').last.split('.').first.to_i
    end

    # Hook: subclasses return true for error messages worth retrying.
    def recoverable_error?(err_msg)
      false
    end

    # Hook: seconds to wait before retrying after the given error.
    def sleep_time_for_error(err_msg)
      60
    end

    # Hook: the driver's SQL exception class; backtraces are not logged for
    # errors of exactly this class.
    def sql_exception_class
      fail NotImplementedError, 'sql_exception_class'
    end

    # Truthy when sql begins with BEGIN, START TRANSACTION, END, COMMIT,
    # ROLLBACK or ABORT (case-insensitive, leading whitespace allowed).
    def transaction_statement?(sql)
      sql =~ TRANSACTION_STATEMENT_REGEX
    end

    # Removes whole lines whose first non-blank characters are "--".
    # Trailing "--" comments after SQL on the same line are left in place.
    def remove_comments(sql)
      lines = sql.split("\n")
      stripped_lines = lines.reject {|line| starts_with_double_dash?(line) }
      stripped_lines.join("\n")
    end

    def starts_with_double_dash?(line)
      line =~ /\A\s*--/
    end

    # Hook: subclasses may expand placeholders in script text. No-op here.
    def substitute_variables(sql)
      sql
    end

    # Splits on semicolons that end a line and executes each statement,
    # skipping fragments that contain only whitespace.
    def execute_each_statement(sql, options={})
      sql.split(/;\s*$/).each do |statement|
        next if statement.strip.empty?
        execute("#{statement};", options)
      end
    end

    def log(msg)
      @logger.log(msg)
    end
  end
end
@@ -0,0 +1,32 @@
1
+ require 'singleton'
2
+ require 'time'
3
+ require 'copperlight/time_util'
4
+
5
+
6
module Jredshift
  # Writes timestamped messages to standard output, masking configured
  # secrets with asterisks.
  class Logger

    # options:
    #   :secrets - strings to mask in every logged message. The value of the
    #              AWS_SECRET_ACCESS_KEY environment variable is added
    #              automatically when it is set.
    def initialize(options={})
      # Work on a copy so the caller's array is never mutated.
      secrets = options.fetch(:secrets, []).dup
      secrets << ENV['AWS_SECRET_ACCESS_KEY'] if ENV['AWS_SECRET_ACCESS_KEY']
      # nil/empty entries cannot be masked and would otherwise raise or no-op.
      @secrets = secrets.compact.map(&:to_s).reject(&:empty?)
    end

    # Prints "<UTC timestamp> | <msg>" with all secrets masked.
    def log(msg)
      puts "#{now_utc} | #{hide_secrets(msg.to_s)}"
    end

    private

    def hide_secrets(msg)
      @secrets.inject(msg) {|message, secret| hide_secret(message, secret) }
    end

    # Replaces EVERY occurrence of secret with the same number of asterisks.
    def hide_secret(msg, secret)
      msg.gsub(secret, '*' * secret.length)
    end

    # Current UTC time formatted as "YYYY-MM-DD HH:MM:SS".
    def now_utc
      Time.now.utc.strftime('%Y-%m-%d %H:%M:%S')
    end
  end
end
@@ -0,0 +1,3 @@
1
module Jredshift
  # Release number of the jredshift gem; read by jredshift.gemspec.
  VERSION = '0.0.1'
end
data/lib/jredshift.rb ADDED
@@ -0,0 +1,85 @@
1
+ require 'jredshift/jdbc_db'
2
+
3
+
4
module Jredshift
  # JdbcDb specialised for Redshift: optional query group / WLM slot count
  # set at connect time, drop-if-exists helpers, a table existence check, and
  # the list of error messages that are retried.
  class Redshift < JdbcDb
    attr_reader :query_group, :query_slot_count

    # I've encountered a Redshift bug where the DB breaks all connections and
    # can't be connected to again for a short time.
    ERROR_CONNECTION_REFUSED = 'Connection refused. Check that the hostname and ' +
      'port are correct and that the postmaster is accepting TCP/IP connections.'
    # This is seen when the connection is broken.
    ERROR_IO = 'An I/O error occurred while sending to the backend'

    # Accepts the JdbcDb options plus:
    #   :query_group      - issued as "SET query_group TO ..." when given.
    #   :query_slot_count - issued as "SET wlm_query_slot_count TO ..." when given.
    def initialize(jdbc_url, user, password, options={})
      super
      @query_group = options.fetch(:query_group, nil)
      @query_slot_count = options.fetch(:query_slot_count, nil)

      set_query_group(@query_group) if @query_group
      set_query_slot_count(@query_slot_count) if @query_slot_count
    end

    def set_query_group(query_group)
      execute("SET query_group TO #{query_group};")
    end

    def set_query_slot_count(count)
      execute("SET wlm_query_slot_count TO #{count};")
    end

    # Drops the table, ignoring the "does not exist" error.
    # options: :cascade - append CASCADE (default false).
    def drop_table_if_exists(table, options={})
      cascade = options.fetch(:cascade, false) ? ' CASCADE' : ''
      err_msg = "ERROR: table \"#{remove_schema(table)}\" does not exist"

      execute("DROP TABLE #{table}#{cascade};", :quiet => true, :errors_to_ignore => [err_msg])
    end

    # Drops the view, ignoring the "does not exist" error.
    # options: :cascade - append CASCADE (default false).
    def drop_view_if_exists(view, options={})
      cascade = options.fetch(:cascade, false) ? ' CASCADE' : ''
      err_msg = "ERROR: view \"#{remove_schema(view)}\" does not exist"

      execute("DROP VIEW #{view}#{cascade};", :quiet => true, :errors_to_ignore => [err_msg])
    end

    # True when pg_tables lists exactly one table with this schema and name.
    # Returns false when the lookup itself failed and errors are not raised
    # (#query returned nil); #error_occurred? is set in that case.
    def table_exists?(schema, table)
      sql = <<-SQL
        SELECT count(*) FROM pg_tables
        WHERE schemaname = '#{escape_literal(schema)}' AND tablename = '#{escape_literal(table)}'
        ;
      SQL
      rows = query(sql, :quiet => true)
      return false if rows.nil? || rows.empty?

      rows.first['count'] == 1
    end

    private

    def recoverable_error?(err_msg)
      err_msg =~ /S3ServiceException:speed limit exceeded/i ||
      err_msg =~ /Communications link failure/i ||
      err_msg =~ /VACUUM is running/i ||
      err_msg =~ /system is in maintenance mode/i ||
      err_msg == ERROR_CONNECTION_REFUSED ||
      super
    end

    # Seconds to wait before a retry: 900 for maintenance mode, 600 for a
    # refused connection, otherwise the superclass default.
    def sleep_time_for_error(err_msg)
      if err_msg =~ /system is in maintenance mode/i
        900
      elsif err_msg == ERROR_CONNECTION_REFUSED
        600
      else
        super
      end
    end

    def sql_exception_class
      Java::OrgPostgresqlUtil::PSQLException
    end

    # "schema.table" -> "table"
    def remove_schema(table)
      table.split('.').last
    end

    # Doubles single quotes so the value can be embedded in a SQL string
    # literal without terminating it.
    def escape_literal(value)
      value.to_s.gsub("'", "''")
    end
  end
end
metadata ADDED
@@ -0,0 +1,98 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: jredshift
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Dean Morin
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2014-08-27 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: jdbc-postgres
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '9.3'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '9.3'
27
+ - !ruby/object:Gem::Dependency
28
+ name: bundler
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '1.6'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '1.6'
41
+ - !ruby/object:Gem::Dependency
42
+ name: rake
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ description: |2
56
+ Convience wrapper for using Redshift with JRuby.
57
+ email:
58
+ - morin.dean@gmail.com
59
+ executables: []
60
+ extensions: []
61
+ extra_rdoc_files: []
62
+ files:
63
+ - ".gitignore"
64
+ - Gemfile
65
+ - LICENSE.txt
66
+ - README.md
67
+ - Rakefile
68
+ - jredshift.gemspec
69
+ - lib/jredshift.rb
70
+ - lib/jredshift/array_of_hashes_result.rb
71
+ - lib/jredshift/jdbc_db.rb
72
+ - lib/jredshift/logger.rb
73
+ - lib/jredshift/version.rb
74
+ homepage: http://github.com/deanmorin/jredshift
75
+ licenses:
76
+ - The Unlicense
77
+ metadata: {}
78
+ post_install_message:
79
+ rdoc_options: []
80
+ require_paths:
81
+ - lib
82
+ required_ruby_version: !ruby/object:Gem::Requirement
83
+ requirements:
84
+ - - ">="
85
+ - !ruby/object:Gem::Version
86
+ version: '0'
87
+ required_rubygems_version: !ruby/object:Gem::Requirement
88
+ requirements:
89
+ - - ">="
90
+ - !ruby/object:Gem::Version
91
+ version: '0'
92
+ requirements: []
93
+ rubyforge_project:
94
+ rubygems_version: 2.2.0
95
+ signing_key:
96
+ specification_version: 4
97
+ summary: Redshift wrapper for JRuby.
98
+ test_files: []