jredshift 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 8bd2e7cbd469a48f42d1182e83af7391620ad338
4
+ data.tar.gz: 411d575cae9571cd92df1d3bc015372b0cdee43e
5
+ SHA512:
6
+ metadata.gz: 71e88500c05c1517ed3f6cee631daae94c28f1e7001ff4d1bf6a29308acaf2d632a1ad0246f21da0e8b33a37c8761ff5d9a4a3581deb6a0baeab2dc9f1b91a5f
7
+ data.tar.gz: ceeab03d691c5d93453aa0b484a0b5638a4cc50def92c3df780532896b883fad37db34501b155cf026b0f47dbebbb8fb24a6cab3fab2cb688c0a6545fd7f9f66
data/.gitignore ADDED
@@ -0,0 +1,22 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ Gemfile.lock
7
+ InstalledFiles
8
+ _yardoc
9
+ coverage
10
+ doc/
11
+ lib/bundler/man
12
+ pkg
13
+ rdoc
14
+ spec/reports
15
+ test/tmp
16
+ test/version_tmp
17
+ tmp
18
+ *.bundle
19
+ *.so
20
+ *.o
21
+ *.a
22
+ mkmf.log
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in jredshift.gemspec
4
+ gemspec
data/LICENSE.txt ADDED
@@ -0,0 +1,24 @@
1
+ This is free and unencumbered software released into the public domain.
2
+
3
+ Anyone is free to copy, modify, publish, use, compile, sell, or
4
+ distribute this software, either in source code form or as a compiled
5
+ binary, for any purpose, commercial or non-commercial, and by any
6
+ means.
7
+
8
+ In jurisdictions that recognize copyright laws, the author or authors
9
+ of this software dedicate any and all copyright interest in the
10
+ software to the public domain. We make this dedication for the benefit
11
+ of the public at large and to the detriment of our heirs and
12
+ successors. We intend this dedication to be an overt act of
13
+ relinquishment in perpetuity of all present and future rights to this
14
+ software under copyright law.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
19
+ IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
20
+ OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21
+ ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22
+ OTHER DEALINGS IN THE SOFTWARE.
23
+
24
+ For more information, please refer to <http://unlicense.org/>
data/README.md ADDED
@@ -0,0 +1,29 @@
1
+ # Jredshift
2
+
3
+ A convenience wrapper for using Redshift with JRuby.
4
+
5
+ ## Installation
6
+
7
+ Add this line to your application's Gemfile:
8
+
9
+ gem 'jredshift'
10
+
11
+ And then execute:
12
+
13
+ $ bundle
14
+
15
+ Or install it yourself as:
16
+
17
+ $ gem install jredshift
18
+
19
+ ## Usage
20
+
21
+ TODO: Write usage instructions here
22
+
23
+ ## Contributing
24
+
25
+ 1. Fork it ( https://github.com/deanmorin/jredshift/fork )
26
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
27
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
28
+ 4. Push to the branch (`git push origin my-new-feature`)
29
+ 5. Create a new Pull Request
data/Rakefile ADDED
@@ -0,0 +1,2 @@
1
+ require "bundler/gem_tasks"
2
+
data/jredshift.gemspec ADDED
@@ -0,0 +1,27 @@
1
+ # coding: utf-8
2
# Put this gem's lib/ directory on the load path so the version constant can
# be required straight from the source tree.
lib = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
require 'jredshift/version'


# Gem specification for jredshift.
Gem::Specification.new do |spec|
  spec.name          = 'jredshift'
  spec.version       = Jredshift::VERSION
  spec.authors       = ['Dean Morin']
  spec.email         = ['morin.dean@gmail.com']
  spec.summary       = 'Redshift wrapper for JRuby.'
  spec.description   = <<-EOS
    Convenience wrapper for using Redshift with JRuby.
  EOS
  spec.homepage      = 'http://github.com/deanmorin/jredshift'
  # SPDX identifier for the license text shipped in LICENSE.txt.
  spec.license       = 'Unlicense'

  # Package every file tracked by git; executables and test files are derived
  # from that list by path prefix.
  spec.files         = `git ls-files -z`.split("\x0")
  spec.executables   = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
  spec.test_files    = spec.files.grep(%r{^(test|spec|features)/})
  spec.require_paths = ['lib']

  spec.add_dependency 'jdbc-postgres', '~> 9.3'
  spec.add_development_dependency 'bundler', '~> 1.6'
  spec.add_development_dependency 'rake', '~> 0'
end
@@ -0,0 +1,85 @@
1
+ require 'bigdecimal'
2
+
3
+
4
module Jredshift
  # Converts a JDBC result set into a plain Ruby Array of Hashes.
  #
  # The whole result set is read in the constructor. Each row becomes a Hash
  # keyed by column name, with the value converted according to the column's
  # JDBC type code.
  class ArrayOfHashesResult
    # Array of Hashes, one per row, in the order the rows were read.
    attr_reader :result

    # JDBC type codes, defined at
    # http://docs.oracle.com/javase/6/docs/api/java/sql/Types.html
    COL_TYPE_BIGINT    = -5
    COL_TYPE_BIT       = -7
    COL_TYPE_CHAR      = 1
    COL_TYPE_DATE      = 91 # java.sql.Types.DATE (was 41, which matches no JDBC type)
    COL_TYPE_DECIMAL   = 3
    COL_TYPE_DOUBLE    = 8
    COL_TYPE_FLOAT     = 6
    COL_TYPE_INTEGER   = 4
    COL_TYPE_NCHAR     = -15
    COL_TYPE_NUMERIC   = 2
    COL_TYPE_NVARCHAR  = -9
    COL_TYPE_SMALLINT  = 5
    COL_TYPE_TIME      = 92
    COL_TYPE_TIMESTAMP = 93
    COL_TYPE_TINYINT   = -6
    COL_TYPE_VARCHAR   = 12


    # java_result_set - an object responding to the JDBC ResultSet methods
    #                   used below (meta_data, next, get_string, get_int, ...).
    def initialize(java_result_set)
      @java_result_set = java_result_set
      @result = result_set_to_array_of_hashes
    end

    private

    # Walks the result set from its current position to the end and returns
    # one Hash per row.
    def result_set_to_array_of_hashes
      meta = @java_result_set.meta_data
      # Column metadata does not change between rows, so read it once.
      column_names = (1..meta.column_count).map { |i| meta.column_name(i) }
      rows = []

      while @java_result_set.next
        row = {}
        column_names.each_with_index do |name, index|
          # JDBC columns are 1-based.
          row[name] = get_field_by_type(meta, index + 1)
        end
        rows << row
      end
      rows
    end

    # Reads column i of the current row and converts it based on the column's
    # JDBC type code. Returns nil when the column's string value is nil.
    def get_field_by_type(meta, i)
      return nil if @java_result_set.get_string(i).nil?

      case meta.column_type(i)

      when COL_TYPE_TINYINT, COL_TYPE_SMALLINT, COL_TYPE_INTEGER
        @java_result_set.get_int(i).to_i
      when COL_TYPE_BIGINT
        @java_result_set.get_long(i).to_i
      when COL_TYPE_DATE
        @java_result_set.get_date(i)
      when COL_TYPE_TIME
        # NOTE(review): relies on the object returned by get_time responding
        # to to_i - confirm against the JDBC driver in use.
        @java_result_set.get_time(i).to_i
      when COL_TYPE_TIMESTAMP
        datetime_str = @java_result_set.get_timestamp(i).to_s
        DateTime.strptime(datetime_str, '%Y-%m-%d %H:%M:%S')
      when COL_TYPE_NUMERIC, COL_TYPE_DECIMAL, COL_TYPE_FLOAT, COL_TYPE_DOUBLE
        # Kernel#BigDecimal, because BigDecimal.new is gone from current
        # bigdecimal releases.
        BigDecimal(@java_result_set.get_string(i).to_s)
      when COL_TYPE_CHAR, COL_TYPE_NCHAR, COL_TYPE_NVARCHAR, COL_TYPE_VARCHAR
        @java_result_set.get_string(i).to_s
      when COL_TYPE_BIT
        # 't' / 'f' become booleans; any other value is passed through.
        str = @java_result_set.get_string(i)
        if str == 't'
          true
        elsif str == 'f'
          false
        else
          str
        end
      else
        # Unknown type codes fall back to the string form.
        @java_result_set.get_string(i).to_s
      end
    end
  end
end
@@ -0,0 +1,192 @@
1
+ require 'java'
2
+ require 'jdbc/postgres'
3
+ require 'jredshift/array_of_hashes_result'
4
+ require 'jredshift/logger'
5
+
6
+
7
module Jredshift
  # Thin wrapper around a JDBC connection that adds logging, retrying and
  # configurable error handling. Subclasses supply sql_exception_class and may
  # override recoverable_error?, sleep_time_for_error and substitute_variables.
  class JdbcDb
    # The underlying JDBC connection.
    attr_reader :conn

    # Java uses an int for the updateCount field in ResultHandler. If the
    # update count overflows an int, you'll get this error message (but the
    # records will be updated/inserted correctly).
    ERROR_UPDATE_COUNT_OVERFLOW_REGEX =
      /Unable to interpret the update count in command completion tag: /i


    # jdbc_url, user, password - passed to DriverManager.get_connection.
    # options:
    #   :abort_on_error - re-raise unhandled errors (default true)
    #   :quiet          - suppress SQL / row-count logging (default false)
    #   :secrets        - strings the logger must mask (default [])
    def initialize(jdbc_url, user, password, options={})
      @abort_on_error = options.fetch(:abort_on_error, true)
      @quiet = options.fetch(:quiet, false)
      @jdbc_url = jdbc_url
      @user = user
      @password = password
      @error_occurred = false
      @retry_count = 0

      # The logger must exist before connecting: a failed connection attempt
      # is reported through #log.
      secrets = options.fetch(:secrets, [])
      @logger = Logger.new(:secrets => secrets)

      init_connection
    end

    # Runs an update statement and returns the affected row count.
    #
    # options:
    #   :errors_to_ignore - exact error messages to swallow (default [])
    #   :quiet            - overrides the instance-level quiet flag
    def execute(sql, options={})
      errors_to_ignore = options.fetch(:errors_to_ignore, [])
      quiet = options.fetch(:quiet, @quiet)

      log "\n#{sql}\n" unless quiet

      affected_rows = with_error_handling(:errors_to_ignore => errors_to_ignore) do
        @statement.execute_update(sql)
      end

      unless transaction_statement?(sql) || quiet || affected_rows.nil?
        log "Affected #{affected_rows} row(s)."
      end

      affected_rows
    end

    # Returns an empty array for no results, or nil if an error occurred and
    # @abort_on_error = false
    def query(sql, options={})
      quiet = options.fetch(:quiet, @quiet)

      log "\n#{sql}\n" unless quiet
      result_set = with_error_handling { @statement.execute_query(sql) }
      return nil unless result_set

      begin
        ArrayOfHashesResult.new(result_set).result
      ensure
        # The rows have been copied into Ruby objects (or conversion failed),
        # so the result set is no longer needed.
        result_set.close
      end
    end

    # Returns a ResultSet (instead of an Array of Hashes). Requires caller to
    # close ResultSet and set autocommit back to true
    #
    # options:
    #   :quiet      - overrides the instance-level quiet flag
    #   :fetch_size - passed to set_fetch_size on the statement (default 100000)
    def big_query(sql, options={})
      quiet = options.fetch(:quiet, @quiet)
      fetch_size = options.fetch(:fetch_size, 100000)

      @conn.set_auto_commit(false)
      stmt = @conn.create_statement
      stmt.set_fetch_size(fetch_size)

      log "\n#{sql}\n" unless quiet
      # Run the query exactly once, inside the error handler.
      with_error_handling { stmt.execute_query(sql) }
    end

    # Reads a SQL file, drops comment lines, applies substitute_variables and
    # executes each statement in turn. options are passed to #execute.
    def execute_script(filename, options={})
      File.open(filename) do |fh|
        sql = fh.read
        sql = remove_comments(sql)
        sql = substitute_variables(sql)
        execute_each_statement(sql, options)
      end
    end

    # True once an unhandled error has been recorded by with_error_handling.
    def error_occurred?
      @error_occurred
    end

    def clear_error_state
      @error_occurred = false
    end

    private

    # Loads the Postgres driver, opens the connection and creates the shared
    # statement used by #execute and #query.
    def init_connection
      Jdbc::Postgres.load_driver
      with_error_handling do
        @conn = java.sql::DriverManager.get_connection(@jdbc_url, @user, @password)
      end
      # @conn stays nil when the connection error was swallowed
      # (abort_on_error false); skip statement creation in that case.
      @statement = @conn.create_statement if @conn
    end

    # Yields and returns the block's value, applying the error policy below to
    # anything raised.
    #
    # errors_to_ignore should only be used where no return value is expected
    #
    # Policy, in order:
    #   1. message listed in :errors_to_ignore - swallowed; the branch
    #      evaluates to 0 (the reset retry counter).
    #   2. interpretable error - the value recovered from the message is
    #      returned.
    #   3. recoverable error, fewer than 3 retries so far - sleep, then retry.
    #   4. anything else - logged, flagged via error_occurred?, and re-raised
    #      when abort_on_error is set (otherwise nil is returned).
    def with_error_handling(options={})
      errors_to_ignore = options.fetch(:errors_to_ignore, [])
      return_value = yield
      @retry_count = 0
      return_value

    # NOTE(review): rescues Exception rather than StandardError, presumably so
    # that Java exceptions surfacing through JRuby are caught - confirm before
    # narrowing.
    rescue Exception => e
      if errors_to_ignore.include?(e.message)
        @retry_count = 0

      elsif interpretable_error?(e.message)
        @retry_count = 0
        interpret_error(e.message)

      elsif recoverable_error?(e.message) && @retry_count < 3
        @retry_count += 1
        sleep_time = sleep_time_for_error(e.message)
        log "Failed with recoverable error: #{e.message}"
        log "Retry attempt #{@retry_count} will occur after #{sleep_time} seconds"
        sleep sleep_time
        retry

      else
        @retry_count = 0
        @error_occurred = true
        log e.class.to_s
        log e.message
        log e.backtrace.join("\n") unless e.class == sql_exception_class

        raise e if @abort_on_error
      end
    end

    # Truthy when a usable value can be recovered from the error message.
    def interpretable_error?(err_msg)
      err_msg =~ ERROR_UPDATE_COUNT_OVERFLOW_REGEX
    end

    # Extracts the recoverable value from an interpretable error message.
    # Raises ArgumentError for any other message.
    def interpret_error(err_msg)
      if err_msg =~ ERROR_UPDATE_COUNT_OVERFLOW_REGEX
        get_update_count_from_error(err_msg)
      else
        fail ArgumentError, 'Unexpected interpretable_error'
      end
    end

    # Takes the last whitespace-separated token of the message, drops the
    # trailing period and returns it as an Integer, matching the type that
    # execute_update returns on the normal path.
    def get_update_count_from_error(err_msg)
      err_msg.split(' ').last.split('.').first.to_i
    end

    # Hook for subclasses: should this error be retried? Never, by default.
    def recoverable_error?(err_msg)
      false
    end

    # Hook for subclasses: seconds to wait before retrying.
    def sleep_time_for_error(err_msg)
      60
    end

    # Hook for subclasses: the driver's SQL exception class. Backtraces are
    # not logged for exceptions of exactly this class.
    def sql_exception_class
      fail NotImplementedError, 'sql_exception_class'
    end

    # Truthy when the statement starts with BEGIN or END. The match is
    # anchored to the start so that identifiers that merely contain those
    # letters (e.g. "calendar") do not count.
    # TODO add other transaction keywords
    def transaction_statement?(sql)
      sql =~ /\A\s*(?:begin|end)\b/i
    end

    # Drops every line whose first non-blank characters are "--".
    def remove_comments(sql)
      lines = sql.split("\n")
      stripped_lines = lines.reject {|line| starts_with_double_dash?(line) }
      stripped_lines.join("\n")
    end

    def starts_with_double_dash?(line)
      line =~ /\A\s*--/
    end

    # Hook for subclasses: returns the SQL unchanged by default.
    def substitute_variables(sql)
      sql
    end

    # Splits the script on semicolons that end a line and executes each
    # non-blank statement with the semicolon re-appended.
    def execute_each_statement(sql, options={})
      sql.split(/;\s*$/).each do |statement|
        statement = statement.strip
        # Whitespace between or after statements is not a statement.
        next if statement.empty?

        execute("#{statement};", options)
      end
    end

    def log(msg)
      @logger.log(msg)
    end
  end
end
@@ -0,0 +1,32 @@
1
+ require 'singleton'
2
+ require 'time'
3
+ require 'copperlight/time_util'
4
+
5
+
6
module Jredshift
  # Writes timestamped messages to standard output, masking configured secret
  # strings so that they never appear in the log.
  class Logger

    # options:
    #   :secrets - strings to mask in every logged message (default []).
    #
    # The value of the AWS_SECRET_ACCESS_KEY environment variable, when set,
    # is masked as well.
    def initialize(options={})
      # Work on a copy so the caller's array is not modified.
      secrets = options.fetch(:secrets, []).dup
      secrets << ENV['AWS_SECRET_ACCESS_KEY'] if ENV['AWS_SECRET_ACCESS_KEY']
      # nil and empty entries cannot be matched against a message.
      @secrets = secrets.compact.map(&:to_s).reject(&:empty?)
    end

    # Prints "<UTC timestamp> | <msg>" with every secret replaced by
    # asterisks.
    def log(msg)
      puts "#{now_utc} | #{hide_secrets(msg)}"
    end

    private

    # Applies hide_secret for each configured secret in turn.
    def hide_secrets(msg)
      @secrets.inject(msg.to_s) {|message, secret| hide_secret(message, secret) }
    end

    # Replaces every occurrence of secret with the same number of asterisks.
    def hide_secret(msg, secret)
      msg.gsub(secret, '*' * secret.length)
    end

    # Current UTC time formatted as 'YYYY-MM-DD HH:MM:SS'.
    def now_utc
      Time.now.utc.strftime('%Y-%m-%d %H:%M:%S')
    end
  end
end
@@ -0,0 +1,3 @@
1
# Top-level namespace of the jredshift gem.
module Jredshift
  # Version string of this gem release.
  VERSION = '0.0.1'
end
data/lib/jredshift.rb ADDED
@@ -0,0 +1,85 @@
1
+ require 'jredshift/jdbc_db'
2
+
3
+
4
module Jredshift
  # JdbcDb specialised for Redshift: optional query group / slot count
  # settings, drop-if-exists helpers, and a list of error messages that are
  # worth retrying.
  class Redshift < JdbcDb
    attr_reader :query_group, :query_slot_count

    # I've encountered a Redshift bug where the DB breaks all connections and
    # can't be connected to again for a short time.
    ERROR_CONNECTION_REFUSED = 'Connection refused. Check that the hostname and ' +
      'port are correct and that the postmaster is accepting TCP/IP connections.'
    # This is seen when the connection is broken.
    ERROR_IO = 'An I/O error occurred while sending to the backend'


    # Accepts the same arguments as JdbcDb#initialize, plus:
    #   :query_group      - issued as "SET query_group TO ..." when given
    #   :query_slot_count - issued as "SET wlm_query_slot_count TO ..." when given
    def initialize(jdbc_url, user, password, options={})
      super
      @query_group = options.fetch(:query_group, nil)
      @query_slot_count = options.fetch(:query_slot_count, nil)

      set_query_group(@query_group) if @query_group
      set_query_slot_count(@query_slot_count) if @query_slot_count
    end

    def set_query_group(query_group)
      execute("SET query_group TO #{query_group};")
    end

    def set_query_slot_count(count)
      execute("SET wlm_query_slot_count TO #{count};")
    end

    # Drops the table, ignoring the "does not exist" error.
    #
    # options:
    #   :cascade - append CASCADE to the statement (default false)
    def drop_table_if_exists(table, options={})
      cascade = options.fetch(:cascade, false) ? ' CASCADE' : ''
      # The error message names the table without its schema.
      err_msg = "ERROR: table \"#{remove_schema(table)}\" does not exist"

      execute("DROP TABLE #{table}#{cascade};", :quiet => true, :errors_to_ignore => [err_msg])
    end

    # Drops the view, ignoring the "does not exist" error.
    #
    # options:
    #   :cascade - append CASCADE to the statement (default false)
    def drop_view_if_exists(view, options={})
      cascade = options.fetch(:cascade, false) ? ' CASCADE' : ''
      err_msg = "ERROR: view \"#{remove_schema(view)}\" does not exist"

      execute("DROP VIEW #{view}#{cascade};", :quiet => true, :errors_to_ignore => [err_msg])
    end

    # True when pg_tables lists exactly one table with this schema and name.
    # Returns false when the lookup produced no rows, including the case where
    # the query failed and the error was not re-raised (see error_occurred?).
    #
    # NOTE(review): schema and table are interpolated into the SQL as-is;
    # callers must not pass untrusted values.
    def table_exists?(schema, table)
      sql = <<-SQL
        SELECT count(*) FROM pg_tables
        WHERE schemaname = '#{schema}' AND tablename = '#{table}'
        ;
      SQL
      rows = query(sql, :quiet => true)
      # query returns nil when an error was swallowed.
      return false if rows.nil? || rows.empty?

      rows.first['count'] == 1
    end

    private

    # Errors that are worth waiting out and retrying.
    def recoverable_error?(err_msg)
      err_msg =~ /S3ServiceException:speed limit exceeded/i ||
        err_msg =~ /Communications link failure/i ||
        err_msg =~ /VACUUM is running/i ||
        err_msg =~ /system is in maintenance mode/i ||
        err_msg == ERROR_CONNECTION_REFUSED ||
        super
    end

    # Seconds to wait before retrying: 900 for maintenance mode, 600 for a
    # refused connection, otherwise the superclass default.
    def sleep_time_for_error(err_msg)
      if err_msg =~ /system is in maintenance mode/i
        900
      elsif err_msg == ERROR_CONNECTION_REFUSED
        600
      else
        super
      end
    end

    def sql_exception_class
      Java::OrgPostgresqlUtil::PSQLException
    end

    # 'schema.table' -> 'table'; a name without a dot is returned unchanged.
    def remove_schema(table)
      table.split('.').last
    end
  end
end
metadata ADDED
@@ -0,0 +1,98 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: jredshift
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Dean Morin
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2014-08-27 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: jdbc-postgres
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '9.3'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '9.3'
27
+ - !ruby/object:Gem::Dependency
28
+ name: bundler
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '1.6'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '1.6'
41
+ - !ruby/object:Gem::Dependency
42
+ name: rake
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ description: |2
56
+ Convience wrapper for using Redshift with JRuby.
57
+ email:
58
+ - morin.dean@gmail.com
59
+ executables: []
60
+ extensions: []
61
+ extra_rdoc_files: []
62
+ files:
63
+ - ".gitignore"
64
+ - Gemfile
65
+ - LICENSE.txt
66
+ - README.md
67
+ - Rakefile
68
+ - jredshift.gemspec
69
+ - lib/jredshift.rb
70
+ - lib/jredshift/array_of_hashes_result.rb
71
+ - lib/jredshift/jdbc_db.rb
72
+ - lib/jredshift/logger.rb
73
+ - lib/jredshift/version.rb
74
+ homepage: http://github.com/deanmorin/jredshift
75
+ licenses:
76
+ - The Unlicense
77
+ metadata: {}
78
+ post_install_message:
79
+ rdoc_options: []
80
+ require_paths:
81
+ - lib
82
+ required_ruby_version: !ruby/object:Gem::Requirement
83
+ requirements:
84
+ - - ">="
85
+ - !ruby/object:Gem::Version
86
+ version: '0'
87
+ required_rubygems_version: !ruby/object:Gem::Requirement
88
+ requirements:
89
+ - - ">="
90
+ - !ruby/object:Gem::Version
91
+ version: '0'
92
+ requirements: []
93
+ rubyforge_project:
94
+ rubygems_version: 2.2.0
95
+ signing_key:
96
+ specification_version: 4
97
+ summary: Redshift wrapper for JRuby.
98
+ test_files: []