athena-utils 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 3f4fe05a4b845153c14b1b655a590a0983b8264853c44289ffb1498247b526d8
4
+ data.tar.gz: 3685bc1f2542f2e02d6a31a7a7e2acf12ea5f78a13f6b526418f59465939f45e
5
+ SHA512:
6
+ metadata.gz: 9b42c88bd431a5f3ee5fa5ee8ef889028f7943ebf2df6fec2b0aec982fdb90c0ffdc87338859147c2cff9f10541debcbec5e3f30b5d4963385d4faf00fe9c4e7
7
+ data.tar.gz: c3a2a4b4bd08c7aeda35552c7caf2abd352eb5b2c206135cc5d4853798948507078166b459bf22d42d33bfd79d9f67c44379a5fb6f61476e729030bbcbcbc0ec
data/.gitignore ADDED
@@ -0,0 +1,52 @@
1
+ *.gem
2
+ *.rbc
3
+ /.config
4
+ /coverage/
5
+ /InstalledFiles
6
+ /pkg/
7
+ /spec/reports/
8
+ /spec/examples.txt
9
+ /test/tmp/
10
+ /test/version_tmp/
11
+ /tmp/
12
+
13
+ # Used by dotenv library to load environment variables.
14
+ # .env
15
+
16
+ ## Specific to RubyMotion:
17
+ .dat*
18
+ .repl_history
19
+ build/
20
+ *.bridgesupport
21
+ build-iPhoneOS/
22
+ build-iPhoneSimulator/
23
+
24
+ ## Specific to RubyMotion (use of CocoaPods):
25
+ #
26
+ # We recommend against adding the Pods directory to your .gitignore. However
27
+ # you should judge for yourself, the pros and cons are mentioned at:
28
+ # https://guides.cocoapods.org/using/using-cocoapods.html#should-i-check-the-pods-directory-into-source-control
29
+ #
30
+ # vendor/Pods/
31
+
32
+ ## Documentation cache and generated files:
33
+ /.yardoc/
34
+ /_yardoc/
35
+ /doc/
36
+ /rdoc/
37
+
38
+ ## Environment normalization:
39
+ /.bundle/
40
+ /vendor/bundle
41
+ /lib/bundler/man/
42
+
43
+ # for a library or gem, you might want to ignore these files since the code is
44
+ # intended to run in multiple environments; otherwise, check them in:
45
+ # Gemfile.lock
46
+ # .ruby-version
47
+ # .ruby-gemset
48
+
49
+ # unless supporting rvm < 1.11.0 or doing something fancy, ignore this:
50
+ .rvmrc
51
+
52
+ *~
data/.ruby-gemset ADDED
@@ -0,0 +1 @@
1
+ athena-utils
data/.ruby-version ADDED
@@ -0,0 +1 @@
1
+ 3.0.0
data/Gemfile ADDED
@@ -0,0 +1,7 @@
1
+ # frozen_string_literal: true
2
+
3
+ source 'https://rubygems.org' # HTTPS so gem downloads cannot be tampered with in transit
4
+
5
+ gem 'aws-sdk-athena'
6
+ gem 'aws-sdk-s3'
7
+ gem 'csv-utils'
data/Gemfile.lock ADDED
@@ -0,0 +1,37 @@
1
+ GEM
2
+ remote: http://rubygems.org/
3
+ specs:
4
+ aws-eventstream (1.2.0)
5
+ aws-partitions (1.555.0)
6
+ aws-sdk-athena (1.49.0)
7
+ aws-sdk-core (~> 3, >= 3.126.0)
8
+ aws-sigv4 (~> 1.1)
9
+ aws-sdk-core (3.126.2)
10
+ aws-eventstream (~> 1, >= 1.0.2)
11
+ aws-partitions (~> 1, >= 1.525.0)
12
+ aws-sigv4 (~> 1.1)
13
+ jmespath (~> 1.0)
14
+ aws-sdk-kms (1.54.0)
15
+ aws-sdk-core (~> 3, >= 3.126.0)
16
+ aws-sigv4 (~> 1.1)
17
+ aws-sdk-s3 (1.112.0)
18
+ aws-sdk-core (~> 3, >= 3.126.0)
19
+ aws-sdk-kms (~> 1)
20
+ aws-sigv4 (~> 1.4)
21
+ aws-sigv4 (1.4.0)
22
+ aws-eventstream (~> 1, >= 1.0.2)
23
+ csv-utils (0.3.14)
24
+ inheritance-helper
25
+ inheritance-helper (0.2.5)
26
+ jmespath (1.6.0)
27
+
28
+ PLATFORMS
29
+ x86_64-darwin-20
30
+
31
+ DEPENDENCIES
32
+ aws-sdk-athena
33
+ aws-sdk-s3
34
+ csv-utils
35
+
36
+ BUNDLED WITH
37
+ 2.2.3
data/LICENSE.txt ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2019 Douglas Youch
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
data/README.md ADDED
@@ -0,0 +1 @@
1
+ # AWS Athena Utils
@@ -0,0 +1,19 @@
1
+ # frozen_string_literal: true
2
+
3
+ Gem::Specification.new do |s|
4
+ s.name = 'athena-utils'
5
+ s.version = '0.1.0'
6
+ s.licenses = ['MIT']
7
+ s.summary = 'Athena Utils'
8
+ s.description = 'Tools for querying AWS Athena'
9
+ s.authors = ['Doug Youch']
10
+ s.email = 'dougyouch@gmail.com'
11
+ s.homepage = 'https://github.com/dougyouch/athena-utils'
12
+ s.files = `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
13
+ s.bindir = 'bin'
14
+ s.executables = s.files.grep(%r{^bin/}) { |f| File.basename(f) }
15
+
16
+ s.add_runtime_dependency 'aws-sdk-athena'
17
+ s.add_runtime_dependency 'aws-sdk-s3'
18
+ s.add_runtime_dependency 'csv-utils'
19
+ end
data/bin/athena ADDED
@@ -0,0 +1,54 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'athena-utils'
4
+ require 'optparse'
5
+
6
+ options = {
7
+ database: nil,
8
+ output_location: nil,
9
+ query: nil,
10
+ save: nil
11
+ }
12
+ OptionParser.new do |opts|
13
+ opts.banner = "Usage: athena [options]"
14
+
15
+ opts.on("-d", "--database DATABASE", "Athena DB") do |v|
16
+ options[:database] = v
17
+ end
18
+
19
+ opts.on("-o", "--output-location OUTPUT_LOCATION", "S3 output location for athena queries") do |v|
20
+ options[:output_location] = v
21
+ end
22
+
23
+ opts.on("-q", "--query QUERY", "SQL Query") do |v|
24
+ options[:query] = v
25
+ end
26
+
27
+ opts.on('-s', '--save FILE', 'Save query results to file') do |v|
28
+ options[:save] = v
29
+ end
30
+ end.parse!
31
+
32
+ raise('must specify a database') unless options[:database]
33
+ raise('must specify output location for athena queries') unless options[:output_location]
34
+
35
+ @athena = AthenaUtils::AthenaClient.new(options[:database], options[:output_location])
36
+ def athena
37
+ @athena
38
+ end
39
+
40
+ if options[:query]
41
+ @results = athena.query(options[:query])
42
+ def results
43
+ @results
44
+ end
45
+
46
+ if options[:save]
47
+ results.save(options[:save])
48
+ exit
49
+ end
50
+ end
51
+
52
+ require 'irb'
53
+ IRB.start
54
+
@@ -0,0 +1,5 @@
1
+ module AthenaUtils
2
+ class AthenaQueryError < StandardError; end
3
+ autoload :AthenaClient, 'athena_utils/athena_client'
4
+ autoload :AthenaQueryResults, 'athena_utils/athena_query_results'
5
+ end
@@ -0,0 +1,70 @@
1
+ require 'aws-sdk-athena'
2
+
3
+ module AthenaUtils
4
+ class AthenaClient
5
+ # database is the name of the Athena DB
6
+ # output_location is the full S3 path to store the results of Athena queries
7
+ attr_reader :database,
8
+ :output_location
9
+
10
+ def initialize(database, output_location)
11
+ @database = database
12
+ @output_location = output_location
13
+ end
14
+
15
+ def aws_athena_client
16
+ @aws_athena_client ||= create_aws_athena_client
17
+ end
18
+
19
+ def create_aws_athena_client
20
+ Aws::Athena::Client.new
21
+ end
22
+
23
+ def query(query)
24
+ query_execution_id = query_async(query)
25
+ wait([query_execution_id])[query_execution_id]
26
+ end
27
+
28
+ def query_async(query)
29
+ response = aws_athena_client.start_query_execution(
30
+ query_string: query,
31
+ query_execution_context: {
32
+ database: database
33
+ },
34
+ result_configuration: {
35
+ output_location: output_location
36
+ }
37
+ )
38
+
39
+ response.query_execution_id
40
+ end
41
+
42
+ def wait(query_execution_ids)
43
+ results = {}
44
+
45
+ while results.size != query_execution_ids.size
46
+ query_execution_ids.each do |query_execution_id|
47
+ next if results.key?(query_execution_id)
48
+
49
+ query_status = aws_athena_client.get_query_execution(
50
+ query_execution_id: query_execution_id
51
+ )
52
+
53
+ case query_status[:query_execution][:status][:state]
54
+ when 'SUCCEEDED'
55
+ results[query_execution_id] = AthenaQueryResults.new(query_status)
56
+ when 'RUNNING',
57
+ 'QUEUED'
58
+ # no-op
59
+ else
60
+ raise(AthenaQueryError.new("Query failed #{query_status}"))
61
+ end
62
+ end
63
+
64
+ sleep(3) if results.size != query_execution_ids.size
65
+ end
66
+
67
+ results
68
+ end
69
+ end
70
+ end
@@ -0,0 +1,57 @@
1
+ require 'aws-sdk-s3'
2
+ require 'csv-utils'
3
+
4
+ module AthenaUtils
5
+ class AthenaQueryResults
6
+ include Enumerable
7
+
8
+ attr_reader :query_status
9
+
10
+ def initialize(query_status)
11
+ @query_status = query_status
12
+ end
13
+
14
+ def s3_url
15
+ query_status.query_execution.result_configuration.output_location
16
+ end
17
+
18
+ def s3_object
19
+ uri = URI(s3_url)
20
+
21
+ aws_s3_client.get_object(
22
+ {
23
+ bucket: uri.host,
24
+ key: uri.path[1..-1]
25
+ }
26
+ )
27
+ end
28
+
29
+ def save(file)
30
+ uri = URI(s3_url)
31
+
32
+ aws_s3_client.get_object(
33
+ {
34
+ bucket: uri.host,
35
+ key: uri.path[1..-1],
36
+ response_target: file
37
+ }
38
+ )
39
+ end
40
+
41
+ def aws_s3_client
42
+ @aws_s3_client ||= create_aws_s3_client
43
+ end
44
+
45
+ def create_aws_s3_client
46
+ Aws::S3::Client.new
47
+ end
48
+
49
+ def csv_iterator
50
+ @csv_iterator ||= CSVUtils::CSVIterator.new(CSV.new(s3_object.body))
51
+ end
52
+
53
+ def each(&block)
54
+ csv_iterator.each(&block)
55
+ end
56
+ end
57
+ end
data/scripts/console ADDED
@@ -0,0 +1,7 @@
1
+ #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
3
+
4
+ $LOAD_PATH << File.expand_path('../lib', __dir__)
5
+ require 'athena-utils'
6
+ require 'irb'
7
+ IRB.start(__FILE__)
metadata ADDED
@@ -0,0 +1,98 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: athena-utils
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Doug Youch
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2022-02-21 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: aws-sdk-athena
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: '0'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: '0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: aws-sdk-s3
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: csv-utils
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ type: :runtime
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ description: Tools for querying AWS Athena
56
+ email: dougyouch@gmail.com
57
+ executables:
58
+ - athena
59
+ extensions: []
60
+ extra_rdoc_files: []
61
+ files:
62
+ - ".gitignore"
63
+ - ".ruby-gemset"
64
+ - ".ruby-version"
65
+ - Gemfile
66
+ - Gemfile.lock
67
+ - LICENSE.txt
68
+ - README.md
69
+ - athena-utils.gemspec
70
+ - bin/athena
71
+ - lib/athena-utils.rb
72
+ - lib/athena_utils/athena_client.rb
73
+ - lib/athena_utils/athena_query_results.rb
74
+ - scripts/console
75
+ homepage: https://github.com/dougyouch/athena-utils
76
+ licenses:
77
+ - MIT
78
+ metadata: {}
79
+ post_install_message:
80
+ rdoc_options: []
81
+ require_paths:
82
+ - lib
83
+ required_ruby_version: !ruby/object:Gem::Requirement
84
+ requirements:
85
+ - - ">="
86
+ - !ruby/object:Gem::Version
87
+ version: '0'
88
+ required_rubygems_version: !ruby/object:Gem::Requirement
89
+ requirements:
90
+ - - ">="
91
+ - !ruby/object:Gem::Version
92
+ version: '0'
93
+ requirements: []
94
+ rubygems_version: 3.2.3
95
+ signing_key:
96
+ specification_version: 4
97
+ summary: Athena Utils
98
+ test_files: []