athena-utils 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 3f4fe05a4b845153c14b1b655a590a0983b8264853c44289ffb1498247b526d8
4
+ data.tar.gz: 3685bc1f2542f2e02d6a31a7a7e2acf12ea5f78a13f6b526418f59465939f45e
5
+ SHA512:
6
+ metadata.gz: 9b42c88bd431a5f3ee5fa5ee8ef889028f7943ebf2df6fec2b0aec982fdb90c0ffdc87338859147c2cff9f10541debcbec5e3f30b5d4963385d4faf00fe9c4e7
7
+ data.tar.gz: c3a2a4b4bd08c7aeda35552c7caf2abd352eb5b2c206135cc5d4853798948507078166b459bf22d42d33bfd79d9f67c44379a5fb6f61476e729030bbcbcbc0ec
data/.gitignore ADDED
@@ -0,0 +1,52 @@
1
+ *.gem
2
+ *.rbc
3
+ /.config
4
+ /coverage/
5
+ /InstalledFiles
6
+ /pkg/
7
+ /spec/reports/
8
+ /spec/examples.txt
9
+ /test/tmp/
10
+ /test/version_tmp/
11
+ /tmp/
12
+
13
+ # Used by dotenv library to load environment variables.
14
+ # .env
15
+
16
+ ## Specific to RubyMotion:
17
+ .dat*
18
+ .repl_history
19
+ build/
20
+ *.bridgesupport
21
+ build-iPhoneOS/
22
+ build-iPhoneSimulator/
23
+
24
+ ## Specific to RubyMotion (use of CocoaPods):
25
+ #
26
+ # We recommend against adding the Pods directory to your .gitignore. However
27
+ # you should judge for yourself, the pros and cons are mentioned at:
28
+ # https://guides.cocoapods.org/using/using-cocoapods.html#should-i-check-the-pods-directory-into-source-control
29
+ #
30
+ # vendor/Pods/
31
+
32
+ ## Documentation cache and generated files:
33
+ /.yardoc/
34
+ /_yardoc/
35
+ /doc/
36
+ /rdoc/
37
+
38
+ ## Environment normalization:
39
+ /.bundle/
40
+ /vendor/bundle
41
+ /lib/bundler/man/
42
+
43
+ # for a library or gem, you might want to ignore these files since the code is
44
+ # intended to run in multiple environments; otherwise, check them in:
45
+ # Gemfile.lock
46
+ # .ruby-version
47
+ # .ruby-gemset
48
+
49
+ # unless supporting rvm < 1.11.0 or doing something fancy, ignore this:
50
+ .rvmrc
51
+
52
+ *~
data/.ruby-gemset ADDED
@@ -0,0 +1 @@
1
+ athena-utils
data/.ruby-version ADDED
@@ -0,0 +1 @@
1
+ 3.0.0
data/Gemfile ADDED
@@ -0,0 +1,7 @@
1
+ # frozen_string_literal: true
2
+
3
+ source 'http://rubygems.org'
4
+
5
+ gem 'aws-sdk-athena'
6
+ gem 'aws-sdk-s3'
7
+ gem 'csv-utils'
data/Gemfile.lock ADDED
@@ -0,0 +1,37 @@
1
+ GEM
2
+ remote: http://rubygems.org/
3
+ specs:
4
+ aws-eventstream (1.2.0)
5
+ aws-partitions (1.555.0)
6
+ aws-sdk-athena (1.49.0)
7
+ aws-sdk-core (~> 3, >= 3.126.0)
8
+ aws-sigv4 (~> 1.1)
9
+ aws-sdk-core (3.126.2)
10
+ aws-eventstream (~> 1, >= 1.0.2)
11
+ aws-partitions (~> 1, >= 1.525.0)
12
+ aws-sigv4 (~> 1.1)
13
+ jmespath (~> 1.0)
14
+ aws-sdk-kms (1.54.0)
15
+ aws-sdk-core (~> 3, >= 3.126.0)
16
+ aws-sigv4 (~> 1.1)
17
+ aws-sdk-s3 (1.112.0)
18
+ aws-sdk-core (~> 3, >= 3.126.0)
19
+ aws-sdk-kms (~> 1)
20
+ aws-sigv4 (~> 1.4)
21
+ aws-sigv4 (1.4.0)
22
+ aws-eventstream (~> 1, >= 1.0.2)
23
+ csv-utils (0.3.14)
24
+ inheritance-helper
25
+ inheritance-helper (0.2.5)
26
+ jmespath (1.6.0)
27
+
28
+ PLATFORMS
29
+ x86_64-darwin-20
30
+
31
+ DEPENDENCIES
32
+ aws-sdk-athena
33
+ aws-sdk-s3
34
+ csv-utils
35
+
36
+ BUNDLED WITH
37
+ 2.2.3
data/LICENSE.txt ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2019 Douglas Youch
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
data/README.md ADDED
@@ -0,0 +1 @@
1
+ # AWS Athena Utils
@@ -0,0 +1,19 @@
1
+ # frozen_string_literal: true
2
+
3
+ Gem::Specification.new do |s|
4
+ s.name = 'athena-utils'
5
+ s.version = '0.1.0'
6
+ s.licenses = ['MIT']
7
+ s.summary = 'Athena Utils'
8
+ s.description = 'Tools for querying AWS Athena'
9
+ s.authors = ['Doug Youch']
10
+ s.email = 'dougyouch@gmail.com'
11
+ s.homepage = 'https://github.com/dougyouch/athena-utils'
12
+ s.files = `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
13
+ s.bindir = 'bin'
14
+ s.executables = s.files.grep(%r{^bin/}) { |f| File.basename(f) }
15
+
16
+ s.add_runtime_dependency 'aws-sdk-athena'
17
+ s.add_runtime_dependency 'aws-sdk-s3'
18
+ s.add_runtime_dependency 'csv-utils'
19
+ end
data/bin/athena ADDED
@@ -0,0 +1,54 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'athena-utils'
4
+ require 'optparse'
5
+
6
+ options = {
7
+ database: nil,
8
+ output_location: nil,
9
+ query: nil,
10
+ save: nil
11
+ }
12
+ OptionParser.new do |opts|
13
+ opts.banner = "Usage: athena [options]"
14
+
15
+ opts.on("-d", "--database DATABASE", "Athena DB") do |v|
16
+ options[:database] = v
17
+ end
18
+
19
+ opts.on("-o", "--output-location OUTPUT_LOCATION", "S3 output location for athena queries") do |v|
20
+ options[:output_location] = v
21
+ end
22
+
23
+ opts.on("-q", "--query QUERY", "SQL Query") do |v|
24
+ options[:query] = v
25
+ end
26
+
27
+ opts.on('-s', '--save FILE', 'Save query results to file') do |v|
28
+ options[:save] = v
29
+ end
30
+ end.parse!
31
+
32
+ raise('must specify a database') unless options[:database]
33
+ raise('must specify output location for athean queries') unless options[:output_location]
34
+
35
+ @athena = AthenaUtils::AthenaClient.new(options[:database], options[:output_location])
36
+ def athena
37
+ @athena
38
+ end
39
+
40
+ if options[:query]
41
+ @results = athena.query(options[:query])
42
+ def results
43
+ @results
44
+ end
45
+
46
+ if options[:save]
47
+ results.save(options[:save])
48
+ exit
49
+ end
50
+ end
51
+
52
+ require 'irb'
53
+ IRB.start
54
+
@@ -0,0 +1,5 @@
1
+ module AthenaUtils
2
+ class AthenaQueryError < StandardError; end
3
+ autoload :AthenaClient, 'athena_utils/athena_client'
4
+ autoload :AthenaQueryResults, 'athena_utils/athena_query_results'
5
+ end
@@ -0,0 +1,70 @@
1
+ require 'aws-sdk-athena'
2
+
3
+ module AthenaUtils
4
+ class AthenaClient
5
+ # database is the name of the Athena DB
6
+ # output_location is the full S3 path to store the results of Athena queries
7
+ attr_reader :database,
8
+ :output_location
9
+
10
+ def initialize(database, output_location)
11
+ @database = database
12
+ @output_location = output_location
13
+ end
14
+
15
+ def aws_athena_client
16
+ @aws_athena_client ||= create_aws_athena_client
17
+ end
18
+
19
+ def create_aws_athena_client
20
+ Aws::Athena::Client.new
21
+ end
22
+
23
+ def query(query)
24
+ query_execution_id = query_async(query)
25
+ wait([query_execution_id])[query_execution_id]
26
+ end
27
+
28
+ def query_async(query)
29
+ response = aws_athena_client.start_query_execution(
30
+ query_string: query,
31
+ query_execution_context: {
32
+ database: database
33
+ },
34
+ result_configuration: {
35
+ output_location: output_location
36
+ }
37
+ )
38
+
39
+ response.query_execution_id
40
+ end
41
+
42
+ def wait(query_execution_ids)
43
+ results = {}
44
+
45
+ while results.size != query_execution_ids.size
46
+ query_execution_ids.each do |query_execution_id|
47
+ next if results.key?(query_execution_id)
48
+
49
+ query_status = aws_athena_client.get_query_execution(
50
+ query_execution_id: query_execution_id
51
+ )
52
+
53
+ case query_status[:query_execution][:status][:state]
54
+ when 'SUCCEEDED'
55
+ results[query_execution_id] = AthenaQueryResults.new(query_status)
56
+ when 'RUNNING',
57
+ 'QUEUED'
58
+ # no-op
59
+ else
60
+ raise(AthenaQueryError.new("Query failed #{query_status}"))
61
+ end
62
+ end
63
+
64
+ sleep(3) if results.size != query_execution_ids.size
65
+ end
66
+
67
+ results
68
+ end
69
+ end
70
+ end
@@ -0,0 +1,57 @@
1
+ require 'aws-sdk-s3'
2
+ require 'csv-utils'
3
+
4
+ module AthenaUtils
5
+ class AthenaQueryResults
6
+ include Enumerable
7
+
8
+ attr_reader :query_status
9
+
10
+ def initialize(query_status)
11
+ @query_status = query_status
12
+ end
13
+
14
+ def s3_url
15
+ query_status.query_execution.result_configuration.output_location
16
+ end
17
+
18
+ def s3_object
19
+ uri = URI(s3_url)
20
+
21
+ aws_s3_client.get_object(
22
+ {
23
+ bucket: uri.host,
24
+ key: uri.path[1..-1]
25
+ }
26
+ )
27
+ end
28
+
29
+ def save(file)
30
+ uri = URI(s3_url)
31
+
32
+ aws_s3_client.get_object(
33
+ {
34
+ bucket: uri.host,
35
+ key: uri.path[1..-1],
36
+ response_target: file
37
+ }
38
+ )
39
+ end
40
+
41
+ def aws_s3_client
42
+ @aws_s3_client ||= create_aws_s3_client
43
+ end
44
+
45
+ def create_aws_s3_client
46
+ Aws::S3::Client.new
47
+ end
48
+
49
+ def csv_iterator
50
+ @csv_iterator ||= CSVUtils::CSVIterator.new(CSV.new(s3_object.body))
51
+ end
52
+
53
+ def each(&block)
54
+ csv_iterator.each(&block)
55
+ end
56
+ end
57
+ end
data/scripts/console ADDED
@@ -0,0 +1,7 @@
1
+ #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
3
+
4
+ $LOAD_PATH << File.expand_path('../lib', __dir__)
5
+ require 'athena-utils'
6
+ require 'irb'
7
+ IRB.start(__FILE__)
metadata ADDED
@@ -0,0 +1,98 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: athena-utils
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Doug Youch
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2022-02-21 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: aws-sdk-athena
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: '0'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: '0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: aws-sdk-s3
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: csv-utils
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ type: :runtime
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ description: Tools for querying AWS Athena
56
+ email: dougyouch@gmail.com
57
+ executables:
58
+ - athena
59
+ extensions: []
60
+ extra_rdoc_files: []
61
+ files:
62
+ - ".gitignore"
63
+ - ".ruby-gemset"
64
+ - ".ruby-version"
65
+ - Gemfile
66
+ - Gemfile.lock
67
+ - LICENSE.txt
68
+ - README.md
69
+ - athena-utils.gemspec
70
+ - bin/athena
71
+ - lib/athena-utils.rb
72
+ - lib/athena_utils/athena_client.rb
73
+ - lib/athena_utils/athena_query_results.rb
74
+ - scripts/console
75
+ homepage: https://github.com/dougyouch/athena-utils
76
+ licenses:
77
+ - MIT
78
+ metadata: {}
79
+ post_install_message:
80
+ rdoc_options: []
81
+ require_paths:
82
+ - lib
83
+ required_ruby_version: !ruby/object:Gem::Requirement
84
+ requirements:
85
+ - - ">="
86
+ - !ruby/object:Gem::Version
87
+ version: '0'
88
+ required_rubygems_version: !ruby/object:Gem::Requirement
89
+ requirements:
90
+ - - ">="
91
+ - !ruby/object:Gem::Version
92
+ version: '0'
93
+ requirements: []
94
+ rubygems_version: 3.2.3
95
+ signing_key:
96
+ specification_version: 4
97
+ summary: Athena Utils
98
+ test_files: []