athena-utils 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +52 -0
- data/.ruby-gemset +1 -0
- data/.ruby-version +1 -0
- data/Gemfile +7 -0
- data/Gemfile.lock +37 -0
- data/LICENSE.txt +21 -0
- data/README.md +1 -0
- data/athena-utils.gemspec +19 -0
- data/bin/athena +54 -0
- data/lib/athena-utils.rb +5 -0
- data/lib/athena_utils/athena_client.rb +70 -0
- data/lib/athena_utils/athena_query_results.rb +57 -0
- data/scripts/console +7 -0
- metadata +98 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 3f4fe05a4b845153c14b1b655a590a0983b8264853c44289ffb1498247b526d8
|
4
|
+
data.tar.gz: 3685bc1f2542f2e02d6a31a7a7e2acf12ea5f78a13f6b526418f59465939f45e
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 9b42c88bd431a5f3ee5fa5ee8ef889028f7943ebf2df6fec2b0aec982fdb90c0ffdc87338859147c2cff9f10541debcbec5e3f30b5d4963385d4faf00fe9c4e7
|
7
|
+
data.tar.gz: c3a2a4b4bd08c7aeda35552c7caf2abd352eb5b2c206135cc5d4853798948507078166b459bf22d42d33bfd79d9f67c44379a5fb6f61476e729030bbcbcbc0ec
|
data/.gitignore
ADDED
@@ -0,0 +1,52 @@
|
|
1
|
+
*.gem
|
2
|
+
*.rbc
|
3
|
+
/.config
|
4
|
+
/coverage/
|
5
|
+
/InstalledFiles
|
6
|
+
/pkg/
|
7
|
+
/spec/reports/
|
8
|
+
/spec/examples.txt
|
9
|
+
/test/tmp/
|
10
|
+
/test/version_tmp/
|
11
|
+
/tmp/
|
12
|
+
|
13
|
+
# Used by dotenv library to load environment variables.
|
14
|
+
# .env
|
15
|
+
|
16
|
+
## Specific to RubyMotion:
|
17
|
+
.dat*
|
18
|
+
.repl_history
|
19
|
+
build/
|
20
|
+
*.bridgesupport
|
21
|
+
build-iPhoneOS/
|
22
|
+
build-iPhoneSimulator/
|
23
|
+
|
24
|
+
## Specific to RubyMotion (use of CocoaPods):
|
25
|
+
#
|
26
|
+
# We recommend against adding the Pods directory to your .gitignore. However
|
27
|
+
# you should judge for yourself, the pros and cons are mentioned at:
|
28
|
+
# https://guides.cocoapods.org/using/using-cocoapods.html#should-i-check-the-pods-directory-into-source-control
|
29
|
+
#
|
30
|
+
# vendor/Pods/
|
31
|
+
|
32
|
+
## Documentation cache and generated files:
|
33
|
+
/.yardoc/
|
34
|
+
/_yardoc/
|
35
|
+
/doc/
|
36
|
+
/rdoc/
|
37
|
+
|
38
|
+
## Environment normalization:
|
39
|
+
/.bundle/
|
40
|
+
/vendor/bundle
|
41
|
+
/lib/bundler/man/
|
42
|
+
|
43
|
+
# for a library or gem, you might want to ignore these files since the code is
|
44
|
+
# intended to run in multiple environments; otherwise, check them in:
|
45
|
+
# Gemfile.lock
|
46
|
+
# .ruby-version
|
47
|
+
# .ruby-gemset
|
48
|
+
|
49
|
+
# unless supporting rvm < 1.11.0 or doing something fancy, ignore this:
|
50
|
+
.rvmrc
|
51
|
+
|
52
|
+
*~
|
data/.ruby-gemset
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
athena-utils
|
data/.ruby-version
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
3.0.0
|
data/Gemfile
ADDED
data/Gemfile.lock
ADDED
@@ -0,0 +1,37 @@
|
|
1
|
+
GEM
|
2
|
+
remote: http://rubygems.org/
|
3
|
+
specs:
|
4
|
+
aws-eventstream (1.2.0)
|
5
|
+
aws-partitions (1.555.0)
|
6
|
+
aws-sdk-athena (1.49.0)
|
7
|
+
aws-sdk-core (~> 3, >= 3.126.0)
|
8
|
+
aws-sigv4 (~> 1.1)
|
9
|
+
aws-sdk-core (3.126.2)
|
10
|
+
aws-eventstream (~> 1, >= 1.0.2)
|
11
|
+
aws-partitions (~> 1, >= 1.525.0)
|
12
|
+
aws-sigv4 (~> 1.1)
|
13
|
+
jmespath (~> 1.0)
|
14
|
+
aws-sdk-kms (1.54.0)
|
15
|
+
aws-sdk-core (~> 3, >= 3.126.0)
|
16
|
+
aws-sigv4 (~> 1.1)
|
17
|
+
aws-sdk-s3 (1.112.0)
|
18
|
+
aws-sdk-core (~> 3, >= 3.126.0)
|
19
|
+
aws-sdk-kms (~> 1)
|
20
|
+
aws-sigv4 (~> 1.4)
|
21
|
+
aws-sigv4 (1.4.0)
|
22
|
+
aws-eventstream (~> 1, >= 1.0.2)
|
23
|
+
csv-utils (0.3.14)
|
24
|
+
inheritance-helper
|
25
|
+
inheritance-helper (0.2.5)
|
26
|
+
jmespath (1.6.0)
|
27
|
+
|
28
|
+
PLATFORMS
|
29
|
+
x86_64-darwin-20
|
30
|
+
|
31
|
+
DEPENDENCIES
|
32
|
+
aws-sdk-athena
|
33
|
+
aws-sdk-s3
|
34
|
+
csv-utils
|
35
|
+
|
36
|
+
BUNDLED WITH
|
37
|
+
2.2.3
|
data/LICENSE.txt
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
MIT License
|
2
|
+
|
3
|
+
Copyright (c) 2019 Douglas Youch
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
7
|
+
in the Software without restriction, including without limitation the rights
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
10
|
+
furnished to do so, subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
13
|
+
copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
21
|
+
SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
# AWS Athena Utils
|
data/athena-utils.gemspec
ADDED
@@ -0,0 +1,19 @@
|
|
1
|
+
# frozen_string_literal: true

# Gem specification for athena-utils: package metadata, the list of files to
# ship (read from git) and the runtime dependencies.
Gem::Specification.new do |spec|
  spec.name        = 'athena-utils'
  spec.version     = '0.1.0'
  spec.licenses    = ['MIT']
  spec.summary     = 'Athena Utils'
  spec.description = 'Tools for querying AWS Athena'
  spec.authors     = ['Doug Youch']
  spec.email       = 'dougyouch@gmail.com'
  spec.homepage    = 'https://github.com/dougyouch/athena-utils'

  # Ship every git-tracked file except those under test/, spec/ or features/.
  tracked_files = `git ls-files -z`.split("\x0")
  spec.files = tracked_files.reject { |path| path.match(%r{^(test|spec|features)/}) }

  # Each shipped file under bin/ is registered as an executable by base name.
  spec.bindir      = 'bin'
  spec.executables = spec.files.grep(%r{^bin/}).map { |path| File.basename(path) }

  spec.add_runtime_dependency 'aws-sdk-athena'
  spec.add_runtime_dependency 'aws-sdk-s3'
  spec.add_runtime_dependency 'csv-utils'
end
|
data/bin/athena
ADDED
@@ -0,0 +1,54 @@
|
|
1
|
+
#!/usr/bin/env ruby

# Command line front end for athena-utils.
#
# Parses the options below, builds an AthenaUtils::AthenaClient and then:
#   * with --query and --save: runs the query, saves the results to the given
#     file and exits;
#   * with --query only: runs the query and opens an IRB session;
#   * otherwise: opens an IRB session straight away.
#
# Inside IRB the client is reachable through the `athena` helper and, when a
# query was given, its results through the `results` helper.

require 'athena-utils'
require 'optparse'

options = {
  database: nil,
  output_location: nil,
  query: nil,
  save: nil
}

OptionParser.new do |opts|
  opts.banner = 'Usage: athena [options]'

  opts.on('-d', '--database DATABASE', 'Athena DB') do |v|
    options[:database] = v
  end

  opts.on('-o', '--output-location OUTPUT_LOCATION', 'S3 output location for athena queries') do |v|
    options[:output_location] = v
  end

  opts.on('-q', '--query QUERY', 'SQL Query') do |v|
    options[:query] = v
  end

  opts.on('-s', '--save FILE', 'Save query results to file') do |v|
    options[:save] = v
  end
end.parse!

# Both values are required to build the client.
raise('must specify a database') unless options[:database]
raise('must specify output location for athena queries') unless options[:output_location]

# Stored in an instance variable of the top-level object and exposed through a
# method so the value stays reachable from the IRB session started below.
@athena = AthenaUtils::AthenaClient.new(options[:database], options[:output_location])
def athena
  @athena
end

if options[:query]
  @results = athena.query(options[:query])
  def results
    @results
  end

  if options[:save]
    results.save(options[:save])
    # Results were written to disk; no interactive session in this mode.
    exit
  end
end

require 'irb'
IRB.start
|
54
|
+
|
data/lib/athena-utils.rb
ADDED
data/lib/athena_utils/athena_client.rb
ADDED
@@ -0,0 +1,70 @@
|
|
1
|
+
require 'aws-sdk-athena'
|
2
|
+
|
3
|
+
module AthenaUtils
  # Raised by AthenaClient#wait when a query reaches a state other than
  # SUCCEEDED, RUNNING or QUEUED.
  # NOTE(review): no file visible alongside this one defines AthenaQueryError,
  # so it is defined here unless the constant already exists - confirm it is
  # not meant to live elsewhere.
  AthenaQueryError = Class.new(StandardError) unless const_defined?(:AthenaQueryError)

  # Runs SQL queries against one Athena database and waits for them to finish.
  class AthenaClient
    # database is the name of the Athena DB
    # output_location is the full S3 path to store the results of Athena queries
    attr_reader :database,
                :output_location

    # @param database [String] name of the Athena DB
    # @param output_location [String] S3 path passed to Athena as the result
    #   output location
    def initialize(database, output_location)
      @database = database
      @output_location = output_location
    end

    # Memoized AWS SDK client; built on first use by create_aws_athena_client.
    def aws_athena_client
      @aws_athena_client ||= create_aws_athena_client
    end

    # Builds the SDK client with its default configuration.
    def create_aws_athena_client
      Aws::Athena::Client.new
    end

    # Starts the query and blocks until it has finished.
    #
    # @param query [String] SQL to execute
    # @return [AthenaQueryResults] results of the finished query
    # @raise [AthenaQueryError] see #wait
    def query(query)
      query_execution_id = query_async(query)
      wait([query_execution_id])[query_execution_id]
    end

    # Starts the query without waiting for it.
    #
    # @param query [String] SQL to execute
    # @return the query execution id reported by start_query_execution
    def query_async(query)
      response = aws_athena_client.start_query_execution(
        query_string: query,
        query_execution_context: {
          database: database
        },
        result_configuration: {
          output_location: output_location
        }
      )

      response.query_execution_id
    end

    # Polls the given query executions until every one of them has succeeded.
    #
    # @param query_execution_ids [Array] ids returned by #query_async;
    #   duplicates are polled once
    # @param poll_interval [Numeric] seconds to sleep between polling rounds
    # @return [Hash] query execution id => AthenaQueryResults
    # @raise [AthenaQueryError] as soon as a query reports a state other than
    #   SUCCEEDED, RUNNING or QUEUED
    def wait(query_execution_ids, poll_interval: 3)
      # De-duplicate: results is keyed by id, so comparing its size with a list
      # that contains repeated ids would never terminate.
      pending = query_execution_ids.uniq
      results = {}

      until pending.empty?
        # Drop every id whose query has succeeded; keep the ones still running.
        pending.reject! do |query_execution_id|
          query_status = aws_athena_client.get_query_execution(
            query_execution_id: query_execution_id
          )

          case query_status[:query_execution][:status][:state]
          when 'SUCCEEDED'
            results[query_execution_id] = AthenaQueryResults.new(query_status)
            true
          when 'RUNNING', 'QUEUED'
            false
          else
            raise AthenaQueryError, "Query failed #{query_status}"
          end
        end

        # Only pause when another polling round is actually needed.
        sleep(poll_interval) unless pending.empty?
      end

      results
    end
  end
end
|
data/lib/athena_utils/athena_query_results.rb
ADDED
@@ -0,0 +1,57 @@
|
|
1
|
+
require 'aws-sdk-s3'
|
2
|
+
require 'csv-utils'
|
3
|
+
|
4
|
+
module AthenaUtils
  # Wraps the get_query_execution response of a finished Athena query and gives
  # access to the result file stored at the query's S3 output location.
  # Enumerable is driven by #each, which iterates the file through a
  # CSVUtils::CSVIterator.
  class AthenaQueryResults
    include Enumerable

    attr_reader :query_status

    # @param query_status response object describing the query execution; it
    #   must answer query_execution.result_configuration.output_location
    def initialize(query_status)
      @query_status = query_status
    end

    # @return the output location recorded in the query status
    def s3_url
      query_status.query_execution.result_configuration.output_location
    end

    # Fetches the result file from S3.
    #
    # @return the response of Aws::S3::Client#get_object
    def s3_object
      aws_s3_client.get_object(s3_location)
    end

    # Downloads the result file from S3 straight into +file+.
    #
    # @param file passed to get_object as :response_target
    # @return the response of Aws::S3::Client#get_object
    def save(file)
      aws_s3_client.get_object(s3_location.merge(response_target: file))
    end

    # Memoized AWS SDK client; built on first use by create_aws_s3_client.
    def aws_s3_client
      @aws_s3_client ||= create_aws_s3_client
    end

    # Builds the SDK client with its default configuration.
    def create_aws_s3_client
      Aws::S3::Client.new
    end

    # Memoized iterator over the body of the S3 result object.
    # NOTE(review): because the iterator and its underlying body are reused,
    # a second full iteration may find the stream already consumed - confirm
    # against CSVUtils::CSVIterator.
    def csv_iterator
      @csv_iterator ||= CSVUtils::CSVIterator.new(CSV.new(s3_object.body))
    end

    # Yields whatever csv_iterator yields; returns an Enumerator when called
    # without a block, following the usual Enumerable convention.
    def each(&block)
      return enum_for(:each) unless block

      csv_iterator.each(&block)
    end

    private

    # Splits s3_url into the :bucket / :key pair expected by get_object.
    # The URL host is used as the bucket and the path, minus its leading
    # character (the slash), as the key.
    def s3_location
      uri = URI(s3_url)

      {
        bucket: uri.host,
        key: uri.path[1..-1]
      }
    end
  end
end
|
data/scripts/console
ADDED
metadata
ADDED
@@ -0,0 +1,98 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: athena-utils
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Doug Youch
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2022-02-21 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: aws-sdk-athena
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ">="
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '0'
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ">="
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '0'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: aws-sdk-s3
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ">="
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '0'
|
34
|
+
type: :runtime
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - ">="
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '0'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: csv-utils
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - ">="
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '0'
|
48
|
+
type: :runtime
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - ">="
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '0'
|
55
|
+
description: Tools for querying AWS Athena
|
56
|
+
email: dougyouch@gmail.com
|
57
|
+
executables:
|
58
|
+
- athena
|
59
|
+
extensions: []
|
60
|
+
extra_rdoc_files: []
|
61
|
+
files:
|
62
|
+
- ".gitignore"
|
63
|
+
- ".ruby-gemset"
|
64
|
+
- ".ruby-version"
|
65
|
+
- Gemfile
|
66
|
+
- Gemfile.lock
|
67
|
+
- LICENSE.txt
|
68
|
+
- README.md
|
69
|
+
- athena-utils.gemspec
|
70
|
+
- bin/athena
|
71
|
+
- lib/athena-utils.rb
|
72
|
+
- lib/athena_utils/athena_client.rb
|
73
|
+
- lib/athena_utils/athena_query_results.rb
|
74
|
+
- scripts/console
|
75
|
+
homepage: https://github.com/dougyouch/athena-utils
|
76
|
+
licenses:
|
77
|
+
- MIT
|
78
|
+
metadata: {}
|
79
|
+
post_install_message:
|
80
|
+
rdoc_options: []
|
81
|
+
require_paths:
|
82
|
+
- lib
|
83
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
84
|
+
requirements:
|
85
|
+
- - ">="
|
86
|
+
- !ruby/object:Gem::Version
|
87
|
+
version: '0'
|
88
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
89
|
+
requirements:
|
90
|
+
- - ">="
|
91
|
+
- !ruby/object:Gem::Version
|
92
|
+
version: '0'
|
93
|
+
requirements: []
|
94
|
+
rubygems_version: 3.2.3
|
95
|
+
signing_key:
|
96
|
+
specification_version: 4
|
97
|
+
summary: Athena Utils
|
98
|
+
test_files: []
|