athena-utils 0.1.0 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile +0 -1
- data/README.md +2 -0
- data/athena-utils.gemspec +1 -2
- data/bin/athena +1 -1
- data/lib/athena_utils/athena_client.rb +21 -3
- data/lib/athena_utils/athena_query_results.rb +16 -13
- metadata +2 -16
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: f60738b7ecb0c8155fbdf4f85e0bc0371905840808af83eace67328b9a65177b
|
4
|
+
data.tar.gz: 8055dfa958fe0e4a3d8d7364a75e7a6ee500ea32316677472c80c20525c47d82
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: aeccd09b1b52e06d5c4f7da44ae6b0538f2c779e959d22a2aa3b117709da72c463d409fa640af51208f1e5fc8fecb2cb8e891a45868f4ac9131f01202fbe5b24
|
7
|
+
data.tar.gz: 9485169a4b35602f55b7cf60321cc4629a2798b7380f6ec3d8edf4045c03debe20c362f520136fbd858726d65af15c1bf6b4c04cba16473c8dda4d39c6d1d7ce
|
data/Gemfile
CHANGED
data/README.md
CHANGED
data/athena-utils.gemspec
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
|
3
3
|
Gem::Specification.new do |s|
|
4
4
|
s.name = 'athena-utils'
|
5
|
-
s.version = '0.1.0'
|
5
|
+
s.version = '0.1.1'
|
6
6
|
s.licenses = ['MIT']
|
7
7
|
s.summary = 'Athena Utils'
|
8
8
|
s.description = 'Tools for querying AWS Athena'
|
@@ -15,5 +15,4 @@ Gem::Specification.new do |s|
|
|
15
15
|
|
16
16
|
s.add_runtime_dependency 'aws-sdk-athena'
|
17
17
|
s.add_runtime_dependency 'aws-sdk-s3'
|
18
|
-
s.add_runtime_dependency 'csv-utils'
|
19
18
|
end
|
data/bin/athena
CHANGED
data/lib/athena_utils/athena_client.rb
CHANGED
@@ -2,14 +2,23 @@ require 'aws-sdk-athena'
|
|
2
2
|
|
3
3
|
module AthenaUtils
|
4
4
|
class AthenaClient
|
5
|
+
DEFAULT_WAIT_TIME = 3 # seconds
|
6
|
+
|
5
7
|
# database is the name of the Athena DB
|
6
8
|
# output_location is the full S3 path to store the results of Athena queries
|
7
9
|
attr_reader :database,
|
8
10
|
:output_location
|
9
11
|
|
10
|
-
|
12
|
+
# wait_time is time to wait before checking query results again
|
13
|
+
attr_accessor :wait_time
|
14
|
+
|
15
|
+
attr_writer :aws_athena_client,
|
16
|
+
:aws_s3_client
|
17
|
+
|
18
|
+
def initialize(database, output_location, wait_time = DEFAULT_WAIT_TIME)
|
11
19
|
@database = database
|
12
20
|
@output_location = output_location
|
21
|
+
@wait_time = wait_time
|
13
22
|
end
|
14
23
|
|
15
24
|
def aws_athena_client
|
@@ -20,6 +29,14 @@ module AthenaUtils
|
|
20
29
|
Aws::Athena::Client.new
|
21
30
|
end
|
22
31
|
|
32
|
+
def aws_s3_client
|
33
|
+
@aws_s3_client ||= create_aws_s3_client
|
34
|
+
end
|
35
|
+
|
36
|
+
def create_aws_s3_client
|
37
|
+
Aws::S3::Client.new
|
38
|
+
end
|
39
|
+
|
23
40
|
def query(query)
|
24
41
|
query_execution_id = query_async(query)
|
25
42
|
wait([query_execution_id])[query_execution_id]
|
@@ -52,16 +69,17 @@ module AthenaUtils
|
|
52
69
|
|
53
70
|
case query_status[:query_execution][:status][:state]
|
54
71
|
when 'SUCCEEDED'
|
55
|
-
results[query_execution_id] = AthenaQueryResults.new(query_status)
|
72
|
+
results[query_execution_id] = AthenaQueryResults.new(query_status, aws_s3_client)
|
56
73
|
when 'RUNNING',
|
57
74
|
'QUEUED'
|
58
75
|
# no-op
|
76
|
+
next
|
59
77
|
else
|
60
78
|
raise(AthenaQueryError.new("Query failed #{query_status}"))
|
61
79
|
end
|
62
80
|
end
|
63
81
|
|
64
|
-
sleep(
|
82
|
+
sleep(wait_time) if results.size != query_execution_ids.size
|
65
83
|
end
|
66
84
|
|
67
85
|
results
|
data/lib/athena_utils/athena_query_results.rb
CHANGED
@@ -1,14 +1,16 @@
|
|
1
1
|
require 'aws-sdk-s3'
|
2
|
-
require 'csv
|
2
|
+
require 'csv'
|
3
3
|
|
4
4
|
module AthenaUtils
|
5
5
|
class AthenaQueryResults
|
6
6
|
include Enumerable
|
7
7
|
|
8
|
-
attr_reader :query_status
|
8
|
+
attr_reader :query_status,
|
9
|
+
:aws_s3_client
|
9
10
|
|
10
|
-
def initialize(query_status)
|
11
|
+
def initialize(query_status, aws_s3_client)
|
11
12
|
@query_status = query_status
|
13
|
+
@aws_s3_client = aws_s3_client
|
12
14
|
end
|
13
15
|
|
14
16
|
def s3_url
|
@@ -38,20 +40,21 @@ module AthenaUtils
|
|
38
40
|
)
|
39
41
|
end
|
40
42
|
|
41
|
-
def
|
42
|
-
@
|
43
|
+
def csv
|
44
|
+
@csv ||= CSV.new(s3_object.body)
|
43
45
|
end
|
44
46
|
|
45
|
-
def
|
46
|
-
|
47
|
+
def headers
|
48
|
+
csv.rewind
|
49
|
+
csv.shift
|
47
50
|
end
|
48
51
|
|
49
|
-
def
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
52
|
+
def each
|
53
|
+
csv.rewind
|
54
|
+
headers = csv.shift
|
55
|
+
while (row = csv.shift)
|
56
|
+
yield Hash[headers.zip(row)]
|
57
|
+
end
|
55
58
|
end
|
56
59
|
end
|
57
60
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: athena-utils
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.0
|
4
|
+
version: 0.1.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Doug Youch
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-02-
|
11
|
+
date: 2022-02-22 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: aws-sdk-athena
|
@@ -38,20 +38,6 @@ dependencies:
|
|
38
38
|
- - ">="
|
39
39
|
- !ruby/object:Gem::Version
|
40
40
|
version: '0'
|
41
|
-
- !ruby/object:Gem::Dependency
|
42
|
-
name: csv-utils
|
43
|
-
requirement: !ruby/object:Gem::Requirement
|
44
|
-
requirements:
|
45
|
-
- - ">="
|
46
|
-
- !ruby/object:Gem::Version
|
47
|
-
version: '0'
|
48
|
-
type: :runtime
|
49
|
-
prerelease: false
|
50
|
-
version_requirements: !ruby/object:Gem::Requirement
|
51
|
-
requirements:
|
52
|
-
- - ">="
|
53
|
-
- !ruby/object:Gem::Version
|
54
|
-
version: '0'
|
55
41
|
description: Tools for querying AWS Athena
|
56
42
|
email: dougyouch@gmail.com
|
57
43
|
executables:
|