egis 1.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/egis.gemspec +30 -0
- data/lib/egis/aws_client_provider.rb +30 -0
- data/lib/egis/cartesian_product_generator.rb +17 -0
- data/lib/egis/client.rb +142 -0
- data/lib/egis/configuration.rb +9 -0
- data/lib/egis/database.rb +102 -0
- data/lib/egis/errors.rb +12 -0
- data/lib/egis/output_downloader.rb +21 -0
- data/lib/egis/output_parser.rb +24 -0
- data/lib/egis/partitions_generator.rb +55 -0
- data/lib/egis/query_output_location.rb +13 -0
- data/lib/egis/query_status.rb +76 -0
- data/lib/egis/s3_cleaner.rb +22 -0
- data/lib/egis/s3_location_parser.rb +14 -0
- data/lib/egis/standard_mode.rb +18 -0
- data/lib/egis/table.rb +163 -0
- data/lib/egis/table_data_wiper.rb +51 -0
- data/lib/egis/table_ddl_generator.rb +50 -0
- data/lib/egis/table_schema.rb +49 -0
- data/lib/egis/testing/testing_mode.rb +62 -0
- data/lib/egis/testing.rb +48 -0
- data/lib/egis/types/boolean_serializer.rb +53 -0
- data/lib/egis/types/default_serializer.rb +20 -0
- data/lib/egis/types/integer_serializer.rb +20 -0
- data/lib/egis/types/null_serializer.rb +36 -0
- data/lib/egis/types/string_serializer.rb +20 -0
- data/lib/egis/types/timestamp_serializer.rb +22 -0
- data/lib/egis/types.rb +30 -0
- data/lib/egis/version.rb +5 -0
- data/lib/egis.rb +62 -0
- metadata +106 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 1d2255a76aef464d0d8faee0c39be753e1a928133a25de5c4a86f553e51e92ca
|
4
|
+
data.tar.gz: f18ae28053651576ccf941bd42f5a9bf40db32fed3c1c2025e1bc987844199f8
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 9b143dbc650f8c02ba39f1404a2d9b4ebe36c49182a36b947a1d091079b1c94839320c2d05dd9a0db1516d82c9b03694244e4fa467898302e888addb234e1291
|
7
|
+
data.tar.gz: a1577d30cbfd63632dbd55052adf0f863f7a5f571e8a8f8a210649446acc786764d8bec6a15be7cd91742f4f7681bfc1d379a9099934f62b8a30379d12a4bd77
|
data/egis.gemspec
ADDED
@@ -0,0 +1,30 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# Make the gem's own lib/ directory loadable so the version constant can be required below.
gem_lib_dir = File.expand_path('lib', __dir__)
$LOAD_PATH.unshift(gem_lib_dir) unless $LOAD_PATH.include?(gem_lib_dir)
require 'egis/version'

# Packaging manifest for the egis gem.
Gem::Specification.new do |gem|
  gem.name = 'egis'
  gem.version = Egis::VERSION
  gem.authors = ['Agnieszka Czereba', 'Marek Mateja']
  gem.email = %w[agnieszka.czereba@u2i.com marek.mateja@u2i.com]

  gem.summary = 'A handy wrapper for AWS Athena Ruby SDK.'
  gem.homepage = 'https://github.com/u2i/egis'
  gem.license = 'MIT'

  # Homepage and source code both live in the same repository.
  %w[homepage_uri source_code_uri].each { |uri_key| gem.metadata[uri_key] = gem.homepage }
  gem.metadata['changelog_uri'] = 'https://github.com/u2i/egis/blob/master/CHANGELOG.md'

  # Package only the gemspec and the lib/ files tracked by git.
  # `git ls-files -z` prints NUL-separated paths, hence the split on "\x0".
  gem.files = Dir.chdir(File.expand_path(__dir__)) do
    `git ls-files -z egis.gemspec lib/`.split("\x0")
  end
  gem.require_paths = ['lib']

  gem.add_dependency 'aws-sdk-athena', '~> 1.0'
  gem.add_dependency 'aws-sdk-s3', '~> 1.0'
end
|
@@ -0,0 +1,30 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'aws-sdk-s3'
|
4
|
+
require 'aws-sdk-athena'
|
5
|
+
|
6
|
+
module Egis
  # Creates AWS SDK clients using the options found in {Egis.configuration}.
  #
  # @!visibility private
  class AwsClientProvider
    # @return [Aws::S3::Client] a new S3 client built from the Egis configuration
    def s3_client
      Aws::S3::Client.new(client_config)
    end

    # @return [Aws::Athena::Client] a new Athena client built from the Egis configuration
    def athena_client
      Aws::Athena::Client.new(client_config)
    end

    private

    # Collects the AWS client options that are set in the Egis configuration.
    # Options whose configured value is nil or false are left out of the hash.
    #
    # @return [Hash] subset of :region, :access_key_id, :secret_access_key, :profile
    def client_config
      settings = Egis.configuration

      {
        region: settings.aws_region,
        access_key_id: settings.aws_access_key_id,
        secret_access_key: settings.aws_secret_access_key,
        profile: settings.aws_profile
      }.select { |_option, value| value }
    end
  end
end
|
@@ -0,0 +1,17 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Egis
  # Expands a hash of candidate values per key into every key/value combination.
  #
  # @!visibility private
  class CartesianProductGenerator
    ##
    # Builds the cartesian product of the value lists, keeping each value paired with its key.
    #
    # @example
    #   cartesian_product(a: [1, 2], b: ['x'])
    #   # => [[[:a, 1], [:b, 'x']], [[:a, 2], [:b, 'x']]]
    #
    # @param [Hash{Object => Array}] values_by_key candidate values for every key
    # @return [Array<Array<Array>>] one entry per combination; each entry is a list of [key, value] pairs
    #   in the hash's key order. An empty hash yields an empty array.

    def cartesian_product(values_by_key)
      # Without any keys there is no first value list to start the product from.
      return [] if values_by_key.empty?

      keys = values_by_key.keys
      head, *tail = values_by_key.values

      # `tail` is always an Array (possibly empty); Array#product with no arguments wraps every
      # element in a one-element array, so the single-key case needs no special handling.
      head.product(*tail).map { |combination| keys.zip(combination) }
    end
  end
end
|
data/lib/egis/client.rb
ADDED
@@ -0,0 +1,142 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Egis
  ##
  # Entry point of {Egis}: schedules Athena queries and reports on their progress.
  #
  # Configure the library first, see {Egis.configure}.
  #
  # @see Egis.configure
  #
  # @example Create client and execute asynchronous query
  #   client = Egis::Client.new
  #   status = client.execute_query('SELECT * FROM my_table;')
  #
  #   while status.in_progress?
  #     # do something useful
  #     # ...
  #     status = client.query_status(status.id)
  #   end
  #
  #   status.output_location.url # s3://my-bucket/result/path
  #
  # @example Execute synchronous query and fetch results
  #   status = client.execute_query('SELECT MAX(time), MIN(id) FROM my_table;', async: false)
  #   status.fetch_result(schema: [:timestamp, :int]) # [[2020-05-04 11:19:03 +0200, 7]]
  #
  class Client
    # Translates Athena execution states into Egis::QueryStatus values.
    QUERY_STATUS_MAPPING = {
      'QUEUED' => Egis::QueryStatus::QUEUED,
      'RUNNING' => Egis::QueryStatus::RUNNING,
      'SUCCEEDED' => Egis::QueryStatus::FINISHED,
      'FAILED' => Egis::QueryStatus::FAILED,
      'CANCELLED' => Egis::QueryStatus::CANCELLED
    }.freeze

    # Seconds to sleep before the n-th status check when none is configured: 0.5, 1.25, 2.375, ...
    DEFAULT_QUERY_STATUS_BACKOFF = ->(attempt) { (1.5**attempt) - 1 }

    private_constant :QUERY_STATUS_MAPPING, :DEFAULT_QUERY_STATUS_BACKOFF

    def initialize(aws_client_provider: Egis::AwsClientProvider.new, s3_location_parser: Egis::S3LocationParser.new)
      @aws_athena_client = aws_client_provider.athena_client
      @s3_location_parser = s3_location_parser
      @query_status_backoff = Egis.configuration.query_status_backoff || DEFAULT_QUERY_STATUS_BACKOFF
    end

    ##
    # Builds an {Egis::Database} object bound to this client. No Athena database is created by this call.
    #
    # @param [String] database_name
    # @return [Egis::Database]

    def database(database_name)
      Database.new(database_name, client: self)
    end

    ##
    # Executes Athena query. Queries run asynchronously unless requested otherwise.
    #
    # @param [String] query SQL query to execute
    # @param [Boolean] async Whether to return right after scheduling (true) or block until the query ends (false)
    # @param [String] work_group Athena work group the query will be executed in.
    # @param [String] database Run query in the context of a specific database (implicit table references are
    #   expected to be in given database).
    # @param [String] output_location S3 url of the desired output location. By default, Athena uses the location
    #   defined by the workgroup.
    # @return [Egis::QueryStatus]
    # @raise [Egis::Errors::QueryExecutionError] when a blocking query ends in any state other than finished

    def execute_query(query, work_group: nil, database: nil, output_location: nil, async: true)
      request = query_execution_params(query, work_group, database, output_location)
      execution_id = aws_athena_client.start_query_execution(request).query_execution_id

      # The current mode has the final say on whether the call is really asynchronous.
      return query_status(execution_id) if Egis.mode.async(async)

      final_status = wait_for_query_to_finish(execution_id)
      return final_status if final_status.finished?

      raise Egis::Errors::QueryExecutionError, final_status.message
    end

    ##
    # Looks up the current status of a previously scheduled query.
    #
    # @param [String] query_id Query id from {Egis::QueryStatus} returned by {#execute_query} method
    # @return [Egis::QueryStatus]

    def query_status(query_id)
      execution = aws_athena_client.get_query_execution(query_execution_id: query_id).query_execution
      execution_status = execution.status

      Egis::QueryStatus.new(
        execution.query_execution_id,
        QUERY_STATUS_MAPPING.fetch(execution_status.state),
        execution_status.state_change_reason,
        parse_output_location(execution)
      )
    end

    private

    attr_reader :aws_athena_client, :s3_location_parser, :query_status_backoff

    # Assembles the request for Athena's start_query_execution; optional entries are added only when given.
    def query_execution_params(query, work_group, database, output_location)
      effective_work_group = work_group || Egis.configuration.work_group

      {query_string: query}.tap do |request|
        request[:work_group] = effective_work_group if effective_work_group
        request[:query_execution_context] = {database: database_name(database)} if database
        request[:result_configuration] = {output_location: translate_path(output_location)} if output_location
      end
    end

    # Polls the query status, sleeping according to the backoff before every check (the first included),
    # and returns the first status that is neither queued nor running.
    def wait_for_query_to_finish(query_execution_id)
      attempt = 0

      loop do
        attempt += 1
        sleep(query_status_backoff.call(attempt))

        current_status = query_status(query_execution_id)
        return current_status unless current_status.queued? || current_status.running?
      end
    end

    # Wraps the execution's output URL together with its parsed bucket and key.
    def parse_output_location(query_execution)
      output_url = query_execution.result_configuration.output_location
      bucket, key = s3_location_parser.parse_url(output_url)

      QueryOutputLocation.new(output_url, bucket, key)
    end

    # Lets the current mode rewrite an S3 URL.
    def translate_path(s3_url)
      Egis.mode.s3_path(s3_url)
    end

    # Lets the current mode rewrite a database name.
    def database_name(name)
      Egis.mode.database_name(name)
    end
  end
end
|
@@ -0,0 +1,102 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Egis
  ##
  # Interface for database manipulation and querying.
  #
  # Extends the interface of {Egis::Client} but all the queries scheduled using {Egis::Database} are executed
  # within the database's context. SQL table references without explicit database will implicitly refer to
  # the database they are executed from.
  #
  # It is recommended to create database objects using {Egis::Client#database} method.
  #
  class Database
    def initialize(database_name, client: Egis::Client.new, output_downloader: Egis::OutputDownloader.new)
      @client = client
      @database_name = database_name
      @output_downloader = output_downloader
    end

    ##
    # Creates {Egis::Table} object. Executing it doesn't create Athena table yet.
    #
    # @param [String] table_name
    # @param [Egis::TableSchema] table_schema
    # @param [String] table_location S3 URL with table location (e.g. `s3://s3_bucket/table/location/`)
    # @param [:tsv, :csv, :orc] format Table format (defaults to :tsv)
    # @return [Egis::Table]

    def table(table_name, table_schema, table_location, **options)
      Table.new(self, table_name, table_schema, table_location, options: options)
    end

    ##
    # Creates database in Athena. Does nothing if it already exists.
    #
    # @return [void]

    def create
      client.execute_query("CREATE DATABASE IF NOT EXISTS #{translate_name(database_name)};", async: false)
    end

    ##
    # The same as {#create} but raising error if the database already exists.
    #
    # @return [void]

    def create!
      client.execute_query("CREATE DATABASE #{translate_name(database_name)};", async: false)
    end

    ##
    # Removes database in Athena. Does nothing if it does not exist.
    #
    # @return [void]

    def drop
      client.execute_query("DROP DATABASE IF EXISTS #{translate_name(database_name)} CASCADE;", async: false)
    end

    ##
    # The same as {#drop} but raising error if the database does not exist.
    #
    # @return [void]

    def drop!
      client.execute_query("DROP DATABASE #{translate_name(database_name)} CASCADE;", async: false)
    end

    ##
    # (see Egis::Client#execute_query)

    def execute_query(query, **options)
      # Caller-supplied options come last so an explicit :database still overrides the default.
      client.execute_query(query, database: database_name, **options)
    end

    ##
    # (see Egis::Client#query_status)

    def query_status(query_id)
      client.query_status(query_id)
    end

    ##
    # Checks whether database with such name exists in Athena.
    #
    # @return [Boolean]

    def exists?
      # Look up the same translated name that #create and #drop operate on; using the raw name
      # would check a different database whenever the current mode rewrites names.
      athena_name = translate_name(database_name)

      query_status = client.execute_query("SHOW DATABASES LIKE '#{athena_name}';", async: false)
      parsed_result = output_downloader.download(query_status.output_location)

      # LIKE is only a pattern match, so confirm an exact name is present in the result.
      parsed_result.flatten.include?(athena_name)
    end

    private

    attr_reader :client, :database_name, :output_downloader

    # Lets the current mode rewrite the database name.
    def translate_name(name)
      Egis.mode.database_name(name)
    end
  end
end
|
data/lib/egis/errors.rb
ADDED
@@ -0,0 +1,12 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Egis
  # Namespace for every exception class defined by Egis.
  module Errors
    # Common ancestor of all Egis errors, so callers can rescue them with a single clause.
    class Error < StandardError
    end

    # NOTE(review): presumably raised for a table format Egis cannot handle - confirm at the raise site.
    class UnsupportedTableFormat < Error
    end

    # Raised by Egis::Client#execute_query when a blocking query ends in a state other than finished.
    class QueryExecutionError < Error
    end

    # Raised by Egis::PartitionsGenerator when partition values are missing.
    class PartitionError < Error
    end

    # NOTE(review): presumably raised by the type serializers - confirm at the raise site.
    # Distinct from Ruby's top-level ::TypeError.
    class TypeError < Error
    end
  end
end
|
@@ -0,0 +1,21 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'csv'
|
4
|
+
|
5
|
+
module Egis
  # Fetches a query output file from S3 and parses it as CSV.
  #
  # @!visibility private
  class OutputDownloader
    def initialize(aws_client_provider: Egis::AwsClientProvider.new)
      @s3_client = aws_client_provider.s3_client
    end

    # Downloads the object the location points at and parses its whole body as CSV.
    #
    # @param output_location object responding to #bucket and #key
    # @return [Array<Array<String>>] parsed rows, in file order
    def download(output_location)
      s3_object = s3_client.get_object(bucket: output_location.bucket, key: output_location.key)
      raw_csv = s3_object.body.read

      CSV.parse(raw_csv)
    end

    private

    attr_reader :s3_client
  end
end
|
@@ -0,0 +1,24 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Egis
  # Converts raw query output rows (arrays of strings) into Ruby values.
  #
  # @!visibility private
  class OutputParser
    # Drops the header row and deserializes every remaining cell with the serializer of its column.
    #
    # @param [Array<Array<String>>] output parsed query output; the first row is the header
    # @param [Array] types expected column types, by position; columns without a type use the default serializer
    # @return [Array<Array>] deserialized content rows; empty when the output has no rows at all
    def parse(output, types)
      # An output without any rows has no header to derive the column serializers from.
      return [] if output.empty?

      header, *rows = output
      column_serializers = serializers(header, types)

      rows.map do |row|
        row.zip(column_serializers).map { |string, serializer| serializer.load(string) }
      end
    end

    private

    # One serializer per header column; zipping with the header pads missing types with nil.
    def serializers(row, types)
      row.zip(types).map { |_, type| type ? Types.serializer(type) : Types::DefaultSerializer.new }
    end
  end
end
|
@@ -0,0 +1,55 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Egis
  # Renders the SQL statement that adds partitions to a table.
  #
  # @!visibility private
  class PartitionsGenerator
    def initialize(cartesian_product_generator: Egis::CartesianProductGenerator.new)
      @cartesian_product_generator = cartesian_product_generator
    end

    # Builds an `ALTER TABLE ... ADD` statement with one PARTITION clause per combination of the given values.
    #
    # @param [String] table_name
    # @param [Hash] values_by_partition list of values for every partition column
    # @param [Boolean] permissive adds `IF NOT EXISTS` to the statement when true
    # @return [String] the SQL statement, terminated with a semicolon and a newline
    # @raise [Egis::Errors::PartitionError] when the hash is nil, empty or contains an empty value list
    def to_sql(table_name, values_by_partition, permissive: false)
      validate_partition_values(values_by_partition)

      existence_guard = permissive_statement(permissive)
      partition_clauses = partitions_definition(values_by_partition)

      <<~SQL
        ALTER TABLE #{table_name} ADD #{existence_guard}
        #{partition_clauses};
      SQL
    end

    private

    attr_reader :cartesian_product_generator

    def validate_partition_values(values_by_partition)
      return unless partition_values_missing?(values_by_partition)

      raise Errors::PartitionError, 'Partition value(s) missing'
    end

    # True when there is nothing to generate partitions from.
    def partition_values_missing?(values_by_partition)
      return true if values_by_partition.nil? || values_by_partition.empty?

      values_by_partition.values.any?(&:empty?)
    end

    # @return [String, nil] the guard clause, or nil when not permissive
    def permissive_statement(permissive)
      permissive ? 'IF NOT EXISTS' : nil
    end

    # One PARTITION clause per line, for every combination of partition values.
    def partitions_definition(values_by_partition)
      combinations = cartesian_product_generator.cartesian_product(values_by_partition)
      clauses = combinations.map { |combination| partition_values_clause(combination) }
      clauses.join("\n")
    end

    def partition_values_clause(partition_values_combination)
      assignments = partition_values(partition_values_combination)
      "PARTITION (#{assignments.join(', ')})"
    end

    # Renders `name = value` pairs; only String values are wrapped in single quotes.
    def partition_values(partition_values_combination)
      partition_values_combination.map do |partition_name, value|
        rendered_value = value.is_a?(String) ? "'#{value}'" : value
        "#{partition_name} = #{rendered_value}"
      end
    end
  end
end
|
@@ -0,0 +1,13 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Egis
  ##
  # Describes where the output of a query is stored.
  #
  # @!attribute [r] url
  #   @return [String] URL of the query output file
  # @!attribute [r] bucket
  #   @return [String] S3 bucket holding the query output
  # @!attribute [r] key
  #   @return [String] S3 path of the query output inside the bucket

  QueryOutputLocation = Struct.new(:url, :bucket, :key)
end
|
@@ -0,0 +1,76 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Egis
  ##
  # State of an Athena query execution, together with access to its output.
  #
  # @!attribute [r] id
  #   @return [String] Athena query execution ID
  # @!attribute [r] status
  #   @return [:queued, :running, :finished, :failed, :cancelled]
  # @!attribute [r] message
  #   @return [String]
  # @!attribute [r] output_location
  #   @return [Egis::QueryOutputLocation]
  #
  class QueryStatus
    QUEUED = :queued
    RUNNING = :running
    FINISHED = :finished
    FAILED = :failed
    CANCELLED = :cancelled

    STATUSES = [QUEUED, RUNNING, FINISHED, FAILED, CANCELLED].freeze

    attr_reader :id, :status, :message, :output_location

    # @raise [ArgumentError] when status is not one of STATUSES
    def initialize(id, status, message, output_location,
                   output_downloader: Egis::OutputDownloader.new,
                   output_parser: Egis::OutputParser.new)
      raise ArgumentError, "Unsupported status #{status}" unless STATUSES.include?(status)

      @id = id
      @status = status
      @message = message
      @output_location = output_location
      @output_downloader = output_downloader
      @output_parser = output_parser
    end

    # @return [Boolean] true when the status is :finished
    def finished?
      status == FINISHED
    end

    # @return [Boolean] true when the status is :failed
    def failed?
      status == FAILED
    end

    # @return [Boolean] true when the status is :queued
    def queued?
      status == QUEUED
    end

    # @return [Boolean] true when the status is :running
    def running?
      status == RUNNING
    end

    # @return [Boolean] true while the query is either running or still queued
    def in_progress?
      running? || queued?
    end

    ##
    # Downloads and parses the query result.
    #
    # Without a schema every value of the output CSV is returned as a string. Pass the expected
    # column types to have the values converted into Ruby objects accordingly.
    #
    # @param [Array] schema Array with expected query column types
    # @return [Array] Array of row values

    def fetch_result(schema: [])
      raw_output = output_downloader.download(output_location)
      output_parser.parse(raw_output, schema)
    end

    private

    attr_reader :output_downloader, :output_parser
  end
end
|
@@ -0,0 +1,22 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Egis
  # Removes every S3 object stored under a given prefix.
  #
  # @!visibility private
  class S3Cleaner
    def initialize(aws_client_provider: Egis::AwsClientProvider.new)
      @s3_client = aws_client_provider.s3_client
    end

    # Deletes all objects in the bucket whose key starts with the prefix.
    #
    # A single listing request returns only a limited page of keys, so the listing is followed
    # page by page until it is no longer truncated; each page is deleted with one request.
    #
    # @param [String] bucket
    # @param [String] prefix
    # @return [Object, nil] response of the last delete request, or nil when nothing matched the prefix
    def delete(bucket, prefix)
      last_delete_response = nil
      continuation_token = nil

      loop do
        listing_params = {bucket: bucket, prefix: prefix}
        listing_params[:continuation_token] = continuation_token if continuation_token
        listing = s3_client.list_objects_v2(**listing_params)

        objects_to_remove = listing.contents.map { |content| {key: content.key} }
        # Never send a delete request with an empty object list.
        unless objects_to_remove.empty?
          last_delete_response = s3_client.delete_objects(bucket: bucket, delete: {objects: objects_to_remove})
        end

        break unless listing.is_truncated

        continuation_token = listing.next_continuation_token
      end

      last_delete_response
    end

    private

    attr_reader :s3_client
  end
end
|
@@ -0,0 +1,14 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Egis
  # Splits S3 URLs into bucket and key.
  #
  # @!visibility private
  class S3LocationParser
    # The bucket is everything up to the first slash after `s3://`; the key is the non-empty rest.
    S3_URL_PATTERN = %r{^s3://(?<bucket>\S+?)/(?<key>\S+)$}.freeze

    # @param [String] url S3 URL in the `s3://bucket/key` form
    # @return [Array(String, String)] bucket and key
    # @raise [ArgumentError] when the URL does not have the expected form
    def parse_url(url)
      matched_data = S3_URL_PATTERN.match(url)
      # Fail with a message naming the offending value instead of a NoMethodError on nil.
      raise ArgumentError, "Invalid S3 URL: #{url.inspect}" unless matched_data

      [matched_data['bucket'], matched_data['key']]
    end
  end
end
|