egis 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/egis.gemspec +30 -0
- data/lib/egis/aws_client_provider.rb +30 -0
- data/lib/egis/cartesian_product_generator.rb +17 -0
- data/lib/egis/client.rb +142 -0
- data/lib/egis/configuration.rb +9 -0
- data/lib/egis/database.rb +102 -0
- data/lib/egis/errors.rb +12 -0
- data/lib/egis/output_downloader.rb +21 -0
- data/lib/egis/output_parser.rb +24 -0
- data/lib/egis/partitions_generator.rb +55 -0
- data/lib/egis/query_output_location.rb +13 -0
- data/lib/egis/query_status.rb +76 -0
- data/lib/egis/s3_cleaner.rb +22 -0
- data/lib/egis/s3_location_parser.rb +14 -0
- data/lib/egis/standard_mode.rb +18 -0
- data/lib/egis/table.rb +163 -0
- data/lib/egis/table_data_wiper.rb +51 -0
- data/lib/egis/table_ddl_generator.rb +50 -0
- data/lib/egis/table_schema.rb +49 -0
- data/lib/egis/testing/testing_mode.rb +62 -0
- data/lib/egis/testing.rb +48 -0
- data/lib/egis/types/boolean_serializer.rb +53 -0
- data/lib/egis/types/default_serializer.rb +20 -0
- data/lib/egis/types/integer_serializer.rb +20 -0
- data/lib/egis/types/null_serializer.rb +36 -0
- data/lib/egis/types/string_serializer.rb +20 -0
- data/lib/egis/types/timestamp_serializer.rb +22 -0
- data/lib/egis/types.rb +30 -0
- data/lib/egis/version.rb +5 -0
- data/lib/egis.rb +62 -0
- metadata +106 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 1d2255a76aef464d0d8faee0c39be753e1a928133a25de5c4a86f553e51e92ca
|
4
|
+
data.tar.gz: f18ae28053651576ccf941bd42f5a9bf40db32fed3c1c2025e1bc987844199f8
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 9b143dbc650f8c02ba39f1404a2d9b4ebe36c49182a36b947a1d091079b1c94839320c2d05dd9a0db1516d82c9b03694244e4fa467898302e888addb234e1291
|
7
|
+
data.tar.gz: a1577d30cbfd63632dbd55052adf0f863f7a5f571e8a8f8a210649446acc786764d8bec6a15be7cd91742f4f7681bfc1d379a9099934f62b8a30379d12a4bd77
|
data/egis.gemspec
ADDED
@@ -0,0 +1,30 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
lib = File.expand_path('lib', __dir__)
|
4
|
+
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
5
|
+
require 'egis/version'
|
6
|
+
|
7
|
+
Gem::Specification.new do |spec|
  # Identity of the gem.
  spec.name = 'egis'
  spec.version = Egis::VERSION
  spec.authors = ['Agnieszka Czereba', 'Marek Mateja']
  spec.email = %w[agnieszka.czereba@u2i.com marek.mateja@u2i.com]

  # Short description, project page and license.
  spec.summary = 'A handy wrapper for AWS Athena Ruby SDK.'
  spec.homepage = 'https://github.com/u2i/egis'
  spec.license = 'MIT'

  # Links exposed through the gem metadata.
  spec.metadata.update(
    'homepage_uri' => spec.homepage,
    'source_code_uri' => spec.homepage,
    'changelog_uri' => 'https://github.com/u2i/egis/blob/master/CHANGELOG.md'
  )

  # Package only the gemspec and the lib/ files tracked by git.
  # `git ls-files -z` prints NUL-separated paths, hence the split on "\x0".
  gem_root = File.expand_path(__dir__)
  spec.files = Dir.chdir(gem_root) { `git ls-files -z egis.gemspec lib/`.split("\x0") }
  spec.require_paths = ['lib']

  # Runtime dependencies: the AWS SDK gems for Athena and S3.
  %w[aws-sdk-athena aws-sdk-s3].each do |sdk_gem|
    spec.add_dependency sdk_gem, '~> 1.0'
  end
end
|
@@ -0,0 +1,30 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'aws-sdk-s3'
|
4
|
+
require 'aws-sdk-athena'
|
5
|
+
|
6
|
+
module Egis
  # @!visibility private
  #
  # Builds AWS SDK clients from the settings found in Egis.configuration.
  class AwsClientProvider
    # @return [Aws::S3::Client] a new S3 client built from the current configuration
    def s3_client
      Aws::S3::Client.new(client_config)
    end

    # @return [Aws::Athena::Client] a new Athena client built from the current configuration
    def athena_client
      Aws::Athena::Client.new(client_config)
    end

    private

    # Collects the AWS options that are set (truthy) in the configuration; unset options are
    # left out of the hash entirely.
    #
    # @return [Hash] options passed to the AWS client constructors
    def client_config
      settings = Egis.configuration

      {
        region: settings.aws_region,
        access_key_id: settings.aws_access_key_id,
        secret_access_key: settings.aws_secret_access_key,
        profile: settings.aws_profile
      }.select { |_option, value| value }
    end
  end
end
|
@@ -0,0 +1,17 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Egis
  # @!visibility private
  #
  # Expands a hash of candidate values into every key/value combination.
  class CartesianProductGenerator
    # Builds the cartesian product of the value lists, keeping each value paired with its key.
    #
    # @example
    #   cartesian_product({a: [1, 2], b: ['x']})
    #   # => [[[:a, 1], [:b, 'x']], [[:a, 2], [:b, 'x']]]
    #
    # @param [Hash{Object => Array}] values_by_key candidate values for every key
    # @return [Array<Array<Array>>] one entry per combination; each entry is a list of [key, value] pairs.
    #   An empty hash yields no combinations.
    def cartesian_product(values_by_key)
      # Without any key there is nothing to combine (and no first list to call #product on).
      return [] if values_by_key.empty?

      keys = values_by_key.keys
      first_values, *other_values = values_by_key.values

      # Array#product with no arguments wraps every element in its own array, so a single key
      # goes through the same path as several keys.
      first_values.product(*other_values).map { |combination| keys.zip(combination) }
    end
  end
end
|
data/lib/egis/client.rb
ADDED
@@ -0,0 +1,142 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Egis
  ##
  # Entry point of {Egis}: schedules Athena queries and reports on their state.
  #
  # The library has to be configured first, see {Egis.configure}.
  #
  # @see Egis.configure
  #
  # @example Create client and execute asynchronous query
  #   client = Egis::Client.new
  #   status = client.execute_query('SELECT * FROM my_table;')
  #
  #   while status.in_progress?
  #     # do something useful
  #     # ...
  #     status = client.query_status(status.id)
  #   end
  #
  #   status.output_location.url # s3://my-bucket/result/path
  #
  # @example Execute synchronous query and fetch results
  #   status = client.execute_query('SELECT MAX(time), MIN(id) FROM my_table;', async: false)
  #   status.fetch_result(schema: [:timestamp, :int]) # [[2020-05-04 11:19:03 +0200, 7]]
  #
  class Client
    # Athena execution states mapped onto {Egis::QueryStatus} constants.
    QUERY_STATUS_MAPPING = {
      'QUEUED' => Egis::QueryStatus::QUEUED,
      'RUNNING' => Egis::QueryStatus::RUNNING,
      'SUCCEEDED' => Egis::QueryStatus::FINISHED,
      'FAILED' => Egis::QueryStatus::FAILED,
      'CANCELLED' => Egis::QueryStatus::CANCELLED
    }.freeze

    # Delay in seconds before the given polling attempt; used when the configuration provides none.
    DEFAULT_QUERY_STATUS_BACKOFF = ->(attempt) { 1.5**attempt - 1 }

    private_constant :QUERY_STATUS_MAPPING, :DEFAULT_QUERY_STATUS_BACKOFF

    def initialize(aws_client_provider: Egis::AwsClientProvider.new, s3_location_parser: Egis::S3LocationParser.new)
      @aws_athena_client = aws_client_provider.athena_client
      @s3_location_parser = s3_location_parser
      @query_status_backoff = Egis.configuration.query_status_backoff || DEFAULT_QUERY_STATUS_BACKOFF
    end

    ##
    # Builds an {Egis::Database} object for the given name. No Athena database is created by this call.
    #
    # @param [String] database_name
    # @return [Egis::Database]

    def database(database_name)
      Database.new(database_name, client: self)
    end

    ##
    # Starts an Athena query. Queries run asynchronously unless requested otherwise.
    #
    # @param [String] query SQL query to execute
    # @param [Boolean] async Whether to return right after scheduling the query or block until it finishes
    # @param [String] work_group Athena work group the query will be executed in
    # @param [String] database Database context of the query (implicit table references are expected
    #   to be in the given database)
    # @param [String] output_location S3 url of the desired output location. By default, Athena uses
    #   the location defined by the workgroup.
    # @return [Egis::QueryStatus]
    # @raise [Egis::Errors::QueryExecutionError] when a blocking query ends in any state other than finished

    def execute_query(query, work_group: nil, database: nil, output_location: nil, async: true)
      request = query_execution_params(query, work_group, database, output_location)
      execution_id = aws_athena_client.start_query_execution(request).query_execution_id

      # The active mode has the final say on whether the call stays asynchronous.
      return query_status(execution_id) if Egis.mode.async(async)

      final_status = wait_for_query_to_finish(execution_id)
      return final_status if final_status.finished?

      raise Egis::Errors::QueryExecutionError, final_status.message
    end

    ##
    # Looks up the current state of a previously scheduled query.
    #
    # @param [String] query_id Query id from {Egis::QueryStatus} returned by {#execute_query} method
    # @return [Egis::QueryStatus]

    def query_status(query_id)
      execution = aws_athena_client.get_query_execution(query_execution_id: query_id).query_execution
      execution_status = execution.status

      Egis::QueryStatus.new(
        execution.query_execution_id,
        QUERY_STATUS_MAPPING.fetch(execution_status.state),
        execution_status.state_change_reason,
        parse_output_location(execution)
      )
    end

    private

    attr_reader :aws_athena_client, :s3_location_parser, :query_status_backoff

    # Assembles the start_query_execution request; optional settings are added only when given.
    def query_execution_params(query, work_group, database, output_location)
      effective_work_group = work_group || Egis.configuration.work_group

      {query_string: query}.tap do |request|
        request[:work_group] = effective_work_group if effective_work_group
        request[:query_execution_context] = {database: database_name(database)} if database
        request[:result_configuration] = {output_location: translate_path(output_location)} if output_location
      end
    end

    # Polls the query state, sleeping per the backoff before every attempt, until the query is
    # neither queued nor running.
    def wait_for_query_to_finish(query_execution_id)
      1.step do |attempt|
        sleep(query_status_backoff.call(attempt))

        current = query_status(query_execution_id)
        return current unless current.queued? || current.running?
      end
    end

    # Wraps the execution's output URL together with its parsed bucket and key.
    def parse_output_location(query_execution)
      output_url = query_execution.result_configuration.output_location
      bucket, key = s3_location_parser.parse_url(output_url)

      QueryOutputLocation.new(output_url, bucket, key)
    end

    # Delegates S3 path translation to the active mode.
    def translate_path(s3_url)
      Egis.mode.s3_path(s3_url)
    end

    # Delegates database name translation to the active mode.
    def database_name(name)
      Egis.mode.database_name(name)
    end
  end
end
|
@@ -0,0 +1,102 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Egis
  ##
  # Interface for database manipulation and querying.
  #
  # Extends the interface of {Egis::Client} but all the queries scheduled using {Egis::Database} are executed
  # within the database's context. SQL table references without explicit database will implicitly refer to
  # the database they are executed from.
  #
  # It is recommended to create database objects using {Egis::Client#database} method.
  #
  class Database
    def initialize(database_name, client: Egis::Client.new, output_downloader: Egis::OutputDownloader.new)
      @client = client
      @database_name = database_name
      @output_downloader = output_downloader
    end

    ##
    # Creates {Egis::Table} object. Executing it doesn't create Athena table yet.
    #
    # @param [String] table_name
    # @param [Egis::TableSchema] table_schema
    # @param [String] table_location S3 URL with table location (e.g. `s3://s3_bucket/table/location/`)
    # @param [:tsv, :csv, :orc] format Table format (defaults to :tsv)
    # @return [Egis::Table]

    def table(table_name, table_schema, table_location, **options)
      Table.new(self, table_name, table_schema, table_location, options: options)
    end

    ##
    # Creates database in Athena.
    #
    # @return [void]

    def create
      client.execute_query("CREATE DATABASE IF NOT EXISTS #{translate_name(database_name)};", async: false)
    end

    ##
    # The same as {#create} but raising error if it already exists.
    #
    # @return [void]

    def create!
      client.execute_query("CREATE DATABASE #{translate_name(database_name)};", async: false)
    end

    ##
    # Removes database in Athena.
    #
    # @return [void]

    def drop
      client.execute_query("DROP DATABASE IF EXISTS #{translate_name(database_name)} CASCADE;", async: false)
    end

    ##
    # The same as {#drop} but raising error if the database does not exist.
    #
    # @return [void]

    def drop!
      client.execute_query("DROP DATABASE #{translate_name(database_name)} CASCADE;", async: false)
    end

    ##
    # (see Egis::Client#execute_query)

    def execute_query(query, **options)
      # The database context defaults to this database; an explicit :database option still wins.
      client.execute_query(query, **{database: database_name}.merge(options))
    end

    ##
    # (see Egis::Client#query_status)

    def query_status(query_id)
      client.query_status(query_id)
    end

    ##
    # Checks whether database with such name exists in Athena.
    #
    # @return [Boolean]

    def exists?
      # Use the same translated name as #create / #drop so that the lookup targets the database
      # those methods actually operate on.
      athena_name = translate_name(database_name)

      query_status = client.execute_query("SHOW DATABASES LIKE '#{athena_name}';", async: false)
      listed_databases = output_downloader.download(query_status.output_location)
      listed_databases.flatten.include?(athena_name)
    end

    private

    attr_reader :client, :database_name, :output_downloader

    # Maps the user-facing database name to the name used in Athena for the active mode.
    def translate_name(name)
      Egis.mode.database_name(name)
    end
  end
end
|
data/lib/egis/errors.rb
ADDED
@@ -0,0 +1,12 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Egis
  # Exception hierarchy of the library; every error below inherits from Errors::Error.
  module Errors
    # Common ancestor of all Egis errors.
    Error = Class.new(StandardError)

    UnsupportedTableFormat = Class.new(Error)
    QueryExecutionError = Class.new(Error)
    PartitionError = Class.new(Error)
    TypeError = Class.new(Error)
  end
end
|
@@ -0,0 +1,21 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'csv'
|
4
|
+
|
5
|
+
module Egis
  # @!visibility private
  #
  # Reads a query output file from S3 and parses it as CSV.
  class OutputDownloader
    def initialize(aws_client_provider: Egis::AwsClientProvider.new)
      @s3_client = aws_client_provider.s3_client
    end

    # Fetches the object stored under the location's bucket and key and parses its body.
    #
    # @param [#bucket, #key] output_location
    # @return [Array<Array>] parsed CSV rows
    def download(output_location)
      s3_object = s3_client.get_object(bucket: output_location.bucket, key: output_location.key)
      raw_csv = s3_object.body.read

      CSV.parse(raw_csv)
    end

    private

    attr_reader :s3_client
  end
end
|
@@ -0,0 +1,24 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Egis
  # @!visibility private
  #
  # Converts raw query output rows (arrays of strings) into typed values.
  class OutputParser
    # Drops the header row and deserializes every remaining row column by column.
    #
    # @param [Array<Array>] output query output; the first row is treated as the header
    # @param [Array] types expected column types; columns without a type use Types::DefaultSerializer
    # @return [Array<Array>] deserialized content rows (empty when the output has no rows at all)
    def parse(output, types)
      header, *content = output

      # Output without even a header row has nothing to deserialize.
      return [] if header.nil?

      column_serializers = serializers(header, types)

      content.map do |row|
        row.zip(column_serializers).map { |string, serializer| serializer.load(string) }
      end
    end

    private

    # Picks one serializer per header column, falling back to the default serializer
    # for columns that have no matching entry in +types+.
    def serializers(row, types)
      row.zip(types).map { |_, type| type ? Types.serializer(type) : Types::DefaultSerializer.new }
    end
  end
end
|
@@ -0,0 +1,55 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Egis
  # @!visibility private
  #
  # Renders the ALTER TABLE ... ADD PARTITION statement for every combination of partition values.
  class PartitionsGenerator
    def initialize(cartesian_product_generator: Egis::CartesianProductGenerator.new)
      @cartesian_product_generator = cartesian_product_generator
    end

    # @param [String] table_name
    # @param [Hash] values_by_partition list of values for every partition name
    # @param [Boolean] permissive adds IF NOT EXISTS to the statement when true
    # @return [String] SQL statement, terminated with a semicolon and a newline
    # @raise [Egis::Errors::PartitionError] when the hash is nil, empty or contains an empty value list
    def to_sql(table_name, values_by_partition, permissive: false)
      validate_partition_values(values_by_partition)

      add_clause = "ALTER TABLE #{table_name} ADD #{permissive_statement(permissive)}"
      "#{add_clause}\n#{partitions_definition(values_by_partition)};\n"
    end

    private

    attr_reader :cartesian_product_generator

    def validate_partition_values(values_by_partition)
      return unless partition_values_missing?(values_by_partition)

      raise Errors::PartitionError, 'Partition value(s) missing'
    end

    def partition_values_missing?(values_by_partition)
      return true if values_by_partition.nil? || values_by_partition.empty?

      values_by_partition.values.any?(&:empty?)
    end

    # nil interpolates as an empty string, so nothing is added in the non-permissive case.
    def permissive_statement(permissive)
      permissive ? 'IF NOT EXISTS' : nil
    end

    # One PARTITION clause per line, one line per combination of partition values.
    def partitions_definition(values_by_partition)
      combinations = cartesian_product_generator.cartesian_product(values_by_partition)
      clauses = combinations.map { |combination| partition_values_clause(combination) }
      clauses.join("\n")
    end

    def partition_values_clause(partition_values_combination)
      assignments = partition_values(partition_values_combination).join(', ')
      "PARTITION (#{assignments})"
    end

    # String values are wrapped in single quotes; any other value is rendered as is.
    def partition_values(partition_values_combination)
      partition_values_combination.map do |partition_name, value|
        rendered = value.is_a?(String) ? "'#{value}'" : value
        "#{partition_name} = #{rendered}"
      end
    end
  end
end
|
@@ -0,0 +1,13 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Egis
  ##
  # Value object describing where a query's output file is stored.
  #
  # @!attribute [r] url
  #   @return [String] URL of the query output file
  # @!attribute [r] bucket
  #   @return [String] S3 bucket holding the query output
  # @!attribute [r] key
  #   @return [String] S3 key (path inside the bucket) of the query output

  QueryOutputLocation = Struct.new(*%i[url bucket key])
end
|
@@ -0,0 +1,76 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Egis
  ##
  # State of a single query execution, with access to its result.
  #
  # @!attribute [r] id
  #   @return [String] Athena query execution ID
  # @!attribute [r] status
  #   @return [:queued, :running, :finished, :failed, :cancelled]
  # @!attribute [r] message
  #   @return [String]
  # @!attribute [r] output_location
  #   @return [Egis::QueryOutputLocation]
  #
  class QueryStatus
    QUEUED = :queued
    RUNNING = :running
    FINISHED = :finished
    FAILED = :failed
    CANCELLED = :cancelled

    STATUSES = [QUEUED, RUNNING, FINISHED, FAILED, CANCELLED].freeze

    attr_reader :id, :status, :message, :output_location

    # @param [String] id
    # @param [Symbol] status one of {STATUSES}
    # @param [String] message
    # @param [Egis::QueryOutputLocation] output_location
    # @raise [ArgumentError] when status is not one of {STATUSES}
    def initialize(id, status, message, output_location,
                   output_downloader: Egis::OutputDownloader.new,
                   output_parser: Egis::OutputParser.new)
      raise ArgumentError, "Unsupported status #{status}" unless STATUSES.include?(status)

      @id = id
      @status = status
      @message = message
      @output_location = output_location
      @output_downloader = output_downloader
      @output_parser = output_parser
    end

    # @return [Boolean]
    def finished?
      status == FINISHED
    end

    # @return [Boolean]
    def failed?
      status == FAILED
    end

    # @return [Boolean]
    def cancelled?
      status == CANCELLED
    end

    # @return [Boolean]
    def queued?
      status == QUEUED
    end

    # @return [Boolean]
    def running?
      status == RUNNING
    end

    # @return [Boolean] true while the query is either queued or running
    def in_progress?
      queued? || running?
    end

    ##
    # Download query result.
    #
    # By default, Egis will just parse output CSV and return array of string arrays. Additionally, you
    # can pass expected query result column types to parse them into Ruby objects accordingly.
    #
    # @param [Array] schema Array with expected query column types
    # @return [Array] Array of row values

    def fetch_result(schema: [])
      output = output_downloader.download(output_location)
      output_parser.parse(output, schema)
    end

    private

    attr_reader :output_downloader, :output_parser
  end
end
|
@@ -0,0 +1,22 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Egis
  # @!visibility private
  #
  # Removes S3 objects stored under a given prefix.
  class S3Cleaner
    def initialize(aws_client_provider: Egis::AwsClientProvider.new)
      @s3_client = aws_client_provider.s3_client
    end

    # Deletes every object stored under +prefix+ in +bucket+.
    #
    # A single list_objects_v2 response holds only one page of keys (S3 caps a page at 1,000 keys),
    # so the listing is followed with continuation tokens and every page is removed with its own
    # delete_objects request. Nothing is sent to S3 for an empty page.
    #
    # @param [String] bucket
    # @param [String] prefix
    # @return [void]
    def delete(bucket, prefix)
      list_params = {bucket: bucket, prefix: prefix}

      loop do
        page = s3_client.list_objects_v2(**list_params)

        objects_to_remove = page.contents.map { |content| {key: content.key} }
        s3_client.delete_objects(bucket: bucket, delete: {objects: objects_to_remove}) unless objects_to_remove.empty?

        break unless page.is_truncated

        list_params[:continuation_token] = page.next_continuation_token
      end

      nil
    end

    private

    attr_reader :s3_client
  end
end
|
@@ -0,0 +1,14 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Egis
  # @!visibility private
  #
  # Splits S3 URLs of the form s3://bucket/key into bucket and key.
  class S3LocationParser
    # \A and \z anchor the whole string; ^ and $ would anchor each line and let a multi-line
    # value match on any one of its lines.
    S3_URL_PATTERN = %r{\As3://(?<bucket>\S+?)/(?<key>\S+)\z}.freeze

    # @param [String] url S3 URL, e.g. `s3://my-bucket/some/key`
    # @return [Array(String, String)] bucket name and object key
    # @raise [ArgumentError] when the value is not a well-formed S3 URL with both a bucket and a key
    def parse_url(url)
      matched_data = S3_URL_PATTERN.match(url)
      raise ArgumentError, "Invalid S3 URL: #{url.inspect}" unless matched_data

      [matched_data['bucket'], matched_data['key']]
    end
  end
end
|