egis 1.1.0 → 1.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/egis.gemspec +1 -1
- data/lib/egis.rb +10 -0
- data/lib/egis/client.rb +21 -7
- data/lib/egis/configuration.rb +5 -1
- data/lib/egis/database.rb +35 -11
- data/lib/egis/table.rb +44 -14
- data/lib/egis/table_ddl_generator.rb +25 -2
- data/lib/egis/testing.rb +1 -1
- data/lib/egis/version.rb +1 -1
- metadata +4 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 4fa3323e3c02d8537df83f33799f580fb196c255315c9923eb70c0e87bd1f50e
|
4
|
+
data.tar.gz: 9b4054a2201d98f501603cc137c1c7df93fccf105c448b6d4261515c3f53d959
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 3da9b098de6948b584244db63eda8ef4ba3d26ff29806be0e1a3b3469a321bb72d0761bc4bcb415759646a2397c71d32221477cc2dabeb9233dd8cad22300cef
|
7
|
+
data.tar.gz: 5c50463ef9585563b3aa6fefbb89b4df101f93693b7e5d5f423e98fa803e614d7541639db9afe06fc1a4544205d6e3b0cbf3c166112f556f58b8c798b528571d
|
data/egis.gemspec
CHANGED
@@ -16,7 +16,7 @@ Gem::Specification.new do |spec|
|
|
16
16
|
|
17
17
|
spec.metadata['homepage_uri'] = spec.homepage
|
18
18
|
spec.metadata['source_code_uri'] = spec.homepage
|
19
|
-
spec.metadata['changelog_uri'] = 'https://github.
|
19
|
+
spec.metadata['changelog_uri'] = 'https://u2i.github.io/egis/file.CHANGELOG.html'
|
20
20
|
|
21
21
|
# Specify which files should be added to the gem when it is released.
|
22
22
|
# The `git ls-files -z` loads the files in the RubyGem that have been added into git.
|
data/lib/egis.rb
CHANGED
@@ -40,6 +40,11 @@ require 'egis/s3_location_parser'
|
|
40
40
|
# config.aws_profile = 'my-profile'
|
41
41
|
# end
|
42
42
|
#
|
43
|
+
# @example Configure logger
|
44
|
+
# Egis.configure do |config|
|
45
|
+
# config.logger = Logger.new('athena.log', level: :debug)
|
46
|
+
# end
|
47
|
+
#
|
43
48
|
# @yield [Egis::Configuration]
|
44
49
|
# @return [void]
|
45
50
|
#
|
@@ -58,5 +63,10 @@ module Egis
|
|
58
63
|
def mode
|
59
64
|
@mode ||= Egis::StandardMode.new
|
60
65
|
end
|
66
|
+
|
67
|
+
# @!visibility private
|
68
|
+
def logger
|
69
|
+
@configuration.logger
|
70
|
+
end
|
61
71
|
end
|
62
72
|
end
|
data/lib/egis/client.rb
CHANGED
@@ -65,14 +65,16 @@ module Egis
|
|
65
65
|
# by workgroup.
|
66
66
|
# @return [Egis::QueryStatus]
|
67
67
|
|
68
|
-
def execute_query(query, work_group: nil, database: nil, output_location: nil, async: true)
|
69
|
-
|
68
|
+
def execute_query(query, work_group: nil, database: nil, output_location: nil, async: true, system_execution: false)
|
69
|
+
query_id = aws_athena_client.start_query_execution(
|
70
70
|
query_execution_params(query, work_group, database, output_location)
|
71
71
|
).query_execution_id
|
72
72
|
|
73
|
-
|
73
|
+
log_query_execution(query, query_id, system_execution)
|
74
74
|
|
75
|
-
query_status
|
75
|
+
return query_status(query_id) if Egis.mode.async(async)
|
76
|
+
|
77
|
+
query_status = wait_for_query_to_finish(query_id)
|
76
78
|
|
77
79
|
raise Egis::Errors::QueryExecutionError, query_status.message unless query_status.finished?
|
78
80
|
|
@@ -89,10 +91,13 @@ module Egis
|
|
89
91
|
resp = aws_athena_client.get_query_execution(query_execution_id: query_id)
|
90
92
|
|
91
93
|
query_execution = resp.query_execution
|
94
|
+
query_status = query_execution.status.state
|
95
|
+
|
96
|
+
Egis.logger.debug { "Checking query status (#{query_id}): #{query_status}" }
|
92
97
|
|
93
98
|
Egis::QueryStatus.new(
|
94
99
|
query_execution.query_execution_id,
|
95
|
-
QUERY_STATUS_MAPPING.fetch(
|
100
|
+
QUERY_STATUS_MAPPING.fetch(query_status),
|
96
101
|
query_execution.status.state_change_reason,
|
97
102
|
parse_output_location(query_execution)
|
98
103
|
)
|
@@ -112,11 +117,20 @@ module Egis
|
|
112
117
|
params
|
113
118
|
end
|
114
119
|
|
115
|
-
def
|
120
|
+
def log_query_execution(query, query_id, system_execution)
|
121
|
+
if system_execution
|
122
|
+
Egis.logger.debug { "Executing system query (#{query_id}): #{query.gsub(/\s+/, ' ')}" }
|
123
|
+
else
|
124
|
+
Egis.logger.info { "Executing query (#{query_id}): #{query.gsub(/\s+/, ' ')}" }
|
125
|
+
end
|
126
|
+
end
|
127
|
+
|
128
|
+
def wait_for_query_to_finish(query_id)
|
116
129
|
attempt = 1
|
117
130
|
loop do
|
118
131
|
sleep(query_status_backoff.call(attempt))
|
119
|
-
status = query_status(
|
132
|
+
status = query_status(query_id)
|
133
|
+
|
120
134
|
return status unless status.queued? || status.running?
|
121
135
|
|
122
136
|
attempt += 1
|
data/lib/egis/configuration.rb
CHANGED
@@ -4,6 +4,10 @@ module Egis
|
|
4
4
|
# @!visibility private
|
5
5
|
class Configuration
|
6
6
|
attr_accessor :work_group, :aws_region, :aws_access_key_id, :aws_secret_access_key, :aws_profile,
|
7
|
-
:query_status_backoff, :testing_s3_bucket
|
7
|
+
:query_status_backoff, :testing_s3_bucket, :logger
|
8
|
+
|
9
|
+
def initialize
|
10
|
+
@logger = Logger.new(STDOUT, level: :info)
|
11
|
+
end
|
8
12
|
end
|
9
13
|
end
|
data/lib/egis/database.rb
CHANGED
@@ -10,20 +10,26 @@ module Egis
|
|
10
10
|
#
|
11
11
|
# It is recommended to create database objects using {Egis::Client#database} method.
|
12
12
|
#
|
13
|
+
# @!attribute [r] name
|
14
|
+
# @return [String] Athena database name
|
15
|
+
#
|
13
16
|
class Database
|
14
|
-
def initialize(
|
17
|
+
def initialize(name, client: Egis::Client.new, output_downloader: Egis::OutputDownloader.new)
|
15
18
|
@client = client
|
16
|
-
@
|
19
|
+
@name = name
|
17
20
|
@output_downloader = output_downloader
|
18
21
|
end
|
19
22
|
|
23
|
+
attr_reader :name
|
24
|
+
|
20
25
|
##
|
21
26
|
# Creates {Egis::Table} object. Executing it doesn't create Athena table yet.
|
22
27
|
#
|
23
28
|
# @param [String] table_name
|
24
29
|
# @param [Egis::TableSchema] table_schema
|
25
30
|
# @param [String] table_location S3 URL with table location (e.g. `s3://s3_bucket/table/location/`)
|
26
|
-
# @param [:tsv, :csv, :orc] format Table format
|
31
|
+
# @param [:tsv, :csv, :orc, {serde: 'SerdeClass', serde_properties: {property: value}}] format Table format
|
32
|
+
# (defaults to :tsv)
|
27
33
|
# @return [Egis::Table]
|
28
34
|
|
29
35
|
def table(table_name, table_schema, table_location, **options)
|
@@ -36,7 +42,10 @@ module Egis
|
|
36
42
|
# @return [void]
|
37
43
|
|
38
44
|
def create
|
39
|
-
|
45
|
+
log_database_creation
|
46
|
+
|
47
|
+
client.execute_query("CREATE DATABASE IF NOT EXISTS #{translate_name(name)};", async: false,
|
48
|
+
system_execution: true)
|
40
49
|
end
|
41
50
|
|
42
51
|
##
|
@@ -45,7 +54,9 @@ module Egis
|
|
45
54
|
# @return [void]
|
46
55
|
|
47
56
|
def create!
|
48
|
-
|
57
|
+
log_database_creation
|
58
|
+
|
59
|
+
client.execute_query("CREATE DATABASE #{translate_name(name)};", async: false, system_execution: true)
|
49
60
|
end
|
50
61
|
|
51
62
|
##
|
@@ -54,7 +65,10 @@ module Egis
|
|
54
65
|
# @return [void]
|
55
66
|
|
56
67
|
def drop
|
57
|
-
|
68
|
+
log_database_removal
|
69
|
+
|
70
|
+
client.execute_query("DROP DATABASE IF EXISTS #{translate_name(name)} CASCADE;", async: false,
|
71
|
+
system_execution: true)
|
58
72
|
end
|
59
73
|
|
60
74
|
##
|
@@ -63,14 +77,16 @@ module Egis
|
|
63
77
|
# @return [void]
|
64
78
|
|
65
79
|
def drop!
|
66
|
-
|
80
|
+
log_database_removal
|
81
|
+
|
82
|
+
client.execute_query("DROP DATABASE #{translate_name(name)} CASCADE;", async: false, system_execution: true)
|
67
83
|
end
|
68
84
|
|
69
85
|
##
|
70
86
|
# (see Egis::Client#execute_query)
|
71
87
|
|
72
88
|
def execute_query(query, **options)
|
73
|
-
client.execute_query(query, **{database:
|
89
|
+
client.execute_query(query, **{database: name, **options})
|
74
90
|
end
|
75
91
|
|
76
92
|
##
|
@@ -86,14 +102,22 @@ module Egis
|
|
86
102
|
# @return [Boolean]
|
87
103
|
|
88
104
|
def exists?
|
89
|
-
query_status = client.execute_query("SHOW DATABASES LIKE '#{
|
105
|
+
query_status = client.execute_query("SHOW DATABASES LIKE '#{name}';", async: false, system_execution: true)
|
90
106
|
parsed_result = output_downloader.download(query_status.output_location)
|
91
|
-
parsed_result.flatten.include?(
|
107
|
+
parsed_result.flatten.include?(name)
|
92
108
|
end
|
93
109
|
|
94
110
|
private
|
95
111
|
|
96
|
-
attr_reader :client, :
|
112
|
+
attr_reader :client, :output_downloader
|
113
|
+
|
114
|
+
def log_database_creation
|
115
|
+
Egis.logger.info { "Creating database #{name}" }
|
116
|
+
end
|
117
|
+
|
118
|
+
def log_database_removal
|
119
|
+
Egis.logger.info { "Removing database #{name}" }
|
120
|
+
end
|
97
121
|
|
98
122
|
def translate_name(name)
|
99
123
|
Egis.mode.database_name(name)
|
data/lib/egis/table.rb
CHANGED
@@ -9,7 +9,7 @@ module Egis
|
|
9
9
|
# @!attribute [r] database
|
10
10
|
# @return [Egis::Database]
|
11
11
|
# @!attribute [r] name
|
12
|
-
# @return [String] Athena
|
12
|
+
# @return [String] Athena table name
|
13
13
|
# @!attribute [r] schema
|
14
14
|
# @return [Egis::TableSchema] table's schema object
|
15
15
|
#
|
@@ -42,8 +42,10 @@ module Egis
|
|
42
42
|
# @return [void]
|
43
43
|
|
44
44
|
def create
|
45
|
+
log_table_creation
|
46
|
+
|
45
47
|
create_table_sql = table_ddl_generator.create_table_sql(self, permissive: true)
|
46
|
-
database.execute_query(create_table_sql, async: false)
|
48
|
+
database.execute_query(create_table_sql, async: false, system_execution: true)
|
47
49
|
end
|
48
50
|
|
49
51
|
##
|
@@ -52,8 +54,10 @@ module Egis
|
|
52
54
|
# @return [void]
|
53
55
|
|
54
56
|
def create!
|
57
|
+
log_table_creation
|
58
|
+
|
55
59
|
create_table_sql = table_ddl_generator.create_table_sql(self, permissive: false)
|
56
|
-
database.execute_query(create_table_sql, async: false)
|
60
|
+
database.execute_query(create_table_sql, async: false, system_execution: true)
|
57
61
|
end
|
58
62
|
|
59
63
|
##
|
@@ -67,7 +71,7 @@ module Egis
|
|
67
71
|
|
68
72
|
def add_partitions(partitions)
|
69
73
|
load_partitions_query = partitions_generator.to_sql(name, partitions, permissive: true)
|
70
|
-
database.execute_query(load_partitions_query, async: false)
|
74
|
+
database.execute_query(load_partitions_query, async: false, system_execution: true)
|
71
75
|
end
|
72
76
|
|
73
77
|
##
|
@@ -76,7 +80,7 @@ module Egis
|
|
76
80
|
|
77
81
|
def add_partitions!(partitions)
|
78
82
|
load_partitions_query = partitions_generator.to_sql(name, partitions, permissive: false)
|
79
|
-
database.execute_query(load_partitions_query, async: false)
|
83
|
+
database.execute_query(load_partitions_query, async: false, system_execution: true)
|
80
84
|
end
|
81
85
|
|
82
86
|
##
|
@@ -87,18 +91,30 @@ module Egis
|
|
87
91
|
# @return [void]
|
88
92
|
|
89
93
|
def discover_partitions
|
90
|
-
database.execute_query("MSCK REPAIR TABLE #{name};", async: false)
|
94
|
+
database.execute_query("MSCK REPAIR TABLE #{name};", async: false, system_execution: true)
|
91
95
|
end
|
92
96
|
|
93
97
|
##
|
94
98
|
# Insert data into the table. Mostly useful for testing purposes.
|
95
99
|
#
|
96
|
-
# @
|
100
|
+
# @example Insert with array of arrays
|
101
|
+
# table.upload_data([
|
102
|
+
# ['hello world', 'mx', 1],
|
103
|
+
# ['hello again', 'us', 2]
|
104
|
+
# ])
|
105
|
+
#
|
106
|
+
# @example Insert with array of hashes
|
107
|
+
# table.upload_data([
|
108
|
+
# {message: 'hello world', country: 'mx', type: 1},
|
109
|
+
# {message: 'hello again', country: 'us', type: 2}
|
110
|
+
# ])
|
111
|
+
#
|
112
|
+
# @param [Array] rows Array of arrays or hashes with row values
|
97
113
|
# @return [void]
|
98
114
|
|
99
115
|
def upload_data(rows)
|
100
116
|
query = data_insert_query(rows)
|
101
|
-
database.execute_query(query, async: false)
|
117
|
+
database.execute_query(query, async: false, system_execution: true)
|
102
118
|
end
|
103
119
|
|
104
120
|
##
|
@@ -107,7 +123,7 @@ module Egis
|
|
107
123
|
# @return [Array] Array of arrays with row values.
|
108
124
|
|
109
125
|
def download_data
|
110
|
-
result = database.execute_query("SELECT * FROM #{name};", async: false)
|
126
|
+
result = database.execute_query("SELECT * FROM #{name};", async: false, system_execution: true)
|
111
127
|
content = output_downloader.download(result.output_location)
|
112
128
|
output_parser.parse(content, column_types)
|
113
129
|
end
|
@@ -141,23 +157,37 @@ module Egis
|
|
141
157
|
attr_reader :options, :partitions_generator, :table_ddl_generator, :output_downloader, :output_parser,
|
142
158
|
:table_data_wiper
|
143
159
|
|
144
|
-
def
|
145
|
-
|
160
|
+
def log_table_creation
|
161
|
+
Egis.logger.info { "Creating table #{database.name}.#{name} located in #{location}" }
|
146
162
|
end
|
147
163
|
|
148
164
|
def column_types
|
149
|
-
|
165
|
+
all_columns.map(&:type)
|
166
|
+
end
|
167
|
+
|
168
|
+
def all_columns
|
169
|
+
schema.columns + schema.partitions
|
150
170
|
end
|
151
171
|
|
152
172
|
def data_insert_query(rows)
|
173
|
+
insert_values = rows.map { |row| row_literal_values(row) }
|
174
|
+
row_clause = insert_values.map { |row| row_values_statement(row) }.join(",\n")
|
175
|
+
|
153
176
|
<<~SQL
|
154
177
|
INSERT INTO #{name} VALUES
|
155
|
-
#{
|
178
|
+
#{row_clause}
|
156
179
|
SQL
|
157
180
|
end
|
158
181
|
|
182
|
+
def row_literal_values(row)
|
183
|
+
all_columns.map.with_index do |column, index|
|
184
|
+
value = row.is_a?(Hash) ? row[column.name] : row[index]
|
185
|
+
Egis::Types.serializer(column.type).literal(value)
|
186
|
+
end
|
187
|
+
end
|
188
|
+
|
159
189
|
def row_values_statement(row)
|
160
|
-
"(#{row.
|
190
|
+
"(#{row.join(', ')})"
|
161
191
|
end
|
162
192
|
end
|
163
193
|
end
|
data/lib/egis/table_ddl_generator.rb
CHANGED
@@ -9,7 +9,7 @@ module Egis
|
|
9
9
|
#{column_definition_sql(table.schema.columns)}
|
10
10
|
)
|
11
11
|
#{partition_statement(table.schema)}
|
12
|
-
#{
|
12
|
+
#{row_format_statement(table.format)}
|
13
13
|
LOCATION '#{table.location}';
|
14
14
|
SQL
|
15
15
|
end
|
@@ -34,7 +34,30 @@ module Egis
|
|
34
34
|
columns.map { |column| "`#{column.name}` #{column.type}" }.join(",\n")
|
35
35
|
end
|
36
36
|
|
37
|
-
def
|
37
|
+
def serde?(format)
|
38
|
+
format.is_a?(Hash) && format.key?(:serde)
|
39
|
+
end
|
40
|
+
|
41
|
+
def row_format_statement(format)
|
42
|
+
return serde_row_format_statement(format) if serde?(format)
|
43
|
+
|
44
|
+
delimited_row_format_statement(format)
|
45
|
+
end
|
46
|
+
|
47
|
+
def serde_row_format_statement(format)
|
48
|
+
row_format = "ROW FORMAT SERDE '#{format[:serde]}'"
|
49
|
+
return row_format unless format.key?(:serde_properties)
|
50
|
+
|
51
|
+
serde_properties = format[:serde_properties].map { |property, value| "'#{property}' = '#{value}'" }
|
52
|
+
<<-SQL
|
53
|
+
#{row_format}
|
54
|
+
WITH SERDEPROPERTIES (
|
55
|
+
#{serde_properties.join(",\n")}
|
56
|
+
)
|
57
|
+
SQL
|
58
|
+
end
|
59
|
+
|
60
|
+
def delimited_row_format_statement(format)
|
38
61
|
case format
|
39
62
|
when :csv
|
40
63
|
"ROW FORMAT DELIMITED FIELDS TERMINATED BY ','"
|
data/lib/egis/testing.rb
CHANGED
data/lib/egis/version.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: egis
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.
|
4
|
+
version: 1.4.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Agnieszka Czereba
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date:
|
12
|
+
date: 2021-03-12 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: aws-sdk-athena
|
@@ -83,7 +83,7 @@ licenses:
|
|
83
83
|
metadata:
|
84
84
|
homepage_uri: https://github.com/u2i/egis
|
85
85
|
source_code_uri: https://github.com/u2i/egis
|
86
|
-
changelog_uri: https://github.
|
86
|
+
changelog_uri: https://u2i.github.io/egis/file.CHANGELOG.html
|
87
87
|
post_install_message:
|
88
88
|
rdoc_options: []
|
89
89
|
require_paths:
|
@@ -99,7 +99,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
99
99
|
- !ruby/object:Gem::Version
|
100
100
|
version: '0'
|
101
101
|
requirements: []
|
102
|
-
rubygems_version: 3.1.
|
102
|
+
rubygems_version: 3.1.4
|
103
103
|
signing_key:
|
104
104
|
specification_version: 4
|
105
105
|
summary: A handy wrapper for AWS Athena Ruby SDK.
|