egis 1.1.1 → 1.5.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/egis.gemspec +1 -1
- data/lib/egis.rb +12 -2
- data/lib/egis/aws_client_provider.rb +11 -13
- data/lib/egis/client.rb +37 -16
- data/lib/egis/configuration.rb +11 -1
- data/lib/egis/database.rb +36 -12
- data/lib/egis/output_downloader.rb +2 -2
- data/lib/egis/query_status.rb +6 -1
- data/lib/egis/s3_cleaner.rb +2 -2
- data/lib/egis/table.rb +48 -16
- data/lib/egis/table_ddl_generator.rb +25 -2
- data/lib/egis/testing.rb +1 -1
- data/lib/egis/testing/testing_mode.rb +1 -1
- data/lib/egis/version.rb +1 -1
- metadata +4 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 924727f52ac809df7ab4457a6a0c05469cd6c128adaff339018e964dc9ff4ea0
|
4
|
+
data.tar.gz: c8ba9d090641c899e832b73b062314bd76fa4fb39041fb175d7e3a717e943856
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: fa429f550a68475a75f9dd9c0610b0d41c9a8017f0e46c0f9e467cfaeddfb7d6587d6e7df5b4440bbd75e4d4d9a12d5966f40e4ce72cb8abf70e5dc2897f91d8
|
7
|
+
data.tar.gz: 3beac29ce82c267bfc2621e730a8b631f76a94f7426ac7d7901b31d204cda3320d1ab934e9c3e156ee52e5fca315728412ffd5a007bf4623dd2ac077d85399ba
|
data/egis.gemspec
CHANGED
@@ -16,7 +16,7 @@ Gem::Specification.new do |spec|
|
|
16
16
|
|
17
17
|
spec.metadata['homepage_uri'] = spec.homepage
|
18
18
|
spec.metadata['source_code_uri'] = spec.homepage
|
19
|
-
spec.metadata['changelog_uri'] = 'https://github.
|
19
|
+
spec.metadata['changelog_uri'] = 'https://u2i.github.io/egis/file.CHANGELOG.html'
|
20
20
|
|
21
21
|
# Specify which files should be added to the gem when it is released.
|
22
22
|
# The `git ls-files -z` loads the files in the RubyGem that have been added into git.
|
data/lib/egis.rb
CHANGED
@@ -40,13 +40,18 @@ require 'egis/s3_location_parser'
|
|
40
40
|
# config.aws_profile = 'my-profile'
|
41
41
|
# end
|
42
42
|
#
|
43
|
+
# @example Configure logger
|
44
|
+
# Egis.configure do |config|
|
45
|
+
# config.logger = Logger.new('athena.log', level: :debug)
|
46
|
+
# end
|
47
|
+
#
|
43
48
|
# @yield [Egis::Configuration]
|
44
49
|
# @return [void]
|
45
50
|
#
|
46
51
|
module Egis
|
47
52
|
class << self
|
48
|
-
def configure
|
49
|
-
|
53
|
+
def configure(&block)
|
54
|
+
configuration.configure(&block)
|
50
55
|
end
|
51
56
|
|
52
57
|
# @!visibility private
|
@@ -58,5 +63,10 @@ module Egis
|
|
58
63
|
def mode
|
59
64
|
@mode ||= Egis::StandardMode.new
|
60
65
|
end
|
66
|
+
|
67
|
+
# @!visibility private
|
68
|
+
def logger
|
69
|
+
@configuration.logger
|
70
|
+
end
|
61
71
|
end
|
62
72
|
end
|
@@ -6,25 +6,23 @@ require 'aws-sdk-athena'
|
|
6
6
|
module Egis
|
7
7
|
# @!visibility private
|
8
8
|
class AwsClientProvider
|
9
|
-
def s3_client
|
10
|
-
Aws::S3::Client.new(client_config)
|
9
|
+
def s3_client(configuration)
|
10
|
+
Aws::S3::Client.new(client_config(configuration))
|
11
11
|
end
|
12
12
|
|
13
|
-
def athena_client
|
14
|
-
Aws::Athena::Client.new(client_config)
|
13
|
+
def athena_client(configuration)
|
14
|
+
Aws::Athena::Client.new(client_config(configuration))
|
15
15
|
end
|
16
16
|
|
17
17
|
private
|
18
18
|
|
19
|
-
def client_config
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
config[:profile] = configuration.aws_profile if configuration.aws_profile
|
27
|
-
config
|
19
|
+
def client_config(configuration)
|
20
|
+
{
|
21
|
+
region: configuration.aws_region,
|
22
|
+
access_key_id: configuration.aws_access_key_id,
|
23
|
+
secret_access_key: configuration.aws_secret_access_key,
|
24
|
+
profile: configuration.aws_profile
|
25
|
+
}.compact
|
28
26
|
end
|
29
27
|
end
|
30
28
|
end
|
data/lib/egis/client.rb
CHANGED
@@ -4,6 +4,9 @@ module Egis
|
|
4
4
|
##
|
5
5
|
# The most fundamental {Egis} class. Provides an interface for executing Athena queries.
|
6
6
|
#
|
7
|
+
# @yieldparam config [Egis::Configuration] Egis configuration block, if missing Egis will use global configuration
|
8
|
+
# provided by {Egis.configure}
|
9
|
+
#
|
7
10
|
# See configuration instructions {Egis.configure}.
|
8
11
|
#
|
9
12
|
# @see Egis.configure
|
@@ -33,14 +36,17 @@ module Egis
|
|
33
36
|
'CANCELLED' => Egis::QueryStatus::CANCELLED
|
34
37
|
}.freeze
|
35
38
|
|
36
|
-
|
39
|
+
private_constant :QUERY_STATUS_MAPPING
|
37
40
|
|
38
|
-
|
41
|
+
attr_reader :aws_s3_client
|
39
42
|
|
40
|
-
def initialize(aws_client_provider: Egis::AwsClientProvider.new,
|
41
|
-
|
43
|
+
def initialize(aws_client_provider: Egis::AwsClientProvider.new,
|
44
|
+
s3_location_parser: Egis::S3LocationParser.new,
|
45
|
+
&block)
|
46
|
+
@configuration = block_given? ? Egis.configuration.dup.configure(&block) : Egis.configuration
|
47
|
+
@aws_athena_client = aws_client_provider.athena_client(configuration)
|
48
|
+
@aws_s3_client = aws_client_provider.s3_client(configuration)
|
42
49
|
@s3_location_parser = s3_location_parser
|
43
|
-
@query_status_backoff = Egis.configuration.query_status_backoff || DEFAULT_QUERY_STATUS_BACKOFF
|
44
50
|
end
|
45
51
|
|
46
52
|
##
|
@@ -65,14 +71,16 @@ module Egis
|
|
65
71
|
# by workgroup.
|
66
72
|
# @return [Egis::QueryStatus]
|
67
73
|
|
68
|
-
def execute_query(query, work_group: nil, database: nil, output_location: nil, async: true)
|
69
|
-
|
74
|
+
def execute_query(query, work_group: nil, database: nil, output_location: nil, async: true, system_execution: false)
|
75
|
+
query_id = aws_athena_client.start_query_execution(
|
70
76
|
query_execution_params(query, work_group, database, output_location)
|
71
77
|
).query_execution_id
|
72
78
|
|
73
|
-
|
79
|
+
log_query_execution(query, query_id, system_execution)
|
80
|
+
|
81
|
+
return query_status(query_id) if Egis.mode.async(async)
|
74
82
|
|
75
|
-
query_status = wait_for_query_to_finish(
|
83
|
+
query_status = wait_for_query_to_finish(query_id)
|
76
84
|
|
77
85
|
raise Egis::Errors::QueryExecutionError, query_status.message unless query_status.finished?
|
78
86
|
|
@@ -89,21 +97,25 @@ module Egis
|
|
89
97
|
resp = aws_athena_client.get_query_execution(query_execution_id: query_id)
|
90
98
|
|
91
99
|
query_execution = resp.query_execution
|
100
|
+
query_status = query_execution.status.state
|
101
|
+
|
102
|
+
Egis.logger.debug { "Checking query status (#{query_id}): #{query_status}" }
|
92
103
|
|
93
104
|
Egis::QueryStatus.new(
|
94
105
|
query_execution.query_execution_id,
|
95
|
-
QUERY_STATUS_MAPPING.fetch(
|
106
|
+
QUERY_STATUS_MAPPING.fetch(query_status),
|
96
107
|
query_execution.status.state_change_reason,
|
97
|
-
parse_output_location(query_execution)
|
108
|
+
parse_output_location(query_execution),
|
109
|
+
client: self
|
98
110
|
)
|
99
111
|
end
|
100
112
|
|
101
113
|
private
|
102
114
|
|
103
|
-
attr_reader :
|
115
|
+
attr_reader :configuration, :aws_athena_client, :s3_location_parser
|
104
116
|
|
105
117
|
def query_execution_params(query, work_group, database, output_location)
|
106
|
-
work_group_params = work_group ||
|
118
|
+
work_group_params = work_group || configuration.work_group
|
107
119
|
|
108
120
|
params = {query_string: query}
|
109
121
|
params[:work_group] = work_group_params if work_group_params
|
@@ -112,11 +124,20 @@ module Egis
|
|
112
124
|
params
|
113
125
|
end
|
114
126
|
|
115
|
-
def
|
127
|
+
def log_query_execution(query, query_id, system_execution)
|
128
|
+
if system_execution
|
129
|
+
Egis.logger.debug { "Executing system query (#{query_id}): #{query.gsub(/\s+/, ' ')}" }
|
130
|
+
else
|
131
|
+
Egis.logger.info { "Executing query (#{query_id}): #{query.gsub(/\s+/, ' ')}" }
|
132
|
+
end
|
133
|
+
end
|
134
|
+
|
135
|
+
def wait_for_query_to_finish(query_id)
|
116
136
|
attempt = 1
|
117
137
|
loop do
|
118
|
-
sleep(query_status_backoff.call(attempt))
|
119
|
-
status = query_status(
|
138
|
+
sleep(configuration.query_status_backoff.call(attempt))
|
139
|
+
status = query_status(query_id)
|
140
|
+
|
120
141
|
return status unless status.queued? || status.running?
|
121
142
|
|
122
143
|
attempt += 1
|
data/lib/egis/configuration.rb
CHANGED
@@ -4,6 +4,16 @@ module Egis
|
|
4
4
|
# @!visibility private
|
5
5
|
class Configuration
|
6
6
|
attr_accessor :work_group, :aws_region, :aws_access_key_id, :aws_secret_access_key, :aws_profile,
|
7
|
-
:query_status_backoff, :testing_s3_bucket
|
7
|
+
:query_status_backoff, :testing_s3_bucket, :logger
|
8
|
+
|
9
|
+
def initialize
|
10
|
+
@logger = Logger.new(STDOUT, level: :info)
|
11
|
+
@query_status_backoff = ->(attempt) { 1.5**attempt - 1 }
|
12
|
+
end
|
13
|
+
|
14
|
+
def configure
|
15
|
+
yield(self)
|
16
|
+
self
|
17
|
+
end
|
8
18
|
end
|
9
19
|
end
|
data/lib/egis/database.rb
CHANGED
@@ -10,24 +10,30 @@ module Egis
|
|
10
10
|
#
|
11
11
|
# It is recommended to create database objects using {Egis::Client#database} method.
|
12
12
|
#
|
13
|
+
# @!attribute [r] name
|
14
|
+
# @return [String] Athena database name
|
15
|
+
#
|
13
16
|
class Database
|
14
|
-
def initialize(
|
17
|
+
def initialize(name, client: Egis::Client.new, output_downloader: Egis::OutputDownloader.new(client.aws_s3_client))
|
15
18
|
@client = client
|
16
|
-
@database_name = database_name
|
17
19
|
@output_downloader = output_downloader
|
20
|
+
@name = name
|
18
21
|
end
|
19
22
|
|
23
|
+
attr_reader :name
|
24
|
+
|
20
25
|
##
|
21
26
|
# Creates {Egis::Table} object. Executing it doesn't create Athena table yet.
|
22
27
|
#
|
23
28
|
# @param [String] table_name
|
24
29
|
# @param [Egis::TableSchema] table_schema
|
25
30
|
# @param [String] table_location S3 URL with table location (e.g. `s3://s3_bucket/table/location/`)
|
26
|
-
# @param [:tsv, :csv, :orc] format Table format
|
31
|
+
# @param [:tsv, :csv, :orc, {serde: 'SerdeClass', serde_properties: {property: value}}] format Table format
|
32
|
+
# (defaults to :tsv)
|
27
33
|
# @return [Egis::Table]
|
28
34
|
|
29
35
|
def table(table_name, table_schema, table_location, **options)
|
30
|
-
Table.new(self, table_name, table_schema, table_location, options: options)
|
36
|
+
Table.new(self, table_name, table_schema, table_location, client: client, options: options)
|
31
37
|
end
|
32
38
|
|
33
39
|
##
|
@@ -36,7 +42,10 @@ module Egis
|
|
36
42
|
# @return [void]
|
37
43
|
|
38
44
|
def create
|
39
|
-
|
45
|
+
log_database_creation
|
46
|
+
|
47
|
+
client.execute_query("CREATE DATABASE IF NOT EXISTS #{translate_name(name)};", async: false,
|
48
|
+
system_execution: true)
|
40
49
|
end
|
41
50
|
|
42
51
|
##
|
@@ -45,7 +54,9 @@ module Egis
|
|
45
54
|
# @return [void]
|
46
55
|
|
47
56
|
def create!
|
48
|
-
|
57
|
+
log_database_creation
|
58
|
+
|
59
|
+
client.execute_query("CREATE DATABASE #{translate_name(name)};", async: false, system_execution: true)
|
49
60
|
end
|
50
61
|
|
51
62
|
##
|
@@ -54,7 +65,10 @@ module Egis
|
|
54
65
|
# @return [void]
|
55
66
|
|
56
67
|
def drop
|
57
|
-
|
68
|
+
log_database_removal
|
69
|
+
|
70
|
+
client.execute_query("DROP DATABASE IF EXISTS #{translate_name(name)} CASCADE;", async: false,
|
71
|
+
system_execution: true)
|
58
72
|
end
|
59
73
|
|
60
74
|
##
|
@@ -63,14 +77,16 @@ module Egis
|
|
63
77
|
# @return [void]
|
64
78
|
|
65
79
|
def drop!
|
66
|
-
|
80
|
+
log_database_removal
|
81
|
+
|
82
|
+
client.execute_query("DROP DATABASE #{translate_name(name)} CASCADE;", async: false, system_execution: true)
|
67
83
|
end
|
68
84
|
|
69
85
|
##
|
70
86
|
# (see Egis::Client#execute_query)
|
71
87
|
|
72
88
|
def execute_query(query, **options)
|
73
|
-
client.execute_query(query, **{database:
|
89
|
+
client.execute_query(query, **{database: name, **options})
|
74
90
|
end
|
75
91
|
|
76
92
|
##
|
@@ -86,14 +102,22 @@ module Egis
|
|
86
102
|
# @return [Boolean]
|
87
103
|
|
88
104
|
def exists?
|
89
|
-
query_status = client.execute_query("SHOW DATABASES LIKE '#{
|
105
|
+
query_status = client.execute_query("SHOW DATABASES LIKE '#{name}';", async: false, system_execution: true)
|
90
106
|
parsed_result = output_downloader.download(query_status.output_location)
|
91
|
-
parsed_result.flatten.include?(
|
107
|
+
parsed_result.flatten.include?(name)
|
92
108
|
end
|
93
109
|
|
94
110
|
private
|
95
111
|
|
96
|
-
attr_reader :client, :
|
112
|
+
attr_reader :client, :output_downloader
|
113
|
+
|
114
|
+
def log_database_creation
|
115
|
+
Egis.logger.info { "Creating database #{name}" }
|
116
|
+
end
|
117
|
+
|
118
|
+
def log_database_removal
|
119
|
+
Egis.logger.info { "Removing database #{name}" }
|
120
|
+
end
|
97
121
|
|
98
122
|
def translate_name(name)
|
99
123
|
Egis.mode.database_name(name)
|
@@ -5,8 +5,8 @@ require 'csv'
|
|
5
5
|
module Egis
|
6
6
|
# @!visibility private
|
7
7
|
class OutputDownloader
|
8
|
-
def initialize(
|
9
|
-
@s3_client =
|
8
|
+
def initialize(aws_s3_client)
|
9
|
+
@s3_client = aws_s3_client
|
10
10
|
end
|
11
11
|
|
12
12
|
def download(output_location)
|
data/lib/egis/query_status.rb
CHANGED
@@ -23,7 +23,8 @@ module Egis
|
|
23
23
|
attr_reader :id, :status, :message, :output_location
|
24
24
|
|
25
25
|
def initialize(id, status, message, output_location,
|
26
|
-
|
26
|
+
client: Egis::Client.new,
|
27
|
+
output_downloader: Egis::OutputDownloader.new(client.aws_s3_client),
|
27
28
|
output_parser: Egis::OutputParser.new)
|
28
29
|
raise ArgumentError, "Unsupported status #{status}" unless STATUSES.include?(status)
|
29
30
|
|
@@ -51,6 +52,10 @@ module Egis
|
|
51
52
|
status == RUNNING
|
52
53
|
end
|
53
54
|
|
55
|
+
def cancelled?
|
56
|
+
status == CANCELLED
|
57
|
+
end
|
58
|
+
|
54
59
|
def in_progress?
|
55
60
|
[RUNNING, QUEUED].include?(status)
|
56
61
|
end
|
data/lib/egis/s3_cleaner.rb
CHANGED
@@ -3,8 +3,8 @@
|
|
3
3
|
module Egis
|
4
4
|
# @!visibility private
|
5
5
|
class S3Cleaner
|
6
|
-
def initialize(
|
7
|
-
@s3_client =
|
6
|
+
def initialize(aws_s3_client)
|
7
|
+
@s3_client = aws_s3_client
|
8
8
|
end
|
9
9
|
|
10
10
|
def delete(bucket, prefix)
|
data/lib/egis/table.rb
CHANGED
@@ -9,7 +9,7 @@ module Egis
|
|
9
9
|
# @!attribute [r] database
|
10
10
|
# @return [Egis::Database]
|
11
11
|
# @!attribute [r] name
|
12
|
-
# @return [String] Athena
|
12
|
+
# @return [String] Athena table name
|
13
13
|
# @!attribute [r] schema
|
14
14
|
# @return [Egis::TableSchema] table's schema object
|
15
15
|
#
|
@@ -17,11 +17,13 @@ module Egis
|
|
17
17
|
DEFAULT_OPTIONS = {format: :tsv}.freeze
|
18
18
|
|
19
19
|
def initialize(database, name, schema, location, options: {},
|
20
|
+
client: Egis::Client.new,
|
20
21
|
partitions_generator: Egis::PartitionsGenerator.new,
|
21
22
|
table_ddl_generator: Egis::TableDDLGenerator.new,
|
22
|
-
output_downloader: Egis::OutputDownloader.new,
|
23
|
+
output_downloader: Egis::OutputDownloader.new(client.aws_s3_client),
|
23
24
|
output_parser: Egis::OutputParser.new,
|
24
|
-
|
25
|
+
s3_cleaner: Egis::S3Cleaner.new(client.aws_s3_client),
|
26
|
+
table_data_wiper: Egis::TableDataWiper.new(s3_cleaner: s3_cleaner))
|
25
27
|
@database = database
|
26
28
|
@name = name
|
27
29
|
@schema = schema
|
@@ -42,8 +44,10 @@ module Egis
|
|
42
44
|
# @return [void]
|
43
45
|
|
44
46
|
def create
|
47
|
+
log_table_creation
|
48
|
+
|
45
49
|
create_table_sql = table_ddl_generator.create_table_sql(self, permissive: true)
|
46
|
-
database.execute_query(create_table_sql, async: false)
|
50
|
+
database.execute_query(create_table_sql, async: false, system_execution: true)
|
47
51
|
end
|
48
52
|
|
49
53
|
##
|
@@ -52,8 +56,10 @@ module Egis
|
|
52
56
|
# @return [void]
|
53
57
|
|
54
58
|
def create!
|
59
|
+
log_table_creation
|
60
|
+
|
55
61
|
create_table_sql = table_ddl_generator.create_table_sql(self, permissive: false)
|
56
|
-
database.execute_query(create_table_sql, async: false)
|
62
|
+
database.execute_query(create_table_sql, async: false, system_execution: true)
|
57
63
|
end
|
58
64
|
|
59
65
|
##
|
@@ -67,7 +73,7 @@ module Egis
|
|
67
73
|
|
68
74
|
def add_partitions(partitions)
|
69
75
|
load_partitions_query = partitions_generator.to_sql(name, partitions, permissive: true)
|
70
|
-
database.execute_query(load_partitions_query, async: false)
|
76
|
+
database.execute_query(load_partitions_query, async: false, system_execution: true)
|
71
77
|
end
|
72
78
|
|
73
79
|
##
|
@@ -76,7 +82,7 @@ module Egis
|
|
76
82
|
|
77
83
|
def add_partitions!(partitions)
|
78
84
|
load_partitions_query = partitions_generator.to_sql(name, partitions, permissive: false)
|
79
|
-
database.execute_query(load_partitions_query, async: false)
|
85
|
+
database.execute_query(load_partitions_query, async: false, system_execution: true)
|
80
86
|
end
|
81
87
|
|
82
88
|
##
|
@@ -87,18 +93,30 @@ module Egis
|
|
87
93
|
# @return [void]
|
88
94
|
|
89
95
|
def discover_partitions
|
90
|
-
database.execute_query("MSCK REPAIR TABLE #{name};", async: false)
|
96
|
+
database.execute_query("MSCK REPAIR TABLE #{name};", async: false, system_execution: true)
|
91
97
|
end
|
92
98
|
|
93
99
|
##
|
94
100
|
# Insert data into the table. Mostly useful for testing purposes.
|
95
101
|
#
|
96
|
-
# @
|
102
|
+
# @example Insert with array of arrays
|
103
|
+
# table.upload_data([
|
104
|
+
# ['hello world', 'mx', 1],
|
105
|
+
# ['hello again', 'us', 2]
|
106
|
+
# ])
|
107
|
+
#
|
108
|
+
# @example Insert with array of hashes
|
109
|
+
# table.upload_data([
|
110
|
+
# {message: 'hello world', country: 'mx', type: 1},
|
111
|
+
# {message: 'hello again', country: 'us', type: 2}
|
112
|
+
# ])
|
113
|
+
#
|
114
|
+
# @param [Array] rows Array of arrays or hashes with row values
|
97
115
|
# @return [void]
|
98
116
|
|
99
117
|
def upload_data(rows)
|
100
118
|
query = data_insert_query(rows)
|
101
|
-
database.execute_query(query, async: false)
|
119
|
+
database.execute_query(query, async: false, system_execution: true)
|
102
120
|
end
|
103
121
|
|
104
122
|
##
|
@@ -107,7 +125,7 @@ module Egis
|
|
107
125
|
# @return [Array] Array of arrays with row values.
|
108
126
|
|
109
127
|
def download_data
|
110
|
-
result = database.execute_query("SELECT * FROM #{name};", async: false)
|
128
|
+
result = database.execute_query("SELECT * FROM #{name};", async: false, system_execution: true)
|
111
129
|
content = output_downloader.download(result.output_location)
|
112
130
|
output_parser.parse(content, column_types)
|
113
131
|
end
|
@@ -141,23 +159,37 @@ module Egis
|
|
141
159
|
attr_reader :options, :partitions_generator, :table_ddl_generator, :output_downloader, :output_parser,
|
142
160
|
:table_data_wiper
|
143
161
|
|
144
|
-
def
|
145
|
-
|
162
|
+
def log_table_creation
|
163
|
+
Egis.logger.info { "Creating table #{database.name}.#{name} located in #{location}" }
|
146
164
|
end
|
147
165
|
|
148
166
|
def column_types
|
149
|
-
|
167
|
+
all_columns.map(&:type)
|
168
|
+
end
|
169
|
+
|
170
|
+
def all_columns
|
171
|
+
schema.columns + schema.partitions
|
150
172
|
end
|
151
173
|
|
152
174
|
def data_insert_query(rows)
|
175
|
+
insert_values = rows.map { |row| row_literal_values(row) }
|
176
|
+
row_clause = insert_values.map { |row| row_values_statement(row) }.join(",\n")
|
177
|
+
|
153
178
|
<<~SQL
|
154
179
|
INSERT INTO #{name} VALUES
|
155
|
-
#{
|
180
|
+
#{row_clause}
|
156
181
|
SQL
|
157
182
|
end
|
158
183
|
|
184
|
+
def row_literal_values(row)
|
185
|
+
all_columns.map.with_index do |column, index|
|
186
|
+
value = row.is_a?(Hash) ? row[column.name] : row[index]
|
187
|
+
Egis::Types.serializer(column.type).literal(value)
|
188
|
+
end
|
189
|
+
end
|
190
|
+
|
159
191
|
def row_values_statement(row)
|
160
|
-
"(#{row.
|
192
|
+
"(#{row.join(', ')})"
|
161
193
|
end
|
162
194
|
end
|
163
195
|
end
|
@@ -9,7 +9,7 @@ module Egis
|
|
9
9
|
#{column_definition_sql(table.schema.columns)}
|
10
10
|
)
|
11
11
|
#{partition_statement(table.schema)}
|
12
|
-
#{
|
12
|
+
#{row_format_statement(table.format)}
|
13
13
|
LOCATION '#{table.location}';
|
14
14
|
SQL
|
15
15
|
end
|
@@ -34,7 +34,30 @@ module Egis
|
|
34
34
|
columns.map { |column| "`#{column.name}` #{column.type}" }.join(",\n")
|
35
35
|
end
|
36
36
|
|
37
|
-
def
|
37
|
+
def serde?(format)
|
38
|
+
format.is_a?(Hash) && format.key?(:serde)
|
39
|
+
end
|
40
|
+
|
41
|
+
def row_format_statement(format)
|
42
|
+
return serde_row_format_statement(format) if serde?(format)
|
43
|
+
|
44
|
+
delimited_row_format_statement(format)
|
45
|
+
end
|
46
|
+
|
47
|
+
def serde_row_format_statement(format)
|
48
|
+
row_format = "ROW FORMAT SERDE '#{format[:serde]}'"
|
49
|
+
return row_format unless format.key?(:serde_properties)
|
50
|
+
|
51
|
+
serde_properties = format[:serde_properties].map { |property, value| "'#{property}' = '#{value}'" }
|
52
|
+
<<-SQL
|
53
|
+
#{row_format}
|
54
|
+
WITH SERDEPROPERTIES (
|
55
|
+
#{serde_properties.join(",\n")}
|
56
|
+
)
|
57
|
+
SQL
|
58
|
+
end
|
59
|
+
|
60
|
+
def delimited_row_format_statement(format)
|
38
61
|
case format
|
39
62
|
when :csv
|
40
63
|
"ROW FORMAT DELIMITED FIELDS TERMINATED BY ','"
|
data/lib/egis/testing.rb
CHANGED
@@ -6,7 +6,7 @@ module Egis
|
|
6
6
|
class TestingMode
|
7
7
|
def initialize(test_id, s3_bucket,
|
8
8
|
client: Egis::Client.new,
|
9
|
-
output_downloader: Egis::OutputDownloader.new,
|
9
|
+
output_downloader: Egis::OutputDownloader.new(client.aws_s3_client),
|
10
10
|
s3_location_parser: Egis::S3LocationParser.new)
|
11
11
|
@test_id = test_id
|
12
12
|
@s3_bucket = s3_bucket
|
data/lib/egis/version.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: egis
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.
|
4
|
+
version: 1.5.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Agnieszka Czereba
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date:
|
12
|
+
date: 2021-05-14 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: aws-sdk-athena
|
@@ -83,7 +83,7 @@ licenses:
|
|
83
83
|
metadata:
|
84
84
|
homepage_uri: https://github.com/u2i/egis
|
85
85
|
source_code_uri: https://github.com/u2i/egis
|
86
|
-
changelog_uri: https://github.
|
86
|
+
changelog_uri: https://u2i.github.io/egis/file.CHANGELOG.html
|
87
87
|
post_install_message:
|
88
88
|
rdoc_options: []
|
89
89
|
require_paths:
|
@@ -99,7 +99,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
99
99
|
- !ruby/object:Gem::Version
|
100
100
|
version: '0'
|
101
101
|
requirements: []
|
102
|
-
rubygems_version: 3.1.
|
102
|
+
rubygems_version: 3.1.6
|
103
103
|
signing_key:
|
104
104
|
specification_version: 4
|
105
105
|
summary: A handy wrapper for AWS Athena Ruby SDK.
|