egis 1.1.1 → 1.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/egis.gemspec +1 -1
- data/lib/egis.rb +12 -2
- data/lib/egis/aws_client_provider.rb +11 -13
- data/lib/egis/client.rb +37 -16
- data/lib/egis/configuration.rb +11 -1
- data/lib/egis/database.rb +36 -12
- data/lib/egis/output_downloader.rb +2 -2
- data/lib/egis/query_status.rb +6 -1
- data/lib/egis/s3_cleaner.rb +2 -2
- data/lib/egis/table.rb +48 -16
- data/lib/egis/table_ddl_generator.rb +25 -2
- data/lib/egis/testing.rb +1 -1
- data/lib/egis/testing/testing_mode.rb +1 -1
- data/lib/egis/version.rb +1 -1
- metadata +4 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 924727f52ac809df7ab4457a6a0c05469cd6c128adaff339018e964dc9ff4ea0
|
4
|
+
data.tar.gz: c8ba9d090641c899e832b73b062314bd76fa4fb39041fb175d7e3a717e943856
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: fa429f550a68475a75f9dd9c0610b0d41c9a8017f0e46c0f9e467cfaeddfb7d6587d6e7df5b4440bbd75e4d4d9a12d5966f40e4ce72cb8abf70e5dc2897f91d8
|
7
|
+
data.tar.gz: 3beac29ce82c267bfc2621e730a8b631f76a94f7426ac7d7901b31d204cda3320d1ab934e9c3e156ee52e5fca315728412ffd5a007bf4623dd2ac077d85399ba
|
data/egis.gemspec
CHANGED
@@ -16,7 +16,7 @@ Gem::Specification.new do |spec|
|
|
16
16
|
|
17
17
|
spec.metadata['homepage_uri'] = spec.homepage
|
18
18
|
spec.metadata['source_code_uri'] = spec.homepage
|
19
|
-
spec.metadata['changelog_uri'] = 'https://github.
|
19
|
+
spec.metadata['changelog_uri'] = 'https://u2i.github.io/egis/file.CHANGELOG.html'
|
20
20
|
|
21
21
|
# Specify which files should be added to the gem when it is released.
|
22
22
|
# The `git ls-files -z` loads the files in the RubyGem that have been added into git.
|
data/lib/egis.rb
CHANGED
@@ -40,13 +40,18 @@ require 'egis/s3_location_parser'
|
|
40
40
|
# config.aws_profile = 'my-profile'
|
41
41
|
# end
|
42
42
|
#
|
43
|
+
# @example Configure logger
|
44
|
+
# Egis.configure do |config|
|
45
|
+
# config.logger = Logger.new('athena.log', level: :debug)
|
46
|
+
# end
|
47
|
+
#
|
43
48
|
# @yield [Egis::Configuration]
|
44
49
|
# @return [void]
|
45
50
|
#
|
46
51
|
module Egis
|
47
52
|
class << self
|
48
|
-
def configure
|
49
|
-
|
53
|
+
def configure(&block)
|
54
|
+
configuration.configure(&block)
|
50
55
|
end
|
51
56
|
|
52
57
|
# @!visibility private
|
@@ -58,5 +63,10 @@ module Egis
|
|
58
63
|
def mode
|
59
64
|
@mode ||= Egis::StandardMode.new
|
60
65
|
end
|
66
|
+
|
67
|
+
# @!visibility private
|
68
|
+
def logger
|
69
|
+
@configuration.logger
|
70
|
+
end
|
61
71
|
end
|
62
72
|
end
|
@@ -6,25 +6,23 @@ require 'aws-sdk-athena'
|
|
6
6
|
module Egis
|
7
7
|
# @!visibility private
|
8
8
|
class AwsClientProvider
|
9
|
-
def s3_client
|
10
|
-
Aws::S3::Client.new(client_config)
|
9
|
+
def s3_client(configuration)
|
10
|
+
Aws::S3::Client.new(client_config(configuration))
|
11
11
|
end
|
12
12
|
|
13
|
-
def athena_client
|
14
|
-
Aws::Athena::Client.new(client_config)
|
13
|
+
def athena_client(configuration)
|
14
|
+
Aws::Athena::Client.new(client_config(configuration))
|
15
15
|
end
|
16
16
|
|
17
17
|
private
|
18
18
|
|
19
|
-
def client_config
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
config[:profile] = configuration.aws_profile if configuration.aws_profile
|
27
|
-
config
|
19
|
+
def client_config(configuration)
|
20
|
+
{
|
21
|
+
region: configuration.aws_region,
|
22
|
+
access_key_id: configuration.aws_access_key_id,
|
23
|
+
secret_access_key: configuration.aws_secret_access_key,
|
24
|
+
profile: configuration.aws_profile
|
25
|
+
}.compact
|
28
26
|
end
|
29
27
|
end
|
30
28
|
end
|
data/lib/egis/client.rb
CHANGED
@@ -4,6 +4,9 @@ module Egis
|
|
4
4
|
##
|
5
5
|
# The most fundamental {Egis} class. Provides an interface for executing Athena queries.
|
6
6
|
#
|
7
|
+
# @yieldparam config [Egis::Configuration] Egis configuration block, if missing Egis will use global configuration
|
8
|
+
# provided by {Egis.configure}
|
9
|
+
#
|
7
10
|
# See configuration instructions {Egis.configure}.
|
8
11
|
#
|
9
12
|
# @see Egis.configure
|
@@ -33,14 +36,17 @@ module Egis
|
|
33
36
|
'CANCELLED' => Egis::QueryStatus::CANCELLED
|
34
37
|
}.freeze
|
35
38
|
|
36
|
-
|
39
|
+
private_constant :QUERY_STATUS_MAPPING
|
37
40
|
|
38
|
-
|
41
|
+
attr_reader :aws_s3_client
|
39
42
|
|
40
|
-
def initialize(aws_client_provider: Egis::AwsClientProvider.new,
|
41
|
-
|
43
|
+
def initialize(aws_client_provider: Egis::AwsClientProvider.new,
|
44
|
+
s3_location_parser: Egis::S3LocationParser.new,
|
45
|
+
&block)
|
46
|
+
@configuration = block_given? ? Egis.configuration.dup.configure(&block) : Egis.configuration
|
47
|
+
@aws_athena_client = aws_client_provider.athena_client(configuration)
|
48
|
+
@aws_s3_client = aws_client_provider.s3_client(configuration)
|
42
49
|
@s3_location_parser = s3_location_parser
|
43
|
-
@query_status_backoff = Egis.configuration.query_status_backoff || DEFAULT_QUERY_STATUS_BACKOFF
|
44
50
|
end
|
45
51
|
|
46
52
|
##
|
@@ -65,14 +71,16 @@ module Egis
|
|
65
71
|
# by workgroup.
|
66
72
|
# @return [Egis::QueryStatus]
|
67
73
|
|
68
|
-
def execute_query(query, work_group: nil, database: nil, output_location: nil, async: true)
|
69
|
-
|
74
|
+
def execute_query(query, work_group: nil, database: nil, output_location: nil, async: true, system_execution: false)
|
75
|
+
query_id = aws_athena_client.start_query_execution(
|
70
76
|
query_execution_params(query, work_group, database, output_location)
|
71
77
|
).query_execution_id
|
72
78
|
|
73
|
-
|
79
|
+
log_query_execution(query, query_id, system_execution)
|
80
|
+
|
81
|
+
return query_status(query_id) if Egis.mode.async(async)
|
74
82
|
|
75
|
-
query_status = wait_for_query_to_finish(
|
83
|
+
query_status = wait_for_query_to_finish(query_id)
|
76
84
|
|
77
85
|
raise Egis::Errors::QueryExecutionError, query_status.message unless query_status.finished?
|
78
86
|
|
@@ -89,21 +97,25 @@ module Egis
|
|
89
97
|
resp = aws_athena_client.get_query_execution(query_execution_id: query_id)
|
90
98
|
|
91
99
|
query_execution = resp.query_execution
|
100
|
+
query_status = query_execution.status.state
|
101
|
+
|
102
|
+
Egis.logger.debug { "Checking query status (#{query_id}): #{query_status}" }
|
92
103
|
|
93
104
|
Egis::QueryStatus.new(
|
94
105
|
query_execution.query_execution_id,
|
95
|
-
QUERY_STATUS_MAPPING.fetch(
|
106
|
+
QUERY_STATUS_MAPPING.fetch(query_status),
|
96
107
|
query_execution.status.state_change_reason,
|
97
|
-
parse_output_location(query_execution)
|
108
|
+
parse_output_location(query_execution),
|
109
|
+
client: self
|
98
110
|
)
|
99
111
|
end
|
100
112
|
|
101
113
|
private
|
102
114
|
|
103
|
-
attr_reader :
|
115
|
+
attr_reader :configuration, :aws_athena_client, :s3_location_parser
|
104
116
|
|
105
117
|
def query_execution_params(query, work_group, database, output_location)
|
106
|
-
work_group_params = work_group ||
|
118
|
+
work_group_params = work_group || configuration.work_group
|
107
119
|
|
108
120
|
params = {query_string: query}
|
109
121
|
params[:work_group] = work_group_params if work_group_params
|
@@ -112,11 +124,20 @@ module Egis
|
|
112
124
|
params
|
113
125
|
end
|
114
126
|
|
115
|
-
def
|
127
|
+
def log_query_execution(query, query_id, system_execution)
|
128
|
+
if system_execution
|
129
|
+
Egis.logger.debug { "Executing system query (#{query_id}): #{query.gsub(/\s+/, ' ')}" }
|
130
|
+
else
|
131
|
+
Egis.logger.info { "Executing query (#{query_id}): #{query.gsub(/\s+/, ' ')}" }
|
132
|
+
end
|
133
|
+
end
|
134
|
+
|
135
|
+
def wait_for_query_to_finish(query_id)
|
116
136
|
attempt = 1
|
117
137
|
loop do
|
118
|
-
sleep(query_status_backoff.call(attempt))
|
119
|
-
status = query_status(
|
138
|
+
sleep(configuration.query_status_backoff.call(attempt))
|
139
|
+
status = query_status(query_id)
|
140
|
+
|
120
141
|
return status unless status.queued? || status.running?
|
121
142
|
|
122
143
|
attempt += 1
|
data/lib/egis/configuration.rb
CHANGED
@@ -4,6 +4,16 @@ module Egis
|
|
4
4
|
# @!visibility private
|
5
5
|
class Configuration
|
6
6
|
attr_accessor :work_group, :aws_region, :aws_access_key_id, :aws_secret_access_key, :aws_profile,
|
7
|
-
:query_status_backoff, :testing_s3_bucket
|
7
|
+
:query_status_backoff, :testing_s3_bucket, :logger
|
8
|
+
|
9
|
+
def initialize
|
10
|
+
@logger = Logger.new(STDOUT, level: :info)
|
11
|
+
@query_status_backoff = ->(attempt) { 1.5**attempt - 1 }
|
12
|
+
end
|
13
|
+
|
14
|
+
def configure
|
15
|
+
yield(self)
|
16
|
+
self
|
17
|
+
end
|
8
18
|
end
|
9
19
|
end
|
data/lib/egis/database.rb
CHANGED
@@ -10,24 +10,30 @@ module Egis
|
|
10
10
|
#
|
11
11
|
# It is recommended to create database objects using {Egis::Client#database} method.
|
12
12
|
#
|
13
|
+
# @!attribute [r] name
|
14
|
+
# @return [String] Athena database name
|
15
|
+
#
|
13
16
|
class Database
|
14
|
-
def initialize(
|
17
|
+
def initialize(name, client: Egis::Client.new, output_downloader: Egis::OutputDownloader.new(client.aws_s3_client))
|
15
18
|
@client = client
|
16
|
-
@database_name = database_name
|
17
19
|
@output_downloader = output_downloader
|
20
|
+
@name = name
|
18
21
|
end
|
19
22
|
|
23
|
+
attr_reader :name
|
24
|
+
|
20
25
|
##
|
21
26
|
# Creates {Egis::Table} object. Executing it doesn't create Athena table yet.
|
22
27
|
#
|
23
28
|
# @param [String] table_name
|
24
29
|
# @param [Egis::TableSchema] table_schema
|
25
30
|
# @param [String] table_location S3 URL with table location (e.g. `s3://s3_bucket/table/location/`)
|
26
|
-
# @param [:tsv, :csv, :orc] format Table format
|
31
|
+
# @param [:tsv, :csv, :orc, {serde: 'SerdeClass', serde_properties: {property: value}}] format Table format
|
32
|
+
# (defaults to :tsv)
|
27
33
|
# @return [Egis::Table]
|
28
34
|
|
29
35
|
def table(table_name, table_schema, table_location, **options)
|
30
|
-
Table.new(self, table_name, table_schema, table_location, options: options)
|
36
|
+
Table.new(self, table_name, table_schema, table_location, client: client, options: options)
|
31
37
|
end
|
32
38
|
|
33
39
|
##
|
@@ -36,7 +42,10 @@ module Egis
|
|
36
42
|
# @return [void]
|
37
43
|
|
38
44
|
def create
|
39
|
-
|
45
|
+
log_database_creation
|
46
|
+
|
47
|
+
client.execute_query("CREATE DATABASE IF NOT EXISTS #{translate_name(name)};", async: false,
|
48
|
+
system_execution: true)
|
40
49
|
end
|
41
50
|
|
42
51
|
##
|
@@ -45,7 +54,9 @@ module Egis
|
|
45
54
|
# @return [void]
|
46
55
|
|
47
56
|
def create!
|
48
|
-
|
57
|
+
log_database_creation
|
58
|
+
|
59
|
+
client.execute_query("CREATE DATABASE #{translate_name(name)};", async: false, system_execution: true)
|
49
60
|
end
|
50
61
|
|
51
62
|
##
|
@@ -54,7 +65,10 @@ module Egis
|
|
54
65
|
# @return [void]
|
55
66
|
|
56
67
|
def drop
|
57
|
-
|
68
|
+
log_database_removal
|
69
|
+
|
70
|
+
client.execute_query("DROP DATABASE IF EXISTS #{translate_name(name)} CASCADE;", async: false,
|
71
|
+
system_execution: true)
|
58
72
|
end
|
59
73
|
|
60
74
|
##
|
@@ -63,14 +77,16 @@ module Egis
|
|
63
77
|
# @return [void]
|
64
78
|
|
65
79
|
def drop!
|
66
|
-
|
80
|
+
log_database_removal
|
81
|
+
|
82
|
+
client.execute_query("DROP DATABASE #{translate_name(name)} CASCADE;", async: false, system_execution: true)
|
67
83
|
end
|
68
84
|
|
69
85
|
##
|
70
86
|
# (see Egis::Client#execute_query)
|
71
87
|
|
72
88
|
def execute_query(query, **options)
|
73
|
-
client.execute_query(query, **{database:
|
89
|
+
client.execute_query(query, **{database: name, **options})
|
74
90
|
end
|
75
91
|
|
76
92
|
##
|
@@ -86,14 +102,22 @@ module Egis
|
|
86
102
|
# @return [Boolean]
|
87
103
|
|
88
104
|
def exists?
|
89
|
-
query_status = client.execute_query("SHOW DATABASES LIKE '#{
|
105
|
+
query_status = client.execute_query("SHOW DATABASES LIKE '#{name}';", async: false, system_execution: true)
|
90
106
|
parsed_result = output_downloader.download(query_status.output_location)
|
91
|
-
parsed_result.flatten.include?(
|
107
|
+
parsed_result.flatten.include?(name)
|
92
108
|
end
|
93
109
|
|
94
110
|
private
|
95
111
|
|
96
|
-
attr_reader :client, :
|
112
|
+
attr_reader :client, :output_downloader
|
113
|
+
|
114
|
+
def log_database_creation
|
115
|
+
Egis.logger.info { "Creating database #{name}" }
|
116
|
+
end
|
117
|
+
|
118
|
+
def log_database_removal
|
119
|
+
Egis.logger.info { "Removing database #{name}" }
|
120
|
+
end
|
97
121
|
|
98
122
|
def translate_name(name)
|
99
123
|
Egis.mode.database_name(name)
|
@@ -5,8 +5,8 @@ require 'csv'
|
|
5
5
|
module Egis
|
6
6
|
# @!visibility private
|
7
7
|
class OutputDownloader
|
8
|
-
def initialize(
|
9
|
-
@s3_client =
|
8
|
+
def initialize(aws_s3_client)
|
9
|
+
@s3_client = aws_s3_client
|
10
10
|
end
|
11
11
|
|
12
12
|
def download(output_location)
|
data/lib/egis/query_status.rb
CHANGED
@@ -23,7 +23,8 @@ module Egis
|
|
23
23
|
attr_reader :id, :status, :message, :output_location
|
24
24
|
|
25
25
|
def initialize(id, status, message, output_location,
|
26
|
-
|
26
|
+
client: Egis::Client.new,
|
27
|
+
output_downloader: Egis::OutputDownloader.new(client.aws_s3_client),
|
27
28
|
output_parser: Egis::OutputParser.new)
|
28
29
|
raise ArgumentError, "Unsupported status #{status}" unless STATUSES.include?(status)
|
29
30
|
|
@@ -51,6 +52,10 @@ module Egis
|
|
51
52
|
status == RUNNING
|
52
53
|
end
|
53
54
|
|
55
|
+
def cancelled?
|
56
|
+
status == CANCELLED
|
57
|
+
end
|
58
|
+
|
54
59
|
def in_progress?
|
55
60
|
[RUNNING, QUEUED].include?(status)
|
56
61
|
end
|
data/lib/egis/s3_cleaner.rb
CHANGED
@@ -3,8 +3,8 @@
|
|
3
3
|
module Egis
|
4
4
|
# @!visibility private
|
5
5
|
class S3Cleaner
|
6
|
-
def initialize(
|
7
|
-
@s3_client =
|
6
|
+
def initialize(aws_s3_client)
|
7
|
+
@s3_client = aws_s3_client
|
8
8
|
end
|
9
9
|
|
10
10
|
def delete(bucket, prefix)
|
data/lib/egis/table.rb
CHANGED
@@ -9,7 +9,7 @@ module Egis
|
|
9
9
|
# @!attribute [r] database
|
10
10
|
# @return [Egis::Database]
|
11
11
|
# @!attribute [r] name
|
12
|
-
# @return [String] Athena
|
12
|
+
# @return [String] Athena table name
|
13
13
|
# @!attribute [r] schema
|
14
14
|
# @return [Egis::TableSchema] table's schema object
|
15
15
|
#
|
@@ -17,11 +17,13 @@ module Egis
|
|
17
17
|
DEFAULT_OPTIONS = {format: :tsv}.freeze
|
18
18
|
|
19
19
|
def initialize(database, name, schema, location, options: {},
|
20
|
+
client: Egis::Client.new,
|
20
21
|
partitions_generator: Egis::PartitionsGenerator.new,
|
21
22
|
table_ddl_generator: Egis::TableDDLGenerator.new,
|
22
|
-
output_downloader: Egis::OutputDownloader.new,
|
23
|
+
output_downloader: Egis::OutputDownloader.new(client.aws_s3_client),
|
23
24
|
output_parser: Egis::OutputParser.new,
|
24
|
-
|
25
|
+
s3_cleaner: Egis::S3Cleaner.new(client.aws_s3_client),
|
26
|
+
table_data_wiper: Egis::TableDataWiper.new(s3_cleaner: s3_cleaner))
|
25
27
|
@database = database
|
26
28
|
@name = name
|
27
29
|
@schema = schema
|
@@ -42,8 +44,10 @@ module Egis
|
|
42
44
|
# @return [void]
|
43
45
|
|
44
46
|
def create
|
47
|
+
log_table_creation
|
48
|
+
|
45
49
|
create_table_sql = table_ddl_generator.create_table_sql(self, permissive: true)
|
46
|
-
database.execute_query(create_table_sql, async: false)
|
50
|
+
database.execute_query(create_table_sql, async: false, system_execution: true)
|
47
51
|
end
|
48
52
|
|
49
53
|
##
|
@@ -52,8 +56,10 @@ module Egis
|
|
52
56
|
# @return [void]
|
53
57
|
|
54
58
|
def create!
|
59
|
+
log_table_creation
|
60
|
+
|
55
61
|
create_table_sql = table_ddl_generator.create_table_sql(self, permissive: false)
|
56
|
-
database.execute_query(create_table_sql, async: false)
|
62
|
+
database.execute_query(create_table_sql, async: false, system_execution: true)
|
57
63
|
end
|
58
64
|
|
59
65
|
##
|
@@ -67,7 +73,7 @@ module Egis
|
|
67
73
|
|
68
74
|
def add_partitions(partitions)
|
69
75
|
load_partitions_query = partitions_generator.to_sql(name, partitions, permissive: true)
|
70
|
-
database.execute_query(load_partitions_query, async: false)
|
76
|
+
database.execute_query(load_partitions_query, async: false, system_execution: true)
|
71
77
|
end
|
72
78
|
|
73
79
|
##
|
@@ -76,7 +82,7 @@ module Egis
|
|
76
82
|
|
77
83
|
def add_partitions!(partitions)
|
78
84
|
load_partitions_query = partitions_generator.to_sql(name, partitions, permissive: false)
|
79
|
-
database.execute_query(load_partitions_query, async: false)
|
85
|
+
database.execute_query(load_partitions_query, async: false, system_execution: true)
|
80
86
|
end
|
81
87
|
|
82
88
|
##
|
@@ -87,18 +93,30 @@ module Egis
|
|
87
93
|
# @return [void]
|
88
94
|
|
89
95
|
def discover_partitions
|
90
|
-
database.execute_query("MSCK REPAIR TABLE #{name};", async: false)
|
96
|
+
database.execute_query("MSCK REPAIR TABLE #{name};", async: false, system_execution: true)
|
91
97
|
end
|
92
98
|
|
93
99
|
##
|
94
100
|
# Insert data into the table. Mostly useful for testing purposes.
|
95
101
|
#
|
96
|
-
# @
|
102
|
+
# @example Insert with array of arrays
|
103
|
+
# table.upload_data([
|
104
|
+
# ['hello world', 'mx', 1],
|
105
|
+
# ['hello again', 'us', 2]
|
106
|
+
# ])
|
107
|
+
#
|
108
|
+
# @example Insert with array of hashes
|
109
|
+
# table.upload_data([
|
110
|
+
# {message: 'hello world', country: 'mx', type: 1},
|
111
|
+
# {message: 'hello again', country: 'us', type: 2}
|
112
|
+
# ])
|
113
|
+
#
|
114
|
+
# @param [Array] rows Array of arrays or hashes with row values
|
97
115
|
# @return [void]
|
98
116
|
|
99
117
|
def upload_data(rows)
|
100
118
|
query = data_insert_query(rows)
|
101
|
-
database.execute_query(query, async: false)
|
119
|
+
database.execute_query(query, async: false, system_execution: true)
|
102
120
|
end
|
103
121
|
|
104
122
|
##
|
@@ -107,7 +125,7 @@ module Egis
|
|
107
125
|
# @return [Array] Array of arrays with row values.
|
108
126
|
|
109
127
|
def download_data
|
110
|
-
result = database.execute_query("SELECT * FROM #{name};", async: false)
|
128
|
+
result = database.execute_query("SELECT * FROM #{name};", async: false, system_execution: true)
|
111
129
|
content = output_downloader.download(result.output_location)
|
112
130
|
output_parser.parse(content, column_types)
|
113
131
|
end
|
@@ -141,23 +159,37 @@ module Egis
|
|
141
159
|
attr_reader :options, :partitions_generator, :table_ddl_generator, :output_downloader, :output_parser,
|
142
160
|
:table_data_wiper
|
143
161
|
|
144
|
-
def
|
145
|
-
|
162
|
+
def log_table_creation
|
163
|
+
Egis.logger.info { "Creating table #{database.name}.#{name} located in #{location}" }
|
146
164
|
end
|
147
165
|
|
148
166
|
def column_types
|
149
|
-
|
167
|
+
all_columns.map(&:type)
|
168
|
+
end
|
169
|
+
|
170
|
+
def all_columns
|
171
|
+
schema.columns + schema.partitions
|
150
172
|
end
|
151
173
|
|
152
174
|
def data_insert_query(rows)
|
175
|
+
insert_values = rows.map { |row| row_literal_values(row) }
|
176
|
+
row_clause = insert_values.map { |row| row_values_statement(row) }.join(",\n")
|
177
|
+
|
153
178
|
<<~SQL
|
154
179
|
INSERT INTO #{name} VALUES
|
155
|
-
#{
|
180
|
+
#{row_clause}
|
156
181
|
SQL
|
157
182
|
end
|
158
183
|
|
184
|
+
def row_literal_values(row)
|
185
|
+
all_columns.map.with_index do |column, index|
|
186
|
+
value = row.is_a?(Hash) ? row[column.name] : row[index]
|
187
|
+
Egis::Types.serializer(column.type).literal(value)
|
188
|
+
end
|
189
|
+
end
|
190
|
+
|
159
191
|
def row_values_statement(row)
|
160
|
-
"(#{row.
|
192
|
+
"(#{row.join(', ')})"
|
161
193
|
end
|
162
194
|
end
|
163
195
|
end
|
@@ -9,7 +9,7 @@ module Egis
|
|
9
9
|
#{column_definition_sql(table.schema.columns)}
|
10
10
|
)
|
11
11
|
#{partition_statement(table.schema)}
|
12
|
-
#{
|
12
|
+
#{row_format_statement(table.format)}
|
13
13
|
LOCATION '#{table.location}';
|
14
14
|
SQL
|
15
15
|
end
|
@@ -34,7 +34,30 @@ module Egis
|
|
34
34
|
columns.map { |column| "`#{column.name}` #{column.type}" }.join(",\n")
|
35
35
|
end
|
36
36
|
|
37
|
-
def
|
37
|
+
def serde?(format)
|
38
|
+
format.is_a?(Hash) && format.key?(:serde)
|
39
|
+
end
|
40
|
+
|
41
|
+
def row_format_statement(format)
|
42
|
+
return serde_row_format_statement(format) if serde?(format)
|
43
|
+
|
44
|
+
delimited_row_format_statement(format)
|
45
|
+
end
|
46
|
+
|
47
|
+
def serde_row_format_statement(format)
|
48
|
+
row_format = "ROW FORMAT SERDE '#{format[:serde]}'"
|
49
|
+
return row_format unless format.key?(:serde_properties)
|
50
|
+
|
51
|
+
serde_properties = format[:serde_properties].map { |property, value| "'#{property}' = '#{value}'" }
|
52
|
+
<<-SQL
|
53
|
+
#{row_format}
|
54
|
+
WITH SERDEPROPERTIES (
|
55
|
+
#{serde_properties.join(",\n")}
|
56
|
+
)
|
57
|
+
SQL
|
58
|
+
end
|
59
|
+
|
60
|
+
def delimited_row_format_statement(format)
|
38
61
|
case format
|
39
62
|
when :csv
|
40
63
|
"ROW FORMAT DELIMITED FIELDS TERMINATED BY ','"
|
data/lib/egis/testing.rb
CHANGED
@@ -6,7 +6,7 @@ module Egis
|
|
6
6
|
class TestingMode
|
7
7
|
def initialize(test_id, s3_bucket,
|
8
8
|
client: Egis::Client.new,
|
9
|
-
output_downloader: Egis::OutputDownloader.new,
|
9
|
+
output_downloader: Egis::OutputDownloader.new(client.aws_s3_client),
|
10
10
|
s3_location_parser: Egis::S3LocationParser.new)
|
11
11
|
@test_id = test_id
|
12
12
|
@s3_bucket = s3_bucket
|
data/lib/egis/version.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: egis
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.
|
4
|
+
version: 1.5.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Agnieszka Czereba
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date:
|
12
|
+
date: 2021-05-14 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: aws-sdk-athena
|
@@ -83,7 +83,7 @@ licenses:
|
|
83
83
|
metadata:
|
84
84
|
homepage_uri: https://github.com/u2i/egis
|
85
85
|
source_code_uri: https://github.com/u2i/egis
|
86
|
-
changelog_uri: https://github.
|
86
|
+
changelog_uri: https://u2i.github.io/egis/file.CHANGELOG.html
|
87
87
|
post_install_message:
|
88
88
|
rdoc_options: []
|
89
89
|
require_paths:
|
@@ -99,7 +99,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
99
99
|
- !ruby/object:Gem::Version
|
100
100
|
version: '0'
|
101
101
|
requirements: []
|
102
|
-
rubygems_version: 3.1.
|
102
|
+
rubygems_version: 3.1.6
|
103
103
|
signing_key:
|
104
104
|
specification_version: 4
|
105
105
|
summary: A handy wrapper for AWS Athena Ruby SDK.
|