egis 1.1.0 → 1.4.0

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 1d2255a76aef464d0d8faee0c39be753e1a928133a25de5c4a86f553e51e92ca
4
- data.tar.gz: f18ae28053651576ccf941bd42f5a9bf40db32fed3c1c2025e1bc987844199f8
3
+ metadata.gz: 4fa3323e3c02d8537df83f33799f580fb196c255315c9923eb70c0e87bd1f50e
4
+ data.tar.gz: 9b4054a2201d98f501603cc137c1c7df93fccf105c448b6d4261515c3f53d959
5
5
  SHA512:
6
- metadata.gz: 9b143dbc650f8c02ba39f1404a2d9b4ebe36c49182a36b947a1d091079b1c94839320c2d05dd9a0db1516d82c9b03694244e4fa467898302e888addb234e1291
7
- data.tar.gz: a1577d30cbfd63632dbd55052adf0f863f7a5f571e8a8f8a210649446acc786764d8bec6a15be7cd91742f4f7681bfc1d379a9099934f62b8a30379d12a4bd77
6
+ metadata.gz: 3da9b098de6948b584244db63eda8ef4ba3d26ff29806be0e1a3b3469a321bb72d0761bc4bcb415759646a2397c71d32221477cc2dabeb9233dd8cad22300cef
7
+ data.tar.gz: 5c50463ef9585563b3aa6fefbb89b4df101f93693b7e5d5f423e98fa803e614d7541639db9afe06fc1a4544205d6e3b0cbf3c166112f556f58b8c798b528571d
data/egis.gemspec CHANGED
@@ -16,7 +16,7 @@ Gem::Specification.new do |spec|
16
16
 
17
17
  spec.metadata['homepage_uri'] = spec.homepage
18
18
  spec.metadata['source_code_uri'] = spec.homepage
19
- spec.metadata['changelog_uri'] = 'https://github.com/u2i/egis/blob/master/CHANGELOG.md'
19
+ spec.metadata['changelog_uri'] = 'https://u2i.github.io/egis/file.CHANGELOG.html'
20
20
 
21
21
  # Specify which files should be added to the gem when it is released.
22
22
  # The `git ls-files -z` loads the files in the RubyGem that have been added into git.
data/lib/egis.rb CHANGED
@@ -40,6 +40,11 @@ require 'egis/s3_location_parser'
40
40
  # config.aws_profile = 'my-profile'
41
41
  # end
42
42
  #
43
+ # @example Configure logger
44
+ # Egis.configure do |config|
45
+ # config.logger = Logger.new('athena.log', level: :debug)
46
+ # end
47
+ #
43
48
  # @yield [Egis::Configuration]
44
49
  # @return [void]
45
50
  #
@@ -58,5 +63,10 @@ module Egis
58
63
  def mode
59
64
  @mode ||= Egis::StandardMode.new
60
65
  end
66
+
67
+ # @!visibility private
68
+ def logger
69
+ @configuration.logger
70
+ end
61
71
  end
62
72
  end
data/lib/egis/client.rb CHANGED
@@ -65,14 +65,16 @@ module Egis
65
65
  # by workgroup.
66
66
  # @return [Egis::QueryStatus]
67
67
 
68
- def execute_query(query, work_group: nil, database: nil, output_location: nil, async: true)
69
- query_execution_id = aws_athena_client.start_query_execution(
68
+ def execute_query(query, work_group: nil, database: nil, output_location: nil, async: true, system_execution: false)
69
+ query_id = aws_athena_client.start_query_execution(
70
70
  query_execution_params(query, work_group, database, output_location)
71
71
  ).query_execution_id
72
72
 
73
- return query_status(query_execution_id) if Egis.mode.async(async)
73
+ log_query_execution(query, query_id, system_execution)
74
74
 
75
- query_status = wait_for_query_to_finish(query_execution_id)
75
+ return query_status(query_id) if Egis.mode.async(async)
76
+
77
+ query_status = wait_for_query_to_finish(query_id)
76
78
 
77
79
  raise Egis::Errors::QueryExecutionError, query_status.message unless query_status.finished?
78
80
 
@@ -89,10 +91,13 @@ module Egis
89
91
  resp = aws_athena_client.get_query_execution(query_execution_id: query_id)
90
92
 
91
93
  query_execution = resp.query_execution
94
+ query_status = query_execution.status.state
95
+
96
+ Egis.logger.debug { "Checking query status (#{query_id}): #{query_status}" }
92
97
 
93
98
  Egis::QueryStatus.new(
94
99
  query_execution.query_execution_id,
95
- QUERY_STATUS_MAPPING.fetch(query_execution.status.state),
100
+ QUERY_STATUS_MAPPING.fetch(query_status),
96
101
  query_execution.status.state_change_reason,
97
102
  parse_output_location(query_execution)
98
103
  )
@@ -112,11 +117,20 @@ module Egis
112
117
  params
113
118
  end
114
119
 
115
- def wait_for_query_to_finish(query_execution_id)
120
+ def log_query_execution(query, query_id, system_execution)
121
+ if system_execution
122
+ Egis.logger.debug { "Executing system query (#{query_id}): #{query.gsub(/\s+/, ' ')}" }
123
+ else
124
+ Egis.logger.info { "Executing query (#{query_id}): #{query.gsub(/\s+/, ' ')}" }
125
+ end
126
+ end
127
+
128
+ def wait_for_query_to_finish(query_id)
116
129
  attempt = 1
117
130
  loop do
118
131
  sleep(query_status_backoff.call(attempt))
119
- status = query_status(query_execution_id)
132
+ status = query_status(query_id)
133
+
120
134
  return status unless status.queued? || status.running?
121
135
 
122
136
  attempt += 1
@@ -4,6 +4,10 @@ module Egis
4
4
  # @!visibility private
5
5
  class Configuration
6
6
  attr_accessor :work_group, :aws_region, :aws_access_key_id, :aws_secret_access_key, :aws_profile,
7
- :query_status_backoff, :testing_s3_bucket
7
+ :query_status_backoff, :testing_s3_bucket, :logger
8
+
9
+ def initialize
10
+ @logger = Logger.new(STDOUT, level: :info)
11
+ end
8
12
  end
9
13
  end
data/lib/egis/database.rb CHANGED
@@ -10,20 +10,26 @@ module Egis
10
10
  #
11
11
  # It is recommended to create database objects using {Egis::Client#database} method.
12
12
  #
13
+ # @!attribute [r] name
14
+ # @return [String] Athena database name
15
+ #
13
16
  class Database
14
- def initialize(database_name, client: Egis::Client.new, output_downloader: Egis::OutputDownloader.new)
17
+ def initialize(name, client: Egis::Client.new, output_downloader: Egis::OutputDownloader.new)
15
18
  @client = client
16
- @database_name = database_name
19
+ @name = name
17
20
  @output_downloader = output_downloader
18
21
  end
19
22
 
23
+ attr_reader :name
24
+
20
25
  ##
21
26
  # Creates {Egis::Table} object. Executing it doesn't create Athena table yet.
22
27
  #
23
28
  # @param [String] table_name
24
29
  # @param [Egis::TableSchema] table_schema
25
30
  # @param [String] table_location S3 URL with table location (e.g. `s3://s3_bucket/table/location/`)
26
- # @param [:tsv, :csv, :orc] format Table format (defaults to :tsv)
31
+ # @param [:tsv, :csv, :orc, {serde: 'SerdeClass', serde_properties: {property: value}}] format Table format
32
+ # (defaults to :tsv)
27
33
  # @return [Egis::Table]
28
34
 
29
35
  def table(table_name, table_schema, table_location, **options)
@@ -36,7 +42,10 @@ module Egis
36
42
  # @return [void]
37
43
 
38
44
  def create
39
- client.execute_query("CREATE DATABASE IF NOT EXISTS #{translate_name(database_name)};", async: false)
45
+ log_database_creation
46
+
47
+ client.execute_query("CREATE DATABASE IF NOT EXISTS #{translate_name(name)};", async: false,
48
+ system_execution: true)
40
49
  end
41
50
 
42
51
  ##
@@ -45,7 +54,9 @@ module Egis
45
54
  # @return [void]
46
55
 
47
56
  def create!
48
- client.execute_query("CREATE DATABASE #{translate_name(database_name)};", async: false)
57
+ log_database_creation
58
+
59
+ client.execute_query("CREATE DATABASE #{translate_name(name)};", async: false, system_execution: true)
49
60
  end
50
61
 
51
62
  ##
@@ -54,7 +65,10 @@ module Egis
54
65
  # @return [void]
55
66
 
56
67
  def drop
57
- client.execute_query("DROP DATABASE IF EXISTS #{translate_name(database_name)} CASCADE;", async: false)
68
+ log_database_removal
69
+
70
+ client.execute_query("DROP DATABASE IF EXISTS #{translate_name(name)} CASCADE;", async: false,
71
+ system_execution: true)
58
72
  end
59
73
 
60
74
  ##
@@ -63,14 +77,16 @@ module Egis
63
77
  # @return [void]
64
78
 
65
79
  def drop!
66
- client.execute_query("DROP DATABASE #{translate_name(database_name)} CASCADE;", async: false)
80
+ log_database_removal
81
+
82
+ client.execute_query("DROP DATABASE #{translate_name(name)} CASCADE;", async: false, system_execution: true)
67
83
  end
68
84
 
69
85
  ##
70
86
  # (see Egis::Client#execute_query)
71
87
 
72
88
  def execute_query(query, **options)
73
- client.execute_query(query, **{database: database_name, **options})
89
+ client.execute_query(query, **{database: name, **options})
74
90
  end
75
91
 
76
92
  ##
@@ -86,14 +102,22 @@ module Egis
86
102
  # @return [Boolean]
87
103
 
88
104
  def exists?
89
- query_status = client.execute_query("SHOW DATABASES LIKE '#{database_name}';", async: false)
105
+ query_status = client.execute_query("SHOW DATABASES LIKE '#{name}';", async: false, system_execution: true)
90
106
  parsed_result = output_downloader.download(query_status.output_location)
91
- parsed_result.flatten.include?(database_name)
107
+ parsed_result.flatten.include?(name)
92
108
  end
93
109
 
94
110
  private
95
111
 
96
- attr_reader :client, :database_name, :output_downloader
112
+ attr_reader :client, :output_downloader
113
+
114
+ def log_database_creation
115
+ Egis.logger.info { "Creating database #{name}" }
116
+ end
117
+
118
+ def log_database_removal
119
+ Egis.logger.info { "Removing database #{name}" }
120
+ end
97
121
 
98
122
  def translate_name(name)
99
123
  Egis.mode.database_name(name)
data/lib/egis/table.rb CHANGED
@@ -9,7 +9,7 @@ module Egis
9
9
  # @!attribute [r] database
10
10
  # @return [Egis::Database]
11
11
  # @!attribute [r] name
12
- # @return [String] Athena database name
12
+ # @return [String] Athena table name
13
13
  # @!attribute [r] schema
14
14
  # @return [Egis::TableSchema] table's schema object
15
15
  #
@@ -42,8 +42,10 @@ module Egis
42
42
  # @return [void]
43
43
 
44
44
  def create
45
+ log_table_creation
46
+
45
47
  create_table_sql = table_ddl_generator.create_table_sql(self, permissive: true)
46
- database.execute_query(create_table_sql, async: false)
48
+ database.execute_query(create_table_sql, async: false, system_execution: true)
47
49
  end
48
50
 
49
51
  ##
@@ -52,8 +54,10 @@ module Egis
52
54
  # @return [void]
53
55
 
54
56
  def create!
57
+ log_table_creation
58
+
55
59
  create_table_sql = table_ddl_generator.create_table_sql(self, permissive: false)
56
- database.execute_query(create_table_sql, async: false)
60
+ database.execute_query(create_table_sql, async: false, system_execution: true)
57
61
  end
58
62
 
59
63
  ##
@@ -67,7 +71,7 @@ module Egis
67
71
 
68
72
  def add_partitions(partitions)
69
73
  load_partitions_query = partitions_generator.to_sql(name, partitions, permissive: true)
70
- database.execute_query(load_partitions_query, async: false)
74
+ database.execute_query(load_partitions_query, async: false, system_execution: true)
71
75
  end
72
76
 
73
77
  ##
@@ -76,7 +80,7 @@ module Egis
76
80
 
77
81
  def add_partitions!(partitions)
78
82
  load_partitions_query = partitions_generator.to_sql(name, partitions, permissive: false)
79
- database.execute_query(load_partitions_query, async: false)
83
+ database.execute_query(load_partitions_query, async: false, system_execution: true)
80
84
  end
81
85
 
82
86
  ##
@@ -87,18 +91,30 @@ module Egis
87
91
  # @return [void]
88
92
 
89
93
  def discover_partitions
90
- database.execute_query("MSCK REPAIR TABLE #{name};", async: false)
94
+ database.execute_query("MSCK REPAIR TABLE #{name};", async: false, system_execution: true)
91
95
  end
92
96
 
93
97
  ##
94
98
  # Insert data into the table. Mostly useful for testing purposes.
95
99
  #
96
- # @param [Array] rows Array of arrays with row values
100
+ # @example Insert with array of arrays
101
+ # table.upload_data([
102
+ # ['hello world', 'mx', 1],
103
+ # ['hello again', 'us', 2]
104
+ # ])
105
+ #
106
+ # @example Insert with array of hashes
107
+ # table.upload_data([
108
+ # {message: 'hello world', country: 'mx', type: 1},
109
+ # {message: 'hello again', country: 'us', type: 2}
110
+ # ])
111
+ #
112
+ # @param [Array] rows Array of arrays or hashes with row values
97
113
  # @return [void]
98
114
 
99
115
  def upload_data(rows)
100
116
  query = data_insert_query(rows)
101
- database.execute_query(query, async: false)
117
+ database.execute_query(query, async: false, system_execution: true)
102
118
  end
103
119
 
104
120
  ##
@@ -107,7 +123,7 @@ module Egis
107
123
  # @return [Array] Array of arrays with row values.
108
124
 
109
125
  def download_data
110
- result = database.execute_query("SELECT * FROM #{name};", async: false)
126
+ result = database.execute_query("SELECT * FROM #{name};", async: false, system_execution: true)
111
127
  content = output_downloader.download(result.output_location)
112
128
  output_parser.parse(content, column_types)
113
129
  end
@@ -141,23 +157,37 @@ module Egis
141
157
  attr_reader :options, :partitions_generator, :table_ddl_generator, :output_downloader, :output_parser,
142
158
  :table_data_wiper
143
159
 
144
- def column_serializers
145
- @column_serializers ||= column_types.map { |type| Egis::Types.serializer(type) }
160
+ def log_table_creation
161
+ Egis.logger.info { "Creating table #{database.name}.#{name} located in #{location}" }
146
162
  end
147
163
 
148
164
  def column_types
149
- (schema.columns + schema.partitions).map(&:type)
165
+ all_columns.map(&:type)
166
+ end
167
+
168
+ def all_columns
169
+ schema.columns + schema.partitions
150
170
  end
151
171
 
152
172
  def data_insert_query(rows)
173
+ insert_values = rows.map { |row| row_literal_values(row) }
174
+ row_clause = insert_values.map { |row| row_values_statement(row) }.join(",\n")
175
+
153
176
  <<~SQL
154
177
  INSERT INTO #{name} VALUES
155
- #{rows.map { |row| row_values_statement(row) }.join(",\n")};
178
+ #{row_clause}
156
179
  SQL
157
180
  end
158
181
 
182
+ def row_literal_values(row)
183
+ all_columns.map.with_index do |column, index|
184
+ value = row.is_a?(Hash) ? row[column.name] : row[index]
185
+ Egis::Types.serializer(column.type).literal(value)
186
+ end
187
+ end
188
+
159
189
  def row_values_statement(row)
160
- "(#{row.zip(column_serializers).map { |value, serializer| serializer.literal(value) }.join(', ')})"
190
+ "(#{row.join(', ')})"
161
191
  end
162
192
  end
163
193
  end
@@ -9,7 +9,7 @@ module Egis
9
9
  #{column_definition_sql(table.schema.columns)}
10
10
  )
11
11
  #{partition_statement(table.schema)}
12
- #{format_statement(table.format)}
12
+ #{row_format_statement(table.format)}
13
13
  LOCATION '#{table.location}';
14
14
  SQL
15
15
  end
@@ -34,7 +34,30 @@ module Egis
34
34
  columns.map { |column| "`#{column.name}` #{column.type}" }.join(",\n")
35
35
  end
36
36
 
37
- def format_statement(format)
37
+ def serde?(format)
38
+ format.is_a?(Hash) && format.key?(:serde)
39
+ end
40
+
41
+ def row_format_statement(format)
42
+ return serde_row_format_statement(format) if serde?(format)
43
+
44
+ delimited_row_format_statement(format)
45
+ end
46
+
47
+ def serde_row_format_statement(format)
48
+ row_format = "ROW FORMAT SERDE '#{format[:serde]}'"
49
+ return row_format unless format.key?(:serde_properties)
50
+
51
+ serde_properties = format[:serde_properties].map { |property, value| "'#{property}' = '#{value}'" }
52
+ <<-SQL
53
+ #{row_format}
54
+ WITH SERDEPROPERTIES (
55
+ #{serde_properties.join(",\n")}
56
+ )
57
+ SQL
58
+ end
59
+
60
+ def delimited_row_format_statement(format)
38
61
  case format
39
62
  when :csv
40
63
  "ROW FORMAT DELIMITED FIELDS TERMINATED BY ','"
data/lib/egis/testing.rb CHANGED
@@ -43,6 +43,6 @@ module Egis # rubocop:disable Style/Documentation
43
43
  yield
44
44
  ensure
45
45
  @mode = previous_mode
46
- test_mode.cleanup
46
+ test_mode&.cleanup
47
47
  end
48
48
  end
data/lib/egis/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Egis
4
- VERSION = '1.1.0'
4
+ VERSION = '1.4.0'
5
5
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: egis
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.1.0
4
+ version: 1.4.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Agnieszka Czereba
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2020-05-15 00:00:00.000000000 Z
12
+ date: 2021-03-12 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: aws-sdk-athena
@@ -83,7 +83,7 @@ licenses:
83
83
  metadata:
84
84
  homepage_uri: https://github.com/u2i/egis
85
85
  source_code_uri: https://github.com/u2i/egis
86
- changelog_uri: https://github.com/u2i/egis/blob/master/CHANGELOG.md
86
+ changelog_uri: https://u2i.github.io/egis/file.CHANGELOG.html
87
87
  post_install_message:
88
88
  rdoc_options: []
89
89
  require_paths:
@@ -99,7 +99,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
99
99
  - !ruby/object:Gem::Version
100
100
  version: '0'
101
101
  requirements: []
102
- rubygems_version: 3.1.2
102
+ rubygems_version: 3.1.4
103
103
  signing_key:
104
104
  specification_version: 4
105
105
  summary: A handy wrapper for AWS Athena Ruby SDK.