egis 1.1.0 → 1.4.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 1d2255a76aef464d0d8faee0c39be753e1a928133a25de5c4a86f553e51e92ca
4
- data.tar.gz: f18ae28053651576ccf941bd42f5a9bf40db32fed3c1c2025e1bc987844199f8
3
+ metadata.gz: 4fa3323e3c02d8537df83f33799f580fb196c255315c9923eb70c0e87bd1f50e
4
+ data.tar.gz: 9b4054a2201d98f501603cc137c1c7df93fccf105c448b6d4261515c3f53d959
5
5
  SHA512:
6
- metadata.gz: 9b143dbc650f8c02ba39f1404a2d9b4ebe36c49182a36b947a1d091079b1c94839320c2d05dd9a0db1516d82c9b03694244e4fa467898302e888addb234e1291
7
- data.tar.gz: a1577d30cbfd63632dbd55052adf0f863f7a5f571e8a8f8a210649446acc786764d8bec6a15be7cd91742f4f7681bfc1d379a9099934f62b8a30379d12a4bd77
6
+ metadata.gz: 3da9b098de6948b584244db63eda8ef4ba3d26ff29806be0e1a3b3469a321bb72d0761bc4bcb415759646a2397c71d32221477cc2dabeb9233dd8cad22300cef
7
+ data.tar.gz: 5c50463ef9585563b3aa6fefbb89b4df101f93693b7e5d5f423e98fa803e614d7541639db9afe06fc1a4544205d6e3b0cbf3c166112f556f58b8c798b528571d
data/egis.gemspec CHANGED
@@ -16,7 +16,7 @@ Gem::Specification.new do |spec|
16
16
 
17
17
  spec.metadata['homepage_uri'] = spec.homepage
18
18
  spec.metadata['source_code_uri'] = spec.homepage
19
- spec.metadata['changelog_uri'] = 'https://github.com/u2i/egis/blob/master/CHANGELOG.md'
19
+ spec.metadata['changelog_uri'] = 'https://u2i.github.io/egis/file.CHANGELOG.html'
20
20
 
21
21
  # Specify which files should be added to the gem when it is released.
22
22
  # The `git ls-files -z` loads the files in the RubyGem that have been added into git.
data/lib/egis.rb CHANGED
@@ -40,6 +40,11 @@ require 'egis/s3_location_parser'
40
40
  # config.aws_profile = 'my-profile'
41
41
  # end
42
42
  #
43
+ # @example Configure logger
44
+ # Egis.configure do |config|
45
+ # config.logger = Logger.new('athena.log', level: :debug)
46
+ # end
47
+ #
43
48
  # @yield [Egis::Configuration]
44
49
  # @return [void]
45
50
  #
@@ -58,5 +63,10 @@ module Egis
58
63
  def mode
59
64
  @mode ||= Egis::StandardMode.new
60
65
  end
66
+
67
+ # @!visibility private
68
+ def logger
69
+ @configuration.logger
70
+ end
61
71
  end
62
72
  end
data/lib/egis/client.rb CHANGED
@@ -65,14 +65,16 @@ module Egis
65
65
  # by workgroup.
66
66
  # @return [Egis::QueryStatus]
67
67
 
68
- def execute_query(query, work_group: nil, database: nil, output_location: nil, async: true)
69
- query_execution_id = aws_athena_client.start_query_execution(
68
+ def execute_query(query, work_group: nil, database: nil, output_location: nil, async: true, system_execution: false)
69
+ query_id = aws_athena_client.start_query_execution(
70
70
  query_execution_params(query, work_group, database, output_location)
71
71
  ).query_execution_id
72
72
 
73
- return query_status(query_execution_id) if Egis.mode.async(async)
73
+ log_query_execution(query, query_id, system_execution)
74
74
 
75
- query_status = wait_for_query_to_finish(query_execution_id)
75
+ return query_status(query_id) if Egis.mode.async(async)
76
+
77
+ query_status = wait_for_query_to_finish(query_id)
76
78
 
77
79
  raise Egis::Errors::QueryExecutionError, query_status.message unless query_status.finished?
78
80
 
@@ -89,10 +91,13 @@ module Egis
89
91
  resp = aws_athena_client.get_query_execution(query_execution_id: query_id)
90
92
 
91
93
  query_execution = resp.query_execution
94
+ query_status = query_execution.status.state
95
+
96
+ Egis.logger.debug { "Checking query status (#{query_id}): #{query_status}" }
92
97
 
93
98
  Egis::QueryStatus.new(
94
99
  query_execution.query_execution_id,
95
- QUERY_STATUS_MAPPING.fetch(query_execution.status.state),
100
+ QUERY_STATUS_MAPPING.fetch(query_status),
96
101
  query_execution.status.state_change_reason,
97
102
  parse_output_location(query_execution)
98
103
  )
@@ -112,11 +117,20 @@ module Egis
112
117
  params
113
118
  end
114
119
 
115
- def wait_for_query_to_finish(query_execution_id)
120
+ def log_query_execution(query, query_id, system_execution)
121
+ if system_execution
122
+ Egis.logger.debug { "Executing system query (#{query_id}): #{query.gsub(/\s+/, ' ')}" }
123
+ else
124
+ Egis.logger.info { "Executing query (#{query_id}): #{query.gsub(/\s+/, ' ')}" }
125
+ end
126
+ end
127
+
128
+ def wait_for_query_to_finish(query_id)
116
129
  attempt = 1
117
130
  loop do
118
131
  sleep(query_status_backoff.call(attempt))
119
- status = query_status(query_execution_id)
132
+ status = query_status(query_id)
133
+
120
134
  return status unless status.queued? || status.running?
121
135
 
122
136
  attempt += 1
@@ -4,6 +4,10 @@ module Egis
4
4
  # @!visibility private
5
5
  class Configuration
6
6
  attr_accessor :work_group, :aws_region, :aws_access_key_id, :aws_secret_access_key, :aws_profile,
7
- :query_status_backoff, :testing_s3_bucket
7
+ :query_status_backoff, :testing_s3_bucket, :logger
8
+
9
+ def initialize
10
+ @logger = Logger.new(STDOUT, level: :info)
11
+ end
8
12
  end
9
13
  end
data/lib/egis/database.rb CHANGED
@@ -10,20 +10,26 @@ module Egis
10
10
  #
11
11
  # It is recommended to create database objects using {Egis::Client#database} method.
12
12
  #
13
+ # @!attribute [r] name
14
+ # @return [String] Athena database name
15
+ #
13
16
  class Database
14
- def initialize(database_name, client: Egis::Client.new, output_downloader: Egis::OutputDownloader.new)
17
+ def initialize(name, client: Egis::Client.new, output_downloader: Egis::OutputDownloader.new)
15
18
  @client = client
16
- @database_name = database_name
19
+ @name = name
17
20
  @output_downloader = output_downloader
18
21
  end
19
22
 
23
+ attr_reader :name
24
+
20
25
  ##
21
26
  # Creates {Egis::Table} object. Executing it doesn't create Athena table yet.
22
27
  #
23
28
  # @param [String] table_name
24
29
  # @param [Egis::TableSchema] table_schema
25
30
  # @param [String] table_location S3 URL with table location (e.g. `s3://s3_bucket/table/location/`)
26
- # @param [:tsv, :csv, :orc] format Table format (defaults to :tsv)
31
+ # @param [:tsv, :csv, :orc, {serde: 'SerdeClass', serde_properties: {property: value}}] format Table format
32
+ # (defaults to :tsv)
27
33
  # @return [Egis::Table]
28
34
 
29
35
  def table(table_name, table_schema, table_location, **options)
@@ -36,7 +42,10 @@ module Egis
36
42
  # @return [void]
37
43
 
38
44
  def create
39
- client.execute_query("CREATE DATABASE IF NOT EXISTS #{translate_name(database_name)};", async: false)
45
+ log_database_creation
46
+
47
+ client.execute_query("CREATE DATABASE IF NOT EXISTS #{translate_name(name)};", async: false,
48
+ system_execution: true)
40
49
  end
41
50
 
42
51
  ##
@@ -45,7 +54,9 @@ module Egis
45
54
  # @return [void]
46
55
 
47
56
  def create!
48
- client.execute_query("CREATE DATABASE #{translate_name(database_name)};", async: false)
57
+ log_database_creation
58
+
59
+ client.execute_query("CREATE DATABASE #{translate_name(name)};", async: false, system_execution: true)
49
60
  end
50
61
 
51
62
  ##
@@ -54,7 +65,10 @@ module Egis
54
65
  # @return [void]
55
66
 
56
67
  def drop
57
- client.execute_query("DROP DATABASE IF EXISTS #{translate_name(database_name)} CASCADE;", async: false)
68
+ log_database_removal
69
+
70
+ client.execute_query("DROP DATABASE IF EXISTS #{translate_name(name)} CASCADE;", async: false,
71
+ system_execution: true)
58
72
  end
59
73
 
60
74
  ##
@@ -63,14 +77,16 @@ module Egis
63
77
  # @return [void]
64
78
 
65
79
  def drop!
66
- client.execute_query("DROP DATABASE #{translate_name(database_name)} CASCADE;", async: false)
80
+ log_database_removal
81
+
82
+ client.execute_query("DROP DATABASE #{translate_name(name)} CASCADE;", async: false, system_execution: true)
67
83
  end
68
84
 
69
85
  ##
70
86
  # (see Egis::Client#execute_query)
71
87
 
72
88
  def execute_query(query, **options)
73
- client.execute_query(query, **{database: database_name, **options})
89
+ client.execute_query(query, **{database: name, **options})
74
90
  end
75
91
 
76
92
  ##
@@ -86,14 +102,22 @@ module Egis
86
102
  # @return [Boolean]
87
103
 
88
104
  def exists?
89
- query_status = client.execute_query("SHOW DATABASES LIKE '#{database_name}';", async: false)
105
+ query_status = client.execute_query("SHOW DATABASES LIKE '#{name}';", async: false, system_execution: true)
90
106
  parsed_result = output_downloader.download(query_status.output_location)
91
- parsed_result.flatten.include?(database_name)
107
+ parsed_result.flatten.include?(name)
92
108
  end
93
109
 
94
110
  private
95
111
 
96
- attr_reader :client, :database_name, :output_downloader
112
+ attr_reader :client, :output_downloader
113
+
114
+ def log_database_creation
115
+ Egis.logger.info { "Creating database #{name}" }
116
+ end
117
+
118
+ def log_database_removal
119
+ Egis.logger.info { "Removing database #{name}" }
120
+ end
97
121
 
98
122
  def translate_name(name)
99
123
  Egis.mode.database_name(name)
data/lib/egis/table.rb CHANGED
@@ -9,7 +9,7 @@ module Egis
9
9
  # @!attribute [r] database
10
10
  # @return [Egis::Database]
11
11
  # @!attribute [r] name
12
- # @return [String] Athena database name
12
+ # @return [String] Athena table name
13
13
  # @!attribute [r] schema
14
14
  # @return [Egis::TableSchema] table's schema object
15
15
  #
@@ -42,8 +42,10 @@ module Egis
42
42
  # @return [void]
43
43
 
44
44
  def create
45
+ log_table_creation
46
+
45
47
  create_table_sql = table_ddl_generator.create_table_sql(self, permissive: true)
46
- database.execute_query(create_table_sql, async: false)
48
+ database.execute_query(create_table_sql, async: false, system_execution: true)
47
49
  end
48
50
 
49
51
  ##
@@ -52,8 +54,10 @@ module Egis
52
54
  # @return [void]
53
55
 
54
56
  def create!
57
+ log_table_creation
58
+
55
59
  create_table_sql = table_ddl_generator.create_table_sql(self, permissive: false)
56
- database.execute_query(create_table_sql, async: false)
60
+ database.execute_query(create_table_sql, async: false, system_execution: true)
57
61
  end
58
62
 
59
63
  ##
@@ -67,7 +71,7 @@ module Egis
67
71
 
68
72
  def add_partitions(partitions)
69
73
  load_partitions_query = partitions_generator.to_sql(name, partitions, permissive: true)
70
- database.execute_query(load_partitions_query, async: false)
74
+ database.execute_query(load_partitions_query, async: false, system_execution: true)
71
75
  end
72
76
 
73
77
  ##
@@ -76,7 +80,7 @@ module Egis
76
80
 
77
81
  def add_partitions!(partitions)
78
82
  load_partitions_query = partitions_generator.to_sql(name, partitions, permissive: false)
79
- database.execute_query(load_partitions_query, async: false)
83
+ database.execute_query(load_partitions_query, async: false, system_execution: true)
80
84
  end
81
85
 
82
86
  ##
@@ -87,18 +91,30 @@ module Egis
87
91
  # @return [void]
88
92
 
89
93
  def discover_partitions
90
- database.execute_query("MSCK REPAIR TABLE #{name};", async: false)
94
+ database.execute_query("MSCK REPAIR TABLE #{name};", async: false, system_execution: true)
91
95
  end
92
96
 
93
97
  ##
94
98
  # Insert data into the table. Mostly useful for testing purposes.
95
99
  #
96
- # @param [Array] rows Array of arrays with row values
100
+ # @example Insert with array of arrays
101
+ # table.upload_data([
102
+ # ['hello world', 'mx', 1],
103
+ # ['hello again', 'us', 2]
104
+ # ])
105
+ #
106
+ # @example Insert with array of hashes
107
+ # table.upload_data([
108
+ # {message: 'hello world', country: 'mx', type: 1},
109
+ # {message: 'hello again', country: 'us', type: 2}
110
+ # ])
111
+ #
112
+ # @param [Array] rows Array of arrays or hashes with row values
97
113
  # @return [void]
98
114
 
99
115
  def upload_data(rows)
100
116
  query = data_insert_query(rows)
101
- database.execute_query(query, async: false)
117
+ database.execute_query(query, async: false, system_execution: true)
102
118
  end
103
119
 
104
120
  ##
@@ -107,7 +123,7 @@ module Egis
107
123
  # @return [Array] Array of arrays with row values.
108
124
 
109
125
  def download_data
110
- result = database.execute_query("SELECT * FROM #{name};", async: false)
126
+ result = database.execute_query("SELECT * FROM #{name};", async: false, system_execution: true)
111
127
  content = output_downloader.download(result.output_location)
112
128
  output_parser.parse(content, column_types)
113
129
  end
@@ -141,23 +157,37 @@ module Egis
141
157
  attr_reader :options, :partitions_generator, :table_ddl_generator, :output_downloader, :output_parser,
142
158
  :table_data_wiper
143
159
 
144
- def column_serializers
145
- @column_serializers ||= column_types.map { |type| Egis::Types.serializer(type) }
160
+ def log_table_creation
161
+ Egis.logger.info { "Creating table #{database.name}.#{name} located in #{location}" }
146
162
  end
147
163
 
148
164
  def column_types
149
- (schema.columns + schema.partitions).map(&:type)
165
+ all_columns.map(&:type)
166
+ end
167
+
168
+ def all_columns
169
+ schema.columns + schema.partitions
150
170
  end
151
171
 
152
172
  def data_insert_query(rows)
173
+ insert_values = rows.map { |row| row_literal_values(row) }
174
+ row_clause = insert_values.map { |row| row_values_statement(row) }.join(",\n")
175
+
153
176
  <<~SQL
154
177
  INSERT INTO #{name} VALUES
155
- #{rows.map { |row| row_values_statement(row) }.join(",\n")};
178
+ #{row_clause}
156
179
  SQL
157
180
  end
158
181
 
182
+ def row_literal_values(row)
183
+ all_columns.map.with_index do |column, index|
184
+ value = row.is_a?(Hash) ? row[column.name] : row[index]
185
+ Egis::Types.serializer(column.type).literal(value)
186
+ end
187
+ end
188
+
159
189
  def row_values_statement(row)
160
- "(#{row.zip(column_serializers).map { |value, serializer| serializer.literal(value) }.join(', ')})"
190
+ "(#{row.join(', ')})"
161
191
  end
162
192
  end
163
193
  end
@@ -9,7 +9,7 @@ module Egis
9
9
  #{column_definition_sql(table.schema.columns)}
10
10
  )
11
11
  #{partition_statement(table.schema)}
12
- #{format_statement(table.format)}
12
+ #{row_format_statement(table.format)}
13
13
  LOCATION '#{table.location}';
14
14
  SQL
15
15
  end
@@ -34,7 +34,30 @@ module Egis
34
34
  columns.map { |column| "`#{column.name}` #{column.type}" }.join(",\n")
35
35
  end
36
36
 
37
- def format_statement(format)
37
+ def serde?(format)
38
+ format.is_a?(Hash) && format.key?(:serde)
39
+ end
40
+
41
+ def row_format_statement(format)
42
+ return serde_row_format_statement(format) if serde?(format)
43
+
44
+ delimited_row_format_statement(format)
45
+ end
46
+
47
+ def serde_row_format_statement(format)
48
+ row_format = "ROW FORMAT SERDE '#{format[:serde]}'"
49
+ return row_format unless format.key?(:serde_properties)
50
+
51
+ serde_properties = format[:serde_properties].map { |property, value| "'#{property}' = '#{value}'" }
52
+ <<-SQL
53
+ #{row_format}
54
+ WITH SERDEPROPERTIES (
55
+ #{serde_properties.join(",\n")}
56
+ )
57
+ SQL
58
+ end
59
+
60
+ def delimited_row_format_statement(format)
38
61
  case format
39
62
  when :csv
40
63
  "ROW FORMAT DELIMITED FIELDS TERMINATED BY ','"
data/lib/egis/testing.rb CHANGED
@@ -43,6 +43,6 @@ module Egis # rubocop:disable Style/Documentation
43
43
  yield
44
44
  ensure
45
45
  @mode = previous_mode
46
- test_mode.cleanup
46
+ test_mode&.cleanup
47
47
  end
48
48
  end
data/lib/egis/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Egis
4
- VERSION = '1.1.0'
4
+ VERSION = '1.4.0'
5
5
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: egis
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.1.0
4
+ version: 1.4.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Agnieszka Czereba
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2020-05-15 00:00:00.000000000 Z
12
+ date: 2021-03-12 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: aws-sdk-athena
@@ -83,7 +83,7 @@ licenses:
83
83
  metadata:
84
84
  homepage_uri: https://github.com/u2i/egis
85
85
  source_code_uri: https://github.com/u2i/egis
86
- changelog_uri: https://github.com/u2i/egis/blob/master/CHANGELOG.md
86
+ changelog_uri: https://u2i.github.io/egis/file.CHANGELOG.html
87
87
  post_install_message:
88
88
  rdoc_options: []
89
89
  require_paths:
@@ -99,7 +99,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
99
99
  - !ruby/object:Gem::Version
100
100
  version: '0'
101
101
  requirements: []
102
- rubygems_version: 3.1.2
102
+ rubygems_version: 3.1.4
103
103
  signing_key:
104
104
  specification_version: 4
105
105
  summary: A handy wrapper for AWS Athena Ruby SDK.