egis 1.1.1 → 1.2.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: d232fad45575196b2a8a2a65ccca79529a2fd72121bf5883a9c3b037a6c40850
4
- data.tar.gz: d269d52126bf32592f3b3425abc76b1664b119684674db5e618d9cbacbc0d3f3
3
+ metadata.gz: bc78871d415888b51b651537cf0716a553d44b20627e60f8e6110d8c1929791c
4
+ data.tar.gz: 96919f1a0ab277b6895dbea4db17c26544d21995010c53a87f1f9515d48eddfd
5
5
  SHA512:
6
- metadata.gz: 37c73df4ff2e1bf1f280816f0a882046a02447932bfdb8c77893b27d2e106f263d22a0d7cd0951861898aef882b2ea9ddca737ef980b9bfc2e342609a2716845
7
- data.tar.gz: 93e325830f6382c683430e7dac987b01dc9bc93890155f0c28f19373742610d8f24195e7c074684f813821f508a956a7ad936c7e4297a0738af52ec285445805
6
+ metadata.gz: e8c30d0705a76438655608af048138ba54cc710a1d1811fe44513551d63c6e1a9fd0a8650a25f2dbba2d828eaaa8c87c75e76562ccb433c720d59bca1a2b655f
7
+ data.tar.gz: 6b61e9a3adcea83fc6e31c04ef64eae21398a7b1ea4facf1f6b34d4c518f2cfb1722594b308cc48209181dc5a6d2b3a35d76542d0f23ff24b778cbfc97bbed58
data/egis.gemspec CHANGED
@@ -16,7 +16,7 @@ Gem::Specification.new do |spec|
16
16
 
17
17
  spec.metadata['homepage_uri'] = spec.homepage
18
18
  spec.metadata['source_code_uri'] = spec.homepage
19
- spec.metadata['changelog_uri'] = 'https://github.com/u2i/egis/blob/master/docs/CHANGELOG.md'
19
+ spec.metadata['changelog_uri'] = 'https://u2i.github.io/egis/file.CHANGELOG.html'
20
20
 
21
21
  # Specify which files should be added to the gem when it is released.
22
22
  # The `git ls-files -z` loads the files in the RubyGem that have been added into git.
data/lib/egis/client.rb CHANGED
@@ -65,14 +65,16 @@ module Egis
65
65
  # by workgroup.
66
66
  # @return [Egis::QueryStatus]
67
67
 
68
- def execute_query(query, work_group: nil, database: nil, output_location: nil, async: true)
69
- query_execution_id = aws_athena_client.start_query_execution(
68
+ def execute_query(query, work_group: nil, database: nil, output_location: nil, async: true, system_execution: false)
69
+ query_id = aws_athena_client.start_query_execution(
70
70
  query_execution_params(query, work_group, database, output_location)
71
71
  ).query_execution_id
72
72
 
73
- return query_status(query_execution_id) if Egis.mode.async(async)
73
+ log_query_execution(query, query_id, system_execution)
74
74
 
75
- query_status = wait_for_query_to_finish(query_execution_id)
75
+ return query_status(query_id) if Egis.mode.async(async)
76
+
77
+ query_status = wait_for_query_to_finish(query_id)
76
78
 
77
79
  raise Egis::Errors::QueryExecutionError, query_status.message unless query_status.finished?
78
80
 
@@ -89,10 +91,13 @@ module Egis
89
91
  resp = aws_athena_client.get_query_execution(query_execution_id: query_id)
90
92
 
91
93
  query_execution = resp.query_execution
94
+ query_status = query_execution.status.state
95
+
96
+ Egis.logger.debug { "Checking query status (#{query_id}): #{query_status}" }
92
97
 
93
98
  Egis::QueryStatus.new(
94
99
  query_execution.query_execution_id,
95
- QUERY_STATUS_MAPPING.fetch(query_execution.status.state),
100
+ QUERY_STATUS_MAPPING.fetch(query_status),
96
101
  query_execution.status.state_change_reason,
97
102
  parse_output_location(query_execution)
98
103
  )
@@ -112,11 +117,20 @@ module Egis
112
117
  params
113
118
  end
114
119
 
115
- def wait_for_query_to_finish(query_execution_id)
120
+ def log_query_execution(query, query_id, system_execution)
121
+ if system_execution
122
+ Egis.logger.debug { "Executing system query (#{query_id}): #{query.gsub(/\s+/, ' ')}" }
123
+ else
124
+ Egis.logger.info { "Executing query (#{query_id}): #{query.gsub(/\s+/, ' ')}" }
125
+ end
126
+ end
127
+
128
+ def wait_for_query_to_finish(query_id)
116
129
  attempt = 1
117
130
  loop do
118
131
  sleep(query_status_backoff.call(attempt))
119
- status = query_status(query_execution_id)
132
+ status = query_status(query_id)
133
+
120
134
  return status unless status.queued? || status.running?
121
135
 
122
136
  attempt += 1
data/lib/egis/configuration.rb CHANGED
@@ -4,6 +4,10 @@ module Egis
4
4
  # @!visibility private
5
5
  class Configuration
6
6
  attr_accessor :work_group, :aws_region, :aws_access_key_id, :aws_secret_access_key, :aws_profile,
7
- :query_status_backoff, :testing_s3_bucket
7
+ :query_status_backoff, :testing_s3_bucket, :logger
8
+
9
+ def initialize
10
+ @logger = Logger.new(STDOUT, level: :info)
11
+ end
8
12
  end
9
13
  end
data/lib/egis/database.rb CHANGED
@@ -10,13 +10,18 @@ module Egis
10
10
  #
11
11
  # It is recommended to create database objects using {Egis::Client#database} method.
12
12
  #
13
+ # @!attribute [r] name
14
+ # @return [String] Athena database name
15
+ #
13
16
  class Database
14
- def initialize(database_name, client: Egis::Client.new, output_downloader: Egis::OutputDownloader.new)
17
+ def initialize(name, client: Egis::Client.new, output_downloader: Egis::OutputDownloader.new)
15
18
  @client = client
16
- @database_name = database_name
19
+ @name = name
17
20
  @output_downloader = output_downloader
18
21
  end
19
22
 
23
+ attr_reader :name
24
+
20
25
  ##
21
26
  # Creates {Egis::Table} object. Executing it doesn't create Athena table yet.
22
27
  #
@@ -36,7 +41,10 @@ module Egis
36
41
  # @return [void]
37
42
 
38
43
  def create
39
- client.execute_query("CREATE DATABASE IF NOT EXISTS #{translate_name(database_name)};", async: false)
44
+ log_database_creation
45
+
46
+ client.execute_query("CREATE DATABASE IF NOT EXISTS #{translate_name(name)};", async: false,
47
+ system_execution: true)
40
48
  end
41
49
 
42
50
  ##
@@ -45,7 +53,9 @@ module Egis
45
53
  # @return [void]
46
54
 
47
55
  def create!
48
- client.execute_query("CREATE DATABASE #{translate_name(database_name)};", async: false)
56
+ log_database_creation
57
+
58
+ client.execute_query("CREATE DATABASE #{translate_name(name)};", async: false, system_execution: true)
49
59
  end
50
60
 
51
61
  ##
@@ -54,7 +64,10 @@ module Egis
54
64
  # @return [void]
55
65
 
56
66
  def drop
57
- client.execute_query("DROP DATABASE IF EXISTS #{translate_name(database_name)} CASCADE;", async: false)
67
+ log_database_removal
68
+
69
+ client.execute_query("DROP DATABASE IF EXISTS #{translate_name(name)} CASCADE;", async: false,
70
+ system_execution: true)
58
71
  end
59
72
 
60
73
  ##
@@ -63,14 +76,16 @@ module Egis
63
76
  # @return [void]
64
77
 
65
78
  def drop!
66
- client.execute_query("DROP DATABASE #{translate_name(database_name)} CASCADE;", async: false)
79
+ log_database_removal
80
+
81
+ client.execute_query("DROP DATABASE #{translate_name(name)} CASCADE;", async: false, system_execution: true)
67
82
  end
68
83
 
69
84
  ##
70
85
  # (see Egis::Client#execute_query)
71
86
 
72
87
  def execute_query(query, **options)
73
- client.execute_query(query, **{database: database_name, **options})
88
+ client.execute_query(query, **{database: name, **options})
74
89
  end
75
90
 
76
91
  ##
@@ -86,14 +101,22 @@ module Egis
86
101
  # @return [Boolean]
87
102
 
88
103
  def exists?
89
- query_status = client.execute_query("SHOW DATABASES LIKE '#{database_name}';", async: false)
104
+ query_status = client.execute_query("SHOW DATABASES LIKE '#{name}';", async: false, system_execution: true)
90
105
  parsed_result = output_downloader.download(query_status.output_location)
91
- parsed_result.flatten.include?(database_name)
106
+ parsed_result.flatten.include?(name)
92
107
  end
93
108
 
94
109
  private
95
110
 
96
- attr_reader :client, :database_name, :output_downloader
111
+ attr_reader :client, :output_downloader
112
+
113
+ def log_database_creation
114
+ Egis.logger.info { "Creating database #{name}" }
115
+ end
116
+
117
+ def log_database_removal
118
+ Egis.logger.info { "Removing database #{name}" }
119
+ end
97
120
 
98
121
  def translate_name(name)
99
122
  Egis.mode.database_name(name)
data/lib/egis/table.rb CHANGED
@@ -9,7 +9,7 @@ module Egis
9
9
  # @!attribute [r] database
10
10
  # @return [Egis::Database]
11
11
  # @!attribute [r] name
12
- # @return [String] Athena database name
12
+ # @return [String] Athena table name
13
13
  # @!attribute [r] schema
14
14
  # @return [Egis::TableSchema] table's schema object
15
15
  #
@@ -42,8 +42,10 @@ module Egis
42
42
  # @return [void]
43
43
 
44
44
  def create
45
+ log_table_creation
46
+
45
47
  create_table_sql = table_ddl_generator.create_table_sql(self, permissive: true)
46
- database.execute_query(create_table_sql, async: false)
48
+ database.execute_query(create_table_sql, async: false, system_execution: true)
47
49
  end
48
50
 
49
51
  ##
@@ -52,8 +54,10 @@ module Egis
52
54
  # @return [void]
53
55
 
54
56
  def create!
57
+ log_table_creation
58
+
55
59
  create_table_sql = table_ddl_generator.create_table_sql(self, permissive: false)
56
- database.execute_query(create_table_sql, async: false)
60
+ database.execute_query(create_table_sql, async: false, system_execution: true)
57
61
  end
58
62
 
59
63
  ##
@@ -67,7 +71,7 @@ module Egis
67
71
 
68
72
  def add_partitions(partitions)
69
73
  load_partitions_query = partitions_generator.to_sql(name, partitions, permissive: true)
70
- database.execute_query(load_partitions_query, async: false)
74
+ database.execute_query(load_partitions_query, async: false, system_execution: true)
71
75
  end
72
76
 
73
77
  ##
@@ -76,7 +80,7 @@ module Egis
76
80
 
77
81
  def add_partitions!(partitions)
78
82
  load_partitions_query = partitions_generator.to_sql(name, partitions, permissive: false)
79
- database.execute_query(load_partitions_query, async: false)
83
+ database.execute_query(load_partitions_query, async: false, system_execution: true)
80
84
  end
81
85
 
82
86
  ##
@@ -87,7 +91,7 @@ module Egis
87
91
  # @return [void]
88
92
 
89
93
  def discover_partitions
90
- database.execute_query("MSCK REPAIR TABLE #{name};", async: false)
94
+ database.execute_query("MSCK REPAIR TABLE #{name};", async: false, system_execution: true)
91
95
  end
92
96
 
93
97
  ##
@@ -98,7 +102,7 @@ module Egis
98
102
 
99
103
  def upload_data(rows)
100
104
  query = data_insert_query(rows)
101
- database.execute_query(query, async: false)
105
+ database.execute_query(query, async: false, system_execution: true)
102
106
  end
103
107
 
104
108
  ##
@@ -107,7 +111,7 @@ module Egis
107
111
  # @return [Array] Array of arrays with row values.
108
112
 
109
113
  def download_data
110
- result = database.execute_query("SELECT * FROM #{name};", async: false)
114
+ result = database.execute_query("SELECT * FROM #{name};", async: false, system_execution: true)
111
115
  content = output_downloader.download(result.output_location)
112
116
  output_parser.parse(content, column_types)
113
117
  end
@@ -141,6 +145,10 @@ module Egis
141
145
  attr_reader :options, :partitions_generator, :table_ddl_generator, :output_downloader, :output_parser,
142
146
  :table_data_wiper
143
147
 
148
+ def log_table_creation
149
+ Egis.logger.info { "Creating table #{database.name}.#{name} located in #{location}" }
150
+ end
151
+
144
152
  def column_serializers
145
153
  @column_serializers ||= column_types.map { |type| Egis::Types.serializer(type) }
146
154
  end
data/lib/egis/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Egis
4
- VERSION = '1.1.1'
4
+ VERSION = '1.2.0'
5
5
  end
data/lib/egis.rb CHANGED
@@ -40,6 +40,11 @@ require 'egis/s3_location_parser'
40
40
  # config.aws_profile = 'my-profile'
41
41
  # end
42
42
  #
43
+ # @example Configure logger
44
+ # Egis.configure do |config|
45
+ # config.logger = Logger.new('athena.log', level: :debug)
46
+ # end
47
+ #
43
48
  # @yield [Egis::Configuration]
44
49
  # @return [void]
45
50
  #
@@ -58,5 +63,10 @@ module Egis
58
63
  def mode
59
64
  @mode ||= Egis::StandardMode.new
60
65
  end
66
+
67
+ # @!visibility private
68
+ def logger
69
+ @configuration.logger
70
+ end
61
71
  end
62
72
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: egis
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.1.1
4
+ version: 1.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Agnieszka Czereba
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2020-05-18 00:00:00.000000000 Z
12
+ date: 2020-05-19 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: aws-sdk-athena
@@ -83,7 +83,7 @@ licenses:
83
83
  metadata:
84
84
  homepage_uri: https://github.com/u2i/egis
85
85
  source_code_uri: https://github.com/u2i/egis
86
- changelog_uri: https://github.com/u2i/egis/blob/master/docs/CHANGELOG.md
86
+ changelog_uri: https://u2i.github.io/egis/file.CHANGELOG.html
87
87
  post_install_message:
88
88
  rdoc_options: []
89
89
  require_paths: