rb_snowflake_client 0.2.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 2a93f0b1e503e2d219db8b1d1133395ca484b45aaa1f91ceef013a50a116b973
4
- data.tar.gz: c292c5b0d25a921f8deba6882fe288ed084aa07919fa0d056899d20b921612fc
3
+ metadata.gz: b345626b574463c788bc7ff125c6297eac071ce62cd77fadb77c5da896f6089d
4
+ data.tar.gz: 3bed923cc293d33af04eb846241637d6c72a8bef8917958e50b013fa8d120f70
5
5
  SHA512:
6
- metadata.gz: 28428d4cf4bd70a11fefad327a2ab14deff17e95bf28b999a6b84d67756b35d3207c8224d6a028b9998ca85b62a7226f4df2beddea50f30e9a91350d7df73b4b
7
- data.tar.gz: ad55068885a29f1cae0181e52d66764651cb4f7d541e46680bd15644ed8c17f10dda8cb9356ae4948cdf9b249e0c726abc540e26b03e1b76f9f2c538a8251cce
6
+ metadata.gz: 952d6ffe14f350158abbe2342815687e36564233f7de8a452c27174f2f483f76c32e331502a1a35e7e20a654f11a74c109f7018410a9a3add7f7fa42ea3e32c7
7
+ data.tar.gz: 3dcc696ab574bebb3b0416dbc9595df656c6547f7b4a36b70d32d63030dfedc54d9f84e8b9838c0fd22c3cb5a9b9ce0efdc9c74b78e3bb41a57e58c29757c120
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- rb_snowflake_client (0.2.0)
4
+ rb_snowflake_client (0.3.0)
5
5
  concurrent-ruby (>= 1.2)
6
6
  connection_pool (>= 2.4)
7
7
  dotenv (>= 2.8)
data/README.md CHANGED
@@ -37,6 +37,7 @@ client = RubySnowflake::Client.new(
37
37
  "some_database", # The name of the database in the context of which the queries will run
38
38
  max_connections: 12, # Config options can be passed in
39
39
  connection_timeout: 45, # See below for the full set of options
40
+ query_timeout: 1200, # how long to wait for queries, in seconds
40
41
  )
41
42
 
42
43
  # alternatively, you can use the `from_env` method, which will pull these values from the following environment variables. You can either provide the path to the PEM file, or its contents in an ENV variable.
@@ -57,6 +58,7 @@ Available ENV variables (see below in the config section for details)
57
58
  - `SNOWFLAKE_MAX_THREADS_PER_QUERY`
58
59
  - `SNOWFLAKE_THREAD_SCALE_FACTOR`
59
60
  - `SNOWFLAKE_HTTP_RETRIES`
61
+ - `SNOWFLAKE_QUERY_TIMEOUT`
60
62
 
61
63
  ## Make queries
62
64
 
@@ -115,6 +117,7 @@ The client supports the following configuration options, each with their own get
115
117
  - `max_threads_per_query` - The maximum number of threads the client should use to retrieve data, per query; defaults to 8. If you want the client to act in a single-threaded way, set this to 1
116
118
  - `thread_scale_factor` - When downloading a result set into memory, thread count is calculated by dividing a query's partition count by this number. For details on implementation see the code in `client.rb`.
117
119
  - `http_retries` - By default the client will retry common, typically transient errors (HTTP responses) twice; you can change the number of retries with this.
120
+ - `query_timeout` - By default the client will wait 10 minutes (600s) for a query to finish; you can change this default. The same limit is also sent to Snowflake with the query so that Snowflake enforces it. Set in seconds.
118
121
 
119
122
  Example configuration:
120
123
  ```ruby
@@ -12,7 +12,6 @@ require "retryable"
12
12
  require "securerandom"
13
13
  require "uri"
14
14
 
15
-
16
15
  require_relative "client/http_connection_wrapper"
17
16
  require_relative "client/key_pair_jwt_auth_manager"
18
17
  require_relative "client/single_thread_in_memory_strategy"
@@ -36,6 +35,7 @@ module RubySnowflake
36
35
  class ConnectionStarvedError < Error ; end
37
36
  class RetryableBadResponseError < Error ; end
38
37
  class RequestError < Error ; end
38
+ class QueryTimeoutError < Error ; end
39
39
 
40
40
  class Client
41
41
  DEFAULT_LOGGER = Logger.new(STDOUT)
@@ -53,11 +53,16 @@ module RubySnowflake
53
53
  DEFAULT_THREAD_SCALE_FACTOR = 4
54
54
  # how many times to retry common retryable HTTP responses (i.e. 429, 504)
55
55
  DEFAULT_HTTP_RETRIES = 2
56
+ # how long to wait to allow a query to complete, in seconds
57
+ DEFAULT_QUERY_TIMEOUT = 600 # 10 minutes
56
58
 
57
59
  OJ_OPTIONS = { :bigdecimal_load => :bigdecimal }.freeze
60
+ VALID_RESPONSE_CODES = %w(200 202).freeze
61
+ POLLING_RESPONSE_CODE = "202"
62
+ POLLING_INTERVAL = 2 # seconds
58
63
 
59
64
  # can't be set after initialization
60
- attr_reader :connection_timeout, :max_connections, :logger, :max_threads_per_query, :thread_scale_factor, :http_retries
65
+ attr_reader :connection_timeout, :max_connections, :logger, :max_threads_per_query, :thread_scale_factor, :http_retries, :query_timeout
61
66
 
62
67
  def self.from_env(logger: DEFAULT_LOGGER,
63
68
  log_level: DEFAULT_LOG_LEVEL,
@@ -66,7 +71,8 @@ module RubySnowflake
66
71
  max_connections: env_option("SNOWFLAKE_MAX_CONNECTIONS", DEFAULT_MAX_CONNECTIONS ),
67
72
  max_threads_per_query: env_option("SNOWFLAKE_MAX_THREADS_PER_QUERY", DEFAULT_MAX_THREADS_PER_QUERY),
68
73
  thread_scale_factor: env_option("SNOWFLAKE_THREAD_SCALE_FACTOR", DEFAULT_THREAD_SCALE_FACTOR),
69
- http_retries: env_option("SNOWFLAKE_HTTP_RETRIES", DEFAULT_HTTP_RETRIES))
74
+ http_retries: env_option("SNOWFLAKE_HTTP_RETRIES", DEFAULT_HTTP_RETRIES),
75
+ query_timeout: env_option("SNOWFLAKE_QUERY_TIMEOUT", DEFAULT_QUERY_TIMEOUT))
70
76
  private_key = ENV["SNOWFLAKE_PRIVATE_KEY"] || File.read(ENV["SNOWFLAKE_PRIVATE_KEY_PATH"])
71
77
 
72
78
  new(
@@ -85,6 +91,7 @@ module RubySnowflake
85
91
  max_threads_per_query: max_threads_per_query,
86
92
  thread_scale_factor: thread_scale_factor,
87
93
  http_retries: http_retries,
94
+ query_timeout: query_timeout,
88
95
  )
89
96
  end
90
97
 
@@ -97,7 +104,8 @@ module RubySnowflake
97
104
  max_connections: DEFAULT_MAX_CONNECTIONS,
98
105
  max_threads_per_query: DEFAULT_MAX_THREADS_PER_QUERY,
99
106
  thread_scale_factor: DEFAULT_THREAD_SCALE_FACTOR,
100
- http_retries: DEFAULT_HTTP_RETRIES
107
+ http_retries: DEFAULT_HTTP_RETRIES,
108
+ query_timeout: DEFAULT_QUERY_TIMEOUT
101
109
  )
102
110
  @base_uri = uri
103
111
  @key_pair_jwt_auth_manager =
@@ -113,26 +121,33 @@ module RubySnowflake
113
121
  @max_threads_per_query = max_threads_per_query
114
122
  @thread_scale_factor = thread_scale_factor
115
123
  @http_retries = http_retries
124
+ @query_timeout = query_timeout
125
+
126
+ # Do NOT use normally, this exists for tests so we can reliably trigger the polling
127
+ # response workflow from snowflake in tests
128
+ @_enable_polling_queries = false
116
129
  end
117
130
 
118
131
  def query(query, warehouse: nil, streaming: false, database: nil)
119
132
  warehouse ||= @default_warehouse
120
133
  database ||= @default_database
121
134
 
135
+ query_start_time = Time.now.to_i
122
136
  response = nil
123
137
  connection_pool.with do |connection|
124
138
  request_body = {
125
- "statement" => query, "warehouse" => warehouse, "database" => database
139
+ "statement" => query, "warehouse" => warehouse,
140
+ "database" => database, "timeout" => @query_timeout
126
141
  }
127
142
 
128
143
  response = request_with_auth_and_headers(
129
144
  connection,
130
145
  Net::HTTP::Post,
131
- "/api/v2/statements?requestId=#{SecureRandom.uuid}",
146
+ "/api/v2/statements?requestId=#{SecureRandom.uuid}&async=#{@_enable_polling_queries}",
132
147
  Oj.dump(request_body)
133
148
  )
134
149
  end
135
- retreive_result_set(response, streaming)
150
+ retreive_result_set(query_start_time, query, response, streaming)
136
151
  end
137
152
 
138
153
  alias fetch query
@@ -158,13 +173,6 @@ module RubySnowflake
158
173
  @port ||= URI.parse(@base_uri).port
159
174
  end
160
175
 
161
- def handle_errors(response)
162
- if response.code != "200"
163
- raise BadResponseError.new({}),
164
- "Bad response! Got code: #{response.code}, w/ message #{response.body}"
165
- end
166
- end
167
-
168
176
  def request_with_auth_and_headers(connection, request_class, path, body=nil)
169
177
  uri = URI.parse("#{@base_uri}#{path}")
170
178
  request = request_class.new(uri)
@@ -186,7 +194,7 @@ module RubySnowflake
186
194
  end
187
195
 
188
196
  def raise_on_bad_response(response)
189
- return if response.code == "200"
197
+ return if VALID_RESPONSE_CODES.include? response.code
190
198
 
191
199
  # there are a class of errors we want to retry rather than just giving up
192
200
  if retryable_http_response_code?(response.code)
@@ -213,9 +221,50 @@ module RubySnowflake
213
221
  end
214
222
  end
215
223
 
216
- def retreive_result_set(response, streaming)
224
+ def poll_for_completion_or_timeout(query_start_time, query, statement_handle)
225
+ first_data_json_body = nil
226
+
227
+ connection_pool.with do |connection|
228
+ loop do
229
+ sleep POLLING_INTERVAL
230
+
231
+ if Time.now.to_i - query_start_time > @query_timeout
232
+ cancelled = attempt_to_cancel_and_silence_errors(connection, statement_handle)
233
+ raise QueryTimeoutError.new("Query timed out. Query cancelled? #{cancelled} Query: #{query}")
234
+ end
235
+
236
+ poll_response = request_with_auth_and_headers(connection, Net::HTTP::Get,
237
+ "/api/v2/statements/#{statement_handle}")
238
+ if poll_response.code == POLLING_RESPONSE_CODE
239
+ next
240
+ else
241
+ return poll_response
242
+ end
243
+ end
244
+ end
245
+ end
246
+
247
+ def attempt_to_cancel_and_silence_errors(connection, statement_handle)
248
+ cancel_response = request_with_auth_and_headers(connection, Net::HTTP::Post,
249
+ "/api/v2/#{statement_handle}/cancel")
250
+ true
251
+ rescue Error => error
252
+ if error.is_a?(BadResponseError) && error.message.include?("404")
253
+ return true # snowflake cancelled it before we did
254
+ end
255
+ @logger.error("Error on attempting to cancel query #{statement_handle}, will raise a QueryTimeoutError")
256
+ false
257
+ end
258
+
259
+ def retreive_result_set(query_start_time, query, response, streaming)
217
260
  json_body = Oj.load(response.body, OJ_OPTIONS)
218
261
  statement_handle = json_body["statementHandle"]
262
+
263
+ if response.code == POLLING_RESPONSE_CODE
264
+ result_response = poll_for_completion_or_timeout(query_start_time, query, statement_handle)
265
+ json_body = Oj.load(result_response.body, OJ_OPTIONS)
266
+ end
267
+
219
268
  num_threads = number_of_threads_to_use(json_body["resultSetMetaData"]["partitionInfo"].size)
220
269
  retreive_proc = ->(index) { retreive_partition_data(statement_handle, index) }
221
270
 
@@ -1,3 +1,3 @@
1
1
  module RubySnowflake
2
- VERSION = '0.2.0'
2
+ VERSION = "0.3.0"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rb_snowflake_client
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Rinsed
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2023-12-07 00:00:00.000000000 Z
11
+ date: 2023-12-08 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: concurrent-ruby