rb_snowflake_client 0.2.0 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 2a93f0b1e503e2d219db8b1d1133395ca484b45aaa1f91ceef013a50a116b973
4
- data.tar.gz: c292c5b0d25a921f8deba6882fe288ed084aa07919fa0d056899d20b921612fc
3
+ metadata.gz: b345626b574463c788bc7ff125c6297eac071ce62cd77fadb77c5da896f6089d
4
+ data.tar.gz: 3bed923cc293d33af04eb846241637d6c72a8bef8917958e50b013fa8d120f70
5
5
  SHA512:
6
- metadata.gz: 28428d4cf4bd70a11fefad327a2ab14deff17e95bf28b999a6b84d67756b35d3207c8224d6a028b9998ca85b62a7226f4df2beddea50f30e9a91350d7df73b4b
7
- data.tar.gz: ad55068885a29f1cae0181e52d66764651cb4f7d541e46680bd15644ed8c17f10dda8cb9356ae4948cdf9b249e0c726abc540e26b03e1b76f9f2c538a8251cce
6
+ metadata.gz: 952d6ffe14f350158abbe2342815687e36564233f7de8a452c27174f2f483f76c32e331502a1a35e7e20a654f11a74c109f7018410a9a3add7f7fa42ea3e32c7
7
+ data.tar.gz: 3dcc696ab574bebb3b0416dbc9595df656c6547f7b4a36b70d32d63030dfedc54d9f84e8b9838c0fd22c3cb5a9b9ce0efdc9c74b78e3bb41a57e58c29757c120
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- rb_snowflake_client (0.2.0)
4
+ rb_snowflake_client (0.3.0)
5
5
  concurrent-ruby (>= 1.2)
6
6
  connection_pool (>= 2.4)
7
7
  dotenv (>= 2.8)
data/README.md CHANGED
@@ -37,6 +37,7 @@ client = RubySnowflake::Client.new(
37
37
  "some_database", # The name of the database in the context of which the queries will run
38
38
  max_connections: 12, # Config options can be passed in
39
39
  connection_timeout: 45, # See below for the full set of options
40
+ query_timeout: 1200, # how long to wait for queries, in seconds
40
41
  )
41
42
 
42
43
  # Alternatively, you can use the `from_env` method, which will pull these values from the following environment variables. You can provide either the path to the PEM file or its contents in an ENV variable.
@@ -57,6 +58,7 @@ Available ENV variables (see below in the config section for details)
57
58
  - `SNOWFLAKE_MAX_THREADS_PER_QUERY`
58
59
  - `SNOWFLAKE_THREAD_SCALE_FACTOR`
59
60
  - `SNOWFLAKE_HTTP_RETRIES`
61
+ - `SNOWFLAKE_QUERY_TIMEOUT`
60
62
 
61
63
  ## Make queries
62
64
 
@@ -115,6 +117,7 @@ The client supports the following configuration options, each with their own get
115
117
  - `max_threads_per_query` - The maximum number of threads the client should use to retrieve data, per query; defaults to 8. If you want the client to act in a single-threaded way, set this to 1.
116
118
  - `thread_scale_factor` - When downloading a result set into memory, thread count is calculated by dividing a query's partition count by this number. For details on implementation see the code in `client.rb`.
117
119
  - `http_retries` - By default the client will retry common, typically transient errors (HTTP responses) twice; you can change the number of retries with this.
120
+ - `query_timeout` - By default the client will wait 10 minutes (600s) for a query to finish; you can change this default. The same limit is also sent with the query for Snowflake to obey. Set in seconds.
118
121
 
119
122
  Example configuration:
120
123
  ```ruby
@@ -12,7 +12,6 @@ require "retryable"
12
12
  require "securerandom"
13
13
  require "uri"
14
14
 
15
-
16
15
  require_relative "client/http_connection_wrapper"
17
16
  require_relative "client/key_pair_jwt_auth_manager"
18
17
  require_relative "client/single_thread_in_memory_strategy"
@@ -36,6 +35,7 @@ module RubySnowflake
36
35
  class ConnectionStarvedError < Error ; end
37
36
  class RetryableBadResponseError < Error ; end
38
37
  class RequestError < Error ; end
38
+ class QueryTimeoutError < Error ; end
39
39
 
40
40
  class Client
41
41
  DEFAULT_LOGGER = Logger.new(STDOUT)
@@ -53,11 +53,16 @@ module RubySnowflake
53
53
  DEFAULT_THREAD_SCALE_FACTOR = 4
54
54
  # how many times to retry common retryable HTTP responses (i.e. 429, 504)
55
55
  DEFAULT_HTTP_RETRIES = 2
56
+ # how long to wait to allow a query to complete, in seconds
57
+ DEFAULT_QUERY_TIMEOUT = 600 # 10 minutes
56
58
 
57
59
  OJ_OPTIONS = { :bigdecimal_load => :bigdecimal }.freeze
60
+ VALID_RESPONSE_CODES = %w(200 202).freeze
61
+ POLLING_RESPONSE_CODE = "202"
62
+ POLLING_INTERVAL = 2 # seconds
58
63
 
59
64
  # can't be set after initialization
60
- attr_reader :connection_timeout, :max_connections, :logger, :max_threads_per_query, :thread_scale_factor, :http_retries
65
+ attr_reader :connection_timeout, :max_connections, :logger, :max_threads_per_query, :thread_scale_factor, :http_retries, :query_timeout
61
66
 
62
67
  def self.from_env(logger: DEFAULT_LOGGER,
63
68
  log_level: DEFAULT_LOG_LEVEL,
@@ -66,7 +71,8 @@ module RubySnowflake
66
71
  max_connections: env_option("SNOWFLAKE_MAX_CONNECTIONS", DEFAULT_MAX_CONNECTIONS ),
67
72
  max_threads_per_query: env_option("SNOWFLAKE_MAX_THREADS_PER_QUERY", DEFAULT_MAX_THREADS_PER_QUERY),
68
73
  thread_scale_factor: env_option("SNOWFLAKE_THREAD_SCALE_FACTOR", DEFAULT_THREAD_SCALE_FACTOR),
69
- http_retries: env_option("SNOWFLAKE_HTTP_RETRIES", DEFAULT_HTTP_RETRIES))
74
+ http_retries: env_option("SNOWFLAKE_HTTP_RETRIES", DEFAULT_HTTP_RETRIES),
75
+ query_timeout: env_option("SNOWFLAKE_QUERY_TIMEOUT", DEFAULT_QUERY_TIMEOUT))
70
76
  private_key = ENV["SNOWFLAKE_PRIVATE_KEY"] || File.read(ENV["SNOWFLAKE_PRIVATE_KEY_PATH"])
71
77
 
72
78
  new(
@@ -85,6 +91,7 @@ module RubySnowflake
85
91
  max_threads_per_query: max_threads_per_query,
86
92
  thread_scale_factor: thread_scale_factor,
87
93
  http_retries: http_retries,
94
+ query_timeout: query_timeout,
88
95
  )
89
96
  end
90
97
 
@@ -97,7 +104,8 @@ module RubySnowflake
97
104
  max_connections: DEFAULT_MAX_CONNECTIONS,
98
105
  max_threads_per_query: DEFAULT_MAX_THREADS_PER_QUERY,
99
106
  thread_scale_factor: DEFAULT_THREAD_SCALE_FACTOR,
100
- http_retries: DEFAULT_HTTP_RETRIES
107
+ http_retries: DEFAULT_HTTP_RETRIES,
108
+ query_timeout: DEFAULT_QUERY_TIMEOUT
101
109
  )
102
110
  @base_uri = uri
103
111
  @key_pair_jwt_auth_manager =
@@ -113,26 +121,33 @@ module RubySnowflake
113
121
  @max_threads_per_query = max_threads_per_query
114
122
  @thread_scale_factor = thread_scale_factor
115
123
  @http_retries = http_retries
124
+ @query_timeout = query_timeout
125
+
126
+ # Do NOT use normally, this exists for tests so we can reliably trigger the polling
127
+ # response workflow from snowflake in tests
128
+ @_enable_polling_queries = false
116
129
  end
117
130
 
118
131
  def query(query, warehouse: nil, streaming: false, database: nil)
119
132
  warehouse ||= @default_warehouse
120
133
  database ||= @default_database
121
134
 
135
+ query_start_time = Time.now.to_i
122
136
  response = nil
123
137
  connection_pool.with do |connection|
124
138
  request_body = {
125
- "statement" => query, "warehouse" => warehouse, "database" => database
139
+ "statement" => query, "warehouse" => warehouse,
140
+ "database" => database, "timeout" => @query_timeout
126
141
  }
127
142
 
128
143
  response = request_with_auth_and_headers(
129
144
  connection,
130
145
  Net::HTTP::Post,
131
- "/api/v2/statements?requestId=#{SecureRandom.uuid}",
146
+ "/api/v2/statements?requestId=#{SecureRandom.uuid}&async=#{@_enable_polling_queries}",
132
147
  Oj.dump(request_body)
133
148
  )
134
149
  end
135
- retreive_result_set(response, streaming)
150
+ retreive_result_set(query_start_time, query, response, streaming)
136
151
  end
137
152
 
138
153
  alias fetch query
@@ -158,13 +173,6 @@ module RubySnowflake
158
173
  @port ||= URI.parse(@base_uri).port
159
174
  end
160
175
 
161
- def handle_errors(response)
162
- if response.code != "200"
163
- raise BadResponseError.new({}),
164
- "Bad response! Got code: #{response.code}, w/ message #{response.body}"
165
- end
166
- end
167
-
168
176
  def request_with_auth_and_headers(connection, request_class, path, body=nil)
169
177
  uri = URI.parse("#{@base_uri}#{path}")
170
178
  request = request_class.new(uri)
@@ -186,7 +194,7 @@ module RubySnowflake
186
194
  end
187
195
 
188
196
  def raise_on_bad_response(response)
189
- return if response.code == "200"
197
+ return if VALID_RESPONSE_CODES.include? response.code
190
198
 
191
199
  # there is a class of errors we want to retry rather than just giving up
192
200
  if retryable_http_response_code?(response.code)
@@ -213,9 +221,50 @@ module RubySnowflake
213
221
  end
214
222
  end
215
223
 
216
- def retreive_result_set(response, streaming)
224
+ def poll_for_completion_or_timeout(query_start_time, query, statement_handle)
225
+ first_data_json_body = nil
226
+
227
+ connection_pool.with do |connection|
228
+ loop do
229
+ sleep POLLING_INTERVAL
230
+
231
+ if Time.now.to_i - query_start_time > @query_timeout
232
+ cancelled = attempt_to_cancel_and_silence_errors(connection, statement_handle)
233
+ raise QueryTimeoutError.new("Query timed out. Query cancelled? #{cancelled} Query: #{query}")
234
+ end
235
+
236
+ poll_response = request_with_auth_and_headers(connection, Net::HTTP::Get,
237
+ "/api/v2/statements/#{statement_handle}")
238
+ if poll_response.code == POLLING_RESPONSE_CODE
239
+ next
240
+ else
241
+ return poll_response
242
+ end
243
+ end
244
+ end
245
+ end
246
+
247
+ def attempt_to_cancel_and_silence_errors(connection, statement_handle)
248
+ cancel_response = request_with_auth_and_headers(connection, Net::HTTP::Post,
249
+ "/api/v2/#{statement_handle}/cancel")
250
+ true
251
+ rescue Error => error
252
+ if error.is_a?(BadResponseError) && error.message.include?("404")
253
+ return true # snowflake cancelled it before we did
254
+ end
255
+ @logger.error("Error on attempting to cancel query #{statement_handle}, will raise a QueryTimeoutError")
256
+ false
257
+ end
258
+
259
+ def retreive_result_set(query_start_time, query, response, streaming)
217
260
  json_body = Oj.load(response.body, OJ_OPTIONS)
218
261
  statement_handle = json_body["statementHandle"]
262
+
263
+ if response.code == POLLING_RESPONSE_CODE
264
+ result_response = poll_for_completion_or_timeout(query_start_time, query, statement_handle)
265
+ json_body = Oj.load(result_response.body, OJ_OPTIONS)
266
+ end
267
+
219
268
  num_threads = number_of_threads_to_use(json_body["resultSetMetaData"]["partitionInfo"].size)
220
269
  retreive_proc = ->(index) { retreive_partition_data(statement_handle, index) }
221
270
 
@@ -1,3 +1,3 @@
1
1
  module RubySnowflake
2
- VERSION = '0.2.0'
2
+ VERSION = "0.3.0"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rb_snowflake_client
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Rinsed
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2023-12-07 00:00:00.000000000 Z
11
+ date: 2023-12-08 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: concurrent-ruby