rb_snowflake_client 0.2.0 → 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile.lock +1 -1
- data/README.md +3 -0
- data/lib/ruby_snowflake/client.rb +65 -16
- data/lib/ruby_snowflake/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: b345626b574463c788bc7ff125c6297eac071ce62cd77fadb77c5da896f6089d
|
4
|
+
data.tar.gz: 3bed923cc293d33af04eb846241637d6c72a8bef8917958e50b013fa8d120f70
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 952d6ffe14f350158abbe2342815687e36564233f7de8a452c27174f2f483f76c32e331502a1a35e7e20a654f11a74c109f7018410a9a3add7f7fa42ea3e32c7
|
7
|
+
data.tar.gz: 3dcc696ab574bebb3b0416dbc9595df656c6547f7b4a36b70d32d63030dfedc54d9f84e8b9838c0fd22c3cb5a9b9ce0efdc9c74b78e3bb41a57e58c29757c120
|
data/Gemfile.lock
CHANGED
data/README.md
CHANGED
@@ -37,6 +37,7 @@ client = RubySnowflake::Client.new(
|
|
37
37
|
"some_database", # The name of the database in the context of which the queries will run
|
38
38
|
max_connections: 12, # Config options can be passed in
|
39
39
|
connection_timeout: 45, # See below for the full set of options
|
40
|
+
query_timeout: 1200, # how long to wait for queries, in seconds
|
40
41
|
)
|
41
42
|
|
42
43
|
# alternatively you can use the `from_env` method, which will pull these values from the following environment variables. You can either provide the path to the PEM file, or its contents in an ENV variable.
|
@@ -57,6 +58,7 @@ Available ENV variables (see below in the config section for details)
|
|
57
58
|
- `SNOWFLAKE_MAX_THREADS_PER_QUERY`
|
58
59
|
- `SNOWFLAKE_THREAD_SCALE_FACTOR`
|
59
60
|
- `SNOWFLAKE_HTTP_RETRIES`
|
61
|
+
- `SNOWFLAKE_QUERY_TIMEOUT`
|
60
62
|
|
61
63
|
## Make queries
|
62
64
|
|
@@ -115,6 +117,7 @@ The client supports the following configuration options, each with their own get
|
|
115
117
|
- `max_threads_per_query` - The maximum number of threads the client should use to retrieve data, per query; defaults to 8. If you want the client to act in a single-threaded way, set this to 1.
|
116
118
|
- `thread_scale_factor` - When downloading a result set into memory, thread count is calculated by dividing a query's partition count by this number. For details on implementation see the code in `client.rb`.
|
117
119
|
- `http_retries` - By default the client will retry common, typically transient errors (HTTP responses) twice; you can change the number of retries with this.
|
120
|
+
- `query_timeout` - By default the client will wait 10 minutes (600s) for a query to finish; you can change this default. The same limit is also sent with the query for Snowflake to obey. Set in seconds.
|
118
121
|
|
119
122
|
Example configuration:
|
120
123
|
```ruby
|
@@ -12,7 +12,6 @@ require "retryable"
|
|
12
12
|
require "securerandom"
|
13
13
|
require "uri"
|
14
14
|
|
15
|
-
|
16
15
|
require_relative "client/http_connection_wrapper"
|
17
16
|
require_relative "client/key_pair_jwt_auth_manager"
|
18
17
|
require_relative "client/single_thread_in_memory_strategy"
|
@@ -36,6 +35,7 @@ module RubySnowflake
|
|
36
35
|
class ConnectionStarvedError < Error ; end
|
37
36
|
class RetryableBadResponseError < Error ; end
|
38
37
|
class RequestError < Error ; end
|
38
|
+
class QueryTimeoutError < Error ; end
|
39
39
|
|
40
40
|
class Client
|
41
41
|
DEFAULT_LOGGER = Logger.new(STDOUT)
|
@@ -53,11 +53,16 @@ module RubySnowflake
|
|
53
53
|
DEFAULT_THREAD_SCALE_FACTOR = 4
|
54
54
|
# how many times to retry common retryable HTTP responses (i.e. 429, 504)
|
55
55
|
DEFAULT_HTTP_RETRIES = 2
|
56
|
+
# how long to wait to allow a query to complete, in seconds
|
57
|
+
DEFAULT_QUERY_TIMEOUT = 600 # 10 minutes
|
56
58
|
|
57
59
|
OJ_OPTIONS = { :bigdecimal_load => :bigdecimal }.freeze
|
60
|
+
VALID_RESPONSE_CODES = %w(200 202).freeze
|
61
|
+
POLLING_RESPONSE_CODE = "202"
|
62
|
+
POLLING_INTERVAL = 2 # seconds
|
58
63
|
|
59
64
|
# can't be set after initialization
|
60
|
-
attr_reader :connection_timeout, :max_connections, :logger, :max_threads_per_query, :thread_scale_factor, :http_retries
|
65
|
+
attr_reader :connection_timeout, :max_connections, :logger, :max_threads_per_query, :thread_scale_factor, :http_retries, :query_timeout
|
61
66
|
|
62
67
|
def self.from_env(logger: DEFAULT_LOGGER,
|
63
68
|
log_level: DEFAULT_LOG_LEVEL,
|
@@ -66,7 +71,8 @@ module RubySnowflake
|
|
66
71
|
max_connections: env_option("SNOWFLAKE_MAX_CONNECTIONS", DEFAULT_MAX_CONNECTIONS ),
|
67
72
|
max_threads_per_query: env_option("SNOWFLAKE_MAX_THREADS_PER_QUERY", DEFAULT_MAX_THREADS_PER_QUERY),
|
68
73
|
thread_scale_factor: env_option("SNOWFLAKE_THREAD_SCALE_FACTOR", DEFAULT_THREAD_SCALE_FACTOR),
|
69
|
-
http_retries: env_option("SNOWFLAKE_HTTP_RETRIES", DEFAULT_HTTP_RETRIES)
|
74
|
+
http_retries: env_option("SNOWFLAKE_HTTP_RETRIES", DEFAULT_HTTP_RETRIES),
|
75
|
+
query_timeout: env_option("SNOWFLAKE_QUERY_TIMEOUT", DEFAULT_QUERY_TIMEOUT))
|
70
76
|
private_key = ENV["SNOWFLAKE_PRIVATE_KEY"] || File.read(ENV["SNOWFLAKE_PRIVATE_KEY_PATH"])
|
71
77
|
|
72
78
|
new(
|
@@ -85,6 +91,7 @@ module RubySnowflake
|
|
85
91
|
max_threads_per_query: max_threads_per_query,
|
86
92
|
thread_scale_factor: thread_scale_factor,
|
87
93
|
http_retries: http_retries,
|
94
|
+
query_timeout: query_timeout,
|
88
95
|
)
|
89
96
|
end
|
90
97
|
|
@@ -97,7 +104,8 @@ module RubySnowflake
|
|
97
104
|
max_connections: DEFAULT_MAX_CONNECTIONS,
|
98
105
|
max_threads_per_query: DEFAULT_MAX_THREADS_PER_QUERY,
|
99
106
|
thread_scale_factor: DEFAULT_THREAD_SCALE_FACTOR,
|
100
|
-
http_retries: DEFAULT_HTTP_RETRIES
|
107
|
+
http_retries: DEFAULT_HTTP_RETRIES,
|
108
|
+
query_timeout: DEFAULT_QUERY_TIMEOUT
|
101
109
|
)
|
102
110
|
@base_uri = uri
|
103
111
|
@key_pair_jwt_auth_manager =
|
@@ -113,26 +121,33 @@ module RubySnowflake
|
|
113
121
|
@max_threads_per_query = max_threads_per_query
|
114
122
|
@thread_scale_factor = thread_scale_factor
|
115
123
|
@http_retries = http_retries
|
124
|
+
@query_timeout = query_timeout
|
125
|
+
|
126
|
+
# Do NOT use normally, this exists for tests so we can reliably trigger the polling
|
127
|
+
# response workflow from snowflake in tests
|
128
|
+
@_enable_polling_queries = false
|
116
129
|
end
|
117
130
|
|
118
131
|
def query(query, warehouse: nil, streaming: false, database: nil)
|
119
132
|
warehouse ||= @default_warehouse
|
120
133
|
database ||= @default_database
|
121
134
|
|
135
|
+
query_start_time = Time.now.to_i
|
122
136
|
response = nil
|
123
137
|
connection_pool.with do |connection|
|
124
138
|
request_body = {
|
125
|
-
"statement" => query, "warehouse" => warehouse,
|
139
|
+
"statement" => query, "warehouse" => warehouse,
|
140
|
+
"database" => database, "timeout" => @query_timeout
|
126
141
|
}
|
127
142
|
|
128
143
|
response = request_with_auth_and_headers(
|
129
144
|
connection,
|
130
145
|
Net::HTTP::Post,
|
131
|
-
"/api/v2/statements?requestId=#{SecureRandom.uuid}",
|
146
|
+
"/api/v2/statements?requestId=#{SecureRandom.uuid}&async=#{@_enable_polling_queries}",
|
132
147
|
Oj.dump(request_body)
|
133
148
|
)
|
134
149
|
end
|
135
|
-
retreive_result_set(response, streaming)
|
150
|
+
retreive_result_set(query_start_time, query, response, streaming)
|
136
151
|
end
|
137
152
|
|
138
153
|
alias fetch query
|
@@ -158,13 +173,6 @@ module RubySnowflake
|
|
158
173
|
@port ||= URI.parse(@base_uri).port
|
159
174
|
end
|
160
175
|
|
161
|
-
def handle_errors(response)
|
162
|
-
if response.code != "200"
|
163
|
-
raise BadResponseError.new({}),
|
164
|
-
"Bad response! Got code: #{response.code}, w/ message #{response.body}"
|
165
|
-
end
|
166
|
-
end
|
167
|
-
|
168
176
|
def request_with_auth_and_headers(connection, request_class, path, body=nil)
|
169
177
|
uri = URI.parse("#{@base_uri}#{path}")
|
170
178
|
request = request_class.new(uri)
|
@@ -186,7 +194,7 @@ module RubySnowflake
|
|
186
194
|
end
|
187
195
|
|
188
196
|
def raise_on_bad_response(response)
|
189
|
-
return if response.code
|
197
|
+
return if VALID_RESPONSE_CODES.include? response.code
|
190
198
|
|
191
199
|
# there are a class of errors we want to retry rather than just giving up
|
192
200
|
if retryable_http_response_code?(response.code)
|
@@ -213,9 +221,50 @@ module RubySnowflake
|
|
213
221
|
end
|
214
222
|
end
|
215
223
|
|
216
|
-
def retreive_result_set(response, streaming)
|
224
|
+
def poll_for_completion_or_timeout(query_start_time, query, statement_handle)
|
225
|
+
first_data_json_body = nil
|
226
|
+
|
227
|
+
connection_pool.with do |connection|
|
228
|
+
loop do
|
229
|
+
sleep POLLING_INTERVAL
|
230
|
+
|
231
|
+
if Time.now.to_i - query_start_time > @query_timeout
|
232
|
+
cancelled = attempt_to_cancel_and_silence_errors(connection, statement_handle)
|
233
|
+
raise QueryTimeoutError.new("Query timed out. Query cancelled? #{cancelled} Query: #{query}")
|
234
|
+
end
|
235
|
+
|
236
|
+
poll_response = request_with_auth_and_headers(connection, Net::HTTP::Get,
|
237
|
+
"/api/v2/statements/#{statement_handle}")
|
238
|
+
if poll_response.code == POLLING_RESPONSE_CODE
|
239
|
+
next
|
240
|
+
else
|
241
|
+
return poll_response
|
242
|
+
end
|
243
|
+
end
|
244
|
+
end
|
245
|
+
end
|
246
|
+
|
247
|
+
def attempt_to_cancel_and_silence_errors(connection, statement_handle)
|
248
|
+
cancel_response = request_with_auth_and_headers(connection, Net::HTTP::Post,
|
249
|
+
"/api/v2/#{statement_handle}/cancel")
|
250
|
+
true
|
251
|
+
rescue Error => error
|
252
|
+
if error.is_a?(BadResponseError) && error.message.include?("404")
|
253
|
+
return true # snowflake cancelled it before we did
|
254
|
+
end
|
255
|
+
@logger.error("Error on attempting to cancel query #{statement_handle}, will raise a QueryTimeoutError")
|
256
|
+
false
|
257
|
+
end
|
258
|
+
|
259
|
+
def retreive_result_set(query_start_time, query, response, streaming)
|
217
260
|
json_body = Oj.load(response.body, OJ_OPTIONS)
|
218
261
|
statement_handle = json_body["statementHandle"]
|
262
|
+
|
263
|
+
if response.code == POLLING_RESPONSE_CODE
|
264
|
+
result_response = poll_for_completion_or_timeout(query_start_time, query, statement_handle)
|
265
|
+
json_body = Oj.load(result_response.body, OJ_OPTIONS)
|
266
|
+
end
|
267
|
+
|
219
268
|
num_threads = number_of_threads_to_use(json_body["resultSetMetaData"]["partitionInfo"].size)
|
220
269
|
retreive_proc = ->(index) { retreive_partition_data(statement_handle, index) }
|
221
270
|
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rb_snowflake_client
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.0
|
4
|
+
version: 0.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Rinsed
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2023-12-
|
11
|
+
date: 2023-12-08 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: concurrent-ruby
|