rb_snowflake_client 0.2.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile.lock +1 -1
- data/README.md +3 -0
- data/lib/ruby_snowflake/client.rb +65 -16
- data/lib/ruby_snowflake/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: b345626b574463c788bc7ff125c6297eac071ce62cd77fadb77c5da896f6089d
|
4
|
+
data.tar.gz: 3bed923cc293d33af04eb846241637d6c72a8bef8917958e50b013fa8d120f70
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 952d6ffe14f350158abbe2342815687e36564233f7de8a452c27174f2f483f76c32e331502a1a35e7e20a654f11a74c109f7018410a9a3add7f7fa42ea3e32c7
|
7
|
+
data.tar.gz: 3dcc696ab574bebb3b0416dbc9595df656c6547f7b4a36b70d32d63030dfedc54d9f84e8b9838c0fd22c3cb5a9b9ce0efdc9c74b78e3bb41a57e58c29757c120
|
data/Gemfile.lock
CHANGED
data/README.md
CHANGED
@@ -37,6 +37,7 @@ client = RubySnowflake::Client.new(
|
|
37
37
|
"some_database", # The name of the database in the context of which the queries will run
|
38
38
|
max_connections: 12, # Config options can be passed in
|
39
39
|
connection_timeout: 45, # See below for the full set of options
|
40
|
+
query_timeout: 1200, # how long to wait for queries, in seconds
|
40
41
|
)
|
41
42
|
|
42
43
|
# Alternatively, you can use the `from_env` method, which will pull these values from the following environment variables. You can provide either the path to the PEM file or its contents in an ENV variable.
|
@@ -57,6 +58,7 @@ Available ENV variables (see below in the config section for details)
|
|
57
58
|
- `SNOWFLAKE_MAX_THREADS_PER_QUERY`
|
58
59
|
- `SNOWFLAKE_THREAD_SCALE_FACTOR`
|
59
60
|
- `SNOWFLAKE_HTTP_RETRIES`
|
61
|
+
- `SNOWFLAKE_QUERY_TIMEOUT`
|
60
62
|
|
61
63
|
## Make queries
|
62
64
|
|
@@ -115,6 +117,7 @@ The client supports the following configuration options, each with their own get
|
|
115
117
|
- `max_threads_per_query` - The maximum number of threads the client should use to retrieve data, per query; defaults to 8. If you want the client to act in a single-threaded way, set this to 1.
|
116
118
|
- `thread_scale_factor` - When downloading a result set into memory, thread count is calculated by dividing a query's partition count by this number. For details on implementation see the code in `client.rb`.
|
117
119
|
- `http_retries` - By default the client will retry common, typically transient errors (HTTP responses) twice; you can change the number of retries with this.
|
120
|
+
- `query_timeout` - By default the client will wait 10 minutes (600s) for a query to finish. You can change this default; the value is also sent to Snowflake as the query's timeout so that Snowflake enforces the same limit. Set in seconds.
|
118
121
|
|
119
122
|
Example configuration:
|
120
123
|
```ruby
|
@@ -12,7 +12,6 @@ require "retryable"
|
|
12
12
|
require "securerandom"
|
13
13
|
require "uri"
|
14
14
|
|
15
|
-
|
16
15
|
require_relative "client/http_connection_wrapper"
|
17
16
|
require_relative "client/key_pair_jwt_auth_manager"
|
18
17
|
require_relative "client/single_thread_in_memory_strategy"
|
@@ -36,6 +35,7 @@ module RubySnowflake
|
|
36
35
|
class ConnectionStarvedError < Error ; end
|
37
36
|
class RetryableBadResponseError < Error ; end
|
38
37
|
class RequestError < Error ; end
|
38
|
+
class QueryTimeoutError < Error ; end
|
39
39
|
|
40
40
|
class Client
|
41
41
|
DEFAULT_LOGGER = Logger.new(STDOUT)
|
@@ -53,11 +53,16 @@ module RubySnowflake
|
|
53
53
|
DEFAULT_THREAD_SCALE_FACTOR = 4
|
54
54
|
# how many times to retry common retryable HTTP responses (i.e. 429, 504)
|
55
55
|
DEFAULT_HTTP_RETRIES = 2
|
56
|
+
# how long to wait to allow a query to complete, in seconds
|
57
|
+
DEFAULT_QUERY_TIMEOUT = 600 # 10 minutes
|
56
58
|
|
57
59
|
OJ_OPTIONS = { :bigdecimal_load => :bigdecimal }.freeze
|
60
|
+
VALID_RESPONSE_CODES = %w(200 202).freeze
|
61
|
+
POLLING_RESPONSE_CODE = "202"
|
62
|
+
POLLING_INTERVAL = 2 # seconds
|
58
63
|
|
59
64
|
# can't be set after initialization
|
60
|
-
attr_reader :connection_timeout, :max_connections, :logger, :max_threads_per_query, :thread_scale_factor, :http_retries
|
65
|
+
attr_reader :connection_timeout, :max_connections, :logger, :max_threads_per_query, :thread_scale_factor, :http_retries, :query_timeout
|
61
66
|
|
62
67
|
def self.from_env(logger: DEFAULT_LOGGER,
|
63
68
|
log_level: DEFAULT_LOG_LEVEL,
|
@@ -66,7 +71,8 @@ module RubySnowflake
|
|
66
71
|
max_connections: env_option("SNOWFLAKE_MAX_CONNECTIONS", DEFAULT_MAX_CONNECTIONS ),
|
67
72
|
max_threads_per_query: env_option("SNOWFLAKE_MAX_THREADS_PER_QUERY", DEFAULT_MAX_THREADS_PER_QUERY),
|
68
73
|
thread_scale_factor: env_option("SNOWFLAKE_THREAD_SCALE_FACTOR", DEFAULT_THREAD_SCALE_FACTOR),
|
69
|
-
http_retries: env_option("SNOWFLAKE_HTTP_RETRIES", DEFAULT_HTTP_RETRIES)
|
74
|
+
http_retries: env_option("SNOWFLAKE_HTTP_RETRIES", DEFAULT_HTTP_RETRIES),
|
75
|
+
query_timeout: env_option("SNOWFLAKE_QUERY_TIMEOUT", DEFAULT_QUERY_TIMEOUT))
|
70
76
|
private_key = ENV["SNOWFLAKE_PRIVATE_KEY"] || File.read(ENV["SNOWFLAKE_PRIVATE_KEY_PATH"])
|
71
77
|
|
72
78
|
new(
|
@@ -85,6 +91,7 @@ module RubySnowflake
|
|
85
91
|
max_threads_per_query: max_threads_per_query,
|
86
92
|
thread_scale_factor: thread_scale_factor,
|
87
93
|
http_retries: http_retries,
|
94
|
+
query_timeout: query_timeout,
|
88
95
|
)
|
89
96
|
end
|
90
97
|
|
@@ -97,7 +104,8 @@ module RubySnowflake
|
|
97
104
|
max_connections: DEFAULT_MAX_CONNECTIONS,
|
98
105
|
max_threads_per_query: DEFAULT_MAX_THREADS_PER_QUERY,
|
99
106
|
thread_scale_factor: DEFAULT_THREAD_SCALE_FACTOR,
|
100
|
-
http_retries: DEFAULT_HTTP_RETRIES
|
107
|
+
http_retries: DEFAULT_HTTP_RETRIES,
|
108
|
+
query_timeout: DEFAULT_QUERY_TIMEOUT
|
101
109
|
)
|
102
110
|
@base_uri = uri
|
103
111
|
@key_pair_jwt_auth_manager =
|
@@ -113,26 +121,33 @@ module RubySnowflake
|
|
113
121
|
@max_threads_per_query = max_threads_per_query
|
114
122
|
@thread_scale_factor = thread_scale_factor
|
115
123
|
@http_retries = http_retries
|
124
|
+
@query_timeout = query_timeout
|
125
|
+
|
126
|
+
# Do NOT use normally, this exists for tests so we can reliably trigger the polling
|
127
|
+
# response workflow from snowflake in tests
|
128
|
+
@_enable_polling_queries = false
|
116
129
|
end
|
117
130
|
|
118
131
|
def query(query, warehouse: nil, streaming: false, database: nil)
|
119
132
|
warehouse ||= @default_warehouse
|
120
133
|
database ||= @default_database
|
121
134
|
|
135
|
+
query_start_time = Time.now.to_i
|
122
136
|
response = nil
|
123
137
|
connection_pool.with do |connection|
|
124
138
|
request_body = {
|
125
|
-
"statement" => query, "warehouse" => warehouse,
|
139
|
+
"statement" => query, "warehouse" => warehouse,
|
140
|
+
"database" => database, "timeout" => @query_timeout
|
126
141
|
}
|
127
142
|
|
128
143
|
response = request_with_auth_and_headers(
|
129
144
|
connection,
|
130
145
|
Net::HTTP::Post,
|
131
|
-
"/api/v2/statements?requestId=#{SecureRandom.uuid}",
|
146
|
+
"/api/v2/statements?requestId=#{SecureRandom.uuid}&async=#{@_enable_polling_queries}",
|
132
147
|
Oj.dump(request_body)
|
133
148
|
)
|
134
149
|
end
|
135
|
-
retreive_result_set(response, streaming)
|
150
|
+
retreive_result_set(query_start_time, query, response, streaming)
|
136
151
|
end
|
137
152
|
|
138
153
|
alias fetch query
|
@@ -158,13 +173,6 @@ module RubySnowflake
|
|
158
173
|
@port ||= URI.parse(@base_uri).port
|
159
174
|
end
|
160
175
|
|
161
|
-
def handle_errors(response)
|
162
|
-
if response.code != "200"
|
163
|
-
raise BadResponseError.new({}),
|
164
|
-
"Bad response! Got code: #{response.code}, w/ message #{response.body}"
|
165
|
-
end
|
166
|
-
end
|
167
|
-
|
168
176
|
def request_with_auth_and_headers(connection, request_class, path, body=nil)
|
169
177
|
uri = URI.parse("#{@base_uri}#{path}")
|
170
178
|
request = request_class.new(uri)
|
@@ -186,7 +194,7 @@ module RubySnowflake
|
|
186
194
|
end
|
187
195
|
|
188
196
|
def raise_on_bad_response(response)
|
189
|
-
return if response.code
|
197
|
+
return if VALID_RESPONSE_CODES.include? response.code
|
190
198
|
|
191
199
|
# there are a class of errors we want to retry rather than just giving up
|
192
200
|
if retryable_http_response_code?(response.code)
|
@@ -213,9 +221,50 @@ module RubySnowflake
|
|
213
221
|
end
|
214
222
|
end
|
215
223
|
|
216
|
-
def
|
224
|
+
def poll_for_completion_or_timeout(query_start_time, query, statement_handle)
|
225
|
+
first_data_json_body = nil
|
226
|
+
|
227
|
+
connection_pool.with do |connection|
|
228
|
+
loop do
|
229
|
+
sleep POLLING_INTERVAL
|
230
|
+
|
231
|
+
if Time.now.to_i - query_start_time > @query_timeout
|
232
|
+
cancelled = attempt_to_cancel_and_silence_errors(connection, statement_handle)
|
233
|
+
raise QueryTimeoutError.new("Query timed out. Query cancelled? #{cancelled} Query: #{query}")
|
234
|
+
end
|
235
|
+
|
236
|
+
poll_response = request_with_auth_and_headers(connection, Net::HTTP::Get,
|
237
|
+
"/api/v2/statements/#{statement_handle}")
|
238
|
+
if poll_response.code == POLLING_RESPONSE_CODE
|
239
|
+
next
|
240
|
+
else
|
241
|
+
return poll_response
|
242
|
+
end
|
243
|
+
end
|
244
|
+
end
|
245
|
+
end
|
246
|
+
|
247
|
+
def attempt_to_cancel_and_silence_errors(connection, statement_handle)
|
248
|
+
cancel_response = request_with_auth_and_headers(connection, Net::HTTP::Post,
|
249
|
+
"/api/v2/#{statement_handle}/cancel")
|
250
|
+
true
|
251
|
+
rescue Error => error
|
252
|
+
if error.is_a?(BadResponseError) && error.message.include?("404")
|
253
|
+
return true # snowflake cancelled it before we did
|
254
|
+
end
|
255
|
+
@logger.error("Error on attempting to cancel query #{statement_handle}, will raise a QueryTimeoutError")
|
256
|
+
false
|
257
|
+
end
|
258
|
+
|
259
|
+
def retreive_result_set(query_start_time, query, response, streaming)
|
217
260
|
json_body = Oj.load(response.body, OJ_OPTIONS)
|
218
261
|
statement_handle = json_body["statementHandle"]
|
262
|
+
|
263
|
+
if response.code == POLLING_RESPONSE_CODE
|
264
|
+
result_response = poll_for_completion_or_timeout(query_start_time, query, statement_handle)
|
265
|
+
json_body = Oj.load(result_response.body, OJ_OPTIONS)
|
266
|
+
end
|
267
|
+
|
219
268
|
num_threads = number_of_threads_to_use(json_body["resultSetMetaData"]["partitionInfo"].size)
|
220
269
|
retreive_proc = ->(index) { retreive_partition_data(statement_handle, index) }
|
221
270
|
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rb_snowflake_client
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.0
|
4
|
+
version: 0.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Rinsed
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2023-12-
|
11
|
+
date: 2023-12-08 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: concurrent-ruby
|