rb_snowflake_client 1.4.0 → 1.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +10 -0
- data/Gemfile +4 -0
- data/Gemfile.lock +37 -13
- data/README.md +42 -5
- data/lib/ruby_snowflake/client.rb +45 -24
- data/lib/ruby_snowflake/streaming_result.rb +10 -0
- data/lib/ruby_snowflake/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: ddf76c34f14ea4b7192e5bd3dbdb18b7d176e66c6327fab97ef8d4352138d964
|
4
|
+
data.tar.gz: 20223663cc72100e28b22a4ea572611d2b2bc8291519d1cde9f2b536c02865f1
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 266de51be70f28c748b703fc55787b0a495404a62226fba151c44dfe00ce31465c3aae5e516ba362cdfa8a22a0d597514d86fd6644f263a046cb275b0a3e3f46
|
7
|
+
data.tar.gz: b0e90b538d9b6557d7de1b58f085570097036df3a9d1ea6be500babb6b56fafb50f21a4d905f1c50e8a274df7ffdd58b6268d65f9dc2220c7e9f70680662479c
|
data/CHANGELOG.md
CHANGED
@@ -5,6 +5,16 @@ All notable changes to this project will be documented in this file.
|
|
5
5
|
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
|
6
6
|
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
7
7
|
|
8
|
+
## Unreleased
|
9
|
+
|
10
|
+
## [1.5.0] - 2025-10-14
|
11
|
+
### Added
|
12
|
+
- Instrumentation feature added for Active Support users
|
13
|
+
- Added `query_timeout` as a per-query parameter, allowing timeout override on individual queries
|
14
|
+
### Fixed
|
15
|
+
- `query_timeout` now properly sends timeout parameter to Snowflake API for server-side enforcement
|
16
|
+
- Streaming mode now releases consumed records, fixing memory leak. Note: if you were iterating over streaming results more than once, this is a breaking change (though that was not its intended usage).
|
17
|
+
|
8
18
|
## [1.4.0] - 2025-05-01
|
9
19
|
### Added
|
10
20
|
- Enhanced Row API to implement Enumerable interface
|
data/Gemfile
CHANGED
data/Gemfile.lock
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
rb_snowflake_client (1.4.0)
|
4
|
+
rb_snowflake_client (1.5.0)
|
5
5
|
bigdecimal (>= 3.0)
|
6
6
|
concurrent-ruby (>= 1.2)
|
7
7
|
connection_pool (>= 2.4)
|
@@ -13,42 +13,66 @@ PATH
|
|
13
13
|
GEM
|
14
14
|
remote: https://rubygems.org/
|
15
15
|
specs:
|
16
|
-
|
17
|
-
|
16
|
+
activesupport (8.0.3)
|
17
|
+
base64
|
18
|
+
benchmark (>= 0.3)
|
19
|
+
bigdecimal
|
20
|
+
concurrent-ruby (~> 1.0, >= 1.3.1)
|
21
|
+
connection_pool (>= 2.2.5)
|
22
|
+
drb
|
23
|
+
i18n (>= 1.6, < 2)
|
24
|
+
logger (>= 1.4.2)
|
25
|
+
minitest (>= 5.1)
|
26
|
+
securerandom (>= 0.3)
|
27
|
+
tzinfo (~> 2.0, >= 2.0.5)
|
28
|
+
uri (>= 0.13.1)
|
29
|
+
base64 (0.3.0)
|
30
|
+
benchmark (0.4.1)
|
31
|
+
bigdecimal (3.3.1)
|
18
32
|
coderay (1.1.3)
|
19
33
|
concurrent-ruby (1.3.5)
|
20
|
-
connection_pool (2.5.
|
21
|
-
diff-lcs (1.6.
|
34
|
+
connection_pool (2.5.4)
|
35
|
+
diff-lcs (1.6.2)
|
22
36
|
dotenv (3.1.8)
|
23
|
-
|
24
|
-
|
37
|
+
drb (2.2.3)
|
38
|
+
i18n (1.14.7)
|
39
|
+
concurrent-ruby (~> 1.0)
|
40
|
+
json (2.15.1)
|
41
|
+
jwt (3.1.2)
|
25
42
|
base64
|
43
|
+
logger (1.7.0)
|
26
44
|
method_source (1.1.0)
|
45
|
+
minitest (5.26.0)
|
27
46
|
parallel (1.27.0)
|
28
47
|
pry (0.15.2)
|
29
48
|
coderay (~> 1.1)
|
30
49
|
method_source (~> 1.0)
|
31
|
-
rake (13.
|
50
|
+
rake (13.3.0)
|
32
51
|
retryable (3.0.5)
|
33
|
-
rspec (3.13.
|
52
|
+
rspec (3.13.1)
|
34
53
|
rspec-core (~> 3.13.0)
|
35
54
|
rspec-expectations (~> 3.13.0)
|
36
55
|
rspec-mocks (~> 3.13.0)
|
37
|
-
rspec-core (3.13.
|
56
|
+
rspec-core (3.13.5)
|
38
57
|
rspec-support (~> 3.13.0)
|
39
|
-
rspec-expectations (3.13.
|
58
|
+
rspec-expectations (3.13.5)
|
40
59
|
diff-lcs (>= 1.2.0, < 2.0)
|
41
60
|
rspec-support (~> 3.13.0)
|
42
|
-
rspec-mocks (3.13.
|
61
|
+
rspec-mocks (3.13.5)
|
43
62
|
diff-lcs (>= 1.2.0, < 2.0)
|
44
63
|
rspec-support (~> 3.13.0)
|
45
|
-
rspec-support (3.13.
|
64
|
+
rspec-support (3.13.6)
|
65
|
+
securerandom (0.4.1)
|
66
|
+
tzinfo (2.0.6)
|
67
|
+
concurrent-ruby (~> 1.0)
|
68
|
+
uri (1.0.4)
|
46
69
|
|
47
70
|
PLATFORMS
|
48
71
|
arm64-darwin-22
|
49
72
|
ruby
|
50
73
|
|
51
74
|
DEPENDENCIES
|
75
|
+
activesupport
|
52
76
|
bundler
|
53
77
|
parallel
|
54
78
|
pry
|
data/README.md
CHANGED
@@ -132,6 +132,14 @@ Queries by default use the primary role assigned to the account. If there are mu
|
|
132
132
|
client.query("SELECT * FROM BIGTABLE", role: "MY_ROLE")
|
133
133
|
```
|
134
134
|
|
135
|
+
## Query timeout
|
136
|
+
|
137
|
+
You can override the query timeout on a per-query basis. The timeout is specified in seconds and will be enforced by both Snowflake server-side and the client-side polling mechanism.
|
138
|
+
|
139
|
+
```ruby
|
140
|
+
client.query("SELECT * FROM BIGTABLE", query_timeout: 30)
|
141
|
+
```
|
142
|
+
|
135
143
|
## Binding parameters
|
136
144
|
|
137
145
|
Say we have `BIGTABLE` with a `data` column of a type `VARIANT`.
|
@@ -140,16 +148,45 @@ Say we have `BIGTABLE` with a `data` column of a type `VARIANT`.
|
|
140
148
|
json_string = '{"valid": "json"}'
|
141
149
|
query = "insert into BIGTABLE(data) select parse_json(?)"
|
142
150
|
bindings = {
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
}
|
151
|
+
"1" => {
|
152
|
+
"type" => "TEXT",
|
153
|
+
"value" => "Other Event"
|
154
|
+
}
|
155
|
+
}
|
148
156
|
client.query(query, bindings: bindings)
|
149
157
|
```
|
150
158
|
|
151
159
|
For additional information about binding parameters refer to snowflake documentation: https://docs.snowflake.com/en/developer-guide/sql-api/submitting-requests#using-bind-variables-in-a-statement
|
152
160
|
|
161
|
+
## Instrumentation
|
162
|
+
|
163
|
+
If ActiveSupport is available, this library additionally emits [notification events](https://api.rubyonrails.org/classes/ActiveSupport/Notifications.html) around queries. You can subscribe to those to track timing, query counts, etc.
|
164
|
+
|
165
|
+
* `rb_snowflake_client.snowflake_query.finish`: published at query end
|
166
|
+
|
167
|
+
Events receive a payload with the following properties:
|
168
|
+
* `database`: snowflake database
|
169
|
+
* `schema`: snowflake schema
|
170
|
+
* `warehouse`: snowflake warehouse
|
171
|
+
* `query_id`: random UUID for the query
|
172
|
+
* `query_name`: argument passed to query/fetch
|
173
|
+
* `exception`: present if the query raised an error, see [Notifications documentation](https://api.rubyonrails.org/classes/ActiveSupport/Notifications.html#module-ActiveSupport::Notifications-label-Subscribers) for details
|
174
|
+
* `exception_object`: present if the query raised an error, see [Notifications documentation](https://api.rubyonrails.org/classes/ActiveSupport/Notifications.html#module-ActiveSupport::Notifications-label-Subscribers) for details
|
175
|
+
|
176
|
+
An example integration with [Datadog](https://www.rubydoc.info/gems/datadog) might look like this:
|
177
|
+
|
178
|
+
```ruby
|
179
|
+
ActiveSupport::Notifications.subscribe("rb_snowflake_client.snowflake_query.finish") do |name, start, finish, id, payload|
|
180
|
+
span = Datadog::Tracing.trace(payload[:query_name] || "snowflake_query",
|
181
|
+
resource: "snowflake",
|
182
|
+
start_time: start,
|
183
|
+
tags: payload,
|
184
|
+
type: Datadog::Tracing::Metadata::Ext::AppTypes::TYPE_DB)
|
185
|
+
|
186
|
+
span.finish(finish)
|
187
|
+
end
|
188
|
+
```
|
189
|
+
|
153
190
|
# Configuration Options
|
154
191
|
|
155
192
|
The client supports the following configuration options, each with their own getter/setter except connection pool options which must be set at construction. Additionally, all except logger can be configured with environment variables (see above, but the pattern is like: "SNOWFLAKE_HTTP_RETRIES"). Configuration options can only be set on initialization through `new` or `from_env`.
|
@@ -12,6 +12,13 @@ require "retryable"
|
|
12
12
|
require "securerandom"
|
13
13
|
require "uri"
|
14
14
|
|
15
|
+
begin
|
16
|
+
require "active_support"
|
17
|
+
require "active_support/notifications"
|
18
|
+
rescue LoadError
|
19
|
+
# This isn't required
|
20
|
+
end
|
21
|
+
|
15
22
|
require_relative "client/http_connection_wrapper"
|
16
23
|
require_relative "client/key_pair_jwt_auth_manager"
|
17
24
|
require_relative "client/single_thread_in_memory_strategy"
|
@@ -143,31 +150,35 @@ module RubySnowflake
|
|
143
150
|
@_enable_polling_queries = false
|
144
151
|
end
|
145
152
|
|
146
|
-
def query(query, warehouse: nil, streaming: false, database: nil, schema: nil, bindings: nil, role: nil)
|
153
|
+
def query(query, warehouse: nil, streaming: false, database: nil, schema: nil, bindings: nil, role: nil, query_name: nil, query_timeout: nil)
|
147
154
|
warehouse ||= @default_warehouse
|
148
155
|
database ||= @default_database
|
149
156
|
role ||= @default_role
|
157
|
+
query_timeout ||= @query_timeout
|
150
158
|
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
|
168
|
-
|
159
|
+
with_instrumentation({ database:, schema:, warehouse:, query_name: }) do
|
160
|
+
query_start_time = Time.now.to_i
|
161
|
+
response = nil
|
162
|
+
connection_pool.with do |connection|
|
163
|
+
request_body = {
|
164
|
+
"warehouse" => warehouse&.upcase,
|
165
|
+
"schema" => schema&.upcase,
|
166
|
+
"database" => database&.upcase,
|
167
|
+
"statement" => query,
|
168
|
+
"bindings" => bindings,
|
169
|
+
"role" => role,
|
170
|
+
"timeout" => query_timeout
|
171
|
+
}
|
172
|
+
|
173
|
+
response = request_with_auth_and_headers(
|
174
|
+
connection,
|
175
|
+
Net::HTTP::Post,
|
176
|
+
"/api/v2/statements?requestId=#{SecureRandom.uuid}&async=#{@_enable_polling_queries}",
|
177
|
+
request_body.to_json
|
178
|
+
)
|
179
|
+
end
|
180
|
+
retrieve_result_set(query_start_time, query, response, streaming, query_timeout)
|
169
181
|
end
|
170
|
-
retrieve_result_set(query_start_time, query, response, streaming)
|
171
182
|
end
|
172
183
|
|
173
184
|
alias fetch query
|
@@ -251,7 +262,7 @@ module RubySnowflake
|
|
251
262
|
end
|
252
263
|
end
|
253
264
|
|
254
|
-
def poll_for_completion_or_timeout(query_start_time, query, statement_handle)
|
265
|
+
def poll_for_completion_or_timeout(query_start_time, query, statement_handle, query_timeout)
|
255
266
|
first_data_json_body = nil
|
256
267
|
|
257
268
|
connection_pool.with do |connection|
|
@@ -259,7 +270,7 @@ module RubySnowflake
|
|
259
270
|
sleep POLLING_INTERVAL
|
260
271
|
|
261
272
|
elapsed_time = Time.now.to_i - query_start_time
|
262
|
-
if elapsed_time > @query_timeout
|
273
|
+
if elapsed_time > query_timeout
|
263
274
|
cancelled = attempt_to_cancel_and_silence_errors(connection, statement_handle)
|
264
275
|
raise QueryTimeoutError.new("Query timed out. Query cancelled? #{cancelled}; Duration: #{elapsed_time}; Query: '#{query}'")
|
265
276
|
end
|
@@ -287,12 +298,12 @@ module RubySnowflake
|
|
287
298
|
false
|
288
299
|
end
|
289
300
|
|
290
|
-
def retrieve_result_set(query_start_time, query, response, streaming)
|
301
|
+
def retrieve_result_set(query_start_time, query, response, streaming, query_timeout)
|
291
302
|
json_body = JSON.parse(response.body, JSON_PARSE_OPTIONS)
|
292
303
|
statement_handle = json_body["statementHandle"]
|
293
304
|
|
294
305
|
if response.code == POLLING_RESPONSE_CODE
|
295
|
-
result_response = poll_for_completion_or_timeout(query_start_time, query, statement_handle)
|
306
|
+
result_response = poll_for_completion_or_timeout(query_start_time, query, statement_handle, query_timeout)
|
296
307
|
json_body = JSON.parse(result_response.body, JSON_PARSE_OPTIONS)
|
297
308
|
end
|
298
309
|
|
@@ -329,5 +340,15 @@ module RubySnowflake
|
|
329
340
|
def number_of_threads_to_use(partition_count)
|
330
341
|
[[1, (partition_count / @thread_scale_factor.to_f).ceil].max, @max_threads_per_query].min
|
331
342
|
end
|
343
|
+
|
344
|
+
def with_instrumentation(tags, &block)
|
345
|
+
return block.call unless defined?(::ActiveSupport) && ::ActiveSupport
|
346
|
+
|
347
|
+
::ActiveSupport::Notifications.instrument(
|
348
|
+
"rb_snowflake_client.snowflake_query.finish",
|
349
|
+
tags.merge(query_id: SecureRandom.uuid)) do
|
350
|
+
block.call
|
351
|
+
end
|
352
|
+
end
|
332
353
|
end
|
333
354
|
end
|
@@ -27,9 +27,19 @@ module RubySnowflake
|
|
27
27
|
if data[index].is_a? Concurrent::Future
|
28
28
|
data[index] = data[index].value # wait for it to finish
|
29
29
|
end
|
30
|
+
|
30
31
|
data[index].each do |row|
|
31
32
|
yield wrap_row(row)
|
32
33
|
end
|
34
|
+
|
35
|
+
# After iterating over the current partition, clear the data to release memory
|
36
|
+
data[index].clear
|
37
|
+
|
38
|
+
# Reassign to a symbol so:
|
39
|
+
# - When looking at the list of partitions in `data` it is easier to detect
|
40
|
+
# - Will raise an exception if `data.each` is attempted to be called again
|
41
|
+
# - It won't trigger prefetch detection as `next_index`
|
42
|
+
data[index] = :finished
|
33
43
|
end
|
34
44
|
end
|
35
45
|
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rb_snowflake_client
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.4.0
|
4
|
+
version: 1.5.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Rinsed
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2025-
|
11
|
+
date: 2025-10-14 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bigdecimal
|