rb_snowflake_client 1.4.0 → 1.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 9c1581fededb6e083bc1f55249d806fca4a63462fafee98f9277986729dd1107
4
- data.tar.gz: e810744812f56a65d519d8a30ce96e97d2f1341479ee7aeb2a50766c0f541e90
3
+ metadata.gz: ddf76c34f14ea4b7192e5bd3dbdb18b7d176e66c6327fab97ef8d4352138d964
4
+ data.tar.gz: 20223663cc72100e28b22a4ea572611d2b2bc8291519d1cde9f2b536c02865f1
5
5
  SHA512:
6
- metadata.gz: 7cc78b78f3a5a3d056c826107aab889d23705ea76249800a7c098e43e8515db4fc9f85e11a2fae86cced570b1c5aaa869dded60495467bbb2b52241edd466c2e
7
- data.tar.gz: 6b57c2a14a03fe96711d86554eb13b86bd5865a1aafa629a30ea02b2c27fc72f743f8942fc0d4d35b590d824cbd110c2ad3efbe6ffc235037c7039213644e97c
6
+ metadata.gz: 266de51be70f28c748b703fc55787b0a495404a62226fba151c44dfe00ce31465c3aae5e516ba362cdfa8a22a0d597514d86fd6644f263a046cb275b0a3e3f46
7
+ data.tar.gz: b0e90b538d9b6557d7de1b58f085570097036df3a9d1ea6be500babb6b56fafb50f21a4d905f1c50e8a274df7ffdd58b6268d65f9dc2220c7e9f70680662479c
data/CHANGELOG.md CHANGED
@@ -5,6 +5,16 @@ All notable changes to this project will be documented in this file.
5
5
  The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
6
6
  and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
7
7
 
8
+ ## Unreleased
9
+
10
+ ## [1.5.0] - 2025-10-14
11
+ ### Added
12
+ - Instrumentation feature added for Active Support users
13
+ - Added `query_timeout` as a per-query parameter, allowing timeout override on individual queries
14
+ ### Fixed
15
+ - `query_timeout` now properly sends timeout parameter to Snowflake API for server-side enforcement
16
+ - Streaming mode now releases consumed records, fixing a memory leak. Note: if you were iterating over streaming results more than once, this is a breaking change (although repeated iteration was never an intended use of streaming mode).
17
+
8
18
  ## [1.4.0] - 2025-05-01
9
19
  ### Added
10
20
  - Enhanced Row API to implement Enumerable interface
data/Gemfile CHANGED
@@ -8,6 +8,10 @@ gemspec
8
8
  gem "bundler"
9
9
  gem "rake"
10
10
 
11
+ group :development, :test do
12
+ gem "activesupport"
13
+ end
14
+
11
15
  group :development do
12
16
  gem "parallel"
13
17
  gem "pry"
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- rb_snowflake_client (1.4.0)
4
+ rb_snowflake_client (1.5.0)
5
5
  bigdecimal (>= 3.0)
6
6
  concurrent-ruby (>= 1.2)
7
7
  connection_pool (>= 2.4)
@@ -13,42 +13,66 @@ PATH
13
13
  GEM
14
14
  remote: https://rubygems.org/
15
15
  specs:
16
- base64 (0.2.0)
17
- bigdecimal (3.1.9)
16
+ activesupport (8.0.3)
17
+ base64
18
+ benchmark (>= 0.3)
19
+ bigdecimal
20
+ concurrent-ruby (~> 1.0, >= 1.3.1)
21
+ connection_pool (>= 2.2.5)
22
+ drb
23
+ i18n (>= 1.6, < 2)
24
+ logger (>= 1.4.2)
25
+ minitest (>= 5.1)
26
+ securerandom (>= 0.3)
27
+ tzinfo (~> 2.0, >= 2.0.5)
28
+ uri (>= 0.13.1)
29
+ base64 (0.3.0)
30
+ benchmark (0.4.1)
31
+ bigdecimal (3.3.1)
18
32
  coderay (1.1.3)
19
33
  concurrent-ruby (1.3.5)
20
- connection_pool (2.5.3)
21
- diff-lcs (1.6.1)
34
+ connection_pool (2.5.4)
35
+ diff-lcs (1.6.2)
22
36
  dotenv (3.1.8)
23
- json (2.11.3)
24
- jwt (2.10.1)
37
+ drb (2.2.3)
38
+ i18n (1.14.7)
39
+ concurrent-ruby (~> 1.0)
40
+ json (2.15.1)
41
+ jwt (3.1.2)
25
42
  base64
43
+ logger (1.7.0)
26
44
  method_source (1.1.0)
45
+ minitest (5.26.0)
27
46
  parallel (1.27.0)
28
47
  pry (0.15.2)
29
48
  coderay (~> 1.1)
30
49
  method_source (~> 1.0)
31
- rake (13.2.1)
50
+ rake (13.3.0)
32
51
  retryable (3.0.5)
33
- rspec (3.13.0)
52
+ rspec (3.13.1)
34
53
  rspec-core (~> 3.13.0)
35
54
  rspec-expectations (~> 3.13.0)
36
55
  rspec-mocks (~> 3.13.0)
37
- rspec-core (3.13.3)
56
+ rspec-core (3.13.5)
38
57
  rspec-support (~> 3.13.0)
39
- rspec-expectations (3.13.3)
58
+ rspec-expectations (3.13.5)
40
59
  diff-lcs (>= 1.2.0, < 2.0)
41
60
  rspec-support (~> 3.13.0)
42
- rspec-mocks (3.13.2)
61
+ rspec-mocks (3.13.5)
43
62
  diff-lcs (>= 1.2.0, < 2.0)
44
63
  rspec-support (~> 3.13.0)
45
- rspec-support (3.13.2)
64
+ rspec-support (3.13.6)
65
+ securerandom (0.4.1)
66
+ tzinfo (2.0.6)
67
+ concurrent-ruby (~> 1.0)
68
+ uri (1.0.4)
46
69
 
47
70
  PLATFORMS
48
71
  arm64-darwin-22
49
72
  ruby
50
73
 
51
74
  DEPENDENCIES
75
+ activesupport
52
76
  bundler
53
77
  parallel
54
78
  pry
data/README.md CHANGED
@@ -132,6 +132,14 @@ Queries by default use the primary role assigned to the account. If there are mu
132
132
  client.query("SELECT * FROM BIGTABLE", role: "MY_ROLE")
133
133
  ```
134
134
 
135
+ ## Query timeout
136
+
137
+ You can override the query timeout on a per-query basis. The timeout is specified in seconds and is enforced both server-side by Snowflake and client-side by the polling mechanism. If `query_timeout` is omitted, the client's configured `query_timeout` is used.
138
+
139
+ ```ruby
140
+ client.query("SELECT * FROM BIGTABLE", query_timeout: 30)
141
+ ```
142
+
135
143
  ## Binding parameters
136
144
 
137
145
  Say we have `BIGTABLE` with a `data` column of a type `VARIANT`.
@@ -140,16 +148,45 @@ Say we have `BIGTABLE` with a `data` column of a type `VARIANT`.
140
148
  json_string = '{"valid": "json"}'
141
149
  query = "insert into BIGTABLE(data) select parse_json(?)"
142
150
  bindings = {
143
- "1": {
144
- "type": "TEXT",
145
- "value": json_string
146
- }
147
- }
151
+ "1" => {
152
+ "type" => "TEXT",
153
+ "value" => json_string
154
+ }
155
+ }
148
156
  client.query(query, bindings: bindings)
149
157
  ```
150
158
 
151
159
  For additional information about binding parameters refer to snowflake documentation: https://docs.snowflake.com/en/developer-guide/sql-api/submitting-requests#using-bind-variables-in-a-statement
152
160
 
161
+ ## Instrumentation
162
+
163
+ If ActiveSupport is available, this library additionally emits [notification events](https://api.rubyonrails.org/classes/ActiveSupport/Notifications.html) around queries. You can subscribe to those to track timing, query counts, etc.
164
+
165
+ * `rb_snowflake_client.snowflake_query.finish`: published at query end
166
+
167
+ Events receive a payload with the following properties:
168
+ * `database`: snowflake database
169
+ * `schema`: snowflake schema
170
+ * `warehouse`: snowflake warehouse
171
+ * `query_id`: random UUID generated by the client for each instrumented query
172
+ * `query_name`: the `query_name:` keyword argument passed to `query`/`fetch` (`nil` if not given)
173
+ * `exception`: present if the query raised an error, see [Notifications documentation](https://api.rubyonrails.org/classes/ActiveSupport/Notifications.html#module-ActiveSupport::Notifications-label-Subscribers) for details
174
+ * `exception_object`: present if the query raised an error, see [Notifications documentation](https://api.rubyonrails.org/classes/ActiveSupport/Notifications.html#module-ActiveSupport::Notifications-label-Subscribers) for details
175
+
176
+ An example integration with [Datadog](https://www.rubydoc.info/gems/datadog) might look like this:
177
+
178
+ ```ruby
179
+ ActiveSupport::Notifications.subscribe("rb_snowflake_client.snowflake_query.finish") do |name, start, finish, id, payload|
180
+ span = Datadog::Tracing.trace(payload[:query_name] || "snowflake_query",
181
+ resource: "snowflake",
182
+ start_time: start,
183
+ tags: payload,
184
+ type: Datadog::Tracing::Metadata::Ext::AppTypes::TYPE_DB)
185
+
186
+ span.finish(finish)
187
+ end
188
+ ```
189
+
153
190
  # Configuration Options
154
191
 
155
192
  The client supports the following configuration options, each with their own getter/setter except connection pool options which must be set at construction. Additionally, all except logger can be configured with environment variables (see above; the pattern is like "SNOWFLAKE_HTTP_RETRIES"). Configuration options can only be set on initialization through `new` or `from_env`.
@@ -12,6 +12,13 @@ require "retryable"
12
12
  require "securerandom"
13
13
  require "uri"
14
14
 
15
+ begin
16
+ require "active_support"
17
+ require "active_support/notifications"
18
+ rescue LoadError
19
+ # This isn't required
20
+ end
21
+
15
22
  require_relative "client/http_connection_wrapper"
16
23
  require_relative "client/key_pair_jwt_auth_manager"
17
24
  require_relative "client/single_thread_in_memory_strategy"
@@ -143,31 +150,35 @@ module RubySnowflake
143
150
  @_enable_polling_queries = false
144
151
  end
145
152
 
146
- def query(query, warehouse: nil, streaming: false, database: nil, schema: nil, bindings: nil, role: nil)
153
+ def query(query, warehouse: nil, streaming: false, database: nil, schema: nil, bindings: nil, role: nil, query_name: nil, query_timeout: nil)
147
154
  warehouse ||= @default_warehouse
148
155
  database ||= @default_database
149
156
  role ||= @default_role
157
+ query_timeout ||= @query_timeout
150
158
 
151
- query_start_time = Time.now.to_i
152
- response = nil
153
- connection_pool.with do |connection|
154
- request_body = {
155
- "warehouse" => warehouse&.upcase,
156
- "schema" => schema&.upcase,
157
- "database" => database&.upcase,
158
- "statement" => query,
159
- "bindings" => bindings,
160
- "role" => role
161
- }
162
-
163
- response = request_with_auth_and_headers(
164
- connection,
165
- Net::HTTP::Post,
166
- "/api/v2/statements?requestId=#{SecureRandom.uuid}&async=#{@_enable_polling_queries}",
167
- request_body.to_json
168
- )
159
+ with_instrumentation({ database:, schema:, warehouse:, query_name: }) do
160
+ query_start_time = Time.now.to_i
161
+ response = nil
162
+ connection_pool.with do |connection|
163
+ request_body = {
164
+ "warehouse" => warehouse&.upcase,
165
+ "schema" => schema&.upcase,
166
+ "database" => database&.upcase,
167
+ "statement" => query,
168
+ "bindings" => bindings,
169
+ "role" => role,
170
+ "timeout" => query_timeout
171
+ }
172
+
173
+ response = request_with_auth_and_headers(
174
+ connection,
175
+ Net::HTTP::Post,
176
+ "/api/v2/statements?requestId=#{SecureRandom.uuid}&async=#{@_enable_polling_queries}",
177
+ request_body.to_json
178
+ )
179
+ end
180
+ retrieve_result_set(query_start_time, query, response, streaming, query_timeout)
169
181
  end
170
- retrieve_result_set(query_start_time, query, response, streaming)
171
182
  end
172
183
 
173
184
  alias fetch query
@@ -251,7 +262,7 @@ module RubySnowflake
251
262
  end
252
263
  end
253
264
 
254
- def poll_for_completion_or_timeout(query_start_time, query, statement_handle)
265
+ def poll_for_completion_or_timeout(query_start_time, query, statement_handle, query_timeout)
255
266
  first_data_json_body = nil
256
267
 
257
268
  connection_pool.with do |connection|
@@ -259,7 +270,7 @@ module RubySnowflake
259
270
  sleep POLLING_INTERVAL
260
271
 
261
272
  elapsed_time = Time.now.to_i - query_start_time
262
- if elapsed_time > @query_timeout
273
+ if elapsed_time > query_timeout
263
274
  cancelled = attempt_to_cancel_and_silence_errors(connection, statement_handle)
264
275
  raise QueryTimeoutError.new("Query timed out. Query cancelled? #{cancelled}; Duration: #{elapsed_time}; Query: '#{query}'")
265
276
  end
@@ -287,12 +298,12 @@ module RubySnowflake
287
298
  false
288
299
  end
289
300
 
290
- def retrieve_result_set(query_start_time, query, response, streaming)
301
+ def retrieve_result_set(query_start_time, query, response, streaming, query_timeout)
291
302
  json_body = JSON.parse(response.body, JSON_PARSE_OPTIONS)
292
303
  statement_handle = json_body["statementHandle"]
293
304
 
294
305
  if response.code == POLLING_RESPONSE_CODE
295
- result_response = poll_for_completion_or_timeout(query_start_time, query, statement_handle)
306
+ result_response = poll_for_completion_or_timeout(query_start_time, query, statement_handle, query_timeout)
296
307
  json_body = JSON.parse(result_response.body, JSON_PARSE_OPTIONS)
297
308
  end
298
309
 
@@ -329,5 +340,15 @@ module RubySnowflake
329
340
  def number_of_threads_to_use(partition_count)
330
341
  [[1, (partition_count / @thread_scale_factor.to_f).ceil].max, @max_threads_per_query].min
331
342
  end
343
+
344
+ def with_instrumentation(tags, &block)
345
+ return block.call unless defined?(::ActiveSupport) && ::ActiveSupport
346
+
347
+ ::ActiveSupport::Notifications.instrument(
348
+ "rb_snowflake_client.snowflake_query.finish",
349
+ tags.merge(query_id: SecureRandom.uuid)) do
350
+ block.call
351
+ end
352
+ end
332
353
  end
333
354
  end
@@ -27,9 +27,19 @@ module RubySnowflake
27
27
  if data[index].is_a? Concurrent::Future
28
28
  data[index] = data[index].value # wait for it to finish
29
29
  end
30
+
30
31
  data[index].each do |row|
31
32
  yield wrap_row(row)
32
33
  end
34
+
35
+ # After iterating over the current partition, clear the data to release memory
36
+ data[index].clear
37
+
38
+ # Reassign to a symbol so:
39
+ # - When looking at the list of partitions in `data` it is easier to detect
40
+ # - Will raise an exception if `data.each` is attempted to be called again
41
+ # - It won't trigger prefetch detection as `next_index`
42
+ data[index] = :finished
33
43
  end
34
44
  end
35
45
 
@@ -1,3 +1,3 @@
1
1
  module RubySnowflake
2
- VERSION = "1.4.0"
2
+ VERSION = "1.5.0"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rb_snowflake_client
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.4.0
4
+ version: 1.5.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Rinsed
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2025-05-01 00:00:00.000000000 Z
11
+ date: 2025-10-14 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bigdecimal