rb_snowflake_client 0.0.6 → 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/release-gh-packages.yml +1 -1
- data/.github/workflows/release-rubygems.yml +1 -1
- data/Gemfile +1 -0
- data/Gemfile.lock +3 -1
- data/README.md +40 -10
- data/lib/ruby_snowflake/client/threaded_in_memory_strategy.rb +12 -7
- data/lib/ruby_snowflake/client.rb +96 -26
- data/lib/ruby_snowflake/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: f2dfd76d405095487b91fea79a1ab4757cdf80a3058e87d196ae112dcc93b573
|
4
|
+
data.tar.gz: f73bc76ce00aede0edc9a85d1afd62ce3d73a5abbff9c1037db6a271d7d38137
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 25f52a2906278092292dade37dc1c46384d2e02bb1770fb7f99b09458e3cfcdb23aaa380e62f98e35463af76b67f9eb122adf046058522c486af10c6cfde0ef2
|
7
|
+
data.tar.gz: '079f280217f4d1d2554f6f895d3c061b93780e812bdf5bbda8e846a4bca1f10666e8c862d6ca00d741cf2cf28565f4994ebe55b8e4e3cc5555ffbbd1ebc217a0'
|
data/Gemfile
CHANGED
data/Gemfile.lock
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
rb_snowflake_client (0.0
|
4
|
+
rb_snowflake_client (0.1.0)
|
5
5
|
|
6
6
|
GEM
|
7
7
|
remote: https://rubygems.org/
|
@@ -19,6 +19,7 @@ GEM
|
|
19
19
|
coderay (~> 1.1)
|
20
20
|
method_source (~> 1.0)
|
21
21
|
rake (13.1.0)
|
22
|
+
retryable (3.0.5)
|
22
23
|
rspec (3.12.0)
|
23
24
|
rspec-core (~> 3.12.0)
|
24
25
|
rspec-expectations (~> 3.12.0)
|
@@ -49,6 +50,7 @@ DEPENDENCIES
|
|
49
50
|
pry
|
50
51
|
rake
|
51
52
|
rb_snowflake_client!
|
53
|
+
retryable
|
52
54
|
rspec
|
53
55
|
|
54
56
|
BUNDLED WITH
|
data/README.md
CHANGED
@@ -32,19 +32,28 @@ client = RubySnowflake::Client.new(
|
|
32
32
|
"snowflake-account", # typically your subdomain
|
33
33
|
"snowflake-user", # Your snowflake user
|
34
34
|
"some_warehouse", # The name of your warehouse to use by default
|
35
|
+
max_connections: 12, # Config options can be passed in
|
36
|
+
connection_timeout: 45, # See below for the full set of options
|
35
37
|
)
|
36
38
|
|
37
|
-
# alternatively you can use the
|
38
|
-
|
39
|
-
# SNOWFLAKE_PRIVATE_KEY_PATH
|
40
|
-
# or
|
41
|
-
# SNOWFLAKE_PRIVATE_KEY
|
42
|
-
# SNOWFLAKE_ORGANIZATION
|
43
|
-
# SNOWFLAKE_ACCOUNT
|
44
|
-
# SNOWFLAKE_USER
|
45
|
-
# SNOWFLAKE_DEFAULT_WAREHOUSE
|
46
|
-
RubySnowflake::Client.connect
|
39
|
+
# alternatively you can use the `from_env` method, which will pull these values from the following environment variables. You can either provide the path to the PEM file, or its contents in an ENV variable.
|
40
|
+
RubySnowflake::Client.from_env
|
47
41
|
```
|
42
|
+
Available ENV variables (see below in the config section for details)
|
43
|
+
`SNOWFLAKE_URI`
|
44
|
+
`SNOWFLAKE_PRIVATE_KEY_PATH`
|
45
|
+
or (use either the key, or the path, key takes precedence if both are provided)
|
46
|
+
`SNOWFLAKE_PRIVATE_KEY`
|
47
|
+
`SNOWFLAKE_ORGANIZATION`
|
48
|
+
`SNOWFLAKE_ACCOUNT`
|
49
|
+
`SNOWFLAKE_USER`
|
50
|
+
`SNOWFLAKE_DEFAULT_WAREHOUSE`
|
51
|
+
`SNOWFLAKE_JWT_TOKEN_TTL`
|
52
|
+
`SNOWFLAKE_CONNECTION_TIMEOUT`
|
53
|
+
`SNOWFLAKE_MAX_CONNECTIONS`
|
54
|
+
`SNOWFLAKE_MAX_THREADS_PER_QUERY`
|
55
|
+
`SNOWFLAKE_THREAD_SCALE_FACTOR`
|
56
|
+
`SNOWFLAKE_HTTP_RETRIES`
|
48
57
|
|
49
58
|
Once you have a client, make queries
|
50
59
|
```ruby
|
@@ -68,6 +77,27 @@ result.each do |row|
|
|
68
77
|
end
|
69
78
|
```
|
70
79
|
|
80
|
+
# Configuration Options
|
81
|
+
|
82
|
+
The client supports the following configuration options, each with its own getter/setter, except the connection pool options, which must be set at construction. Additionally, all except `logger` and `log_level` can be configured with environment variables (see above; the pattern is like `SNOWFLAKE_HTTP_RETRIES`).
|
83
|
+
|
84
|
+
`logger` - takes any Ruby logger (by default it's a std lib `Logger.new(STDOUT)`, set at INFO level). Not available as an ENV variable config option.
|
85
|
+
`log_level` - takes a log level, type is dependent on logger, for the default ruby Logger, use a level like `Logger::WARN`. Not available as an ENV variable config option.
|
86
|
+
`jwt_token_ttl` - The time to live set on JWT token in seconds, defaults to 3540 (59 minutes, the longest Snowflake supports is 60)
|
87
|
+
`connection_timeout` - The amount of time in seconds that the client's connection pool will wait before erroring in handing out a valid connection, defaults to 60 seconds
|
88
|
+
`max_connections` - The maximum number of http connections to hold open in the connection pool. If you use the client in a threaded context, you may need to increase this to be threads * client.max_threads_per_query, defaults to 16. Can only be set on initialization.
|
89
|
+
`max_threads_per_query` - The maximum number of threads the client should use to retrieve data, per query, defaults to 8. If you want the client to act in a single threaded way, set this to 1.
|
90
|
+
`thread_scale_factor` - When downloading a result set into memory, thread count is calculated by dividing a query's partition count by this number. For details on implementation see the code in `client.rb`.
|
91
|
+
`http_retries` - By default the client will retry common, typically transient errors (HTTP responses) twice; you can change the number of retries with this option.
|
92
|
+
|
93
|
+
Example configuration:
|
94
|
+
```ruby
|
95
|
+
client = RubySnowflake::Client.from_env
|
96
|
+
client.logger = Rails.logger
|
97
|
+
client.max_connections = 24
|
98
|
+
client.http_retries = 1
|
99
|
+
```
|
100
|
+
|
71
101
|
# Gotchas
|
72
102
|
|
73
103
|
1. Does not yet support multiple statements (work around is to wrap in `BEGIN ... END`)
|
@@ -18,13 +18,18 @@ module RubySnowflake
|
|
18
18
|
end
|
19
19
|
futures.each do |future|
|
20
20
|
if future.rejected?
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
21
|
+
if future.reason.is_a? RubySnowflake::Error
|
22
|
+
raise future.reason
|
23
|
+
else
|
24
|
+
raise ConnectionStarvedError.new(
|
25
|
+
"A partition request timed out. This is usually do to using the client in" \
|
26
|
+
"multiple threads. The client uses a connection thread pool and if too many" \
|
27
|
+
"requests are all done in threads at the same time, threads can get starved" \
|
28
|
+
"of access to connections. The solution for this is to either increase the " \
|
29
|
+
"max_connections parameter on the client or create a new client instance" \
|
30
|
+
"with it's own connection pool to snowflake per thread. Rejection reason: #{future.reason.message}"
|
31
|
+
)
|
32
|
+
end
|
28
33
|
end
|
29
34
|
index, partition_data = future.value
|
30
35
|
result[index] = partition_data
|
@@ -6,9 +6,11 @@ require "concurrent"
|
|
6
6
|
require "connection_pool"
|
7
7
|
require "json"
|
8
8
|
require "jwt"
|
9
|
+
require "logger"
|
9
10
|
require "net/http"
|
10
11
|
require "oj"
|
11
12
|
require "openssl"
|
13
|
+
require "retryable"
|
12
14
|
require "securerandom"
|
13
15
|
require "uri"
|
14
16
|
|
@@ -33,17 +35,31 @@ module RubySnowflake
|
|
33
35
|
class BadResponseError < Error ; end
|
34
36
|
class ConnectionError < Error ; end
|
35
37
|
class ConnectionStarvedError < Error ; end
|
38
|
+
class RetryableBadResponseError < Error ; end
|
36
39
|
class RequestError < Error ; end
|
37
40
|
|
38
|
-
|
39
41
|
class Client
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
42
|
+
DEFAULT_LOGGER = Logger.new(STDOUT)
|
43
|
+
DEFAULT_LOG_LEVEL = Logger::INFO
|
44
|
+
# seconds (59 min), this is the max supported by snowflake - 1 minute
|
45
|
+
DEFAULT_JWT_TOKEN_TTL = 3540
|
46
|
+
# seconds, how long for a thread to wait for a connection before erroring
|
47
|
+
DEFAULT_CONNECTION_TIMEOUT = 60
|
48
|
+
# default maximum size of the http connection pool
|
49
|
+
DEFAULT_MAX_CONNECTIONS = 16
|
50
|
+
# default maximum size of the thread pool on a single query
|
51
|
+
DEFAULT_MAX_THREADS_PER_QUERY = 8
|
52
|
+
# partition count factor for number of threads
|
53
|
+
# (i.e. 2 == once we have 4 partitions, spin up a second thread)
|
54
|
+
DEFAULT_THREAD_SCALE_FACTOR = 4
|
55
|
+
# how many times to retry common retryable HTTP responses (i.e. 429, 504)
|
56
|
+
DEFAULT_HTTP_RETRIES = 2
|
57
|
+
|
58
|
+
# can't be set after initialization
|
59
|
+
attr_reader :connection_timeout, :max_connections
|
60
|
+
attr_accessor :logger, :jwt_token_ttl, :max_threads_per_query, :thread_scale_factor, :http_retries
|
61
|
+
|
62
|
+
def self.from_env
|
47
63
|
private_key = ENV["SNOWFLAKE_PRIVATE_KEY"] || File.read(ENV["SNOWFLAKE_PRIVATE_KEY_PATH"])
|
48
64
|
|
49
65
|
new(
|
@@ -53,10 +69,24 @@ module RubySnowflake
|
|
53
69
|
ENV["SNOWFLAKE_ACCOUNT"],
|
54
70
|
ENV["SNOWFLAKE_USER"],
|
55
71
|
ENV["SNOWFLAKE_DEFAULT_WAREHOUSE"],
|
72
|
+
jwt_token_ttl: env_option("SNOWFLAKE_JWT_TOKEN_TTL", DEFAULT_JWT_TOKEN_TTL),
|
73
|
+
connection_timeout: env_option("SNOWFLAKE_CONNECTION_TIMEOUT", DEFAULT_CONNECTION_TIMEOUT ),
|
74
|
+
max_connections: env_option("SNOWFLAKE_MAX_CONNECTIONS", DEFAULT_MAX_CONNECTIONS ),
|
75
|
+
max_threads_per_query: env_option("SNOWFLAKE_MAX_THREADS_PER_QUERY", DEFAULT_MAX_THREADS_PER_QUERY),
|
76
|
+
thread_scale_factor: env_option("SNOWFLAKE_THREAD_SCALE_FACTOR", DEFAULT_THREAD_SCALE_FACTOR),
|
77
|
+
http_retries: env_option("SNOWFLAKE_HTTP_RETRIES", DEFAULT_HTTP_RETRIES),
|
56
78
|
)
|
57
79
|
end
|
58
80
|
|
59
|
-
def initialize(uri, private_key, organization, account, user, default_warehouse
|
81
|
+
def initialize(uri, private_key, organization, account, user, default_warehouse,
|
82
|
+
logger: DEFAULT_LOGGER,
|
83
|
+
log_level: DEFAULT_LOG_LEVEL,
|
84
|
+
jwt_token_ttl: DEFAULT_JWT_TOKEN_TTL,
|
85
|
+
connection_timeout: DEFAULT_CONNECTION_TIMEOUT,
|
86
|
+
max_connections: DEFAULT_MAX_CONNECTIONS,
|
87
|
+
max_threads_per_query: DEFAULT_MAX_THREADS_PER_QUERY,
|
88
|
+
thread_scale_factor: DEFAULT_THREAD_SCALE_FACTOR,
|
89
|
+
http_retries: DEFAULT_HTTP_RETRIES)
|
60
90
|
@base_uri = uri
|
61
91
|
@private_key_pem = private_key
|
62
92
|
@organization = organization
|
@@ -65,6 +95,16 @@ module RubySnowflake
|
|
65
95
|
@default_warehouse = default_warehouse
|
66
96
|
@public_key_fingerprint = public_key_fingerprint(@private_key_pem)
|
67
97
|
|
98
|
+
# set defaults for config settings
|
99
|
+
@logger = logger
|
100
|
+
@logger.level = log_level
|
101
|
+
@jwt_token_ttl = jwt_token_ttl
|
102
|
+
@connection_timeout = connection_timeout
|
103
|
+
@max_connections = max_connections
|
104
|
+
@max_threads_per_query = max_threads_per_query
|
105
|
+
@thread_scale_factor = thread_scale_factor
|
106
|
+
@http_retries = http_retries
|
107
|
+
|
68
108
|
# start with an expired value to force creation
|
69
109
|
@token_expires_at = Time.now.to_i - 1
|
70
110
|
@token_semaphore = Concurrent::Semaphore.new(1)
|
@@ -84,13 +124,18 @@ module RubySnowflake
|
|
84
124
|
Oj.dump(request_body)
|
85
125
|
)
|
86
126
|
end
|
87
|
-
handle_errors(response)
|
88
127
|
retreive_result_set(response, streaming)
|
89
128
|
end
|
90
129
|
|
130
|
+
def self.env_option(env_var_name, default_value)
|
131
|
+
value = ENV[env_var_name]
|
132
|
+
value.nil? || value.empty? ? default_value : ENV[env_var_name].to_i
|
133
|
+
end
|
134
|
+
private_class_method :env_option
|
135
|
+
|
91
136
|
private
|
92
137
|
def connection_pool
|
93
|
-
@connection_pool ||= ConnectionPool.new(size:
|
138
|
+
@connection_pool ||= ConnectionPool.new(size: @max_connections, timeout: @connection_timeout) do
|
94
139
|
HttpConnectionWrapper.new(hostname, port).start
|
95
140
|
end
|
96
141
|
end
|
@@ -108,7 +153,7 @@ module RubySnowflake
|
|
108
153
|
|
109
154
|
@token_semaphore.acquire do
|
110
155
|
now = Time.now.to_i
|
111
|
-
@token_expires_at = now +
|
156
|
+
@token_expires_at = now + @jwt_token_ttl
|
112
157
|
|
113
158
|
private_key = OpenSSL::PKey.read(@private_key_pem)
|
114
159
|
|
@@ -127,13 +172,6 @@ module RubySnowflake
|
|
127
172
|
Time.now.to_i > @token_expires_at
|
128
173
|
end
|
129
174
|
|
130
|
-
def handle_errors(response)
|
131
|
-
if response.code != "200"
|
132
|
-
raise BadResponseError.new({}),
|
133
|
-
"Bad response! Got code: #{response.code}, w/ message #{response.body}"
|
134
|
-
end
|
135
|
-
end
|
136
|
-
|
137
175
|
def request_with_auth_and_headers(connection, request_class, path, body=nil)
|
138
176
|
uri = URI.parse("#{@base_uri}#{path}")
|
139
177
|
request = request_class.new(uri)
|
@@ -143,11 +181,43 @@ module RubySnowflake
|
|
143
181
|
request["X-Snowflake-Authorization-Token-Type"] = "KEYPAIR_JWT"
|
144
182
|
request.body = body unless body.nil?
|
145
183
|
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
184
|
+
Retryable.retryable(tries: @http_retries + 1,
|
185
|
+
on: RetryableBadResponseError,
|
186
|
+
log_method: retryable_log_method) do
|
187
|
+
response = nil
|
188
|
+
bm = Benchmark.measure { response = connection.request(request) }
|
189
|
+
logger.debug { "HTTP Request time: #{bm.real}" }
|
190
|
+
raise_on_bad_response(response)
|
191
|
+
response
|
192
|
+
end
|
193
|
+
end
|
194
|
+
|
195
|
+
def raise_on_bad_response(response)
|
196
|
+
return if response.code == "200"
|
197
|
+
|
198
|
+
# there are a class of errors we want to retry rather than just giving up
|
199
|
+
if retryable_http_response_code?(response.code)
|
200
|
+
raise RetryableBadResponseError.new({}),
|
201
|
+
"Retryable bad response! Got code: #{response.code}, w/ message #{response.body}"
|
202
|
+
|
203
|
+
else # not one we should retry
|
204
|
+
raise BadResponseError.new({}),
|
205
|
+
"Bad response! Got code: #{response.code}, w/ message #{response.body}"
|
206
|
+
end
|
207
|
+
end
|
208
|
+
|
209
|
+
# shamelessly stolen from the battle tested python client
|
210
|
+
# https://github.com/snowflakedb/snowflake-connector-python/blob/eceed981f93e29d2f4663241253b48340389f4ef/src/snowflake/connector/network.py#L191
|
211
|
+
def retryable_http_response_code?(code)
|
212
|
+
# retry (in order): bad request, forbidden (token expired in flight), method not allowed,
|
213
|
+
# request timeout, too many requests, anything in the 500 range (504 is fairly common)
|
214
|
+
[400, 403, 405, 408, 429, 504].include?(code.to_i) || (500..599).include?(code)
|
215
|
+
end
|
216
|
+
|
217
|
+
def retryable_log_method
|
218
|
+
@retryable_log_method ||= proc do |retries, error|
|
219
|
+
logger.info("Retry attempt #{retries} because #{error.message}")
|
220
|
+
end
|
151
221
|
end
|
152
222
|
|
153
223
|
def retreive_result_set(response, streaming)
|
@@ -177,14 +247,14 @@ module RubySnowflake
|
|
177
247
|
|
178
248
|
partition_json = nil
|
179
249
|
bm = Benchmark.measure { partition_json = Oj.load(partition_response.body, oj_options) }
|
180
|
-
|
250
|
+
logger.debug { "JSON parsing took: #{bm.real}" }
|
181
251
|
partition_data = partition_json["data"]
|
182
252
|
|
183
253
|
partition_data
|
184
254
|
end
|
185
255
|
|
186
256
|
def number_of_threads_to_use(partition_count)
|
187
|
-
[[1, (partition_count /
|
257
|
+
[[1, (partition_count / @thread_scale_factor.to_f).ceil].max, @max_threads_per_query].min
|
188
258
|
end
|
189
259
|
|
190
260
|
def oj_options
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rb_snowflake_client
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0
|
4
|
+
version: 0.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Rinsed
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2023-11-
|
11
|
+
date: 2023-11-29 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: 'Using the HTTP V2 Api for Snowflake runs queries & creates native Ruby
|
14
14
|
objects.
|