rb_snowflake_client 0.0.6 → 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.github/workflows/release-gh-packages.yml +1 -1
- data/.github/workflows/release-rubygems.yml +1 -1
- data/Gemfile +1 -0
- data/Gemfile.lock +3 -1
- data/README.md +40 -10
- data/lib/ruby_snowflake/client/threaded_in_memory_strategy.rb +12 -7
- data/lib/ruby_snowflake/client.rb +96 -26
- data/lib/ruby_snowflake/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: f2dfd76d405095487b91fea79a1ab4757cdf80a3058e87d196ae112dcc93b573
|
4
|
+
data.tar.gz: f73bc76ce00aede0edc9a85d1afd62ce3d73a5abbff9c1037db6a271d7d38137
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 25f52a2906278092292dade37dc1c46384d2e02bb1770fb7f99b09458e3cfcdb23aaa380e62f98e35463af76b67f9eb122adf046058522c486af10c6cfde0ef2
|
7
|
+
data.tar.gz: '079f280217f4d1d2554f6f895d3c061b93780e812bdf5bbda8e846a4bca1f10666e8c862d6ca00d741cf2cf28565f4994ebe55b8e4e3cc5555ffbbd1ebc217a0'
|
data/Gemfile
CHANGED
data/Gemfile.lock
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
rb_snowflake_client (0.0
|
4
|
+
rb_snowflake_client (0.1.0)
|
5
5
|
|
6
6
|
GEM
|
7
7
|
remote: https://rubygems.org/
|
@@ -19,6 +19,7 @@ GEM
|
|
19
19
|
coderay (~> 1.1)
|
20
20
|
method_source (~> 1.0)
|
21
21
|
rake (13.1.0)
|
22
|
+
retryable (3.0.5)
|
22
23
|
rspec (3.12.0)
|
23
24
|
rspec-core (~> 3.12.0)
|
24
25
|
rspec-expectations (~> 3.12.0)
|
@@ -49,6 +50,7 @@ DEPENDENCIES
|
|
49
50
|
pry
|
50
51
|
rake
|
51
52
|
rb_snowflake_client!
|
53
|
+
retryable
|
52
54
|
rspec
|
53
55
|
|
54
56
|
BUNDLED WITH
|
data/README.md
CHANGED
@@ -32,19 +32,28 @@ client = RubySnowflake::Client.new(
|
|
32
32
|
"snowflake-account", # typically your subdomain
|
33
33
|
"snowflake-user", # Your snowflake user
|
34
34
|
"some_warehouse", # The name of your warehouse to use by default
|
35
|
+
max_connections: 12, # Config options can be passed in
|
36
|
+
connection_timeout: 45, # See below for the full set of options
|
35
37
|
)
|
36
38
|
|
37
|
-
# alternatively you can use the
|
38
|
-
|
39
|
-
# SNOWFLAKE_PRIVATE_KEY_PATH
|
40
|
-
# or
|
41
|
-
# SNOWFLAKE_PRIVATE_KEY
|
42
|
-
# SNOWFLAKE_ORGANIZATION
|
43
|
-
# SNOWFLAKE_ACCOUNT
|
44
|
-
# SNOWFLAKE_USER
|
45
|
-
# SNOWFLAKE_DEFAULT_WAREHOUSE
|
46
|
-
RubySnowflake::Client.connect
|
39
|
+
# Alternatively, you can use the `from_env` method, which will pull these values from the following environment variables. You can provide either the path to the PEM file or its contents in an ENV variable.
|
40
|
+
RubySnowflake::Client.from_env
|
47
41
|
```
|
42
|
+
Available ENV variables (see below in the config section for details)
|
43
|
+
`SNOWFLAKE_URI`
|
44
|
+
`SNOWFLAKE_PRIVATE_KEY_PATH`
|
45
|
+
or (use either the key or the path; the key takes precedence if both are provided)
|
46
|
+
`SNOWFLAKE_PRIVATE_KEY`
|
47
|
+
`SNOWFLAKE_ORGANIZATION`
|
48
|
+
`SNOWFLAKE_ACCOUNT`
|
49
|
+
`SNOWFLAKE_USER`
|
50
|
+
`SNOWFLAKE_DEFAULT_WAREHOUSE`
|
51
|
+
`SNOWFLAKE_JWT_TOKEN_TTL`
|
52
|
+
`SNOWFLAKE_CONNECTION_TIMEOUT`
|
53
|
+
`SNOWFLAKE_MAX_CONNECTIONS`
|
54
|
+
`SNOWFLAKE_MAX_THREADS_PER_QUERY`
|
55
|
+
`SNOWFLAKE_THREAD_SCALE_FACTOR`
|
56
|
+
`SNOWFLAKE_HTTP_RETRIES`
|
48
57
|
|
49
58
|
Once you have a client, make queries
|
50
59
|
```ruby
|
@@ -68,6 +77,27 @@ result.each do |row|
|
|
68
77
|
end
|
69
78
|
```
|
70
79
|
|
80
|
+
# Configuration Options
|
81
|
+
|
82
|
+
The client supports the following configuration options, each with its own getter/setter except the connection pool options, which must be set at construction. Additionally, all except `logger` and `log_level` can be configured with environment variables (see above; the pattern is like `SNOWFLAKE_HTTP_RETRIES`).
|
83
|
+
|
84
|
+
`logger` - takes any Ruby logger (by default it's a standard library `Logger.new(STDOUT)`, set at INFO level). Not available as an ENV variable config option.
|
85
|
+
`log_level` - takes a log level, type is dependent on logger, for the default ruby Logger, use a level like `Logger::WARN`. Not available as an ENV variable config option.
|
86
|
+
`jwt_token_ttl` - The time to live set on JWT token in seconds, defaults to 3540 (59 minutes, the longest Snowflake supports is 60)
|
87
|
+
`connection_timeout` - The amount of time in seconds that the client's connection pool will wait before erroring in handing out a valid connection, defaults to 60 seconds
|
88
|
+
`max_connections` - The maximum number of http connections to hold open in the connection pool. If you use the client in a threaded context, you may need to increase this to be threads * client.max_threads_per_query, defaults to 16. Can only be set on initialization.
|
89
|
+
`max_threads_per_query` - The maximum number of threads the client should use to retrieve data, per query; defaults to 8. If you want the client to act in a single-threaded way, set this to 1.
|
90
|
+
`thread_scale_factor` - When downloading a result set into memory, thread count is calculated by dividing a query's partition count by this number. For details on implementation see the code in `client.rb`.
|
91
|
+
`http_retries` - By default the client will retry common, typically transient errors (HTTP responses) twice; you can change the number of retries with this option.
|
92
|
+
|
93
|
+
Example configuration:
|
94
|
+
```ruby
|
95
|
+
client = RubySnowflake::Client.from_env
|
96
|
+
client.logger = Rails.logger
|
97
|
+
client.max_threads_per_query = 12 # note: max_connections and connection_timeout can only be set at construction
|
98
|
+
client.http_retries = 1
|
99
|
+
```
|
100
|
+
|
71
101
|
# Gotchas
|
72
102
|
|
73
103
|
1. Does not yet support multiple statements (work around is to wrap in `BEGIN ... END`)
|
@@ -18,13 +18,18 @@ module RubySnowflake
|
|
18
18
|
end
|
19
19
|
futures.each do |future|
|
20
20
|
if future.rejected?
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
21
|
+
if future.reason.is_a? RubySnowflake::Error
|
22
|
+
raise future.reason
|
23
|
+
else
|
24
|
+
raise ConnectionStarvedError.new(
|
25
|
+
"A partition request timed out. This is usually do to using the client in" \
|
26
|
+
"multiple threads. The client uses a connection thread pool and if too many" \
|
27
|
+
"requests are all done in threads at the same time, threads can get starved" \
|
28
|
+
"of access to connections. The solution for this is to either increase the " \
|
29
|
+
"max_connections parameter on the client or create a new client instance" \
|
30
|
+
"with it's own connection pool to snowflake per thread. Rejection reason: #{future.reason.message}"
|
31
|
+
)
|
32
|
+
end
|
28
33
|
end
|
29
34
|
index, partition_data = future.value
|
30
35
|
result[index] = partition_data
|
@@ -6,9 +6,11 @@ require "concurrent"
|
|
6
6
|
require "connection_pool"
|
7
7
|
require "json"
|
8
8
|
require "jwt"
|
9
|
+
require "logger"
|
9
10
|
require "net/http"
|
10
11
|
require "oj"
|
11
12
|
require "openssl"
|
13
|
+
require "retryable"
|
12
14
|
require "securerandom"
|
13
15
|
require "uri"
|
14
16
|
|
@@ -33,17 +35,31 @@ module RubySnowflake
|
|
33
35
|
class BadResponseError < Error ; end
|
34
36
|
class ConnectionError < Error ; end
|
35
37
|
class ConnectionStarvedError < Error ; end
|
38
|
+
class RetryableBadResponseError < Error ; end
|
36
39
|
class RequestError < Error ; end
|
37
40
|
|
38
|
-
|
39
41
|
class Client
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
42
|
+
DEFAULT_LOGGER = Logger.new(STDOUT)
|
43
|
+
DEFAULT_LOG_LEVEL = Logger::INFO
|
44
|
+
# seconds (59 min), this is the max supported by snowflake - 1 minute
|
45
|
+
DEFAULT_JWT_TOKEN_TTL = 3540
|
46
|
+
# seconds, how long for a thread to wait for a connection before erroring
|
47
|
+
DEFAULT_CONNECTION_TIMEOUT = 60
|
48
|
+
# default maximum size of the http connection pool
|
49
|
+
DEFAULT_MAX_CONNECTIONS = 16
|
50
|
+
# default maximum size of the thread pool on a single query
|
51
|
+
DEFAULT_MAX_THREADS_PER_QUERY = 8
|
52
|
+
# partition count factor for number of threads
|
53
|
+
# (i.e. 4 == one thread for every 4 partitions, rounded up, capped at max_threads_per_query)
|
54
|
+
DEFAULT_THREAD_SCALE_FACTOR = 4
|
55
|
+
# how many times to retry common retryable HTTP responses (i.e. 429, 504)
|
56
|
+
DEFAULT_HTTP_RETRIES = 2
|
57
|
+
|
58
|
+
# can't be set after initialization
|
59
|
+
attr_reader :connection_timeout, :max_connections
|
60
|
+
attr_accessor :logger, :jwt_token_ttl, :max_threads_per_query, :thread_scale_factor, :http_retries
|
61
|
+
|
62
|
+
def self.from_env
|
47
63
|
private_key = ENV["SNOWFLAKE_PRIVATE_KEY"] || File.read(ENV["SNOWFLAKE_PRIVATE_KEY_PATH"])
|
48
64
|
|
49
65
|
new(
|
@@ -53,10 +69,24 @@ module RubySnowflake
|
|
53
69
|
ENV["SNOWFLAKE_ACCOUNT"],
|
54
70
|
ENV["SNOWFLAKE_USER"],
|
55
71
|
ENV["SNOWFLAKE_DEFAULT_WAREHOUSE"],
|
72
|
+
jwt_token_ttl: env_option("SNOWFLAKE_JWT_TOKEN_TTL", DEFAULT_JWT_TOKEN_TTL),
|
73
|
+
connection_timeout: env_option("SNOWFLAKE_CONNECTION_TIMEOUT", DEFAULT_CONNECTION_TIMEOUT ),
|
74
|
+
max_connections: env_option("SNOWFLAKE_MAX_CONNECTIONS", DEFAULT_MAX_CONNECTIONS ),
|
75
|
+
max_threads_per_query: env_option("SNOWFLAKE_MAX_THREADS_PER_QUERY", DEFAULT_MAX_THREADS_PER_QUERY),
|
76
|
+
thread_scale_factor: env_option("SNOWFLAKE_THREAD_SCALE_FACTOR", DEFAULT_THREAD_SCALE_FACTOR),
|
77
|
+
http_retries: env_option("SNOWFLAKE_HTTP_RETRIES", DEFAULT_HTTP_RETRIES),
|
56
78
|
)
|
57
79
|
end
|
58
80
|
|
59
|
-
def initialize(uri, private_key, organization, account, user, default_warehouse
|
81
|
+
def initialize(uri, private_key, organization, account, user, default_warehouse,
|
82
|
+
logger: DEFAULT_LOGGER,
|
83
|
+
log_level: DEFAULT_LOG_LEVEL,
|
84
|
+
jwt_token_ttl: DEFAULT_JWT_TOKEN_TTL,
|
85
|
+
connection_timeout: DEFAULT_CONNECTION_TIMEOUT,
|
86
|
+
max_connections: DEFAULT_MAX_CONNECTIONS,
|
87
|
+
max_threads_per_query: DEFAULT_MAX_THREADS_PER_QUERY,
|
88
|
+
thread_scale_factor: DEFAULT_THREAD_SCALE_FACTOR,
|
89
|
+
http_retries: DEFAULT_HTTP_RETRIES)
|
60
90
|
@base_uri = uri
|
61
91
|
@private_key_pem = private_key
|
62
92
|
@organization = organization
|
@@ -65,6 +95,16 @@ module RubySnowflake
|
|
65
95
|
@default_warehouse = default_warehouse
|
66
96
|
@public_key_fingerprint = public_key_fingerprint(@private_key_pem)
|
67
97
|
|
98
|
+
# set defaults for config settings
|
99
|
+
@logger = logger
|
100
|
+
@logger.level = log_level
|
101
|
+
@jwt_token_ttl = jwt_token_ttl
|
102
|
+
@connection_timeout = connection_timeout
|
103
|
+
@max_connections = max_connections
|
104
|
+
@max_threads_per_query = max_threads_per_query
|
105
|
+
@thread_scale_factor = thread_scale_factor
|
106
|
+
@http_retries = http_retries
|
107
|
+
|
68
108
|
# start with an expired value to force creation
|
69
109
|
@token_expires_at = Time.now.to_i - 1
|
70
110
|
@token_semaphore = Concurrent::Semaphore.new(1)
|
@@ -84,13 +124,18 @@ module RubySnowflake
|
|
84
124
|
Oj.dump(request_body)
|
85
125
|
)
|
86
126
|
end
|
87
|
-
handle_errors(response)
|
88
127
|
retreive_result_set(response, streaming)
|
89
128
|
end
|
90
129
|
|
130
|
+
def self.env_option(env_var_name, default_value)
|
131
|
+
value = ENV[env_var_name]
|
132
|
+
value.nil? || value.empty? ? default_value : ENV[env_var_name].to_i
|
133
|
+
end
|
134
|
+
private_class_method :env_option
|
135
|
+
|
91
136
|
private
|
92
137
|
def connection_pool
|
93
|
-
@connection_pool ||= ConnectionPool.new(size:
|
138
|
+
@connection_pool ||= ConnectionPool.new(size: @max_connections, timeout: @connection_timeout) do
|
94
139
|
HttpConnectionWrapper.new(hostname, port).start
|
95
140
|
end
|
96
141
|
end
|
@@ -108,7 +153,7 @@ module RubySnowflake
|
|
108
153
|
|
109
154
|
@token_semaphore.acquire do
|
110
155
|
now = Time.now.to_i
|
111
|
-
@token_expires_at = now +
|
156
|
+
@token_expires_at = now + @jwt_token_ttl
|
112
157
|
|
113
158
|
private_key = OpenSSL::PKey.read(@private_key_pem)
|
114
159
|
|
@@ -127,13 +172,6 @@ module RubySnowflake
|
|
127
172
|
Time.now.to_i > @token_expires_at
|
128
173
|
end
|
129
174
|
|
130
|
-
def handle_errors(response)
|
131
|
-
if response.code != "200"
|
132
|
-
raise BadResponseError.new({}),
|
133
|
-
"Bad response! Got code: #{response.code}, w/ message #{response.body}"
|
134
|
-
end
|
135
|
-
end
|
136
|
-
|
137
175
|
def request_with_auth_and_headers(connection, request_class, path, body=nil)
|
138
176
|
uri = URI.parse("#{@base_uri}#{path}")
|
139
177
|
request = request_class.new(uri)
|
@@ -143,11 +181,43 @@ module RubySnowflake
|
|
143
181
|
request["X-Snowflake-Authorization-Token-Type"] = "KEYPAIR_JWT"
|
144
182
|
request.body = body unless body.nil?
|
145
183
|
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
184
|
+
Retryable.retryable(tries: @http_retries + 1,
|
185
|
+
on: RetryableBadResponseError,
|
186
|
+
log_method: retryable_log_method) do
|
187
|
+
response = nil
|
188
|
+
bm = Benchmark.measure { response = connection.request(request) }
|
189
|
+
logger.debug { "HTTP Request time: #{bm.real}" }
|
190
|
+
raise_on_bad_response(response)
|
191
|
+
response
|
192
|
+
end
|
193
|
+
end
|
194
|
+
|
195
|
+
def raise_on_bad_response(response)
|
196
|
+
return if response.code == "200"
|
197
|
+
|
198
|
+
# there are a class of errors we want to retry rather than just giving up
|
199
|
+
if retryable_http_response_code?(response.code)
|
200
|
+
raise RetryableBadResponseError.new({}),
|
201
|
+
"Retryable bad response! Got code: #{response.code}, w/ message #{response.body}"
|
202
|
+
|
203
|
+
else # not one we should retry
|
204
|
+
raise BadResponseError.new({}),
|
205
|
+
"Bad response! Got code: #{response.code}, w/ message #{response.body}"
|
206
|
+
end
|
207
|
+
end
|
208
|
+
|
209
|
+
# shamelessly stolen from the battle tested python client
|
210
|
+
# https://github.com/snowflakedb/snowflake-connector-python/blob/eceed981f93e29d2f4663241253b48340389f4ef/src/snowflake/connector/network.py#L191
|
211
|
+
def retryable_http_response_code?(code)
|
212
|
+
# retry (in order): bad request, forbidden (token expired in flight), method not allowed,
|
213
|
+
# request timeout, too many requests, anything in the 500 range (504 is fairly common)
|
214
|
+
[400, 403, 405, 408, 429, 504].include?(code.to_i) || (500..599).include?(code)
|
215
|
+
end
|
216
|
+
|
217
|
+
def retryable_log_method
|
218
|
+
@retryable_log_method ||= proc do |retries, error|
|
219
|
+
logger.info("Retry attempt #{retries} because #{error.message}")
|
220
|
+
end
|
151
221
|
end
|
152
222
|
|
153
223
|
def retreive_result_set(response, streaming)
|
@@ -177,14 +247,14 @@ module RubySnowflake
|
|
177
247
|
|
178
248
|
partition_json = nil
|
179
249
|
bm = Benchmark.measure { partition_json = Oj.load(partition_response.body, oj_options) }
|
180
|
-
|
250
|
+
logger.debug { "JSON parsing took: #{bm.real}" }
|
181
251
|
partition_data = partition_json["data"]
|
182
252
|
|
183
253
|
partition_data
|
184
254
|
end
|
185
255
|
|
186
256
|
def number_of_threads_to_use(partition_count)
|
187
|
-
[[1, (partition_count /
|
257
|
+
[[1, (partition_count / @thread_scale_factor.to_f).ceil].max, @max_threads_per_query].min
|
188
258
|
end
|
189
259
|
|
190
260
|
def oj_options
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rb_snowflake_client
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0
|
4
|
+
version: 0.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Rinsed
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2023-11-
|
11
|
+
date: 2023-11-29 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: 'Using the HTTP V2 Api for Snowflake runs queries & creates native Ruby
|
14
14
|
objects.
|