rb_snowflake_client 0.0.6

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 70963fe960bc2ab443a82480ede2636c6c76f2704f1b72f0ddf67c166b3539e5
4
+ data.tar.gz: 4475b0653841836532b6934ea30630e4f3f89f7923ec4031f99d78d08c43e8ca
5
+ SHA512:
6
+ metadata.gz: ba3ed329ca17e395d89e8d10a5e20d00e5b0d00ecf8f6689ad064e8385042070d9821c2944c7a0b6321baa2492c5602e49a66743294c37372ac306e9ffcee238
7
+ data.tar.gz: 211534933b4a6767c7866b1b926cc54cf309877ac2199d535e6b31b42ccbfcb938533e3a9692e7f4e06cf8aa723a238aa0a763231324d7ec361d0679740b421b
@@ -0,0 +1,37 @@
1
name: CI

# Run on every branch. "**" is used rather than "*" because "*" does not
# match branch names that contain a "/" (e.g. "feature/foo").
on:
  push:
    branches:
      - "**"
  pull_request:
    branches:
      - "**"

jobs:
  test:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v3
      - name: Set up Ruby
        # Pinned to a commit SHA rather than a moving tag.
        uses: ruby/setup-ruby@904f3fef85a9c80a3750cbe7d5159268fd5caa9f
        with:
          ruby-version: "3.0.6"
      - name: Install dependencies
        run: bundle install
      - name: Build gem
        run: rake build
      # Enable this section to allow debugging via SSH
      # - name: Setup upterm session
      #   uses: lhotari/action-upterm@v1
      #   with:
      #     # limits ssh access and adds the ssh public key for the user
      #     # which triggered the workflow
      #     limit-access-to-actor: true
      - name: Install gem
        run: cd pkg && gem install --local *.gem
      - name: Run tests
        run: ruby -rrb_snowflake_client spec/test.rb
        # Connection settings are passed to the test script as environment
        # variables sourced from repository secrets.
        env:
          SNOWFLAKE_ACCOUNT: ${{ secrets.SNOWFLAKE_ACCOUNT }}
          SNOWFLAKE_USER: ${{ secrets.SNOWFLAKE_USER }}
          SNOWFLAKE_PASSWORD: ${{ secrets.SNOWFLAKE_PASSWORD }}
          SNOWFLAKE_WAREHOUSE: ${{ secrets.SNOWFLAKE_WAREHOUSE }}
          SNOWFLAKE_PRIVATE_KEY: ${{ secrets.SNOWFLAKE_CLIENT_TEST_PRIVATE_KEY }}
@@ -0,0 +1,26 @@
1
# Publishes the gem to GitHub Packages on every push to master.
# The name is made specific so it can be told apart from the RubyGems
# release workflow, which was also called "Release".
name: Release (GitHub Packages)
on:
  push:
    branches:
      - master
jobs:
  release:
    runs-on: ubuntu-latest
    permissions:
      packages: write
      contents: read
    steps:
      - uses: actions/checkout@v3
      - name: Set up Ruby
        uses: ruby/setup-ruby@904f3fef85a9c80a3750cbe7d5159268fd5caa9f
        with:
          ruby-version: "3.0.6"
      - name: Install dependencies
        run: bundle install
      - name: Build gem
        run: rake build
      - name: Build and publish to GitHub Package
        uses: actionshub/publish-gem-to-github@v1.0.6
        with:
          token: ${{ secrets.GITHUB_TOKEN }}
          owner: rinsed-org
@@ -0,0 +1,26 @@
1
# Builds the gem and pushes it to rubygems.org on every push to master.
# The name is made specific so it can be told apart from the GitHub Packages
# release workflow, which was also called "Release".
name: Release (RubyGems)
on:
  push:
    branches:
      - master
jobs:
  release:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v3
      - name: Set up Ruby
        uses: ruby/setup-ruby@904f3fef85a9c80a3750cbe7d5159268fd5caa9f
        with:
          ruby-version: "3.0.6"
      - name: Install dependencies
        run: bundle install
      - name: Publish to RubyGems
        # Writes the API key into a 0600 credentials file, then builds and
        # pushes the gem.
        run: |
          mkdir -p $HOME/.gem
          touch $HOME/.gem/credentials
          chmod 0600 $HOME/.gem/credentials
          printf -- "---\n:rubygems_api_key: ${GEM_HOST_API_KEY}\n" > $HOME/.gem/credentials
          gem build *.gemspec
          gem push *.gem
        env:
          GEM_HOST_API_KEY: "${{secrets.RUBYGEMS_AUTH_TOKEN}}"
data/.gitignore ADDED
@@ -0,0 +1,9 @@
1
+ /.project
2
+ /.rakeTasks
3
+ .idea/*
4
+
5
+ # ruby gems
6
+ *.gem
7
+ /.DS_Store
8
+ .env
9
+ .ruby-version
data/Gemfile ADDED
@@ -0,0 +1,23 @@
1
+ # frozen_string_literal: true
2
+
3
+ source "https://rubygems.org"
4
+
5
+ # Specify your gem's dependencies in rb_snowflake_client.gemspec
6
+ gemspec
7
+
8
+ gem "bundler"
9
+ gem "concurrent-ruby"
10
+ gem "connection_pool"
11
+ gem "dotenv"
12
+ gem "jwt"
13
+ gem "oj"
14
+ gem "rake"
15
+
16
+ group :development do
17
+ gem "parallel"
18
+ gem "pry"
19
+ end
20
+
21
+ group :test do
22
+ gem "rspec"
23
+ end
data/Gemfile.lock ADDED
@@ -0,0 +1,55 @@
1
+ PATH
2
+ remote: .
3
+ specs:
4
+ rb_snowflake_client (0.0.6)
5
+
6
+ GEM
7
+ remote: https://rubygems.org/
8
+ specs:
9
+ coderay (1.1.3)
10
+ concurrent-ruby (1.2.2)
11
+ connection_pool (2.4.1)
12
+ diff-lcs (1.5.0)
13
+ dotenv (2.8.1)
14
+ jwt (2.7.1)
15
+ method_source (1.0.0)
16
+ oj (3.16.1)
17
+ parallel (1.23.0)
18
+ pry (0.14.2)
19
+ coderay (~> 1.1)
20
+ method_source (~> 1.0)
21
+ rake (13.1.0)
22
+ rspec (3.12.0)
23
+ rspec-core (~> 3.12.0)
24
+ rspec-expectations (~> 3.12.0)
25
+ rspec-mocks (~> 3.12.0)
26
+ rspec-core (3.12.2)
27
+ rspec-support (~> 3.12.0)
28
+ rspec-expectations (3.12.3)
29
+ diff-lcs (>= 1.2.0, < 2.0)
30
+ rspec-support (~> 3.12.0)
31
+ rspec-mocks (3.12.6)
32
+ diff-lcs (>= 1.2.0, < 2.0)
33
+ rspec-support (~> 3.12.0)
34
+ rspec-support (3.12.1)
35
+
36
+ PLATFORMS
37
+ arm64-darwin-21
38
+ arm64-darwin-22
39
+ x86_64-linux
40
+
41
+ DEPENDENCIES
42
+ bundler
43
+ concurrent-ruby
44
+ connection_pool
45
+ dotenv
46
+ jwt
47
+ oj
48
+ parallel
49
+ pry
50
+ rake
51
+ rb_snowflake_client!
52
+ rspec
53
+
54
+ BUNDLED WITH
55
+ 2.4.19
data/LICENSE.txt ADDED
@@ -0,0 +1,21 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2015 Dotan Nahum
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in
13
+ all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,131 @@
1
+ # Ruby snowflake client using the v2 HTTP API
2
+
3
+ # Why this library?
4
+
5
+ The available options for connecting from Ruby to Snowflake include:
6
+ * ODBC - which works, but can be very slow, especially for a lot of data, which is probably why you're using Snowflake
7
+ * The [ruby snowflake client](https://github.com/rinsed-org/ruby-snowflake-client), which we also created, that wraps the Go client. This is probably the fastest single-threaded option. However, that library holds the Ruby GVL and so blocks all other threads in your Ruby process while it runs.
8
+
9
+ This library is implemented in ruby and while it leverages some libraries that have native extensions, doesn't currently include anything itself. Depending on network latency and the shape of the data this library can be faster or slower than the go wrapper. The big advantages are:
10
+ * It uses about half the memory when you pull a full result set into memory
11
+ * It does not hold onto the [ruby GVL](https://www.speedshop.co/2020/05/11/the-ruby-gvl-and-scaling.html) and so does not block other threads while waiting on IO like the go wrapper client.
12
+ * One trade-off: it will consume more resources for the same data. Because it uses the HTTP v2 API and gets JSON back, there is simply more work to do compared to the Go or Python clients, which use Apache Arrow under the covers.
13
+
14
+ # Usage
15
+
16
+ Add to your Gemfile or use `gem install rb_snowflake_client`
17
+ ```ruby
18
+ gem "rb_snowflake_client"
19
+ ```
20
+
21
+ Then require, create a client
22
+ ```ruby
23
+ require "rb_snowflake_client"
24
+
25
+
26
+ # create a client by passing the connection details explicitly
27
+ # see: https://github.com/rinsed-org/pure-ruby-snowflake-client/blob/master/lib/ruby_snowflake/client.rb#L43
28
+ client = RubySnowflake::Client.new(
29
+ "https://yourinstance.region.snowflakecomputing.com", # insert your URL here
30
+ File.read("secrets/my_key.pem"), # path to your private key
31
+ "snowflake-organization", # your account name (doesn't match your URL)
32
+ "snowflake-account", # typically your subdomain
33
+ "snowflake-user", # Your snowflake user
34
+ "some_warehouse", # The name of your warehouse to use by default
35
+ )
36
+
37
+ # Alternatively, you can use the connect method, which will pull these values from the following environment variables. You can either provide the path to the PEM file, or its contents in an ENV variable.
38
+ # SNOWFLAKE_URI
39
+ # SNOWFLAKE_PRIVATE_KEY_PATH
40
+ # or
41
+ # SNOWFLAKE_PRIVATE_KEY
42
+ # SNOWFLAKE_ORGANIZATION
43
+ # SNOWFLAKE_ACCOUNT
44
+ # SNOWFLAKE_USER
45
+ # SNOWFLAKE_DEFAULT_WAREHOUSE
46
+ RubySnowflake::Client.connect
47
+ ```
48
+
49
+ Once you have a client, make queries
50
+ ```ruby
51
+ # will get all data in memory
52
+ result = client.query("SELECT ID, NAME FROM SOMETABLE")
53
+
54
+ # result is Enumerable
55
+ result.each do |row|
56
+ puts row[:id] # row supports access with symbols
57
+ puts row["name"] # or case insensitive strings
58
+ puts row.to_h # and can produce a hash with keys/values
59
+ end
60
+
61
+ # You can also stream results and not hold them all in memory.
62
+ # The client will prefetch the next data partition only. If you
63
+ # have some IO in your processing there should usually be data
64
+ # available for you.
65
+ result = client.query("SELECT * FROM HUGETABLE", streaming: true)
66
+ result.each do |row|
67
+ puts row
68
+ end
69
+ ```
70
+
71
+ # Gotchas
72
+
73
+ 1. Does not yet support multiple statements (work around is to wrap in `BEGIN ... END`)
74
+ 2. Only supports key pair authentication
75
+ 3. It's faster to work directly with the row value and not call `to_h` if you don't need to
76
+
77
+ # Setting up a user for key pair authentication
78
+
79
+ This library uses JWT to authenticate with the API which relies on key-pair authentication to connect to Snowflake.
80
+
81
+ 1. Generate a private/public key pair for your user. Your private key will now be in a file `private_key.pem`. Keep this safe! Don't check it in to source control.
82
+ ```bash
83
+ openssl genpkey -algorithm RSA -out private_key.pem -pkeyopt rsa_keygen_bits:2048
84
+ ```
85
+ 2. Generate a public key in the format that Snowflake likes (will produce `public_key.pem`)
86
+ ```bash
87
+ openssl rsa -pubout -in private_key.pem -out public_key.pem
88
+ ```
89
+ 3. Your public_key.pem file should look something like this
90
+ ```text
91
+ -----BEGIN PUBLIC KEY-----
92
+ MIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEAx8FaPusz9X9MCvv0h3N3
93
+ v1QaruyU1ivHs8jLjo6idzLSHJPGk7n3LSXerIw5/LkhfA27ibJj225/fKFnPy+X
94
+ gidbhE4BlvSdoVgdMH7WB1ZC3PpAwwqHeMisIzarwOwUu6mLyG9VY55ciKJY8CwA
95
+ 5xt19pgVsXg/lcOa72jDjK+ExdSAN6K2TqSKqq77yzeI5creslny5VuAGTbZy3Bt
96
+ Wk0zg1xz8+C4regIOlSoFrzn1e4wHqbFv2zFFvORC2LV3HXFRaHYClB7jWRN1bFj
97
+ om6gRpiTO8bsCSPKi0anxMN8qt1Lw2d/+cwezxCwI6xPLC7JhZYdx6u+hC0g3PVK
98
+ PQIDAQAB
99
+ -----END PUBLIC KEY-----
100
+ ```
101
+ Snowflake doesn't like it in that format, but openssl can remove the newlines and the beginning and ending marker lines for you:
102
+ ```bash
103
+ openssl rsa -pubin -in public_key.pem -outform DER | openssl base64 -A
104
+ ```
105
+ (if it spits out a % at the end, remove that).
106
+ ```text
107
+ MIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEArOL5WQYaXSnVhQXQZQHVIzrNt08A+bnGjBb6DWFVRao3dlPG+HOf9Nv0nGlk8m5AMvvETUnN3tihuRHOJ9MOUzDp58IYIr5xvOENSunbRVyJL7DuCGwZz8z1pEnlBjZPONzEX8dCKxCU0neJrksFgwdhfhIUs7GnbTuJjYP9EqXPlbsYNYTVVnFNZ9DHFur9PggPJpPHTfFDz8MEB3Xb3AWV3pE752ed/PtRcTODvgoQSpP80cTgsKjsG009NY2ulEtV3r7yNJgawxmcMTNLhFlSS7Wm2NSEIS0aNo+DgSZI72MnAOw2klUzvdBl0i43gI+aX0Y6y/y18VL1o9KMQwIDAQAB
108
+ ```
109
+ 4. Now, in the snowflake web console or through your favorite client, log in as a user with permissions to edit users. For your particular user (`EXAMPLE_USER` below) update the user with the modified public key from above:
110
+ ```sql
111
+ ALTER USER EXAMPLE_USER SET RSA_PUBLIC_KEY = 'MIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEArOL5WQYaXSnVhQXQZQHVIzrNt08A+bnGjBb6DWFVRao3dlPG+HOf9Nv0nGlk8m5AMvvETUnN3tihuRHOJ9MOUzDp58IYIr5xvOENSunbRVyJL7DuCGwZz8z1pEnlBjZPONzEX8dCKxCU0neJrksFgwdhfhIUs7GnbTuJjYP9EqXPlbsYNYTVVnFNZ9DHFur9PggPJpPHTfFDz8MEB3Xb3AWV3pE752ed/PtRcTODvgoQSpP80cTgsKjsG009NY2ulEtV3r7yNJgawxmcMTNLhFlSS7Wm2NSEIS0aNo+DgSZI72MnAOw2klUzvdBl0i43gI+aX0Y6y/y18VL1o9KMQwIDAQAB'
112
+ ```
113
+ 5. Verify your auth setup. If you have `snowsql` installed, that has an easy method (CTRL-d to exit)
114
+ ```bash
115
+ snowsql -a <account_identifier>.<region> -u <user> --private-key-path private_key.pem
116
+ ```
117
+ or alternatively, use the client to verify:
118
+ ```ruby
119
+ client = RubySnowflake::Client.new(
120
+ "https://yourinstance.region.snowflakecomputing.com", # insert your URL here
121
+ File.read("secrets/my_key.pem"), # path to your private key
122
+ "snowflake-organization", # your account name (doesn't match your URL)
123
+ "snowflake-account", # typically your subdomain
124
+ "snowflake-user", # Your snowflake user
125
+ "some_warehouse", # The name of your warehouse to use by default
126
+ )
127
+ ```
128
+
129
+ # Links:
130
+ - snowflake API reference https://docs.snowflake.com/en/developer-guide/sql-api/reference
131
+ - snowflake authentication docs: https://docs.snowflake.com/en/developer-guide/sql-api/authenticating
data/Rakefile ADDED
@@ -0,0 +1,3 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'bundler/gem_tasks'
@@ -0,0 +1,6 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "ruby_snowflake/client"
4
+
5
# Top-level namespace for the gem. The classes themselves are loaded by the
# `require "ruby_snowflake/client"` above; this only guarantees the constant
# exists.
module RubySnowflake; end
@@ -0,0 +1,32 @@
1
module RubySnowflake
  class Client
    # Wraps a Net::HTTP connection so it can live in the client's connection
    # pool: it knows how to (re)open itself and translates low-level failures
    # into the gem's own error classes.
    class HttpConnectionWrapper
      # hostname - host to connect to
      # port     - TCP port to connect to
      def initialize(hostname, port)
        @hostname = hostname
        @port = port
      end

      # Opens the TLS connection and returns self so the call can be chained.
      #
      # Raises ConnectionError if the connection cannot be established; the
      # original exception is available through #cause.
      def start
        @connection = Net::HTTP.start(@hostname, @port, use_ssl: true)
        self
      rescue StandardError
        # Error#initialize takes the Sentry context, so the human readable
        # message is supplied through `raise` (same form as handle_errors).
        raise ConnectionError.new({}), "Error connecting to server."
      end

      # Sends +request+ over the wrapped connection and returns the response.
      #
      # Raises RequestError when the underlying HTTP call fails; Ruby attaches
      # the original exception as #cause because we re-raise inside `rescue`.
      def request(request)
        # connections can timeout and close, re-open them
        # which is what the connection pool expects
        start unless connection&.active?

        begin
          connection.request(request)
        rescue StandardError
          raise RequestError.new({}), "HTTP error requesting data"
        end
      end

      private

      attr_accessor :connection
    end
  end
end
@@ -0,0 +1,20 @@
1
+ # frozen_string_literal: true
2
+
3
module RubySnowflake
  class Client
    # Loads every partition of a statement result sequentially on the calling
    # thread and returns them all in one in-memory Result.
    class SingleThreadInMemoryStrategy
      # statement_json_body - parsed JSON body of the statement response
      # retreive_proc       - callable taking a partition index and returning
      #                       that partition's data
      #
      # Returns a Result with one slot filled per partition.
      def self.result(statement_json_body, retreive_proc)
        partition_count = statement_json_body["resultSetMetaData"]["partitionInfo"].size
        row_types = statement_json_body["resultSetMetaData"]["rowType"]

        loaded = Result.new(partition_count, row_types)
        # Partition 0 arrives inline with the statement response.
        loaded[0] = statement_json_body["data"]

        (1...partition_count).each do |partition_index|
          loaded[partition_index] = retreive_proc.call(partition_index)
        end

        loaded
      end
    end
  end
end
@@ -0,0 +1,20 @@
1
+ # frozen_string_literal: true
2
+
3
module RubySnowflake
  class Client
    # Builds a StreamingResult that holds only the first partition up front;
    # the remaining partitions are fetched through +retreive_proc+ by the
    # StreamingResult itself.
    class StreamingResultStrategy
      # statement_json_body - parsed JSON body of the statement response
      # retreive_proc       - callable taking a partition index and returning
      #                       that partition's data
      #
      # Returns a StreamingResult with slot 0 populated.
      def self.result(statement_json_body, retreive_proc)
        metadata = statement_json_body["resultSetMetaData"]
        partition_count = metadata["partitionInfo"].size

        streaming = StreamingResult.new(partition_count, metadata["rowType"], retreive_proc)
        # Partition 0 arrives inline with the statement response.
        streaming[0] = statement_json_body["data"]
        streaming
      end
    end
  end
end
@@ -0,0 +1,36 @@
1
+ # frozen_string_literal: true
2
+
3
module RubySnowflake
  class Client
    # Loads the partitions of a statement result concurrently on a fixed size
    # thread pool and returns them all in one in-memory Result.
    class ThreadedInMemoryStrategy
      # Each fragment ends with a space: adjacent string literals are joined
      # verbatim, so without it the words run together.
      CONNECTION_STARVED_MESSAGE =
        "A partition request timed out. This is usually due to using the client in " \
        "multiple threads. The client uses a connection thread pool and if too many " \
        "requests are all done in threads at the same time, threads can get starved " \
        "of access to connections. The solution for this is to spin up a new client " \
        "instance with its own connection pool to snowflake."

      # statement_json_body - parsed JSON body of the statement response
      # retreive_proc       - callable taking a partition index and returning
      #                       that partition's data
      # num_threads         - size of the thread pool used for the fetches
      #
      # Returns a Result with one slot filled per partition.
      # Raises ConnectionStarvedError if any partition fetch fails; the
      # failure that rejected the future is attached as #cause.
      def self.result(statement_json_body, retreive_proc, num_threads)
        partitions = statement_json_body["resultSetMetaData"]["partitionInfo"]
        result = Result.new(partitions.size, statement_json_body["resultSetMetaData"]["rowType"])
        # Partition 0 arrives inline with the statement response.
        result[0] = statement_json_body["data"]

        thread_pool = Concurrent::FixedThreadPool.new(num_threads)
        begin
          futures = (1...partitions.size).map do |index|
            Concurrent::Future.execute(executor: thread_pool) do
              [index, retreive_proc.call(index)]
            end
          end

          futures.each do |future|
            # A future that is still running is not "rejected" yet, so wait
            # for it to settle before inspecting its state. Otherwise a later
            # failure shows up as `value == nil` and blows up below.
            future.wait
            if future.rejected?
              raise ConnectionStarvedError.new({}), CONNECTION_STARVED_MESSAGE, cause: future.reason
            end

            index, partition_data = future.value
            result[index] = partition_data
          end
        ensure
          # Release the worker threads; previously the pool was leaked on
          # every query.
          thread_pool.shutdown
        end

        result
      end
    end
  end
end
@@ -0,0 +1,202 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "base64"
4
+ require "benchmark"
5
+ require "concurrent"
6
+ require "connection_pool"
7
+ require "json"
8
+ require "jwt"
9
+ require "net/http"
10
+ require "oj"
11
+ require "openssl"
12
+ require "securerandom"
13
+ require "uri"
14
+
15
+
16
+ require_relative "result"
17
+ require_relative "streaming_result"
18
+ require_relative "client/http_connection_wrapper"
19
+ require_relative "client/single_thread_in_memory_strategy"
20
+ require_relative "client/streaming_result_strategy"
21
+ require_relative "client/threaded_in_memory_strategy"
22
+
23
+ module RubySnowflake
24
# Base class for every error raised by this gem.
class Error < StandardError
  # This will get pulled through to Sentry, see:
  # https://github.com/getsentry/sentry-ruby/blob/11ecd254c0d2cae2b327f0348074e849095aa32d/sentry-ruby/lib/sentry/error_event.rb#L31-L33
  attr_reader :sentry_context

  # details - extra context stored in #sentry_context (defaults to {}).
  #
  # When +details+ is a String it is also used as the exception message, so
  # `raise SomeError, "text"` and `SomeError.new("text")` keep their text
  # instead of falling back to the class name. A message given through
  # `raise SomeError.new(details), "text"` still takes precedence.
  def initialize(details = {})
    @sentry_context = details
    super(details.is_a?(String) ? details : nil)
  end
end

# The API answered with a non-200 status code.
class BadResponseError < Error; end

# The HTTP connection to the server could not be opened.
class ConnectionError < Error; end

# A partition fetch failed while running on the thread pool.
class ConnectionStarvedError < Error; end

# An HTTP request failed while it was being sent or read.
class RequestError < Error; end
37
+
38
+
39
# Client for Snowflake's SQL API (v2) using key-pair (JWT) authentication.
#
# HTTP connections are kept in a ConnectionPool and the JWT is cached and
# regenerated once it has expired.
class Client
  JWT_TOKEN_TTL = 3600 # seconds, this is the max supported by snowflake
  CONNECTION_TIMEOUT = 60 # seconds a thread waits for a pooled connection before erroring
  MAX_CONNECTIONS = 8
  MAX_THREADS = 8
  # Partitions handled per thread: the thread count is
  # ceil(partition_count / THREAD_SCALE_FACTOR), clamped to 1..MAX_THREADS
  # (see #number_of_threads_to_use).
  THREAD_SCALE_FACTOR = 4

  # Builds a client from environment variables.
  #
  # The private key is taken from SNOWFLAKE_PRIVATE_KEY if set, otherwise it
  # is read from the file named by SNOWFLAKE_PRIVATE_KEY_PATH.
  #
  # Raises KeyError when neither variable is set (previously an opaque
  # TypeError from File.read(nil)).
  def self.connect
    private_key = ENV["SNOWFLAKE_PRIVATE_KEY"] || File.read(ENV.fetch("SNOWFLAKE_PRIVATE_KEY_PATH"))

    new(
      ENV["SNOWFLAKE_URI"],
      private_key,
      ENV["SNOWFLAKE_ORGANIZATION"],
      ENV["SNOWFLAKE_ACCOUNT"],
      ENV["SNOWFLAKE_USER"],
      ENV["SNOWFLAKE_DEFAULT_WAREHOUSE"],
    )
  end

  # uri               - base URL of the Snowflake instance
  # private_key       - PEM encoded private key (the key text, not a path)
  # organization      - used, upcased, in the JWT issuer/subject
  # account           - used, upcased, in the JWT issuer/subject
  # user              - used in the JWT issuer/subject
  # default_warehouse - warehouse used when #query is not given one
  def initialize(uri, private_key, organization, account, user, default_warehouse)
    @base_uri = uri
    @private_key_pem = private_key
    @organization = organization
    @account = account
    @user = user
    @default_warehouse = default_warehouse
    @public_key_fingerprint = public_key_fingerprint(@private_key_pem)

    # start with an expired value to force creation
    @token_expires_at = Time.now.to_i - 1
    @token_semaphore = Concurrent::Semaphore.new(1)
  end

  # Runs +query+ and returns its result set.
  #
  # warehouse - warehouse to run on; defaults to the client's default
  # streaming - when true a StreamingResult is returned that fetches
  #             partitions while iterating, otherwise every partition is
  #             loaded before returning
  #
  # Raises BadResponseError when the API does not answer with status 200.
  def query(query, warehouse: nil, streaming: false)
    warehouse ||= @default_warehouse

    response = nil
    connection_pool.with do |connection|
      request_body = { "statement" => query, "warehouse" => warehouse }

      response = request_with_auth_and_headers(
        connection,
        Net::HTTP::Post,
        "/api/v2/statements?requestId=#{SecureRandom.uuid}",
        Oj.dump(request_body)
      )
    end
    handle_errors(response)
    retreive_result_set(response, streaming)
  end

  private

  # Lazily created pool of HttpConnectionWrapper instances.
  def connection_pool
    @connection_pool ||= ConnectionPool.new(size: MAX_CONNECTIONS, timeout: CONNECTION_TIMEOUT) do
      HttpConnectionWrapper.new(hostname, port).start
    end
  end

  def hostname
    @hostname ||= URI.parse(@base_uri).hostname
  end

  def port
    @port ||= URI.parse(@base_uri).port
  end

  # Returns a valid JWT, generating a new one when the cached one expired.
  def jwt_token
    return @token unless jwt_token_expired?

    @token_semaphore.acquire do
      # Another thread may have refreshed the token while we were waiting
      # for the semaphore; only regenerate if it is still expired.
      refresh_jwt_token if jwt_token_expired?
    end

    @token
  end

  # Generates and stores a fresh JWT. Must be called while holding
  # @token_semaphore.
  def refresh_jwt_token
    now = Time.now.to_i
    expires_at = now + JWT_TOKEN_TTL

    private_key = OpenSSL::PKey.read(@private_key_pem)

    payload = {
      :iss => "#{@organization.upcase}-#{@account.upcase}.#{@user}.#{@public_key_fingerprint}",
      :sub => "#{@organization.upcase}-#{@account.upcase}.#{@user}",
      :iat => now,
      :exp => expires_at
    }

    @token = JWT.encode payload, private_key, "RS256"
    # Publish the new expiry only after the token is in place. The fast path
    # in #jwt_token reads without the semaphore, so setting the expiry first
    # let a concurrent caller return a nil or stale token.
    @token_expires_at = expires_at
  end

  def jwt_token_expired?
    Time.now.to_i > @token_expires_at
  end

  # Raises BadResponseError unless the response status is "200".
  def handle_errors(response)
    if response.code != "200"
      raise BadResponseError.new({}),
        "Bad response! Got code: #{response.code}, w/ message #{response.body}"
    end
  end

  # Builds a JSON request of +request_class+ for +path+ with the JWT
  # authorization headers, sends it over +connection+ and returns the
  # response. Raises BadResponseError on a non-200 response.
  def request_with_auth_and_headers(connection, request_class, path, body=nil)
    uri = URI.parse("#{@base_uri}#{path}")
    request = request_class.new(uri)
    request["Content-Type"] = "application/json"
    request["Accept"] = "application/json"
    request["Authorization"] = "Bearer #{jwt_token}"
    request["X-Snowflake-Authorization-Token-Type"] = "KEYPAIR_JWT"
    request.body = body unless body.nil?

    response = nil
    bm = Benchmark.measure { response = connection.request(request) }
    # NOTE(review): timing is printed to stdout on every request.
    puts "HTTP Request time: #{bm.real}"
    handle_errors(response)
    response
  end

  # Parses the statement response and picks the loading strategy:
  # streaming, single threaded, or threaded, based on the partition count.
  def retreive_result_set(response, streaming)
    json_body = Oj.load(response.body, oj_options)
    statement_handle = json_body["statementHandle"]
    num_threads = number_of_threads_to_use(json_body["resultSetMetaData"]["partitionInfo"].size)
    retreive_proc = ->(index) { retreive_partition_data(statement_handle, index) }

    if streaming
      StreamingResultStrategy.result(json_body, retreive_proc)
    elsif num_threads == 1
      SingleThreadInMemoryStrategy.result(json_body, retreive_proc)
    else
      ThreadedInMemoryStrategy.result(json_body, retreive_proc, num_threads)
    end
  end

  # Fetches one partition of a statement's result and returns the value
  # under the "data" key of the parsed response.
  def retreive_partition_data(statement_handle, partition_index)
    partition_response = nil
    connection_pool.with do |connection|
      partition_response = request_with_auth_and_headers(
        connection,
        Net::HTTP::Get,
        "/api/v2/statements/#{statement_handle}?partition=#{partition_index}&requestId=#{SecureRandom.uuid}",
      )
    end

    partition_json = nil
    bm = Benchmark.measure { partition_json = Oj.load(partition_response.body, oj_options) }
    # NOTE(review): timing is printed to stdout for every partition.
    puts "JSON parsing took: #{bm.real}"

    partition_json["data"]
  end

  # ceil(partition_count / THREAD_SCALE_FACTOR), clamped to 1..MAX_THREADS.
  def number_of_threads_to_use(partition_count)
    [[1, (partition_count / THREAD_SCALE_FACTOR.to_f).ceil].max, MAX_THREADS].min
  end

  def oj_options
    { :bigdecimal_load => :bigdecimal }
  end

  # Returns "SHA256:" followed by the base64 SHA-256 digest of the DER
  # encoded public key derived from the given private key PEM.
  def public_key_fingerprint(private_key_pem_string)
    public_key_der = OpenSSL::PKey::RSA.new(private_key_pem_string).public_key.to_der
    digest = OpenSSL::Digest::SHA256.new.digest(public_key_der)
    fingerprint = Base64.strict_encode64(digest)

    "SHA256:#{fingerprint}"
  end
end
202
+ end
@@ -0,0 +1,74 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "concurrent"
4
+
5
+ require_relative "row"
6
+
7
module RubySnowflake
  # A query result held as one array of raw rows per partition. Rows are
  # wrapped in Row objects as they are handed out.
  class Result
    include Enumerable

    attr_reader :data

    # partition_count - number of partition slots to allocate
    # row_type_data   - one hash per column with "type", "scale", "precision"
    #                   and "name" keys
    def initialize(partition_count, row_type_data)
      @data = Concurrent::Array.new(partition_count)
      extract_row_metadata(row_type_data)
    end

    # Stores the raw rows of partition +index+.
    def []=(index, value)
      data[index] = value
    end

    # Returns every row converted with Row#to_h.
    def get_all_rows
      map(&:to_h)
    end

    # Yields each row of each partition, in order, wrapped in a Row.
    # Returns an Enumerator when no block is given.
    def each
      return to_enum(:each) unless block_given?

      data.each do |partition|
        partition.each do |row|
          yield wrap_row(row)
        end
      end
    end

    # Total number of rows across all partitions.
    def size
      data.map(&:size).sum
    end

    alias length size

    # Without an argument returns the first row of the first partition, or
    # nil when there is none (previously a Row wrapping nil was returned,
    # which raised on any access). With +count+ it behaves like
    # Enumerable#first(count).
    def first(count = nil)
      return super(count) unless count.nil?

      row = data.first&.first
      row.nil? ? nil : wrap_row(row)
    end

    # Returns the last row of the last partition, or nil when there is none.
    def last
      row = data.last&.last
      row.nil? ? nil : wrap_row(row)
    end

    # Column names, downcased.
    def columns
      @row_types.map {|type| type[:name].downcase }
    end

    private

    def wrap_row(row)
      Row.new(@row_types, @column_to_index, row)
    end

    # Builds @row_types (per-column type info) and @column_to_index
    # (downcased column name => position) from the response metadata.
    def extract_row_metadata(row_type_data)
      @row_types = []
      @column_to_index = {}

      row_type_data.each_with_index do |type_data, index|
        @row_types[index] = {
          type: type_data["type"].downcase.to_sym,
          scale: type_data["scale"].to_i,
          precision: type_data["precision"].to_i,
          name: type_data["name"].to_sym,
        }
        @column_to_index[type_data["name"].downcase] = index
      end
    end
  end
end
+ end
@@ -0,0 +1,59 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "date"
4
+ require "time"
5
+
6
module RubySnowflake
  # One row of a result set. Values are stored as returned by the API and
  # converted to Ruby objects on access, based on the column type.
  class Row
    EPOCH_JULIAN_DAY_NUMBER = Date.new(1970,1,1).jd
    TIME_FORMAT = "%s.%N".freeze

    # row_types       - per-column hashes with at least :type and :scale
    # column_to_index - column name => position in +data+
    # data            - raw values of this row
    def initialize(row_types, column_to_index, data)
      @row_types = row_types
      @data = data
      @column_to_index = column_to_index
    end

    # Returns the converted value of +column+, which may be a numeric
    # position, a Symbol, or a String in any letter case. Returns nil for an
    # unknown column or a NULL value.
    #
    # see: https://docs.snowflake.com/en/developer-guide/sql-api/handling-responses#getting-the-data-from-the-results
    def [](column)
      index = column_index(column)
      return nil if index.nil?

      raw = @data[index]
      return nil if raw.nil?

      case @row_types[index][:type]
      when :boolean
        raw == "true"
      when :date
        Date.jd(Integer(raw) + EPOCH_JULIAN_DAY_NUMBER)
      when :fixed
        scale = @row_types[index][:scale]
        scale == 0 ? Integer(raw) : BigDecimal(raw).round(scale)
      when :float, :double, :"double precision", :real
        # snowflake treats these all as 64 bit IEEE 754 floating point numbers, and so do we
        Float(raw)
      when :time, :datetime, :timestamp, :timestamp_ltz, :timestamp_ntz
        Time.strptime(raw, TIME_FORMAT).utc
      when :timestamp_tz
        # The value is "<epoch seconds> <offset minutes>"; parse only the
        # timestamp part rather than the whole string.
        # NOTE(review): confirm how Snowflake encodes the offset; it is
        # applied here exactly as before.
        timestamp, offset_minutes = raw.split(" ")
        Time.strptime(timestamp, TIME_FORMAT).utc - (Integer(offset_minutes) * 60)
      else
        raw
      end
    end

    # Returns a Hash of downcased column name => converted value.
    def to_h
      output = {}
      @column_to_index.each_pair do |name, index|
        output[name.downcase] = self[index]
      end
      output
    end

    def to_s
      to_h.to_s
    end

    private

    # Resolves +column+ to a position in @data. An exact key match wins;
    # otherwise the downcased string form is tried, which is what makes
    # row[:id] and row["NAME"] work against downcased string keys.
    def column_index(column)
      return Integer(column) if column.is_a?(Numeric)

      @column_to_index.fetch(column) { @column_to_index[column.to_s.downcase] }
    end
  end
end
@@ -0,0 +1,50 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "concurrent"
4
+
5
+ require_relative "result"
6
+
7
module RubySnowflake
  # A Result that fetches its partitions while it is being iterated. While
  # one partition is consumed, the next is prefetched on a background thread.
  class StreamingResult < Result
    # partition_count - number of partition slots
    # row_type_data   - column metadata, see Result#initialize
    # retreive_proc   - callable taking a partition index and returning that
    #                   partition's data
    def initialize(partition_count, row_type_data, retreive_proc)
      super(partition_count, row_type_data)
      @retreive_proc = retreive_proc
    end

    # Yields every row wrapped in a Row. Returns an Enumerator when no block
    # is given.
    #
    # Raises whatever the partition fetch raised if a prefetch fails
    # (previously this surfaced as a NoMethodError on nil).
    def each
      return to_enum(:each) unless block_given?

      thread_pool = Concurrent::FixedThreadPool.new 1

      begin
        data.each_with_index do |_partition, index|
          # The last partition has no successor, so the index is clamped.
          next_index = [index+1, data.size-1].min
          if data[next_index].nil? # prefetch
            data[next_index] = Concurrent::Future.execute(executor: thread_pool) do
              @retreive_proc.call(next_index)
            end
          end

          if data[index].is_a? Concurrent::Future
            # Wait for it to finish; value! re-raises the fetch error instead
            # of returning nil.
            data[index] = data[index].value!
          end
          data[index].each do |row|
            yield wrap_row(row)
          end
        end
      ensure
        # Release the prefetch thread, also when the caller breaks out of
        # the iteration early or an error is raised.
        thread_pool.shutdown
      end
    end

    def size
      not_implemented
    end

    def last
      not_implemented
    end

    private

    def not_implemented
      raise "not implemented on streaming result set"
    end
  end
end
@@ -0,0 +1,3 @@
1
module RubySnowflake
  # Version of the gem; the gemspec reads it from here.
  VERSION = "0.0.6"
end
@@ -0,0 +1,28 @@
1
+ -----BEGIN PRIVATE KEY-----
2
+ MIIEvQIBADANBgkqhkiG9w0BAQEFAASCBKcwggSjAgEAAoIBAQDHwVo+6zP1f0wK
3
+ +/SHc3e/VBqu7JTWK8ezyMuOjqJ3MtIck8aTufctJd6sjDn8uSF8DbuJsmPbbn98
4
+ oWc/L5eCJ1uETgGW9J2hWB0wftYHVkLc+kDDCod4yKwjNqvA7BS7qYvIb1VjnlyI
5
+ oljwLADnG3X2mBWxeD+Vw5rvaMOMr4TF1IA3orZOpIqqrvvLN4jlyt6yWfLlW4AZ
6
+ NtnLcG1aTTODXHPz4Lit6Ag6VKgWvOfV7jAepsW/bMUW85ELYtXcdcVFodgKUHuN
7
+ ZE3VsWOibqBGmJM7xuwJI8qLRqfEw3yq3UvDZ3/5zB7PELAjrE8sLsmFlh3Hq76E
8
+ LSDc9Uo9AgMBAAECggEADvKy5mjy7TDet6a1nIpBQIXSblshYyqJCfTheLRHR5y7
9
+ uOw9DLJ6ovwFvwtKTpVrAk3DxjDdhJ7u9txcLdAJqjF4SX0N5OY+woNlwcqICSoF
10
+ HGS0wnOwAhOgQA1YCnqQLZ/pKDFepjMEUSw29BgS5Dh7mh205+Ar//DQkBBaq9UX
11
+ uZtw3dKKmDPcWdPfFjFJQrH9YrrDdt+rcOULIk4iyzepA7nLV9RBiuir256Pnmi8
12
+ +aXzPkl7PjMgCrDvqx47+Kjxcvccq/yt0zlNuEBDKOa9ChPd6FOV8O64IknBl/Eq
13
+ xU/2+06XI5QEuk9k1MKyMO06fOWS/3XSpOBSa1pooQKBgQDuiFk1897Bsxz5wwvb
14
+ vwRVoVHhNcmWLixMiWggRvnDWfDKzm7lGKTAzrgafNnw84NPD0nvcgDXivzQpEgX
15
+ Ge/SSZzn3DDjEl+QhAP2kg1zE1+7XVR2+2NNa4lS7uksjuEmffHbIg8PXFbyZ3az
16
+ Z/8p63q4bq5Lo5O0EKbRBjQLXQKBgQDWYhCbxhLVXcrz9PMWIFsOn9JqvfL/DJoh
17
+ A7RcxEAcPCHG13fOHaqIo40QoiuOJj86MjdD9rHUl2opQedKmGOE4KsN1PFzwZiU
18
+ fqYsgodgyuJe+gA9EzKabPiXvUG0R8m9T1iZocW4/n0UKrr7qwKco9JVfZQ3MpoJ
19
+ 3d6/qt0sYQKBgEfxC6cAFHErSlPyBcOTCwWgkAdyp4gKCQZUcvk5Lt9CYyNnu36R
20
+ Pi+AqXojqb/IUhQoMk5rfeGEY9exlKSTCAfOd+peOMhjpDr79g4J4tCQSVFDRtRT
21
+ 4ZFaG6LNt7nepf1sAuPqk8w4N2kI134ooYbNJMg3aY4amdO3Kfjk/BFpAoGABAlg
22
+ qq42GJJlieZo1VfSnxnfSmc9NhQ0qr55HO4NDvsK38Wa2eSxG1WJCHElRM3vjtb3
23
+ N25jts8LDq/Q/RSVeyEH14DU8wsIJnOkCV6D0gy/6m3mpz+/Si0XGTRVEmHvqILf
24
+ 9jSXln4kOMb9MlPr/qsPANEWg1uOjvUAhrde5sECgYEArHqQfneC3dG+PaRIlwDz
25
+ cKl6QWrCcfSVHL6e0XsRWT2UHrEhC5cLwd1mqMyKTZ/AK0idgyA9HRDZVzAEeina
26
+ O1kTzlr5+ukTF3wEC4N2GKoaKkeWy5LBWSduRpa4NtjJsXVVylgYmm/jqXKwzAnP
27
+ 9J6e0aokrs0mnEu/Ij/pB0A=
28
+ -----END PRIVATE KEY-----
@@ -0,0 +1,21 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "lib/ruby_snowflake/version"
4
+
5
Gem::Specification.new do |s|
  s.name = "rb_snowflake_client"
  s.version = RubySnowflake::VERSION
  s.summary = "Snowflake connector for Ruby"
  s.author = "Rinsed"
  s.email = ["reid@rinsed.co", "alex@rinsed.co"]
  s.description = <<~DESC
    Using the HTTP V2 Api for Snowflake runs queries & creates native Ruby objects.
  DESC
  s.license = "MIT" # TODO: double check

  # Package every tracked file except test directories and PEM key files;
  # a private key should not ship inside the gem.
  s.files = Dir.chdir(File.expand_path(__dir__)) do
    `git ls-files -z`.split("\x0").reject do |f|
      f.match(%r{\A(?:test|spec|features|vendor)/}) || f.end_with?(".pem")
    end
  end

  s.require_paths = ["lib"]

  # Libraries required at runtime by lib/ruby_snowflake/client.rb. Without
  # these, `gem install` does not pull them in and the first `require`
  # fails. Lower bounds mirror the versions resolved in Gemfile.lock.
  s.add_dependency "concurrent-ruby", ">= 1.2"
  s.add_dependency "connection_pool", ">= 2.4"
  s.add_dependency "jwt", ">= 2.7"
  s.add_dependency "oj", ">= 3.16"
end
metadata ADDED
@@ -0,0 +1,68 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: rb_snowflake_client
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.6
5
+ platform: ruby
6
+ authors:
7
+ - Rinsed
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2023-11-28 00:00:00.000000000 Z
12
+ dependencies: []
13
+ description: 'Using the HTTP V2 Api for Snowflake runs queries & creates native Ruby
14
+ objects.
15
+
16
+ '
17
+ email:
18
+ - reid@rinsed.co
19
+ - alex@rinsed.co
20
+ executables: []
21
+ extensions: []
22
+ extra_rdoc_files: []
23
+ files:
24
+ - ".github/workflows/ci.yml"
25
+ - ".github/workflows/release-gh-packages.yml"
26
+ - ".github/workflows/release-rubygems.yml"
27
+ - ".gitignore"
28
+ - Gemfile
29
+ - Gemfile.lock
30
+ - LICENSE.txt
31
+ - README.md
32
+ - Rakefile
33
+ - lib/rb_snowflake_client.rb
34
+ - lib/ruby_snowflake/client.rb
35
+ - lib/ruby_snowflake/client/http_connection_wrapper.rb
36
+ - lib/ruby_snowflake/client/single_thread_in_memory_strategy.rb
37
+ - lib/ruby_snowflake/client/streaming_result_strategy.rb
38
+ - lib/ruby_snowflake/client/threaded_in_memory_strategy.rb
39
+ - lib/ruby_snowflake/result.rb
40
+ - lib/ruby_snowflake/row.rb
41
+ - lib/ruby_snowflake/streaming_result.rb
42
+ - lib/ruby_snowflake/version.rb
43
+ - private_key_pkcs8.pem
44
+ - rb_snowflake_client.gemspec
45
+ homepage:
46
+ licenses:
47
+ - MIT
48
+ metadata: {}
49
+ post_install_message:
50
+ rdoc_options: []
51
+ require_paths:
52
+ - lib
53
+ required_ruby_version: !ruby/object:Gem::Requirement
54
+ requirements:
55
+ - - ">="
56
+ - !ruby/object:Gem::Version
57
+ version: '0'
58
+ required_rubygems_version: !ruby/object:Gem::Requirement
59
+ requirements:
60
+ - - ">="
61
+ - !ruby/object:Gem::Version
62
+ version: '0'
63
+ requirements: []
64
+ rubygems_version: 3.2.33
65
+ signing_key:
66
+ specification_version: 4
67
+ summary: Snowflake connector for Ruby
68
+ test_files: []