chalk_ruby 0.2.8 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Dockerfile +26 -0
- data/README.md +95 -9
- data/lib/chalk_ruby/client.rb +5 -3
- data/lib/chalk_ruby/grpc_client.rb +45 -45
- data/lib/chalk_ruby/version.rb +1 -1
- data/test/chalk_ruby/integration/client_test.rb +15 -3
- data/test/chalk_ruby/integration/grpc_client_test.rb +27 -50
- metadata +3 -4
- data/test/chalk_ruby/integration/arrow_conversion_test.rb +0 -286
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 8e3e6e01afae6634b4b66b557f658573bf4ffd2b657c082652e45185ec48ec05
|
4
|
+
data.tar.gz: 58222edf6ee281ae1cfbaaeec09b628985cfe96ddce3299054f5318e61a1d036
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 0e995a01c66b8086fff20b2b7d1304458d2370a73205b87b2c6c2860df0a03e93affda126d20cf8d018ed3fe32f0e612e6ef4b7cdef5d95d1b5b02456994eb5b
|
7
|
+
data.tar.gz: b00008371f1f5f5a2fbd3113a7e2db1cca4e58100ac5ba8385cfad08f065acb1476d645edc5f853ca5d8a67392cd4b3ce3d2aed8b85f979aebd618bedde48a68
|
data/Dockerfile
ADDED
@@ -0,0 +1,26 @@
|
|
1
|
+
FROM ubuntu:22.04
|
2
|
+
|
3
|
+
ENV BUNDLER_VERSION=2.3.6 \
|
4
|
+
BUNDLE_PATH=/app/vendor/bundle
|
5
|
+
|
6
|
+
RUN apt-get update && apt-get install -y \
|
7
|
+
ca-certificates \
|
8
|
+
sudo \
|
9
|
+
wget \
|
10
|
+
lsb-release \
|
11
|
+
build-essential \
|
12
|
+
bash \
|
13
|
+
jq \
|
14
|
+
curl \
|
15
|
+
libffi-dev \
|
16
|
+
git \
|
17
|
+
libglib2.0-dev \
|
18
|
+
ruby-full
|
19
|
+
|
20
|
+
# Install Apache Arrow 18
|
21
|
+
RUN wget https://apache.jfrog.io/artifactory/arrow/ubuntu/apache-arrow-apt-source-latest-jammy.deb && \
|
22
|
+
apt-get install -y -V ./apache-arrow-apt-source-latest-jammy.deb && \
|
23
|
+
apt-get update && \
|
24
|
+
apt-get install -y -V libarrow-dev=18.* libarrow-glib-dev=18.* libparquet-dev=18.* libarrow-acero-dev=18.* gir1.2-arrow-1.0=18.*
|
25
|
+
|
26
|
+
RUN gem install bundler
|
data/README.md
CHANGED
@@ -1,29 +1,115 @@
|
|
1
|
-
|
1
|
+
# ChalkRuby - Ruby Client for Chalk
|
2
2
|
|
3
3
|
[](https://badge.fury.io/rb/chalk_ruby)
|
4
|
+
[](LICENSE)
|
4
5
|
|
5
|
-
Ruby client for Chalk
|
6
|
+
A Ruby client library for [Chalk](https://chalk.ai/), a feature engineering platform for machine learning and data applications.
|
6
7
|
|
7
|
-
|
8
|
+
## Installation
|
8
9
|
|
9
|
-
|
10
|
+
Add this line to your application's Gemfile:
|
10
11
|
|
11
|
-
|
12
|
+
```ruby
|
13
|
+
gem 'chalk_ruby'
|
14
|
+
```
|
15
|
+
|
16
|
+
And then execute:
|
17
|
+
|
18
|
+
```bash
|
19
|
+
bundle install
|
20
|
+
```
|
21
|
+
|
22
|
+
Or install it directly:
|
12
23
|
|
13
24
|
```bash
|
14
25
|
gem install chalk_ruby
|
15
26
|
```
|
16
27
|
|
17
|
-
|
28
|
+
## Quick Start
|
18
29
|
|
19
30
|
```ruby
|
20
|
-
|
31
|
+
# Create a client with credentials
|
32
|
+
client = ChalkRuby::Client.create(
|
33
|
+
'my-client-id',
|
34
|
+
'my-client-secret',
|
35
|
+
'my-environment-id' # Optional, can also use CHALK_ACTIVE_ENVIRONMENT env var
|
36
|
+
)
|
37
|
+
|
38
|
+
# Query features
|
21
39
|
results = client.query(
|
22
40
|
input: { 'user.id': 'my-user-id' },
|
23
|
-
output: %w(user.id user.name user.email)
|
41
|
+
output: %w(user.id user.name user.email),
|
42
|
+
query_name: 'user_profile' # Optional: for tracking and monitoring
|
24
43
|
)
|
44
|
+
|
45
|
+
# Access feature values
|
46
|
+
puts results['user.name']
|
25
47
|
```
|
26
48
|
|
49
|
+
## Authentication
|
50
|
+
|
51
|
+
Authentication can be provided in multiple ways:
|
52
|
+
|
53
|
+
1. Directly in the client constructor:
|
54
|
+
```ruby
|
55
|
+
client = ChalkRuby::Client.create('my-client-id', 'my-client-secret')
|
56
|
+
```
|
57
|
+
|
58
|
+
2. Using environment variables:
|
59
|
+
```
|
60
|
+
CHALK_CLIENT_ID=my-client-id
|
61
|
+
CHALK_CLIENT_SECRET=my-client-secret
|
62
|
+
CHALK_ACTIVE_ENVIRONMENT=my-environment # Optional
|
63
|
+
```
|
64
|
+
```ruby
|
65
|
+
client = ChalkRuby::Client.create
|
66
|
+
```
|
67
|
+
|
68
|
+
## Advanced Usage
|
69
|
+
|
70
|
+
### Query Options
|
71
|
+
|
72
|
+
The `query` method supports several options:
|
73
|
+
|
74
|
+
```ruby
|
75
|
+
client.query(
|
76
|
+
input: { 'user.id': 'my-user-id' },
|
77
|
+
output: %w(user.id user.name user.credit_score),
|
78
|
+
now: Time.now, # Optional: time at which to evaluate the query
|
79
|
+
staleness: { 'user.credit_score': '1d' }, # Optional: max staleness for cached features
|
80
|
+
tags: { 'environment': 'production' }, # Optional: tags for resolver selection
|
81
|
+
branch: 'my-feature-branch', # Optional: route request to a specific branch
|
82
|
+
correlation_id: 'request-123', # Optional: ID for tracing in logs
|
83
|
+
query_name: 'user_profile', # Optional: semantic name for the query
|
84
|
+
timeout: 5.0 # Optional: timeout in seconds
|
85
|
+
)
|
86
|
+
```
|
87
|
+
|
88
|
+
### Configuration
|
89
|
+
|
90
|
+
Create a client with custom configuration:
|
91
|
+
|
92
|
+
```ruby
|
93
|
+
config = ChalkRuby::Config.new(
|
94
|
+
client_id: 'my-client-id',
|
95
|
+
client_secret: 'my-client-secret',
|
96
|
+
environment: 'my-environment',
|
97
|
+
query_server: 'https://custom-query-server.chalk.ai',
|
98
|
+
api_server: 'https://custom-api.chalk.ai',
|
99
|
+
additional_headers: { 'X-Custom-Header': 'value' }
|
100
|
+
)
|
101
|
+
|
102
|
+
client = ChalkRuby::Client.create_with_config(config)
|
103
|
+
```
|
104
|
+
|
105
|
+
## Requirements
|
106
|
+
|
107
|
+
- Ruby 2.7 or higher
|
108
|
+
|
109
|
+
## Version
|
110
|
+
|
111
|
+
Current version: `0.2.8`
|
112
|
+
|
27
113
|
## License
|
28
114
|
|
29
|
-
The Chalk Ruby
|
115
|
+
The Chalk Ruby Client is open-sourced software licensed under the [Apache 2.0 License](LICENSE).
|
data/lib/chalk_ruby/client.rb
CHANGED
@@ -150,7 +150,8 @@ module ChalkRuby
|
|
150
150
|
explain: nil,
|
151
151
|
include_meta: nil,
|
152
152
|
store_plan_stages: nil,
|
153
|
-
timeout: nil
|
153
|
+
timeout: nil,
|
154
|
+
planner_options: nil
|
154
155
|
)
|
155
156
|
query_server_request(
|
156
157
|
method: :post,
|
@@ -168,10 +169,11 @@ module ChalkRuby
|
|
168
169
|
meta: meta,
|
169
170
|
explain: explain || false,
|
170
171
|
include_meta: include_meta || false,
|
171
|
-
store_plan_stages: store_plan_stages || false
|
172
|
+
store_plan_stages: store_plan_stages || false,
|
173
|
+
planner_options: planner_options || {}
|
172
174
|
},
|
173
175
|
headers: get_authenticated_engine_headers(branch: branch),
|
174
|
-
timeout:
|
176
|
+
timeout: timeout
|
175
177
|
)
|
176
178
|
end
|
177
179
|
|
data/lib/chalk_ruby/grpc_client.rb
CHANGED
@@ -146,6 +146,14 @@ module ChalkRuby
|
|
146
146
|
query_service.ping(Chalk::Engine::V1::PingRequest.new(num: 1))
|
147
147
|
end
|
148
148
|
|
149
|
+
def convert_to_proto_values(options_hash)
|
150
|
+
return {} if options_hash.nil?
|
151
|
+
|
152
|
+
options_hash.transform_values do |value|
|
153
|
+
convert_to_protobuf_value(value)
|
154
|
+
end
|
155
|
+
end
|
156
|
+
|
149
157
|
def query_bulk(
|
150
158
|
input:,
|
151
159
|
output:,
|
@@ -157,7 +165,8 @@ module ChalkRuby
|
|
157
165
|
timeout: nil,
|
158
166
|
query_name: nil,
|
159
167
|
query_name_version: nil,
|
160
|
-
correlation_id: nil
|
168
|
+
correlation_id: nil,
|
169
|
+
planner_options: nil
|
161
170
|
)
|
162
171
|
# Convert input to feather format
|
163
172
|
inputs_feather = to_feather(input)
|
@@ -167,7 +176,12 @@ module ChalkRuby
|
|
167
176
|
inputs_feather: inputs_feather,
|
168
177
|
outputs: output.map { |o| Chalk::Common::V1::OutputExpr.new(feature_fqn: o) },
|
169
178
|
staleness: staleness || {},
|
170
|
-
context: context || Chalk::Common::V1::OnlineQueryContext.new(
|
179
|
+
context: context || Chalk::Common::V1::OnlineQueryContext.new(
|
180
|
+
query_name: query_name,
|
181
|
+
query_name_version: query_name_version,
|
182
|
+
correlation_id: correlation_id,
|
183
|
+
options: convert_to_proto_values(planner_options)
|
184
|
+
),
|
171
185
|
response_options: response_options || Chalk::Common::V1::OnlineQueryResponseOptions.new,
|
172
186
|
body_type: body_type || :FEATHER_BODY_TYPE_UNSPECIFIED
|
173
187
|
)
|
@@ -181,7 +195,31 @@ module ChalkRuby
|
|
181
195
|
output_data = nil
|
182
196
|
|
183
197
|
if (!response.scalars_data.nil?) and response.scalars_data.length > 0
|
184
|
-
|
198
|
+
buffer = Arrow::Buffer.new(response.scalars_data)
|
199
|
+
|
200
|
+
# Create a buffer reader
|
201
|
+
buffer_reader = Arrow::BufferInputStream.new(buffer)
|
202
|
+
|
203
|
+
# Create an IPC reader from the buffer reader
|
204
|
+
reader = Arrow::FeatherFileReader.new(buffer_reader)
|
205
|
+
|
206
|
+
# Read the table
|
207
|
+
|
208
|
+
|
209
|
+
output_data = []
|
210
|
+
|
211
|
+
table = reader.read
|
212
|
+
|
213
|
+
|
214
|
+
field_names = table.schema.fields.map(&:name)
|
215
|
+
table.each_record do |r|
|
216
|
+
row = {}
|
217
|
+
field_names.each do |f|
|
218
|
+
row[f] = r[f]
|
219
|
+
end
|
220
|
+
|
221
|
+
output_data << row
|
222
|
+
end
|
185
223
|
end
|
186
224
|
|
187
225
|
{
|
@@ -205,13 +243,16 @@ module ChalkRuby
|
|
205
243
|
explain: nil,
|
206
244
|
include_meta: nil,
|
207
245
|
store_plan_stages: nil,
|
208
|
-
timeout: nil
|
246
|
+
timeout: nil,
|
247
|
+
planner_options: nil
|
209
248
|
)
|
210
249
|
formatted_inputs = input.transform_values { |value| self.convert_to_protobuf_value(value) }
|
211
250
|
|
212
251
|
context = Chalk::Common::V1::OnlineQueryContext.new(
|
213
252
|
query_name: query_name,
|
214
253
|
query_name_version: query_name_version,
|
254
|
+
correlation_id: correlation_id,
|
255
|
+
options: planner_options || {}
|
215
256
|
)
|
216
257
|
|
217
258
|
r = Chalk::Common::V1::OnlineQueryRequest.new(
|
@@ -395,47 +436,6 @@ module ChalkRuby
|
|
395
436
|
|
396
437
|
private
|
397
438
|
|
398
|
-
# Converts Arrow binary data to an array of hashes
|
399
|
-
# @param arrow_data [String] Binary Arrow data (IPC stream format)
|
400
|
-
# @return [Array<Hash>] Array of hashes with column name as keys and Ruby values
|
401
|
-
def arrow_table_to_array(arrow_data)
|
402
|
-
require 'arrow'
|
403
|
-
|
404
|
-
buffer = Arrow::Buffer.new(arrow_data)
|
405
|
-
buffer_reader = Arrow::BufferInputStream.new(buffer)
|
406
|
-
|
407
|
-
# Try IPC stream format first (which is what we expect from the query service)
|
408
|
-
begin
|
409
|
-
reader = Arrow::RecordBatchStreamReader.new(buffer_reader)
|
410
|
-
table = reader.read_all
|
411
|
-
rescue => e
|
412
|
-
# Fall back to feather format for backward compatibility
|
413
|
-
# buffer_reader.rewind
|
414
|
-
begin
|
415
|
-
reader = Arrow::FeatherFileReader.new(buffer_reader)
|
416
|
-
table = reader.read
|
417
|
-
rescue => e2
|
418
|
-
raise "Failed to parse Arrow data: #{e.message}, #{e2.message}"
|
419
|
-
end
|
420
|
-
end
|
421
|
-
|
422
|
-
output_data = []
|
423
|
-
field_names = table.schema.fields.map(&:name)
|
424
|
-
|
425
|
-
table.each_record do |r|
|
426
|
-
row = {}
|
427
|
-
field_names.each do |f|
|
428
|
-
value = r[f]
|
429
|
-
# Convert GLib::Bytes to Ruby String for binary and large string types
|
430
|
-
value = value.to_s if value.is_a?(GLib::Bytes)
|
431
|
-
row[f] = value
|
432
|
-
end
|
433
|
-
output_data << row
|
434
|
-
end
|
435
|
-
|
436
|
-
output_data
|
437
|
-
end
|
438
|
-
|
439
439
|
def to_feather(input_hash)
|
440
440
|
require 'arrow'
|
441
441
|
|
data/lib/chalk_ruby/version.rb
CHANGED
data/test/chalk_ruby/integration/client_test.rb
CHANGED
@@ -3,15 +3,27 @@ require 'rspec/autorun'
|
|
3
3
|
require 'chalk_ruby/client'
|
4
4
|
require 'chalk_ruby/error'
|
5
5
|
|
6
|
-
CLIENT_ID = '
|
7
|
-
CLIENT_SECRET = '
|
6
|
+
CLIENT_ID = ''
|
7
|
+
CLIENT_SECRET = ''
|
8
8
|
|
9
9
|
RSpec.describe 'Online query' do
|
10
10
|
it 'should accept valid queries' do
|
11
11
|
client = ChalkRuby::Client.create(CLIENT_ID, CLIENT_SECRET)
|
12
12
|
response = client.query(
|
13
13
|
input: { 'user.id': 3454 },
|
14
|
-
output: %w(user.id)
|
14
|
+
output: %w(user.id),
|
15
|
+
query_name: "no_planner_options_test",
|
16
|
+
)
|
17
|
+
|
18
|
+
print response
|
19
|
+
end
|
20
|
+
it 'should run with planner options' do
|
21
|
+
client = ChalkRuby::Client.create(CLIENT_ID, CLIENT_SECRET)
|
22
|
+
response = client.query(
|
23
|
+
input: { 'user.id': 3454 },
|
24
|
+
output: %w(user.id),
|
25
|
+
query_name: "planner_options_test",
|
26
|
+
planner_options: {'defer_non_bus_persist_operators': "1"} # test planner option
|
15
27
|
)
|
16
28
|
|
17
29
|
print response
|
data/test/chalk_ruby/integration/grpc_client_test.rb
CHANGED
@@ -1,66 +1,43 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
# require 'chalk_ruby'
|
4
|
-
# require 'chalk_ruby'
|
5
|
-
|
6
|
-
require 'date'
|
7
|
-
require 'rspec/autorun'
|
8
|
-
require 'chalk_ruby/grpc_client'
|
9
|
-
require 'chalk_ruby/grpc/auth_interceptor'
|
10
|
-
require 'chalk_ruby/error'
|
11
|
-
require 'chalk_ruby/protos/chalk/server/v1/auth_pb'
|
12
|
-
require 'chalk_ruby/protos/chalk/server/v1/auth_services_pb'
|
13
|
-
require 'chalk_ruby/protos/chalk/engine/v1/query_server_services_pb'
|
14
|
-
require 'arrow'
|
15
|
-
|
16
|
-
|
17
|
-
CLIENT_ID = 'client-095f628e339a593e12a58559f5f8cd00'
|
18
|
-
CLIENT_SECRET = 'secret-55601b885bcc0d558c4abede69480aaac1dba6e1679889fa1d5196ba04f7af89'
|
19
|
-
|
1
|
+
require 'rspec'
|
2
|
+
require 'chalk_ruby'
|
20
3
|
|
21
4
|
RSpec.describe ChalkRuby::GrpcClient do
|
22
|
-
describe '#
|
5
|
+
describe '#query_bulk' do
|
23
6
|
let(:client) do
|
24
|
-
ChalkRuby::GrpcClient.
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
environment: "tmnmc9beyujew",
|
31
|
-
# api_timeout: 0.6, # seconds
|
32
|
-
# connect_timeout: 0.3, # seconds
|
33
|
-
# query_service_root_ca_path: "/Users/andrew/found_ca.pem" # path to the root ca for chalkai.internal.found.app,
|
34
|
-
)
|
7
|
+
ChalkRuby::GrpcClient.create(
|
8
|
+
ENV.fetch('CHALK_CLIENT_ID'),
|
9
|
+
ENV.fetch('CHALK_CLIENT_SECRET'),
|
10
|
+
ENV.fetch('CHALK_ENVIRONMENT', 'tmnmc9beyujew'),
|
11
|
+
ENV.fetch('CHALK_QUERY_SERVER', 'standard-gke.chalk-develop.gcp.chalk.ai'),
|
12
|
+
ENV.fetch('CHALK_API_SERVER', 'api.staging.chalk.ai:443')
|
35
13
|
)
|
36
14
|
end
|
37
15
|
|
38
|
-
# it 'can perform queries' do
|
39
|
-
# response = client.query(
|
40
|
-
# input: { 'business.id': 1 },
|
41
|
-
# output: %w(business.id)
|
42
|
-
# )
|
43
|
-
#
|
44
|
-
# expect(response).not_to be_nil
|
45
|
-
#
|
46
|
-
# puts response
|
47
|
-
# # The response should be a OnlineQueryBulkResponse
|
48
|
-
# # expect(response).to be_a(Chalk::Common::V1::OnlineQueryBulkResponse)
|
49
|
-
# end
|
50
|
-
|
51
16
|
it 'can perform bulk queries' do
|
52
17
|
response = client.query_bulk(
|
53
18
|
input: { 'user.id': 1 },
|
54
|
-
output: %w(user.id user.socure_score
|
19
|
+
output: %w(user.id user.socure_score),
|
20
|
+
planner_options: {'defer_non_bus_persist_operators': "1"}, # test planner option
|
21
|
+
query_name: "planner_options_test",
|
22
|
+
|
55
23
|
)
|
56
24
|
|
57
25
|
expect(response).not_to be_nil
|
58
|
-
# The response should
|
59
|
-
|
60
|
-
|
61
|
-
|
26
|
+
# The response should have no errors and user_id of 1
|
27
|
+
expect(response[:errors]).to be_empty
|
28
|
+
expect(response[:data][0]['user.id']).to eq(1)
|
29
|
+
end
|
30
|
+
it 'can perform bulk queries without planner options' do
|
31
|
+
response = client.query_bulk(
|
32
|
+
input: { 'user.id': 1 },
|
33
|
+
output: %w(user.id user.socure_score),
|
34
|
+
query_name: "no_planner_options_test",
|
35
|
+
)
|
62
36
|
|
63
|
-
|
37
|
+
expect(response).not_to be_nil
|
38
|
+
# The response should have no errors and user_id of 1
|
39
|
+
expect(response[:errors]).to be_empty
|
40
|
+
expect(response[:data][0]['user.id']).to eq(1)
|
64
41
|
end
|
65
42
|
end
|
66
43
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: chalk_ruby
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.8
|
4
|
+
version: 0.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Chalk AI, Inc.
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2025-03-
|
11
|
+
date: 2025-03-26 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -237,6 +237,7 @@ files:
|
|
237
237
|
- ".gitignore"
|
238
238
|
- ".rubocop.yml"
|
239
239
|
- ".rubocop_todo.yml"
|
240
|
+
- Dockerfile
|
240
241
|
- Gemfile
|
241
242
|
- LICENSE
|
242
243
|
- README.dev.md
|
@@ -361,7 +362,6 @@ files:
|
|
361
362
|
- sig/chalk_ruby/http/response.rbs
|
362
363
|
- sig/chalk_ruby/token.rbs
|
363
364
|
- sig/chalk_ruby/versions.rbs
|
364
|
-
- test/chalk_ruby/integration/arrow_conversion_test.rb
|
365
365
|
- test/chalk_ruby/integration/client_test.rb
|
366
366
|
- test/chalk_ruby/integration/grpc_client_test.rb
|
367
367
|
- test/chalk_ruby/test_helper.rb
|
@@ -392,7 +392,6 @@ signing_key:
|
|
392
392
|
specification_version: 4
|
393
393
|
summary: A simple Ruby client for Chalk
|
394
394
|
test_files:
|
395
|
-
- test/chalk_ruby/integration/arrow_conversion_test.rb
|
396
395
|
- test/chalk_ruby/integration/client_test.rb
|
397
396
|
- test/chalk_ruby/integration/grpc_client_test.rb
|
398
397
|
- test/chalk_ruby/test_helper.rb
|
data/test/chalk_ruby/integration/arrow_conversion_test.rb
DELETED
@@ -1,286 +0,0 @@
|
|
1
|
-
require 'minitest/autorun'
|
2
|
-
require 'chalk_ruby/grpc_client'
|
3
|
-
require 'arrow'
|
4
|
-
|
5
|
-
class ArrowConversionTest < Minitest::Test
|
6
|
-
def setup
|
7
|
-
@client = create_client
|
8
|
-
end
|
9
|
-
|
10
|
-
def test_arrow_string_conversion
|
11
|
-
assert_type_conversion('test_string', 'string', String)
|
12
|
-
end
|
13
|
-
|
14
|
-
def test_arrow_large_string_conversion
|
15
|
-
test_string = 'test_large_string'
|
16
|
-
# For large string types, we might get a GLib::Bytes object, which can be converted to a string
|
17
|
-
result = assert_type_conversion(test_string, 'large_string', [String, GLib::Bytes])
|
18
|
-
# Check the actual content
|
19
|
-
actual_value = result.is_a?(GLib::Bytes) ? result.to_s : result
|
20
|
-
assert_equal test_string, actual_value
|
21
|
-
end
|
22
|
-
|
23
|
-
def test_arrow_int8_conversion
|
24
|
-
assert_type_conversion(8, 'int8', Integer)
|
25
|
-
end
|
26
|
-
|
27
|
-
def test_arrow_int16_conversion
|
28
|
-
assert_type_conversion(16, 'int16', Integer)
|
29
|
-
end
|
30
|
-
|
31
|
-
def test_arrow_int32_conversion
|
32
|
-
assert_type_conversion(32, 'int32', Integer)
|
33
|
-
end
|
34
|
-
|
35
|
-
def test_arrow_int64_conversion
|
36
|
-
assert_type_conversion(64, 'int64', Integer)
|
37
|
-
end
|
38
|
-
|
39
|
-
def test_arrow_uint8_conversion
|
40
|
-
assert_type_conversion(8, 'uint8', Integer)
|
41
|
-
end
|
42
|
-
|
43
|
-
def test_arrow_uint16_conversion
|
44
|
-
assert_type_conversion(16, 'uint16', Integer)
|
45
|
-
end
|
46
|
-
|
47
|
-
def test_arrow_uint32_conversion
|
48
|
-
assert_type_conversion(32, 'uint32', Integer)
|
49
|
-
end
|
50
|
-
|
51
|
-
def test_arrow_uint64_conversion
|
52
|
-
assert_type_conversion(64, 'uint64', Integer)
|
53
|
-
end
|
54
|
-
|
55
|
-
def test_arrow_float_conversion
|
56
|
-
assert_type_conversion(3.14, 'float', Float)
|
57
|
-
end
|
58
|
-
|
59
|
-
def test_arrow_double_conversion
|
60
|
-
assert_type_conversion(3.14159, 'double', Float)
|
61
|
-
end
|
62
|
-
|
63
|
-
def test_arrow_boolean_conversion
|
64
|
-
assert_type_conversion(true, 'boolean', [TrueClass, FalseClass])
|
65
|
-
end
|
66
|
-
|
67
|
-
def test_arrow_timestamp_conversion
|
68
|
-
# Create timestamp as a Ruby Time object
|
69
|
-
timestamp = Time.now
|
70
|
-
# Pass timestamp seconds since epoch to Arrow
|
71
|
-
arrow_timestamp = timestamp.to_i
|
72
|
-
# Check that it converts back to a Time-like object
|
73
|
-
result = assert_type_conversion(arrow_timestamp, 'timestamp[s]', Time)
|
74
|
-
# Check that the timestamp values are approximately equal
|
75
|
-
assert_in_delta timestamp.to_i, result.to_i, 1
|
76
|
-
end
|
77
|
-
|
78
|
-
def test_arrow_date32_conversion
|
79
|
-
# Date represented as days since epoch
|
80
|
-
today = Date.today
|
81
|
-
days_since_epoch = (today - Date.new(1970, 1, 1)).to_i
|
82
|
-
result = assert_type_conversion(days_since_epoch, 'date32', Date)
|
83
|
-
assert_equal today, result
|
84
|
-
end
|
85
|
-
|
86
|
-
def test_arrow_binary_conversion
|
87
|
-
binary_data = "\x01\x02\x03\x04"
|
88
|
-
# For binary types, we might get a GLib::Bytes object, which can be converted to a string
|
89
|
-
result = assert_type_conversion(binary_data, 'binary', [String, GLib::Bytes])
|
90
|
-
# Check the actual content
|
91
|
-
actual_value = result.is_a?(GLib::Bytes) ? result.to_s : result
|
92
|
-
assert_equal binary_data, actual_value
|
93
|
-
end
|
94
|
-
|
95
|
-
def test_arrow_large_binary_conversion
|
96
|
-
large_binary_data = "\x01\x02\x03\x04" * 10
|
97
|
-
# For binary types, we might get a GLib::Bytes object, which can be converted to a string
|
98
|
-
result = assert_type_conversion(large_binary_data, 'large_binary', [String, GLib::Bytes])
|
99
|
-
# Check the actual content
|
100
|
-
actual_value = result.is_a?(GLib::Bytes) ? result.to_s : result
|
101
|
-
assert_equal large_binary_data, actual_value
|
102
|
-
end
|
103
|
-
|
104
|
-
def test_arrow_list_conversion
|
105
|
-
# Skip this test as it's difficult to create a list array with the current Arrow Ruby API
|
106
|
-
skip "Creating list arrays requires more complex approach with current Arrow Ruby API"
|
107
|
-
end
|
108
|
-
|
109
|
-
def test_arrow_struct_conversion
|
110
|
-
# Skip this test as it's difficult to create a struct array with the current Arrow Ruby API
|
111
|
-
skip "Creating struct arrays requires more complex approach with current Arrow Ruby API"
|
112
|
-
end
|
113
|
-
|
114
|
-
def test_arrow_null_conversion
|
115
|
-
# Test that null values are properly converted to nil
|
116
|
-
schema = Arrow::Schema.new([Arrow::Field.new("test_null", Arrow::Int32DataType.new)])
|
117
|
-
|
118
|
-
# Create array with a null value
|
119
|
-
builder = Arrow::Int32ArrayBuilder.new
|
120
|
-
builder.append_null
|
121
|
-
array = builder.finish
|
122
|
-
|
123
|
-
table = Arrow::Table.new(schema, [array])
|
124
|
-
|
125
|
-
# Serialize to Arrow IPC streaming format
|
126
|
-
buffer = Arrow::ResizableBuffer.new(0)
|
127
|
-
output = Arrow::BufferOutputStream.new(buffer)
|
128
|
-
writer = Arrow::RecordBatchStreamWriter.new(output, table.schema)
|
129
|
-
writer.write_table(table)
|
130
|
-
writer.close
|
131
|
-
output.close
|
132
|
-
arrow_data = buffer.data.to_s
|
133
|
-
|
134
|
-
result = @client.send(:arrow_table_to_array, arrow_data)
|
135
|
-
|
136
|
-
assert_instance_of Array, result
|
137
|
-
assert_equal 1, result.length
|
138
|
-
assert_nil result[0]["test_null"]
|
139
|
-
end
|
140
|
-
|
141
|
-
private
|
142
|
-
|
143
|
-
def create_client
|
144
|
-
config = ChalkRuby::Config.new(
|
145
|
-
client_id: 'test',
|
146
|
-
client_secret: 'test',
|
147
|
-
environment: 'test',
|
148
|
-
query_server: 'test'
|
149
|
-
)
|
150
|
-
ChalkRuby::GrpcClient.new(config)
|
151
|
-
end
|
152
|
-
|
153
|
-
def assert_type_conversion(value, arrow_type_name, expected_ruby_type)
|
154
|
-
# Create schema with the specified arrow type
|
155
|
-
data_type = create_arrow_data_type(arrow_type_name)
|
156
|
-
schema = Arrow::Schema.new([Arrow::Field.new("test_field", data_type)])
|
157
|
-
|
158
|
-
# Create array with the value
|
159
|
-
array = create_arrow_array(data_type, value)
|
160
|
-
|
161
|
-
# Create table with schema and array
|
162
|
-
table = Arrow::Table.new(schema, [array])
|
163
|
-
|
164
|
-
# Serialize table to Arrow IPC streaming format
|
165
|
-
buffer = Arrow::ResizableBuffer.new(0)
|
166
|
-
output = Arrow::BufferOutputStream.new(buffer)
|
167
|
-
writer = Arrow::RecordBatchStreamWriter.new(output, table.schema)
|
168
|
-
writer.write_table(table)
|
169
|
-
writer.close
|
170
|
-
output.close
|
171
|
-
arrow_data = buffer.data.to_s
|
172
|
-
|
173
|
-
# Convert arrow data to array of hashes
|
174
|
-
result = @client.send(:arrow_table_to_array, arrow_data)
|
175
|
-
|
176
|
-
# Check that the value was properly converted
|
177
|
-
assert_instance_of Array, result
|
178
|
-
assert_equal 1, result.length
|
179
|
-
|
180
|
-
expected_types = Array(expected_ruby_type)
|
181
|
-
assert expected_types.any? { |type| result[0]["test_field"].is_a?(type) },
|
182
|
-
"Expected #{result[0]["test_field"].inspect} to be a #{expected_ruby_type}, but was #{result[0]["test_field"].class}"
|
183
|
-
|
184
|
-
# Return the converted value for further assertions
|
185
|
-
result[0]["test_field"]
|
186
|
-
end
|
187
|
-
|
188
|
-
def create_arrow_data_type(type_name)
|
189
|
-
case type_name
|
190
|
-
when 'int8' then Arrow::Int8DataType.new
|
191
|
-
when 'int16' then Arrow::Int16DataType.new
|
192
|
-
when 'int32' then Arrow::Int32DataType.new
|
193
|
-
when 'int64' then Arrow::Int64DataType.new
|
194
|
-
when 'uint8' then Arrow::UInt8DataType.new
|
195
|
-
when 'uint16' then Arrow::UInt16DataType.new
|
196
|
-
when 'uint32' then Arrow::UInt32DataType.new
|
197
|
-
when 'uint64' then Arrow::UInt64DataType.new
|
198
|
-
when 'float' then Arrow::FloatDataType.new
|
199
|
-
when 'double' then Arrow::DoubleDataType.new
|
200
|
-
when 'string' then Arrow::StringDataType.new
|
201
|
-
when 'large_string' then Arrow::LargeStringDataType.new
|
202
|
-
when 'binary' then Arrow::BinaryDataType.new
|
203
|
-
when 'large_binary' then Arrow::LargeBinaryDataType.new
|
204
|
-
when 'boolean' then Arrow::BooleanDataType.new
|
205
|
-
when 'timestamp[s]' then Arrow::TimestampDataType.new(:second)
|
206
|
-
when 'date32' then Arrow::Date32DataType.new
|
207
|
-
else
|
208
|
-
raise "Unsupported arrow type: #{type_name}"
|
209
|
-
end
|
210
|
-
end
|
211
|
-
|
212
|
-
def create_arrow_array(data_type, value)
|
213
|
-
case data_type
|
214
|
-
when Arrow::Int8DataType
|
215
|
-
builder = Arrow::Int8ArrayBuilder.new
|
216
|
-
builder.append_value(value)
|
217
|
-
builder.finish
|
218
|
-
when Arrow::Int16DataType
|
219
|
-
builder = Arrow::Int16ArrayBuilder.new
|
220
|
-
builder.append_value(value)
|
221
|
-
builder.finish
|
222
|
-
when Arrow::Int32DataType
|
223
|
-
builder = Arrow::Int32ArrayBuilder.new
|
224
|
-
builder.append_value(value)
|
225
|
-
builder.finish
|
226
|
-
when Arrow::Int64DataType
|
227
|
-
builder = Arrow::Int64ArrayBuilder.new
|
228
|
-
builder.append_value(value)
|
229
|
-
builder.finish
|
230
|
-
when Arrow::UInt8DataType
|
231
|
-
builder = Arrow::UInt8ArrayBuilder.new
|
232
|
-
builder.append_value(value)
|
233
|
-
builder.finish
|
234
|
-
when Arrow::UInt16DataType
|
235
|
-
builder = Arrow::UInt16ArrayBuilder.new
|
236
|
-
builder.append_value(value)
|
237
|
-
builder.finish
|
238
|
-
when Arrow::UInt32DataType
|
239
|
-
builder = Arrow::UInt32ArrayBuilder.new
|
240
|
-
builder.append_value(value)
|
241
|
-
builder.finish
|
242
|
-
when Arrow::UInt64DataType
|
243
|
-
builder = Arrow::UInt64ArrayBuilder.new
|
244
|
-
builder.append_value(value)
|
245
|
-
builder.finish
|
246
|
-
when Arrow::FloatDataType
|
247
|
-
builder = Arrow::FloatArrayBuilder.new
|
248
|
-
builder.append_value(value)
|
249
|
-
builder.finish
|
250
|
-
when Arrow::DoubleDataType
|
251
|
-
builder = Arrow::DoubleArrayBuilder.new
|
252
|
-
builder.append_value(value)
|
253
|
-
builder.finish
|
254
|
-
when Arrow::StringDataType
|
255
|
-
builder = Arrow::StringArrayBuilder.new
|
256
|
-
builder.append_value(value)
|
257
|
-
builder.finish
|
258
|
-
when Arrow::LargeStringDataType
|
259
|
-
builder = Arrow::LargeStringArrayBuilder.new
|
260
|
-
builder.append_value(value)
|
261
|
-
builder.finish
|
262
|
-
when Arrow::BinaryDataType
|
263
|
-
builder = Arrow::BinaryArrayBuilder.new
|
264
|
-
builder.append_value(value)
|
265
|
-
builder.finish
|
266
|
-
when Arrow::LargeBinaryDataType
|
267
|
-
builder = Arrow::LargeBinaryArrayBuilder.new
|
268
|
-
builder.append_value(value)
|
269
|
-
builder.finish
|
270
|
-
when Arrow::BooleanDataType
|
271
|
-
builder = Arrow::BooleanArrayBuilder.new
|
272
|
-
builder.append_value(value)
|
273
|
-
builder.finish
|
274
|
-
when Arrow::TimestampDataType
|
275
|
-
builder = Arrow::TimestampArrayBuilder.new(data_type)
|
276
|
-
builder.append_value(value)
|
277
|
-
builder.finish
|
278
|
-
when Arrow::Date32DataType
|
279
|
-
builder = Arrow::Date32ArrayBuilder.new
|
280
|
-
builder.append_value(value)
|
281
|
-
builder.finish
|
282
|
-
else
|
283
|
-
raise "Unsupported arrow data type: #{data_type.class}"
|
284
|
-
end
|
285
|
-
end
|
286
|
-
end
|