chalk_ruby 0.2.6 → 0.2.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: bd2402b4edf2bce128c0f86f9b81ff089bed5a2cb9564fc5c1b30d4290b2d062
|
4
|
+
data.tar.gz: 38380294054c04a7743adf9b2e2080dcba1d73e9a8cb9f1c1ddca032bbb46883
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 99e17dd176aab3cbec5b2c65f1b1e44364978e5f2451f631c0b0c93832078cf73519c2a04982e3cb31ef69b93209550a9610c20326d3120ff361cdd6cedff754
|
7
|
+
data.tar.gz: 0db6e82923b3aaa2ced3401d5a40988b2a98b79ababf13ec50ab228234ab08ebe7c723015f86a8a1438488c5db3627bced34a4ed3bcff5a82ed1aa23a67950a6
|
@@ -154,8 +154,11 @@ module ChalkRuby
|
|
154
154
|
context: nil,
|
155
155
|
response_options: nil,
|
156
156
|
body_type: nil,
|
157
|
-
timeout: nil
|
158
|
-
|
157
|
+
timeout: nil,
|
158
|
+
query_name: nil,
|
159
|
+
query_name_version: nil,
|
160
|
+
correlation_id: nil
|
161
|
+
)
|
159
162
|
# Convert input to feather format
|
160
163
|
inputs_feather = to_feather(input)
|
161
164
|
|
@@ -164,7 +167,7 @@ module ChalkRuby
|
|
164
167
|
inputs_feather: inputs_feather,
|
165
168
|
outputs: output.map { |o| Chalk::Common::V1::OutputExpr.new(feature_fqn: o) },
|
166
169
|
staleness: staleness || {},
|
167
|
-
context: context || Chalk::Common::V1::OnlineQueryContext.new,
|
170
|
+
context: context || Chalk::Common::V1::OnlineQueryContext.new(query_name: query_name, query_name_version: query_name_version, correlation_id: correlation_id),
|
168
171
|
response_options: response_options || Chalk::Common::V1::OnlineQueryResponseOptions.new,
|
169
172
|
body_type: body_type || :FEATHER_BODY_TYPE_UNSPECIFIED
|
170
173
|
)
|
@@ -178,31 +181,7 @@ module ChalkRuby
|
|
178
181
|
output_data = nil
|
179
182
|
|
180
183
|
if (!response.scalars_data.nil?) and response.scalars_data.length > 0
|
181
|
-
|
182
|
-
|
183
|
-
# Create a buffer reader
|
184
|
-
buffer_reader = Arrow::BufferInputStream.new(buffer)
|
185
|
-
|
186
|
-
# Create an IPC reader from the buffer reader
|
187
|
-
reader = Arrow::FeatherFileReader.new(buffer_reader)
|
188
|
-
|
189
|
-
# Read the table
|
190
|
-
|
191
|
-
|
192
|
-
output_data = []
|
193
|
-
|
194
|
-
table = reader.read
|
195
|
-
|
196
|
-
|
197
|
-
field_names = table.schema.fields.map(&:name)
|
198
|
-
table.each_record do |r|
|
199
|
-
row = {}
|
200
|
-
field_names.each do |f|
|
201
|
-
row[f] = r[f]
|
202
|
-
end
|
203
|
-
|
204
|
-
output_data << row
|
205
|
-
end
|
184
|
+
output_data = arrow_table_to_array(response.scalars_data)
|
206
185
|
end
|
207
186
|
|
208
187
|
{
|
@@ -416,6 +395,47 @@ module ChalkRuby
|
|
416
395
|
|
417
396
|
private
|
418
397
|
|
398
|
+
# Converts Arrow binary data to an array of hashes
|
399
|
+
# @param arrow_data [String] Binary Arrow data (IPC stream format)
|
400
|
+
# @return [Array<Hash>] Array of hashes with column name as keys and Ruby values
|
401
|
+
def arrow_table_to_array(arrow_data)
|
402
|
+
require 'arrow'
|
403
|
+
|
404
|
+
buffer = Arrow::Buffer.new(arrow_data)
|
405
|
+
buffer_reader = Arrow::BufferInputStream.new(buffer)
|
406
|
+
|
407
|
+
# Try IPC stream format first (which is what we expect from the query service)
|
408
|
+
begin
|
409
|
+
reader = Arrow::RecordBatchStreamReader.new(buffer_reader)
|
410
|
+
table = reader.read_all
|
411
|
+
rescue => e
|
412
|
+
# Fall back to feather format for backward compatibility
|
413
|
+
# buffer_reader.rewind
|
414
|
+
begin
|
415
|
+
reader = Arrow::FeatherFileReader.new(buffer_reader)
|
416
|
+
table = reader.read
|
417
|
+
rescue => e2
|
418
|
+
raise "Failed to parse Arrow data: #{e.message}, #{e2.message}"
|
419
|
+
end
|
420
|
+
end
|
421
|
+
|
422
|
+
output_data = []
|
423
|
+
field_names = table.schema.fields.map(&:name)
|
424
|
+
|
425
|
+
table.each_record do |r|
|
426
|
+
row = {}
|
427
|
+
field_names.each do |f|
|
428
|
+
value = r[f]
|
429
|
+
# Convert GLib::Bytes to Ruby String for binary and large string types
|
430
|
+
value = value.to_s if value.is_a?(GLib::Bytes)
|
431
|
+
row[f] = value
|
432
|
+
end
|
433
|
+
output_data << row
|
434
|
+
end
|
435
|
+
|
436
|
+
output_data
|
437
|
+
end
|
438
|
+
|
419
439
|
def to_feather(input_hash)
|
420
440
|
require 'arrow'
|
421
441
|
|
data/lib/chalk_ruby/version.rb
CHANGED
@@ -0,0 +1,286 @@
|
|
1
|
+
require 'minitest/autorun'
|
2
|
+
require 'chalk_ruby/grpc_client'
|
3
|
+
require 'arrow'
|
4
|
+
|
5
|
+
class ArrowConversionTest < Minitest::Test
|
6
|
+
def setup
|
7
|
+
@client = create_client
|
8
|
+
end
|
9
|
+
|
10
|
+
def test_arrow_string_conversion
|
11
|
+
assert_type_conversion('test_string', 'string', String)
|
12
|
+
end
|
13
|
+
|
14
|
+
def test_arrow_large_string_conversion
|
15
|
+
test_string = 'test_large_string'
|
16
|
+
# For large string types, we might get a GLib::Bytes object, which can be converted to a string
|
17
|
+
result = assert_type_conversion(test_string, 'large_string', [String, GLib::Bytes])
|
18
|
+
# Check the actual content
|
19
|
+
actual_value = result.is_a?(GLib::Bytes) ? result.to_s : result
|
20
|
+
assert_equal test_string, actual_value
|
21
|
+
end
|
22
|
+
|
23
|
+
def test_arrow_int8_conversion
|
24
|
+
assert_type_conversion(8, 'int8', Integer)
|
25
|
+
end
|
26
|
+
|
27
|
+
def test_arrow_int16_conversion
|
28
|
+
assert_type_conversion(16, 'int16', Integer)
|
29
|
+
end
|
30
|
+
|
31
|
+
def test_arrow_int32_conversion
|
32
|
+
assert_type_conversion(32, 'int32', Integer)
|
33
|
+
end
|
34
|
+
|
35
|
+
def test_arrow_int64_conversion
|
36
|
+
assert_type_conversion(64, 'int64', Integer)
|
37
|
+
end
|
38
|
+
|
39
|
+
def test_arrow_uint8_conversion
|
40
|
+
assert_type_conversion(8, 'uint8', Integer)
|
41
|
+
end
|
42
|
+
|
43
|
+
def test_arrow_uint16_conversion
|
44
|
+
assert_type_conversion(16, 'uint16', Integer)
|
45
|
+
end
|
46
|
+
|
47
|
+
def test_arrow_uint32_conversion
|
48
|
+
assert_type_conversion(32, 'uint32', Integer)
|
49
|
+
end
|
50
|
+
|
51
|
+
def test_arrow_uint64_conversion
|
52
|
+
assert_type_conversion(64, 'uint64', Integer)
|
53
|
+
end
|
54
|
+
|
55
|
+
def test_arrow_float_conversion
|
56
|
+
assert_type_conversion(3.14, 'float', Float)
|
57
|
+
end
|
58
|
+
|
59
|
+
def test_arrow_double_conversion
|
60
|
+
assert_type_conversion(3.14159, 'double', Float)
|
61
|
+
end
|
62
|
+
|
63
|
+
def test_arrow_boolean_conversion
|
64
|
+
assert_type_conversion(true, 'boolean', [TrueClass, FalseClass])
|
65
|
+
end
|
66
|
+
|
67
|
+
def test_arrow_timestamp_conversion
|
68
|
+
# Create timestamp as a Ruby Time object
|
69
|
+
timestamp = Time.now
|
70
|
+
# Pass timestamp seconds since epoch to Arrow
|
71
|
+
arrow_timestamp = timestamp.to_i
|
72
|
+
# Check that it converts back to a Time-like object
|
73
|
+
result = assert_type_conversion(arrow_timestamp, 'timestamp[s]', Time)
|
74
|
+
# Check that the timestamp values are approximately equal
|
75
|
+
assert_in_delta timestamp.to_i, result.to_i, 1
|
76
|
+
end
|
77
|
+
|
78
|
+
def test_arrow_date32_conversion
|
79
|
+
# Date represented as days since epoch
|
80
|
+
today = Date.today
|
81
|
+
days_since_epoch = (today - Date.new(1970, 1, 1)).to_i
|
82
|
+
result = assert_type_conversion(days_since_epoch, 'date32', Date)
|
83
|
+
assert_equal today, result
|
84
|
+
end
|
85
|
+
|
86
|
+
def test_arrow_binary_conversion
|
87
|
+
binary_data = "\x01\x02\x03\x04"
|
88
|
+
# For binary types, we might get a GLib::Bytes object, which can be converted to a string
|
89
|
+
result = assert_type_conversion(binary_data, 'binary', [String, GLib::Bytes])
|
90
|
+
# Check the actual content
|
91
|
+
actual_value = result.is_a?(GLib::Bytes) ? result.to_s : result
|
92
|
+
assert_equal binary_data, actual_value
|
93
|
+
end
|
94
|
+
|
95
|
+
def test_arrow_large_binary_conversion
|
96
|
+
large_binary_data = "\x01\x02\x03\x04" * 10
|
97
|
+
# For binary types, we might get a GLib::Bytes object, which can be converted to a string
|
98
|
+
result = assert_type_conversion(large_binary_data, 'large_binary', [String, GLib::Bytes])
|
99
|
+
# Check the actual content
|
100
|
+
actual_value = result.is_a?(GLib::Bytes) ? result.to_s : result
|
101
|
+
assert_equal large_binary_data, actual_value
|
102
|
+
end
|
103
|
+
|
104
|
+
def test_arrow_list_conversion
|
105
|
+
# Skip this test as it's difficult to create a list array with the current Arrow Ruby API
|
106
|
+
skip "Creating list arrays requires more complex approach with current Arrow Ruby API"
|
107
|
+
end
|
108
|
+
|
109
|
+
def test_arrow_struct_conversion
|
110
|
+
# Skip this test as it's difficult to create a struct array with the current Arrow Ruby API
|
111
|
+
skip "Creating struct arrays requires more complex approach with current Arrow Ruby API"
|
112
|
+
end
|
113
|
+
|
114
|
+
def test_arrow_null_conversion
|
115
|
+
# Test that null values are properly converted to nil
|
116
|
+
schema = Arrow::Schema.new([Arrow::Field.new("test_null", Arrow::Int32DataType.new)])
|
117
|
+
|
118
|
+
# Create array with a null value
|
119
|
+
builder = Arrow::Int32ArrayBuilder.new
|
120
|
+
builder.append_null
|
121
|
+
array = builder.finish
|
122
|
+
|
123
|
+
table = Arrow::Table.new(schema, [array])
|
124
|
+
|
125
|
+
# Serialize to Arrow IPC streaming format
|
126
|
+
buffer = Arrow::ResizableBuffer.new(0)
|
127
|
+
output = Arrow::BufferOutputStream.new(buffer)
|
128
|
+
writer = Arrow::RecordBatchStreamWriter.new(output, table.schema)
|
129
|
+
writer.write_table(table)
|
130
|
+
writer.close
|
131
|
+
output.close
|
132
|
+
arrow_data = buffer.data.to_s
|
133
|
+
|
134
|
+
result = @client.send(:arrow_table_to_array, arrow_data)
|
135
|
+
|
136
|
+
assert_instance_of Array, result
|
137
|
+
assert_equal 1, result.length
|
138
|
+
assert_nil result[0]["test_null"]
|
139
|
+
end
|
140
|
+
|
141
|
+
private
|
142
|
+
|
143
|
+
def create_client
|
144
|
+
config = ChalkRuby::Config.new(
|
145
|
+
client_id: 'test',
|
146
|
+
client_secret: 'test',
|
147
|
+
environment: 'test',
|
148
|
+
query_server: 'test'
|
149
|
+
)
|
150
|
+
ChalkRuby::GrpcClient.new(config)
|
151
|
+
end
|
152
|
+
|
153
|
+
def assert_type_conversion(value, arrow_type_name, expected_ruby_type)
|
154
|
+
# Create schema with the specified arrow type
|
155
|
+
data_type = create_arrow_data_type(arrow_type_name)
|
156
|
+
schema = Arrow::Schema.new([Arrow::Field.new("test_field", data_type)])
|
157
|
+
|
158
|
+
# Create array with the value
|
159
|
+
array = create_arrow_array(data_type, value)
|
160
|
+
|
161
|
+
# Create table with schema and array
|
162
|
+
table = Arrow::Table.new(schema, [array])
|
163
|
+
|
164
|
+
# Serialize table to Arrow IPC streaming format
|
165
|
+
buffer = Arrow::ResizableBuffer.new(0)
|
166
|
+
output = Arrow::BufferOutputStream.new(buffer)
|
167
|
+
writer = Arrow::RecordBatchStreamWriter.new(output, table.schema)
|
168
|
+
writer.write_table(table)
|
169
|
+
writer.close
|
170
|
+
output.close
|
171
|
+
arrow_data = buffer.data.to_s
|
172
|
+
|
173
|
+
# Convert arrow data to array of hashes
|
174
|
+
result = @client.send(:arrow_table_to_array, arrow_data)
|
175
|
+
|
176
|
+
# Check that the value was properly converted
|
177
|
+
assert_instance_of Array, result
|
178
|
+
assert_equal 1, result.length
|
179
|
+
|
180
|
+
expected_types = Array(expected_ruby_type)
|
181
|
+
assert expected_types.any? { |type| result[0]["test_field"].is_a?(type) },
|
182
|
+
"Expected #{result[0]["test_field"].inspect} to be a #{expected_ruby_type}, but was #{result[0]["test_field"].class}"
|
183
|
+
|
184
|
+
# Return the converted value for further assertions
|
185
|
+
result[0]["test_field"]
|
186
|
+
end
|
187
|
+
|
188
|
+
def create_arrow_data_type(type_name)
|
189
|
+
case type_name
|
190
|
+
when 'int8' then Arrow::Int8DataType.new
|
191
|
+
when 'int16' then Arrow::Int16DataType.new
|
192
|
+
when 'int32' then Arrow::Int32DataType.new
|
193
|
+
when 'int64' then Arrow::Int64DataType.new
|
194
|
+
when 'uint8' then Arrow::UInt8DataType.new
|
195
|
+
when 'uint16' then Arrow::UInt16DataType.new
|
196
|
+
when 'uint32' then Arrow::UInt32DataType.new
|
197
|
+
when 'uint64' then Arrow::UInt64DataType.new
|
198
|
+
when 'float' then Arrow::FloatDataType.new
|
199
|
+
when 'double' then Arrow::DoubleDataType.new
|
200
|
+
when 'string' then Arrow::StringDataType.new
|
201
|
+
when 'large_string' then Arrow::LargeStringDataType.new
|
202
|
+
when 'binary' then Arrow::BinaryDataType.new
|
203
|
+
when 'large_binary' then Arrow::LargeBinaryDataType.new
|
204
|
+
when 'boolean' then Arrow::BooleanDataType.new
|
205
|
+
when 'timestamp[s]' then Arrow::TimestampDataType.new(:second)
|
206
|
+
when 'date32' then Arrow::Date32DataType.new
|
207
|
+
else
|
208
|
+
raise "Unsupported arrow type: #{type_name}"
|
209
|
+
end
|
210
|
+
end
|
211
|
+
|
212
|
+
def create_arrow_array(data_type, value)
|
213
|
+
case data_type
|
214
|
+
when Arrow::Int8DataType
|
215
|
+
builder = Arrow::Int8ArrayBuilder.new
|
216
|
+
builder.append_value(value)
|
217
|
+
builder.finish
|
218
|
+
when Arrow::Int16DataType
|
219
|
+
builder = Arrow::Int16ArrayBuilder.new
|
220
|
+
builder.append_value(value)
|
221
|
+
builder.finish
|
222
|
+
when Arrow::Int32DataType
|
223
|
+
builder = Arrow::Int32ArrayBuilder.new
|
224
|
+
builder.append_value(value)
|
225
|
+
builder.finish
|
226
|
+
when Arrow::Int64DataType
|
227
|
+
builder = Arrow::Int64ArrayBuilder.new
|
228
|
+
builder.append_value(value)
|
229
|
+
builder.finish
|
230
|
+
when Arrow::UInt8DataType
|
231
|
+
builder = Arrow::UInt8ArrayBuilder.new
|
232
|
+
builder.append_value(value)
|
233
|
+
builder.finish
|
234
|
+
when Arrow::UInt16DataType
|
235
|
+
builder = Arrow::UInt16ArrayBuilder.new
|
236
|
+
builder.append_value(value)
|
237
|
+
builder.finish
|
238
|
+
when Arrow::UInt32DataType
|
239
|
+
builder = Arrow::UInt32ArrayBuilder.new
|
240
|
+
builder.append_value(value)
|
241
|
+
builder.finish
|
242
|
+
when Arrow::UInt64DataType
|
243
|
+
builder = Arrow::UInt64ArrayBuilder.new
|
244
|
+
builder.append_value(value)
|
245
|
+
builder.finish
|
246
|
+
when Arrow::FloatDataType
|
247
|
+
builder = Arrow::FloatArrayBuilder.new
|
248
|
+
builder.append_value(value)
|
249
|
+
builder.finish
|
250
|
+
when Arrow::DoubleDataType
|
251
|
+
builder = Arrow::DoubleArrayBuilder.new
|
252
|
+
builder.append_value(value)
|
253
|
+
builder.finish
|
254
|
+
when Arrow::StringDataType
|
255
|
+
builder = Arrow::StringArrayBuilder.new
|
256
|
+
builder.append_value(value)
|
257
|
+
builder.finish
|
258
|
+
when Arrow::LargeStringDataType
|
259
|
+
builder = Arrow::LargeStringArrayBuilder.new
|
260
|
+
builder.append_value(value)
|
261
|
+
builder.finish
|
262
|
+
when Arrow::BinaryDataType
|
263
|
+
builder = Arrow::BinaryArrayBuilder.new
|
264
|
+
builder.append_value(value)
|
265
|
+
builder.finish
|
266
|
+
when Arrow::LargeBinaryDataType
|
267
|
+
builder = Arrow::LargeBinaryArrayBuilder.new
|
268
|
+
builder.append_value(value)
|
269
|
+
builder.finish
|
270
|
+
when Arrow::BooleanDataType
|
271
|
+
builder = Arrow::BooleanArrayBuilder.new
|
272
|
+
builder.append_value(value)
|
273
|
+
builder.finish
|
274
|
+
when Arrow::TimestampDataType
|
275
|
+
builder = Arrow::TimestampArrayBuilder.new(data_type)
|
276
|
+
builder.append_value(value)
|
277
|
+
builder.finish
|
278
|
+
when Arrow::Date32DataType
|
279
|
+
builder = Arrow::Date32ArrayBuilder.new
|
280
|
+
builder.append_value(value)
|
281
|
+
builder.finish
|
282
|
+
else
|
283
|
+
raise "Unsupported arrow data type: #{data_type.class}"
|
284
|
+
end
|
285
|
+
end
|
286
|
+
end
|
@@ -3,8 +3,8 @@ require 'rspec/autorun'
|
|
3
3
|
require 'chalk_ruby/client'
|
4
4
|
require 'chalk_ruby/error'
|
5
5
|
|
6
|
-
CLIENT_ID = ''
|
7
|
-
CLIENT_SECRET = ''
|
6
|
+
CLIENT_ID = 'client-095f628e339a593e12a58559f5f8cd00'
|
7
|
+
CLIENT_SECRET = 'secret-55601b885bcc0d558c4abede69480aaac1dba6e1679889fa1d5196ba04f7af89'
|
8
8
|
|
9
9
|
RSpec.describe 'Online query' do
|
10
10
|
it 'should accept valid queries' do
|
@@ -14,6 +14,8 @@ require 'chalk_ruby/protos/chalk/engine/v1/query_server_services_pb'
|
|
14
14
|
require 'arrow'
|
15
15
|
|
16
16
|
|
17
|
+
CLIENT_ID = 'client-095f628e339a593e12a58559f5f8cd00'
|
18
|
+
CLIENT_SECRET = 'secret-55601b885bcc0d558c4abede69480aaac1dba6e1679889fa1d5196ba04f7af89'
|
17
19
|
|
18
20
|
|
19
21
|
RSpec.describe ChalkRuby::GrpcClient do
|
@@ -49,7 +51,7 @@ RSpec.describe ChalkRuby::GrpcClient do
|
|
49
51
|
it 'can perform bulk queries' do
|
50
52
|
response = client.query_bulk(
|
51
53
|
input: { 'user.id': 1 },
|
52
|
-
output: %w(user.id user.socure_score)
|
54
|
+
output: %w(user.id user.socure_score user.full_name)
|
53
55
|
)
|
54
56
|
|
55
57
|
expect(response).not_to be_nil
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: chalk_ruby
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.6
|
4
|
+
version: 0.2.8
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Chalk AI, Inc.
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2025-03-
|
11
|
+
date: 2025-03-10 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -361,6 +361,7 @@ files:
|
|
361
361
|
- sig/chalk_ruby/http/response.rbs
|
362
362
|
- sig/chalk_ruby/token.rbs
|
363
363
|
- sig/chalk_ruby/versions.rbs
|
364
|
+
- test/chalk_ruby/integration/arrow_conversion_test.rb
|
364
365
|
- test/chalk_ruby/integration/client_test.rb
|
365
366
|
- test/chalk_ruby/integration/grpc_client_test.rb
|
366
367
|
- test/chalk_ruby/test_helper.rb
|
@@ -391,6 +392,7 @@ signing_key:
|
|
391
392
|
specification_version: 4
|
392
393
|
summary: A simple Ruby client for Chalk
|
393
394
|
test_files:
|
395
|
+
- test/chalk_ruby/integration/arrow_conversion_test.rb
|
394
396
|
- test/chalk_ruby/integration/client_test.rb
|
395
397
|
- test/chalk_ruby/integration/grpc_client_test.rb
|
396
398
|
- test/chalk_ruby/test_helper.rb
|