chalk_ruby 0.3.0 → 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 5078b236fa2532b078ba26509838b574bcb72c624ac4a27eb41f7fb34b9fb617
|
4
|
+
data.tar.gz: af01a9060816db57ff9198129a8d3ac2d0c7cce683e0ea59da1f9f6ed7ead09a
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: f0f5a7f1ca26cefac916b76bf8f83b77b94ecc64344985d7d5adf6fe77aaebc61b811b6c9f5cbc3dd1cc78dc7b1fe519a3d7b9e45656294c0bc021b42cafe9cf
|
7
|
+
data.tar.gz: 910e8c846f446f44bb556f34c430d4eec7e24482ff3e13f29ad6874c635f48f5539e0502c25dfb8a9d6d48227a538beedbcd598474d6bc3c059136746f9194b6
|
data/lib/chalk_ruby/grpc_client.rb
CHANGED
@@ -195,31 +195,7 @@ module ChalkRuby
|
|
195
195
|
output_data = nil
|
196
196
|
|
197
197
|
if (!response.scalars_data.nil?) and response.scalars_data.length > 0
|
198
|
-
|
199
|
-
|
200
|
-
# Create a buffer reader
|
201
|
-
buffer_reader = Arrow::BufferInputStream.new(buffer)
|
202
|
-
|
203
|
-
# Create an IPC reader from the buffer reader
|
204
|
-
reader = Arrow::FeatherFileReader.new(buffer_reader)
|
205
|
-
|
206
|
-
# Read the table
|
207
|
-
|
208
|
-
|
209
|
-
output_data = []
|
210
|
-
|
211
|
-
table = reader.read
|
212
|
-
|
213
|
-
|
214
|
-
field_names = table.schema.fields.map(&:name)
|
215
|
-
table.each_record do |r|
|
216
|
-
row = {}
|
217
|
-
field_names.each do |f|
|
218
|
-
row[f] = r[f]
|
219
|
-
end
|
220
|
-
|
221
|
-
output_data << row
|
222
|
-
end
|
198
|
+
output_data = arrow_table_to_array(response.scalars_data)
|
223
199
|
end
|
224
200
|
|
225
201
|
{
|
@@ -436,6 +412,47 @@ module ChalkRuby
|
|
436
412
|
|
437
413
|
private
|
438
414
|
|
415
|
+
# Converts Arrow binary data to an array of hashes
|
416
|
+
# @param arrow_data [String] Binary Arrow data (IPC stream format)
|
417
|
+
# @return [Array<Hash>] Array of hashes with column name as keys and Ruby values
|
418
|
+
def arrow_table_to_array(arrow_data)
|
419
|
+
require 'arrow'
|
420
|
+
|
421
|
+
buffer = Arrow::Buffer.new(arrow_data)
|
422
|
+
buffer_reader = Arrow::BufferInputStream.new(buffer)
|
423
|
+
|
424
|
+
# Try IPC stream format first (which is what we expect from the query service)
|
425
|
+
begin
|
426
|
+
reader = Arrow::RecordBatchStreamReader.new(buffer_reader)
|
427
|
+
table = reader.read_all
|
428
|
+
rescue => e
|
429
|
+
# Fall back to feather format for backward compatibility
|
430
|
+
# buffer_reader.rewind
|
431
|
+
begin
|
432
|
+
reader = Arrow::FeatherFileReader.new(buffer_reader)
|
433
|
+
table = reader.read
|
434
|
+
rescue => e2
|
435
|
+
raise "Failed to parse Arrow data: #{e.message}, #{e2.message}"
|
436
|
+
end
|
437
|
+
end
|
438
|
+
|
439
|
+
output_data = []
|
440
|
+
field_names = table.schema.fields.map(&:name)
|
441
|
+
|
442
|
+
table.each_record do |r|
|
443
|
+
row = {}
|
444
|
+
field_names.each do |f|
|
445
|
+
value = r[f]
|
446
|
+
# Convert GLib::Bytes to Ruby String for binary and large string types
|
447
|
+
value = value.to_s if value.is_a?(GLib::Bytes)
|
448
|
+
row[f] = value
|
449
|
+
end
|
450
|
+
output_data << row
|
451
|
+
end
|
452
|
+
|
453
|
+
output_data
|
454
|
+
end
|
455
|
+
|
439
456
|
def to_feather(input_hash)
|
440
457
|
require 'arrow'
|
441
458
|
|
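data/lib/chalk_ruby/version.rb
CHANGED
[hunk for this file is missing from this extract]
data/test/chalk_ruby/integration/arrow_conversion_test.rb
ADDED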
@@ -0,0 +1,286 @@
|
|
1
|
+
require 'minitest/autorun'
|
2
|
+
require 'chalk_ruby/grpc_client'
|
3
|
+
require 'arrow'
|
4
|
+
|
5
|
+
class ArrowConversionTest < Minitest::Test
|
6
|
+
def setup
|
7
|
+
@client = create_client
|
8
|
+
end
|
9
|
+
|
10
|
+
def test_arrow_string_conversion
|
11
|
+
assert_type_conversion('test_string', 'string', String)
|
12
|
+
end
|
13
|
+
|
14
|
+
def test_arrow_large_string_conversion
|
15
|
+
test_string = 'test_large_string'
|
16
|
+
# For large string types, we might get a GLib::Bytes object, which can be converted to a string
|
17
|
+
result = assert_type_conversion(test_string, 'large_string', [String, GLib::Bytes])
|
18
|
+
# Check the actual content
|
19
|
+
actual_value = result.is_a?(GLib::Bytes) ? result.to_s : result
|
20
|
+
assert_equal test_string, actual_value
|
21
|
+
end
|
22
|
+
|
23
|
+
def test_arrow_int8_conversion
|
24
|
+
assert_type_conversion(8, 'int8', Integer)
|
25
|
+
end
|
26
|
+
|
27
|
+
def test_arrow_int16_conversion
|
28
|
+
assert_type_conversion(16, 'int16', Integer)
|
29
|
+
end
|
30
|
+
|
31
|
+
def test_arrow_int32_conversion
|
32
|
+
assert_type_conversion(32, 'int32', Integer)
|
33
|
+
end
|
34
|
+
|
35
|
+
def test_arrow_int64_conversion
|
36
|
+
assert_type_conversion(64, 'int64', Integer)
|
37
|
+
end
|
38
|
+
|
39
|
+
def test_arrow_uint8_conversion
|
40
|
+
assert_type_conversion(8, 'uint8', Integer)
|
41
|
+
end
|
42
|
+
|
43
|
+
def test_arrow_uint16_conversion
|
44
|
+
assert_type_conversion(16, 'uint16', Integer)
|
45
|
+
end
|
46
|
+
|
47
|
+
def test_arrow_uint32_conversion
|
48
|
+
assert_type_conversion(32, 'uint32', Integer)
|
49
|
+
end
|
50
|
+
|
51
|
+
def test_arrow_uint64_conversion
|
52
|
+
assert_type_conversion(64, 'uint64', Integer)
|
53
|
+
end
|
54
|
+
|
55
|
+
def test_arrow_float_conversion
|
56
|
+
assert_type_conversion(3.14, 'float', Float)
|
57
|
+
end
|
58
|
+
|
59
|
+
def test_arrow_double_conversion
|
60
|
+
assert_type_conversion(3.14159, 'double', Float)
|
61
|
+
end
|
62
|
+
|
63
|
+
def test_arrow_boolean_conversion
|
64
|
+
assert_type_conversion(true, 'boolean', [TrueClass, FalseClass])
|
65
|
+
end
|
66
|
+
|
67
|
+
def test_arrow_timestamp_conversion
|
68
|
+
# Create timestamp as a Ruby Time object
|
69
|
+
timestamp = Time.now
|
70
|
+
# Pass timestamp seconds since epoch to Arrow
|
71
|
+
arrow_timestamp = timestamp.to_i
|
72
|
+
# Check that it converts back to a Time-like object
|
73
|
+
result = assert_type_conversion(arrow_timestamp, 'timestamp[s]', Time)
|
74
|
+
# Check that the timestamp values are approximately equal
|
75
|
+
assert_in_delta timestamp.to_i, result.to_i, 1
|
76
|
+
end
|
77
|
+
|
78
|
+
def test_arrow_date32_conversion
|
79
|
+
# Date represented as days since epoch
|
80
|
+
today = Date.today
|
81
|
+
days_since_epoch = (today - Date.new(1970, 1, 1)).to_i
|
82
|
+
result = assert_type_conversion(days_since_epoch, 'date32', Date)
|
83
|
+
assert_equal today, result
|
84
|
+
end
|
85
|
+
|
86
|
+
def test_arrow_binary_conversion
|
87
|
+
binary_data = "\x01\x02\x03\x04"
|
88
|
+
# For binary types, we might get a GLib::Bytes object, which can be converted to a string
|
89
|
+
result = assert_type_conversion(binary_data, 'binary', [String, GLib::Bytes])
|
90
|
+
# Check the actual content
|
91
|
+
actual_value = result.is_a?(GLib::Bytes) ? result.to_s : result
|
92
|
+
assert_equal binary_data, actual_value
|
93
|
+
end
|
94
|
+
|
95
|
+
def test_arrow_large_binary_conversion
|
96
|
+
large_binary_data = "\x01\x02\x03\x04" * 10
|
97
|
+
# For binary types, we might get a GLib::Bytes object, which can be converted to a string
|
98
|
+
result = assert_type_conversion(large_binary_data, 'large_binary', [String, GLib::Bytes])
|
99
|
+
# Check the actual content
|
100
|
+
actual_value = result.is_a?(GLib::Bytes) ? result.to_s : result
|
101
|
+
assert_equal large_binary_data, actual_value
|
102
|
+
end
|
103
|
+
|
104
|
+
def test_arrow_list_conversion
|
105
|
+
# Skip this test as it's difficult to create a list array with the current Arrow Ruby API
|
106
|
+
skip "Creating list arrays requires more complex approach with current Arrow Ruby API"
|
107
|
+
end
|
108
|
+
|
109
|
+
def test_arrow_struct_conversion
|
110
|
+
# Skip this test as it's difficult to create a struct array with the current Arrow Ruby API
|
111
|
+
skip "Creating struct arrays requires more complex approach with current Arrow Ruby API"
|
112
|
+
end
|
113
|
+
|
114
|
+
def test_arrow_null_conversion
|
115
|
+
# Test that null values are properly converted to nil
|
116
|
+
schema = Arrow::Schema.new([Arrow::Field.new("test_null", Arrow::Int32DataType.new)])
|
117
|
+
|
118
|
+
# Create array with a null value
|
119
|
+
builder = Arrow::Int32ArrayBuilder.new
|
120
|
+
builder.append_null
|
121
|
+
array = builder.finish
|
122
|
+
|
123
|
+
table = Arrow::Table.new(schema, [array])
|
124
|
+
|
125
|
+
# Serialize to Arrow IPC streaming format
|
126
|
+
buffer = Arrow::ResizableBuffer.new(0)
|
127
|
+
output = Arrow::BufferOutputStream.new(buffer)
|
128
|
+
writer = Arrow::RecordBatchStreamWriter.new(output, table.schema)
|
129
|
+
writer.write_table(table)
|
130
|
+
writer.close
|
131
|
+
output.close
|
132
|
+
arrow_data = buffer.data.to_s
|
133
|
+
|
134
|
+
result = @client.send(:arrow_table_to_array, arrow_data)
|
135
|
+
|
136
|
+
assert_instance_of Array, result
|
137
|
+
assert_equal 1, result.length
|
138
|
+
assert_nil result[0]["test_null"]
|
139
|
+
end
|
140
|
+
|
141
|
+
private
|
142
|
+
|
143
|
+
def create_client
|
144
|
+
config = ChalkRuby::Config.new(
|
145
|
+
client_id: 'test',
|
146
|
+
client_secret: 'test',
|
147
|
+
environment: 'test',
|
148
|
+
query_server: 'test'
|
149
|
+
)
|
150
|
+
ChalkRuby::GrpcClient.new(config)
|
151
|
+
end
|
152
|
+
|
153
|
+
def assert_type_conversion(value, arrow_type_name, expected_ruby_type)
|
154
|
+
# Create schema with the specified arrow type
|
155
|
+
data_type = create_arrow_data_type(arrow_type_name)
|
156
|
+
schema = Arrow::Schema.new([Arrow::Field.new("test_field", data_type)])
|
157
|
+
|
158
|
+
# Create array with the value
|
159
|
+
array = create_arrow_array(data_type, value)
|
160
|
+
|
161
|
+
# Create table with schema and array
|
162
|
+
table = Arrow::Table.new(schema, [array])
|
163
|
+
|
164
|
+
# Serialize table to Arrow IPC streaming format
|
165
|
+
buffer = Arrow::ResizableBuffer.new(0)
|
166
|
+
output = Arrow::BufferOutputStream.new(buffer)
|
167
|
+
writer = Arrow::RecordBatchStreamWriter.new(output, table.schema)
|
168
|
+
writer.write_table(table)
|
169
|
+
writer.close
|
170
|
+
output.close
|
171
|
+
arrow_data = buffer.data.to_s
|
172
|
+
|
173
|
+
# Convert arrow data to array of hashes
|
174
|
+
result = @client.send(:arrow_table_to_array, arrow_data)
|
175
|
+
|
176
|
+
# Check that the value was properly converted
|
177
|
+
assert_instance_of Array, result
|
178
|
+
assert_equal 1, result.length
|
179
|
+
|
180
|
+
expected_types = Array(expected_ruby_type)
|
181
|
+
assert expected_types.any? { |type| result[0]["test_field"].is_a?(type) },
|
182
|
+
"Expected #{result[0]["test_field"].inspect} to be a #{expected_ruby_type}, but was #{result[0]["test_field"].class}"
|
183
|
+
|
184
|
+
# Return the converted value for further assertions
|
185
|
+
result[0]["test_field"]
|
186
|
+
end
|
187
|
+
|
188
|
+
def create_arrow_data_type(type_name)
|
189
|
+
case type_name
|
190
|
+
when 'int8' then Arrow::Int8DataType.new
|
191
|
+
when 'int16' then Arrow::Int16DataType.new
|
192
|
+
when 'int32' then Arrow::Int32DataType.new
|
193
|
+
when 'int64' then Arrow::Int64DataType.new
|
194
|
+
when 'uint8' then Arrow::UInt8DataType.new
|
195
|
+
when 'uint16' then Arrow::UInt16DataType.new
|
196
|
+
when 'uint32' then Arrow::UInt32DataType.new
|
197
|
+
when 'uint64' then Arrow::UInt64DataType.new
|
198
|
+
when 'float' then Arrow::FloatDataType.new
|
199
|
+
when 'double' then Arrow::DoubleDataType.new
|
200
|
+
when 'string' then Arrow::StringDataType.new
|
201
|
+
when 'large_string' then Arrow::LargeStringDataType.new
|
202
|
+
when 'binary' then Arrow::BinaryDataType.new
|
203
|
+
when 'large_binary' then Arrow::LargeBinaryDataType.new
|
204
|
+
when 'boolean' then Arrow::BooleanDataType.new
|
205
|
+
when 'timestamp[s]' then Arrow::TimestampDataType.new(:second)
|
206
|
+
when 'date32' then Arrow::Date32DataType.new
|
207
|
+
else
|
208
|
+
raise "Unsupported arrow type: #{type_name}"
|
209
|
+
end
|
210
|
+
end
|
211
|
+
|
212
|
+
def create_arrow_array(data_type, value)
|
213
|
+
case data_type
|
214
|
+
when Arrow::Int8DataType
|
215
|
+
builder = Arrow::Int8ArrayBuilder.new
|
216
|
+
builder.append_value(value)
|
217
|
+
builder.finish
|
218
|
+
when Arrow::Int16DataType
|
219
|
+
builder = Arrow::Int16ArrayBuilder.new
|
220
|
+
builder.append_value(value)
|
221
|
+
builder.finish
|
222
|
+
when Arrow::Int32DataType
|
223
|
+
builder = Arrow::Int32ArrayBuilder.new
|
224
|
+
builder.append_value(value)
|
225
|
+
builder.finish
|
226
|
+
when Arrow::Int64DataType
|
227
|
+
builder = Arrow::Int64ArrayBuilder.new
|
228
|
+
builder.append_value(value)
|
229
|
+
builder.finish
|
230
|
+
when Arrow::UInt8DataType
|
231
|
+
builder = Arrow::UInt8ArrayBuilder.new
|
232
|
+
builder.append_value(value)
|
233
|
+
builder.finish
|
234
|
+
when Arrow::UInt16DataType
|
235
|
+
builder = Arrow::UInt16ArrayBuilder.new
|
236
|
+
builder.append_value(value)
|
237
|
+
builder.finish
|
238
|
+
when Arrow::UInt32DataType
|
239
|
+
builder = Arrow::UInt32ArrayBuilder.new
|
240
|
+
builder.append_value(value)
|
241
|
+
builder.finish
|
242
|
+
when Arrow::UInt64DataType
|
243
|
+
builder = Arrow::UInt64ArrayBuilder.new
|
244
|
+
builder.append_value(value)
|
245
|
+
builder.finish
|
246
|
+
when Arrow::FloatDataType
|
247
|
+
builder = Arrow::FloatArrayBuilder.new
|
248
|
+
builder.append_value(value)
|
249
|
+
builder.finish
|
250
|
+
when Arrow::DoubleDataType
|
251
|
+
builder = Arrow::DoubleArrayBuilder.new
|
252
|
+
builder.append_value(value)
|
253
|
+
builder.finish
|
254
|
+
when Arrow::StringDataType
|
255
|
+
builder = Arrow::StringArrayBuilder.new
|
256
|
+
builder.append_value(value)
|
257
|
+
builder.finish
|
258
|
+
when Arrow::LargeStringDataType
|
259
|
+
builder = Arrow::LargeStringArrayBuilder.new
|
260
|
+
builder.append_value(value)
|
261
|
+
builder.finish
|
262
|
+
when Arrow::BinaryDataType
|
263
|
+
builder = Arrow::BinaryArrayBuilder.new
|
264
|
+
builder.append_value(value)
|
265
|
+
builder.finish
|
266
|
+
when Arrow::LargeBinaryDataType
|
267
|
+
builder = Arrow::LargeBinaryArrayBuilder.new
|
268
|
+
builder.append_value(value)
|
269
|
+
builder.finish
|
270
|
+
when Arrow::BooleanDataType
|
271
|
+
builder = Arrow::BooleanArrayBuilder.new
|
272
|
+
builder.append_value(value)
|
273
|
+
builder.finish
|
274
|
+
when Arrow::TimestampDataType
|
275
|
+
builder = Arrow::TimestampArrayBuilder.new(data_type)
|
276
|
+
builder.append_value(value)
|
277
|
+
builder.finish
|
278
|
+
when Arrow::Date32DataType
|
279
|
+
builder = Arrow::Date32ArrayBuilder.new
|
280
|
+
builder.append_value(value)
|
281
|
+
builder.finish
|
282
|
+
else
|
283
|
+
raise "Unsupported arrow data type: #{data_type.class}"
|
284
|
+
end
|
285
|
+
end
|
286
|
+
end
|
data/test/chalk_ruby/integration/grpc_client_test.rb
CHANGED
@@ -1,18 +1,51 @@
|
|
1
|
-
|
2
|
-
require '
|
1
|
+
$LOAD_PATH.unshift File.expand_path('../../../../lib', __FILE__)
|
2
|
+
# require 'rspec'
|
3
|
+
# require 'chalk_ruby'
|
4
|
+
# require 'chalk_ruby'
|
5
|
+
|
6
|
+
require 'date'
|
7
|
+
require 'rspec/autorun'
|
8
|
+
require 'chalk_ruby/grpc_client'
|
9
|
+
require 'chalk_ruby/grpc/auth_interceptor'
|
10
|
+
require 'chalk_ruby/error'
|
11
|
+
require 'chalk_ruby/protos/chalk/server/v1/auth_pb'
|
12
|
+
require 'chalk_ruby/protos/chalk/server/v1/auth_services_pb'
|
13
|
+
require 'chalk_ruby/protos/chalk/engine/v1/query_server_services_pb'
|
14
|
+
require 'arrow'
|
15
|
+
|
16
|
+
|
17
|
+
|
3
18
|
|
4
19
|
RSpec.describe ChalkRuby::GrpcClient do
|
5
|
-
describe '#
|
20
|
+
describe '#query' do
|
6
21
|
let(:client) do
|
7
|
-
ChalkRuby::GrpcClient.
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
22
|
+
ChalkRuby::GrpcClient.new(
|
23
|
+
ChalkRuby::Config.new(
|
24
|
+
query_server: "standard-gke.chalk-develop.gcp.chalk.ai",
|
25
|
+
api_server: "api.staging.chalk.ai:443",
|
26
|
+
client_id: CLIENT_ID,
|
27
|
+
client_secret: CLIENT_SECRET,
|
28
|
+
environment: "tmnmc9beyujew",
|
29
|
+
# api_timeout: 0.6, # seconds
|
30
|
+
# connect_timeout: 0.3, # seconds
|
31
|
+
# query_service_root_ca_path: "/Users/andrew/found_ca.pem" # path to the root ca for chalkai.internal.found.app,
|
32
|
+
)
|
13
33
|
)
|
14
34
|
end
|
15
35
|
|
36
|
+
# it 'can perform queries' do
|
37
|
+
# response = client.query(
|
38
|
+
# input: { 'business.id': 1 },
|
39
|
+
# output: %w(business.id)
|
40
|
+
# )
|
41
|
+
#
|
42
|
+
# expect(response).not_to be_nil
|
43
|
+
#
|
44
|
+
# puts response
|
45
|
+
# # The response should be a OnlineQueryBulkResponse
|
46
|
+
# # expect(response).to be_a(Chalk::Common::V1::OnlineQueryBulkResponse)
|
47
|
+
# end
|
48
|
+
|
16
49
|
it 'can perform bulk queries' do
|
17
50
|
response = client.query_bulk(
|
18
51
|
input: { 'user.id': 1 },
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: chalk_ruby
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.0
|
4
|
+
version: 0.3.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Chalk AI, Inc.
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2025-
|
11
|
+
date: 2025-05-05 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -362,6 +362,7 @@ files:
|
|
362
362
|
- sig/chalk_ruby/http/response.rbs
|
363
363
|
- sig/chalk_ruby/token.rbs
|
364
364
|
- sig/chalk_ruby/versions.rbs
|
365
|
+
- test/chalk_ruby/integration/arrow_conversion_test.rb
|
365
366
|
- test/chalk_ruby/integration/client_test.rb
|
366
367
|
- test/chalk_ruby/integration/grpc_client_test.rb
|
367
368
|
- test/chalk_ruby/test_helper.rb
|
@@ -392,6 +393,7 @@ signing_key:
|
|
392
393
|
specification_version: 4
|
393
394
|
summary: A simple Ruby client for Chalk
|
394
395
|
test_files:
|
396
|
+
- test/chalk_ruby/integration/arrow_conversion_test.rb
|
395
397
|
- test/chalk_ruby/integration/client_test.rb
|
396
398
|
- test/chalk_ruby/integration/grpc_client_test.rb
|
397
399
|
- test/chalk_ruby/test_helper.rb
|