chalk_ruby 0.2.7 → 0.2.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: bd2402b4edf2bce128c0f86f9b81ff089bed5a2cb9564fc5c1b30d4290b2d062
|
4
|
+
data.tar.gz: 38380294054c04a7743adf9b2e2080dcba1d73e9a8cb9f1c1ddca032bbb46883
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 99e17dd176aab3cbec5b2c65f1b1e44364978e5f2451f631c0b0c93832078cf73519c2a04982e3cb31ef69b93209550a9610c20326d3120ff361cdd6cedff754
|
7
|
+
data.tar.gz: 0db6e82923b3aaa2ced3401d5a40988b2a98b79ababf13ec50ab228234ab08ebe7c723015f86a8a1438488c5db3627bced34a4ed3bcff5a82ed1aa23a67950a6
|
@@ -181,31 +181,7 @@ module ChalkRuby
|
|
181
181
|
output_data = nil
|
182
182
|
|
183
183
|
if (!response.scalars_data.nil?) and response.scalars_data.length > 0
|
184
|
-
|
185
|
-
|
186
|
-
# Create a buffer reader
|
187
|
-
buffer_reader = Arrow::BufferInputStream.new(buffer)
|
188
|
-
|
189
|
-
# Create an IPC reader from the buffer reader
|
190
|
-
reader = Arrow::FeatherFileReader.new(buffer_reader)
|
191
|
-
|
192
|
-
# Read the table
|
193
|
-
|
194
|
-
|
195
|
-
output_data = []
|
196
|
-
|
197
|
-
table = reader.read
|
198
|
-
|
199
|
-
|
200
|
-
field_names = table.schema.fields.map(&:name)
|
201
|
-
table.each_record do |r|
|
202
|
-
row = {}
|
203
|
-
field_names.each do |f|
|
204
|
-
row[f] = r[f]
|
205
|
-
end
|
206
|
-
|
207
|
-
output_data << row
|
208
|
-
end
|
184
|
+
output_data = arrow_table_to_array(response.scalars_data)
|
209
185
|
end
|
210
186
|
|
211
187
|
{
|
@@ -419,6 +395,47 @@ module ChalkRuby
|
|
419
395
|
|
420
396
|
private
|
421
397
|
|
398
|
+
# Converts Arrow binary data to an array of hashes
|
399
|
+
# @param arrow_data [String] Binary Arrow data (IPC stream format)
|
400
|
+
# @return [Array<Hash>] Array of hashes with column name as keys and Ruby values
|
401
|
+
def arrow_table_to_array(arrow_data)
|
402
|
+
require 'arrow'
|
403
|
+
|
404
|
+
buffer = Arrow::Buffer.new(arrow_data)
|
405
|
+
buffer_reader = Arrow::BufferInputStream.new(buffer)
|
406
|
+
|
407
|
+
# Try IPC stream format first (which is what we expect from the query service)
|
408
|
+
begin
|
409
|
+
reader = Arrow::RecordBatchStreamReader.new(buffer_reader)
|
410
|
+
table = reader.read_all
|
411
|
+
rescue => e
|
412
|
+
# Fall back to feather format for backward compatibility
|
413
|
+
# buffer_reader.rewind
|
414
|
+
begin
|
415
|
+
reader = Arrow::FeatherFileReader.new(buffer_reader)
|
416
|
+
table = reader.read
|
417
|
+
rescue => e2
|
418
|
+
raise "Failed to parse Arrow data: #{e.message}, #{e2.message}"
|
419
|
+
end
|
420
|
+
end
|
421
|
+
|
422
|
+
output_data = []
|
423
|
+
field_names = table.schema.fields.map(&:name)
|
424
|
+
|
425
|
+
table.each_record do |r|
|
426
|
+
row = {}
|
427
|
+
field_names.each do |f|
|
428
|
+
value = r[f]
|
429
|
+
# Convert GLib::Bytes to Ruby String for binary and large string types
|
430
|
+
value = value.to_s if value.is_a?(GLib::Bytes)
|
431
|
+
row[f] = value
|
432
|
+
end
|
433
|
+
output_data << row
|
434
|
+
end
|
435
|
+
|
436
|
+
output_data
|
437
|
+
end
|
438
|
+
|
422
439
|
def to_feather(input_hash)
|
423
440
|
require 'arrow'
|
424
441
|
|
data/lib/chalk_ruby/version.rb
CHANGED
@@ -0,0 +1,286 @@
|
|
1
|
+
require 'minitest/autorun'
|
2
|
+
require 'chalk_ruby/grpc_client'
|
3
|
+
require 'arrow'
|
4
|
+
|
5
|
+
class ArrowConversionTest < Minitest::Test
|
6
|
+
def setup
|
7
|
+
@client = create_client
|
8
|
+
end
|
9
|
+
|
10
|
+
def test_arrow_string_conversion
|
11
|
+
assert_type_conversion('test_string', 'string', String)
|
12
|
+
end
|
13
|
+
|
14
|
+
def test_arrow_large_string_conversion
|
15
|
+
test_string = 'test_large_string'
|
16
|
+
# For large string types, we might get a GLib::Bytes object, which can be converted to a string
|
17
|
+
result = assert_type_conversion(test_string, 'large_string', [String, GLib::Bytes])
|
18
|
+
# Check the actual content
|
19
|
+
actual_value = result.is_a?(GLib::Bytes) ? result.to_s : result
|
20
|
+
assert_equal test_string, actual_value
|
21
|
+
end
|
22
|
+
|
23
|
+
def test_arrow_int8_conversion
|
24
|
+
assert_type_conversion(8, 'int8', Integer)
|
25
|
+
end
|
26
|
+
|
27
|
+
def test_arrow_int16_conversion
|
28
|
+
assert_type_conversion(16, 'int16', Integer)
|
29
|
+
end
|
30
|
+
|
31
|
+
def test_arrow_int32_conversion
|
32
|
+
assert_type_conversion(32, 'int32', Integer)
|
33
|
+
end
|
34
|
+
|
35
|
+
def test_arrow_int64_conversion
|
36
|
+
assert_type_conversion(64, 'int64', Integer)
|
37
|
+
end
|
38
|
+
|
39
|
+
def test_arrow_uint8_conversion
|
40
|
+
assert_type_conversion(8, 'uint8', Integer)
|
41
|
+
end
|
42
|
+
|
43
|
+
def test_arrow_uint16_conversion
|
44
|
+
assert_type_conversion(16, 'uint16', Integer)
|
45
|
+
end
|
46
|
+
|
47
|
+
def test_arrow_uint32_conversion
|
48
|
+
assert_type_conversion(32, 'uint32', Integer)
|
49
|
+
end
|
50
|
+
|
51
|
+
def test_arrow_uint64_conversion
|
52
|
+
assert_type_conversion(64, 'uint64', Integer)
|
53
|
+
end
|
54
|
+
|
55
|
+
def test_arrow_float_conversion
|
56
|
+
assert_type_conversion(3.14, 'float', Float)
|
57
|
+
end
|
58
|
+
|
59
|
+
def test_arrow_double_conversion
|
60
|
+
assert_type_conversion(3.14159, 'double', Float)
|
61
|
+
end
|
62
|
+
|
63
|
+
def test_arrow_boolean_conversion
|
64
|
+
assert_type_conversion(true, 'boolean', [TrueClass, FalseClass])
|
65
|
+
end
|
66
|
+
|
67
|
+
def test_arrow_timestamp_conversion
|
68
|
+
# Create timestamp as a Ruby Time object
|
69
|
+
timestamp = Time.now
|
70
|
+
# Pass timestamp seconds since epoch to Arrow
|
71
|
+
arrow_timestamp = timestamp.to_i
|
72
|
+
# Check that it converts back to a Time-like object
|
73
|
+
result = assert_type_conversion(arrow_timestamp, 'timestamp[s]', Time)
|
74
|
+
# Check that the timestamp values are approximately equal
|
75
|
+
assert_in_delta timestamp.to_i, result.to_i, 1
|
76
|
+
end
|
77
|
+
|
78
|
+
def test_arrow_date32_conversion
|
79
|
+
# Date represented as days since epoch
|
80
|
+
today = Date.today
|
81
|
+
days_since_epoch = (today - Date.new(1970, 1, 1)).to_i
|
82
|
+
result = assert_type_conversion(days_since_epoch, 'date32', Date)
|
83
|
+
assert_equal today, result
|
84
|
+
end
|
85
|
+
|
86
|
+
def test_arrow_binary_conversion
|
87
|
+
binary_data = "\x01\x02\x03\x04"
|
88
|
+
# For binary types, we might get a GLib::Bytes object, which can be converted to a string
|
89
|
+
result = assert_type_conversion(binary_data, 'binary', [String, GLib::Bytes])
|
90
|
+
# Check the actual content
|
91
|
+
actual_value = result.is_a?(GLib::Bytes) ? result.to_s : result
|
92
|
+
assert_equal binary_data, actual_value
|
93
|
+
end
|
94
|
+
|
95
|
+
def test_arrow_large_binary_conversion
|
96
|
+
large_binary_data = "\x01\x02\x03\x04" * 10
|
97
|
+
# For binary types, we might get a GLib::Bytes object, which can be converted to a string
|
98
|
+
result = assert_type_conversion(large_binary_data, 'large_binary', [String, GLib::Bytes])
|
99
|
+
# Check the actual content
|
100
|
+
actual_value = result.is_a?(GLib::Bytes) ? result.to_s : result
|
101
|
+
assert_equal large_binary_data, actual_value
|
102
|
+
end
|
103
|
+
|
104
|
+
def test_arrow_list_conversion
|
105
|
+
# Skip this test as it's difficult to create a list array with the current Arrow Ruby API
|
106
|
+
skip "Creating list arrays requires more complex approach with current Arrow Ruby API"
|
107
|
+
end
|
108
|
+
|
109
|
+
def test_arrow_struct_conversion
|
110
|
+
# Skip this test as it's difficult to create a struct array with the current Arrow Ruby API
|
111
|
+
skip "Creating struct arrays requires more complex approach with current Arrow Ruby API"
|
112
|
+
end
|
113
|
+
|
114
|
+
def test_arrow_null_conversion
|
115
|
+
# Test that null values are properly converted to nil
|
116
|
+
schema = Arrow::Schema.new([Arrow::Field.new("test_null", Arrow::Int32DataType.new)])
|
117
|
+
|
118
|
+
# Create array with a null value
|
119
|
+
builder = Arrow::Int32ArrayBuilder.new
|
120
|
+
builder.append_null
|
121
|
+
array = builder.finish
|
122
|
+
|
123
|
+
table = Arrow::Table.new(schema, [array])
|
124
|
+
|
125
|
+
# Serialize to Arrow IPC streaming format
|
126
|
+
buffer = Arrow::ResizableBuffer.new(0)
|
127
|
+
output = Arrow::BufferOutputStream.new(buffer)
|
128
|
+
writer = Arrow::RecordBatchStreamWriter.new(output, table.schema)
|
129
|
+
writer.write_table(table)
|
130
|
+
writer.close
|
131
|
+
output.close
|
132
|
+
arrow_data = buffer.data.to_s
|
133
|
+
|
134
|
+
result = @client.send(:arrow_table_to_array, arrow_data)
|
135
|
+
|
136
|
+
assert_instance_of Array, result
|
137
|
+
assert_equal 1, result.length
|
138
|
+
assert_nil result[0]["test_null"]
|
139
|
+
end
|
140
|
+
|
141
|
+
private
|
142
|
+
|
143
|
+
def create_client
|
144
|
+
config = ChalkRuby::Config.new(
|
145
|
+
client_id: 'test',
|
146
|
+
client_secret: 'test',
|
147
|
+
environment: 'test',
|
148
|
+
query_server: 'test'
|
149
|
+
)
|
150
|
+
ChalkRuby::GrpcClient.new(config)
|
151
|
+
end
|
152
|
+
|
153
|
+
def assert_type_conversion(value, arrow_type_name, expected_ruby_type)
|
154
|
+
# Create schema with the specified arrow type
|
155
|
+
data_type = create_arrow_data_type(arrow_type_name)
|
156
|
+
schema = Arrow::Schema.new([Arrow::Field.new("test_field", data_type)])
|
157
|
+
|
158
|
+
# Create array with the value
|
159
|
+
array = create_arrow_array(data_type, value)
|
160
|
+
|
161
|
+
# Create table with schema and array
|
162
|
+
table = Arrow::Table.new(schema, [array])
|
163
|
+
|
164
|
+
# Serialize table to Arrow IPC streaming format
|
165
|
+
buffer = Arrow::ResizableBuffer.new(0)
|
166
|
+
output = Arrow::BufferOutputStream.new(buffer)
|
167
|
+
writer = Arrow::RecordBatchStreamWriter.new(output, table.schema)
|
168
|
+
writer.write_table(table)
|
169
|
+
writer.close
|
170
|
+
output.close
|
171
|
+
arrow_data = buffer.data.to_s
|
172
|
+
|
173
|
+
# Convert arrow data to array of hashes
|
174
|
+
result = @client.send(:arrow_table_to_array, arrow_data)
|
175
|
+
|
176
|
+
# Check that the value was properly converted
|
177
|
+
assert_instance_of Array, result
|
178
|
+
assert_equal 1, result.length
|
179
|
+
|
180
|
+
expected_types = Array(expected_ruby_type)
|
181
|
+
assert expected_types.any? { |type| result[0]["test_field"].is_a?(type) },
|
182
|
+
"Expected #{result[0]["test_field"].inspect} to be a #{expected_ruby_type}, but was #{result[0]["test_field"].class}"
|
183
|
+
|
184
|
+
# Return the converted value for further assertions
|
185
|
+
result[0]["test_field"]
|
186
|
+
end
|
187
|
+
|
188
|
+
def create_arrow_data_type(type_name)
|
189
|
+
case type_name
|
190
|
+
when 'int8' then Arrow::Int8DataType.new
|
191
|
+
when 'int16' then Arrow::Int16DataType.new
|
192
|
+
when 'int32' then Arrow::Int32DataType.new
|
193
|
+
when 'int64' then Arrow::Int64DataType.new
|
194
|
+
when 'uint8' then Arrow::UInt8DataType.new
|
195
|
+
when 'uint16' then Arrow::UInt16DataType.new
|
196
|
+
when 'uint32' then Arrow::UInt32DataType.new
|
197
|
+
when 'uint64' then Arrow::UInt64DataType.new
|
198
|
+
when 'float' then Arrow::FloatDataType.new
|
199
|
+
when 'double' then Arrow::DoubleDataType.new
|
200
|
+
when 'string' then Arrow::StringDataType.new
|
201
|
+
when 'large_string' then Arrow::LargeStringDataType.new
|
202
|
+
when 'binary' then Arrow::BinaryDataType.new
|
203
|
+
when 'large_binary' then Arrow::LargeBinaryDataType.new
|
204
|
+
when 'boolean' then Arrow::BooleanDataType.new
|
205
|
+
when 'timestamp[s]' then Arrow::TimestampDataType.new(:second)
|
206
|
+
when 'date32' then Arrow::Date32DataType.new
|
207
|
+
else
|
208
|
+
raise "Unsupported arrow type: #{type_name}"
|
209
|
+
end
|
210
|
+
end
|
211
|
+
|
212
|
+
def create_arrow_array(data_type, value)
|
213
|
+
case data_type
|
214
|
+
when Arrow::Int8DataType
|
215
|
+
builder = Arrow::Int8ArrayBuilder.new
|
216
|
+
builder.append_value(value)
|
217
|
+
builder.finish
|
218
|
+
when Arrow::Int16DataType
|
219
|
+
builder = Arrow::Int16ArrayBuilder.new
|
220
|
+
builder.append_value(value)
|
221
|
+
builder.finish
|
222
|
+
when Arrow::Int32DataType
|
223
|
+
builder = Arrow::Int32ArrayBuilder.new
|
224
|
+
builder.append_value(value)
|
225
|
+
builder.finish
|
226
|
+
when Arrow::Int64DataType
|
227
|
+
builder = Arrow::Int64ArrayBuilder.new
|
228
|
+
builder.append_value(value)
|
229
|
+
builder.finish
|
230
|
+
when Arrow::UInt8DataType
|
231
|
+
builder = Arrow::UInt8ArrayBuilder.new
|
232
|
+
builder.append_value(value)
|
233
|
+
builder.finish
|
234
|
+
when Arrow::UInt16DataType
|
235
|
+
builder = Arrow::UInt16ArrayBuilder.new
|
236
|
+
builder.append_value(value)
|
237
|
+
builder.finish
|
238
|
+
when Arrow::UInt32DataType
|
239
|
+
builder = Arrow::UInt32ArrayBuilder.new
|
240
|
+
builder.append_value(value)
|
241
|
+
builder.finish
|
242
|
+
when Arrow::UInt64DataType
|
243
|
+
builder = Arrow::UInt64ArrayBuilder.new
|
244
|
+
builder.append_value(value)
|
245
|
+
builder.finish
|
246
|
+
when Arrow::FloatDataType
|
247
|
+
builder = Arrow::FloatArrayBuilder.new
|
248
|
+
builder.append_value(value)
|
249
|
+
builder.finish
|
250
|
+
when Arrow::DoubleDataType
|
251
|
+
builder = Arrow::DoubleArrayBuilder.new
|
252
|
+
builder.append_value(value)
|
253
|
+
builder.finish
|
254
|
+
when Arrow::StringDataType
|
255
|
+
builder = Arrow::StringArrayBuilder.new
|
256
|
+
builder.append_value(value)
|
257
|
+
builder.finish
|
258
|
+
when Arrow::LargeStringDataType
|
259
|
+
builder = Arrow::LargeStringArrayBuilder.new
|
260
|
+
builder.append_value(value)
|
261
|
+
builder.finish
|
262
|
+
when Arrow::BinaryDataType
|
263
|
+
builder = Arrow::BinaryArrayBuilder.new
|
264
|
+
builder.append_value(value)
|
265
|
+
builder.finish
|
266
|
+
when Arrow::LargeBinaryDataType
|
267
|
+
builder = Arrow::LargeBinaryArrayBuilder.new
|
268
|
+
builder.append_value(value)
|
269
|
+
builder.finish
|
270
|
+
when Arrow::BooleanDataType
|
271
|
+
builder = Arrow::BooleanArrayBuilder.new
|
272
|
+
builder.append_value(value)
|
273
|
+
builder.finish
|
274
|
+
when Arrow::TimestampDataType
|
275
|
+
builder = Arrow::TimestampArrayBuilder.new(data_type)
|
276
|
+
builder.append_value(value)
|
277
|
+
builder.finish
|
278
|
+
when Arrow::Date32DataType
|
279
|
+
builder = Arrow::Date32ArrayBuilder.new
|
280
|
+
builder.append_value(value)
|
281
|
+
builder.finish
|
282
|
+
else
|
283
|
+
raise "Unsupported arrow data type: #{data_type.class}"
|
284
|
+
end
|
285
|
+
end
|
286
|
+
end
|
@@ -3,8 +3,8 @@ require 'rspec/autorun'
|
|
3
3
|
require 'chalk_ruby/client'
|
4
4
|
require 'chalk_ruby/error'
|
5
5
|
|
6
|
-
CLIENT_ID = ''
|
7
|
-
CLIENT_SECRET = ''
|
6
|
+
CLIENT_ID = 'client-095f628e339a593e12a58559f5f8cd00'
|
7
|
+
CLIENT_SECRET = 'secret-55601b885bcc0d558c4abede69480aaac1dba6e1679889fa1d5196ba04f7af89'
|
8
8
|
|
9
9
|
RSpec.describe 'Online query' do
|
10
10
|
it 'should accept valid queries' do
|
@@ -14,6 +14,8 @@ require 'chalk_ruby/protos/chalk/engine/v1/query_server_services_pb'
|
|
14
14
|
require 'arrow'
|
15
15
|
|
16
16
|
|
17
|
+
CLIENT_ID = 'client-095f628e339a593e12a58559f5f8cd00'
|
18
|
+
CLIENT_SECRET = 'secret-55601b885bcc0d558c4abede69480aaac1dba6e1679889fa1d5196ba04f7af89'
|
17
19
|
|
18
20
|
|
19
21
|
RSpec.describe ChalkRuby::GrpcClient do
|
@@ -49,7 +51,7 @@ RSpec.describe ChalkRuby::GrpcClient do
|
|
49
51
|
it 'can perform bulk queries' do
|
50
52
|
response = client.query_bulk(
|
51
53
|
input: { 'user.id': 1 },
|
52
|
-
output: %w(user.id user.socure_score)
|
54
|
+
output: %w(user.id user.socure_score user.full_name)
|
53
55
|
)
|
54
56
|
|
55
57
|
expect(response).not_to be_nil
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: chalk_ruby
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.7
|
4
|
+
version: 0.2.8
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Chalk AI, Inc.
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2025-03-
|
11
|
+
date: 2025-03-10 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -361,6 +361,7 @@ files:
|
|
361
361
|
- sig/chalk_ruby/http/response.rbs
|
362
362
|
- sig/chalk_ruby/token.rbs
|
363
363
|
- sig/chalk_ruby/versions.rbs
|
364
|
+
- test/chalk_ruby/integration/arrow_conversion_test.rb
|
364
365
|
- test/chalk_ruby/integration/client_test.rb
|
365
366
|
- test/chalk_ruby/integration/grpc_client_test.rb
|
366
367
|
- test/chalk_ruby/test_helper.rb
|
@@ -391,6 +392,7 @@ signing_key:
|
|
391
392
|
specification_version: 4
|
392
393
|
summary: A simple Ruby client for Chalk
|
393
394
|
test_files:
|
395
|
+
- test/chalk_ruby/integration/arrow_conversion_test.rb
|
394
396
|
- test/chalk_ruby/integration/client_test.rb
|
395
397
|
- test/chalk_ruby/integration/grpc_client_test.rb
|
396
398
|
- test/chalk_ruby/test_helper.rb
|