chalk_ruby 0.2.7 → 0.2.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: f55160574645e2052ff5e688d77f508e85dc77ab01f6e97905e3367d85b11156
4
- data.tar.gz: 63f3c23d00f5432082229ee40666316058714170e0f71a06e6a61d989b1d0d89
3
+ metadata.gz: bd2402b4edf2bce128c0f86f9b81ff089bed5a2cb9564fc5c1b30d4290b2d062
4
+ data.tar.gz: 38380294054c04a7743adf9b2e2080dcba1d73e9a8cb9f1c1ddca032bbb46883
5
5
  SHA512:
6
- metadata.gz: f6e201a24d058a626add01241c3e065dba7f83f9824701727b36b5991268e845a8aecb3878105dd7f7a2269f9b334e8aaff01696519aab6335213801a842376c
7
- data.tar.gz: 107156aececa9c7fdfcd98368f78492cb76c08845be3e49c21a9060275da2844bd96cbe28ab836b6d8a7ec9a2db0a1a5197dba7c8bd5ad4286a447eba1f5e9cf
6
+ metadata.gz: 99e17dd176aab3cbec5b2c65f1b1e44364978e5f2451f631c0b0c93832078cf73519c2a04982e3cb31ef69b93209550a9610c20326d3120ff361cdd6cedff754
7
+ data.tar.gz: 0db6e82923b3aaa2ced3401d5a40988b2a98b79ababf13ec50ab228234ab08ebe7c723015f86a8a1438488c5db3627bced34a4ed3bcff5a82ed1aa23a67950a6
@@ -181,31 +181,7 @@ module ChalkRuby
181
181
  output_data = nil
182
182
 
183
183
  if (!response.scalars_data.nil?) and response.scalars_data.length > 0
184
- buffer = Arrow::Buffer.new(response.scalars_data)
185
-
186
- # Create a buffer reader
187
- buffer_reader = Arrow::BufferInputStream.new(buffer)
188
-
189
- # Create an IPC reader from the buffer reader
190
- reader = Arrow::FeatherFileReader.new(buffer_reader)
191
-
192
- # Read the table
193
-
194
-
195
- output_data = []
196
-
197
- table = reader.read
198
-
199
-
200
- field_names = table.schema.fields.map(&:name)
201
- table.each_record do |r|
202
- row = {}
203
- field_names.each do |f|
204
- row[f] = r[f]
205
- end
206
-
207
- output_data << row
208
- end
184
+ output_data = arrow_table_to_array(response.scalars_data)
209
185
  end
210
186
 
211
187
  {
@@ -419,6 +395,47 @@ module ChalkRuby
419
395
 
420
396
  private
421
397
 
398
# Converts Arrow binary data to an array of hashes.
#
# The payload is parsed as an Arrow IPC stream first; if that fails it is
# parsed again as a Feather file for backward compatibility.
#
# @param arrow_data [String, nil] Binary Arrow data (IPC stream format)
# @return [Array<Hash>] One hash per record, keyed by column name; empty when
#   +arrow_data+ is nil or empty
# @raise [RuntimeError] if the data can be read neither as an IPC stream nor
#   as a Feather file; the message carries both underlying error messages
def arrow_table_to_array(arrow_data)
  # No payload means no rows; report that instead of a parse failure.
  return [] if arrow_data.nil? || arrow_data.empty?

  require 'arrow'

  # Held in a local for the whole method, exactly as the original code did.
  buffer = Arrow::Buffer.new(arrow_data)
  stream_input = Arrow::BufferInputStream.new(buffer)

  # Try IPC stream format first (which is what we expect from the query service)
  begin
    reader = Arrow::RecordBatchStreamReader.new(stream_input)
    table = reader.read_all
  rescue => e
    # Fall back to feather format for backward compatibility. A fresh input
    # stream is opened so the fallback does not depend on whatever position
    # the failed stream reader left behind.
    begin
      feather_input = Arrow::BufferInputStream.new(buffer)
      reader = Arrow::FeatherFileReader.new(feather_input)
      table = reader.read
    rescue => e2
      raise "Failed to parse Arrow data: #{e.message}, #{e2.message}"
    end
  end

  field_names = table.schema.fields.map(&:name)
  output_data = []

  table.each_record do |record|
    row = field_names.each_with_object({}) do |field_name, converted|
      value = record[field_name]
      # Convert GLib::Bytes to Ruby String for binary and large string types
      converted[field_name] = value.is_a?(GLib::Bytes) ? value.to_s : value
    end
    output_data << row
  end

  output_data
end
438
+
422
439
  def to_feather(input_hash)
423
440
  require 'arrow'
424
441
 
@@ -1,3 +1,3 @@
1
1
  module ChalkRuby
2
- VERSION = '0.2.7'.freeze
2
+ VERSION = '0.2.8'.freeze
3
3
  end
@@ -0,0 +1,286 @@
1
+ require 'minitest/autorun'
2
+ require 'chalk_ruby/grpc_client'
3
+ require 'arrow'
4
+
5
# Round-trip tests for the private GrpcClient#arrow_table_to_array helper.
#
# Each test builds a one-row, one-column Arrow table, serializes it to the
# Arrow IPC streaming format, feeds the bytes to arrow_table_to_array and
# checks the Ruby value that comes back.
class ArrowConversionTest < Minitest::Test
  # Arrow type name => zero-argument factory for the matching Arrow data type.
  DATA_TYPE_FACTORIES = {
    'int8' => -> { Arrow::Int8DataType.new },
    'int16' => -> { Arrow::Int16DataType.new },
    'int32' => -> { Arrow::Int32DataType.new },
    'int64' => -> { Arrow::Int64DataType.new },
    'uint8' => -> { Arrow::UInt8DataType.new },
    'uint16' => -> { Arrow::UInt16DataType.new },
    'uint32' => -> { Arrow::UInt32DataType.new },
    'uint64' => -> { Arrow::UInt64DataType.new },
    'float' => -> { Arrow::FloatDataType.new },
    'double' => -> { Arrow::DoubleDataType.new },
    'string' => -> { Arrow::StringDataType.new },
    'large_string' => -> { Arrow::LargeStringDataType.new },
    'binary' => -> { Arrow::BinaryDataType.new },
    'large_binary' => -> { Arrow::LargeBinaryDataType.new },
    'boolean' => -> { Arrow::BooleanDataType.new },
    'timestamp[s]' => -> { Arrow::TimestampDataType.new(:second) },
    'date32' => -> { Arrow::Date32DataType.new }
  }.freeze

  # [data type class, array builder class] pairs. They are matched top to
  # bottom and the first match wins, so the order of the original `case`
  # statement is preserved.
  ARRAY_BUILDERS = [
    [Arrow::Int8DataType, Arrow::Int8ArrayBuilder],
    [Arrow::Int16DataType, Arrow::Int16ArrayBuilder],
    [Arrow::Int32DataType, Arrow::Int32ArrayBuilder],
    [Arrow::Int64DataType, Arrow::Int64ArrayBuilder],
    [Arrow::UInt8DataType, Arrow::UInt8ArrayBuilder],
    [Arrow::UInt16DataType, Arrow::UInt16ArrayBuilder],
    [Arrow::UInt32DataType, Arrow::UInt32ArrayBuilder],
    [Arrow::UInt64DataType, Arrow::UInt64ArrayBuilder],
    [Arrow::FloatDataType, Arrow::FloatArrayBuilder],
    [Arrow::DoubleDataType, Arrow::DoubleArrayBuilder],
    [Arrow::StringDataType, Arrow::StringArrayBuilder],
    [Arrow::LargeStringDataType, Arrow::LargeStringArrayBuilder],
    [Arrow::BinaryDataType, Arrow::BinaryArrayBuilder],
    [Arrow::LargeBinaryDataType, Arrow::LargeBinaryArrayBuilder],
    [Arrow::BooleanDataType, Arrow::BooleanArrayBuilder],
    [Arrow::TimestampDataType, Arrow::TimestampArrayBuilder],
    [Arrow::Date32DataType, Arrow::Date32ArrayBuilder]
  ].freeze

  # Integer Arrow type name => sample value written into the column.
  INTEGER_SAMPLES = {
    'int8' => 8,
    'int16' => 16,
    'int32' => 32,
    'int64' => 64,
    'uint8' => 8,
    'uint16' => 16,
    'uint32' => 32,
    'uint64' => 64
  }.freeze

  def setup
    @client = create_client
  end

  def test_arrow_string_conversion
    assert_type_conversion('test_string', 'string', String)
  end

  def test_arrow_large_string_conversion
    assert_bytes_round_trip('test_large_string', 'large_string')
  end

  # Defines test_arrow_int8_conversion ... test_arrow_uint64_conversion, one
  # per entry in INTEGER_SAMPLES; every one expects an Integer back.
  INTEGER_SAMPLES.each do |type_name, sample|
    define_method("test_arrow_#{type_name}_conversion") do
      assert_type_conversion(sample, type_name, Integer)
    end
  end

  def test_arrow_float_conversion
    assert_type_conversion(3.14, 'float', Float)
  end

  def test_arrow_double_conversion
    assert_type_conversion(3.14159, 'double', Float)
  end

  def test_arrow_boolean_conversion
    assert_type_conversion(true, 'boolean', [TrueClass, FalseClass])
  end

  def test_arrow_timestamp_conversion
    # Arrow receives whole seconds since the epoch and should hand back a Time.
    timestamp = Time.now
    result = assert_type_conversion(timestamp.to_i, 'timestamp[s]', Time)
    # Compare at second resolution, tolerating one second of drift.
    assert_in_delta timestamp.to_i, result.to_i, 1
  end

  def test_arrow_date32_conversion
    # date32 stores the date as days since the epoch.
    today = Date.today
    days_since_epoch = (today - Date.new(1970, 1, 1)).to_i
    result = assert_type_conversion(days_since_epoch, 'date32', Date)
    assert_equal today, result
  end

  def test_arrow_binary_conversion
    assert_bytes_round_trip("\x01\x02\x03\x04", 'binary')
  end

  def test_arrow_large_binary_conversion
    assert_bytes_round_trip("\x01\x02\x03\x04" * 10, 'large_binary')
  end

  def test_arrow_list_conversion
    # Skip this test as it's difficult to create a list array with the current Arrow Ruby API
    skip "Creating list arrays requires more complex approach with current Arrow Ruby API"
  end

  def test_arrow_struct_conversion
    # Skip this test as it's difficult to create a struct array with the current Arrow Ruby API
    skip "Creating struct arrays requires more complex approach with current Arrow Ruby API"
  end

  def test_arrow_null_conversion
    # A null cell must come back as nil.
    schema = Arrow::Schema.new([Arrow::Field.new("test_null", Arrow::Int32DataType.new)])

    builder = Arrow::Int32ArrayBuilder.new
    builder.append_null
    table = Arrow::Table.new(schema, [builder.finish])

    result = @client.send(:arrow_table_to_array, serialize_to_ipc_stream(table))

    assert_instance_of Array, result
    assert_equal 1, result.length
    assert_nil result[0]["test_null"]
  end

  private

  # Builds a GrpcClient from placeholder settings; these tests only call the
  # private conversion helper on it.
  def create_client
    config = ChalkRuby::Config.new(
      client_id: 'test',
      client_secret: 'test',
      environment: 'test',
      query_server: 'test'
    )
    ChalkRuby::GrpcClient.new(config)
  end

  # Serializes an Arrow::Table to the Arrow IPC streaming format.
  # @param table [Arrow::Table]
  # @return [String] the serialized bytes
  def serialize_to_ipc_stream(table)
    buffer = Arrow::ResizableBuffer.new(0)
    output = Arrow::BufferOutputStream.new(buffer)
    writer = Arrow::RecordBatchStreamWriter.new(output, table.schema)
    writer.write_table(table)
    writer.close
    output.close
    buffer.data.to_s
  end

  # Round-trips +value+ through a single "test_field" column of the given
  # Arrow type and asserts the converted value is one of the expected classes.
  # @param value the value appended to the Arrow array
  # @param arrow_type_name [String] a key of DATA_TYPE_FACTORIES
  # @param expected_ruby_type [Class, Array<Class>] acceptable result class(es)
  # @return the converted value, for further assertions
  def assert_type_conversion(value, arrow_type_name, expected_ruby_type)
    data_type = create_arrow_data_type(arrow_type_name)
    schema = Arrow::Schema.new([Arrow::Field.new("test_field", data_type)])
    table = Arrow::Table.new(schema, [create_arrow_array(data_type, value)])

    result = @client.send(:arrow_table_to_array, serialize_to_ipc_stream(table))

    assert_instance_of Array, result
    assert_equal 1, result.length

    expected_types = Array(expected_ruby_type)
    assert expected_types.any? { |type| result[0]["test_field"].is_a?(type) },
           "Expected #{result[0]["test_field"].inspect} to be a #{expected_ruby_type}, but was #{result[0]["test_field"].class}"

    result[0]["test_field"]
  end

  # Round-trips a string/binary payload and compares its content. The result
  # may be a String or a GLib::Bytes, which is converted before comparing.
  def assert_bytes_round_trip(expected, arrow_type_name)
    result = assert_type_conversion(expected, arrow_type_name, [String, GLib::Bytes])
    actual_value = result.is_a?(GLib::Bytes) ? result.to_s : result
    assert_equal expected, actual_value
  end

  # @param type_name [String] Arrow type name such as 'int32' or 'timestamp[s]'
  # @return a new Arrow data type instance
  # @raise [RuntimeError] for a type name missing from DATA_TYPE_FACTORIES
  def create_arrow_data_type(type_name)
    factory = DATA_TYPE_FACTORIES.fetch(type_name) do
      raise "Unsupported arrow type: #{type_name}"
    end
    factory.call
  end

  # Builds a one-element Arrow array of +data_type+ holding +value+.
  # @raise [RuntimeError] when no builder is registered for the data type
  def create_arrow_array(data_type, value)
    _, builder_class = ARRAY_BUILDERS.find { |type_class, _| data_type.is_a?(type_class) }
    raise "Unsupported arrow data type: #{data_type.class}" unless builder_class

    # The timestamp builder is constructed from the data type itself; every
    # other builder is constructed without arguments.
    builder =
      if builder_class == Arrow::TimestampArrayBuilder
        builder_class.new(data_type)
      else
        builder_class.new
      end
    builder.append_value(value)
    builder.finish
  end
end
@@ -3,8 +3,8 @@ require 'rspec/autorun'
3
3
  require 'chalk_ruby/client'
4
4
  require 'chalk_ruby/error'
5
5
 
6
- CLIENT_ID = ''
7
- CLIENT_SECRET = ''
6
# Credentials for the integration specs are read from the environment so that
# no real secret is hard-coded in the test suite. They default to an empty
# string when the variables are unset.
# NOTE(review): variable names follow Chalk's CHALK_CLIENT_ID /
# CHALK_CLIENT_SECRET convention - confirm against the client configuration.
CLIENT_ID = ENV.fetch('CHALK_CLIENT_ID', '')
CLIENT_SECRET = ENV.fetch('CHALK_CLIENT_SECRET', '')
8
8
 
9
9
  RSpec.describe 'Online query' do
10
10
  it 'should accept valid queries' do
@@ -14,6 +14,8 @@ require 'chalk_ruby/protos/chalk/engine/v1/query_server_services_pb'
14
14
  require 'arrow'
15
15
 
16
16
 
17
# Credentials for the gRPC integration specs are read from the environment so
# that no real secret is hard-coded in the test suite. They default to an
# empty string when the variables are unset.
# NOTE(review): variable names follow Chalk's CHALK_CLIENT_ID /
# CHALK_CLIENT_SECRET convention - confirm against the client configuration.
CLIENT_ID = ENV.fetch('CHALK_CLIENT_ID', '')
CLIENT_SECRET = ENV.fetch('CHALK_CLIENT_SECRET', '')
17
19
 
18
20
 
19
21
  RSpec.describe ChalkRuby::GrpcClient do
@@ -49,7 +51,7 @@ RSpec.describe ChalkRuby::GrpcClient do
49
51
  it 'can perform bulk queries' do
50
52
  response = client.query_bulk(
51
53
  input: { 'user.id': 1 },
52
- output: %w(user.id user.socure_score)
54
+ output: %w(user.id user.socure_score user.full_name)
53
55
  )
54
56
 
55
57
  expect(response).not_to be_nil
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: chalk_ruby
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.7
4
+ version: 0.2.8
5
5
  platform: ruby
6
6
  authors:
7
7
  - Chalk AI, Inc.
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2025-03-08 00:00:00.000000000 Z
11
+ date: 2025-03-10 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -361,6 +361,7 @@ files:
361
361
  - sig/chalk_ruby/http/response.rbs
362
362
  - sig/chalk_ruby/token.rbs
363
363
  - sig/chalk_ruby/versions.rbs
364
+ - test/chalk_ruby/integration/arrow_conversion_test.rb
364
365
  - test/chalk_ruby/integration/client_test.rb
365
366
  - test/chalk_ruby/integration/grpc_client_test.rb
366
367
  - test/chalk_ruby/test_helper.rb
@@ -391,6 +392,7 @@ signing_key:
391
392
  specification_version: 4
392
393
  summary: A simple Ruby client for Chalk
393
394
  test_files:
395
+ - test/chalk_ruby/integration/arrow_conversion_test.rb
394
396
  - test/chalk_ruby/integration/client_test.rb
395
397
  - test/chalk_ruby/integration/grpc_client_test.rb
396
398
  - test/chalk_ruby/test_helper.rb