chalk_ruby 0.3.0 → 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 8e3e6e01afae6634b4b66b557f658573bf4ffd2b657c082652e45185ec48ec05
4
- data.tar.gz: 58222edf6ee281ae1cfbaaeec09b628985cfe96ddce3299054f5318e61a1d036
3
+ metadata.gz: 5078b236fa2532b078ba26509838b574bcb72c624ac4a27eb41f7fb34b9fb617
4
+ data.tar.gz: af01a9060816db57ff9198129a8d3ac2d0c7cce683e0ea59da1f9f6ed7ead09a
5
5
  SHA512:
6
- metadata.gz: 0e995a01c66b8086fff20b2b7d1304458d2370a73205b87b2c6c2860df0a03e93affda126d20cf8d018ed3fe32f0e612e6ef4b7cdef5d95d1b5b02456994eb5b
7
- data.tar.gz: b00008371f1f5f5a2fbd3113a7e2db1cca4e58100ac5ba8385cfad08f065acb1476d645edc5f853ca5d8a67392cd4b3ce3d2aed8b85f979aebd618bedde48a68
6
+ metadata.gz: f0f5a7f1ca26cefac916b76bf8f83b77b94ecc64344985d7d5adf6fe77aaebc61b811b6c9f5cbc3dd1cc78dc7b1fe519a3d7b9e45656294c0bc021b42cafe9cf
7
+ data.tar.gz: 910e8c846f446f44bb556f34c430d4eec7e24482ff3e13f29ad6874c635f48f5539e0502c25dfb8a9d6d48227a538beedbcd598474d6bc3c059136746f9194b6
@@ -195,31 +195,7 @@ module ChalkRuby
195
195
  output_data = nil
196
196
 
197
197
  if (!response.scalars_data.nil?) and response.scalars_data.length > 0
198
- buffer = Arrow::Buffer.new(response.scalars_data)
199
-
200
- # Create a buffer reader
201
- buffer_reader = Arrow::BufferInputStream.new(buffer)
202
-
203
- # Create an IPC reader from the buffer reader
204
- reader = Arrow::FeatherFileReader.new(buffer_reader)
205
-
206
- # Read the table
207
-
208
-
209
- output_data = []
210
-
211
- table = reader.read
212
-
213
-
214
- field_names = table.schema.fields.map(&:name)
215
- table.each_record do |r|
216
- row = {}
217
- field_names.each do |f|
218
- row[f] = r[f]
219
- end
220
-
221
- output_data << row
222
- end
198
+ output_data = arrow_table_to_array(response.scalars_data)
223
199
  end
224
200
 
225
201
  {
@@ -436,6 +412,47 @@ module ChalkRuby
436
412
 
437
413
  private
438
414
 
# Converts serialized Arrow data into an array of row hashes.
#
# The payload is parsed as an Arrow IPC stream first (the format expected
# from the query service); if that fails it is parsed again as a Feather
# file for backward compatibility.
#
# @param arrow_data [String] Binary Arrow data (IPC stream format, or Feather)
# @return [Array<Hash>] one hash per record, keyed by column name, with
#   GLib::Bytes values converted to Ruby Strings
# @raise [RuntimeError] if the data cannot be parsed in either format
def arrow_table_to_array(arrow_data)
  require 'arrow'

  table = read_arrow_table(arrow_data)
  field_names = table.schema.fields.map(&:name)

  output_data = []
  table.each_record do |record|
    row = {}
    field_names.each do |name|
      value = record[name]
      # Convert GLib::Bytes to Ruby String for binary and large string types
      row[name] = value.is_a?(GLib::Bytes) ? value.to_s : value
    end
    output_data << row
  end

  output_data
end

# Parses Arrow bytes into a table, trying IPC stream format and then Feather.
#
# @param arrow_data [String] Binary Arrow data
# @return [Arrow::Table] the parsed table
# @raise [RuntimeError] with both parser messages if neither format matches
def read_arrow_table(arrow_data)
  buffer = Arrow::Buffer.new(arrow_data)

  begin
    Arrow::RecordBatchStreamReader.new(Arrow::BufferInputStream.new(buffer)).read_all
  rescue => stream_error
    begin
      # Use a fresh input stream for the fallback: the failed stream reader
      # may already have advanced the read position of the first one.
      Arrow::FeatherFileReader.new(Arrow::BufferInputStream.new(buffer)).read
    rescue => feather_error
      raise "Failed to parse Arrow data: #{stream_error.message}, #{feather_error.message}"
    end
  end
end
455
+
439
456
  def to_feather(input_hash)
440
457
  require 'arrow'
441
458
 
@@ -1,3 +1,3 @@
1
1
  module ChalkRuby
2
- VERSION = '0.3.0'.freeze
2
+ VERSION = '0.3.1'.freeze
3
3
  end
@@ -0,0 +1,286 @@
1
+ require 'minitest/autorun'
2
+ require 'chalk_ruby/grpc_client'
3
+ require 'arrow'
4
+
# Round-trip tests for GrpcClient#arrow_table_to_array: each test builds a
# single-row Arrow table, serializes it to the IPC streaming format, and checks
# the Ruby value that comes back out of the conversion.
class ArrowConversionTest < Minitest::Test
  # Maps the type names used by the tests to the prefix shared by the matching
  # Arrow::<Prefix>DataType and Arrow::<Prefix>ArrayBuilder classes.
  # Insertion order matters: create_arrow_array matches data types in this
  # order, so String/LargeString are listed before Binary/LargeBinary.
  ARROW_TYPE_PREFIXES = {
    'int8' => 'Int8',
    'int16' => 'Int16',
    'int32' => 'Int32',
    'int64' => 'Int64',
    'uint8' => 'UInt8',
    'uint16' => 'UInt16',
    'uint32' => 'UInt32',
    'uint64' => 'UInt64',
    'float' => 'Float',
    'double' => 'Double',
    'string' => 'String',
    'large_string' => 'LargeString',
    'binary' => 'Binary',
    'large_binary' => 'LargeBinary',
    'boolean' => 'Boolean',
    'timestamp[s]' => 'Timestamp',
    'date32' => 'Date32'
  }.freeze

  # Sample value for each integer type; one test method is generated per entry.
  INTEGER_SAMPLES = {
    'int8' => 8,
    'int16' => 16,
    'int32' => 32,
    'int64' => 64,
    'uint8' => 8,
    'uint16' => 16,
    'uint32' => 32,
    'uint64' => 64
  }.freeze

  def setup
    @client = create_client
  end

  def test_arrow_string_conversion
    assert_type_conversion('test_string', 'string', String)
  end

  def test_arrow_large_string_conversion
    test_string = 'test_large_string'
    # For large string types, we might get a GLib::Bytes object, which can be converted to a string
    result = assert_type_conversion(test_string, 'large_string', [String, GLib::Bytes])
    assert_equal test_string, bytes_to_string(result)
  end

  # Defines test_arrow_int8_conversion ... test_arrow_uint64_conversion.
  INTEGER_SAMPLES.each do |type_name, sample|
    define_method("test_arrow_#{type_name}_conversion") do
      assert_type_conversion(sample, type_name, Integer)
    end
  end

  def test_arrow_float_conversion
    assert_type_conversion(3.14, 'float', Float)
  end

  def test_arrow_double_conversion
    assert_type_conversion(3.14159, 'double', Float)
  end

  def test_arrow_boolean_conversion
    assert_type_conversion(true, 'boolean', [TrueClass, FalseClass])
  end

  def test_arrow_timestamp_conversion
    timestamp = Time.now
    # Arrow receives the timestamp as whole seconds since the epoch
    result = assert_type_conversion(timestamp.to_i, 'timestamp[s]', Time)
    # Check that the timestamp values are approximately equal
    assert_in_delta timestamp.to_i, result.to_i, 1
  end

  def test_arrow_date32_conversion
    # Date represented as days since epoch
    today = Date.today
    days_since_epoch = (today - Date.new(1970, 1, 1)).to_i
    result = assert_type_conversion(days_since_epoch, 'date32', Date)
    assert_equal today, result
  end

  def test_arrow_binary_conversion
    binary_data = "\x01\x02\x03\x04"
    # For binary types, we might get a GLib::Bytes object, which can be converted to a string
    result = assert_type_conversion(binary_data, 'binary', [String, GLib::Bytes])
    assert_equal binary_data, bytes_to_string(result)
  end

  def test_arrow_large_binary_conversion
    large_binary_data = "\x01\x02\x03\x04" * 10
    # For binary types, we might get a GLib::Bytes object, which can be converted to a string
    result = assert_type_conversion(large_binary_data, 'large_binary', [String, GLib::Bytes])
    assert_equal large_binary_data, bytes_to_string(result)
  end

  def test_arrow_list_conversion
    # Skip this test as it's difficult to create a list array with the current Arrow Ruby API
    skip "Creating list arrays requires more complex approach with current Arrow Ruby API"
  end

  def test_arrow_struct_conversion
    # Skip this test as it's difficult to create a struct array with the current Arrow Ruby API
    skip "Creating struct arrays requires more complex approach with current Arrow Ruby API"
  end

  def test_arrow_null_conversion
    # Null values must come back as nil
    schema = Arrow::Schema.new([Arrow::Field.new("test_null", Arrow::Int32DataType.new)])
    builder = Arrow::Int32ArrayBuilder.new
    builder.append_null
    table = Arrow::Table.new(schema, [builder.finish])

    result = convert_table(table)

    assert_instance_of Array, result
    assert_equal 1, result.length
    assert_nil result[0]["test_null"]
  end

  private

  # Builds a GrpcClient from placeholder credentials.
  def create_client
    config = ChalkRuby::Config.new(
      client_id: 'test',
      client_secret: 'test',
      environment: 'test',
      query_server: 'test'
    )
    ChalkRuby::GrpcClient.new(config)
  end

  # Round-trips +value+ through a one-column table of the named Arrow type and
  # asserts the converted value is an instance of one of the expected classes.
  #
  # @param value [Object] value appended to the Arrow array
  # @param arrow_type_name [String] a key of ARROW_TYPE_PREFIXES
  # @param expected_ruby_type [Class, Array<Class>] acceptable result class(es)
  # @return [Object] the converted value, for further assertions
  def assert_type_conversion(value, arrow_type_name, expected_ruby_type)
    data_type = create_arrow_data_type(arrow_type_name)
    schema = Arrow::Schema.new([Arrow::Field.new("test_field", data_type)])
    table = Arrow::Table.new(schema, [create_arrow_array(data_type, value)])

    result = convert_table(table)

    assert_instance_of Array, result
    assert_equal 1, result.length

    converted = result[0]["test_field"]
    assert Array(expected_ruby_type).any? { |type| converted.is_a?(type) },
           "Expected #{converted.inspect} to be a #{expected_ruby_type}, but was #{converted.class}"

    converted
  end

  # Serializes +table+ and runs it through the client's private converter.
  def convert_table(table)
    @client.send(:arrow_table_to_array, serialize_ipc_stream(table))
  end

  # Serializes +table+ to the Arrow IPC streaming format and returns the bytes.
  def serialize_ipc_stream(table)
    buffer = Arrow::ResizableBuffer.new(0)
    output = Arrow::BufferOutputStream.new(buffer)
    writer = Arrow::RecordBatchStreamWriter.new(output, table.schema)
    writer.write_table(table)
    writer.close
    output.close
    buffer.data.to_s
  end

  # Normalizes a converted value that may still be a GLib::Bytes to a String.
  def bytes_to_string(value)
    value.is_a?(GLib::Bytes) ? value.to_s : value
  end

  # Instantiates the Arrow data type for one of the supported type names.
  def create_arrow_data_type(type_name)
    prefix = ARROW_TYPE_PREFIXES.fetch(type_name) do
      raise "Unsupported arrow type: #{type_name}"
    end
    type_class = Arrow.const_get("#{prefix}DataType")
    # Timestamps are the only parameterized type here: second resolution
    prefix == 'Timestamp' ? type_class.new(:second) : type_class.new
  end

  # Builds a one-element Arrow array of +data_type+ holding +value+.
  def create_arrow_array(data_type, value)
    prefix = ARROW_TYPE_PREFIXES.values.find do |candidate|
      data_type.is_a?(Arrow.const_get("#{candidate}DataType"))
    end
    raise "Unsupported arrow data type: #{data_type.class}" unless prefix

    builder_class = Arrow.const_get("#{prefix}ArrayBuilder")
    # The timestamp builder needs the data type to know its unit
    builder = prefix == 'Timestamp' ? builder_class.new(data_type) : builder_class.new
    builder.append_value(value)
    builder.finish
  end
end
@@ -1,18 +1,51 @@
1
- require 'rspec'
2
- require 'chalk_ruby'
1
+ $LOAD_PATH.unshift File.expand_path('../../../../lib', __FILE__)
2
+ # require 'rspec'
3
+ # require 'chalk_ruby'
4
+ # require 'chalk_ruby'
5
+
6
+ require 'date'
7
+ require 'rspec/autorun'
8
+ require 'chalk_ruby/grpc_client'
9
+ require 'chalk_ruby/grpc/auth_interceptor'
10
+ require 'chalk_ruby/error'
11
+ require 'chalk_ruby/protos/chalk/server/v1/auth_pb'
12
+ require 'chalk_ruby/protos/chalk/server/v1/auth_services_pb'
13
+ require 'chalk_ruby/protos/chalk/engine/v1/query_server_services_pb'
14
+ require 'arrow'
15
+
16
+
17
+
3
18
 
4
19
  RSpec.describe ChalkRuby::GrpcClient do
5
- describe '#query_bulk' do
20
+ describe '#query' do
6
21
  let(:client) do
7
- ChalkRuby::GrpcClient.create(
8
- ENV.fetch('CHALK_CLIENT_ID'),
9
- ENV.fetch('CHALK_CLIENT_SECRET'),
10
- ENV.fetch('CHALK_ENVIRONMENT', 'tmnmc9beyujew'),
11
- ENV.fetch('CHALK_QUERY_SERVER', 'standard-gke.chalk-develop.gcp.chalk.ai'),
12
- ENV.fetch('CHALK_API_SERVER', 'api.staging.chalk.ai:443')
22
+ ChalkRuby::GrpcClient.new(
23
+ ChalkRuby::Config.new(
24
+ query_server: "standard-gke.chalk-develop.gcp.chalk.ai",
25
+ api_server: "api.staging.chalk.ai:443",
26
+ client_id: CLIENT_ID,
27
+ client_secret: CLIENT_SECRET,
28
+ environment: "tmnmc9beyujew",
29
+ # api_timeout: 0.6, # seconds
30
+ # connect_timeout: 0.3, # seconds
31
+ # query_service_root_ca_path: "/Users/andrew/found_ca.pem" # path to the root ca for chalkai.internal.found.app,
32
+ )
13
33
  )
14
34
  end
15
35
 
36
+ # it 'can perform queries' do
37
+ # response = client.query(
38
+ # input: { 'business.id': 1 },
39
+ # output: %w(business.id)
40
+ # )
41
+ #
42
+ # expect(response).not_to be_nil
43
+ #
44
+ # puts response
45
+ # # The response should be a OnlineQueryBulkResponse
46
+ # # expect(response).to be_a(Chalk::Common::V1::OnlineQueryBulkResponse)
47
+ # end
48
+
16
49
  it 'can perform bulk queries' do
17
50
  response = client.query_bulk(
18
51
  input: { 'user.id': 1 },
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: chalk_ruby
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.0
4
+ version: 0.3.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Chalk AI, Inc.
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2025-03-26 00:00:00.000000000 Z
11
+ date: 2025-05-05 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -362,6 +362,7 @@ files:
362
362
  - sig/chalk_ruby/http/response.rbs
363
363
  - sig/chalk_ruby/token.rbs
364
364
  - sig/chalk_ruby/versions.rbs
365
+ - test/chalk_ruby/integration/arrow_conversion_test.rb
365
366
  - test/chalk_ruby/integration/client_test.rb
366
367
  - test/chalk_ruby/integration/grpc_client_test.rb
367
368
  - test/chalk_ruby/test_helper.rb
@@ -392,6 +393,7 @@ signing_key:
392
393
  specification_version: 4
393
394
  summary: A simple Ruby client for Chalk
394
395
  test_files:
396
+ - test/chalk_ruby/integration/arrow_conversion_test.rb
395
397
  - test/chalk_ruby/integration/client_test.rb
396
398
  - test/chalk_ruby/integration/grpc_client_test.rb
397
399
  - test/chalk_ruby/test_helper.rb