lancelot 0.3.1 → 0.3.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 59a61f845bead9178dc6b7ca831b0cd1f3577969c189d723b085a706527bb9a9
4
- data.tar.gz: f6941d534cc770393803c152f1718dad15639086b4ae78e84da93f8f8ab6f43f
3
+ metadata.gz: cda5bd00de23ad9f4840b1cc7f6e96b7eb4ec124ba902d3901ef36b37594de97
4
+ data.tar.gz: 23e84c317e5bcd0f52870c673d0b96f9abf4f3ab97f20eb4e5b8a8047dd1cf1f
5
5
  SHA512:
6
- metadata.gz: 04f63038fe699b2441c22618daac20ef710df82a5d95c6c8258317a2600b23ea2844ab5c2f1bc01245fb4ff90d531f3ec09f7ddcb28876e758b983a0bcb1fdc2
7
- data.tar.gz: 20b8c06914dc993b6868b7264cfcfba0eeb0a0a0c70b0473ac6e3b9e163c8ed3855c2eeae1ad02ab3f19125f80c844a0aebd2c26a91aafc005af0dfebd8d37d6
6
+ metadata.gz: 32cb3e852ed77b8ec2831b08f93252b5ccb176942c217a366079e1cb2a2a97c4be74247b5f8410a2892852456278a5ec8e5b556bd9ca3379d740b292cc32c15b
7
+ data.tar.gz: 718a99dbfcd51dce872feedb87265be86417faba152d6f2512a9391f28e5fc5e115deb0154be52dc029c2771611b71c685f1f143a4d2da5157aaafc28997ffa7
@@ -159,9 +159,9 @@ pub fn build_record_batch(
159
159
  .map_err(|e| Error::new(magnus::exception::runtime_error(), e.to_string()))
160
160
  }
161
161
 
162
- pub fn convert_batch_to_ruby(batch: &RecordBatch) -> Result<Vec<RHash>, Error> {
162
+ pub fn convert_batch_to_ruby(batch: &RecordBatch) -> Result<RArray, Error> {
163
163
  let ruby = Ruby::get().unwrap();
164
- let mut documents = Vec::new();
164
+ let documents = ruby.ary_new();
165
165
 
166
166
  let num_rows = batch.num_rows();
167
167
  let schema = batch.schema();
@@ -173,6 +173,15 @@ pub fn convert_batch_to_ruby(batch: &RecordBatch) -> Result<Vec<RHash>, Error> {
173
173
  let column = batch.column(col_idx);
174
174
  let key = Symbol::new(field.name());
175
175
 
176
+ // CRITICAL: Add bounds checking for all array access
177
+ if row_idx >= column.len() {
178
+ return Err(Error::new(
179
+ magnus::exception::runtime_error(),
180
+ format!("Row index {} out of bounds for column '{}' with length {}",
181
+ row_idx, field.name(), column.len())
182
+ ));
183
+ }
184
+
176
185
  match field.data_type() {
177
186
  DataType::Utf8 => {
178
187
  let array = column.as_any().downcast_ref::<StringArray>()
@@ -225,9 +234,19 @@ pub fn convert_batch_to_ruby(batch: &RecordBatch) -> Result<Vec<RHash>, Error> {
225
234
  let float_array = values.as_any().downcast_ref::<Float32Array>()
226
235
  .ok_or_else(|| Error::new(magnus::exception::runtime_error(), "Failed to cast vector values to Float32Array"))?;
227
236
 
237
+ // CRITICAL: Verify the float_array has the expected size
238
+ let expected_size = *list_size as usize;
239
+ if float_array.len() != expected_size {
240
+ return Err(Error::new(
241
+ magnus::exception::runtime_error(),
242
+ format!("Vector data corruption: expected {} elements but found {} for field '{}'",
243
+ expected_size, float_array.len(), field.name())
244
+ ));
245
+ }
246
+
228
247
  let ruby_array = ruby.ary_new();
229
- for i in 0..*list_size {
230
- ruby_array.push(float_array.value(i as usize))?;
248
+ for i in 0..expected_size {
249
+ ruby_array.push(float_array.value(i))?;
231
250
  }
232
251
  doc.aset(key, ruby_array)?;
233
252
  }
@@ -238,7 +257,7 @@ pub fn convert_batch_to_ruby(batch: &RecordBatch) -> Result<Vec<RHash>, Error> {
238
257
  }
239
258
  }
240
259
 
241
- documents.push(doc);
260
+ documents.push(doc)?;
242
261
  }
243
262
 
244
263
  Ok(documents)
@@ -1,4 +1,4 @@
1
- use magnus::{Error, Ruby, RHash, RArray, Symbol, TryConvert, function, method, RClass, Module, Object};
1
+ use magnus::{Error, Ruby, RHash, RArray, Symbol, TryConvert, Value, function, method, RClass, Module, Object};
2
2
  use std::cell::RefCell;
3
3
  use std::sync::Arc;
4
4
  use tokio::runtime::Runtime;
@@ -157,9 +157,10 @@ impl LancelotDataset {
157
157
  let result_array = ruby.ary_new();
158
158
 
159
159
  for batch in batches {
160
- let documents = convert_batch_to_ruby(&batch)?;
161
- for doc in documents {
162
- result_array.push(doc)?;
160
+ let batch_docs = convert_batch_to_ruby(&batch)?;
161
+ // Merge arrays by pushing each element
162
+ for i in 0..batch_docs.len() {
163
+ result_array.push(batch_docs.entry::<Value>(i as isize)?)?;
163
164
  }
164
165
  }
165
166
 
@@ -191,9 +192,10 @@ impl LancelotDataset {
191
192
  let result_array = ruby.ary_new();
192
193
 
193
194
  for batch in batches {
194
- let documents = convert_batch_to_ruby(&batch)?;
195
- for doc in documents {
196
- result_array.push(doc)?;
195
+ let batch_docs = convert_batch_to_ruby(&batch)?;
196
+ // Merge arrays by pushing each element
197
+ for i in 0..batch_docs.len() {
198
+ result_array.push(batch_docs.entry::<Value>(i as isize)?)?;
197
199
  }
198
200
  }
199
201
 
@@ -265,9 +267,10 @@ impl LancelotDataset {
265
267
  let result_array = ruby.ary_new();
266
268
 
267
269
  for batch in batches {
268
- let documents = convert_batch_to_ruby(&batch)?;
269
- for doc in documents {
270
- result_array.push(doc)?;
270
+ let batch_docs = convert_batch_to_ruby(&batch)?;
271
+ // Merge arrays by pushing each element
272
+ for i in 0..batch_docs.len() {
273
+ result_array.push(batch_docs.entry::<Value>(i as isize)?)?;
271
274
  }
272
275
  }
273
276
 
@@ -330,9 +333,10 @@ impl LancelotDataset {
330
333
  let result_array = ruby.ary_new();
331
334
 
332
335
  for batch in batches {
333
- let documents = convert_batch_to_ruby(&batch)?;
334
- for doc in documents {
335
- result_array.push(doc)?;
336
+ let batch_docs = convert_batch_to_ruby(&batch)?;
337
+ // Merge arrays by pushing each element
338
+ for i in 0..batch_docs.len() {
339
+ result_array.push(batch_docs.entry::<Value>(i as isize)?)?;
336
340
  }
337
341
  }
338
342
 
@@ -380,9 +384,10 @@ impl LancelotDataset {
380
384
  let result_array = ruby.ary_new();
381
385
 
382
386
  for batch in batches {
383
- let documents = convert_batch_to_ruby(&batch)?;
384
- for doc in documents {
385
- result_array.push(doc)?;
387
+ let batch_docs = convert_batch_to_ruby(&batch)?;
388
+ // Merge arrays by pushing each element
389
+ for i in 0..batch_docs.len() {
390
+ result_array.push(batch_docs.entry::<Value>(i as isize)?)?;
386
391
  }
387
392
  }
388
393
 
@@ -422,9 +427,10 @@ impl LancelotDataset {
422
427
  let result_array = ruby.ary_new();
423
428
 
424
429
  for batch in batches {
425
- let documents = convert_batch_to_ruby(&batch)?;
426
- for doc in documents {
427
- result_array.push(doc)?;
430
+ let batch_docs = convert_batch_to_ruby(&batch)?;
431
+ // Merge arrays by pushing each element
432
+ for i in 0..batch_docs.len() {
433
+ result_array.push(batch_docs.entry::<Value>(i as isize)?)?;
428
434
  }
429
435
  }
430
436
 
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Lancelot
4
- VERSION = "0.3.1"
4
+ VERSION = "0.3.2"
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: lancelot
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.1
4
+ version: 0.3.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Chris Petersen
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2025-08-10 00:00:00.000000000 Z
11
+ date: 2025-08-21 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rb_sys