lancelot 0.3.1 → 0.3.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/ext/lancelot/src/conversion.rs +24 -5
- data/ext/lancelot/src/dataset.rs +25 -19
- data/lib/lancelot/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: cda5bd00de23ad9f4840b1cc7f6e96b7eb4ec124ba902d3901ef36b37594de97
|
4
|
+
data.tar.gz: 23e84c317e5bcd0f52870c673d0b96f9abf4f3ab97f20eb4e5b8a8047dd1cf1f
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 32cb3e852ed77b8ec2831b08f93252b5ccb176942c217a366079e1cb2a2a97c4be74247b5f8410a2892852456278a5ec8e5b556bd9ca3379d740b292cc32c15b
|
7
|
+
data.tar.gz: 718a99dbfcd51dce872feedb87265be86417faba152d6f2512a9391f28e5fc5e115deb0154be52dc029c2771611b71c685f1f143a4d2da5157aaafc28997ffa7
|
data/ext/lancelot/src/conversion.rs
CHANGED
@@ -159,9 +159,9 @@ pub fn build_record_batch(
|
|
159
159
|
.map_err(|e| Error::new(magnus::exception::runtime_error(), e.to_string()))
|
160
160
|
}
|
161
161
|
|
162
|
-
pub fn convert_batch_to_ruby(batch: &RecordBatch) -> Result<Vec<RHash>, Error> {
|
162
|
+
pub fn convert_batch_to_ruby(batch: &RecordBatch) -> Result<RArray, Error> {
|
163
163
|
let ruby = Ruby::get().unwrap();
|
164
|
-
let
|
164
|
+
let documents = ruby.ary_new();
|
165
165
|
|
166
166
|
let num_rows = batch.num_rows();
|
167
167
|
let schema = batch.schema();
|
@@ -173,6 +173,15 @@ pub fn convert_batch_to_ruby(batch: &RecordBatch) -> Result<Vec<RHash>, Error> {
|
|
173
173
|
let column = batch.column(col_idx);
|
174
174
|
let key = Symbol::new(field.name());
|
175
175
|
|
176
|
+
// CRITICAL: Add bounds checking for all array access
|
177
|
+
if row_idx >= column.len() {
|
178
|
+
return Err(Error::new(
|
179
|
+
magnus::exception::runtime_error(),
|
180
|
+
format!("Row index {} out of bounds for column '{}' with length {}",
|
181
|
+
row_idx, field.name(), column.len())
|
182
|
+
));
|
183
|
+
}
|
184
|
+
|
176
185
|
match field.data_type() {
|
177
186
|
DataType::Utf8 => {
|
178
187
|
let array = column.as_any().downcast_ref::<StringArray>()
|
@@ -225,9 +234,19 @@ pub fn convert_batch_to_ruby(batch: &RecordBatch) -> Result<Vec<RHash>, Error> {
|
|
225
234
|
let float_array = values.as_any().downcast_ref::<Float32Array>()
|
226
235
|
.ok_or_else(|| Error::new(magnus::exception::runtime_error(), "Failed to cast vector values to Float32Array"))?;
|
227
236
|
|
237
|
+
// CRITICAL: Verify the float_array has the expected size
|
238
|
+
let expected_size = *list_size as usize;
|
239
|
+
if float_array.len() != expected_size {
|
240
|
+
return Err(Error::new(
|
241
|
+
magnus::exception::runtime_error(),
|
242
|
+
format!("Vector data corruption: expected {} elements but found {} for field '{}'",
|
243
|
+
expected_size, float_array.len(), field.name())
|
244
|
+
));
|
245
|
+
}
|
246
|
+
|
228
247
|
let ruby_array = ruby.ary_new();
|
229
|
-
for i in 0
|
230
|
-
ruby_array.push(float_array.value(i
|
248
|
+
for i in 0..expected_size {
|
249
|
+
ruby_array.push(float_array.value(i))?;
|
231
250
|
}
|
232
251
|
doc.aset(key, ruby_array)?;
|
233
252
|
}
|
@@ -238,7 +257,7 @@ pub fn convert_batch_to_ruby(batch: &RecordBatch) -> Result<Vec<RHash>, Error> {
|
|
238
257
|
}
|
239
258
|
}
|
240
259
|
|
241
|
-
documents.push(doc);
|
260
|
+
documents.push(doc)?;
|
242
261
|
}
|
243
262
|
|
244
263
|
Ok(documents)
|
data/ext/lancelot/src/dataset.rs
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
use magnus::{Error, Ruby, RHash, RArray, Symbol, TryConvert, function, method, RClass, Module, Object};
|
1
|
+
use magnus::{Error, Ruby, RHash, RArray, Symbol, TryConvert, Value, function, method, RClass, Module, Object};
|
2
2
|
use std::cell::RefCell;
|
3
3
|
use std::sync::Arc;
|
4
4
|
use tokio::runtime::Runtime;
|
@@ -157,9 +157,10 @@ impl LancelotDataset {
|
|
157
157
|
let result_array = ruby.ary_new();
|
158
158
|
|
159
159
|
for batch in batches {
|
160
|
-
let
|
161
|
-
|
162
|
-
|
160
|
+
let batch_docs = convert_batch_to_ruby(&batch)?;
|
161
|
+
// Merge arrays by pushing each element
|
162
|
+
for i in 0..batch_docs.len() {
|
163
|
+
result_array.push(batch_docs.entry::<Value>(i as isize)?)?;
|
163
164
|
}
|
164
165
|
}
|
165
166
|
|
@@ -191,9 +192,10 @@ impl LancelotDataset {
|
|
191
192
|
let result_array = ruby.ary_new();
|
192
193
|
|
193
194
|
for batch in batches {
|
194
|
-
let
|
195
|
-
|
196
|
-
|
195
|
+
let batch_docs = convert_batch_to_ruby(&batch)?;
|
196
|
+
// Merge arrays by pushing each element
|
197
|
+
for i in 0..batch_docs.len() {
|
198
|
+
result_array.push(batch_docs.entry::<Value>(i as isize)?)?;
|
197
199
|
}
|
198
200
|
}
|
199
201
|
|
@@ -265,9 +267,10 @@ impl LancelotDataset {
|
|
265
267
|
let result_array = ruby.ary_new();
|
266
268
|
|
267
269
|
for batch in batches {
|
268
|
-
let
|
269
|
-
|
270
|
-
|
270
|
+
let batch_docs = convert_batch_to_ruby(&batch)?;
|
271
|
+
// Merge arrays by pushing each element
|
272
|
+
for i in 0..batch_docs.len() {
|
273
|
+
result_array.push(batch_docs.entry::<Value>(i as isize)?)?;
|
271
274
|
}
|
272
275
|
}
|
273
276
|
|
@@ -330,9 +333,10 @@ impl LancelotDataset {
|
|
330
333
|
let result_array = ruby.ary_new();
|
331
334
|
|
332
335
|
for batch in batches {
|
333
|
-
let
|
334
|
-
|
335
|
-
|
336
|
+
let batch_docs = convert_batch_to_ruby(&batch)?;
|
337
|
+
// Merge arrays by pushing each element
|
338
|
+
for i in 0..batch_docs.len() {
|
339
|
+
result_array.push(batch_docs.entry::<Value>(i as isize)?)?;
|
336
340
|
}
|
337
341
|
}
|
338
342
|
|
@@ -380,9 +384,10 @@ impl LancelotDataset {
|
|
380
384
|
let result_array = ruby.ary_new();
|
381
385
|
|
382
386
|
for batch in batches {
|
383
|
-
let
|
384
|
-
|
385
|
-
|
387
|
+
let batch_docs = convert_batch_to_ruby(&batch)?;
|
388
|
+
// Merge arrays by pushing each element
|
389
|
+
for i in 0..batch_docs.len() {
|
390
|
+
result_array.push(batch_docs.entry::<Value>(i as isize)?)?;
|
386
391
|
}
|
387
392
|
}
|
388
393
|
|
@@ -422,9 +427,10 @@ impl LancelotDataset {
|
|
422
427
|
let result_array = ruby.ary_new();
|
423
428
|
|
424
429
|
for batch in batches {
|
425
|
-
let
|
426
|
-
|
427
|
-
|
430
|
+
let batch_docs = convert_batch_to_ruby(&batch)?;
|
431
|
+
// Merge arrays by pushing each element
|
432
|
+
for i in 0..batch_docs.len() {
|
433
|
+
result_array.push(batch_docs.entry::<Value>(i as isize)?)?;
|
428
434
|
}
|
429
435
|
}
|
430
436
|
|
data/lib/lancelot/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: lancelot
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.1
|
4
|
+
version: 0.3.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Chris Petersen
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2025-08-
|
11
|
+
date: 2025-08-21 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rb_sys
|