lancelot 0.3.4 → 0.4.0

This diff shows the changes between two publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
@@ -7,15 +7,15 @@ edition = "2021"
7
7
  crate-type = ["cdylib"]
8
8
 
9
9
  [dependencies]
10
- magnus = { version = "0.7", features = ["rb-sys"] }
11
- lance = { version = "0.31", default-features = false }
12
- lance-index = "0.31"
13
- lance-linalg = "0.31"
10
+ magnus = { version = "0.8", features = ["embed"] }
11
+ lance = { version = "3.0", default-features = false }
12
+ lance-index = "3.0"
13
+ lance-linalg = "3.0"
14
14
  tokio = { version = "1", features = ["rt-multi-thread", "macros"] }
15
- arrow = "55"
16
- arrow-array = "55"
17
- arrow-schema = "55"
18
- arrow-data = "55"
15
+ arrow = "57"
16
+ arrow-array = "57"
17
+ arrow-schema = "57"
18
+ arrow-data = "57"
19
19
  futures = "0.3"
20
20
  thiserror = "2"
21
21
  serde = { version = "1", features = ["derive"] }
@@ -1,10 +1,11 @@
1
- use magnus::{Error, Ruby, RHash, RArray, Symbol, Value, TryConvert, value::ReprValue};
1
+ use magnus::{Error, Ruby, RHash, RArray, Value, TryConvert, value::ReprValue};
2
2
  use arrow_schema::{DataType, Schema as ArrowSchema};
3
3
  use arrow_array::{RecordBatch, StringArray, Float32Array, ArrayRef, Array, FixedSizeListArray};
4
4
  use std::collections::HashMap;
5
5
  use std::sync::Arc;
6
6
 
7
7
  pub fn build_record_batch(
8
+ ruby: &Ruby,
8
9
  data: RArray,
9
10
  schema: &ArrowSchema,
10
11
  ) -> Result<RecordBatch, Error> {
@@ -13,7 +14,7 @@ pub fn build_record_batch(
13
14
  let mut int_columns: HashMap<String, Vec<Option<i64>>> = HashMap::new();
14
15
  let mut bool_columns: HashMap<String, Vec<Option<bool>>> = HashMap::new();
15
16
  let mut vector_columns: HashMap<String, Vec<Option<Vec<f32>>>> = HashMap::new();
16
-
17
+
17
18
  for field in schema.fields() {
18
19
  match field.data_type() {
19
20
  DataType::Utf8 => {
@@ -35,18 +36,20 @@ pub fn build_record_batch(
35
36
  }
36
37
  }
37
38
 
38
- for item in data.into_iter() {
39
- let item = RHash::try_convert(item)?;
39
+ // Index-based iteration over data RArray
40
+ for idx in 0..data.len() {
41
+ let item_value: Value = data.entry(idx as isize)?;
42
+ let item = RHash::try_convert(item_value)?;
40
43
  for field in schema.fields() {
41
- let key = Symbol::new(field.name());
44
+ let key = ruby.to_symbol(field.name());
42
45
  // Make fields optional - use get instead of fetch
43
46
  let value: Value = item.get(key)
44
47
  .or_else(|| {
45
- // Try with string key
48
+ // Try with string key
46
49
  item.get(field.name().as_str())
47
50
  })
48
- .unwrap_or_else(|| Ruby::get().unwrap().qnil().as_value());
49
-
51
+ .unwrap_or_else(|| ruby.qnil().as_value());
52
+
50
53
  match field.data_type() {
51
54
  DataType::Utf8 => {
52
55
  if value.is_nil() {
@@ -85,9 +88,12 @@ pub fn build_record_batch(
85
88
  vector_columns.get_mut(field.name()).unwrap().push(None);
86
89
  } else {
87
90
  let arr = RArray::try_convert(value)?;
88
- let vec: Vec<f32> = arr.into_iter()
89
- .map(|v| f64::try_convert(v).map(|f| f as f32))
90
- .collect::<Result<Vec<_>, _>>()?;
91
+ let len = arr.len();
92
+ let mut vec: Vec<f32> = Vec::with_capacity(len);
93
+ for j in 0..len {
94
+ let v: f64 = arr.entry(j as isize)?;
95
+ vec.push(v as f32);
96
+ }
91
97
  vector_columns.get_mut(field.name()).unwrap().push(Some(vec));
92
98
  }
93
99
  }
@@ -97,7 +103,7 @@ pub fn build_record_batch(
97
103
  }
98
104
 
99
105
  let mut arrays: Vec<ArrayRef> = Vec::new();
100
-
106
+
101
107
  for field in schema.fields() {
102
108
  let array: ArrayRef = match field.data_type() {
103
109
  DataType::Utf8 => {
@@ -125,7 +131,7 @@ pub fn build_record_batch(
125
131
  Some(vec) => {
126
132
  if vec.len() != *list_size as usize {
127
133
  return Err(Error::new(
128
- magnus::exception::arg_error(),
134
+ ruby.exception_arg_error(),
129
135
  format!("Vector dimension mismatch. Expected {}, got {}", list_size, vec.len())
130
136
  ));
131
137
  }
@@ -137,7 +143,7 @@ pub fn build_record_batch(
137
143
  }
138
144
  }
139
145
  }
140
-
146
+
141
147
  let flat_array = Float32Array::from(flat_values);
142
148
  Arc::new(FixedSizeListArray::new(
143
149
  inner_field.clone(),
@@ -147,46 +153,45 @@ pub fn build_record_batch(
147
153
  ))
148
154
  }
149
155
  _ => return Err(Error::new(
150
- magnus::exception::runtime_error(),
156
+ ruby.exception_runtime_error(),
151
157
  format!("Unsupported data type: {:?}", field.data_type())
152
158
  ))
153
159
  };
154
-
160
+
155
161
  arrays.push(array);
156
162
  }
157
163
 
158
164
  RecordBatch::try_new(Arc::new(schema.clone()), arrays)
159
- .map_err(|e| Error::new(magnus::exception::runtime_error(), e.to_string()))
165
+ .map_err(|e| Error::new(ruby.exception_runtime_error(), e.to_string()))
160
166
  }
161
167
 
162
- pub fn convert_batch_to_ruby(batch: &RecordBatch) -> Result<RArray, Error> {
163
- let ruby = Ruby::get().unwrap();
168
+ pub fn convert_batch_to_ruby(ruby: &Ruby, batch: &RecordBatch) -> Result<RArray, Error> {
164
169
  let documents = ruby.ary_new();
165
-
170
+
166
171
  let num_rows = batch.num_rows();
167
172
  let schema = batch.schema();
168
-
173
+
169
174
  for row_idx in 0..num_rows {
170
175
  let doc = ruby.hash_new();
171
-
176
+
172
177
  for (col_idx, field) in schema.fields().iter().enumerate() {
173
178
  let column = batch.column(col_idx);
174
- let key = Symbol::new(field.name());
175
-
179
+ let key = ruby.to_symbol(field.name());
180
+
176
181
  // CRITICAL: Add bounds checking for all array access
177
182
  if row_idx >= column.len() {
178
183
  return Err(Error::new(
179
- magnus::exception::runtime_error(),
180
- format!("Row index {} out of bounds for column '{}' with length {}",
184
+ ruby.exception_runtime_error(),
185
+ format!("Row index {} out of bounds for column '{}' with length {}",
181
186
  row_idx, field.name(), column.len())
182
187
  ));
183
188
  }
184
-
189
+
185
190
  match field.data_type() {
186
191
  DataType::Utf8 => {
187
192
  let array = column.as_any().downcast_ref::<StringArray>()
188
- .ok_or_else(|| Error::new(magnus::exception::runtime_error(), "Failed to cast to StringArray"))?;
189
-
193
+ .ok_or_else(|| Error::new(ruby.exception_runtime_error(), "Failed to cast to StringArray"))?;
194
+
190
195
  if array.is_null(row_idx) {
191
196
  doc.aset(key, ruby.qnil())?;
192
197
  } else {
@@ -195,8 +200,8 @@ pub fn convert_batch_to_ruby(batch: &RecordBatch) -> Result<RArray, Error> {
195
200
  }
196
201
  DataType::Float32 => {
197
202
  let array = column.as_any().downcast_ref::<Float32Array>()
198
- .ok_or_else(|| Error::new(magnus::exception::runtime_error(), "Failed to cast to Float32Array"))?;
199
-
203
+ .ok_or_else(|| Error::new(ruby.exception_runtime_error(), "Failed to cast to Float32Array"))?;
204
+
200
205
  if array.is_null(row_idx) {
201
206
  doc.aset(key, ruby.qnil())?;
202
207
  } else {
@@ -205,8 +210,8 @@ pub fn convert_batch_to_ruby(batch: &RecordBatch) -> Result<RArray, Error> {
205
210
  }
206
211
  DataType::Int64 => {
207
212
  let array = column.as_any().downcast_ref::<arrow_array::Int64Array>()
208
- .ok_or_else(|| Error::new(magnus::exception::runtime_error(), "Failed to cast to Int64Array"))?;
209
-
213
+ .ok_or_else(|| Error::new(ruby.exception_runtime_error(), "Failed to cast to Int64Array"))?;
214
+
210
215
  if array.is_null(row_idx) {
211
216
  doc.aset(key, ruby.qnil())?;
212
217
  } else {
@@ -215,8 +220,8 @@ pub fn convert_batch_to_ruby(batch: &RecordBatch) -> Result<RArray, Error> {
215
220
  }
216
221
  DataType::Boolean => {
217
222
  let array = column.as_any().downcast_ref::<arrow_array::BooleanArray>()
218
- .ok_or_else(|| Error::new(magnus::exception::runtime_error(), "Failed to cast to BooleanArray"))?;
219
-
223
+ .ok_or_else(|| Error::new(ruby.exception_runtime_error(), "Failed to cast to BooleanArray"))?;
224
+
220
225
  if array.is_null(row_idx) {
221
226
  doc.aset(key, ruby.qnil())?;
222
227
  } else {
@@ -225,25 +230,25 @@ pub fn convert_batch_to_ruby(batch: &RecordBatch) -> Result<RArray, Error> {
225
230
  }
226
231
  DataType::FixedSizeList(_, list_size) => {
227
232
  let array = column.as_any().downcast_ref::<FixedSizeListArray>()
228
- .ok_or_else(|| Error::new(magnus::exception::runtime_error(), "Failed to cast to FixedSizeListArray"))?;
229
-
233
+ .ok_or_else(|| Error::new(ruby.exception_runtime_error(), "Failed to cast to FixedSizeListArray"))?;
234
+
230
235
  if array.is_null(row_idx) {
231
236
  doc.aset(key, ruby.qnil())?;
232
237
  } else {
233
238
  let values = array.value(row_idx);
234
239
  let float_array = values.as_any().downcast_ref::<Float32Array>()
235
- .ok_or_else(|| Error::new(magnus::exception::runtime_error(), "Failed to cast vector values to Float32Array"))?;
236
-
240
+ .ok_or_else(|| Error::new(ruby.exception_runtime_error(), "Failed to cast vector values to Float32Array"))?;
241
+
237
242
  // CRITICAL: Verify the float_array has the expected size
238
243
  let expected_size = *list_size as usize;
239
244
  if float_array.len() != expected_size {
240
245
  return Err(Error::new(
241
- magnus::exception::runtime_error(),
246
+ ruby.exception_runtime_error(),
242
247
  format!("Vector data corruption: expected {} elements but found {} for field '{}'",
243
248
  expected_size, float_array.len(), field.name())
244
249
  ));
245
250
  }
246
-
251
+
247
252
  let ruby_array = ruby.ary_new();
248
253
  for i in 0..expected_size {
249
254
  ruby_array.push(float_array.value(i))?;
@@ -256,9 +261,9 @@ pub fn convert_batch_to_ruby(batch: &RecordBatch) -> Result<RArray, Error> {
256
261
  }
257
262
  }
258
263
  }
259
-
264
+
260
265
  documents.push(doc)?;
261
266
  }
262
-
267
+
263
268
  Ok(documents)
264
- }
269
+ }