lancelot 0.3.4 → 0.4.0

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,4 +1,4 @@
1
- use magnus::{Error, Ruby, RHash, RArray, Symbol, TryConvert, Value, function, method, RClass, Module, Object};
1
+ use magnus::{Error, Ruby, RHash, RArray, Value, function, method, RClass, Module, Object};
2
2
  use std::cell::RefCell;
3
3
  use std::sync::Arc;
4
4
  use tokio::runtime::Runtime;
@@ -41,9 +41,10 @@ pub struct LancelotDataset {
41
41
 
42
42
  impl LancelotDataset {
43
43
  pub fn new(path: String) -> Result<Self, Error> {
44
+ let ruby = Ruby::get().unwrap();
44
45
  let runtime = Runtime::new()
45
- .map_err(|e| Error::new(magnus::exception::runtime_error(), e.to_string()))?;
46
-
46
+ .map_err(|e| Error::new(ruby.exception_runtime_error(), e.to_string()))?;
47
+
47
48
  Ok(Self {
48
49
  dataset: RefCell::new(None),
49
50
  runtime: RefCell::new(runtime),
@@ -56,15 +57,16 @@ impl LancelotDataset {
56
57
  }
57
58
 
58
59
  pub fn create(&self, schema_hash: RHash) -> Result<(), Error> {
59
- let schema = build_arrow_schema(schema_hash)?;
60
-
60
+ let ruby = Ruby::get().unwrap();
61
+ let schema = build_arrow_schema(&ruby, schema_hash)?;
62
+
61
63
  let empty_batch = RecordBatch::new_empty(Arc::new(schema.clone()));
62
64
  let batches = vec![empty_batch];
63
65
  let reader = RecordBatchIterator::new(
64
66
  batches.into_iter().map(Ok),
65
67
  Arc::new(schema)
66
68
  );
67
-
69
+
68
70
  let dataset = self.runtime.borrow_mut().block_on(async {
69
71
  Dataset::write(
70
72
  reader,
@@ -72,7 +74,7 @@ impl LancelotDataset {
72
74
  None,
73
75
  )
74
76
  .await
75
- .map_err(|e| Error::new(magnus::exception::runtime_error(), e.to_string()))
77
+ .map_err(|e| Error::new(ruby.exception_runtime_error(), e.to_string()))
76
78
  })?;
77
79
 
78
80
  self.dataset.replace(Some(dataset));
@@ -80,10 +82,11 @@ impl LancelotDataset {
80
82
  }
81
83
 
82
84
  pub fn open(&self) -> Result<(), Error> {
85
+ let ruby = Ruby::get().unwrap();
83
86
  let dataset = self.runtime.borrow_mut().block_on(async {
84
87
  Dataset::open(&self.path)
85
88
  .await
86
- .map_err(|e| Error::new(magnus::exception::runtime_error(), e.to_string()))
89
+ .map_err(|e| Error::new(ruby.exception_runtime_error(), e.to_string()))
87
90
  })?;
88
91
 
89
92
  self.dataset.replace(Some(dataset));
@@ -91,9 +94,10 @@ impl LancelotDataset {
91
94
  }
92
95
 
93
96
  pub fn add_data(&self, data: RArray) -> Result<(), Error> {
97
+ let ruby = Ruby::get().unwrap();
94
98
  let mut dataset = self.dataset.borrow_mut();
95
99
  let dataset = dataset.as_mut()
96
- .ok_or_else(|| Error::new(magnus::exception::runtime_error(), "Dataset not opened"))?;
100
+ .ok_or_else(|| Error::new(ruby.exception_runtime_error(), "Dataset not opened"))?;
97
101
 
98
102
  // Check if data is empty
99
103
  if data.len() == 0 {
@@ -104,74 +108,75 @@ impl LancelotDataset {
104
108
  let schema = self.runtime.borrow_mut().block_on(async {
105
109
  dataset.schema()
106
110
  });
107
-
111
+
108
112
  // Convert Lance schema to Arrow schema
109
113
  let arrow_schema = schema.into();
110
114
 
111
- let batch = build_record_batch(data, &arrow_schema)?;
115
+ let batch = build_record_batch(&ruby, data, &arrow_schema)?;
112
116
 
113
117
  let batches = vec![batch];
114
118
  let reader = RecordBatchIterator::new(
115
119
  batches.into_iter().map(Ok),
116
120
  Arc::new(arrow_schema)
117
121
  );
118
-
122
+
119
123
  self.runtime.borrow_mut().block_on(async move {
120
124
  dataset.append(reader, None)
121
125
  .await
122
- .map_err(|e| Error::new(magnus::exception::runtime_error(), e.to_string()))
126
+ .map_err(|e| Error::new(ruby.exception_runtime_error(), e.to_string()))
123
127
  })?;
124
128
 
125
129
  Ok(())
126
130
  }
127
131
 
128
132
  pub fn count_rows(&self) -> Result<i64, Error> {
133
+ let ruby = Ruby::get().unwrap();
129
134
  let dataset = self.dataset.borrow();
130
135
  let dataset = dataset.as_ref()
131
- .ok_or_else(|| Error::new(magnus::exception::runtime_error(), "Dataset not opened"))?;
136
+ .ok_or_else(|| Error::new(ruby.exception_runtime_error(), "Dataset not opened"))?;
132
137
 
133
138
  let count = self.runtime.borrow_mut().block_on(async {
134
139
  dataset.count_rows(None)
135
140
  .await
136
- .map_err(|e| Error::new(magnus::exception::runtime_error(), e.to_string()))
141
+ .map_err(|e| Error::new(ruby.exception_runtime_error(), e.to_string()))
137
142
  })?;
138
143
 
139
144
  Ok(count as i64)
140
145
  }
141
146
 
142
147
  pub fn schema(&self) -> Result<RHash, Error> {
148
+ let ruby = Ruby::get().unwrap();
143
149
  let dataset = self.dataset.borrow();
144
150
  let dataset = dataset.as_ref()
145
- .ok_or_else(|| Error::new(magnus::exception::runtime_error(), "Dataset not opened"))?;
151
+ .ok_or_else(|| Error::new(ruby.exception_runtime_error(), "Dataset not opened"))?;
146
152
 
147
153
  // Get the actual schema from the Lance dataset
148
154
  let schema = self.runtime.borrow_mut().block_on(async {
149
155
  dataset.schema()
150
156
  });
151
-
157
+
152
158
  // Convert Lance schema to Arrow schema
153
159
  let arrow_schema: arrow_schema::Schema = schema.into();
154
160
  let arrow_schema = Arc::new(arrow_schema);
155
161
 
156
- let ruby = Ruby::get().unwrap();
157
162
  let hash = ruby.hash_new();
158
-
163
+
159
164
  // Iterate over Arrow schema fields
160
165
  for field in arrow_schema.fields() {
161
- let field_name = Symbol::new(&field.name());
162
-
166
+ let field_name = ruby.to_symbol(field.name());
167
+
163
168
  // Handle vector columns specially
164
169
  if let DataType::FixedSizeList(inner_field, dimension) = field.data_type() {
165
170
  // Check if it's a vector (float list)
166
171
  if matches!(inner_field.data_type(), DataType::Float32 | DataType::Float16) {
167
172
  let vector_info = ruby.hash_new();
168
- vector_info.aset(Symbol::new("type"), "vector")?;
169
- vector_info.aset(Symbol::new("dimension"), *dimension)?;
173
+ vector_info.aset(ruby.to_symbol("type"), "vector")?;
174
+ vector_info.aset(ruby.to_symbol("dimension"), *dimension)?;
170
175
  hash.aset(field_name, vector_info)?;
171
176
  continue;
172
177
  }
173
178
  }
174
-
179
+
175
180
  let field_type = datatype_to_ruby_string(field.data_type());
176
181
  hash.aset(field_name, field_type)?;
177
182
  }
@@ -180,28 +185,28 @@ impl LancelotDataset {
180
185
  }
181
186
 
182
187
  pub fn scan_all(&self) -> Result<RArray, Error> {
188
+ let ruby = Ruby::get().unwrap();
183
189
  let dataset = self.dataset.borrow();
184
190
  let dataset = dataset.as_ref()
185
- .ok_or_else(|| Error::new(magnus::exception::runtime_error(), "Dataset not opened"))?;
191
+ .ok_or_else(|| Error::new(ruby.exception_runtime_error(), "Dataset not opened"))?;
186
192
 
187
193
  let batches: Vec<RecordBatch> = self.runtime.borrow_mut().block_on(async {
188
194
  let scanner = dataset.scan();
189
195
  let stream = scanner
190
196
  .try_into_stream()
191
197
  .await
192
- .map_err(|e| Error::new(magnus::exception::runtime_error(), e.to_string()))?;
193
-
198
+ .map_err(|e| Error::new(ruby.exception_runtime_error(), e.to_string()))?;
199
+
194
200
  stream
195
201
  .try_collect::<Vec<_>>()
196
202
  .await
197
- .map_err(|e| Error::new(magnus::exception::runtime_error(), e.to_string()))
203
+ .map_err(|e| Error::new(ruby.exception_runtime_error(), e.to_string()))
198
204
  })?;
199
205
 
200
- let ruby = Ruby::get().unwrap();
201
206
  let result_array = ruby.ary_new();
202
207
 
203
208
  for batch in batches {
204
- let batch_docs = convert_batch_to_ruby(&batch)?;
209
+ let batch_docs = convert_batch_to_ruby(&ruby, &batch)?;
205
210
  // Merge arrays by pushing each element
206
211
  for i in 0..batch_docs.len() {
207
212
  result_array.push(batch_docs.entry::<Value>(i as isize)?)?;
@@ -212,31 +217,31 @@ impl LancelotDataset {
212
217
  }
213
218
 
214
219
  pub fn scan_limit(&self, limit: i64) -> Result<RArray, Error> {
220
+ let ruby = Ruby::get().unwrap();
215
221
  let dataset = self.dataset.borrow();
216
222
  let dataset = dataset.as_ref()
217
- .ok_or_else(|| Error::new(magnus::exception::runtime_error(), "Dataset not opened"))?;
223
+ .ok_or_else(|| Error::new(ruby.exception_runtime_error(), "Dataset not opened"))?;
218
224
 
219
225
  let batches: Vec<RecordBatch> = self.runtime.borrow_mut().block_on(async {
220
226
  let mut scanner = dataset.scan();
221
227
  scanner.limit(Some(limit), None)
222
- .map_err(|e| Error::new(magnus::exception::runtime_error(), e.to_string()))?;
223
-
228
+ .map_err(|e| Error::new(ruby.exception_runtime_error(), e.to_string()))?;
229
+
224
230
  let stream = scanner
225
231
  .try_into_stream()
226
232
  .await
227
- .map_err(|e| Error::new(magnus::exception::runtime_error(), e.to_string()))?;
228
-
233
+ .map_err(|e| Error::new(ruby.exception_runtime_error(), e.to_string()))?;
234
+
229
235
  stream
230
236
  .try_collect::<Vec<_>>()
231
237
  .await
232
- .map_err(|e| Error::new(magnus::exception::runtime_error(), e.to_string()))
238
+ .map_err(|e| Error::new(ruby.exception_runtime_error(), e.to_string()))
233
239
  })?;
234
240
 
235
- let ruby = Ruby::get().unwrap();
236
241
  let result_array = ruby.ary_new();
237
242
 
238
243
  for batch in batches {
239
- let batch_docs = convert_batch_to_ruby(&batch)?;
244
+ let batch_docs = convert_batch_to_ruby(&ruby, &batch)?;
240
245
  // Merge arrays by pushing each element
241
246
  for i in 0..batch_docs.len() {
242
247
  result_array.push(batch_docs.entry::<Value>(i as isize)?)?;
@@ -247,25 +252,26 @@ impl LancelotDataset {
247
252
  }
248
253
 
249
254
  pub fn create_vector_index(&self, column: String) -> Result<(), Error> {
255
+ let ruby = Ruby::get().unwrap();
250
256
  let mut dataset = self.dataset.borrow_mut();
251
257
  let dataset = dataset.as_mut()
252
- .ok_or_else(|| Error::new(magnus::exception::runtime_error(), "Dataset not opened"))?;
258
+ .ok_or_else(|| Error::new(ruby.exception_runtime_error(), "Dataset not opened"))?;
253
259
 
254
260
  self.runtime.borrow_mut().block_on(async move {
255
261
  // Get row count to determine optimal number of partitions
256
262
  let num_rows = dataset.count_rows(None).await
257
- .map_err(|e| Error::new(magnus::exception::runtime_error(), e.to_string()))?;
258
-
263
+ .map_err(|e| Error::new(ruby.exception_runtime_error(), e.to_string()))?;
264
+
259
265
  // Use fewer partitions for small datasets
260
266
  let num_partitions = if num_rows < 256 {
261
267
  std::cmp::max(1, (num_rows / 4) as usize)
262
268
  } else {
263
269
  256
264
270
  };
265
-
271
+
266
272
  // Create IVF_FLAT vector index parameters
267
273
  let params = VectorIndexParams::ivf_flat(num_partitions, lance_linalg::distance::MetricType::L2);
268
-
274
+
269
275
  dataset.create_index(
270
276
  &[&column],
271
277
  IndexType::Vector,
@@ -274,44 +280,47 @@ impl LancelotDataset {
274
280
  true
275
281
  )
276
282
  .await
277
- .map_err(|e| Error::new(magnus::exception::runtime_error(), e.to_string()))
283
+ .map(|_| ())
284
+ .map_err(|e| Error::new(ruby.exception_runtime_error(), e.to_string()))
278
285
  })
279
286
  }
280
287
 
281
288
  pub fn vector_search(&self, column: String, query_vector: RArray, limit: i64) -> Result<RArray, Error> {
289
+ let ruby = Ruby::get().unwrap();
282
290
  let dataset = self.dataset.borrow();
283
291
  let dataset = dataset.as_ref()
284
- .ok_or_else(|| Error::new(magnus::exception::runtime_error(), "Dataset not opened"))?;
285
-
286
- // Convert Ruby array to Vec<f32>
287
- let vector: Vec<f32> = query_vector
288
- .into_iter()
289
- .map(|v| f64::try_convert(v).map(|f| f as f32))
290
- .collect::<Result<Vec<_>, _>>()?;
292
+ .ok_or_else(|| Error::new(ruby.exception_runtime_error(), "Dataset not opened"))?;
293
+
294
+ // Convert Ruby array to Vec<f32> using index-based iteration
295
+ let len = query_vector.len();
296
+ let mut vector: Vec<f32> = Vec::with_capacity(len);
297
+ for i in 0..len {
298
+ let v: f64 = query_vector.entry(i as isize)?;
299
+ vector.push(v as f32);
300
+ }
291
301
 
292
302
  let batches: Vec<RecordBatch> = self.runtime.borrow_mut().block_on(async {
293
303
  let mut scanner = dataset.scan();
294
-
304
+
295
305
  // Use nearest for vector search
296
306
  scanner.nearest(&column, &Float32Array::from(vector), limit as usize)
297
- .map_err(|e| Error::new(magnus::exception::runtime_error(), e.to_string()))?;
298
-
307
+ .map_err(|e| Error::new(ruby.exception_runtime_error(), e.to_string()))?;
308
+
299
309
  let stream = scanner
300
310
  .try_into_stream()
301
311
  .await
302
- .map_err(|e| Error::new(magnus::exception::runtime_error(), e.to_string()))?;
303
-
312
+ .map_err(|e| Error::new(ruby.exception_runtime_error(), e.to_string()))?;
313
+
304
314
  stream
305
315
  .try_collect::<Vec<_>>()
306
316
  .await
307
- .map_err(|e| Error::new(magnus::exception::runtime_error(), e.to_string()))
317
+ .map_err(|e| Error::new(ruby.exception_runtime_error(), e.to_string()))
308
318
  })?;
309
319
 
310
- let ruby = Ruby::get().unwrap();
311
320
  let result_array = ruby.ary_new();
312
321
 
313
322
  for batch in batches {
314
- let batch_docs = convert_batch_to_ruby(&batch)?;
323
+ let batch_docs = convert_batch_to_ruby(&ruby, &batch)?;
315
324
  // Merge arrays by pushing each element
316
325
  for i in 0..batch_docs.len() {
317
326
  result_array.push(batch_docs.entry::<Value>(i as isize)?)?;
@@ -322,14 +331,15 @@ impl LancelotDataset {
322
331
  }
323
332
 
324
333
  pub fn create_text_index(&self, column: String) -> Result<(), Error> {
334
+ let ruby = Ruby::get().unwrap();
325
335
  let mut dataset = self.dataset.borrow_mut();
326
336
  let dataset = dataset.as_mut()
327
- .ok_or_else(|| Error::new(magnus::exception::runtime_error(), "Dataset not opened"))?;
337
+ .ok_or_else(|| Error::new(ruby.exception_runtime_error(), "Dataset not opened"))?;
328
338
 
329
339
  self.runtime.borrow_mut().block_on(async move {
330
340
  // Create inverted index for full-text search
331
341
  let params = InvertedIndexParams::default();
332
-
342
+
333
343
  dataset.create_index(
334
344
  &[&column],
335
345
  IndexType::Inverted,
@@ -338,46 +348,47 @@ impl LancelotDataset {
338
348
  true
339
349
  )
340
350
  .await
341
- .map_err(|e| Error::new(magnus::exception::runtime_error(), e.to_string()))
351
+ .map(|_| ())
352
+ .map_err(|e| Error::new(ruby.exception_runtime_error(), e.to_string()))
342
353
  })
343
354
  }
344
355
 
345
356
  pub fn text_search(&self, column: String, query: String, limit: i64) -> Result<RArray, Error> {
357
+ let ruby = Ruby::get().unwrap();
346
358
  let dataset = self.dataset.borrow();
347
359
  let dataset = dataset.as_ref()
348
- .ok_or_else(|| Error::new(magnus::exception::runtime_error(), "Dataset not opened"))?;
360
+ .ok_or_else(|| Error::new(ruby.exception_runtime_error(), "Dataset not opened"))?;
349
361
 
350
362
  let batches: Vec<RecordBatch> = self.runtime.borrow_mut().block_on(async {
351
363
  let mut scanner = dataset.scan();
352
-
364
+
353
365
  // Use full-text search with inverted index
354
366
  let fts_query = FullTextSearchQuery::new(query)
355
367
  .with_column(column)
356
- .map_err(|e| Error::new(magnus::exception::runtime_error(), e.to_string()))?;
357
-
368
+ .map_err(|e| Error::new(ruby.exception_runtime_error(), e.to_string()))?;
369
+
358
370
  scanner.full_text_search(fts_query)
359
- .map_err(|e| Error::new(magnus::exception::runtime_error(), e.to_string()))?;
360
-
371
+ .map_err(|e| Error::new(ruby.exception_runtime_error(), e.to_string()))?;
372
+
361
373
  // Apply limit
362
374
  scanner.limit(Some(limit), None)
363
- .map_err(|e| Error::new(magnus::exception::runtime_error(), e.to_string()))?;
364
-
375
+ .map_err(|e| Error::new(ruby.exception_runtime_error(), e.to_string()))?;
376
+
365
377
  let stream = scanner
366
378
  .try_into_stream()
367
379
  .await
368
- .map_err(|e| Error::new(magnus::exception::runtime_error(), e.to_string()))?;
369
-
380
+ .map_err(|e| Error::new(ruby.exception_runtime_error(), e.to_string()))?;
381
+
370
382
  stream
371
383
  .try_collect::<Vec<_>>()
372
384
  .await
373
- .map_err(|e| Error::new(magnus::exception::runtime_error(), e.to_string()))
385
+ .map_err(|e| Error::new(ruby.exception_runtime_error(), e.to_string()))
374
386
  })?;
375
387
 
376
- let ruby = Ruby::get().unwrap();
377
388
  let result_array = ruby.ary_new();
378
389
 
379
390
  for batch in batches {
380
- let batch_docs = convert_batch_to_ruby(&batch)?;
391
+ let batch_docs = convert_batch_to_ruby(&ruby, &batch)?;
381
392
  // Merge arrays by pushing each element
382
393
  for i in 0..batch_docs.len() {
383
394
  result_array.push(batch_docs.entry::<Value>(i as isize)?)?;
@@ -388,47 +399,49 @@ impl LancelotDataset {
388
399
  }
389
400
 
390
401
  pub fn multi_column_text_search(&self, columns: RArray, query: String, limit: i64) -> Result<RArray, Error> {
402
+ let ruby = Ruby::get().unwrap();
391
403
  let dataset = self.dataset.borrow();
392
404
  let dataset = dataset.as_ref()
393
- .ok_or_else(|| Error::new(magnus::exception::runtime_error(), "Dataset not opened"))?;
394
-
395
- // Convert Ruby array of columns to Vec<String>
396
- let columns: Vec<String> = columns
397
- .into_iter()
398
- .map(|v| String::try_convert(v))
399
- .collect::<Result<Vec<_>, _>>()?;
405
+ .ok_or_else(|| Error::new(ruby.exception_runtime_error(), "Dataset not opened"))?;
406
+
407
+ // Convert Ruby array of columns to Vec<String> using index-based iteration
408
+ let len = columns.len();
409
+ let mut cols: Vec<String> = Vec::with_capacity(len);
410
+ for i in 0..len {
411
+ let v: String = columns.entry(i as isize)?;
412
+ cols.push(v);
413
+ }
400
414
 
401
415
  let batches: Vec<RecordBatch> = self.runtime.borrow_mut().block_on(async {
402
416
  let mut scanner = dataset.scan();
403
-
417
+
404
418
  // Create a full-text search query for multiple columns
405
419
  let fts_query = FullTextSearchQuery::new(query)
406
- .with_columns(&columns)
407
- .map_err(|e| Error::new(magnus::exception::runtime_error(), e.to_string()))?;
408
-
420
+ .with_columns(&cols)
421
+ .map_err(|e| Error::new(ruby.exception_runtime_error(), e.to_string()))?;
422
+
409
423
  scanner.full_text_search(fts_query)
410
- .map_err(|e| Error::new(magnus::exception::runtime_error(), e.to_string()))?;
411
-
424
+ .map_err(|e| Error::new(ruby.exception_runtime_error(), e.to_string()))?;
425
+
412
426
  // Apply limit
413
427
  scanner.limit(Some(limit), None)
414
- .map_err(|e| Error::new(magnus::exception::runtime_error(), e.to_string()))?;
415
-
428
+ .map_err(|e| Error::new(ruby.exception_runtime_error(), e.to_string()))?;
429
+
416
430
  let stream = scanner
417
431
  .try_into_stream()
418
432
  .await
419
- .map_err(|e| Error::new(magnus::exception::runtime_error(), e.to_string()))?;
420
-
433
+ .map_err(|e| Error::new(ruby.exception_runtime_error(), e.to_string()))?;
434
+
421
435
  stream
422
436
  .try_collect::<Vec<_>>()
423
437
  .await
424
- .map_err(|e| Error::new(magnus::exception::runtime_error(), e.to_string()))
438
+ .map_err(|e| Error::new(ruby.exception_runtime_error(), e.to_string()))
425
439
  })?;
426
440
 
427
- let ruby = Ruby::get().unwrap();
428
441
  let result_array = ruby.ary_new();
429
442
 
430
443
  for batch in batches {
431
- let batch_docs = convert_batch_to_ruby(&batch)?;
444
+ let batch_docs = convert_batch_to_ruby(&ruby, &batch)?;
432
445
  // Merge arrays by pushing each element
433
446
  for i in 0..batch_docs.len() {
434
447
  result_array.push(batch_docs.entry::<Value>(i as isize)?)?;
@@ -439,39 +452,39 @@ impl LancelotDataset {
439
452
  }
440
453
 
441
454
  pub fn filter_scan(&self, filter_expr: String, limit: Option<i64>) -> Result<RArray, Error> {
455
+ let ruby = Ruby::get().unwrap();
442
456
  let dataset = self.dataset.borrow();
443
457
  let dataset = dataset.as_ref()
444
- .ok_or_else(|| Error::new(magnus::exception::runtime_error(), "Dataset not opened"))?;
458
+ .ok_or_else(|| Error::new(ruby.exception_runtime_error(), "Dataset not opened"))?;
445
459
 
446
460
  let batches: Vec<RecordBatch> = self.runtime.borrow_mut().block_on(async {
447
461
  let mut scanner = dataset.scan();
448
-
462
+
449
463
  // Apply SQL-like filter
450
464
  scanner.filter(&filter_expr)
451
- .map_err(|e| Error::new(magnus::exception::runtime_error(), e.to_string()))?;
452
-
465
+ .map_err(|e| Error::new(ruby.exception_runtime_error(), e.to_string()))?;
466
+
453
467
  // Apply limit if provided
454
468
  if let Some(lim) = limit {
455
469
  scanner.limit(Some(lim), None)
456
- .map_err(|e| Error::new(magnus::exception::runtime_error(), e.to_string()))?;
470
+ .map_err(|e| Error::new(ruby.exception_runtime_error(), e.to_string()))?;
457
471
  }
458
-
472
+
459
473
  let stream = scanner
460
474
  .try_into_stream()
461
475
  .await
462
- .map_err(|e| Error::new(magnus::exception::runtime_error(), e.to_string()))?;
463
-
476
+ .map_err(|e| Error::new(ruby.exception_runtime_error(), e.to_string()))?;
477
+
464
478
  stream
465
479
  .try_collect::<Vec<_>>()
466
480
  .await
467
- .map_err(|e| Error::new(magnus::exception::runtime_error(), e.to_string()))
481
+ .map_err(|e| Error::new(ruby.exception_runtime_error(), e.to_string()))
468
482
  })?;
469
483
 
470
- let ruby = Ruby::get().unwrap();
471
484
  let result_array = ruby.ary_new();
472
485
 
473
486
  for batch in batches {
474
- let batch_docs = convert_batch_to_ruby(&batch)?;
487
+ let batch_docs = convert_batch_to_ruby(&ruby, &batch)?;
475
488
  // Merge arrays by pushing each element
476
489
  for i in 0..batch_docs.len() {
477
490
  result_array.push(batch_docs.entry::<Value>(i as isize)?)?;
@@ -501,4 +514,4 @@ impl LancelotDataset {
501
514
  class.define_method("filter_scan", method!(LancelotDataset::filter_scan, 2))?;
502
515
  Ok(())
503
516
  }
504
- }
517
+ }
@@ -1,4 +1,4 @@
1
- use magnus::{define_module, Error, Ruby, Module};
1
+ use magnus::{Error, Ruby, Module};
2
2
 
3
3
  mod dataset;
4
4
  mod schema;
@@ -8,10 +8,10 @@ use dataset::LancelotDataset;
8
8
 
9
9
  #[magnus::init]
10
10
  fn init(ruby: &Ruby) -> Result<(), Error> {
11
- let module = define_module("Lancelot")?;
12
-
11
+ let module = ruby.define_module("Lancelot")?;
12
+
13
13
  let dataset_class = module.define_class("Dataset", ruby.class_object())?;
14
14
  LancelotDataset::bind(&dataset_class)?;
15
-
15
+
16
16
  Ok(())
17
- }
17
+ }
@@ -1,28 +1,26 @@
1
- use magnus::{Error, RHash, Symbol, Value, TryConvert, r_hash::ForEach, value::ReprValue};
1
+ use magnus::{Error, Ruby, RHash, Symbol, Value, TryConvert, r_hash::ForEach};
2
2
  use arrow_schema::{DataType, Field, Schema as ArrowSchema};
3
3
  use std::sync::Arc;
4
4
 
5
- pub fn build_arrow_schema(schema_hash: RHash) -> Result<ArrowSchema, Error> {
5
+ pub fn build_arrow_schema(ruby: &Ruby, schema_hash: RHash) -> Result<ArrowSchema, Error> {
6
6
  let mut fields = Vec::new();
7
7
 
8
8
  schema_hash.foreach(|key: Symbol, value: Value| {
9
9
  let field_name = key.name()?.to_string();
10
-
11
- let data_type = if value.is_kind_of(magnus::class::hash()) {
12
- let hash = RHash::from_value(value)
13
- .ok_or_else(|| Error::new(magnus::exception::arg_error(), "Invalid hash value"))?;
14
- let type_str: String = hash.fetch(Symbol::new("type"))?;
15
-
10
+
11
+ let data_type = if let Some(hash) = RHash::from_value(value) {
12
+ let type_str: String = hash.fetch(ruby.to_symbol("type"))?;
13
+
16
14
  match type_str.as_str() {
17
15
  "vector" => {
18
- let dimension: i32 = hash.fetch(Symbol::new("dimension"))?;
16
+ let dimension: i32 = hash.fetch(ruby.to_symbol("dimension"))?;
19
17
  DataType::FixedSizeList(
20
18
  Arc::new(Field::new("item", DataType::Float32, true)),
21
19
  dimension,
22
20
  )
23
21
  }
24
22
  _ => return Err(Error::new(
25
- magnus::exception::arg_error(),
23
+ ruby.exception_arg_error(),
26
24
  format!("Unknown field type: {}", type_str)
27
25
  ))
28
26
  }
@@ -36,7 +34,7 @@ pub fn build_arrow_schema(schema_hash: RHash) -> Result<ArrowSchema, Error> {
36
34
  "int64" => DataType::Int64,
37
35
  "boolean" => DataType::Boolean,
38
36
  _ => return Err(Error::new(
39
- magnus::exception::arg_error(),
37
+ ruby.exception_arg_error(),
40
38
  format!("Unknown field type: {}", type_str)
41
39
  ))
42
40
  }
@@ -47,4 +45,4 @@ pub fn build_arrow_schema(schema_hash: RHash) -> Result<ArrowSchema, Error> {
47
45
  })?;
48
46
 
49
47
  Ok(ArrowSchema::new(fields))
50
- }
48
+ }
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Lancelot
4
- VERSION = "0.3.4"
4
+ VERSION = "0.4.0"
5
5
  end
data/lib/lancelot.rb CHANGED
@@ -1,7 +1,20 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require_relative "lancelot/version"
4
- require_relative "lancelot/lancelot"
4
+
5
+ # Load the compiled Rust extension. Precompiled (platform) gems install it into a
6
+ # Ruby-ABI-versioned subdir (lib/lancelot/<major.minor>/lancelot.{so,bundle}) so a
7
+ # single fat gem can carry a binary per Ruby version; source/dev builds place it flat
8
+ # at lib/lancelot/lancelot.{so,bundle}. Try the versioned path first, fall back to the
9
+ # flat one. Resolution goes through $LOAD_PATH (`require`, never `require_relative`)
10
+ # because RubyGems installs native extensions outside the gem's lib/ dir.
11
+ begin
12
+ RUBY_VERSION =~ /(\d+\.\d+)/
13
+ require "lancelot/#{Regexp.last_match(1)}/lancelot"
14
+ rescue LoadError
15
+ require "lancelot/lancelot"
16
+ end
17
+
5
18
  require_relative "lancelot/dataset"
6
19
  require_relative "lancelot/rank_fusion"
7
20