zvec-ruby 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,771 @@
1
+ #include <rice/rice.hpp>
2
+ #include <rice/stl.hpp>
3
+
4
+ #include <zvec/db/collection.h>
5
+ #include <zvec/db/config.h>
6
+ #include <zvec/db/doc.h>
7
+ #include <zvec/db/index_params.h>
8
+ #include <zvec/db/options.h>
9
+ #include <zvec/db/query_params.h>
10
+ #include <zvec/db/schema.h>
11
+ #include <zvec/db/stats.h>
12
+ #include <zvec/db/status.h>
13
+ #include <zvec/db/type.h>
14
+
15
+ #include <cstring>
16
+ #include <memory>
17
+ #include <string>
18
+ #include <vector>
19
+
20
+ using namespace Rice;
21
+
22
+ // ---------- helpers ----------
23
+
24
+ static void throw_if_error(const zvec::Status &status) {
25
+ if (status.ok()) return;
26
+ switch (status.code()) {
27
+ case zvec::StatusCode::NOT_FOUND:
28
+ throw Exception(rb_eKeyError, "%s", status.message().c_str());
29
+ case zvec::StatusCode::INVALID_ARGUMENT:
30
+ throw Exception(rb_eArgError, "%s", status.message().c_str());
31
+ default:
32
+ throw Exception(rb_eRuntimeError, "%s", status.message().c_str());
33
+ }
34
+ }
35
+
36
+ template <typename T>
37
+ static T unwrap(const zvec::Result<T> &result) {
38
+ if (result.has_value()) return result.value();
39
+ throw_if_error(result.error());
40
+ // unreachable
41
+ throw Exception(rb_eRuntimeError, "unexpected error");
42
+ }
43
+
44
+ // Convert Ruby Array of floats to a binary string (fp32 query vector)
45
+ static std::string floats_to_query_bytes(Rice::Array rb_arr) {
46
+ size_t n = rb_arr.size();
47
+ std::vector<float> buf(n);
48
+ for (size_t i = 0; i < n; ++i) {
49
+ buf[i] = Rice::detail::From_Ruby<float>().convert(rb_arr[i].value());
50
+ }
51
+ std::string s(reinterpret_cast<const char *>(buf.data()),
52
+ buf.size() * sizeof(float));
53
+ return s;
54
+ }
55
+
56
+ // ---------- Rice module ----------
57
+
58
+ extern "C" void Init_zvec_ext() {
59
+ Module rb_mZvec = define_module("Zvec");
60
+ Module rb_mExt = define_module_under(rb_mZvec, "Ext");
61
+
62
+ // ---- enums ----
63
+
64
+ define_enum_under<zvec::DataType>("DataType", rb_mExt)
65
+ .define_value("UNDEFINED", zvec::DataType::UNDEFINED)
66
+ .define_value("BINARY", zvec::DataType::BINARY)
67
+ .define_value("STRING", zvec::DataType::STRING)
68
+ .define_value("BOOL", zvec::DataType::BOOL)
69
+ .define_value("INT32", zvec::DataType::INT32)
70
+ .define_value("INT64", zvec::DataType::INT64)
71
+ .define_value("UINT32", zvec::DataType::UINT32)
72
+ .define_value("UINT64", zvec::DataType::UINT64)
73
+ .define_value("FLOAT", zvec::DataType::FLOAT)
74
+ .define_value("DOUBLE", zvec::DataType::DOUBLE)
75
+ .define_value("VECTOR_BINARY32", zvec::DataType::VECTOR_BINARY32)
76
+ .define_value("VECTOR_BINARY64", zvec::DataType::VECTOR_BINARY64)
77
+ .define_value("VECTOR_FP16", zvec::DataType::VECTOR_FP16)
78
+ .define_value("VECTOR_FP32", zvec::DataType::VECTOR_FP32)
79
+ .define_value("VECTOR_FP64", zvec::DataType::VECTOR_FP64)
80
+ .define_value("VECTOR_INT4", zvec::DataType::VECTOR_INT4)
81
+ .define_value("VECTOR_INT8", zvec::DataType::VECTOR_INT8)
82
+ .define_value("VECTOR_INT16", zvec::DataType::VECTOR_INT16)
83
+ .define_value("SPARSE_VECTOR_FP16", zvec::DataType::SPARSE_VECTOR_FP16)
84
+ .define_value("SPARSE_VECTOR_FP32", zvec::DataType::SPARSE_VECTOR_FP32)
85
+ .define_value("ARRAY_BINARY", zvec::DataType::ARRAY_BINARY)
86
+ .define_value("ARRAY_STRING", zvec::DataType::ARRAY_STRING)
87
+ .define_value("ARRAY_BOOL", zvec::DataType::ARRAY_BOOL)
88
+ .define_value("ARRAY_INT32", zvec::DataType::ARRAY_INT32)
89
+ .define_value("ARRAY_INT64", zvec::DataType::ARRAY_INT64)
90
+ .define_value("ARRAY_UINT32", zvec::DataType::ARRAY_UINT32)
91
+ .define_value("ARRAY_UINT64", zvec::DataType::ARRAY_UINT64)
92
+ .define_value("ARRAY_FLOAT", zvec::DataType::ARRAY_FLOAT)
93
+ .define_value("ARRAY_DOUBLE", zvec::DataType::ARRAY_DOUBLE);
94
+
95
+ define_enum_under<zvec::IndexType>("IndexType", rb_mExt)
96
+ .define_value("UNDEFINED", zvec::IndexType::UNDEFINED)
97
+ .define_value("HNSW", zvec::IndexType::HNSW)
98
+ .define_value("IVF", zvec::IndexType::IVF)
99
+ .define_value("FLAT", zvec::IndexType::FLAT)
100
+ .define_value("INVERT", zvec::IndexType::INVERT);
101
+
102
+ define_enum_under<zvec::MetricType>("MetricType", rb_mExt)
103
+ .define_value("UNDEFINED", zvec::MetricType::UNDEFINED)
104
+ .define_value("L2", zvec::MetricType::L2)
105
+ .define_value("IP", zvec::MetricType::IP)
106
+ .define_value("COSINE", zvec::MetricType::COSINE)
107
+ .define_value("MIPSL2", zvec::MetricType::MIPSL2);
108
+
109
+ define_enum_under<zvec::QuantizeType>("QuantizeType", rb_mExt)
110
+ .define_value("UNDEFINED", zvec::QuantizeType::UNDEFINED)
111
+ .define_value("FP16", zvec::QuantizeType::FP16)
112
+ .define_value("INT8", zvec::QuantizeType::INT8)
113
+ .define_value("INT4", zvec::QuantizeType::INT4);
114
+
115
+ // ---- Status ----
116
+
117
+ define_class_under<zvec::Status>(rb_mExt, "Status")
118
+ .define_method("ok?", &zvec::Status::ok)
119
+ .define_method("code",
120
+ [](const zvec::Status &s) {
121
+ return static_cast<uint32_t>(s.code());
122
+ })
123
+ .define_method("message", &zvec::Status::message)
124
+ .define_method("to_s", [](const zvec::Status &s) {
125
+ if (s.ok()) return std::string("OK");
126
+ return s.message();
127
+ });
128
+
129
+ // ---- CollectionOptions ----
130
+
131
+ define_class_under<zvec::CollectionOptions>(rb_mExt, "CollectionOptions")
132
+ .define_constructor(Constructor<zvec::CollectionOptions>())
133
+ .define_method("read_only?",
134
+ [](const zvec::CollectionOptions &o) {
135
+ return o.read_only_;
136
+ })
137
+ .define_method("read_only=",
138
+ [](zvec::CollectionOptions &o, bool v) {
139
+ o.read_only_ = v;
140
+ })
141
+ .define_method("enable_mmap?",
142
+ [](const zvec::CollectionOptions &o) {
143
+ return o.enable_mmap_;
144
+ })
145
+ .define_method("enable_mmap=",
146
+ [](zvec::CollectionOptions &o, bool v) {
147
+ o.enable_mmap_ = v;
148
+ })
149
+ .define_method(
150
+ "max_buffer_size",
151
+ [](const zvec::CollectionOptions &o) { return o.max_buffer_size_; })
152
+ .define_method("max_buffer_size=",
153
+ [](zvec::CollectionOptions &o, uint32_t v) {
154
+ o.max_buffer_size_ = v;
155
+ });
156
+
157
+ // ---- CollectionStats ----
158
+
159
+ define_class_under<zvec::CollectionStats>(rb_mExt, "CollectionStats")
160
+ .define_method("doc_count",
161
+ [](const zvec::CollectionStats &s) {
162
+ return s.doc_count;
163
+ })
164
+ .define_method("index_completeness",
165
+ [](const zvec::CollectionStats &s) {
166
+ Hash h;
167
+ for (auto &[k, v] : s.index_completeness) {
168
+ h[String(k)] = v;
169
+ }
170
+ return h;
171
+ })
172
+ .define_method("to_s", &zvec::CollectionStats::to_string);
173
+
174
+ // ---- IndexParams ----
175
+
176
+ define_class_under<zvec::IndexParams>(rb_mExt, "IndexParams")
177
+ .define_method(
178
+ "type",
179
+ [](const zvec::IndexParams &p) { return p.type(); })
180
+ .define_method("to_s", &zvec::IndexParams::to_string);
181
+
182
+ define_class_under<zvec::VectorIndexParams, zvec::IndexParams>(
183
+ rb_mExt, "VectorIndexParams")
184
+ .define_method("metric_type", &zvec::VectorIndexParams::metric_type)
185
+ .define_method("quantize_type", &zvec::VectorIndexParams::quantize_type);
186
+
187
+ define_class_under<zvec::HnswIndexParams, zvec::VectorIndexParams>(
188
+ rb_mExt, "HnswIndexParams")
189
+ .define_constructor(
190
+ Constructor<zvec::HnswIndexParams, zvec::MetricType, int, int,
191
+ zvec::QuantizeType>(),
192
+ Arg("metric_type"), Arg("m") = 16, Arg("ef_construction") = 200,
193
+ Arg("quantize_type") = zvec::QuantizeType::UNDEFINED)
194
+ .define_method("m", &zvec::HnswIndexParams::m)
195
+ .define_method("ef_construction",
196
+ &zvec::HnswIndexParams::ef_construction)
197
+ .define_method("metric_type", &zvec::HnswIndexParams::metric_type);
198
+
199
+ define_class_under<zvec::FlatIndexParams, zvec::VectorIndexParams>(
200
+ rb_mExt, "FlatIndexParams")
201
+ .define_constructor(
202
+ Constructor<zvec::FlatIndexParams, zvec::MetricType,
203
+ zvec::QuantizeType>(),
204
+ Arg("metric_type"),
205
+ Arg("quantize_type") = zvec::QuantizeType::UNDEFINED)
206
+ .define_method("metric_type", &zvec::FlatIndexParams::metric_type);
207
+
208
+ define_class_under<zvec::IVFIndexParams, zvec::VectorIndexParams>(
209
+ rb_mExt, "IVFIndexParams")
210
+ .define_constructor(
211
+ Constructor<zvec::IVFIndexParams, zvec::MetricType, int, int, bool,
212
+ zvec::QuantizeType>(),
213
+ Arg("metric_type"), Arg("n_list") = 1024, Arg("n_iters") = 10,
214
+ Arg("use_soar") = false,
215
+ Arg("quantize_type") = zvec::QuantizeType::UNDEFINED)
216
+ .define_method("n_list", &zvec::IVFIndexParams::n_list)
217
+ .define_method("n_iters", &zvec::IVFIndexParams::n_iters)
218
+ .define_method("metric_type", &zvec::IVFIndexParams::metric_type);
219
+
220
+ define_class_under<zvec::InvertIndexParams, zvec::IndexParams>(
221
+ rb_mExt, "InvertIndexParams")
222
+ .define_constructor(
223
+ Constructor<zvec::InvertIndexParams, bool, bool>(),
224
+ Arg("enable_range_optimization") = true,
225
+ Arg("enable_extended_wildcard") = false);
226
+
227
+ // ---- QueryParams ----
228
+
229
+ define_class_under<zvec::QueryParams>(rb_mExt, "QueryParams")
230
+ .define_method("type",
231
+ [](const zvec::QueryParams &p) { return p.type(); });
232
+
233
+ define_class_under<zvec::HnswQueryParams, zvec::QueryParams>(
234
+ rb_mExt, "HnswQueryParams")
235
+ .define_constructor(Constructor<zvec::HnswQueryParams, int>(),
236
+ Arg("ef") = 200)
237
+ .define_method("ef", &zvec::HnswQueryParams::ef);
238
+
239
+ define_class_under<zvec::IVFQueryParams, zvec::QueryParams>(
240
+ rb_mExt, "IVFQueryParams")
241
+ .define_constructor(Constructor<zvec::IVFQueryParams, int>(),
242
+ Arg("nprobe") = 10)
243
+ .define_method("nprobe", &zvec::IVFQueryParams::nprobe);
244
+
245
+ define_class_under<zvec::FlatQueryParams, zvec::QueryParams>(
246
+ rb_mExt, "FlatQueryParams")
247
+ .define_constructor(Constructor<zvec::FlatQueryParams>());
248
+
249
+ // ---- FieldSchema ----
250
+
251
+ define_class_under<zvec::FieldSchema>(rb_mExt, "FieldSchema")
252
+ .define_constructor(
253
+ Constructor<zvec::FieldSchema, const std::string &, zvec::DataType>(),
254
+ Arg("name"), Arg("data_type"))
255
+ .define_method("name", &zvec::FieldSchema::name)
256
+ .define_method("data_type", &zvec::FieldSchema::data_type)
257
+ .define_method("dimension", &zvec::FieldSchema::dimension)
258
+ .define_method("dimension=", &zvec::FieldSchema::set_dimension)
259
+ .define_method("nullable?", &zvec::FieldSchema::nullable)
260
+ .define_method("nullable=", &zvec::FieldSchema::set_nullable)
261
+ .define_method("vector_field?",
262
+ [](const zvec::FieldSchema &f) {
263
+ return f.is_vector_field();
264
+ })
265
+ .define_method("index_type", &zvec::FieldSchema::index_type)
266
+ .define_method("set_index_params",
267
+ [](zvec::FieldSchema &f,
268
+ const zvec::IndexParams &params) {
269
+ auto ptr = params.clone();
270
+ f.set_index_params(ptr);
271
+ })
272
+ .define_method("to_s", &zvec::FieldSchema::to_string);
273
+
274
+ // ---- CollectionSchema ----
275
+
276
+ define_class_under<zvec::CollectionSchema>(rb_mExt, "CollectionSchema")
277
+ .define_constructor(Constructor<zvec::CollectionSchema, const std::string &>(),
278
+ Arg("name"))
279
+ .define_method("name", &zvec::CollectionSchema::name)
280
+ .define_method("add_field",
281
+ [](zvec::CollectionSchema &s,
282
+ const zvec::FieldSchema &field) {
283
+ auto ptr = std::make_shared<zvec::FieldSchema>(field);
284
+ auto status = s.add_field(ptr);
285
+ throw_if_error(status);
286
+ })
287
+ .define_method("has_field?", &zvec::CollectionSchema::has_field)
288
+ .define_method("fields",
289
+ [](const zvec::CollectionSchema &s) {
290
+ auto fields = s.fields();
291
+ Array arr;
292
+ for (auto &ptr : fields) arr.push(*ptr);
293
+ return arr;
294
+ })
295
+ .define_method("field_names",
296
+ [](const zvec::CollectionSchema &s) {
297
+ auto names = s.all_field_names();
298
+ Array arr;
299
+ for (auto &n : names) arr.push(Rice::Object(String(n)));
300
+ return arr;
301
+ })
302
+ .define_method("vector_fields",
303
+ [](const zvec::CollectionSchema &s) {
304
+ auto fields = s.vector_fields();
305
+ Array arr;
306
+ for (auto &ptr : fields) arr.push(*ptr);
307
+ return arr;
308
+ })
309
+ .define_method("forward_fields",
310
+ [](const zvec::CollectionSchema &s) {
311
+ auto fields = s.forward_fields();
312
+ Array arr;
313
+ for (auto &ptr : fields) arr.push(*ptr);
314
+ return arr;
315
+ })
316
+ .define_method("to_s", &zvec::CollectionSchema::to_string);
317
+
318
+ // ---- CreateIndexOptions, OptimizeOptions ----
319
+
320
+ define_class_under<zvec::CreateIndexOptions>(rb_mExt, "CreateIndexOptions")
321
+ .define_constructor(Constructor<zvec::CreateIndexOptions>())
322
+ .define_method(
323
+ "concurrency",
324
+ [](const zvec::CreateIndexOptions &o) { return o.concurrency_; })
325
+ .define_method("concurrency=",
326
+ [](zvec::CreateIndexOptions &o, int v) {
327
+ o.concurrency_ = v;
328
+ });
329
+
330
+ define_class_under<zvec::OptimizeOptions>(rb_mExt, "OptimizeOptions")
331
+ .define_constructor(Constructor<zvec::OptimizeOptions>())
332
+ .define_method(
333
+ "concurrency",
334
+ [](const zvec::OptimizeOptions &o) { return o.concurrency_; })
335
+ .define_method("concurrency=", [](zvec::OptimizeOptions &o, int v) {
336
+ o.concurrency_ = v;
337
+ });
338
+
339
+ // ---- Doc ----
340
+
341
+ define_class_under<zvec::Doc>(rb_mExt, "Doc")
342
+ .define_constructor(Constructor<zvec::Doc>())
343
+ .define_method("pk", &zvec::Doc::pk)
344
+ .define_method("pk=", &zvec::Doc::set_pk)
345
+ .define_method("score", &zvec::Doc::score)
346
+ .define_method("score=", &zvec::Doc::set_score)
347
+ .define_method("field_names",
348
+ [](const zvec::Doc &d) {
349
+ auto names = d.field_names();
350
+ Array arr;
351
+ for (auto &n : names) arr.push(Rice::Object(String(n)));
352
+ return arr;
353
+ })
354
+ .define_method("has?", &zvec::Doc::has)
355
+ .define_method("has_value?", &zvec::Doc::has_value)
356
+ .define_method("empty?", &zvec::Doc::is_empty)
357
+ .define_method("set_null", &zvec::Doc::set_null)
358
+ .define_method("to_s", &zvec::Doc::to_string)
359
+
360
+ // Typed setters
361
+ .define_method("set_string",
362
+ [](zvec::Doc &d, const std::string &f,
363
+ const std::string &v) { return d.set(f, v); })
364
+ .define_method("set_bool",
365
+ [](zvec::Doc &d, const std::string &f, bool v) {
366
+ return d.set(f, v);
367
+ })
368
+ .define_method("set_int32",
369
+ [](zvec::Doc &d, const std::string &f, int32_t v) {
370
+ return d.set(f, v);
371
+ })
372
+ .define_method("set_int64",
373
+ [](zvec::Doc &d, const std::string &f, int64_t v) {
374
+ return d.set(f, v);
375
+ })
376
+ .define_method("set_uint32",
377
+ [](zvec::Doc &d, const std::string &f, uint32_t v) {
378
+ return d.set(f, v);
379
+ })
380
+ .define_method("set_uint64",
381
+ [](zvec::Doc &d, const std::string &f, uint64_t v) {
382
+ return d.set(f, v);
383
+ })
384
+ .define_method("set_float",
385
+ [](zvec::Doc &d, const std::string &f, float v) {
386
+ return d.set(f, v);
387
+ })
388
+ .define_method("set_double",
389
+ [](zvec::Doc &d, const std::string &f, double v) {
390
+ return d.set(f, v);
391
+ })
392
+ .define_method("set_float_vector",
393
+ [](zvec::Doc &d, const std::string &f,
394
+ std::vector<float> v) { return d.set(f, std::move(v)); })
395
+ .define_method("set_double_vector",
396
+ [](zvec::Doc &d, const std::string &f,
397
+ std::vector<double> v) {
398
+ return d.set(f, std::move(v));
399
+ })
400
+ .define_method("set_string_array",
401
+ [](zvec::Doc &d, const std::string &f,
402
+ std::vector<std::string> v) {
403
+ return d.set(f, std::move(v));
404
+ })
405
+
406
+ // Typed getters
407
+ .define_method("get_string",
408
+ [](const zvec::Doc &d, const std::string &f)
409
+ -> Rice::Object {
410
+ auto v = d.get<std::string>(f);
411
+ if (!v) return Rice::Object(Qnil);
412
+ return Rice::Object(String(v.value()));
413
+ })
414
+ .define_method("get_bool",
415
+ [](const zvec::Doc &d,
416
+ const std::string &f) -> Rice::Object {
417
+ auto v = d.get<bool>(f);
418
+ if (!v) return Rice::Object(Qnil);
419
+ return Rice::Object(v.value() ? Qtrue : Qfalse);
420
+ })
421
+ .define_method("get_int32",
422
+ [](const zvec::Doc &d,
423
+ const std::string &f) -> Rice::Object {
424
+ auto v = d.get<int32_t>(f);
425
+ if (!v) return Rice::Object(Qnil);
426
+ return Rice::Object(INT2NUM(v.value()));
427
+ })
428
+ .define_method("get_int64",
429
+ [](const zvec::Doc &d,
430
+ const std::string &f) -> Rice::Object {
431
+ auto v = d.get<int64_t>(f);
432
+ if (!v) return Rice::Object(Qnil);
433
+ return Rice::Object(LONG2NUM(v.value()));
434
+ })
435
+ .define_method("get_float",
436
+ [](const zvec::Doc &d,
437
+ const std::string &f) -> Rice::Object {
438
+ auto v = d.get<float>(f);
439
+ if (!v) return Rice::Object(Qnil);
440
+ return Rice::Object(rb_float_new(v.value()));
441
+ })
442
+ .define_method("get_double",
443
+ [](const zvec::Doc &d,
444
+ const std::string &f) -> Rice::Object {
445
+ auto v = d.get<double>(f);
446
+ if (!v) return Rice::Object(Qnil);
447
+ return Rice::Object(rb_float_new(v.value()));
448
+ })
449
+ .define_method("get_float_vector",
450
+ [](const zvec::Doc &d,
451
+ const std::string &f) -> Rice::Object {
452
+ auto v = d.get<std::vector<float>>(f);
453
+ if (!v) return Rice::Object(Qnil);
454
+ Array arr;
455
+ for (float x : v.value()) arr.push(Rice::Object(rb_float_new(x)));
456
+ return arr;
457
+ })
458
+ .define_method("get_double_vector",
459
+ [](const zvec::Doc &d,
460
+ const std::string &f) -> Rice::Object {
461
+ auto v = d.get<std::vector<double>>(f);
462
+ if (!v) return Rice::Object(Qnil);
463
+ Array arr;
464
+ for (double x : v.value()) arr.push(Rice::Object(rb_float_new(x)));
465
+ return arr;
466
+ })
467
+ .define_method("get_string_array",
468
+ [](const zvec::Doc &d,
469
+ const std::string &f) -> Rice::Object {
470
+ auto v = d.get<std::vector<std::string>>(f);
471
+ if (!v) return Rice::Object(Qnil);
472
+ Array arr;
473
+ for (auto &s : v.value()) arr.push(Rice::Object(String(s)));
474
+ return arr;
475
+ });
476
+
477
+ // ---- VectorQuery ----
478
+
479
+ define_class_under<zvec::VectorQuery>(rb_mExt, "VectorQuery")
480
+ .define_constructor(Constructor<zvec::VectorQuery>())
481
+ .define_method("topk",
482
+ [](const zvec::VectorQuery &q) { return q.topk_; })
483
+ .define_method("topk=",
484
+ [](zvec::VectorQuery &q, int v) { q.topk_ = v; })
485
+ .define_method("field_name",
486
+ [](const zvec::VectorQuery &q) { return q.field_name_; })
487
+ .define_method("field_name=",
488
+ [](zvec::VectorQuery &q, const std::string &v) {
489
+ q.field_name_ = v;
490
+ })
491
+ .define_method("filter",
492
+ [](const zvec::VectorQuery &q) { return q.filter_; })
493
+ .define_method("filter=",
494
+ [](zvec::VectorQuery &q, const std::string &v) {
495
+ q.filter_ = v;
496
+ })
497
+ .define_method("include_vector?",
498
+ [](const zvec::VectorQuery &q) {
499
+ return q.include_vector_;
500
+ })
501
+ .define_method("include_vector=",
502
+ [](zvec::VectorQuery &q, bool v) {
503
+ q.include_vector_ = v;
504
+ })
505
+ .define_method("set_query_vector",
506
+ [](zvec::VectorQuery &q, Rice::Array floats) {
507
+ q.query_vector_ = floats_to_query_bytes(floats);
508
+ })
509
+ .define_method("set_output_fields",
510
+ [](zvec::VectorQuery &q, std::vector<std::string> fields) {
511
+ q.output_fields_ = std::move(fields);
512
+ })
513
+ .define_method("set_hnsw_query_params",
514
+ [](zvec::VectorQuery &q,
515
+ const zvec::HnswQueryParams &params) {
516
+ q.query_params_ = std::make_shared<zvec::HnswQueryParams>(params);
517
+ })
518
+ .define_method("set_ivf_query_params",
519
+ [](zvec::VectorQuery &q,
520
+ const zvec::IVFQueryParams &params) {
521
+ q.query_params_ = std::make_shared<zvec::IVFQueryParams>(params);
522
+ })
523
+ .define_method("set_flat_query_params",
524
+ [](zvec::VectorQuery &q,
525
+ const zvec::FlatQueryParams &params) {
526
+ q.query_params_ = std::make_shared<zvec::FlatQueryParams>(params);
527
+ });
528
+
529
+ // ---- Collection ----
530
+
531
+ define_class_under<zvec::Collection>(rb_mExt, "Collection")
532
+ // Factory methods
533
+ .define_singleton_function(
534
+ "create_and_open",
535
+ [](const std::string &path, const zvec::CollectionSchema &schema,
536
+ const zvec::CollectionOptions &options) {
537
+ return unwrap(
538
+ zvec::Collection::CreateAndOpen(path, schema, options));
539
+ })
540
+ .define_singleton_function(
541
+ "open",
542
+ [](const std::string &path,
543
+ const zvec::CollectionOptions &options) {
544
+ return unwrap(zvec::Collection::Open(path, options));
545
+ })
546
+
547
+ // Properties
548
+ .define_method("path",
549
+ [](const zvec::Collection &c) {
550
+ return unwrap(c.Path());
551
+ })
552
+ .define_method("schema",
553
+ [](const zvec::Collection &c) {
554
+ return unwrap(c.Schema());
555
+ })
556
+ .define_method("options",
557
+ [](const zvec::Collection &c) {
558
+ return unwrap(c.Options());
559
+ })
560
+ .define_method("stats",
561
+ [](const zvec::Collection &c) {
562
+ return unwrap(c.Stats());
563
+ })
564
+
565
+ // DDL
566
+ .define_method("destroy",
567
+ [](zvec::Collection &c) {
568
+ throw_if_error(c.Destroy());
569
+ })
570
+ .define_method("flush",
571
+ [](zvec::Collection &c) { throw_if_error(c.Flush()); })
572
+ .define_method("create_index",
573
+ [](zvec::Collection &c, const std::string &col,
574
+ const zvec::IndexParams &params) {
575
+ auto ptr = params.clone();
576
+ throw_if_error(c.CreateIndex(col, ptr));
577
+ })
578
+ .define_method("drop_index",
579
+ [](zvec::Collection &c, const std::string &col) {
580
+ throw_if_error(c.DropIndex(col));
581
+ })
582
+ .define_method("optimize",
583
+ [](zvec::Collection &c) {
584
+ throw_if_error(c.Optimize());
585
+ })
586
+
587
+ // DML — returns array of [ok, message] pairs
588
+ .define_method("insert",
589
+ [](zvec::Collection &c, std::vector<zvec::Doc> docs) {
590
+ auto statuses = unwrap(c.Insert(docs));
591
+ Array arr;
592
+ for (auto &s : statuses) {
593
+ Array pair;
594
+ pair.push(Rice::Object(s.ok() ? Qtrue : Qfalse));
595
+ pair.push(Rice::Object(String(s.message())));
596
+ arr.push(pair);
597
+ }
598
+ return arr;
599
+ })
600
+ .define_method("upsert",
601
+ [](zvec::Collection &c, std::vector<zvec::Doc> docs) {
602
+ auto statuses = unwrap(c.Upsert(docs));
603
+ Array arr;
604
+ for (auto &s : statuses) {
605
+ Array pair;
606
+ pair.push(Rice::Object(s.ok() ? Qtrue : Qfalse));
607
+ pair.push(Rice::Object(String(s.message())));
608
+ arr.push(pair);
609
+ }
610
+ return arr;
611
+ })
612
+ .define_method("update",
613
+ [](zvec::Collection &c, std::vector<zvec::Doc> docs) {
614
+ auto statuses = unwrap(c.Update(docs));
615
+ Array arr;
616
+ for (auto &s : statuses) {
617
+ Array pair;
618
+ pair.push(Rice::Object(s.ok() ? Qtrue : Qfalse));
619
+ pair.push(Rice::Object(String(s.message())));
620
+ arr.push(pair);
621
+ }
622
+ return arr;
623
+ })
624
+ .define_method("delete_pks",
625
+ [](zvec::Collection &c,
626
+ std::vector<std::string> pks) {
627
+ auto statuses = unwrap(c.Delete(pks));
628
+ Array arr;
629
+ for (auto &s : statuses) {
630
+ Array pair;
631
+ pair.push(Rice::Object(s.ok() ? Qtrue : Qfalse));
632
+ pair.push(Rice::Object(String(s.message())));
633
+ arr.push(pair);
634
+ }
635
+ return arr;
636
+ })
637
+ .define_method("delete_by_filter",
638
+ [](zvec::Collection &c, const std::string &filter) {
639
+ throw_if_error(c.DeleteByFilter(filter));
640
+ })
641
+
642
+ // DQL — query returns Ruby Array of ext Doc objects
643
+ .define_method("query",
644
+ [](zvec::Collection &c,
645
+ const zvec::VectorQuery &q) {
646
+ auto docs = unwrap(c.Query(q));
647
+ // Store result docs in a static-lifetime vector to keep
648
+ // shared_ptrs alive while Ruby holds references
649
+ Array arr;
650
+ for (size_t i = 0; i < docs.size(); ++i) {
651
+ // Build a hash with pk, score, and all fields
652
+ Hash h;
653
+ auto &doc = *docs[i];
654
+ h[Rice::Object(String("pk"))] =
655
+ Rice::Object(String(doc.pk()));
656
+ h[Rice::Object(String("score"))] =
657
+ Rice::Object(rb_float_new(doc.score()));
658
+ h[Rice::Object(String("doc_id"))] =
659
+ Rice::Object(ULONG2NUM(doc.doc_id()));
660
+ for (auto &fname : doc.field_names()) {
661
+ // Try each type for the field
662
+ auto sv = doc.get<std::string>(fname);
663
+ if (sv) {
664
+ h[Rice::Object(String(fname))] =
665
+ Rice::Object(String(sv.value()));
666
+ continue;
667
+ }
668
+ auto bv = doc.get<bool>(fname);
669
+ if (bv) {
670
+ h[Rice::Object(String(fname))] =
671
+ Rice::Object(bv.value() ? Qtrue : Qfalse);
672
+ continue;
673
+ }
674
+ auto iv = doc.get<int32_t>(fname);
675
+ if (iv) {
676
+ h[Rice::Object(String(fname))] =
677
+ Rice::Object(INT2NUM(iv.value()));
678
+ continue;
679
+ }
680
+ auto lv = doc.get<int64_t>(fname);
681
+ if (lv) {
682
+ h[Rice::Object(String(fname))] =
683
+ Rice::Object(LONG2NUM(lv.value()));
684
+ continue;
685
+ }
686
+ auto fv = doc.get<float>(fname);
687
+ if (fv) {
688
+ h[Rice::Object(String(fname))] =
689
+ Rice::Object(rb_float_new(fv.value()));
690
+ continue;
691
+ }
692
+ auto dv = doc.get<double>(fname);
693
+ if (dv) {
694
+ h[Rice::Object(String(fname))] =
695
+ Rice::Object(rb_float_new(dv.value()));
696
+ continue;
697
+ }
698
+ auto vfv = doc.get<std::vector<float>>(fname);
699
+ if (vfv) {
700
+ Array va;
701
+ for (float x : vfv.value())
702
+ va.push(Rice::Object(rb_float_new(x)));
703
+ h[Rice::Object(String(fname))] = va;
704
+ continue;
705
+ }
706
+ }
707
+ arr.push(Rice::Object(h));
708
+ }
709
+ return arr;
710
+ })
711
+ // Fetch — returns Hash of pk => ext Doc
712
+ .define_method("fetch",
713
+ [](const zvec::Collection &c,
714
+ std::vector<std::string> pks) {
715
+ auto result = unwrap(c.Fetch(pks));
716
+ // Same issue with Doc push, return as hash of hashes
717
+ Hash outer;
718
+ for (auto &[k, ptr] : result) {
719
+ Hash h;
720
+ auto &doc = *ptr;
721
+ for (auto &fname : doc.field_names()) {
722
+ auto sv = doc.get<std::string>(fname);
723
+ if (sv) {
724
+ h[Rice::Object(String(fname))] =
725
+ Rice::Object(String(sv.value()));
726
+ continue;
727
+ }
728
+ auto bv = doc.get<bool>(fname);
729
+ if (bv) {
730
+ h[Rice::Object(String(fname))] =
731
+ Rice::Object(bv.value() ? Qtrue : Qfalse);
732
+ continue;
733
+ }
734
+ auto iv = doc.get<int32_t>(fname);
735
+ if (iv) {
736
+ h[Rice::Object(String(fname))] =
737
+ Rice::Object(INT2NUM(iv.value()));
738
+ continue;
739
+ }
740
+ auto lv = doc.get<int64_t>(fname);
741
+ if (lv) {
742
+ h[Rice::Object(String(fname))] =
743
+ Rice::Object(LONG2NUM(lv.value()));
744
+ continue;
745
+ }
746
+ auto fv = doc.get<float>(fname);
747
+ if (fv) {
748
+ h[Rice::Object(String(fname))] =
749
+ Rice::Object(rb_float_new(fv.value()));
750
+ continue;
751
+ }
752
+ auto dv = doc.get<double>(fname);
753
+ if (dv) {
754
+ h[Rice::Object(String(fname))] =
755
+ Rice::Object(rb_float_new(dv.value()));
756
+ continue;
757
+ }
758
+ auto vfv = doc.get<std::vector<float>>(fname);
759
+ if (vfv) {
760
+ Array va;
761
+ for (float x : vfv.value())
762
+ va.push(Rice::Object(rb_float_new(x)));
763
+ h[Rice::Object(String(fname))] = va;
764
+ continue;
765
+ }
766
+ }
767
+ outer[Rice::Object(String(k))] = h;
768
+ }
769
+ return outer;
770
+ });
771
+ }