faiss 0.5.2 → 0.5.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 8ab397b95262fa87d28a4f08098de35f487b44e6240682ec8e98c4efc823c4df
4
- data.tar.gz: 719490d3f20bad4531f588e9469d3fa9ec2362cc18f800e913725ffb6b0aeca0
3
+ metadata.gz: 46b01025b31b0b052236ffb42c9d046de719f2c6ffbdff50299e51f339def739
4
+ data.tar.gz: 1d50d77f839ad49dd6827776dca2f7eeea25eb333ccac6d86077a4366a085065
5
5
  SHA512:
6
- metadata.gz: 56e16fad10a1ba95c9727f2e356470e984ca29b44406cf125b1ab529c83fee2c9bf15efa564a543cda331489dbdc56c9c44f4eda4ad53178979370987b6fbf9e
7
- data.tar.gz: 691e9a843eaac7e699dafc15d5505f6febf783ae6d474ee0409cb01d1b1f1575477db13cb7290a4d2cc639692503099fa7d4ff767d3c3034838d05ea0476521a
6
+ metadata.gz: ffd3ca8a5726dacc053da5c489f19212399c780fd41048793ebd9785716e934ac3e158a5a1fa09c11723fa12ab172d65166bb12216f245dd01864cef83b4608f
7
+ data.tar.gz: c485d9b425913d52f3a53c7e229dffa04fe1535de151b2abb5ac2e09a56d4ba4cfcfd8838846eab5b9a18c898115f56fa107640c210001b69ea8fd8a7eac8428
data/CHANGELOG.md CHANGED
@@ -1,3 +1,11 @@
1
+ ## 0.5.3 (2026-02-12)
2
+
3
+ - Added `IndexIDMap2` class
4
+ - Added `remove_ids` method
5
+ - Added `reconstruct_batch` and `reconstruct_n` methods
6
+ - Fixed error when searching frozen indexes
7
+ - Fixed memory leak with frozen exceptions
8
+
1
9
  ## 0.5.2 (2026-01-02)
2
10
 
3
11
  - Fixed error with Rice 4.8
data/LICENSE.txt CHANGED
@@ -1,7 +1,7 @@
1
1
  MIT License
2
2
 
3
3
  Copyright (c) Facebook, Inc. and its affiliates.
4
- Copyright (c) 2020-2025 Andrew Kane
4
+ Copyright (c) 2020-2026 Andrew Kane
5
5
 
6
6
  Permission is hereby granted, free of charge, to any person obtaining a copy
7
7
  of this software and associated documentation files (the "Software"), to deal
data/ext/faiss/index.cpp CHANGED
@@ -1,4 +1,6 @@
1
+ #include <algorithm>
1
2
  #include <string>
3
+ #include <vector>
2
4
 
3
5
  #include <faiss/AutoTune.h>
4
6
  #include <faiss/Index.h>
@@ -108,7 +110,7 @@ void init_index(Rice::Module& m) {
108
110
  .define_method(
109
111
  "train",
110
112
  [](Rice::Object rb_self, numo::SFloat objects) {
111
- rb_check_frozen(rb_self.value());
113
+ check_frozen(rb_self);
112
114
 
113
115
  auto &self = *Rice::Data_Object<faiss::Index>{rb_self};
114
116
  auto n = check_shape(objects, self.d);
@@ -117,7 +119,7 @@ void init_index(Rice::Module& m) {
117
119
  .define_method(
118
120
  "add",
119
121
  [](Rice::Object rb_self, numo::SFloat objects) {
120
- rb_check_frozen(rb_self.value());
122
+ check_frozen(rb_self);
121
123
 
122
124
  auto &self = *Rice::Data_Object<faiss::Index>{rb_self};
123
125
  auto n = check_shape(objects, self.d);
@@ -126,7 +128,7 @@ void init_index(Rice::Module& m) {
126
128
  .define_method(
127
129
  "add_with_ids",
128
130
  [](Rice::Object rb_self, numo::SFloat objects, numo::Int64 ids) {
129
- rb_check_frozen(rb_self.value());
131
+ check_frozen(rb_self);
130
132
 
131
133
  auto &self = *Rice::Data_Object<faiss::Index>{rb_self};
132
134
  auto n = check_shape(objects, self.d);
@@ -135,6 +137,19 @@ void init_index(Rice::Module& m) {
135
137
  }
136
138
  self.add_with_ids(n, objects.read_ptr(), ids.read_ptr());
137
139
  })
140
+ .define_method(
141
+ "remove_ids",
142
+ [](Rice::Object rb_self, numo::Int64 ids) {
143
+ check_frozen(rb_self);
144
+
145
+ auto &self = *Rice::Data_Object<faiss::Index>{rb_self};
146
+ if (ids.ndim() != 1) {
147
+ throw Rice::Exception(rb_eArgError, "expected ids to be 1d array");
148
+ }
149
+ auto n = ids.shape()[0];
150
+ faiss::IDSelectorBatch sel(n, ids.read_ptr());
151
+ return self.remove_ids(sel);
152
+ })
138
153
  .define_method(
139
154
  "search",
140
155
  [](Rice::Object rb_self, numo::SFloat objects, size_t k) {
@@ -145,9 +160,18 @@ void init_index(Rice::Module& m) {
145
160
  auto labels = numo::Int64({n, k});
146
161
 
147
162
  if (rb_self.is_frozen()) {
163
+ // Don't mess with Ruby-owned memory while the GVL is released
164
+ auto objects_ptr = objects.read_ptr();
165
+ std::vector<float> objects_vec(objects_ptr, objects_ptr + n * self.d);
166
+ std::vector<float> distances_vec(n * k);
167
+ std::vector<int64_t> labels_vec(n * k);
168
+
148
169
  Rice::detail::no_gvl([&] {
149
- self.search(n, objects.read_ptr(), k, distances.write_ptr(), labels.write_ptr());
170
+ self.search(n, objects_vec.data(), k, distances_vec.data(), labels_vec.data());
150
171
  });
172
+
173
+ std::copy(distances_vec.begin(), distances_vec.end(), distances.write_ptr());
174
+ std::copy(labels_vec.begin(), labels_vec.end(), labels.write_ptr());
151
175
  } else {
152
176
  self.search(n, objects.read_ptr(), k, distances.write_ptr(), labels.write_ptr());
153
177
  }
@@ -160,7 +184,7 @@ void init_index(Rice::Module& m) {
160
184
  .define_method(
161
185
  "nprobe=",
162
186
  [](Rice::Object rb_self, double val) {
163
- rb_check_frozen(rb_self.value());
187
+ check_frozen(rb_self);
164
188
 
165
189
  auto &self = *Rice::Data_Object<faiss::Index>{rb_self};
166
190
  faiss::ParameterSpace().set_index_parameter(&self, "nprobe", val);
@@ -173,6 +197,34 @@ void init_index(Rice::Module& m) {
173
197
  self.reconstruct(key, recons.write_ptr());
174
198
  return recons;
175
199
  })
200
+ .define_method(
201
+ "reconstruct_batch",
202
+ [](faiss::Index &self, numo::Int64 ids) {
203
+ if (ids.ndim() != 1) {
204
+ throw Rice::Exception(rb_eArgError, "expected ids to be 1d array");
205
+ }
206
+ auto n = static_cast<std::size_t>(ids.shape()[0]);
207
+ auto d = static_cast<std::size_t>(self.d);
208
+ auto recons = numo::SFloat({n, d});
209
+ self.reconstruct_batch(n, ids.read_ptr(), recons.write_ptr());
210
+ return recons;
211
+ })
212
+ .define_method(
213
+ "reconstruct_n",
214
+ [](faiss::Index &self, int64_t i0, int64_t ni) {
215
+ if (ni < 0) {
216
+ throw Rice::Exception(rb_eArgError, "expected n to be non-negative");
217
+ }
218
+ // second condition written this way to avoid overflow
219
+ if (i0 < 0 || i0 > self.ntotal - ni) {
220
+ throw Rice::Exception(rb_eIndexError, "index out of range");
221
+ }
222
+ auto d = static_cast<std::size_t>(self.d);
223
+ auto n = static_cast<std::size_t>(ni);
224
+ auto recons = numo::SFloat({n, d});
225
+ self.reconstruct_n(i0, ni, recons.write_ptr());
226
+ return recons;
227
+ })
176
228
  .define_method(
177
229
  "save",
178
230
  [](faiss::Index &self, Rice::String fname) {
@@ -227,4 +279,7 @@ void init_index(Rice::Module& m) {
227
279
 
228
280
  Rice::define_class_under<faiss::IndexIDMap, faiss::Index>(m, "IndexIDMap")
229
281
  .define_constructor(Rice::Constructor<faiss::IndexIDMap, faiss::Index*>());
282
+
283
+ Rice::define_class_under<faiss::IndexIDMap2, faiss::Index>(m, "IndexIDMap2")
284
+ .define_constructor(Rice::Constructor<faiss::IndexIDMap2, faiss::Index*>());
230
285
  }
data/ext/faiss/index_binary.cpp CHANGED
@@ -1,3 +1,6 @@
1
+ #include <algorithm>
2
+ #include <vector>
3
+
1
4
  #include <faiss/IndexBinary.h>
2
5
  #include <faiss/IndexBinaryFlat.h>
3
6
  #include <faiss/IndexBinaryIVF.h>
@@ -28,7 +31,7 @@ void init_index_binary(Rice::Module& m) {
28
31
  .define_method(
29
32
  "train",
30
33
  [](Rice::Object rb_self, numo::UInt8 objects) {
31
- rb_check_frozen(rb_self.value());
34
+ check_frozen(rb_self);
32
35
 
33
36
  auto &self = *Rice::Data_Object<faiss::IndexBinary>{rb_self};
34
37
  auto n = check_shape(objects, self.d / 8);
@@ -37,12 +40,25 @@ void init_index_binary(Rice::Module& m) {
37
40
  .define_method(
38
41
  "add",
39
42
  [](Rice::Object rb_self, numo::UInt8 objects) {
40
- rb_check_frozen(rb_self.value());
43
+ check_frozen(rb_self);
41
44
 
42
45
  auto &self = *Rice::Data_Object<faiss::IndexBinary>{rb_self};
43
46
  auto n = check_shape(objects, self.d / 8);
44
47
  self.add(n, objects.read_ptr());
45
48
  })
49
+ .define_method(
50
+ "remove_ids",
51
+ [](Rice::Object rb_self, numo::Int64 ids) {
52
+ check_frozen(rb_self);
53
+
54
+ auto &self = *Rice::Data_Object<faiss::IndexBinary>{rb_self};
55
+ if (ids.ndim() != 1) {
56
+ throw Rice::Exception(rb_eArgError, "expected ids to be 1d array");
57
+ }
58
+ auto n = ids.shape()[0];
59
+ faiss::IDSelectorBatch sel(n, ids.read_ptr());
60
+ return self.remove_ids(sel);
61
+ })
46
62
  .define_method(
47
63
  "search",
48
64
  [](Rice::Object rb_self, numo::UInt8 objects, size_t k) {
@@ -53,9 +69,18 @@ void init_index_binary(Rice::Module& m) {
53
69
  auto labels = numo::Int64({n, k});
54
70
 
55
71
  if (rb_self.is_frozen()) {
72
+ // Don't mess with Ruby-owned memory while the GVL is released
73
+ auto objects_ptr = objects.read_ptr();
74
+ std::vector<uint8_t> objects_vec(objects_ptr, objects_ptr + n * (self.d / 8));
75
+ std::vector<int32_t> distances_vec(n * k);
76
+ std::vector<int64_t> labels_vec(n * k);
77
+
56
78
  Rice::detail::no_gvl([&] {
57
- self.search(n, objects.read_ptr(), k, distances.write_ptr(), labels.write_ptr());
79
+ self.search(n, objects_vec.data(), k, distances_vec.data(), labels_vec.data());
58
80
  });
81
+
82
+ std::copy(distances_vec.begin(), distances_vec.end(), distances.write_ptr());
83
+ std::copy(labels_vec.begin(), labels_vec.end(), labels.write_ptr());
59
84
  } else {
60
85
  self.search(n, objects.read_ptr(), k, distances.write_ptr(), labels.write_ptr());
61
86
  }
@@ -65,6 +90,30 @@ void init_index_binary(Rice::Module& m) {
65
90
  ret.push(std::move(labels), false);
66
91
  return ret;
67
92
  })
93
+ .define_method(
94
+ "reconstruct",
95
+ [](faiss::IndexBinary &self, int64_t key) {
96
+ auto d = static_cast<std::size_t>(self.d / 8);
97
+ auto recons = numo::UInt8({d});
98
+ self.reconstruct(key, recons.write_ptr());
99
+ return recons;
100
+ })
101
+ .define_method(
102
+ "reconstruct_n",
103
+ [](faiss::IndexBinary &self, int64_t i0, int64_t ni) {
104
+ if (ni < 0) {
105
+ throw Rice::Exception(rb_eArgError, "expected n to be non-negative");
106
+ }
107
+ // second condition written this way to avoid overflow
108
+ if (i0 < 0 || i0 > self.ntotal - ni) {
109
+ throw Rice::Exception(rb_eIndexError, "index out of range");
110
+ }
111
+ auto d = static_cast<std::size_t>(self.d / 8);
112
+ auto n = static_cast<std::size_t>(ni);
113
+ auto recons = numo::UInt8({n, d});
114
+ self.reconstruct_n(i0, ni, recons.write_ptr());
115
+ return recons;
116
+ })
68
117
  .define_method(
69
118
  "save",
70
119
  [](faiss::IndexBinary &self, Rice::String fname) {
data/ext/faiss/kmeans.cpp CHANGED
@@ -1,3 +1,5 @@
1
+ #include <algorithm>
2
+
1
3
  #include <faiss/Clustering.h>
2
4
  #include <faiss/IndexFlat.h>
3
5
  #include <rice/rice.hpp>
@@ -27,7 +29,7 @@ void init_kmeans(Rice::Module& m) {
27
29
  "centroids",
28
30
  [](faiss::Clustering &self) {
29
31
  auto centroids = numo::SFloat({self.k, self.d});
30
- memcpy(centroids.write_ptr(), self.centroids.data(), self.centroids.size() * sizeof(float));
32
+ std::copy(self.centroids.begin(), self.centroids.end(), centroids.write_ptr());
31
33
  return centroids;
32
34
  })
33
35
  .define_method(
data/ext/faiss/numo.hpp CHANGED
@@ -1,5 +1,5 @@
1
1
  /*!
2
- * Numo.hpp v0.3.0
2
+ * Numo.hpp v0.3.1
3
3
  * https://github.com/ankane/numo.hpp
4
4
  * BSD-2-Clause License
5
5
  */
@@ -38,7 +38,7 @@ public:
38
38
  }
39
39
 
40
40
  bool is_contiguous() const {
41
- return nary_check_contiguous(this->_value) == Qtrue;
41
+ return Rice::detail::protect(nary_check_contiguous, this->_value) == Qtrue;
42
42
  }
43
43
 
44
44
  operator Rice::Object() const {
@@ -46,26 +46,28 @@ public:
46
46
  }
47
47
 
48
48
  const void* read_ptr() {
49
- if (!is_contiguous()) {
50
- this->_value = nary_dup(this->_value);
51
- }
52
- return nary_get_pointer_for_read(this->_value) + nary_get_offset(this->_value);
49
+ return Rice::detail::protect([&]() {
50
+ if (!nary_check_contiguous(this->_value)) {
51
+ this->_value = nary_dup(this->_value);
52
+ }
53
+ return nary_get_pointer_for_read(this->_value) + nary_get_offset(this->_value);
54
+ });
53
55
  }
54
56
 
55
57
  void* write_ptr() {
56
- return nary_get_pointer_for_write(this->_value);
58
+ return Rice::detail::protect(nary_get_pointer_for_write, this->_value);
57
59
  }
58
60
 
59
61
  protected:
60
62
  NArray() { }
61
63
 
62
64
  void construct_value(VALUE dtype, VALUE v) {
63
- this->_value = rb_funcall(dtype, rb_intern("cast"), 1, v);
65
+ this->_value = Rice::detail::protect(rb_funcall, dtype, rb_intern("cast"), 1, v);
64
66
  }
65
67
 
66
68
  void construct_shape(VALUE dtype, std::initializer_list<size_t> shape) {
67
69
  // rb_narray_new doesn't modify shape, but not marked as const
68
- this->_value = rb_narray_new(dtype, shape.size(), const_cast<size_t*>(shape.begin()));
70
+ this->_value = Rice::detail::protect(rb_narray_new, dtype, shape.size(), const_cast<size_t*>(shape.begin()));
69
71
  }
70
72
 
71
73
  VALUE _value;
data/ext/faiss/utils.cpp CHANGED
@@ -14,3 +14,8 @@ size_t check_shape(const numo::NArray& objects, size_t k) {
14
14
  }
15
15
  return shape[0];
16
16
  }
17
+
18
+ void check_frozen(Rice::Object obj) {
19
+ // use protect to avoid skipping destructors and leaking memory
20
+ Rice::detail::protect(rb_check_frozen, obj.value());
21
+ }
data/ext/faiss/utils.h CHANGED
@@ -3,3 +3,5 @@
3
3
  #include "numo.hpp"
4
4
 
5
5
  size_t check_shape(const numo::NArray& objects, size_t k);
6
+
7
+ void check_frozen(Rice::Object obj);
data/lib/faiss/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module Faiss
2
- VERSION = "0.5.2"
2
+ VERSION = "0.5.3"
3
3
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: faiss
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.2
4
+ version: 0.5.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrew Kane