faiss 0.5.2 → 0.5.3
This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +8 -0
- data/LICENSE.txt +1 -1
- data/ext/faiss/index.cpp +60 -5
- data/ext/faiss/index_binary.cpp +52 -3
- data/ext/faiss/kmeans.cpp +3 -1
- data/ext/faiss/numo.hpp +11 -9
- data/ext/faiss/utils.cpp +5 -0
- data/ext/faiss/utils.h +2 -0
- data/lib/faiss/version.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 46b01025b31b0b052236ffb42c9d046de719f2c6ffbdff50299e51f339def739
|
|
4
|
+
data.tar.gz: 1d50d77f839ad49dd6827776dca2f7eeea25eb333ccac6d86077a4366a085065
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: ffd3ca8a5726dacc053da5c489f19212399c780fd41048793ebd9785716e934ac3e158a5a1fa09c11723fa12ab172d65166bb12216f245dd01864cef83b4608f
|
|
7
|
+
data.tar.gz: c485d9b425913d52f3a53c7e229dffa04fe1535de151b2abb5ac2e09a56d4ba4cfcfd8838846eab5b9a18c898115f56fa107640c210001b69ea8fd8a7eac8428
|
data/CHANGELOG.md
CHANGED
|
@@ -1,3 +1,11 @@
|
|
|
1
|
+
## 0.5.3 (2026-02-12)
|
|
2
|
+
|
|
3
|
+
- Added `IndexIDMap2` class
|
|
4
|
+
- Added `remove_ids` method
|
|
5
|
+
- Added `reconstruct_batch` and `reconstruct_n` methods
|
|
6
|
+
- Fixed error when searching frozen indexes
|
|
7
|
+
- Fixed memory leak with frozen exceptions
|
|
8
|
+
|
|
1
9
|
## 0.5.2 (2026-01-02)
|
|
2
10
|
|
|
3
11
|
- Fixed error with Rice 4.8
|
data/LICENSE.txt
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
MIT License
|
|
2
2
|
|
|
3
3
|
Copyright (c) Facebook, Inc. and its affiliates.
|
|
4
|
-
Copyright (c) 2020-
|
|
4
|
+
Copyright (c) 2020-2026 Andrew Kane
|
|
5
5
|
|
|
6
6
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
7
7
|
of this software and associated documentation files (the "Software"), to deal
|
data/ext/faiss/index.cpp
CHANGED
|
@@ -1,4 +1,6 @@
|
|
|
1
|
+
#include <algorithm>
|
|
1
2
|
#include <string>
|
|
3
|
+
#include <vector>
|
|
2
4
|
|
|
3
5
|
#include <faiss/AutoTune.h>
|
|
4
6
|
#include <faiss/Index.h>
|
|
@@ -108,7 +110,7 @@ void init_index(Rice::Module& m) {
|
|
|
108
110
|
.define_method(
|
|
109
111
|
"train",
|
|
110
112
|
[](Rice::Object rb_self, numo::SFloat objects) {
|
|
111
|
-
|
|
113
|
+
check_frozen(rb_self);
|
|
112
114
|
|
|
113
115
|
auto &self = *Rice::Data_Object<faiss::Index>{rb_self};
|
|
114
116
|
auto n = check_shape(objects, self.d);
|
|
@@ -117,7 +119,7 @@ void init_index(Rice::Module& m) {
|
|
|
117
119
|
.define_method(
|
|
118
120
|
"add",
|
|
119
121
|
[](Rice::Object rb_self, numo::SFloat objects) {
|
|
120
|
-
|
|
122
|
+
check_frozen(rb_self);
|
|
121
123
|
|
|
122
124
|
auto &self = *Rice::Data_Object<faiss::Index>{rb_self};
|
|
123
125
|
auto n = check_shape(objects, self.d);
|
|
@@ -126,7 +128,7 @@ void init_index(Rice::Module& m) {
|
|
|
126
128
|
.define_method(
|
|
127
129
|
"add_with_ids",
|
|
128
130
|
[](Rice::Object rb_self, numo::SFloat objects, numo::Int64 ids) {
|
|
129
|
-
|
|
131
|
+
check_frozen(rb_self);
|
|
130
132
|
|
|
131
133
|
auto &self = *Rice::Data_Object<faiss::Index>{rb_self};
|
|
132
134
|
auto n = check_shape(objects, self.d);
|
|
@@ -135,6 +137,19 @@ void init_index(Rice::Module& m) {
|
|
|
135
137
|
}
|
|
136
138
|
self.add_with_ids(n, objects.read_ptr(), ids.read_ptr());
|
|
137
139
|
})
|
|
140
|
+
.define_method(
|
|
141
|
+
"remove_ids",
|
|
142
|
+
[](Rice::Object rb_self, numo::Int64 ids) {
|
|
143
|
+
check_frozen(rb_self);
|
|
144
|
+
|
|
145
|
+
auto &self = *Rice::Data_Object<faiss::Index>{rb_self};
|
|
146
|
+
if (ids.ndim() != 1) {
|
|
147
|
+
throw Rice::Exception(rb_eArgError, "expected ids to be 1d array");
|
|
148
|
+
}
|
|
149
|
+
auto n = ids.shape()[0];
|
|
150
|
+
faiss::IDSelectorBatch sel(n, ids.read_ptr());
|
|
151
|
+
return self.remove_ids(sel);
|
|
152
|
+
})
|
|
138
153
|
.define_method(
|
|
139
154
|
"search",
|
|
140
155
|
[](Rice::Object rb_self, numo::SFloat objects, size_t k) {
|
|
@@ -145,9 +160,18 @@ void init_index(Rice::Module& m) {
|
|
|
145
160
|
auto labels = numo::Int64({n, k});
|
|
146
161
|
|
|
147
162
|
if (rb_self.is_frozen()) {
|
|
163
|
+
// Don't mess with Ruby-owned memory while the GVL is released
|
|
164
|
+
auto objects_ptr = objects.read_ptr();
|
|
165
|
+
std::vector<float> objects_vec(objects_ptr, objects_ptr + n * self.d);
|
|
166
|
+
std::vector<float> distances_vec(n * k);
|
|
167
|
+
std::vector<int64_t> labels_vec(n * k);
|
|
168
|
+
|
|
148
169
|
Rice::detail::no_gvl([&] {
|
|
149
|
-
self.search(n,
|
|
170
|
+
self.search(n, objects_vec.data(), k, distances_vec.data(), labels_vec.data());
|
|
150
171
|
});
|
|
172
|
+
|
|
173
|
+
std::copy(distances_vec.begin(), distances_vec.end(), distances.write_ptr());
|
|
174
|
+
std::copy(labels_vec.begin(), labels_vec.end(), labels.write_ptr());
|
|
151
175
|
} else {
|
|
152
176
|
self.search(n, objects.read_ptr(), k, distances.write_ptr(), labels.write_ptr());
|
|
153
177
|
}
|
|
@@ -160,7 +184,7 @@ void init_index(Rice::Module& m) {
|
|
|
160
184
|
.define_method(
|
|
161
185
|
"nprobe=",
|
|
162
186
|
[](Rice::Object rb_self, double val) {
|
|
163
|
-
|
|
187
|
+
check_frozen(rb_self);
|
|
164
188
|
|
|
165
189
|
auto &self = *Rice::Data_Object<faiss::Index>{rb_self};
|
|
166
190
|
faiss::ParameterSpace().set_index_parameter(&self, "nprobe", val);
|
|
@@ -173,6 +197,34 @@ void init_index(Rice::Module& m) {
|
|
|
173
197
|
self.reconstruct(key, recons.write_ptr());
|
|
174
198
|
return recons;
|
|
175
199
|
})
|
|
200
|
+
.define_method(
|
|
201
|
+
"reconstruct_batch",
|
|
202
|
+
[](faiss::Index &self, numo::Int64 ids) {
|
|
203
|
+
if (ids.ndim() != 1) {
|
|
204
|
+
throw Rice::Exception(rb_eArgError, "expected ids to be 1d array");
|
|
205
|
+
}
|
|
206
|
+
auto n = static_cast<std::size_t>(ids.shape()[0]);
|
|
207
|
+
auto d = static_cast<std::size_t>(self.d);
|
|
208
|
+
auto recons = numo::SFloat({n, d});
|
|
209
|
+
self.reconstruct_batch(n, ids.read_ptr(), recons.write_ptr());
|
|
210
|
+
return recons;
|
|
211
|
+
})
|
|
212
|
+
.define_method(
|
|
213
|
+
"reconstruct_n",
|
|
214
|
+
[](faiss::Index &self, int64_t i0, int64_t ni) {
|
|
215
|
+
if (ni < 0) {
|
|
216
|
+
throw Rice::Exception(rb_eArgError, "expected n to be non-negative");
|
|
217
|
+
}
|
|
218
|
+
// second condition written this way to avoid overflow
|
|
219
|
+
if (i0 < 0 || i0 > self.ntotal - ni) {
|
|
220
|
+
throw Rice::Exception(rb_eIndexError, "index out of range");
|
|
221
|
+
}
|
|
222
|
+
auto d = static_cast<std::size_t>(self.d);
|
|
223
|
+
auto n = static_cast<std::size_t>(ni);
|
|
224
|
+
auto recons = numo::SFloat({n, d});
|
|
225
|
+
self.reconstruct_n(i0, ni, recons.write_ptr());
|
|
226
|
+
return recons;
|
|
227
|
+
})
|
|
176
228
|
.define_method(
|
|
177
229
|
"save",
|
|
178
230
|
[](faiss::Index &self, Rice::String fname) {
|
|
@@ -227,4 +279,7 @@ void init_index(Rice::Module& m) {
|
|
|
227
279
|
|
|
228
280
|
Rice::define_class_under<faiss::IndexIDMap, faiss::Index>(m, "IndexIDMap")
|
|
229
281
|
.define_constructor(Rice::Constructor<faiss::IndexIDMap, faiss::Index*>());
|
|
282
|
+
|
|
283
|
+
Rice::define_class_under<faiss::IndexIDMap2, faiss::Index>(m, "IndexIDMap2")
|
|
284
|
+
.define_constructor(Rice::Constructor<faiss::IndexIDMap2, faiss::Index*>());
|
|
230
285
|
}
|
data/ext/faiss/index_binary.cpp
CHANGED
|
@@ -1,3 +1,6 @@
|
|
|
1
|
+
#include <algorithm>
|
|
2
|
+
#include <vector>
|
|
3
|
+
|
|
1
4
|
#include <faiss/IndexBinary.h>
|
|
2
5
|
#include <faiss/IndexBinaryFlat.h>
|
|
3
6
|
#include <faiss/IndexBinaryIVF.h>
|
|
@@ -28,7 +31,7 @@ void init_index_binary(Rice::Module& m) {
|
|
|
28
31
|
.define_method(
|
|
29
32
|
"train",
|
|
30
33
|
[](Rice::Object rb_self, numo::UInt8 objects) {
|
|
31
|
-
|
|
34
|
+
check_frozen(rb_self);
|
|
32
35
|
|
|
33
36
|
auto &self = *Rice::Data_Object<faiss::IndexBinary>{rb_self};
|
|
34
37
|
auto n = check_shape(objects, self.d / 8);
|
|
@@ -37,12 +40,25 @@ void init_index_binary(Rice::Module& m) {
|
|
|
37
40
|
.define_method(
|
|
38
41
|
"add",
|
|
39
42
|
[](Rice::Object rb_self, numo::UInt8 objects) {
|
|
40
|
-
|
|
43
|
+
check_frozen(rb_self);
|
|
41
44
|
|
|
42
45
|
auto &self = *Rice::Data_Object<faiss::IndexBinary>{rb_self};
|
|
43
46
|
auto n = check_shape(objects, self.d / 8);
|
|
44
47
|
self.add(n, objects.read_ptr());
|
|
45
48
|
})
|
|
49
|
+
.define_method(
|
|
50
|
+
"remove_ids",
|
|
51
|
+
[](Rice::Object rb_self, numo::Int64 ids) {
|
|
52
|
+
check_frozen(rb_self);
|
|
53
|
+
|
|
54
|
+
auto &self = *Rice::Data_Object<faiss::IndexBinary>{rb_self};
|
|
55
|
+
if (ids.ndim() != 1) {
|
|
56
|
+
throw Rice::Exception(rb_eArgError, "expected ids to be 1d array");
|
|
57
|
+
}
|
|
58
|
+
auto n = ids.shape()[0];
|
|
59
|
+
faiss::IDSelectorBatch sel(n, ids.read_ptr());
|
|
60
|
+
return self.remove_ids(sel);
|
|
61
|
+
})
|
|
46
62
|
.define_method(
|
|
47
63
|
"search",
|
|
48
64
|
[](Rice::Object rb_self, numo::UInt8 objects, size_t k) {
|
|
@@ -53,9 +69,18 @@ void init_index_binary(Rice::Module& m) {
|
|
|
53
69
|
auto labels = numo::Int64({n, k});
|
|
54
70
|
|
|
55
71
|
if (rb_self.is_frozen()) {
|
|
72
|
+
// Don't mess with Ruby-owned memory while the GVL is released
|
|
73
|
+
auto objects_ptr = objects.read_ptr();
|
|
74
|
+
std::vector<uint8_t> objects_vec(objects_ptr, objects_ptr + n * (self.d / 8));
|
|
75
|
+
std::vector<int32_t> distances_vec(n * k);
|
|
76
|
+
std::vector<int64_t> labels_vec(n * k);
|
|
77
|
+
|
|
56
78
|
Rice::detail::no_gvl([&] {
|
|
57
|
-
self.search(n,
|
|
79
|
+
self.search(n, objects_vec.data(), k, distances_vec.data(), labels_vec.data());
|
|
58
80
|
});
|
|
81
|
+
|
|
82
|
+
std::copy(distances_vec.begin(), distances_vec.end(), distances.write_ptr());
|
|
83
|
+
std::copy(labels_vec.begin(), labels_vec.end(), labels.write_ptr());
|
|
59
84
|
} else {
|
|
60
85
|
self.search(n, objects.read_ptr(), k, distances.write_ptr(), labels.write_ptr());
|
|
61
86
|
}
|
|
@@ -65,6 +90,30 @@ void init_index_binary(Rice::Module& m) {
|
|
|
65
90
|
ret.push(std::move(labels), false);
|
|
66
91
|
return ret;
|
|
67
92
|
})
|
|
93
|
+
.define_method(
|
|
94
|
+
"reconstruct",
|
|
95
|
+
[](faiss::IndexBinary &self, int64_t key) {
|
|
96
|
+
auto d = static_cast<std::size_t>(self.d / 8);
|
|
97
|
+
auto recons = numo::UInt8({d});
|
|
98
|
+
self.reconstruct(key, recons.write_ptr());
|
|
99
|
+
return recons;
|
|
100
|
+
})
|
|
101
|
+
.define_method(
|
|
102
|
+
"reconstruct_n",
|
|
103
|
+
[](faiss::IndexBinary &self, int64_t i0, int64_t ni) {
|
|
104
|
+
if (ni < 0) {
|
|
105
|
+
throw Rice::Exception(rb_eArgError, "expected n to be non-negative");
|
|
106
|
+
}
|
|
107
|
+
// second condition written this way to avoid overflow
|
|
108
|
+
if (i0 < 0 || i0 > self.ntotal - ni) {
|
|
109
|
+
throw Rice::Exception(rb_eIndexError, "index out of range");
|
|
110
|
+
}
|
|
111
|
+
auto d = static_cast<std::size_t>(self.d / 8);
|
|
112
|
+
auto n = static_cast<std::size_t>(ni);
|
|
113
|
+
auto recons = numo::UInt8({n, d});
|
|
114
|
+
self.reconstruct_n(i0, ni, recons.write_ptr());
|
|
115
|
+
return recons;
|
|
116
|
+
})
|
|
68
117
|
.define_method(
|
|
69
118
|
"save",
|
|
70
119
|
[](faiss::IndexBinary &self, Rice::String fname) {
|
data/ext/faiss/kmeans.cpp
CHANGED
|
@@ -1,3 +1,5 @@
|
|
|
1
|
+
#include <algorithm>
|
|
2
|
+
|
|
1
3
|
#include <faiss/Clustering.h>
|
|
2
4
|
#include <faiss/IndexFlat.h>
|
|
3
5
|
#include <rice/rice.hpp>
|
|
@@ -27,7 +29,7 @@ void init_kmeans(Rice::Module& m) {
|
|
|
27
29
|
"centroids",
|
|
28
30
|
[](faiss::Clustering &self) {
|
|
29
31
|
auto centroids = numo::SFloat({self.k, self.d});
|
|
30
|
-
|
|
32
|
+
std::copy(self.centroids.begin(), self.centroids.end(), centroids.write_ptr());
|
|
31
33
|
return centroids;
|
|
32
34
|
})
|
|
33
35
|
.define_method(
|
data/ext/faiss/numo.hpp
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
/*!
|
|
2
|
-
* Numo.hpp v0.3.
|
|
2
|
+
* Numo.hpp v0.3.1
|
|
3
3
|
* https://github.com/ankane/numo.hpp
|
|
4
4
|
* BSD-2-Clause License
|
|
5
5
|
*/
|
|
@@ -38,7 +38,7 @@ public:
|
|
|
38
38
|
}
|
|
39
39
|
|
|
40
40
|
bool is_contiguous() const {
|
|
41
|
-
return nary_check_contiguous
|
|
41
|
+
return Rice::detail::protect(nary_check_contiguous, this->_value) == Qtrue;
|
|
42
42
|
}
|
|
43
43
|
|
|
44
44
|
operator Rice::Object() const {
|
|
@@ -46,26 +46,28 @@ public:
|
|
|
46
46
|
}
|
|
47
47
|
|
|
48
48
|
const void* read_ptr() {
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
49
|
+
return Rice::detail::protect([&]() {
|
|
50
|
+
if (!nary_check_contiguous(this->_value)) {
|
|
51
|
+
this->_value = nary_dup(this->_value);
|
|
52
|
+
}
|
|
53
|
+
return nary_get_pointer_for_read(this->_value) + nary_get_offset(this->_value);
|
|
54
|
+
});
|
|
53
55
|
}
|
|
54
56
|
|
|
55
57
|
void* write_ptr() {
|
|
56
|
-
return nary_get_pointer_for_write
|
|
58
|
+
return Rice::detail::protect(nary_get_pointer_for_write, this->_value);
|
|
57
59
|
}
|
|
58
60
|
|
|
59
61
|
protected:
|
|
60
62
|
NArray() { }
|
|
61
63
|
|
|
62
64
|
void construct_value(VALUE dtype, VALUE v) {
|
|
63
|
-
this->_value = rb_funcall
|
|
65
|
+
this->_value = Rice::detail::protect(rb_funcall, dtype, rb_intern("cast"), 1, v);
|
|
64
66
|
}
|
|
65
67
|
|
|
66
68
|
void construct_shape(VALUE dtype, std::initializer_list<size_t> shape) {
|
|
67
69
|
// rb_narray_new doesn't modify shape, but not marked as const
|
|
68
|
-
this->_value = rb_narray_new
|
|
70
|
+
this->_value = Rice::detail::protect(rb_narray_new, dtype, shape.size(), const_cast<size_t*>(shape.begin()));
|
|
69
71
|
}
|
|
70
72
|
|
|
71
73
|
VALUE _value;
|
data/ext/faiss/utils.cpp
CHANGED
data/ext/faiss/utils.h
CHANGED
data/lib/faiss/version.rb
CHANGED