faiss 0.5.2 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (169)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +16 -0
  3. data/LICENSE.txt +1 -1
  4. data/ext/faiss/ext.cpp +1 -1
  5. data/ext/faiss/extconf.rb +5 -6
  6. data/ext/faiss/index_binary.cpp +76 -17
  7. data/ext/faiss/{index.cpp → index_rb.cpp} +108 -35
  8. data/ext/faiss/kmeans.cpp +12 -9
  9. data/ext/faiss/numo.hpp +11 -9
  10. data/ext/faiss/pca_matrix.cpp +10 -8
  11. data/ext/faiss/product_quantizer.cpp +14 -12
  12. data/ext/faiss/{utils.cpp → utils_rb.cpp} +10 -3
  13. data/ext/faiss/{utils.h → utils_rb.h} +6 -0
  14. data/lib/faiss/version.rb +1 -1
  15. data/lib/faiss.rb +1 -1
  16. data/vendor/faiss/faiss/AutoTune.cpp +130 -11
  17. data/vendor/faiss/faiss/AutoTune.h +14 -1
  18. data/vendor/faiss/faiss/Clustering.cpp +59 -10
  19. data/vendor/faiss/faiss/Clustering.h +12 -0
  20. data/vendor/faiss/faiss/IVFlib.cpp +31 -28
  21. data/vendor/faiss/faiss/Index.cpp +20 -8
  22. data/vendor/faiss/faiss/Index.h +25 -3
  23. data/vendor/faiss/faiss/IndexAdditiveQuantizer.cpp +19 -24
  24. data/vendor/faiss/faiss/IndexBinary.cpp +1 -0
  25. data/vendor/faiss/faiss/IndexBinaryHNSW.cpp +9 -4
  26. data/vendor/faiss/faiss/IndexBinaryIVF.cpp +45 -11
  27. data/vendor/faiss/faiss/IndexFastScan.cpp +35 -22
  28. data/vendor/faiss/faiss/IndexFastScan.h +10 -1
  29. data/vendor/faiss/faiss/IndexFlat.cpp +193 -136
  30. data/vendor/faiss/faiss/IndexFlat.h +16 -1
  31. data/vendor/faiss/faiss/IndexFlatCodes.cpp +46 -22
  32. data/vendor/faiss/faiss/IndexFlatCodes.h +7 -1
  33. data/vendor/faiss/faiss/IndexHNSW.cpp +24 -50
  34. data/vendor/faiss/faiss/IndexHNSW.h +14 -12
  35. data/vendor/faiss/faiss/IndexIDMap.cpp +1 -1
  36. data/vendor/faiss/faiss/IndexIVF.cpp +76 -49
  37. data/vendor/faiss/faiss/IndexIVF.h +14 -4
  38. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.cpp +11 -8
  39. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.h +2 -2
  40. data/vendor/faiss/faiss/IndexIVFFastScan.cpp +25 -14
  41. data/vendor/faiss/faiss/IndexIVFFastScan.h +26 -22
  42. data/vendor/faiss/faiss/IndexIVFFlat.cpp +10 -61
  43. data/vendor/faiss/faiss/IndexIVFFlatPanorama.cpp +39 -111
  44. data/vendor/faiss/faiss/IndexIVFPQ.cpp +89 -147
  45. data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +37 -5
  46. data/vendor/faiss/faiss/IndexIVFPQR.cpp +2 -1
  47. data/vendor/faiss/faiss/IndexIVFRaBitQ.cpp +42 -30
  48. data/vendor/faiss/faiss/IndexIVFRaBitQ.h +2 -2
  49. data/vendor/faiss/faiss/IndexIVFRaBitQFastScan.cpp +246 -97
  50. data/vendor/faiss/faiss/IndexIVFRaBitQFastScan.h +32 -29
  51. data/vendor/faiss/faiss/IndexLSH.cpp +8 -6
  52. data/vendor/faiss/faiss/IndexLattice.cpp +29 -24
  53. data/vendor/faiss/faiss/IndexNNDescent.cpp +1 -0
  54. data/vendor/faiss/faiss/IndexNSG.cpp +2 -1
  55. data/vendor/faiss/faiss/IndexNSG.h +0 -2
  56. data/vendor/faiss/faiss/IndexNeuralNetCodec.cpp +1 -1
  57. data/vendor/faiss/faiss/IndexPQ.cpp +19 -10
  58. data/vendor/faiss/faiss/IndexRaBitQ.cpp +26 -13
  59. data/vendor/faiss/faiss/IndexRaBitQ.h +2 -2
  60. data/vendor/faiss/faiss/IndexRaBitQFastScan.cpp +132 -78
  61. data/vendor/faiss/faiss/IndexRaBitQFastScan.h +14 -12
  62. data/vendor/faiss/faiss/IndexRefine.cpp +0 -30
  63. data/vendor/faiss/faiss/IndexShards.cpp +3 -4
  64. data/vendor/faiss/faiss/MetricType.h +16 -0
  65. data/vendor/faiss/faiss/VectorTransform.cpp +120 -0
  66. data/vendor/faiss/faiss/VectorTransform.h +23 -0
  67. data/vendor/faiss/faiss/clone_index.cpp +7 -4
  68. data/vendor/faiss/faiss/{cppcontrib/factory_tools.cpp → factory_tools.cpp} +1 -1
  69. data/vendor/faiss/faiss/gpu/GpuCloner.cpp +1 -1
  70. data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +37 -11
  71. data/vendor/faiss/faiss/impl/AuxIndexStructures.h +0 -28
  72. data/vendor/faiss/faiss/impl/ClusteringInitialization.cpp +367 -0
  73. data/vendor/faiss/faiss/impl/ClusteringInitialization.h +107 -0
  74. data/vendor/faiss/faiss/impl/CodePacker.cpp +4 -0
  75. data/vendor/faiss/faiss/impl/CodePacker.h +11 -3
  76. data/vendor/faiss/faiss/impl/CodePackerRaBitQ.cpp +83 -0
  77. data/vendor/faiss/faiss/impl/CodePackerRaBitQ.h +47 -0
  78. data/vendor/faiss/faiss/impl/FaissAssert.h +60 -2
  79. data/vendor/faiss/faiss/impl/HNSW.cpp +25 -34
  80. data/vendor/faiss/faiss/impl/HNSW.h +8 -6
  81. data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +34 -27
  82. data/vendor/faiss/faiss/impl/NNDescent.cpp +1 -1
  83. data/vendor/faiss/faiss/impl/NSG.cpp +6 -5
  84. data/vendor/faiss/faiss/impl/NSG.h +17 -7
  85. data/vendor/faiss/faiss/impl/Panorama.cpp +53 -46
  86. data/vendor/faiss/faiss/impl/Panorama.h +22 -6
  87. data/vendor/faiss/faiss/impl/PolysemousTraining.cpp +16 -5
  88. data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +70 -58
  89. data/vendor/faiss/faiss/impl/RaBitQUtils.cpp +92 -0
  90. data/vendor/faiss/faiss/impl/RaBitQUtils.h +93 -31
  91. data/vendor/faiss/faiss/impl/RaBitQuantizer.cpp +12 -28
  92. data/vendor/faiss/faiss/impl/RaBitQuantizer.h +3 -10
  93. data/vendor/faiss/faiss/impl/RaBitQuantizerMultiBit.cpp +15 -41
  94. data/vendor/faiss/faiss/impl/RaBitQuantizerMultiBit.h +0 -4
  95. data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +14 -9
  96. data/vendor/faiss/faiss/impl/ResultHandler.h +131 -50
  97. data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +67 -2358
  98. data/vendor/faiss/faiss/impl/ScalarQuantizer.h +0 -2
  99. data/vendor/faiss/faiss/impl/VisitedTable.cpp +42 -0
  100. data/vendor/faiss/faiss/impl/VisitedTable.h +69 -0
  101. data/vendor/faiss/faiss/impl/expanded_scanners.h +158 -0
  102. data/vendor/faiss/faiss/impl/index_read.cpp +829 -471
  103. data/vendor/faiss/faiss/impl/index_read_utils.h +0 -1
  104. data/vendor/faiss/faiss/impl/index_write.cpp +17 -8
  105. data/vendor/faiss/faiss/impl/lattice_Zn.cpp +47 -20
  106. data/vendor/faiss/faiss/impl/mapped_io.cpp +9 -2
  107. data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +7 -2
  108. data/vendor/faiss/faiss/impl/pq4_fast_scan.h +11 -3
  109. data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +19 -13
  110. data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +29 -21
  111. data/vendor/faiss/faiss/impl/{code_distance/code_distance-avx2.h → pq_code_distance/pq_code_distance-avx2.cpp} +42 -215
  112. data/vendor/faiss/faiss/impl/{code_distance/code_distance-avx512.h → pq_code_distance/pq_code_distance-avx512.cpp} +68 -107
  113. data/vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-generic.cpp +141 -0
  114. data/vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-inl.h +23 -0
  115. data/vendor/faiss/faiss/impl/{code_distance/code_distance-sve.h → pq_code_distance/pq_code_distance-sve.cpp} +57 -144
  116. data/vendor/faiss/faiss/impl/residual_quantizer_encode_steps.cpp +9 -6
  117. data/vendor/faiss/faiss/impl/scalar_quantizer/codecs.h +121 -0
  118. data/vendor/faiss/faiss/impl/scalar_quantizer/distance_computers.h +136 -0
  119. data/vendor/faiss/faiss/impl/scalar_quantizer/quantizers.h +280 -0
  120. data/vendor/faiss/faiss/impl/scalar_quantizer/scanners.h +164 -0
  121. data/vendor/faiss/faiss/impl/scalar_quantizer/similarities.h +94 -0
  122. data/vendor/faiss/faiss/impl/scalar_quantizer/sq-avx2.cpp +455 -0
  123. data/vendor/faiss/faiss/impl/scalar_quantizer/sq-avx512.cpp +430 -0
  124. data/vendor/faiss/faiss/impl/scalar_quantizer/sq-dispatch.h +329 -0
  125. data/vendor/faiss/faiss/impl/scalar_quantizer/sq-neon.cpp +467 -0
  126. data/vendor/faiss/faiss/impl/scalar_quantizer/training.cpp +203 -0
  127. data/vendor/faiss/faiss/impl/scalar_quantizer/training.h +42 -0
  128. data/vendor/faiss/faiss/impl/simd_dispatch.h +139 -0
  129. data/vendor/faiss/faiss/impl/simd_result_handlers.h +18 -18
  130. data/vendor/faiss/faiss/index_factory.cpp +35 -16
  131. data/vendor/faiss/faiss/index_io.h +29 -3
  132. data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp +7 -4
  133. data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +1 -1
  134. data/vendor/faiss/faiss/svs/IndexSVSFaissUtils.h +9 -19
  135. data/vendor/faiss/faiss/svs/IndexSVSFlat.h +2 -0
  136. data/vendor/faiss/faiss/svs/IndexSVSVamana.h +2 -1
  137. data/vendor/faiss/faiss/svs/IndexSVSVamanaLeanVec.cpp +9 -1
  138. data/vendor/faiss/faiss/svs/IndexSVSVamanaLeanVec.h +9 -0
  139. data/vendor/faiss/faiss/utils/Heap.cpp +46 -0
  140. data/vendor/faiss/faiss/utils/Heap.h +21 -0
  141. data/vendor/faiss/faiss/utils/NeuralNet.cpp +10 -7
  142. data/vendor/faiss/faiss/utils/distances.cpp +141 -23
  143. data/vendor/faiss/faiss/utils/distances.h +98 -0
  144. data/vendor/faiss/faiss/utils/distances_dispatch.h +170 -0
  145. data/vendor/faiss/faiss/utils/distances_simd.cpp +74 -3511
  146. data/vendor/faiss/faiss/utils/extra_distances-inl.h +164 -157
  147. data/vendor/faiss/faiss/utils/extra_distances.cpp +52 -95
  148. data/vendor/faiss/faiss/utils/extra_distances.h +47 -1
  149. data/vendor/faiss/faiss/utils/hamming_distance/generic-inl.h +0 -1
  150. data/vendor/faiss/faiss/utils/partitioning.cpp +1 -1
  151. data/vendor/faiss/faiss/utils/pq_code_distance.h +251 -0
  152. data/vendor/faiss/faiss/utils/rabitq_simd.h +260 -0
  153. data/vendor/faiss/faiss/utils/simd_impl/distances_aarch64.cpp +150 -0
  154. data/vendor/faiss/faiss/utils/simd_impl/distances_arm_sve.cpp +568 -0
  155. data/vendor/faiss/faiss/utils/simd_impl/distances_autovec-inl.h +153 -0
  156. data/vendor/faiss/faiss/utils/simd_impl/distances_avx2.cpp +1185 -0
  157. data/vendor/faiss/faiss/utils/simd_impl/distances_avx512.cpp +1092 -0
  158. data/vendor/faiss/faiss/utils/simd_impl/distances_sse-inl.h +391 -0
  159. data/vendor/faiss/faiss/utils/simd_levels.cpp +322 -0
  160. data/vendor/faiss/faiss/utils/simd_levels.h +91 -0
  161. data/vendor/faiss/faiss/utils/simdlib_avx2.h +12 -1
  162. data/vendor/faiss/faiss/utils/simdlib_avx512.h +69 -0
  163. data/vendor/faiss/faiss/utils/simdlib_neon.h +6 -0
  164. data/vendor/faiss/faiss/utils/sorting.cpp +4 -4
  165. data/vendor/faiss/faiss/utils/utils.cpp +16 -9
  166. metadata +47 -18
  167. data/vendor/faiss/faiss/impl/code_distance/code_distance-generic.h +0 -81
  168. data/vendor/faiss/faiss/impl/code_distance/code_distance.h +0 -186
  169. data/vendor/faiss/faiss/{cppcontrib/factory_tools.h → factory_tools.h} +0 -0
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 8ab397b95262fa87d28a4f08098de35f487b44e6240682ec8e98c4efc823c4df
4
- data.tar.gz: 719490d3f20bad4531f588e9469d3fa9ec2362cc18f800e913725ffb6b0aeca0
3
+ metadata.gz: 18db7914c6db421beb972845cdf1d6489f179d6347f2cc52252b5908c7dd2db0
4
+ data.tar.gz: fcbc66c8b544a9f96e913c4e4e3d7a8be8c450d56db0332bc3ea1810776e424b
5
5
  SHA512:
6
- metadata.gz: 56e16fad10a1ba95c9727f2e356470e984ca29b44406cf125b1ab529c83fee2c9bf15efa564a543cda331489dbdc56c9c44f4eda4ad53178979370987b6fbf9e
7
- data.tar.gz: 691e9a843eaac7e699dafc15d5505f6febf783ae6d474ee0409cb01d1b1f1575477db13cb7290a4d2cc639692503099fa7d4ff767d3c3034838d05ea0476521a
6
+ metadata.gz: a885807f3c74cb4ce27a5931f7f2b73dc3ec9bd214c06e96d191086a841d1f366e1d8ebbe9af35143c8facd1285070d0f0b46814f5a78d1d55fb0b1821ab3fa3
7
+ data.tar.gz: f7ec06da8dc75e0341763f15501e2bc503ef3001dee29c88a0b0253ce78c38cb5e5b222ec75b26814a7c9f2a69e02e1ff685f108d3a4512ef8bf55e5a42214ce
data/CHANGELOG.md CHANGED
@@ -1,3 +1,19 @@
1
+ ## 0.6.0 (2026-04-06)
2
+
3
+ - Updated Faiss to 1.14.1
4
+ - Added `id_map` method to `IndexIDMap2`
5
+ - Switched to `numo-narray-alt`
6
+ - Dropped support for Ruby < 3.3
7
+ - Dropped support for C++17
8
+
9
+ ## 0.5.3 (2026-02-12)
10
+
11
+ - Added `IndexIDMap2` class
12
+ - Added `remove_ids` method
13
+ - Added `reconstruct_batch` and `reconstruct_n` methods
14
+ - Fixed error when searching frozen indexes
15
+ - Fixed memory leak with frozen exceptions
16
+
1
17
  ## 0.5.2 (2026-01-02)
2
18
 
3
19
  - Fixed error with Rice 4.8
data/LICENSE.txt CHANGED
@@ -1,7 +1,7 @@
1
1
  MIT License
2
2
 
3
3
  Copyright (c) Facebook, Inc. and its affiliates.
4
- Copyright (c) 2020-2025 Andrew Kane
4
+ Copyright (c) 2020-2026 Andrew Kane
5
5
 
6
6
  Permission is hereby granted, free of charge, to any person obtaining a copy
7
7
  of this software and associated documentation files (the "Software"), to deal
data/ext/faiss/ext.cpp CHANGED
@@ -8,7 +8,7 @@ void init_product_quantizer(Rice::Module& m);
8
8
 
9
9
  extern "C"
10
10
  void Init_ext() {
11
- auto m = Rice::define_module("Faiss");
11
+ Rice::Module m = Rice::define_module("Faiss");
12
12
 
13
13
  init_index(m);
14
14
  init_index_binary(m);
data/ext/faiss/extconf.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  require "mkmf-rice"
2
- require "numo/narray"
2
+ require "numo/narray/alt"
3
3
 
4
4
  # libomp changed to keg-only
5
5
  # https://github.com/Homebrew/homebrew-core/issues/112107
@@ -13,14 +13,13 @@ abort "BLAS not found" unless have_library("blas")
13
13
  abort "LAPACK not found" unless have_library("lapack")
14
14
  abort "OpenMP not found" unless have_library("omp") || have_library("gomp")
15
15
 
16
- numo = File.join(Gem.loaded_specs["numo-narray"].require_path, "numo")
16
+ numo = File.join(Gem.loaded_specs["numo-narray-alt"].require_path, "numo")
17
17
  abort "Numo not found" unless find_header("numo/narray.h", numo)
18
18
 
19
19
  # for https://bugs.ruby-lang.org/issues/19005
20
20
  $LDFLAGS += " -Wl,-undefined,dynamic_lookup" if RbConfig::CONFIG["host_os"] =~ /darwin/i
21
21
 
22
- $CXXFLAGS += " -std=c++17 $(optflags) -DFINTEGER=int"
23
- $CXXFLAGS += " -Wall -Wno-unused-parameter -Wno-unused-function -Wno-unused-variable -Wno-unused-private-field -Wno-deprecated-declarations -Wno-sign-compare"
22
+ $CXXFLAGS += " -std=c++20 $(optflags) -DFINTEGER=int"
24
23
 
25
24
  # -march=native not supported with ARM Mac
26
25
  default_optflags = RbConfig::CONFIG["host_os"] =~ /darwin/i && RbConfig::CONFIG["host_cpu"] =~ /arm|aarch64/i ? "" : " -march=native"
@@ -34,8 +33,8 @@ ext = File.expand_path(".", __dir__)
34
33
  vendor = File.expand_path("../../vendor/faiss", __dir__)
35
34
 
36
35
  $srcs = Dir["{#{ext},#{vendor}/faiss,#{vendor}/faiss/{impl,invlists,utils}/**}/*.{cpp}"]
37
- $objs = $srcs.map { |v| v.sub(/cpp\z/, "o") }
36
+ $srcs -= ["avx2", "avx512", "aarch64", "arm_sve"].map { |v| "#{vendor}/faiss/utils/simd_impl/distances_#{v}.cpp" }
38
37
  abort "Faiss not found" unless find_header("faiss/Index.h", vendor)
39
- $VPATH << vendor
38
+ $VPATH += $srcs.filter_map { |v| File.dirname(v) if v.start_with?(vendor) }.uniq
40
39
 
41
40
  create_makefile("faiss/ext")
data/ext/faiss/index_binary.cpp CHANGED
@@ -1,3 +1,10 @@
1
+ #include <algorithm>
2
+ #include <cstddef>
3
+ #include <cstdint>
4
+ #include <limits>
5
+ #include <utility>
6
+ #include <vector>
7
+
1
8
  #include <faiss/IndexBinary.h>
2
9
  #include <faiss/IndexBinaryFlat.h>
3
10
  #include <faiss/IndexBinaryIVF.h>
@@ -6,56 +13,84 @@
6
13
  #include <rice/rice.hpp>
7
14
 
8
15
  #include "numo.hpp"
9
- #include "utils.h"
16
+ #include "utils_rb.h"
10
17
 
11
18
  void init_index_binary(Rice::Module& m) {
12
19
  Rice::define_class_under<faiss::IndexBinary>(m, "IndexBinary")
13
20
  .define_method(
14
21
  "d",
15
- [](faiss::IndexBinary &self) {
22
+ [](faiss::IndexBinary& self) {
16
23
  return self.d;
17
24
  })
18
25
  .define_method(
19
26
  "trained?",
20
- [](faiss::IndexBinary &self) {
27
+ [](faiss::IndexBinary& self) {
21
28
  return self.is_trained;
22
29
  })
23
30
  .define_method(
24
31
  "ntotal",
25
- [](faiss::IndexBinary &self) {
32
+ [](faiss::IndexBinary& self) {
26
33
  return self.ntotal;
27
34
  })
28
35
  .define_method(
29
36
  "train",
30
37
  [](Rice::Object rb_self, numo::UInt8 objects) {
31
- rb_check_frozen(rb_self.value());
38
+ check_frozen(rb_self);
32
39
 
33
- auto &self = *Rice::Data_Object<faiss::IndexBinary>{rb_self};
34
- auto n = check_shape(objects, self.d / 8);
40
+ auto& self = *Rice::Data_Object<faiss::IndexBinary>{rb_self};
41
+ size_t n = check_shape(objects, self.d / 8);
35
42
  self.train(n, objects.read_ptr());
36
43
  })
37
44
  .define_method(
38
45
  "add",
39
46
  [](Rice::Object rb_self, numo::UInt8 objects) {
40
- rb_check_frozen(rb_self.value());
47
+ check_frozen(rb_self);
41
48
 
42
- auto &self = *Rice::Data_Object<faiss::IndexBinary>{rb_self};
43
- auto n = check_shape(objects, self.d / 8);
49
+ auto& self = *Rice::Data_Object<faiss::IndexBinary>{rb_self};
50
+ size_t n = check_shape(objects, self.d / 8);
44
51
  self.add(n, objects.read_ptr());
45
52
  })
53
+ .define_method(
54
+ "remove_ids",
55
+ [](Rice::Object rb_self, numo::Int64 ids) {
56
+ check_frozen(rb_self);
57
+
58
+ auto& self = *Rice::Data_Object<faiss::IndexBinary>{rb_self};
59
+ if (ids.ndim() != 1) {
60
+ throw Rice::Exception(rb_eArgError, "expected ids to be 1d array");
61
+ }
62
+ size_t n = ids.shape()[0];
63
+ faiss::IDSelectorBatch sel(n, ids.read_ptr());
64
+ return self.remove_ids(sel);
65
+ })
46
66
  .define_method(
47
67
  "search",
48
- [](Rice::Object rb_self, numo::UInt8 objects, size_t k) {
49
- auto &self = *Rice::Data_Object<faiss::IndexBinary>{rb_self};
50
- auto n = check_shape(objects, self.d / 8);
68
+ [](Rice::Object rb_self, numo::UInt8 objects, int64_t k) {
69
+ auto& self = *Rice::Data_Object<faiss::IndexBinary>{rb_self};
70
+ size_t n = check_shape(objects, self.d / 8);
71
+ if (k <= 0) {
72
+ throw Rice::Exception(rb_eArgError, "expected k to be positive");
73
+ }
74
+ if (std::cmp_greater_equal(k, std::numeric_limits<size_t>::max() / n)) {
75
+ throw Rice::Exception(rb_eArgError, "k too large");
76
+ }
51
77
 
52
- auto distances = numo::Int32({n, k});
53
- auto labels = numo::Int64({n, k});
78
+ numo::Int32 distances({n, static_cast<size_t>(k)});
79
+ numo::Int64 labels({n, static_cast<size_t>(k)});
54
80
 
55
81
  if (rb_self.is_frozen()) {
82
+ // Don't mess with Ruby-owned memory while the GVL is released
83
+ const auto* objects_ptr = objects.read_ptr();
84
+ std::vector<uint8_t> objects_vec(objects_ptr, objects_ptr + n * (self.d / 8));
85
+ std::vector<int32_t> distances_vec(n * static_cast<size_t>(k));
86
+ std::vector<int64_t> labels_vec(n * static_cast<size_t>(k));
87
+
56
88
  Rice::detail::no_gvl([&] {
57
- self.search(n, objects.read_ptr(), k, distances.write_ptr(), labels.write_ptr());
89
+ self.search(n, objects_vec.data(), k, distances_vec.data(), labels_vec.data());
58
90
  });
91
+
92
+ std::ranges::copy(distances_vec, distances.write_ptr());
93
+ std::ranges::copy(labels_vec, labels.write_ptr());
59
94
  } else {
60
95
  self.search(n, objects.read_ptr(), k, distances.write_ptr(), labels.write_ptr());
61
96
  }
@@ -65,9 +100,33 @@ void init_index_binary(Rice::Module& m) {
65
100
  ret.push(std::move(labels), false);
66
101
  return ret;
67
102
  })
103
+ .define_method(
104
+ "reconstruct",
105
+ [](faiss::IndexBinary& self, int64_t key) {
106
+ auto d = static_cast<size_t>(self.d / 8);
107
+ numo::UInt8 recons({d});
108
+ self.reconstruct(key, recons.write_ptr());
109
+ return recons;
110
+ })
111
+ .define_method(
112
+ "reconstruct_n",
113
+ [](faiss::IndexBinary& self, int64_t i0, int64_t ni) {
114
+ if (ni < 0) {
115
+ throw Rice::Exception(rb_eArgError, "expected n to be non-negative");
116
+ }
117
+ // second condition written this way to avoid overflow
118
+ if (i0 < 0 || i0 > self.ntotal - ni) {
119
+ throw Rice::Exception(rb_eIndexError, "index out of range");
120
+ }
121
+ auto d = static_cast<size_t>(self.d / 8);
122
+ auto n = static_cast<size_t>(ni);
123
+ numo::UInt8 recons({n, d});
124
+ self.reconstruct_n(i0, ni, recons.write_ptr());
125
+ return recons;
126
+ })
68
127
  .define_method(
69
128
  "save",
70
- [](faiss::IndexBinary &self, Rice::String fname) {
129
+ [](faiss::IndexBinary& self, Rice::String fname) {
71
130
  faiss::write_index_binary(&self, fname.c_str());
72
131
  })
73
132
  .define_singleton_function(
data/ext/faiss/{index.cpp → index_rb.cpp} CHANGED
@@ -1,4 +1,10 @@
1
+ #include <algorithm>
2
+ #include <cstddef>
3
+ #include <cstdint>
4
+ #include <limits>
1
5
  #include <string>
6
+ #include <utility>
7
+ #include <vector>
2
8
 
3
9
  #include <faiss/AutoTune.h>
4
10
  #include <faiss/Index.h>
@@ -6,17 +12,17 @@
6
12
  #include <faiss/IndexHNSW.h>
7
13
  #include <faiss/IndexIDMap.h>
8
14
  #include <faiss/IndexIVFFlat.h>
9
- #include <faiss/IndexLSH.h>
10
- #include <faiss/IndexScalarQuantizer.h>
11
- #include <faiss/IndexPQ.h>
12
15
  #include <faiss/IndexIVFPQ.h>
13
16
  #include <faiss/IndexIVFPQR.h>
17
+ #include <faiss/IndexLSH.h>
18
+ #include <faiss/IndexPQ.h>
19
+ #include <faiss/IndexScalarQuantizer.h>
20
+ #include <faiss/MetricType.h>
14
21
  #include <faiss/index_io.h>
15
22
  #include <rice/rice.hpp>
16
- #include <rice/stl.hpp>
17
23
 
18
24
  #include "numo.hpp"
19
- #include "utils.h"
25
+ #include "utils_rb.h"
20
26
 
21
27
  namespace Rice::detail {
22
28
  template<>
@@ -31,10 +37,10 @@ namespace Rice::detail {
31
37
 
32
38
  explicit From_Ruby(Arg* arg) : arg_(arg) { }
33
39
 
34
- double is_convertible(VALUE value) { return Convertible::Exact; }
40
+ double is_convertible(VALUE /*value*/) { return Convertible::Exact; }
35
41
 
36
42
  faiss::MetricType convert(VALUE x) {
37
- auto s = Object(x).to_s().str();
43
+ std::string s = Object(x).to_s().str();
38
44
  if (s == "inner_product") {
39
45
  return faiss::METRIC_INNER_PRODUCT;
40
46
  } else if (s == "l2") {
@@ -60,10 +66,10 @@ namespace Rice::detail {
60
66
 
61
67
  explicit From_Ruby(Arg* arg) : arg_(arg) { }
62
68
 
63
- double is_convertible(VALUE value) { return Convertible::Exact; }
69
+ double is_convertible(VALUE /*value*/) { return Convertible::Exact; }
64
70
 
65
71
  faiss::ScalarQuantizer::QuantizerType convert(VALUE x) {
66
- auto s = Object(x).to_s().str();
72
+ std::string s = Object(x).to_s().str();
67
73
  if (s == "qt_8bit") {
68
74
  return faiss::ScalarQuantizer::QT_8bit;
69
75
  } else if (s == "qt_4bit") {
@@ -92,62 +98,90 @@ void init_index(Rice::Module& m) {
92
98
  Rice::define_class_under<faiss::Index>(m, "Index")
93
99
  .define_method(
94
100
  "d",
95
- [](faiss::Index &self) {
101
+ [](faiss::Index& self) {
96
102
  return self.d;
97
103
  })
98
104
  .define_method(
99
105
  "trained?",
100
- [](faiss::Index &self) {
106
+ [](faiss::Index& self) {
101
107
  return self.is_trained;
102
108
  })
103
109
  .define_method(
104
110
  "ntotal",
105
- [](faiss::Index &self) {
111
+ [](faiss::Index& self) {
106
112
  return self.ntotal;
107
113
  })
108
114
  .define_method(
109
115
  "train",
110
116
  [](Rice::Object rb_self, numo::SFloat objects) {
111
- rb_check_frozen(rb_self.value());
117
+ check_frozen(rb_self);
112
118
 
113
- auto &self = *Rice::Data_Object<faiss::Index>{rb_self};
114
- auto n = check_shape(objects, self.d);
119
+ auto& self = *Rice::Data_Object<faiss::Index>{rb_self};
120
+ size_t n = check_shape(objects, self.d);
115
121
  self.train(n, objects.read_ptr());
116
122
  })
117
123
  .define_method(
118
124
  "add",
119
125
  [](Rice::Object rb_self, numo::SFloat objects) {
120
- rb_check_frozen(rb_self.value());
126
+ check_frozen(rb_self);
121
127
 
122
- auto &self = *Rice::Data_Object<faiss::Index>{rb_self};
123
- auto n = check_shape(objects, self.d);
128
+ auto& self = *Rice::Data_Object<faiss::Index>{rb_self};
129
+ size_t n = check_shape(objects, self.d);
124
130
  self.add(n, objects.read_ptr());
125
131
  })
126
132
  .define_method(
127
133
  "add_with_ids",
128
134
  [](Rice::Object rb_self, numo::SFloat objects, numo::Int64 ids) {
129
- rb_check_frozen(rb_self.value());
135
+ check_frozen(rb_self);
130
136
 
131
- auto &self = *Rice::Data_Object<faiss::Index>{rb_self};
132
- auto n = check_shape(objects, self.d);
137
+ auto& self = *Rice::Data_Object<faiss::Index>{rb_self};
138
+ size_t n = check_shape(objects, self.d);
133
139
  if (ids.ndim() != 1 || ids.shape()[0] != n) {
134
140
  throw Rice::Exception(rb_eArgError, "expected ids to be 1d array with size %d", n);
135
141
  }
136
142
  self.add_with_ids(n, objects.read_ptr(), ids.read_ptr());
137
143
  })
144
+ .define_method(
145
+ "remove_ids",
146
+ [](Rice::Object rb_self, numo::Int64 ids) {
147
+ check_frozen(rb_self);
148
+
149
+ auto& self = *Rice::Data_Object<faiss::Index>{rb_self};
150
+ if (ids.ndim() != 1) {
151
+ throw Rice::Exception(rb_eArgError, "expected ids to be 1d array");
152
+ }
153
+ size_t n = ids.shape()[0];
154
+ faiss::IDSelectorBatch sel(n, ids.read_ptr());
155
+ return self.remove_ids(sel);
156
+ })
138
157
  .define_method(
139
158
  "search",
140
- [](Rice::Object rb_self, numo::SFloat objects, size_t k) {
141
- auto &self = *Rice::Data_Object<faiss::Index>{rb_self};
142
- auto n = check_shape(objects, self.d);
159
+ [](Rice::Object rb_self, numo::SFloat objects, int64_t k) {
160
+ auto& self = *Rice::Data_Object<faiss::Index>{rb_self};
161
+ size_t n = check_shape(objects, self.d);
162
+ if (k <= 0) {
163
+ throw Rice::Exception(rb_eArgError, "expected k to be positive");
164
+ }
165
+ if (k >= std::numeric_limits<size_t>::max() / n) {
166
+ throw Rice::Exception(rb_eArgError, "k too large");
167
+ }
143
168
 
144
- auto distances = numo::SFloat({n, k});
145
- auto labels = numo::Int64({n, k});
169
+ numo::SFloat distances({n, static_cast<size_t>(k)});
170
+ numo::Int64 labels({n, static_cast<size_t>(k)});
146
171
 
147
172
  if (rb_self.is_frozen()) {
173
+ // Don't mess with Ruby-owned memory while the GVL is released
174
+ const auto* objects_ptr = objects.read_ptr();
175
+ std::vector<float> objects_vec(objects_ptr, objects_ptr + n * self.d);
176
+ std::vector<float> distances_vec(n * static_cast<size_t>(k));
177
+ std::vector<int64_t> labels_vec(n * static_cast<size_t>(k));
178
+
148
179
  Rice::detail::no_gvl([&] {
149
- self.search(n, objects.read_ptr(), k, distances.write_ptr(), labels.write_ptr());
180
+ self.search(n, objects_vec.data(), k, distances_vec.data(), labels_vec.data());
150
181
  });
182
+
183
+ std::ranges::copy(distances_vec, distances.write_ptr());
184
+ std::ranges::copy(labels_vec, labels.write_ptr());
151
185
  } else {
152
186
  self.search(n, objects.read_ptr(), k, distances.write_ptr(), labels.write_ptr());
153
187
  }
@@ -160,22 +194,50 @@ void init_index(Rice::Module& m) {
160
194
  .define_method(
161
195
  "nprobe=",
162
196
  [](Rice::Object rb_self, double val) {
163
- rb_check_frozen(rb_self.value());
197
+ check_frozen(rb_self);
164
198
 
165
- auto &self = *Rice::Data_Object<faiss::Index>{rb_self};
199
+ auto& self = *Rice::Data_Object<faiss::Index>{rb_self};
166
200
  faiss::ParameterSpace().set_index_parameter(&self, "nprobe", val);
167
201
  })
168
202
  .define_method(
169
203
  "reconstruct",
170
- [](faiss::Index &self, int64_t key) {
171
- auto d = static_cast<std::size_t>(self.d);
172
- auto recons = numo::SFloat({d});
204
+ [](faiss::Index& self, int64_t key) {
205
+ auto d = static_cast<size_t>(self.d);
206
+ numo::SFloat recons({d});
173
207
  self.reconstruct(key, recons.write_ptr());
174
208
  return recons;
175
209
  })
210
+ .define_method(
211
+ "reconstruct_batch",
212
+ [](faiss::Index& self, numo::Int64 ids) {
213
+ if (ids.ndim() != 1) {
214
+ throw Rice::Exception(rb_eArgError, "expected ids to be 1d array");
215
+ }
216
+ auto n = static_cast<size_t>(ids.shape()[0]);
217
+ auto d = static_cast<size_t>(self.d);
218
+ numo::SFloat recons({n, d});
219
+ self.reconstruct_batch(n, ids.read_ptr(), recons.write_ptr());
220
+ return recons;
221
+ })
222
+ .define_method(
223
+ "reconstruct_n",
224
+ [](faiss::Index& self, int64_t i0, int64_t ni) {
225
+ if (ni < 0) {
226
+ throw Rice::Exception(rb_eArgError, "expected n to be non-negative");
227
+ }
228
+ // second condition written this way to avoid overflow
229
+ if (i0 < 0 || i0 > self.ntotal - ni) {
230
+ throw Rice::Exception(rb_eIndexError, "index out of range");
231
+ }
232
+ auto d = static_cast<size_t>(self.d);
233
+ auto n = static_cast<size_t>(ni);
234
+ numo::SFloat recons({n, d});
235
+ self.reconstruct_n(i0, ni, recons.write_ptr());
236
+ return recons;
237
+ })
176
238
  .define_method(
177
239
  "save",
178
- [](faiss::Index &self, Rice::String fname) {
240
+ [](faiss::Index& self, Rice::String fname) {
179
241
  faiss::write_index(&self, fname.c_str());
180
242
  })
181
243
  .define_singleton_function(
@@ -221,10 +283,21 @@ void init_index(Rice::Module& m) {
221
283
  .define_constructor(Rice::Constructor<faiss::ParameterSpace>())
222
284
  .define_method(
223
285
  "set_index_parameter",
224
- [](faiss::ParameterSpace& self, faiss::Index* index, const std::string& name, double val) {
225
- self.set_index_parameter(index, name, val);
286
+ [](faiss::ParameterSpace& self, faiss::Index* index, Rice::String name, double val) {
287
+ self.set_index_parameter(index, name.str(), val);
226
288
  });
227
289
 
228
290
  Rice::define_class_under<faiss::IndexIDMap, faiss::Index>(m, "IndexIDMap")
229
291
  .define_constructor(Rice::Constructor<faiss::IndexIDMap, faiss::Index*>());
292
+
293
+ Rice::define_class_under<faiss::IndexIDMap2, faiss::Index>(m, "IndexIDMap2")
294
+ .define_constructor(Rice::Constructor<faiss::IndexIDMap2, faiss::Index*>())
295
+ .define_method(
296
+ "id_map",
297
+ [](faiss::IndexIDMap2& self) {
298
+ size_t n = self.id_map.size();
299
+ numo::Int64 ids({n});
300
+ std::ranges::copy(self.id_map, ids.write_ptr());
301
+ return ids;
302
+ });
230
303
  }
data/ext/faiss/kmeans.cpp CHANGED
@@ -1,9 +1,12 @@
1
+ #include <algorithm>
2
+ #include <cstddef>
3
+
1
4
  #include <faiss/Clustering.h>
2
5
  #include <faiss/IndexFlat.h>
3
6
  #include <rice/rice.hpp>
4
7
 
5
8
  #include "numo.hpp"
6
- #include "utils.h"
9
+ #include "utils_rb.h"
7
10
 
8
11
  void init_kmeans(Rice::Module& m) {
9
12
  Rice::define_class_under<faiss::Clustering>(m, "Kmeans")
@@ -15,27 +18,27 @@ void init_kmeans(Rice::Module& m) {
15
18
  })
16
19
  .define_method(
17
20
  "d",
18
- [](faiss::Clustering &self) {
21
+ [](faiss::Clustering& self) {
19
22
  return self.d;
20
23
  })
21
24
  .define_method(
22
25
  "k",
23
- [](faiss::Clustering &self) {
26
+ [](faiss::Clustering& self) {
24
27
  return self.k;
25
28
  })
26
29
  .define_method(
27
30
  "centroids",
28
- [](faiss::Clustering &self) {
29
- auto centroids = numo::SFloat({self.k, self.d});
30
- memcpy(centroids.write_ptr(), self.centroids.data(), self.centroids.size() * sizeof(float));
31
+ [](faiss::Clustering& self) {
32
+ numo::SFloat centroids({self.k, self.d});
33
+ std::ranges::copy(self.centroids, centroids.write_ptr());
31
34
  return centroids;
32
35
  })
33
36
  .define_method(
34
37
  "train",
35
38
  [](Rice::Object rb_self, numo::SFloat objects) {
36
- auto &self = *Rice::Data_Object<faiss::Clustering>{rb_self};
37
- auto n = check_shape(objects, self.d);
38
- auto index = faiss::IndexFlatL2(self.d);
39
+ auto& self = *Rice::Data_Object<faiss::Clustering>{rb_self};
40
+ size_t n = check_shape(objects, self.d);
41
+ faiss::IndexFlatL2 index(self.d);
39
42
  rb_self.iv_set("@index", index);
40
43
  self.train(n, objects.read_ptr(), index);
41
44
  });
data/ext/faiss/numo.hpp CHANGED
@@ -1,5 +1,5 @@
1
1
  /*!
2
- * Numo.hpp v0.3.0
2
+ * Numo.hpp v0.3.1
3
3
  * https://github.com/ankane/numo.hpp
4
4
  * BSD-2-Clause License
5
5
  */
@@ -38,7 +38,7 @@ public:
38
38
  }
39
39
 
40
40
  bool is_contiguous() const {
41
- return nary_check_contiguous(this->_value) == Qtrue;
41
+ return Rice::detail::protect(nary_check_contiguous, this->_value) == Qtrue;
42
42
  }
43
43
 
44
44
  operator Rice::Object() const {
@@ -46,26 +46,28 @@ public:
46
46
  }
47
47
 
48
48
  const void* read_ptr() {
49
- if (!is_contiguous()) {
50
- this->_value = nary_dup(this->_value);
51
- }
52
- return nary_get_pointer_for_read(this->_value) + nary_get_offset(this->_value);
49
+ return Rice::detail::protect([&]() {
50
+ if (!nary_check_contiguous(this->_value)) {
51
+ this->_value = nary_dup(this->_value);
52
+ }
53
+ return nary_get_pointer_for_read(this->_value) + nary_get_offset(this->_value);
54
+ });
53
55
  }
54
56
 
55
57
  void* write_ptr() {
56
- return nary_get_pointer_for_write(this->_value);
58
+ return Rice::detail::protect(nary_get_pointer_for_write, this->_value);
57
59
  }
58
60
 
59
61
  protected:
60
62
  NArray() { }
61
63
 
62
64
  void construct_value(VALUE dtype, VALUE v) {
63
- this->_value = rb_funcall(dtype, rb_intern("cast"), 1, v);
65
+ this->_value = Rice::detail::protect(rb_funcall, dtype, rb_intern("cast"), 1, v);
64
66
  }
65
67
 
66
68
  void construct_shape(VALUE dtype, std::initializer_list<size_t> shape) {
67
69
  // rb_narray_new doesn't modify shape, but not marked as const
68
- this->_value = rb_narray_new(dtype, shape.size(), const_cast<size_t*>(shape.begin()));
70
+ this->_value = Rice::detail::protect(rb_narray_new, dtype, shape.size(), const_cast<size_t*>(shape.begin()));
69
71
  }
70
72
 
71
73
  VALUE _value;
data/ext/faiss/pca_matrix.cpp CHANGED
@@ -1,34 +1,36 @@
1
+ #include <cstddef>
2
+
1
3
  #include <faiss/VectorTransform.h>
2
4
  #include <rice/rice.hpp>
3
5
 
4
6
  #include "numo.hpp"
5
- #include "utils.h"
7
+ #include "utils_rb.h"
6
8
 
7
9
  void init_pca_matrix(Rice::Module& m) {
8
10
  Rice::define_class_under<faiss::PCAMatrix>(m, "PCAMatrix")
9
11
  .define_constructor(Rice::Constructor<faiss::PCAMatrix, int, int>())
10
12
  .define_method(
11
13
  "d_in",
12
- [](faiss::PCAMatrix &self) {
14
+ [](faiss::PCAMatrix& self) {
13
15
  return self.d_in;
14
16
  })
15
17
  .define_method(
16
18
  "d_out",
17
- [](faiss::PCAMatrix &self) {
19
+ [](faiss::PCAMatrix& self) {
18
20
  return self.d_out;
19
21
  })
20
22
  .define_method(
21
23
  "train",
22
- [](faiss::PCAMatrix &self, numo::SFloat objects) {
23
- auto n = check_shape(objects, self.d_in);
24
+ [](faiss::PCAMatrix& self, numo::SFloat objects) {
25
+ size_t n = check_shape(objects, self.d_in);
24
26
  self.train(n, objects.read_ptr());
25
27
  })
26
28
  .define_method(
27
29
  "apply",
28
- [](faiss::PCAMatrix &self, numo::SFloat objects) {
29
- auto n = check_shape(objects, self.d_in);
30
+ [](faiss::PCAMatrix& self, numo::SFloat objects) {
31
+ size_t n = check_shape(objects, self.d_in);
30
32
 
31
- auto ary = numo::SFloat({n, static_cast<size_t>(self.d_out)});
33
+ numo::SFloat ary({n, static_cast<size_t>(self.d_out)});
32
34
  self.apply_noalloc(n, objects.read_ptr(), ary.write_ptr());
33
35
  return ary;
34
36
  });