native-vector-store 0.3.9 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,407 @@
1
+ #if !defined(phmap_utils_h_guard_)
2
+ #define phmap_utils_h_guard_
3
+
4
+ // ---------------------------------------------------------------------------
5
+ // Copyright (c) 2019, Gregory Popovitch - greg7mdp@gmail.com
6
+ //
7
+ // minimal header providing phmap::HashState
8
+ //
9
+ // use as: phmap::HashState().combine(0, _first_name, _last_name, _age);
10
+ //
11
+ // Licensed under the Apache License, Version 2.0 (the "License");
12
+ // you may not use this file except in compliance with the License.
13
+ // You may obtain a copy of the License at
14
+ //
15
+ // https://www.apache.org/licenses/LICENSE-2.0
16
+ //
17
+ // Unless required by applicable law or agreed to in writing, software
18
+ // distributed under the License is distributed on an "AS IS" BASIS,
19
+ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
20
+ // See the License for the specific language governing permissions and
21
+ // limitations under the License.
22
+ // ---------------------------------------------------------------------------
23
+
24
+ #ifdef _MSC_VER
25
+ #pragma warning(push)
26
+ #pragma warning(disable : 4514) // unreferenced inline function has been removed
27
+ #pragma warning(disable : 4710) // function not inlined
28
+ #pragma warning(disable : 4711) // selected for automatic inline expansion
29
+ #endif
30
+
31
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <functional>
#include <tuple>
#include <type_traits>
#include <utility>
#include "phmap_bits.h"
35
+
36
+ // ---------------------------------------------------------------
37
+ // Absl forward declaration requires global scope.
38
+ // ---------------------------------------------------------------
39
+ #if defined(PHMAP_USE_ABSL_HASH) && !defined(phmap_fwd_decl_h_guard_) && !defined(ABSL_HASH_HASH_H_)
40
+ namespace absl { template <class T> struct Hash; };
41
+ #endif
42
+
43
+ namespace phmap
44
+ {
45
+
46
+ // ---------------------------------------------------------------
47
+ // ---------------------------------------------------------------
48
// ---------------------------------------------------------------
// phmap_mix<n>: functor that scrambles the bits of a size_t.
// Only the <4> and <8> specializations below provide a definition;
// the primary template merely declares the call operator.
// ---------------------------------------------------------------
template<int n>
struct phmap_mix
{
    inline size_t operator()(size_t) const;
};

template<>
struct phmap_mix<4>
{
    // Multiplies in 64 bits, then xors the upper 32 bits of the product
    // into the lower 32 bits before converting back to size_t.
    inline size_t operator()(size_t a) const
    {
        static constexpr uint64_t kmul = 0xcc9e2d51UL;
        const uint64_t product = a * kmul;
        const uint64_t folded  = product ^ (product >> 32);
        return static_cast<size_t>(folded);
    }
};

#if defined(PHMAP_HAS_UMUL128)
    template<>
    struct phmap_mix<8>
    {
        // Very fast mixing (similar to Abseil): the two halves returned by
        // umul128 (result and the value written through the out-pointer)
        // are added together.
        inline size_t operator()(size_t a) const
        {
            static constexpr uint64_t k = 0xde5fb9d2630458e9ULL;
            uint64_t hi;
            const uint64_t lo = umul128(a, k, &hi);
            return static_cast<size_t>(hi + lo);
        }
    };
#else
    template<>
    struct phmap_mix<8>
    {
        // Shift/add/xor mixer used when umul128 is not available.
        inline size_t operator()(size_t a) const
        {
            a  = (~a) + (a << 21);        // a = (a << 21) - a - 1;
            a ^= a >> 24;
            a += (a << 3) + (a << 8);     // a * 265
            a ^= a >> 14;
            a += (a << 2) + (a << 4);     // a * 21
            a ^= a >> 28;
            a += a << 31;
            return a;
        }
    };
#endif
95
+
96
+ // --------------------------------------------
97
// fold_if_needed<n>: converts a 64-bit value to size_t. The template
// argument selects the strategy; callers in this file pass sizeof(size_t).
// Only <4> and <8> are defined.
template<int n>
struct fold_if_needed
{
    inline size_t operator()(uint64_t) const;
};

template<>
struct fold_if_needed<4>
{
    // Xor the upper 32 bits into the value before converting to size_t.
    inline size_t operator()(uint64_t a) const
    {
        const uint64_t upper = a >> 32;
        return static_cast<size_t>(a ^ upper);
    }
};

template<>
struct fold_if_needed<8>
{
    // Plain conversion, no folding.
    inline size_t operator()(uint64_t a) const
    {
        return static_cast<size_t>(a);
    }
};
120
+
121
+ // ---------------------------------------------------------------
122
+ // see if class T has a hash_value() friend method
123
+ // ---------------------------------------------------------------
124
// Compile-time detector: has_hash_value<T>::value is true when the
// expression `hash_value(const T&) == 1` is well-formed, false otherwise.
template<typename T>
struct has_hash_value
{
private:
    using yes = std::true_type;
    using no  = std::false_type;

    // Preferred overload (int argument); drops out via SFINAE when no
    // suitable hash_value() can be called for U.
    template<typename U>
    static auto test(int) -> decltype(hash_value(std::declval<const U&>()) == 1, yes());

    // Fallback overload, chosen only when the one above is not viable.
    template<typename>
    static no test(...);

    using probe = decltype(test<T>(0));

public:
    static constexpr bool value = std::is_same<probe, yes>::value;
};
138
+
139
+ #if defined(PHMAP_USE_ABSL_HASH) && !defined(phmap_fwd_decl_h_guard_)
140
+ template <class T> using Hash = ::absl::Hash<T>;
141
+ #elif !defined(PHMAP_USE_ABSL_HASH)
142
+ // ---------------------------------------------------------------
143
+ // phmap::Hash
144
+ // ---------------------------------------------------------------
145
+ template <class T>
146
+ struct Hash
147
+ {
148
+ template <class U, typename std::enable_if<has_hash_value<U>::value, int>::type = 0>
149
+ size_t _hash(const T& val) const
150
+ {
151
+ return hash_value(val);
152
+ }
153
+
154
+ template <class U, typename std::enable_if<!has_hash_value<U>::value, int>::type = 0>
155
+ size_t _hash(const T& val) const
156
+ {
157
+ return std::hash<T>()(val);
158
+ }
159
+
160
+ inline size_t operator()(const T& val) const
161
+ {
162
+ return _hash<T>(val);
163
+ }
164
+ };
165
+
166
// Base class that only publishes the argument_type / result_type
// member typedefs for the Hash specializations deriving from it.
template<class ArgumentType, class ResultType>
struct phmap_unary_function
{
    using argument_type = ArgumentType;
    using result_type   = ResultType;
};
172
+
173
+ template <>
174
+ struct Hash<bool> : public phmap_unary_function<bool, size_t>
175
+ {
176
+ inline size_t operator()(bool val) const noexcept
177
+ { return static_cast<size_t>(val); }
178
+ };
179
+
180
+ template <>
181
+ struct Hash<char> : public phmap_unary_function<char, size_t>
182
+ {
183
+ inline size_t operator()(char val) const noexcept
184
+ { return static_cast<size_t>(val); }
185
+ };
186
+
187
+ template <>
188
+ struct Hash<signed char> : public phmap_unary_function<signed char, size_t>
189
+ {
190
+ inline size_t operator()(signed char val) const noexcept
191
+ { return static_cast<size_t>(val); }
192
+ };
193
+
194
+ template <>
195
+ struct Hash<unsigned char> : public phmap_unary_function<unsigned char, size_t>
196
+ {
197
+ inline size_t operator()(unsigned char val) const noexcept
198
+ { return static_cast<size_t>(val); }
199
+ };
200
+
201
+ #ifdef PHMAP_HAS_NATIVE_WCHAR_T
202
+ template <>
203
+ struct Hash<wchar_t> : public phmap_unary_function<wchar_t, size_t>
204
+ {
205
+ inline size_t operator()(wchar_t val) const noexcept
206
+ { return static_cast<size_t>(val); }
207
+ };
208
+ #endif
209
+
210
+ template <>
211
+ struct Hash<int16_t> : public phmap_unary_function<int16_t, size_t>
212
+ {
213
+ inline size_t operator()(int16_t val) const noexcept
214
+ { return static_cast<size_t>(val); }
215
+ };
216
+
217
+ template <>
218
+ struct Hash<uint16_t> : public phmap_unary_function<uint16_t, size_t>
219
+ {
220
+ inline size_t operator()(uint16_t val) const noexcept
221
+ { return static_cast<size_t>(val); }
222
+ };
223
+
224
+ template <>
225
+ struct Hash<int32_t> : public phmap_unary_function<int32_t, size_t>
226
+ {
227
+ inline size_t operator()(int32_t val) const noexcept
228
+ { return static_cast<size_t>(val); }
229
+ };
230
+
231
+ template <>
232
+ struct Hash<uint32_t> : public phmap_unary_function<uint32_t, size_t>
233
+ {
234
+ inline size_t operator()(uint32_t val) const noexcept
235
+ { return static_cast<size_t>(val); }
236
+ };
237
+
238
+ template <>
239
+ struct Hash<int64_t> : public phmap_unary_function<int64_t, size_t>
240
+ {
241
+ inline size_t operator()(int64_t val) const noexcept
242
+ { return fold_if_needed<sizeof(size_t)>()(static_cast<uint64_t>(val)); }
243
+ };
244
+
245
+ template <>
246
+ struct Hash<uint64_t> : public phmap_unary_function<uint64_t, size_t>
247
+ {
248
+ inline size_t operator()(uint64_t val) const noexcept
249
+ { return fold_if_needed<sizeof(size_t)>()(val); }
250
+ };
251
+
252
+ template <>
253
+ struct Hash<float> : public phmap_unary_function<float, size_t>
254
+ {
255
+ inline size_t operator()(float val) const noexcept
256
+ {
257
+ // -0.0 and 0.0 should return same hash
258
+ uint32_t *as_int = reinterpret_cast<uint32_t *>(&val);
259
+ return (val == 0) ? static_cast<size_t>(0) :
260
+ static_cast<size_t>(*as_int);
261
+ }
262
+ };
263
+
264
+ template <>
265
+ struct Hash<double> : public phmap_unary_function<double, size_t>
266
+ {
267
+ inline size_t operator()(double val) const noexcept
268
+ {
269
+ // -0.0 and 0.0 should return same hash
270
+ uint64_t *as_int = reinterpret_cast<uint64_t *>(&val);
271
+ return (val == 0) ? static_cast<size_t>(0) :
272
+ fold_if_needed<sizeof(size_t)>()(*as_int);
273
+ }
274
+ };
275
+
276
+ #endif
277
+
278
// Left-rotate helper used by Combiner<H, 4>.
// The generic expansion is fully parenthesized (arguments and result) so
// that it keeps its meaning when x or r are compound expressions or when
// the macro is used inside a larger expression.
#if defined(_MSC_VER)
#   define PHMAP_HASH_ROTL32(x, r) _rotl(x,r)
#else
#   define PHMAP_HASH_ROTL32(x, r) (((x) << (r)) | ((x) >> (32 - (r))))
#endif


// Combiner<H, sz>: folds one hash value into a running seed of type H.
// sz selects the specialization (callers in this file pass sizeof(H));
// only sz == 4 and sz == 8 are defined.
template <class H, int sz> struct Combiner
{
    H operator()(H seed, size_t value);
};

template <class H> struct Combiner<H, 4>
{
    // Mixes k1 into h1 and returns the updated seed.
    H operator()(H h1, size_t k1)
    {
        // Copyright 2005-2014 Daniel James.
        // Distributed under the Boost Software License, Version 1.0. (See accompanying
        // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)

        const uint32_t c1 = 0xcc9e2d51;
        const uint32_t c2 = 0x1b873593;

        k1 *= c1;
        k1 = PHMAP_HASH_ROTL32(k1,15);
        k1 *= c2;

        h1 ^= k1;
        h1 = PHMAP_HASH_ROTL32(h1,13);
        h1 = h1*5+0xe6546b64;

        return h1;
    }
};

template <class H> struct Combiner<H, 8>
{
    // Mixes k into h and returns the updated seed.
    H operator()(H h, size_t k)
    {
        // Copyright 2005-2014 Daniel James.
        // Distributed under the Boost Software License, Version 1.0. (See accompanying
        // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
        const uint64_t m = (uint64_t(0xc6a4a793) << 32) + 0x5bd1e995;
        const int r = 47;

        k *= m;
        k ^= k >> r;
        k *= m;

        h ^= k;
        h *= m;

        // Completely arbitrary number, to prevent 0's
        // from hashing to 0.
        h += 0xe6546b64;

        return h;
    }
};
337
+
338
+ // define HashState to combine member hashes... see the usage example in the header comment at the top of this file
339
+ // -----------------------------------------------------------------------------
340
+ template <typename H>
341
+ class HashStateBase {
342
+ public:
343
+ template <typename T, typename... Ts>
344
+ static H combine(H state, const T& value, const Ts&... values);
345
+
346
+ static H combine(H state) { return state; }
347
+ };
348
+
349
+ template <typename H>
350
+ template <typename T, typename... Ts>
351
+ H HashStateBase<H>::combine(H seed, const T& v, const Ts&... vs)
352
+ {
353
+ return HashStateBase<H>::combine(Combiner<H, sizeof(H)>()(
354
+ seed, phmap::Hash<T>()(v)),
355
+ vs...);
356
+ }
357
+
358
+ using HashState = HashStateBase<size_t>;
359
+
360
+ // -----------------------------------------------------------------------------
361
+
362
+ #if !defined(PHMAP_USE_ABSL_HASH)
363
+
364
+ // define Hash for std::pair
365
+ // -------------------------
366
+ template<class T1, class T2>
367
+ struct Hash<std::pair<T1, T2>> {
368
+ size_t operator()(std::pair<T1, T2> const& p) const noexcept {
369
+ return phmap::HashState().combine(phmap::Hash<T1>()(p.first), p.second);
370
+ }
371
+ };
372
+
373
+ // define Hash for std::tuple
374
+ // --------------------------
375
+ template<class... T>
376
+ struct Hash<std::tuple<T...>> {
377
+ size_t operator()(std::tuple<T...> const& t) const noexcept {
378
+ size_t seed = 0;
379
+ return _hash_helper(seed, t);
380
+ }
381
+
382
+ private:
383
+ template<size_t I = 0, class TUP>
384
+ typename std::enable_if<I == std::tuple_size<TUP>::value, size_t>::type
385
+ _hash_helper(size_t seed, const TUP &) const noexcept { return seed; }
386
+
387
+ template<size_t I = 0, class TUP>
388
+ typename std::enable_if<I < std::tuple_size<TUP>::value, size_t>::type
389
+ _hash_helper(size_t seed, const TUP &t) const noexcept {
390
+ const auto &el = std::get<I>(t);
391
+ using el_type = typename std::remove_cv<typename std::remove_reference<decltype(el)>::type>::type;
392
+ seed = Combiner<size_t, sizeof(size_t)>()(seed, phmap::Hash<el_type>()(el));
393
+ return _hash_helper<I + 1>(seed, t);
394
+ }
395
+ };
396
+
397
+
398
+ #endif
399
+
400
+
401
+ } // namespace phmap
402
+
403
+ #ifdef _MSC_VER
404
+ #pragma warning(pop)
405
+ #endif
406
+
407
+ #endif // phmap_utils_h_guard_
package/docs/index.html CHANGED
@@ -60,8 +60,9 @@
60
60
  <li><strong>🚀 High Performance</strong>: C++ implementation with OpenMP SIMD optimization</li>
61
61
  <li><strong>📦 Arena Allocation</strong>: Memory-efficient storage with 64MB chunks</li>
62
62
  <li><strong>⚡ Fast Search</strong>: Sub-10ms similarity search for large document collections</li>
63
+ <li><strong>🔍 Hybrid Search</strong>: Combines vector similarity (semantic) with BM25 text search (lexical)</li>
63
64
  <li><strong>🔧 MCP Integration</strong>: Built for Model Context Protocol servers</li>
64
- <li><strong>🌐 Cross-Platform</strong>: Works on Linux, macOS, and Windows</li>
65
+ <li><strong>🌐 Cross-Platform</strong>: Works on Linux and macOS (Windows users: use WSL)</li>
65
66
  <li><strong>📊 TypeScript Support</strong>: Full type definitions included</li>
66
67
  <li><strong>🔄 Producer-Consumer Loading</strong>: Parallel document loading at 178k+ docs/sec</li>
67
68
  </ul>
@@ -85,7 +86,7 @@
85
86
  <li><strong>Linux</strong>: <code>sudo apt-get install libgomp1</code> (Ubuntu/Debian) or <code>dnf install libgomp</code> (Fedora)</li>
86
87
  <li><strong>Alpine</strong>: <code>apk add libgomp</code></li>
87
88
  <li><strong>macOS</strong>: <code>brew install libomp</code></li>
88
- <li><strong>Windows</strong>: Included with Visual C++ runtime</li>
89
+ <li><strong>Windows</strong>: Use WSL (Windows Subsystem for Linux)</li>
89
90
  </ul>
90
91
  </li>
91
92
  </ul>
@@ -93,7 +94,6 @@
93
94
  <ul>
94
95
  <li>Linux (x64, arm64, musl/Alpine) - x64 builds are AWS Lambda compatible (no AVX-512)</li>
95
96
  <li>macOS (x64, arm64/Apple Silicon)</li>
96
- <li>Windows (x64)</li>
97
97
  </ul>
98
98
  <p>If building from source, you'll need:</p>
99
99
  <ul>
@@ -125,8 +125,16 @@ store.finalize(); // Must call before searching!
125
125
 
126
126
  // Search for similar documents
127
127
  const queryEmbedding = new Float32Array(1536);
128
+
129
+ // Option 1: Vector-only search (traditional)
128
130
  const results = store.search(queryEmbedding, 5); // Top 5 results
129
131
 
132
+ // Option 2: Hybrid search (NEW - combines vector + BM25 text search)
133
+ const hybridResults = store.search(queryEmbedding, 5, &quot;your search query text&quot;);
134
+
135
+ // Option 3: BM25 text-only search
136
+ const textResults = store.searchBM25(&quot;your search query&quot;, 5);
137
+
130
138
  // Results format - array of SearchResult objects, sorted by score (highest first):
131
139
  console.log(results);
132
140
  // [
@@ -284,6 +292,47 @@ if (process.env.NODE_ENV === 'development') {
284
292
  fs.watch('./documents', { recursive: true }, reloadStore);
285
293
  }
286
294
  </code></pre>
295
+ <h2 id="hybrid-search">Hybrid Search</h2>
296
+ <p>The vector store now supports hybrid search, combining semantic similarity (vector search) with lexical matching (BM25 text search) for improved retrieval accuracy:</p>
297
+ <pre class="prettyprint source lang-javascript"><code>const { VectorStore } = require('native-vector-store');
298
+
299
+ const store = new VectorStore(1536);
300
+ store.loadDir('./documents');
301
+
302
+ // Hybrid search automatically combines vector and text search
303
+ const queryEmbedding = new Float32Array(1536);
304
+ const results = store.search(
305
+ queryEmbedding,
306
+ 10, // Top 10 results
307
+ &quot;machine learning algorithms&quot; // Query text for BM25
308
+ );
309
+
310
+ // You can also use individual search methods
311
+ const vectorResults = store.searchVector(queryEmbedding, 10);
312
+ const textResults = store.searchBM25(&quot;machine learning&quot;, 10);
313
+
314
+ // Or explicitly control the hybrid weights
315
+ const customResults = store.searchHybrid(
316
+ queryEmbedding,
317
+ &quot;machine learning&quot;,
318
+ 10,
319
+ 0.3, // Vector weight (30%)
320
+ 0.7 // BM25 weight (70%)
321
+ );
322
+
323
+ // Tune BM25 parameters for your corpus
324
+ store.setBM25Parameters(
325
+ 1.2, // k1: Term frequency saturation (default: 1.2)
326
+ 0.75, // b: Document length normalization (default: 0.75)
327
+ 1.0 // delta: Smoothing parameter (default: 1.0)
328
+ );
329
+ </code></pre>
330
+ <p>Hybrid search is particularly effective for:</p>
331
+ <ul>
332
+ <li><strong>Question answering</strong>: BM25 finds documents with exact terms while vectors capture semantic meaning</li>
333
+ <li><strong>Knowledge retrieval</strong>: Combines conceptual similarity with keyword matching</li>
334
+ <li><strong>Multi-lingual search</strong>: Vectors handle cross-language similarity while BM25 matches exact terms</li>
335
+ </ul>
287
336
  <h2 id="mcp-server-integration">MCP Server Integration</h2>
288
337
  <p>Perfect for building local RAG capabilities in MCP servers:</p>
289
338
  <pre class="prettyprint source lang-javascript"><code>const { MCPVectorServer } = require('native-vector-store/examples/mcp-server');
package/lib/index.d.ts CHANGED
@@ -31,11 +31,45 @@ export class VectorStore {
31
31
 
32
32
  /**
33
33
  * Search for k most similar documents
34
+ * Uses hybrid search if queryText is provided, otherwise vector-only search
35
+ * @param query - Query embedding vector
36
+ * @param k - Number of results to return
37
+ * @param queryText - Optional text query for hybrid search (BM25 + vector)
38
+ */
39
+ search(query: Float32Array, k: number, queryText?: string): SearchResult[];
40
+
41
+ /**
42
+ * Pure vector similarity search
34
43
  * @param query - Query embedding vector
35
44
  * @param k - Number of results to return
36
45
  * @param normalizeQuery - Whether to L2 normalize the query (default: true)
37
46
  */
38
- search(query: Float32Array, k: number, normalizeQuery?: boolean): SearchResult[];
47
+ searchVector(query: Float32Array, k: number, normalizeQuery?: boolean): SearchResult[];
48
+
49
+ /**
50
+ * Pure BM25 text search
51
+ * @param queryText - Text query or array of query terms
52
+ * @param k - Number of results to return
53
+ */
54
+ searchBM25(queryText: string | string[], k: number): SearchResult[];
55
+
56
+ /**
57
+ * Hybrid search combining vector similarity and BM25 text search
58
+ * @param query - Query embedding vector
59
+ * @param queryText - Text query for BM25 component
60
+ * @param k - Number of results to return
61
+ * @param vectorWeight - Weight for vector similarity (default: 0.5)
62
+ * @param bm25Weight - Weight for BM25 score (default: 0.5)
63
+ */
64
+ searchHybrid(query: Float32Array, queryText: string, k: number, vectorWeight?: number, bm25Weight?: number): SearchResult[];
65
+
66
+ /**
67
+ * Set BM25 parameters for text search
68
+ * @param k1 - Controls term frequency saturation (default: 1.2)
69
+ * @param b - Controls document length normalization (default: 0.75)
70
+ * @param delta - Smoothing parameter (default: 1.0)
71
+ */
72
+ setBM25Parameters(k1: number, b: number, delta?: number): void;
39
73
 
40
74
  /**
41
75
  * Normalize all stored embeddings
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "native-vector-store",
3
- "version": "0.3.9",
3
+ "version": "0.4.0",
4
4
  "description": "High-performance local vector store with SIMD optimization for MCP servers",
5
5
  "main": "index.js",
6
6
  "types": "lib/index.d.ts",
package/src/Makefile CHANGED
@@ -14,7 +14,7 @@ CXXFLAGS = -std=c++17 -g -O0 -fno-omit-frame-pointer -DDEBUG
14
14
  # OS-specific configuration
15
15
  ifeq ($(UNAME_S),Darwin)
16
16
  # macOS configuration
17
- INCLUDES = -I. -I../deps/simdjson -I../deps/atomic_queue -I/opt/homebrew/opt/libomp/include
17
+ INCLUDES = -I. -I../deps/simdjson -I../deps/atomic_queue -I../deps -I/opt/homebrew/opt/libomp/include
18
18
  LDFLAGS = -L/opt/homebrew/opt/libomp/lib
19
19
  LIBS = -lomp
20
20
  # Add flags for better debugging with lldb
@@ -23,7 +23,7 @@ ifeq ($(UNAME_S),Darwin)
23
23
  CXXFLAGS += -Xpreprocessor -fopenmp
24
24
  else
25
25
  # Linux configuration
26
- INCLUDES = -I. -I../deps/simdjson -I../deps/atomic_queue
26
+ INCLUDES = -I. -I../deps/simdjson -I../deps/atomic_queue -I../deps
27
27
  LDFLAGS = -L/usr/lib
28
28
  LIBS = -lgomp
29
29
  # OpenMP flags for Linux
@@ -32,12 +32,18 @@ endif
32
32
 
33
33
  TARGET = test_vector_store
34
34
  STRESS_TARGET = test_stress
35
- SOURCES = test_main.cpp vector_store.cpp ../deps/simdjson/simdjson.cpp
36
- STRESS_SOURCES = test_stress.cpp vector_store.cpp vector_store_loader.cpp vector_store_loader_mmap.cpp vector_store_loader_adaptive.cpp ../deps/simdjson/simdjson.cpp
35
+ BM25_TARGET = test_bm25
36
+ HYBRID_TARGET = test_hybrid_search
37
+ SOURCES = test_main.cpp vector_store.cpp simple_tokenizer.cpp ../deps/simdjson/simdjson.cpp
38
+ STRESS_SOURCES = test_stress.cpp vector_store.cpp simple_tokenizer.cpp vector_store_loader.cpp vector_store_loader_mmap.cpp vector_store_loader_adaptive.cpp ../deps/simdjson/simdjson.cpp
39
+ BM25_SOURCES = test_bm25.cpp vector_store.cpp simple_tokenizer.cpp ../deps/simdjson/simdjson.cpp
40
+ HYBRID_SOURCES = test_hybrid_search.cpp vector_store.cpp simple_tokenizer.cpp ../deps/simdjson/simdjson.cpp
37
41
  OBJECTS = $(SOURCES:.cpp=.o)
38
42
  STRESS_OBJECTS = $(STRESS_SOURCES:.cpp=.o)
43
+ BM25_OBJECTS = $(BM25_SOURCES:.cpp=.o)
44
+ HYBRID_OBJECTS = $(HYBRID_SOURCES:.cpp=.o)
39
45
 
40
- all: $(TARGET) $(STRESS_TARGET)
46
+ all: $(TARGET) $(STRESS_TARGET) $(BM25_TARGET) $(HYBRID_TARGET)
41
47
 
42
48
  $(TARGET): $(OBJECTS)
43
49
  $(CXX) $(CXXFLAGS) $(OBJECTS) -o $(TARGET) $(LDFLAGS) $(LIBS)
@@ -45,11 +51,25 @@ $(TARGET): $(OBJECTS)
45
51
  $(STRESS_TARGET): $(STRESS_OBJECTS)
46
52
  $(CXX) $(CXXFLAGS) $(STRESS_OBJECTS) -o $(STRESS_TARGET) $(LDFLAGS) $(LIBS)
47
53
 
54
+ $(BM25_TARGET): $(BM25_OBJECTS)
55
+ $(CXX) $(CXXFLAGS) $(BM25_OBJECTS) -o $(BM25_TARGET) $(LDFLAGS) $(LIBS)
56
+
57
+ $(HYBRID_TARGET): $(HYBRID_OBJECTS)
58
+ $(CXX) $(CXXFLAGS) $(HYBRID_OBJECTS) -o $(HYBRID_TARGET) $(LDFLAGS) $(LIBS)
59
+
48
60
  %.o: %.cpp
49
61
  $(CXX) $(CXXFLAGS) $(INCLUDES) -c $< -o $@
50
62
 
51
63
  clean:
52
- rm -f $(OBJECTS) $(STRESS_OBJECTS) $(TARGET) $(STRESS_TARGET)
64
+ rm -f $(OBJECTS) $(STRESS_OBJECTS) $(BM25_OBJECTS) $(HYBRID_OBJECTS) $(TARGET) $(STRESS_TARGET) $(BM25_TARGET) $(HYBRID_TARGET)
65
+
66
+ # BM25 test
67
+ bm25: $(BM25_TARGET)
68
+ ./$(BM25_TARGET)
69
+
70
+ # Hybrid search test
71
+ hybrid: $(HYBRID_TARGET)
72
+ ./$(HYBRID_TARGET)
53
73
 
54
74
  # Force rebuild (useful for CI)
55
75
  rebuild: clean all