vectra 0.12.2 → 0.14.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +1 -1
- package/README.md +92 -100
- package/bin/vectra.js +3 -0
- package/lib/BrowserWebFetcher.d.ts +75 -0
- package/lib/BrowserWebFetcher.d.ts.map +1 -0
- package/lib/BrowserWebFetcher.js +290 -0
- package/lib/BrowserWebFetcher.js.map +1 -0
- package/lib/FileFetcher.d.ts +5 -0
- package/lib/FileFetcher.d.ts.map +1 -0
- package/lib/FileFetcher.js +89 -0
- package/lib/FileFetcher.js.map +1 -0
- package/lib/FileFetcher.spec.d.ts +2 -0
- package/lib/FileFetcher.spec.d.ts.map +1 -0
- package/lib/FileFetcher.spec.js +244 -0
- package/lib/FileFetcher.spec.js.map +1 -0
- package/lib/FolderWatcher.d.ts +91 -0
- package/lib/FolderWatcher.d.ts.map +1 -0
- package/lib/FolderWatcher.js +304 -0
- package/lib/FolderWatcher.js.map +1 -0
- package/lib/FolderWatcher.spec.d.ts +2 -0
- package/lib/FolderWatcher.spec.d.ts.map +1 -0
- package/lib/FolderWatcher.spec.js +308 -0
- package/lib/FolderWatcher.spec.js.map +1 -0
- package/lib/GPT3Tokenizer.d.ts +9 -0
- package/lib/GPT3Tokenizer.spec.d.ts +2 -0
- package/lib/GPT3Tokenizer.spec.d.ts.map +1 -0
- package/lib/GPT3Tokenizer.spec.js +45 -0
- package/lib/GPT3Tokenizer.spec.js.map +1 -0
- package/lib/ItemSelector.d.ts +41 -0
- package/lib/ItemSelector.d.ts.map +1 -0
- package/lib/ItemSelector.js +179 -0
- package/lib/ItemSelector.js.map +1 -0
- package/lib/ItemSelector.spec.d.ts +2 -0
- package/lib/ItemSelector.spec.d.ts.map +1 -0
- package/lib/ItemSelector.spec.js +204 -0
- package/lib/ItemSelector.spec.js.map +1 -0
- package/lib/LocalDocument.d.ts +54 -0
- package/lib/LocalDocument.d.ts.map +1 -1
- package/lib/LocalDocument.js +116 -0
- package/lib/LocalDocument.js.map +1 -0
- package/lib/LocalDocument.spec.d.ts +2 -0
- package/lib/LocalDocument.spec.d.ts.map +1 -0
- package/lib/LocalDocument.spec.js +214 -0
- package/lib/LocalDocument.spec.js.map +1 -0
- package/lib/LocalDocumentIndex.d.ts +152 -0
- package/lib/LocalDocumentIndex.d.ts.map +1 -1
- package/lib/LocalDocumentIndex.js +420 -0
- package/lib/LocalDocumentIndex.js.map +1 -0
- package/lib/LocalDocumentIndex.spec.d.ts +2 -0
- package/lib/LocalDocumentIndex.spec.d.ts.map +1 -0
- package/lib/LocalDocumentIndex.spec.js +494 -0
- package/lib/LocalDocumentIndex.spec.js.map +1 -0
- package/lib/LocalDocumentResult.d.ts +66 -0
- package/lib/LocalDocumentResult.d.ts.map +1 -1
- package/lib/LocalDocumentResult.js +376 -0
- package/lib/LocalDocumentResult.js.map +1 -0
- package/lib/LocalDocumentResult.spec.d.ts +2 -0
- package/lib/LocalDocumentResult.spec.d.ts.map +1 -0
- package/lib/LocalDocumentResult.spec.js +373 -0
- package/lib/LocalDocumentResult.spec.js.map +1 -0
- package/lib/LocalEmbeddings.d.ts +59 -0
- package/lib/LocalEmbeddings.d.ts.map +1 -0
- package/lib/LocalEmbeddings.js +101 -0
- package/lib/LocalEmbeddings.js.map +1 -0
- package/lib/LocalEmbeddings.spec.d.ts +2 -0
- package/lib/LocalEmbeddings.spec.d.ts.map +1 -0
- package/lib/LocalEmbeddings.spec.js +155 -0
- package/lib/LocalEmbeddings.spec.js.map +1 -0
- package/lib/LocalIndex.d.ts +159 -0
- package/lib/LocalIndex.d.ts.map +1 -1
- package/lib/LocalIndex.js +519 -0
- package/lib/LocalIndex.js.map +1 -0
- package/lib/LocalIndex.spec.d.ts +2 -0
- package/lib/LocalIndex.spec.js +611 -9
- package/lib/LocalIndex.spec.js.map +1 -1
- package/lib/OpenAIEmbeddings.d.ts +124 -0
- package/lib/OpenAIEmbeddings.d.ts.map +1 -0
- package/lib/OpenAIEmbeddings.js +166 -0
- package/lib/OpenAIEmbeddings.js.map +1 -0
- package/lib/OpenAIEmbeddings.spec.d.ts +2 -0
- package/lib/OpenAIEmbeddings.spec.d.ts.map +1 -0
- package/lib/OpenAIEmbeddings.spec.js +298 -0
- package/lib/OpenAIEmbeddings.spec.js.map +1 -0
- package/lib/TextSplitter.d.ts +21 -0
- package/lib/TextSplitter.d.ts.map +1 -1
- package/lib/TextSplitter.js +500 -0
- package/lib/TextSplitter.js.map +1 -0
- package/lib/TextSplitter.spec.d.ts +2 -0
- package/lib/TextSplitter.spec.d.ts.map +1 -0
- package/lib/TextSplitter.spec.js +337 -0
- package/lib/TextSplitter.spec.js.map +1 -0
- package/lib/TransformersEmbeddings.d.ts +121 -0
- package/lib/TransformersEmbeddings.d.ts.map +1 -0
- package/lib/TransformersEmbeddings.js +176 -0
- package/lib/TransformersEmbeddings.js.map +1 -0
- package/lib/TransformersEmbeddings.spec.d.ts +2 -0
- package/lib/TransformersEmbeddings.spec.d.ts.map +1 -0
- package/lib/TransformersEmbeddings.spec.js +198 -0
- package/lib/TransformersEmbeddings.spec.js.map +1 -0
- package/lib/TransformersTokenizer.d.ts +33 -0
- package/lib/TransformersTokenizer.d.ts.map +1 -0
- package/lib/TransformersTokenizer.js +44 -0
- package/lib/TransformersTokenizer.js.map +1 -0
- package/lib/TransformersTokenizer.spec.d.ts +2 -0
- package/lib/TransformersTokenizer.spec.d.ts.map +1 -0
- package/lib/TransformersTokenizer.spec.js +112 -0
- package/lib/TransformersTokenizer.spec.js.map +1 -0
- package/lib/WebFetcher.d.ts +14 -0
- package/lib/WebFetcher.d.ts.map +1 -0
- package/lib/WebFetcher.js +238 -0
- package/lib/WebFetcher.js.map +1 -0
- package/lib/WebFetcher.spec.d.ts +2 -0
- package/lib/WebFetcher.spec.d.ts.map +1 -0
- package/lib/WebFetcher.spec.js +263 -0
- package/lib/WebFetcher.spec.js.map +1 -0
- package/lib/browser.d.ts +30 -0
- package/lib/browser.d.ts.map +1 -0
- package/lib/browser.js +52 -0
- package/lib/browser.js.map +1 -0
- package/lib/codecs/IndexCodec.d.ts +37 -0
- package/lib/codecs/IndexCodec.d.ts.map +1 -0
- package/lib/codecs/IndexCodec.js +3 -0
- package/lib/codecs/IndexCodec.js.map +1 -0
- package/lib/codecs/JsonCodec.d.ts +19 -0
- package/lib/codecs/JsonCodec.d.ts.map +1 -0
- package/lib/codecs/JsonCodec.js +35 -0
- package/lib/codecs/JsonCodec.js.map +1 -0
- package/lib/codecs/JsonCodec.spec.d.ts +2 -0
- package/lib/codecs/JsonCodec.spec.d.ts.map +1 -0
- package/lib/codecs/JsonCodec.spec.js +66 -0
- package/lib/codecs/JsonCodec.spec.js.map +1 -0
- package/lib/codecs/LocalIndex.protobuf.spec.d.ts +2 -0
- package/lib/codecs/LocalIndex.protobuf.spec.d.ts.map +1 -0
- package/lib/codecs/LocalIndex.protobuf.spec.js +108 -0
- package/lib/codecs/LocalIndex.protobuf.spec.js.map +1 -0
- package/lib/codecs/ProtobufCodec.d.ts +20 -0
- package/lib/codecs/ProtobufCodec.d.ts.map +1 -0
- package/lib/codecs/ProtobufCodec.js +225 -0
- package/lib/codecs/ProtobufCodec.js.map +1 -0
- package/lib/codecs/ProtobufCodec.spec.d.ts +2 -0
- package/lib/codecs/ProtobufCodec.spec.d.ts.map +1 -0
- package/lib/codecs/ProtobufCodec.spec.js +155 -0
- package/lib/codecs/ProtobufCodec.spec.js.map +1 -0
- package/lib/codecs/index.d.ts +5 -0
- package/lib/codecs/index.d.ts.map +1 -0
- package/lib/codecs/index.js +21 -0
- package/lib/codecs/index.js.map +1 -0
- package/lib/codecs/migrateIndex.d.ts +24 -0
- package/lib/codecs/migrateIndex.d.ts.map +1 -0
- package/lib/codecs/migrateIndex.js +119 -0
- package/lib/codecs/migrateIndex.js.map +1 -0
- package/lib/codecs/migrateIndex.spec.d.ts +2 -0
- package/lib/codecs/migrateIndex.spec.d.ts.map +1 -0
- package/lib/codecs/migrateIndex.spec.js +151 -0
- package/lib/codecs/migrateIndex.spec.js.map +1 -0
- package/lib/codecs/schemas/index.proto +34 -0
- package/lib/index.d.ts +20 -0
- package/lib/index.d.ts.map +1 -1
- package/lib/index.js +36 -0
- package/lib/index.js.map +1 -0
- package/lib/internals/Colorize.d.ts +14 -0
- package/lib/internals/Colorize.d.ts.map +1 -0
- package/lib/internals/Colorize.js +69 -0
- package/lib/internals/Colorize.js.map +1 -0
- package/lib/internals/index.d.ts +3 -0
- package/lib/internals/index.d.ts.map +1 -0
- package/lib/internals/index.js +19 -0
- package/lib/internals/index.js.map +1 -0
- package/lib/internals/types.d.ts +43 -0
- package/lib/internals/types.d.ts.map +1 -0
- package/lib/internals/types.js +3 -0
- package/lib/internals/types.js.map +1 -0
- package/lib/server/IndexManager.d.ts +78 -0
- package/lib/server/IndexManager.d.ts.map +1 -0
- package/lib/server/IndexManager.js +259 -0
- package/lib/server/IndexManager.js.map +1 -0
- package/lib/server/VectraServer.d.ts +40 -0
- package/lib/server/VectraServer.d.ts.map +1 -0
- package/lib/server/VectraServer.js +151 -0
- package/lib/server/VectraServer.js.map +1 -0
- package/lib/server/VectraServer.spec.d.ts +2 -0
- package/lib/server/VectraServer.spec.d.ts.map +1 -0
- package/lib/server/VectraServer.spec.js +322 -0
- package/lib/server/VectraServer.spec.js.map +1 -0
- package/lib/server/handlers/documentHandlers.d.ts +15 -0
- package/lib/server/handlers/documentHandlers.d.ts.map +1 -0
- package/lib/server/handlers/documentHandlers.js +95 -0
- package/lib/server/handlers/documentHandlers.js.map +1 -0
- package/lib/server/handlers/helpers.d.ts +23 -0
- package/lib/server/handlers/helpers.d.ts.map +1 -0
- package/lib/server/handlers/helpers.js +138 -0
- package/lib/server/handlers/helpers.js.map +1 -0
- package/lib/server/handlers/index.d.ts +8 -0
- package/lib/server/handlers/index.d.ts.map +1 -0
- package/lib/server/handlers/index.js +22 -0
- package/lib/server/handlers/index.js.map +1 -0
- package/lib/server/handlers/indexHandlers.d.ts +14 -0
- package/lib/server/handlers/indexHandlers.d.ts.map +1 -0
- package/lib/server/handlers/indexHandlers.js +85 -0
- package/lib/server/handlers/indexHandlers.js.map +1 -0
- package/lib/server/handlers/itemHandlers.d.ts +34 -0
- package/lib/server/handlers/itemHandlers.d.ts.map +1 -0
- package/lib/server/handlers/itemHandlers.js +166 -0
- package/lib/server/handlers/itemHandlers.js.map +1 -0
- package/lib/server/handlers/lifecycleHandlers.d.ts +11 -0
- package/lib/server/handlers/lifecycleHandlers.d.ts.map +1 -0
- package/lib/server/handlers/lifecycleHandlers.js +31 -0
- package/lib/server/handlers/lifecycleHandlers.js.map +1 -0
- package/lib/server/handlers/queryHandlers.d.ts +27 -0
- package/lib/server/handlers/queryHandlers.d.ts.map +1 -0
- package/lib/server/handlers/queryHandlers.js +135 -0
- package/lib/server/handlers/queryHandlers.js.map +1 -0
- package/lib/server/handlers/statsHandlers.d.ts +17 -0
- package/lib/server/handlers/statsHandlers.d.ts.map +1 -0
- package/lib/server/handlers/statsHandlers.js +81 -0
- package/lib/server/handlers/statsHandlers.js.map +1 -0
- package/lib/server/index.d.ts +4 -0
- package/lib/server/index.d.ts.map +1 -0
- package/lib/server/index.js +23 -0
- package/lib/server/index.js.map +1 -0
- package/lib/storage/FileStorage.d.ts +92 -0
- package/lib/storage/FileStorage.d.ts.map +1 -0
- package/lib/storage/FileStorage.js +3 -0
- package/lib/storage/FileStorage.js.map +1 -0
- package/lib/storage/FileStorageUtilities.d.ts +36 -0
- package/lib/storage/FileStorageUtilities.d.ts.map +1 -0
- package/lib/storage/FileStorageUtilities.js +91 -0
- package/lib/storage/FileStorageUtilities.js.map +1 -0
- package/lib/storage/FileStorageUtilities.spec.d.ts +2 -0
- package/lib/storage/FileStorageUtilities.spec.d.ts.map +1 -0
- package/lib/storage/FileStorageUtilities.spec.js +98 -0
- package/lib/storage/FileStorageUtilities.spec.js.map +1 -0
- package/lib/storage/FileType.d.ts +29 -0
- package/lib/storage/FileType.d.ts.map +1 -0
- package/lib/storage/FileType.js +38 -0
- package/lib/storage/FileType.js.map +1 -0
- package/lib/storage/IndexedDBStorage.d.ts +47 -0
- package/lib/storage/IndexedDBStorage.d.ts.map +1 -0
- package/lib/storage/IndexedDBStorage.js +347 -0
- package/lib/storage/IndexedDBStorage.js.map +1 -0
- package/lib/storage/LocalFileStorage.browser.d.ts +19 -0
- package/lib/storage/LocalFileStorage.browser.d.ts.map +1 -0
- package/lib/storage/LocalFileStorage.browser.js +43 -0
- package/lib/storage/LocalFileStorage.browser.js.map +1 -0
- package/lib/storage/LocalFileStorage.d.ts +23 -0
- package/lib/storage/LocalFileStorage.d.ts.map +1 -0
- package/lib/storage/LocalFileStorage.js +152 -0
- package/lib/storage/LocalFileStorage.js.map +1 -0
- package/lib/storage/LocalFileStorage.spec.d.ts +2 -0
- package/lib/storage/LocalFileStorage.spec.d.ts.map +1 -0
- package/lib/storage/LocalFileStorage.spec.js +249 -0
- package/lib/storage/LocalFileStorage.spec.js.map +1 -0
- package/lib/storage/VirtualFileStorage.d.ts +18 -0
- package/lib/storage/VirtualFileStorage.d.ts.map +1 -0
- package/lib/storage/VirtualFileStorage.js +178 -0
- package/lib/storage/VirtualFileStorage.js.map +1 -0
- package/lib/storage/VirtualFileStorage.spec.d.ts +2 -0
- package/lib/storage/VirtualFileStorage.spec.d.ts.map +1 -0
- package/lib/storage/VirtualFileStorage.spec.js +302 -0
- package/lib/storage/VirtualFileStorage.spec.js.map +1 -0
- package/lib/storage/index.d.ts +6 -0
- package/lib/storage/index.d.ts.map +1 -0
- package/lib/storage/index.js +22 -0
- package/lib/storage/index.js.map +1 -0
- package/lib/templates/templates/csharp/README.md +48 -0
- package/lib/templates/templates/csharp/VectraClient.cs +234 -0
- package/lib/templates/templates/go/README.md +71 -0
- package/lib/templates/templates/go/vectra_client.go +322 -0
- package/lib/templates/templates/java/README.md +81 -0
- package/lib/templates/templates/java/VectraClient.java +232 -0
- package/lib/templates/templates/python/README.md +37 -0
- package/lib/templates/templates/python/vectra_client.py +279 -0
- package/lib/templates/templates/rust/Cargo.toml +14 -0
- package/lib/templates/templates/rust/README.md +39 -0
- package/lib/templates/templates/rust/build.rs +4 -0
- package/lib/templates/templates/rust/lib.rs +284 -0
- package/lib/templates/templates/typescript/README.md +96 -0
- package/lib/templates/templates/typescript/VectraClient.ts +374 -0
- package/lib/templates/typescript/VectraClient.d.ts +114 -0
- package/lib/templates/typescript/VectraClient.d.ts.map +1 -0
- package/lib/templates/typescript/VectraClient.js +328 -0
- package/lib/templates/typescript/VectraClient.js.map +1 -0
- package/lib/types.d.ts +153 -0
- package/lib/types.d.ts.map +1 -0
- package/lib/types.js +3 -0
- package/lib/types.js.map +1 -0
- package/lib/utils/index.d.ts +2 -0
- package/lib/utils/index.d.ts.map +1 -0
- package/lib/utils/index.js +18 -0
- package/lib/utils/index.js.map +1 -0
- package/lib/utils/pathUtils.d.ts +40 -0
- package/lib/utils/pathUtils.d.ts.map +1 -0
- package/lib/utils/pathUtils.js +98 -0
- package/lib/utils/pathUtils.js.map +1 -0
- package/lib/vectra-cli.d.ts +2 -0
- package/lib/vectra-cli.d.ts.map +1 -1
- package/lib/vectra-cli.generate.spec.d.ts +2 -0
- package/lib/vectra-cli.generate.spec.d.ts.map +1 -0
- package/lib/vectra-cli.generate.spec.js +112 -0
- package/lib/vectra-cli.generate.spec.js.map +1 -0
- package/lib/vectra-cli.js +760 -0
- package/lib/vectra-cli.js.map +1 -0
- package/lib/vectra-cli.spec.d.ts +1 -0
- package/lib/vectra-cli.spec.d.ts.map +1 -0
- package/lib/vectra-cli.spec.js +2 -0
- package/lib/vectra-cli.spec.js.map +1 -0
- package/package.json +91 -16
- package/proto/vectra_service.proto +276 -0
- package/src/BrowserWebFetcher.ts +345 -0
- package/src/FileFetcher.spec.ts +234 -0
- package/src/FileFetcher.ts +37 -25
- package/src/FolderWatcher.spec.ts +288 -0
- package/src/FolderWatcher.ts +304 -0
- package/src/GPT3Tokenizer.spec.ts +50 -0
- package/src/ItemSelector.spec.ts +252 -0
- package/src/ItemSelector.ts +163 -150
- package/src/LocalDocument.spec.ts +211 -0
- package/src/LocalDocument.ts +88 -94
- package/src/LocalDocumentIndex.spec.ts +481 -0
- package/src/LocalDocumentIndex.ts +39 -40
- package/src/LocalDocumentResult.spec.ts +373 -0
- package/src/LocalDocumentResult.ts +489 -319
- package/src/LocalEmbeddings.spec.ts +138 -0
- package/src/LocalEmbeddings.ts +120 -0
- package/src/LocalIndex.spec.ts +808 -66
- package/src/LocalIndex.ts +479 -429
- package/src/OpenAIEmbeddings.spec.ts +354 -0
- package/src/OpenAIEmbeddings.ts +26 -27
- package/src/TextSplitter.spec.ts +342 -0
- package/src/TextSplitter.ts +517 -532
- package/src/TransformersEmbeddings.spec.ts +188 -0
- package/src/TransformersEmbeddings.ts +232 -0
- package/src/TransformersTokenizer.spec.ts +143 -0
- package/src/TransformersTokenizer.ts +45 -0
- package/src/WebFetcher.spec.ts +288 -0
- package/src/WebFetcher.ts +184 -186
- package/src/browser.ts +69 -0
- package/src/codecs/IndexCodec.ts +40 -0
- package/src/codecs/JsonCodec.spec.ts +70 -0
- package/src/codecs/JsonCodec.ts +37 -0
- package/src/codecs/LocalIndex.protobuf.spec.ts +115 -0
- package/src/codecs/ProtobufCodec.spec.ts +166 -0
- package/src/codecs/ProtobufCodec.ts +193 -0
- package/src/codecs/index.ts +4 -0
- package/src/codecs/migrateIndex.spec.ts +176 -0
- package/src/codecs/migrateIndex.ts +125 -0
- package/src/codecs/schemas/index.proto +34 -0
- package/src/index.ts +9 -1
- package/src/internals/Colorize.ts +19 -16
- package/src/server/IndexManager.ts +243 -0
- package/src/server/VectraServer.spec.ts +303 -0
- package/src/server/VectraServer.ts +156 -0
- package/src/server/handlers/documentHandlers.ts +59 -0
- package/src/server/handlers/helpers.ts +93 -0
- package/src/server/handlers/index.ts +7 -0
- package/src/server/handlers/indexHandlers.ts +44 -0
- package/src/server/handlers/itemHandlers.ts +140 -0
- package/src/server/handlers/lifecycleHandlers.ts +26 -0
- package/src/server/handlers/queryHandlers.ts +96 -0
- package/src/server/handlers/statsHandlers.ts +38 -0
- package/src/server/index.ts +3 -0
- package/src/storage/FileStorage.ts +105 -0
- package/src/storage/FileStorageUtilities.spec.ts +106 -0
- package/src/storage/FileStorageUtilities.ts +77 -0
- package/src/storage/FileType.ts +61 -0
- package/src/storage/IndexedDBStorage.ts +365 -0
- package/src/storage/LocalFileStorage.browser.ts +52 -0
- package/src/storage/LocalFileStorage.spec.ts +292 -0
- package/src/storage/LocalFileStorage.ts +98 -0
- package/src/storage/VirtualFileStorage.spec.ts +307 -0
- package/src/storage/VirtualFileStorage.ts +169 -0
- package/src/storage/index.ts +5 -0
- package/src/templates/csharp/README.md +48 -0
- package/src/templates/csharp/VectraClient.cs +234 -0
- package/src/templates/go/README.md +71 -0
- package/src/templates/go/vectra_client.go +322 -0
- package/src/templates/java/README.md +81 -0
- package/src/templates/java/VectraClient.java +232 -0
- package/src/templates/python/README.md +37 -0
- package/src/templates/python/vectra_client.py +279 -0
- package/src/templates/rust/Cargo.toml +14 -0
- package/src/templates/rust/README.md +39 -0
- package/src/templates/rust/build.rs +4 -0
- package/src/templates/rust/lib.rs +284 -0
- package/src/templates/typescript/README.md +96 -0
- package/src/templates/typescript/VectraClient.ts +374 -0
- package/src/types.ts +131 -123
- package/src/utils/index.ts +1 -0
- package/src/utils/pathUtils.ts +106 -0
- package/src/vectra-cli.generate.spec.ts +72 -0
- package/src/vectra-cli.spec.ts +0 -0
- package/src/vectra-cli.ts +687 -246
package/src/LocalIndex.spec.ts
CHANGED
|
@@ -1,66 +1,808 @@
|
|
|
1
|
-
import assert from 'node:assert'
|
|
2
|
-
import
|
|
3
|
-
import {
|
|
4
|
-
import
|
|
5
|
-
import
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
await
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
})
|
|
1
|
+
import assert from 'node:assert'
|
|
2
|
+
import sinon from 'sinon'
|
|
3
|
+
import { LocalIndex } from './LocalIndex'
|
|
4
|
+
import { IndexItem } from './types'
|
|
5
|
+
import fs from 'fs/promises'
|
|
6
|
+
import path from 'path'
|
|
7
|
+
import { VirtualFileStorage } from './storage'
|
|
8
|
+
import { LocalDocument } from './LocalDocument'
|
|
9
|
+
|
|
10
|
+
describe('LocalIndex', () => {
|
|
11
|
+
const testIndexDir = path.join(__dirname, 'test_index')
|
|
12
|
+
|
|
13
|
+
const basicIndexItems: Partial<IndexItem>[] = [
|
|
14
|
+
{ id: '1', vector: [1, 2, 3] },
|
|
15
|
+
{ id: '2', vector: [2, 3, 4] },
|
|
16
|
+
{ id: '3', vector: [3, 4, 5] }
|
|
17
|
+
]
|
|
18
|
+
|
|
19
|
+
beforeEach(async () => {
|
|
20
|
+
await fs.rm(testIndexDir, { recursive: true, force: true })
|
|
21
|
+
})
|
|
22
|
+
|
|
23
|
+
afterEach(async () => {
|
|
24
|
+
await fs.rm(testIndexDir, { recursive: true, force: true })
|
|
25
|
+
sinon.restore()
|
|
26
|
+
})
|
|
27
|
+
|
|
28
|
+
it('should create a new index', async () => {
|
|
29
|
+
const index = new LocalIndex(testIndexDir)
|
|
30
|
+
await index.createIndex()
|
|
31
|
+
const created = await index.isIndexCreated()
|
|
32
|
+
assert.equal(created, true)
|
|
33
|
+
assert.equal(index.folderPath, testIndexDir)
|
|
34
|
+
})
|
|
35
|
+
|
|
36
|
+
it('exposes getters indexName and storage', async () => {
|
|
37
|
+
const index = new LocalIndex(testIndexDir)
|
|
38
|
+
assert.equal(index.indexName, 'index.json')
|
|
39
|
+
assert.ok(index.storage)
|
|
40
|
+
})
|
|
41
|
+
|
|
42
|
+
it('blocks concurrent operations when lock is held', async () => {
|
|
43
|
+
const index = new LocalIndex(testIndexDir)
|
|
44
|
+
await index.createIndex()
|
|
45
|
+
await index.beginUpdate()
|
|
46
|
+
await assert.rejects(async () => {
|
|
47
|
+
await index.beginUpdate()
|
|
48
|
+
}, new Error('Update already in progress'))
|
|
49
|
+
})
|
|
50
|
+
|
|
51
|
+
describe('createIndex', () => {
|
|
52
|
+
it('checks for existing index on creation', async () => {
|
|
53
|
+
const index = new LocalIndex(testIndexDir)
|
|
54
|
+
await index.createIndex()
|
|
55
|
+
|
|
56
|
+
await assert.rejects(async () => {
|
|
57
|
+
await index.createIndex()
|
|
58
|
+
}, new Error('Index already exists'))
|
|
59
|
+
|
|
60
|
+
await index.insertItem({ id: '1', vector: [1, 2, 3] })
|
|
61
|
+
const lengthBefore = (await index.listItems()).length
|
|
62
|
+
assert.equal(lengthBefore, 1)
|
|
63
|
+
await index.createIndex({ deleteIfExists: true, version: 2, metadata_config: {} })
|
|
64
|
+
const lengthAfter = (await index.listItems()).length
|
|
65
|
+
assert.equal(lengthAfter, 0)
|
|
66
|
+
})
|
|
67
|
+
|
|
68
|
+
it('delete index if file creation fails', async () => {
|
|
69
|
+
const index = new LocalIndex(testIndexDir)
|
|
70
|
+
sinon.stub(fs, 'writeFile').rejects(new Error('fs error'))
|
|
71
|
+
|
|
72
|
+
await assert.rejects(async () => {
|
|
73
|
+
await index.createIndex()
|
|
74
|
+
}, new Error('Error creating index'))
|
|
75
|
+
|
|
76
|
+
await assert.rejects(async () => {
|
|
77
|
+
await index.listItems()
|
|
78
|
+
})
|
|
79
|
+
})
|
|
80
|
+
|
|
81
|
+
it('persists version and metadata_config', async () => {
|
|
82
|
+
const index = new LocalIndex(testIndexDir)
|
|
83
|
+
await index.createIndex({ deleteIfExists: true, version: 2, metadata_config: { indexed: [] } })
|
|
84
|
+
|
|
85
|
+
const stats = await index.getIndexStats()
|
|
86
|
+
assert.deepStrictEqual(stats, { version: 2, metadata_config: { indexed: [] }, items: 0 })
|
|
87
|
+
})
|
|
88
|
+
})
|
|
89
|
+
|
|
90
|
+
describe('deleteIndex and load guard', () => {
|
|
91
|
+
it('listItems before createIndex rejects with not-exist error', async () => {
|
|
92
|
+
const index = new LocalIndex(testIndexDir)
|
|
93
|
+
await assert.rejects(async () => index.listItems(), new Error('Index does not exist'))
|
|
94
|
+
})
|
|
95
|
+
|
|
96
|
+
it('deleteIndex removes the index and prevents further reads', async () => {
|
|
97
|
+
const index = new LocalIndex(testIndexDir)
|
|
98
|
+
await index.createIndex()
|
|
99
|
+
await index.insertItem({ id: 'a', vector: [1] })
|
|
100
|
+
|
|
101
|
+
assert.equal(await index.isIndexCreated(), true)
|
|
102
|
+
await index.deleteIndex()
|
|
103
|
+
|
|
104
|
+
assert.equal(await index.isIndexCreated(), false)
|
|
105
|
+
await assert.rejects(async () => index.listItems(), new Error('Index does not exist'))
|
|
106
|
+
})
|
|
107
|
+
})
|
|
108
|
+
|
|
109
|
+
describe('insert/upsert validation and behavior', () => {
|
|
110
|
+
it('insertItem validation: requires vector', async () => {
|
|
111
|
+
const index = new LocalIndex(testIndexDir)
|
|
112
|
+
await index.createIndex()
|
|
113
|
+
await index.beginUpdate()
|
|
114
|
+
await assert.rejects(
|
|
115
|
+
async () => index.insertItem({ id: 'x' } as any),
|
|
116
|
+
new Error('Vector is required')
|
|
117
|
+
)
|
|
118
|
+
await index.cancelUpdate()
|
|
119
|
+
})
|
|
120
|
+
|
|
121
|
+
it('upsertItem inserts when not existing', async () => {
|
|
122
|
+
const index = new LocalIndex(testIndexDir)
|
|
123
|
+
await index.createIndex()
|
|
124
|
+
|
|
125
|
+
const inserted = await index.upsertItem({ id: 'up', vector: [9, 9] })
|
|
126
|
+
assert.equal(inserted.id, 'up')
|
|
127
|
+
assert.deepStrictEqual(inserted.vector, [9, 9])
|
|
128
|
+
|
|
129
|
+
const all = await index.listItems()
|
|
130
|
+
assert.equal(all.length, 1)
|
|
131
|
+
assert.equal(all[0].id, 'up')
|
|
132
|
+
})
|
|
133
|
+
|
|
134
|
+
it('upsertItem replaces existing item contents', async () => {
|
|
135
|
+
const index = new LocalIndex(testIndexDir)
|
|
136
|
+
await index.createIndex()
|
|
137
|
+
|
|
138
|
+
await index.insertItem({ id: 'same', vector: [1], metadata: { a: 1 } })
|
|
139
|
+
const updated = await index.upsertItem({ id: 'same', vector: [2], metadata: { a: 2 } })
|
|
140
|
+
|
|
141
|
+
assert.equal(updated.id, 'same')
|
|
142
|
+
assert.deepStrictEqual(updated.vector, [2])
|
|
143
|
+
assert.deepStrictEqual(updated.metadata, { a: 2 })
|
|
144
|
+
|
|
145
|
+
const reread = await index.getItem('same')
|
|
146
|
+
assert.deepStrictEqual(reread?.vector, [2])
|
|
147
|
+
assert.deepStrictEqual(reread?.metadata, { a: 2 })
|
|
148
|
+
})
|
|
149
|
+
|
|
150
|
+
it('auto-generates id when not provided and does not create metadataFile with no metadata even if indexed config exists', async () => {
|
|
151
|
+
const index = new LocalIndex(testIndexDir)
|
|
152
|
+
await index.createIndex({ version: 1, metadata_config: { indexed: ['keep'] } })
|
|
153
|
+
|
|
154
|
+
const inserted = await index.insertItem({ vector: [1, 2, 3] } as any)
|
|
155
|
+
assert.ok(inserted.id && typeof inserted.id === 'string')
|
|
156
|
+
|
|
157
|
+
const reread = await index.getItem(inserted.id)
|
|
158
|
+
assert.deepStrictEqual(reread?.metadata, {})
|
|
159
|
+
assert.equal((reread as any).metadataFile, undefined)
|
|
160
|
+
})
|
|
161
|
+
})
|
|
162
|
+
|
|
163
|
+
describe('cancelUpdate', () => {
|
|
164
|
+
it('discards staged changes', async () => {
|
|
165
|
+
const index = new LocalIndex(testIndexDir)
|
|
166
|
+
await index.createIndex()
|
|
167
|
+
|
|
168
|
+
await index.beginUpdate()
|
|
169
|
+
await index.insertItem({ id: 'temp', vector: [1, 1, 1] })
|
|
170
|
+
index.cancelUpdate()
|
|
171
|
+
|
|
172
|
+
const items = await index.listItems()
|
|
173
|
+
assert.deepStrictEqual(items, [])
|
|
174
|
+
})
|
|
175
|
+
})
|
|
176
|
+
|
|
177
|
+
describe('isIndexCreated transitions', () => {
|
|
178
|
+
it('reports false -> true -> false across lifecycle', async () => {
|
|
179
|
+
const index = new LocalIndex(testIndexDir)
|
|
180
|
+
assert.equal(await index.isIndexCreated(), false)
|
|
181
|
+
|
|
182
|
+
await index.createIndex()
|
|
183
|
+
assert.equal(await index.isIndexCreated(), true)
|
|
184
|
+
|
|
185
|
+
await index.deleteIndex()
|
|
186
|
+
assert.equal(await index.isIndexCreated(), false)
|
|
187
|
+
})
|
|
188
|
+
})
|
|
189
|
+
|
|
190
|
+
describe('deleteItem', () => {
|
|
191
|
+
it('does nothing when id not found', async () => {
|
|
192
|
+
const index = new LocalIndex(testIndexDir)
|
|
193
|
+
await index.createIndex()
|
|
194
|
+
await index.beginUpdate()
|
|
195
|
+
await index.insertItem(basicIndexItems[0])
|
|
196
|
+
await index.insertItem(basicIndexItems[1])
|
|
197
|
+
await index.insertItem(basicIndexItems[2])
|
|
198
|
+
await index.endUpdate()
|
|
199
|
+
|
|
200
|
+
await assert.doesNotReject(async () => {
|
|
201
|
+
await index.deleteItem('dne')
|
|
202
|
+
})
|
|
203
|
+
assert.equal((await index.listItems()).length, 3)
|
|
204
|
+
})
|
|
205
|
+
|
|
206
|
+
it('leaves existing empty index when last el deleted', async () => {
|
|
207
|
+
const index = new LocalIndex(testIndexDir)
|
|
208
|
+
await index.createIndex()
|
|
209
|
+
await index.insertItem(basicIndexItems[0])
|
|
210
|
+
|
|
211
|
+
await index.deleteItem(basicIndexItems[0].id ?? '')
|
|
212
|
+
assert.equal(await index.isIndexCreated(), true)
|
|
213
|
+
assert.equal((await index.listItems()).length, 0)
|
|
214
|
+
})
|
|
215
|
+
|
|
216
|
+
it('removes elements from any position', async () => {
|
|
217
|
+
const index = new LocalIndex(testIndexDir)
|
|
218
|
+
await index.createIndex()
|
|
219
|
+
await index.batchInsertItems([
|
|
220
|
+
{ id: '1', vector: [] },
|
|
221
|
+
{ id: '2', vector: [] },
|
|
222
|
+
{ id: '3', vector: [] },
|
|
223
|
+
{ id: '4', vector: [] },
|
|
224
|
+
{ id: '5', vector: [] }
|
|
225
|
+
])
|
|
226
|
+
|
|
227
|
+
await index.beginUpdate()
|
|
228
|
+
await index.deleteItem('1')
|
|
229
|
+
await index.deleteItem('3')
|
|
230
|
+
await index.deleteItem('5')
|
|
231
|
+
await index.endUpdate()
|
|
232
|
+
|
|
233
|
+
assert.deepStrictEqual(await index.listItems(), [
|
|
234
|
+
{ id: '2', vector: [], metadata: {}, norm: 0 },
|
|
235
|
+
{ id: '4', vector: [], metadata: {}, norm: 0 }
|
|
236
|
+
])
|
|
237
|
+
})
|
|
238
|
+
|
|
239
|
+
it('no-op delete while update active when id missing', async () => {
|
|
240
|
+
const index = new LocalIndex(testIndexDir)
|
|
241
|
+
await index.createIndex()
|
|
242
|
+
await index.batchInsertItems([
|
|
243
|
+
{ id: '1', vector: [1] },
|
|
244
|
+
{ id: '2', vector: [2] }
|
|
245
|
+
])
|
|
246
|
+
|
|
247
|
+
await index.beginUpdate()
|
|
248
|
+
await assert.doesNotReject(async () => index.deleteItem('missing'))
|
|
249
|
+
await index.endUpdate()
|
|
250
|
+
|
|
251
|
+
const items = await index.listItems()
|
|
252
|
+
assert.deepStrictEqual(items.map(i => i.id), ['1', '2'])
|
|
253
|
+
})
|
|
254
|
+
})
|
|
255
|
+
|
|
256
|
+
describe('endUpdate', () => {
|
|
257
|
+
it('throws an error if no update has begun', async () => {
|
|
258
|
+
const index = new LocalIndex(testIndexDir)
|
|
259
|
+
|
|
260
|
+
await assert.rejects(async () => {
|
|
261
|
+
await index.endUpdate()
|
|
262
|
+
}, new Error('No update in progress'))
|
|
263
|
+
})
|
|
264
|
+
|
|
265
|
+
it('throws an error if the index could not be saved', async () => {
|
|
266
|
+
const index = new LocalIndex(testIndexDir)
|
|
267
|
+
await index.createIndex()
|
|
268
|
+
await index.beginUpdate()
|
|
269
|
+
|
|
270
|
+
sinon.stub(fs, 'writeFile').rejects(new Error('fs error'))
|
|
271
|
+
|
|
272
|
+
await assert.rejects(async () => {
|
|
273
|
+
await index.endUpdate()
|
|
274
|
+
}, new Error('Error saving index: Error: fs error'))
|
|
275
|
+
})
|
|
276
|
+
})
|
|
277
|
+
|
|
278
|
+
describe('getIndexStats', () => {
|
|
279
|
+
it('reports empty index correctly', async () => {
|
|
280
|
+
const index = new LocalIndex(testIndexDir)
|
|
281
|
+
await index.createIndex()
|
|
282
|
+
|
|
283
|
+
assert.deepStrictEqual(await index.getIndexStats(), {
|
|
284
|
+
version: 1,
|
|
285
|
+
metadata_config: {},
|
|
286
|
+
items: 0
|
|
287
|
+
})
|
|
288
|
+
})
|
|
289
|
+
|
|
290
|
+
it('correctly reports non-empty index stats', async () => {
|
|
291
|
+
const index = new LocalIndex(testIndexDir)
|
|
292
|
+
await index.createIndex({ version: 1, metadata_config: { indexed: [] } })
|
|
293
|
+
await index.batchInsertItems(basicIndexItems)
|
|
294
|
+
|
|
295
|
+
assert.deepStrictEqual(await index.getIndexStats(), {
|
|
296
|
+
version: 1,
|
|
297
|
+
metadata_config: { indexed: [] },
|
|
298
|
+
items: 3
|
|
299
|
+
})
|
|
300
|
+
})
|
|
301
|
+
})
|
|
302
|
+
|
|
303
|
+
describe('getItem', () => {
  // Looking up an id in an empty index yields undefined rather than throwing.
  it('returns undefined when item not found', async () => {
    const localIndex = new LocalIndex(testIndexDir)
    await localIndex.createIndex()

    const missing = await localIndex.getItem('1')
    assert.equal(missing, undefined)
  })

  // The item fetched by id '2' must match the second fixture entry, and the
  // lookup must leave the stored item count at 3.
  it('returns requested item', async () => {
    const localIndex = new LocalIndex(testIndexDir)
    await localIndex.createIndex()
    await localIndex.batchInsertItems(basicIndexItems)

    const found = await localIndex.getItem('2')
    const expected = basicIndexItems[1]
    assert.equal(found?.id, expected.id)
    assert.deepStrictEqual(found?.vector, expected.vector)

    const allItems = await localIndex.listItems()
    assert.equal(allItems.length, 3)
  })
})
|
|
322
|
+
|
|
323
|
+
describe('batchInsertItems', () => {
  // Both the returned array and a subsequent listing must contain all three fixture items.
  it('should insert provided items', async () => {
    const localIndex = new LocalIndex(testIndexDir)
    await localIndex.createIndex()

    const inserted = await localIndex.batchInsertItems(basicIndexItems)
    assert.equal(inserted.length, 3)

    const listed = await localIndex.listItems()
    assert.equal(listed.length, 3)
  })

  it('on id collision - cancel batch insert & bubble up error', async () => {
    const localIndex = new LocalIndex(testIndexDir)
    await localIndex.createIndex()

    // Pre-seed id '2' so that the batch below hits an already-existing id.
    await localIndex.insertItem({ id: '2', vector: [9, 9, 9] })

    const expectedRejection = {
      name: 'Error',
      message: 'Item with id 2 already exists'
    }
    await assert.rejects(
      async () => localIndex.batchInsertItems(basicIndexItems),
      expectedRejection
    )

    // Only the pre-seeded item may remain; nothing from the failed batch is kept.
    const remaining = await localIndex.listItems()
    assert.equal(remaining.length, 1)
  })
})
|
|
356
|
+
|
|
357
|
+
describe('listItemsByMetadata', () => {
  // Filtering with $eq on "category" returns exactly the matching items, in insertion order.
  it('returns items matching metadata filter', async () => {
    const localIndex = new LocalIndex(testIndexDir)
    await localIndex.createIndex()
    await localIndex.batchInsertItems([
      { id: '1', vector: [], metadata: { category: 'food' } },
      { id: '2', vector: [], metadata: { category: 'food' } },
      { id: '3', vector: [], metadata: { category: 'electronics' } },
      { id: '4', vector: [], metadata: { category: 'drink' } },
      { id: '5', vector: [], metadata: { category: 'food' } }
    ])

    // Runs one $eq query on "category" and returns the matched items.
    const itemsInCategory = (category: string) =>
      localIndex.listItemsByMetadata({ category: { $eq: category } })

    const food = await itemsInCategory('food')
    assert.deepStrictEqual(food.map(({ id }) => id), ['1', '2', '5'])

    const drink = await itemsInCategory('drink')
    assert.deepStrictEqual(drink.map(({ id }) => id), ['4'])

    // A category no item carries yields an empty result.
    const clothes = await itemsInCategory('clothes')
    assert.deepStrictEqual(clothes, [])
  })

  it('returns nothing when no items in index', async () => {
    const localIndex = new LocalIndex(testIndexDir)
    await localIndex.createIndex()

    const matches = await localIndex.listItemsByMetadata({})
    assert.deepStrictEqual(matches, [])
  })

  // An empty filter object places no constraint, so every stored item is returned.
  it('empty filter {} with items returns all', async () => {
    const localIndex = new LocalIndex(testIndexDir)
    await localIndex.createIndex()
    await localIndex.batchInsertItems([
      { id: 'a', vector: [1] },
      { id: 'b', vector: [2] }
    ])

    const matches = await localIndex.listItemsByMetadata({})
    const matchedIds = matches.map(({ id }) => id)
    assert.deepStrictEqual(matchedIds, ['a', 'b'])
  })
})
|
|
397
|
+
|
|
398
|
+
describe('metadata indexing behavior', () => {
  it('stores only indexed metadata in index and writes full metadata to external file', async () => {
    const localIndex = new LocalIndex(testIndexDir)
    await localIndex.createIndex({ version: 1, metadata_config: { indexed: ['keep'] } })

    // Record every fs.writeFile call while still delegating to the real implementation.
    const originalWriteFile = fs.writeFile.bind(fs)
    const recordedWrites: { file: string | Buffer | URL; data: any }[] = []
    sinon.stub(fs, 'writeFile').callsFake(async (file: any, data: any, options?: any) => {
      recordedWrites.push({ file, data })
      return originalWriteFile(file, data, options as any)
    })

    await localIndex.insertItem({ id: 'm1', vector: [1], metadata: { keep: 'x', drop: 'y' } })

    // The listed item keeps only the indexed key and points at an external metadata file.
    const listed = await localIndex.listItems()
    const stored = listed.find(entry => entry.id === 'm1')!
    assert.deepStrictEqual(stored.metadata, { keep: 'x' })
    assert.ok((stored as any).metadataFile)

    // Normalise each recorded payload to text, dropping anything empty or non-convertible.
    const textPayloads: string[] = []
    for (const { data } of recordedWrites) {
      const text = typeof data === 'string' ? data : data?.toString?.()
      if (text) {
        textPayloads.push(text)
      }
    }

    // True when the text is JSON carrying both the indexed and the non-indexed key.
    const holdsFullMetadata = (text: string) => {
      try {
        const parsed = JSON.parse(text)
        return parsed && parsed.keep === 'x' && parsed.drop === 'y'
      } catch {
        // Payloads that are not valid JSON simply do not count.
        return false
      }
    }

    const hasFullMetadata = textPayloads.some(text => holdsFullMetadata(text))
    assert.equal(hasFullMetadata, true)
  })

  it('does not write external file when metadata only contains indexed keys', async () => {
    const localIndex = new LocalIndex(testIndexDir)
    await localIndex.createIndex({ version: 1, metadata_config: { indexed: ['only'] } })

    await localIndex.insertItem({ id: 'm2', vector: [1], metadata: { only: 'ok' } })
    const fetched = await localIndex.getItem('m2')

    // All metadata stays inline and no metadataFile reference is attached.
    assert.deepStrictEqual(fetched?.metadata, { only: 'ok' })
    assert.equal((fetched as any).metadataFile, undefined)
  })
})
|
|
443
|
+
|
|
444
|
+
describe('queryItems', () => {
  // NOTE(review): the first two tests exercise upsertItem/insertItem rather than
  // queryItems; they stay in this suite so their full titles do not change.

  it('upsertItem uses the in-progress update path when _update is present', async () => {
    const index = new LocalIndex(testIndexDir)
    await index.createIndex()

    await index.beginUpdate()
    // goes through the if (this._update) early return branch in upsertItem
    await index.upsertItem({ id: 'inplace', vector: [7, 7], metadata: { k: 'v' } })
    // not persisted until endUpdate
    let items = await index.listItems()
    assert.equal(items.length, 0)

    await index.endUpdate()
    items = await index.listItems()
    assert.equal(items.length, 1)
    assert.equal(items[0].id, 'inplace')
    assert.deepStrictEqual(items[0].metadata, { k: 'v' })
  })

  it('with no indexed config, full metadata is stored inline and no metadataFile is written', async () => {
    const index = new LocalIndex(testIndexDir)
    await index.createIndex({ version: 1 /* metadata_config undefined => inline */ })

    await index.insertItem({ id: 'fullMd', vector: [1], metadata: { a: 1, b: 2 } })
    const item = await index.getItem('fullMd')
    assert.deepStrictEqual(item?.metadata, { a: 1, b: 2 })
    assert.equal((item as any).metadataFile, undefined)
  })

  it('BM25 uses the default docReader (LocalDocument.loadText) when not injected', async () => {
    // stub LocalDocument.loadText so default docReader path runs without touching disk
    const stub = sinon.stub(LocalDocument.prototype, 'loadText').resolves('Hello default docReader text')
    try {
      const addDocSpy = sinon.spy()
      const fakeEngine = {
        defineConfig: sinon.stub(),
        definePrepTasks: sinon.stub(),
        addDoc: (...a: any[]) => addDocSpy(...a),
        consolidate: sinon.stub(),
        search: (_q: string) => []
      }

      const index = new LocalIndex(testIndexDir, undefined, undefined, undefined, { bm25Factory: () => fakeEngine /* no docReader injected */ })
      await index.createIndex()
      await index.batchInsertItems([
        { id: 'a', vector: [1, 0, 0], metadata: { documentId: 'docA', startPos: 0, endPos: 4 } },
        { id: 'b', vector: [0, 1, 0], metadata: { documentId: 'docB', startPos: 0, endPos: 4 } }
      ])

      await index.queryItems([1, 0, 0], 'kw', 1, undefined, true)
      assert.ok(addDocSpy.called) // default docReader loaded and chunks added
    } finally {
      // Restore even when an assertion fails so the prototype stub cannot leak into later tests.
      stub.restore()
    }
  })

  it('executes the prepTask defined in setupBm25 to cover tokenization/negation pipeline', async () => {
    let capturedTasks: ((text: string) => string[])[] = []
    const fakeEngine = {
      defineConfig: sinon.stub(),
      // Capture the prep tasks handed to the engine so they can be invoked directly below.
      definePrepTasks: (arr: any[]) => { capturedTasks = arr },
      addDoc: sinon.stub(),
      consolidate: sinon.stub(),
      search: (_q: string) => []
    }
    const index = new LocalIndex(testIndexDir, undefined, undefined, undefined, { bm25Factory: () => fakeEngine, docReader: async () => 'text' })
    await index.createIndex()
    // trigger setupBm25 via a BM25 query
    await index.batchInsertItems([{ id: 'x', vector: [1], metadata: { documentId: 'd', startPos: 0, endPos: 0 } }])
    await index.queryItems([1], 'q', 1, undefined, true)

    assert.ok(Array.isArray(capturedTasks) && capturedTasks.length === 1)
    const tokens = capturedTasks[0]("I don't like complicated tokens!")
    // ensure pipeline ran: non-empty tokens, punctuation removed, possible negation handling
    assert.ok(Array.isArray(tokens) && tokens.length > 0)
  })

  it('returns empty array on empty index search', async () => {
    const index = new LocalIndex(testIndexDir)
    await index.createIndex()

    const result = await index.queryItems([1, 2, 3], '', 10)
    assert.deepStrictEqual(result, [])
  })

  it('returns bad match when no better match exists', async () => {
    const index = new LocalIndex(testIndexDir)
    await index.createIndex()
    await index.insertItem({ id: '1', vector: [0.9, 0, 0, 0, 0] })

    // The query vector is orthogonal to the only stored vector, hence the expected score of 0.
    const result = await index.queryItems([0, 0, 0, 0, 0.1], '', 1)
    assert.equal(result[0]?.score, 0)
    assert.equal(result[0]?.item.id, '1')
  })

  it('returns all vectors when fewer than topK exist', async () => {
    const index = new LocalIndex(testIndexDir)
    await index.createIndex()
    await index.batchInsertItems(basicIndexItems)

    const result = await index.queryItems([0, 0, 1], '', 10)
    assert.equal(result.length, 3)
    assert.deepStrictEqual(
      result.map(({ item }) => item.id),
      basicIndexItems.map(item => item.id)
    )
  })

  it('limits results to topK when more items exist', async () => {
    const index = new LocalIndex(testIndexDir)
    await index.createIndex()
    await index.batchInsertItems([
      { id: 'a', vector: [1, 0, 0] },
      { id: 'b', vector: [0, 1, 0] },
      { id: 'c', vector: [0, 0, 1] },
      { id: 'd', vector: [1, 1, 0] },
      { id: 'e', vector: [0, 1, 1] }
    ])

    const result = await index.queryItems([1, 0, 0], '', 2)
    assert.equal(result.length, 2)
  })

  it('filters by metadata when filter provided', async () => {
    const index = new LocalIndex(testIndexDir)
    await index.createIndex()
    await index.batchInsertItems([
      { id: '1', vector: [1, 0, 0], metadata: { category: 'food' } },
      { id: '2', vector: [0, 0, 1], metadata: { category: 'drink' } }
    ])

    const bestGeneralMatch = await index.queryItems([1, 0, 0], '', 1)
    const bestDrinkMatch = await index.queryItems([1, 0, 0], '', 1, {
      category: { $eq: 'drink' }
    })

    assert.equal(bestGeneralMatch[0].item.id, '1')
    assert.equal(bestDrinkMatch[0].item.id, '2')
  })

  it('reads item metadata file when provided', async () => {
    const index = new LocalIndex(testIndexDir)
    await index.createIndex({ version: 1, metadata_config: { indexed: ['category'] } })
    await index.batchInsertItems([
      { id: '1', vector: [1, 0, 0] },
      { id: '2', vector: [0, 0, 1], metadata: { category: 'drink', extra: 'x' } }
    ])

    // NOTE(review): this stub is not restored here — presumably a shared afterEach
    // hook calls sinon.restore(); confirm.
    sinon.stub(fs, 'readFile').resolves(JSON.stringify({ category: 'drink', extra: 'x' }))

    const bestDrinkMatch = await index.queryItems([1, 0, 0], '', 2, { category: { $eq: 'drink' } })

    assert.notEqual(bestDrinkMatch[0].item.metadataFile, undefined)
    assert.equal(bestDrinkMatch[0].item.id, '2')
  })

  it('appends BM25 results when isBm25=true (tuple results)', async () => {
    // Inject a fake BM25 engine and docReader
    const addDocSpy = sinon.spy()
    const consolidateSpy = sinon.spy()
    const fakeEngine = {
      defineConfig: sinon.stub(),
      definePrepTasks: sinon.stub(),
      addDoc: (...a: any[]) => addDocSpy(...a),
      consolidate: (...a: any[]) => consolidateSpy(...a),
      search: (q: string) => {
        void q
        // results as tuples [index, score]
        return [[0, 0.123], [1, 0.122]]
      }
    }
    const index = new LocalIndex(
      testIndexDir,
      undefined,
      undefined,
      undefined,
      {
        bm25Factory: () => fakeEngine,
        docReader: async (_docId: string) => 'SAMPLETEXT'
      }
    )
    await index.createIndex()

    await index.batchInsertItems([
      { id: 's1', vector: [1, 0, 0], metadata: { category: 'alpha', documentId: 'd1', startPos: 0, endPos: 3 } },
      { id: 's2', vector: [0, 1, 0], metadata: { category: 'beta', documentId: 'd2', startPos: 0, endPos: 3 } },
      { id: 's3', vector: [0, 0, 1], metadata: { category: 'gamma', documentId: 'd3', startPos: 0, endPos: 3 } }
    ])

    const topK = 1
    const results = await index.queryItems([1, 0, 0], 'keyword', topK, undefined, true)

    assert.ok(results.length >= 1)
    assert.ok(results.some(r => (r.item.metadata as any)?.isBm25 === true))
    assert.ok(consolidateSpy.calledOnce)
    // only non-top items should have been offered to addDoc (top item excluded)
    assert.ok(addDocSpy.callCount >= 1)
  })

  it('BM25 indexes only non-top items with document metadata, calls consolidate, and swallows load errors', async () => {
    // docReader returns text for docB, throws for docC
    const addDocSpy = sinon.spy()
    const consolidateSpy = sinon.spy()
    const fakeEngine = {
      defineConfig: sinon.stub(),
      definePrepTasks: sinon.stub(),
      addDoc: (...a: any[]) => addDocSpy(...a),
      consolidate: (...a: any[]) => consolidateSpy(...a),
      search: (_q: string) => {
        // invalid tuple index is ignored
        return [[999, 0.77]]
      }
    }
    const docReader = async (docId: string) => {
      if (docId === 'docB') return 'ABCDEFGHIJ'
      if (docId === 'docC') throw new Error('load failed')
      return ''
    }

    const index = new LocalIndex(testIndexDir, undefined, undefined, undefined, { bm25Factory: () => fakeEngine, docReader })
    await index.createIndex()

    await index.batchInsertItems([
      { id: 'sTop', vector: [1, 0, 0], metadata: { documentId: 'docA', startPos: 0, endPos: 4 } },
      { id: 'd1', vector: [0, 1, 0], metadata: { documentId: 'docB', startPos: 0, endPos: 4 } },
      { id: 'd2', vector: [0, 0, 1], metadata: { documentId: 'docC', startPos: 0, endPos: 4 } }
    ])

    const res = await index.queryItems([1, 0, 0], 'kw', 1, undefined, true)
    assert.equal(addDocSpy.callCount, 1) // only d1 added; d2 threw; sTop excluded
    assert.equal(consolidateSpy.callCount, 1)
    assert.ok(res.length >= 1)
  })

  it('BM25 object results are appended and marked isBm25 even if item is undefined', async () => {
    const fakeEngine = {
      defineConfig: sinon.stub(),
      definePrepTasks: sinon.stub(),
      addDoc: sinon.stub(),
      consolidate: sinon.stub(),
      // Object-shaped (non-tuple) search result with no item attached.
      search: (_q: string) => [{ item: undefined, score: 0.55 }]
    }
    const index = new LocalIndex(
      testIndexDir,
      undefined,
      undefined,
      undefined,
      { bm25Factory: () => fakeEngine, docReader: async () => 'YY' }
    )
    await index.createIndex()
    await index.batchInsertItems([
      { id: 'x1', vector: [1, 0], metadata: { documentId: 'doc1', startPos: 0, endPos: 1 } },
      { id: 'x2', vector: [0, 1], metadata: { documentId: 'doc2', startPos: 0, endPos: 1 } }
    ])

    const res = await index.queryItems([1, 0], 'any', 1, undefined, true)
    assert.ok(res.some(r => (r.item.metadata as any)?.isBm25 === true))
  })

  it('BM25 engine missing addDoc/consolidate does not throw and returns semantic results', async () => {
    // Deliberately omits addDoc and consolidate from the fake engine.
    const fakeEngine = {
      defineConfig: sinon.stub(),
      definePrepTasks: sinon.stub(),
      search: (_q: string) => []
    }
    const index = new LocalIndex(
      testIndexDir,
      undefined,
      undefined,
      undefined,
      { bm25Factory: () => fakeEngine, docReader: async () => 'YY' }
    )
    await index.createIndex()

    await index.batchInsertItems([
      { id: 's1', vector: [1, 0], metadata: { documentId: 'doc1', startPos: 0, endPos: 1 } },
      { id: 's2', vector: [0, 1], metadata: { documentId: 'doc2', startPos: 0, endPos: 1 } }
    ])

    const res = await index.queryItems([1, 0], 'kw', 1, undefined, true)
    assert.ok(res.length >= 1)
    assert.ok(res.every(r => !(r.item.metadata as any)?.isBm25))
  })
})
|
|
797
|
+
|
|
798
|
+
describe('VirtualFileStorage (in-memory)', () => {
  // Runs create -> insert -> stats against a VirtualFileStorage instance passed
  // to LocalIndex as its storage argument.
  it('works end-to-end in-memory without touching disk', async () => {
    const memoryStorage = new VirtualFileStorage()
    const localIndex = new LocalIndex('mem://idx', undefined, memoryStorage)

    await localIndex.createIndex({ version: 3, metadata_config: { indexed: ['t'] } })
    // No id is supplied for the inserted item.
    await localIndex.insertItem({ vector: [1], metadata: { t: 'x', other: 'y' } })

    const expectedStats = { version: 3, metadata_config: { indexed: ['t'] }, items: 1 }
    const actualStats = await localIndex.getIndexStats()
    assert.deepStrictEqual(actualStats, expectedStats)
  })
})
|
|
808
|
+
})
|