vectra 0.12.2 → 0.14.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +1 -1
- package/README.md +92 -100
- package/bin/vectra.js +3 -0
- package/lib/BrowserWebFetcher.d.ts +75 -0
- package/lib/BrowserWebFetcher.d.ts.map +1 -0
- package/lib/BrowserWebFetcher.js +290 -0
- package/lib/BrowserWebFetcher.js.map +1 -0
- package/lib/FileFetcher.d.ts +5 -0
- package/lib/FileFetcher.d.ts.map +1 -0
- package/lib/FileFetcher.js +89 -0
- package/lib/FileFetcher.js.map +1 -0
- package/lib/FileFetcher.spec.d.ts +2 -0
- package/lib/FileFetcher.spec.d.ts.map +1 -0
- package/lib/FileFetcher.spec.js +244 -0
- package/lib/FileFetcher.spec.js.map +1 -0
- package/lib/FolderWatcher.d.ts +91 -0
- package/lib/FolderWatcher.d.ts.map +1 -0
- package/lib/FolderWatcher.js +304 -0
- package/lib/FolderWatcher.js.map +1 -0
- package/lib/FolderWatcher.spec.d.ts +2 -0
- package/lib/FolderWatcher.spec.d.ts.map +1 -0
- package/lib/FolderWatcher.spec.js +308 -0
- package/lib/FolderWatcher.spec.js.map +1 -0
- package/lib/GPT3Tokenizer.d.ts +9 -0
- package/lib/GPT3Tokenizer.spec.d.ts +2 -0
- package/lib/GPT3Tokenizer.spec.d.ts.map +1 -0
- package/lib/GPT3Tokenizer.spec.js +45 -0
- package/lib/GPT3Tokenizer.spec.js.map +1 -0
- package/lib/ItemSelector.d.ts +41 -0
- package/lib/ItemSelector.d.ts.map +1 -0
- package/lib/ItemSelector.js +179 -0
- package/lib/ItemSelector.js.map +1 -0
- package/lib/ItemSelector.spec.d.ts +2 -0
- package/lib/ItemSelector.spec.d.ts.map +1 -0
- package/lib/ItemSelector.spec.js +204 -0
- package/lib/ItemSelector.spec.js.map +1 -0
- package/lib/LocalDocument.d.ts +54 -0
- package/lib/LocalDocument.d.ts.map +1 -1
- package/lib/LocalDocument.js +116 -0
- package/lib/LocalDocument.js.map +1 -0
- package/lib/LocalDocument.spec.d.ts +2 -0
- package/lib/LocalDocument.spec.d.ts.map +1 -0
- package/lib/LocalDocument.spec.js +214 -0
- package/lib/LocalDocument.spec.js.map +1 -0
- package/lib/LocalDocumentIndex.d.ts +152 -0
- package/lib/LocalDocumentIndex.d.ts.map +1 -1
- package/lib/LocalDocumentIndex.js +420 -0
- package/lib/LocalDocumentIndex.js.map +1 -0
- package/lib/LocalDocumentIndex.spec.d.ts +2 -0
- package/lib/LocalDocumentIndex.spec.d.ts.map +1 -0
- package/lib/LocalDocumentIndex.spec.js +494 -0
- package/lib/LocalDocumentIndex.spec.js.map +1 -0
- package/lib/LocalDocumentResult.d.ts +66 -0
- package/lib/LocalDocumentResult.d.ts.map +1 -1
- package/lib/LocalDocumentResult.js +376 -0
- package/lib/LocalDocumentResult.js.map +1 -0
- package/lib/LocalDocumentResult.spec.d.ts +2 -0
- package/lib/LocalDocumentResult.spec.d.ts.map +1 -0
- package/lib/LocalDocumentResult.spec.js +373 -0
- package/lib/LocalDocumentResult.spec.js.map +1 -0
- package/lib/LocalEmbeddings.d.ts +59 -0
- package/lib/LocalEmbeddings.d.ts.map +1 -0
- package/lib/LocalEmbeddings.js +101 -0
- package/lib/LocalEmbeddings.js.map +1 -0
- package/lib/LocalEmbeddings.spec.d.ts +2 -0
- package/lib/LocalEmbeddings.spec.d.ts.map +1 -0
- package/lib/LocalEmbeddings.spec.js +155 -0
- package/lib/LocalEmbeddings.spec.js.map +1 -0
- package/lib/LocalIndex.d.ts +159 -0
- package/lib/LocalIndex.d.ts.map +1 -1
- package/lib/LocalIndex.js +519 -0
- package/lib/LocalIndex.js.map +1 -0
- package/lib/LocalIndex.spec.d.ts +2 -0
- package/lib/LocalIndex.spec.js +611 -9
- package/lib/LocalIndex.spec.js.map +1 -1
- package/lib/OpenAIEmbeddings.d.ts +124 -0
- package/lib/OpenAIEmbeddings.d.ts.map +1 -0
- package/lib/OpenAIEmbeddings.js +166 -0
- package/lib/OpenAIEmbeddings.js.map +1 -0
- package/lib/OpenAIEmbeddings.spec.d.ts +2 -0
- package/lib/OpenAIEmbeddings.spec.d.ts.map +1 -0
- package/lib/OpenAIEmbeddings.spec.js +298 -0
- package/lib/OpenAIEmbeddings.spec.js.map +1 -0
- package/lib/TextSplitter.d.ts +21 -0
- package/lib/TextSplitter.d.ts.map +1 -1
- package/lib/TextSplitter.js +500 -0
- package/lib/TextSplitter.js.map +1 -0
- package/lib/TextSplitter.spec.d.ts +2 -0
- package/lib/TextSplitter.spec.d.ts.map +1 -0
- package/lib/TextSplitter.spec.js +337 -0
- package/lib/TextSplitter.spec.js.map +1 -0
- package/lib/TransformersEmbeddings.d.ts +121 -0
- package/lib/TransformersEmbeddings.d.ts.map +1 -0
- package/lib/TransformersEmbeddings.js +176 -0
- package/lib/TransformersEmbeddings.js.map +1 -0
- package/lib/TransformersEmbeddings.spec.d.ts +2 -0
- package/lib/TransformersEmbeddings.spec.d.ts.map +1 -0
- package/lib/TransformersEmbeddings.spec.js +198 -0
- package/lib/TransformersEmbeddings.spec.js.map +1 -0
- package/lib/TransformersTokenizer.d.ts +33 -0
- package/lib/TransformersTokenizer.d.ts.map +1 -0
- package/lib/TransformersTokenizer.js +44 -0
- package/lib/TransformersTokenizer.js.map +1 -0
- package/lib/TransformersTokenizer.spec.d.ts +2 -0
- package/lib/TransformersTokenizer.spec.d.ts.map +1 -0
- package/lib/TransformersTokenizer.spec.js +112 -0
- package/lib/TransformersTokenizer.spec.js.map +1 -0
- package/lib/WebFetcher.d.ts +14 -0
- package/lib/WebFetcher.d.ts.map +1 -0
- package/lib/WebFetcher.js +238 -0
- package/lib/WebFetcher.js.map +1 -0
- package/lib/WebFetcher.spec.d.ts +2 -0
- package/lib/WebFetcher.spec.d.ts.map +1 -0
- package/lib/WebFetcher.spec.js +263 -0
- package/lib/WebFetcher.spec.js.map +1 -0
- package/lib/browser.d.ts +30 -0
- package/lib/browser.d.ts.map +1 -0
- package/lib/browser.js +52 -0
- package/lib/browser.js.map +1 -0
- package/lib/codecs/IndexCodec.d.ts +37 -0
- package/lib/codecs/IndexCodec.d.ts.map +1 -0
- package/lib/codecs/IndexCodec.js +3 -0
- package/lib/codecs/IndexCodec.js.map +1 -0
- package/lib/codecs/JsonCodec.d.ts +19 -0
- package/lib/codecs/JsonCodec.d.ts.map +1 -0
- package/lib/codecs/JsonCodec.js +35 -0
- package/lib/codecs/JsonCodec.js.map +1 -0
- package/lib/codecs/JsonCodec.spec.d.ts +2 -0
- package/lib/codecs/JsonCodec.spec.d.ts.map +1 -0
- package/lib/codecs/JsonCodec.spec.js +66 -0
- package/lib/codecs/JsonCodec.spec.js.map +1 -0
- package/lib/codecs/LocalIndex.protobuf.spec.d.ts +2 -0
- package/lib/codecs/LocalIndex.protobuf.spec.d.ts.map +1 -0
- package/lib/codecs/LocalIndex.protobuf.spec.js +108 -0
- package/lib/codecs/LocalIndex.protobuf.spec.js.map +1 -0
- package/lib/codecs/ProtobufCodec.d.ts +20 -0
- package/lib/codecs/ProtobufCodec.d.ts.map +1 -0
- package/lib/codecs/ProtobufCodec.js +225 -0
- package/lib/codecs/ProtobufCodec.js.map +1 -0
- package/lib/codecs/ProtobufCodec.spec.d.ts +2 -0
- package/lib/codecs/ProtobufCodec.spec.d.ts.map +1 -0
- package/lib/codecs/ProtobufCodec.spec.js +155 -0
- package/lib/codecs/ProtobufCodec.spec.js.map +1 -0
- package/lib/codecs/index.d.ts +5 -0
- package/lib/codecs/index.d.ts.map +1 -0
- package/lib/codecs/index.js +21 -0
- package/lib/codecs/index.js.map +1 -0
- package/lib/codecs/migrateIndex.d.ts +24 -0
- package/lib/codecs/migrateIndex.d.ts.map +1 -0
- package/lib/codecs/migrateIndex.js +119 -0
- package/lib/codecs/migrateIndex.js.map +1 -0
- package/lib/codecs/migrateIndex.spec.d.ts +2 -0
- package/lib/codecs/migrateIndex.spec.d.ts.map +1 -0
- package/lib/codecs/migrateIndex.spec.js +151 -0
- package/lib/codecs/migrateIndex.spec.js.map +1 -0
- package/lib/codecs/schemas/index.proto +34 -0
- package/lib/index.d.ts +20 -0
- package/lib/index.d.ts.map +1 -1
- package/lib/index.js +36 -0
- package/lib/index.js.map +1 -0
- package/lib/internals/Colorize.d.ts +14 -0
- package/lib/internals/Colorize.d.ts.map +1 -0
- package/lib/internals/Colorize.js +69 -0
- package/lib/internals/Colorize.js.map +1 -0
- package/lib/internals/index.d.ts +3 -0
- package/lib/internals/index.d.ts.map +1 -0
- package/lib/internals/index.js +19 -0
- package/lib/internals/index.js.map +1 -0
- package/lib/internals/types.d.ts +43 -0
- package/lib/internals/types.d.ts.map +1 -0
- package/lib/internals/types.js +3 -0
- package/lib/internals/types.js.map +1 -0
- package/lib/server/IndexManager.d.ts +78 -0
- package/lib/server/IndexManager.d.ts.map +1 -0
- package/lib/server/IndexManager.js +259 -0
- package/lib/server/IndexManager.js.map +1 -0
- package/lib/server/VectraServer.d.ts +40 -0
- package/lib/server/VectraServer.d.ts.map +1 -0
- package/lib/server/VectraServer.js +151 -0
- package/lib/server/VectraServer.js.map +1 -0
- package/lib/server/VectraServer.spec.d.ts +2 -0
- package/lib/server/VectraServer.spec.d.ts.map +1 -0
- package/lib/server/VectraServer.spec.js +322 -0
- package/lib/server/VectraServer.spec.js.map +1 -0
- package/lib/server/handlers/documentHandlers.d.ts +15 -0
- package/lib/server/handlers/documentHandlers.d.ts.map +1 -0
- package/lib/server/handlers/documentHandlers.js +95 -0
- package/lib/server/handlers/documentHandlers.js.map +1 -0
- package/lib/server/handlers/helpers.d.ts +23 -0
- package/lib/server/handlers/helpers.d.ts.map +1 -0
- package/lib/server/handlers/helpers.js +138 -0
- package/lib/server/handlers/helpers.js.map +1 -0
- package/lib/server/handlers/index.d.ts +8 -0
- package/lib/server/handlers/index.d.ts.map +1 -0
- package/lib/server/handlers/index.js +22 -0
- package/lib/server/handlers/index.js.map +1 -0
- package/lib/server/handlers/indexHandlers.d.ts +14 -0
- package/lib/server/handlers/indexHandlers.d.ts.map +1 -0
- package/lib/server/handlers/indexHandlers.js +85 -0
- package/lib/server/handlers/indexHandlers.js.map +1 -0
- package/lib/server/handlers/itemHandlers.d.ts +34 -0
- package/lib/server/handlers/itemHandlers.d.ts.map +1 -0
- package/lib/server/handlers/itemHandlers.js +166 -0
- package/lib/server/handlers/itemHandlers.js.map +1 -0
- package/lib/server/handlers/lifecycleHandlers.d.ts +11 -0
- package/lib/server/handlers/lifecycleHandlers.d.ts.map +1 -0
- package/lib/server/handlers/lifecycleHandlers.js +31 -0
- package/lib/server/handlers/lifecycleHandlers.js.map +1 -0
- package/lib/server/handlers/queryHandlers.d.ts +27 -0
- package/lib/server/handlers/queryHandlers.d.ts.map +1 -0
- package/lib/server/handlers/queryHandlers.js +135 -0
- package/lib/server/handlers/queryHandlers.js.map +1 -0
- package/lib/server/handlers/statsHandlers.d.ts +17 -0
- package/lib/server/handlers/statsHandlers.d.ts.map +1 -0
- package/lib/server/handlers/statsHandlers.js +81 -0
- package/lib/server/handlers/statsHandlers.js.map +1 -0
- package/lib/server/index.d.ts +4 -0
- package/lib/server/index.d.ts.map +1 -0
- package/lib/server/index.js +23 -0
- package/lib/server/index.js.map +1 -0
- package/lib/storage/FileStorage.d.ts +92 -0
- package/lib/storage/FileStorage.d.ts.map +1 -0
- package/lib/storage/FileStorage.js +3 -0
- package/lib/storage/FileStorage.js.map +1 -0
- package/lib/storage/FileStorageUtilities.d.ts +36 -0
- package/lib/storage/FileStorageUtilities.d.ts.map +1 -0
- package/lib/storage/FileStorageUtilities.js +91 -0
- package/lib/storage/FileStorageUtilities.js.map +1 -0
- package/lib/storage/FileStorageUtilities.spec.d.ts +2 -0
- package/lib/storage/FileStorageUtilities.spec.d.ts.map +1 -0
- package/lib/storage/FileStorageUtilities.spec.js +98 -0
- package/lib/storage/FileStorageUtilities.spec.js.map +1 -0
- package/lib/storage/FileType.d.ts +29 -0
- package/lib/storage/FileType.d.ts.map +1 -0
- package/lib/storage/FileType.js +38 -0
- package/lib/storage/FileType.js.map +1 -0
- package/lib/storage/IndexedDBStorage.d.ts +47 -0
- package/lib/storage/IndexedDBStorage.d.ts.map +1 -0
- package/lib/storage/IndexedDBStorage.js +347 -0
- package/lib/storage/IndexedDBStorage.js.map +1 -0
- package/lib/storage/LocalFileStorage.browser.d.ts +19 -0
- package/lib/storage/LocalFileStorage.browser.d.ts.map +1 -0
- package/lib/storage/LocalFileStorage.browser.js +43 -0
- package/lib/storage/LocalFileStorage.browser.js.map +1 -0
- package/lib/storage/LocalFileStorage.d.ts +23 -0
- package/lib/storage/LocalFileStorage.d.ts.map +1 -0
- package/lib/storage/LocalFileStorage.js +152 -0
- package/lib/storage/LocalFileStorage.js.map +1 -0
- package/lib/storage/LocalFileStorage.spec.d.ts +2 -0
- package/lib/storage/LocalFileStorage.spec.d.ts.map +1 -0
- package/lib/storage/LocalFileStorage.spec.js +249 -0
- package/lib/storage/LocalFileStorage.spec.js.map +1 -0
- package/lib/storage/VirtualFileStorage.d.ts +18 -0
- package/lib/storage/VirtualFileStorage.d.ts.map +1 -0
- package/lib/storage/VirtualFileStorage.js +178 -0
- package/lib/storage/VirtualFileStorage.js.map +1 -0
- package/lib/storage/VirtualFileStorage.spec.d.ts +2 -0
- package/lib/storage/VirtualFileStorage.spec.d.ts.map +1 -0
- package/lib/storage/VirtualFileStorage.spec.js +302 -0
- package/lib/storage/VirtualFileStorage.spec.js.map +1 -0
- package/lib/storage/index.d.ts +6 -0
- package/lib/storage/index.d.ts.map +1 -0
- package/lib/storage/index.js +22 -0
- package/lib/storage/index.js.map +1 -0
- package/lib/templates/templates/csharp/README.md +48 -0
- package/lib/templates/templates/csharp/VectraClient.cs +234 -0
- package/lib/templates/templates/go/README.md +71 -0
- package/lib/templates/templates/go/vectra_client.go +322 -0
- package/lib/templates/templates/java/README.md +81 -0
- package/lib/templates/templates/java/VectraClient.java +232 -0
- package/lib/templates/templates/python/README.md +37 -0
- package/lib/templates/templates/python/vectra_client.py +279 -0
- package/lib/templates/templates/rust/Cargo.toml +14 -0
- package/lib/templates/templates/rust/README.md +39 -0
- package/lib/templates/templates/rust/build.rs +4 -0
- package/lib/templates/templates/rust/lib.rs +284 -0
- package/lib/templates/templates/typescript/README.md +96 -0
- package/lib/templates/templates/typescript/VectraClient.ts +374 -0
- package/lib/templates/typescript/VectraClient.d.ts +114 -0
- package/lib/templates/typescript/VectraClient.d.ts.map +1 -0
- package/lib/templates/typescript/VectraClient.js +328 -0
- package/lib/templates/typescript/VectraClient.js.map +1 -0
- package/lib/types.d.ts +153 -0
- package/lib/types.d.ts.map +1 -0
- package/lib/types.js +3 -0
- package/lib/types.js.map +1 -0
- package/lib/utils/index.d.ts +2 -0
- package/lib/utils/index.d.ts.map +1 -0
- package/lib/utils/index.js +18 -0
- package/lib/utils/index.js.map +1 -0
- package/lib/utils/pathUtils.d.ts +40 -0
- package/lib/utils/pathUtils.d.ts.map +1 -0
- package/lib/utils/pathUtils.js +98 -0
- package/lib/utils/pathUtils.js.map +1 -0
- package/lib/vectra-cli.d.ts +2 -0
- package/lib/vectra-cli.d.ts.map +1 -1
- package/lib/vectra-cli.generate.spec.d.ts +2 -0
- package/lib/vectra-cli.generate.spec.d.ts.map +1 -0
- package/lib/vectra-cli.generate.spec.js +112 -0
- package/lib/vectra-cli.generate.spec.js.map +1 -0
- package/lib/vectra-cli.js +760 -0
- package/lib/vectra-cli.js.map +1 -0
- package/lib/vectra-cli.spec.d.ts +1 -0
- package/lib/vectra-cli.spec.d.ts.map +1 -0
- package/lib/vectra-cli.spec.js +2 -0
- package/lib/vectra-cli.spec.js.map +1 -0
- package/package.json +91 -16
- package/proto/vectra_service.proto +276 -0
- package/src/BrowserWebFetcher.ts +345 -0
- package/src/FileFetcher.spec.ts +234 -0
- package/src/FileFetcher.ts +37 -25
- package/src/FolderWatcher.spec.ts +288 -0
- package/src/FolderWatcher.ts +304 -0
- package/src/GPT3Tokenizer.spec.ts +50 -0
- package/src/ItemSelector.spec.ts +252 -0
- package/src/ItemSelector.ts +163 -150
- package/src/LocalDocument.spec.ts +211 -0
- package/src/LocalDocument.ts +88 -94
- package/src/LocalDocumentIndex.spec.ts +481 -0
- package/src/LocalDocumentIndex.ts +39 -40
- package/src/LocalDocumentResult.spec.ts +373 -0
- package/src/LocalDocumentResult.ts +489 -319
- package/src/LocalEmbeddings.spec.ts +138 -0
- package/src/LocalEmbeddings.ts +120 -0
- package/src/LocalIndex.spec.ts +808 -66
- package/src/LocalIndex.ts +479 -429
- package/src/OpenAIEmbeddings.spec.ts +354 -0
- package/src/OpenAIEmbeddings.ts +26 -27
- package/src/TextSplitter.spec.ts +342 -0
- package/src/TextSplitter.ts +517 -532
- package/src/TransformersEmbeddings.spec.ts +188 -0
- package/src/TransformersEmbeddings.ts +232 -0
- package/src/TransformersTokenizer.spec.ts +143 -0
- package/src/TransformersTokenizer.ts +45 -0
- package/src/WebFetcher.spec.ts +288 -0
- package/src/WebFetcher.ts +184 -186
- package/src/browser.ts +69 -0
- package/src/codecs/IndexCodec.ts +40 -0
- package/src/codecs/JsonCodec.spec.ts +70 -0
- package/src/codecs/JsonCodec.ts +37 -0
- package/src/codecs/LocalIndex.protobuf.spec.ts +115 -0
- package/src/codecs/ProtobufCodec.spec.ts +166 -0
- package/src/codecs/ProtobufCodec.ts +193 -0
- package/src/codecs/index.ts +4 -0
- package/src/codecs/migrateIndex.spec.ts +176 -0
- package/src/codecs/migrateIndex.ts +125 -0
- package/src/codecs/schemas/index.proto +34 -0
- package/src/index.ts +9 -1
- package/src/internals/Colorize.ts +19 -16
- package/src/server/IndexManager.ts +243 -0
- package/src/server/VectraServer.spec.ts +303 -0
- package/src/server/VectraServer.ts +156 -0
- package/src/server/handlers/documentHandlers.ts +59 -0
- package/src/server/handlers/helpers.ts +93 -0
- package/src/server/handlers/index.ts +7 -0
- package/src/server/handlers/indexHandlers.ts +44 -0
- package/src/server/handlers/itemHandlers.ts +140 -0
- package/src/server/handlers/lifecycleHandlers.ts +26 -0
- package/src/server/handlers/queryHandlers.ts +96 -0
- package/src/server/handlers/statsHandlers.ts +38 -0
- package/src/server/index.ts +3 -0
- package/src/storage/FileStorage.ts +105 -0
- package/src/storage/FileStorageUtilities.spec.ts +106 -0
- package/src/storage/FileStorageUtilities.ts +77 -0
- package/src/storage/FileType.ts +61 -0
- package/src/storage/IndexedDBStorage.ts +365 -0
- package/src/storage/LocalFileStorage.browser.ts +52 -0
- package/src/storage/LocalFileStorage.spec.ts +292 -0
- package/src/storage/LocalFileStorage.ts +98 -0
- package/src/storage/VirtualFileStorage.spec.ts +307 -0
- package/src/storage/VirtualFileStorage.ts +169 -0
- package/src/storage/index.ts +5 -0
- package/src/templates/csharp/README.md +48 -0
- package/src/templates/csharp/VectraClient.cs +234 -0
- package/src/templates/go/README.md +71 -0
- package/src/templates/go/vectra_client.go +322 -0
- package/src/templates/java/README.md +81 -0
- package/src/templates/java/VectraClient.java +232 -0
- package/src/templates/python/README.md +37 -0
- package/src/templates/python/vectra_client.py +279 -0
- package/src/templates/rust/Cargo.toml +14 -0
- package/src/templates/rust/README.md +39 -0
- package/src/templates/rust/build.rs +4 -0
- package/src/templates/rust/lib.rs +284 -0
- package/src/templates/typescript/README.md +96 -0
- package/src/templates/typescript/VectraClient.ts +374 -0
- package/src/types.ts +131 -123
- package/src/utils/index.ts +1 -0
- package/src/utils/pathUtils.ts +106 -0
- package/src/vectra-cli.generate.spec.ts +72 -0
- package/src/vectra-cli.spec.ts +0 -0
- package/src/vectra-cli.ts +687 -246
|
@@ -0,0 +1,481 @@
|
|
|
1
|
+
import path from "./utils/pathUtils";
|
|
2
|
+
import { strict as assert } from 'node:assert';
|
|
3
|
+
import { describe, it, beforeEach } from 'mocha';
|
|
4
|
+
import { LocalDocumentIndex, LocalDocumentIndexConfig } from './LocalDocumentIndex';
|
|
5
|
+
import { EmbeddingsModel, EmbeddingsResponse, Tokenizer } from './types';
|
|
6
|
+
import { FileStorage, FileDetails } from './storage';
|
|
7
|
+
|
|
8
|
+
/**
 * Test double for an embeddings model.
 *
 * Every call to `createEmbeddings` is recorded in `createEmbeddingsCalls`.
 * Responses queued in `createEmbeddingsResponses` are handed back first-in,
 * first-out; once the queue is empty a successful response carrying one
 * fixed `[0.1, 0.2]` vector per input is returned instead.
 */
class FakeEmbeddings implements EmbeddingsModel {
    maxTokens = 10;

    /** Inputs received so far, in call order. */
    createEmbeddingsCalls: (string | string[])[] = [];

    /** Canned responses to return before falling back to the default vectors. */
    createEmbeddingsResponses: EmbeddingsResponse[] = [];

    /**
     * Records the call and returns either the next canned response or a
     * default success response.
     * @param inputs A single text or a batch of texts.
     * @returns The next queued response, or one `[0.1, 0.2]` vector per input.
     */
    async createEmbeddings(inputs: string | string[]): Promise<EmbeddingsResponse> {
        this.createEmbeddingsCalls.push(inputs);

        // No canned response pending: synthesize one vector per input.
        if (this.createEmbeddingsResponses.length === 0) {
            let batch: string[];
            if (Array.isArray(inputs)) {
                batch = inputs;
            } else {
                batch = [inputs];
            }
            const vectors = batch.map(() => [0.1, 0.2]);
            return { status: 'success', output: vectors };
        }

        return this.createEmbeddingsResponses.shift()!;
    }
}
|
|
21
|
+
|
|
22
|
+
/**
 * Minimal tokenizer for tests: one token per Unicode code point.
 *
 * Tokens are full code point values rather than UTF-16 code units, so
 * characters outside the Basic Multilingual Plane (e.g. emoji) survive an
 * `encode` -> `decode` round trip instead of collapsing to a lone high
 * surrogate.
 */
class SimpleTokenizer implements Tokenizer {
    /**
     * Converts text into one numeric token per code point.
     * @param text Text to tokenize.
     * @returns The code point value of each character, in order.
     */
    encode(text: string): number[] {
        const tokens: number[] = [];
        // for..of walks the string by code point, keeping surrogate pairs together.
        for (const ch of text) {
            const codePoint = ch.codePointAt(0);
            if (codePoint !== undefined) {
                tokens.push(codePoint);
            }
        }
        return tokens;
    }

    /**
     * Rebuilds text from tokens produced by `encode`.
     * @param tokens Code point values.
     * @returns The concatenated characters.
     * @throws RangeError if a token is not a valid Unicode code point.
     */
    decode(tokens: number[]): string {
        return tokens.map(t => String.fromCodePoint(t)).join('');
    }
}
|
|
30
|
+
|
|
31
|
+
/**
 * In-memory `FileStorage` test double.
 *
 * Files live in the `files` map (path -> UTF-8 text) and folders in the
 * `folders` set. Most operations also append to a `*Calls` / `created*` /
 * `deleted*` array so tests can assert on what was invoked and in what order.
 */
class FakeStorage implements FileStorage {
    /** File contents keyed by full path. */
    files: Map<string, string> = new Map();
    /** Full paths of every known folder. */
    folders: Set<string> = new Set();
    pathExistsCalls: string[] = [];
    readFileCalls: string[] = [];
    upsertFileCalls: { path: string; content: string }[] = [];
    deleteFileCalls: string[] = [];
    createdFolders: string[] = [];
    deletedFolders: string[] = [];
    createdFiles: string[] = [];

    /**
     * Creates a new file.
     * @throws Error('File already exists') if the path is already present;
     * in that case the call is not recorded in `createdFiles`.
     */
    async createFile(filePath: string, content: Buffer | string): Promise<void> {
        if (this.files.has(filePath)) throw new Error('File already exists');
        this.createdFiles.push(filePath);
        const text = Buffer.isBuffer(content) ? content.toString('utf8') : content;
        this.files.set(filePath, text);
    }

    /** Registers a folder. Parent folders are not created implicitly. */
    async createFolder(folderPath: string): Promise<void> {
        this.createdFolders.push(folderPath);
        this.folders.add(folderPath);
    }

    /**
     * Deletes a file. The attempt is recorded even when it fails.
     * @throws Error('File not found') if the path is not a known file.
     */
    async deleteFile(filePath: string): Promise<void> {
        this.deleteFileCalls.push(filePath);
        if (!this.files.has(filePath)) {
            throw new Error('File not found');
        }
        this.files.delete(filePath);
    }

    /**
     * Deletes a folder together with every file and sub-folder beneath it.
     *
     * Containment is checked on a path-separator boundary, so deleting
     * `/a/b` leaves a sibling such as `/a/bc` untouched.
     */
    async deleteFolder(folderPath: string): Promise<void> {
        this.deletedFolders.push(folderPath);
        // Snapshot keys first: entries are removed while iterating.
        for (const filePath of [...this.files.keys()]) {
            if (this.isAtOrUnder(filePath, folderPath)) this.files.delete(filePath);
        }
        for (const folder of [...this.folders]) {
            if (this.isAtOrUnder(folder, folderPath)) this.folders.delete(folder);
        }
    }

    /**
     * Describes a file or folder. Folders take precedence when a path is
     * registered as both.
     * @throws Error('Path not found') if the path is unknown.
     */
    async getDetails(fileOrFolderPath: string): Promise<FileDetails> {
        if (this.folders.has(fileOrFolderPath)) {
            return { name: path.basename(fileOrFolderPath), path: fileOrFolderPath, isFolder: true };
        }
        if (this.files.has(fileOrFolderPath)) {
            return { name: path.basename(fileOrFolderPath), path: fileOrFolderPath, isFolder: false };
        }
        throw new Error('Path not found');
    }

    /**
     * Lists the direct children of a folder (non-recursive).
     * @param folderPath Folder whose children are listed.
     * @param filter Restricts results to files, folders, or both (default).
     * @returns Matching folders first, then matching files.
     */
    async listFiles(folderPath: string, filter: 'files' | 'folders' | 'all' = 'all'): Promise<FileDetails[]> {
        const results: FileDetails[] = [];
        const wantFolders = filter === 'all' || filter === 'folders';
        const wantFiles = filter === 'all' || filter === 'files';
        if (wantFolders) {
            for (const f of this.folders) {
                if (path.dirname(f) === folderPath) {
                    results.push({ name: path.basename(f), path: f, isFolder: true });
                }
            }
        }
        if (wantFiles) {
            for (const f of this.files.keys()) {
                if (path.dirname(f) === folderPath) {
                    results.push({ name: path.basename(f), path: f, isFolder: false });
                }
            }
        }
        return results;
    }

    /** Reports whether the path is a known file or folder; records the query. */
    async pathExists(fileOrFolderPath: string): Promise<boolean> {
        this.pathExistsCalls.push(fileOrFolderPath);
        return this.files.has(fileOrFolderPath) || this.folders.has(fileOrFolderPath);
    }

    /**
     * Reads a file as a UTF-8 buffer. The attempt is recorded even when it fails.
     * @throws Error('File not found') if the path is not a known file.
     */
    async readFile(filePath: string): Promise<Buffer> {
        this.readFileCalls.push(filePath);
        if (!this.files.has(filePath)) throw new Error('File not found');
        return Buffer.from(this.files.get(filePath)!, 'utf8');
    }

    /** Creates or overwrites a file and records the path with its decoded text. */
    async upsertFile(filePath: string, content: Buffer | string): Promise<void> {
        const text = Buffer.isBuffer(content) ? content.toString('utf8') : content;
        this.upsertFileCalls.push({ path: filePath, content: text });
        this.files.set(filePath, text);
    }

    /**
     * True when `candidate` is `folderPath` itself or lies beneath it.
     * Accepts both `/` and `\` as separators so the check does not depend on
     * which path flavor the tests use. An empty `folderPath` matches everything.
     */
    private isAtOrUnder(candidate: string, folderPath: string): boolean {
        if (folderPath === '' || candidate === folderPath) return true;
        if (!candidate.startsWith(folderPath)) return false;
        // A folder path that already ends in a separator is a safe prefix as-is.
        const last = folderPath.charAt(folderPath.length - 1);
        if (last === '/' || last === '\\') return true;
        const next = candidate.charAt(folderPath.length);
        return next === '/' || next === '\\';
    }
}
|
|
109
|
+
|
|
110
|
+
describe('LocalDocumentIndex', () => {
|
|
111
|
+
let storage: FakeStorage;
|
|
112
|
+
let embeddings: FakeEmbeddings;
|
|
113
|
+
let tokenizer: SimpleTokenizer;
|
|
114
|
+
let index: LocalDocumentIndex;
|
|
115
|
+
const folderPath = '/fake/path';
|
|
116
|
+
const indexJsonPath = path.join(folderPath, 'index.json');
|
|
117
|
+
|
|
118
|
+
beforeEach(() => {
|
|
119
|
+
storage = new FakeStorage();
|
|
120
|
+
embeddings = new FakeEmbeddings();
|
|
121
|
+
tokenizer = new SimpleTokenizer();
|
|
122
|
+
storage.files.set(indexJsonPath, JSON.stringify({ version: 1, metadata_config: {}, items: [] }));
|
|
123
|
+
const config: LocalDocumentIndexConfig = { folderPath, embeddings, tokenizer, storage };
|
|
124
|
+
index = new LocalDocumentIndex(config);
|
|
125
|
+
});
|
|
126
|
+
|
|
127
|
+
describe('constructor and getters', () => {
|
|
128
|
+
it('uses defaults when not provided (GPT3Tokenizer, undefined embeddings)', () => {
|
|
129
|
+
const idx = new LocalDocumentIndex({ folderPath });
|
|
130
|
+
assert.equal(idx.tokenizer.constructor.name, 'GPT3Tokenizer');
|
|
131
|
+
assert.equal(idx.embeddings, undefined);
|
|
132
|
+
});
|
|
133
|
+
it('uses provided tokenizer and embeddings', () => {
|
|
134
|
+
const fakeTok: Tokenizer = new SimpleTokenizer();
|
|
135
|
+
const idx = new LocalDocumentIndex({ folderPath, tokenizer: fakeTok, embeddings, storage });
|
|
136
|
+
assert.equal(idx.tokenizer, fakeTok);
|
|
137
|
+
assert.equal(idx.embeddings, embeddings);
|
|
138
|
+
});
|
|
139
|
+
});
|
|
140
|
+
|
|
141
|
+
describe('isCatalogCreated', () => {
|
|
142
|
+
it('returns true if catalog.json exists at correct path', async () => {
|
|
143
|
+
const catalogPath = path.join(folderPath, 'catalog.json');
|
|
144
|
+
storage.files.set(catalogPath, '{}');
|
|
145
|
+
const exists = await index.isCatalogCreated();
|
|
146
|
+
assert.equal(exists, true);
|
|
147
|
+
assert.equal(storage.pathExistsCalls[0], catalogPath);
|
|
148
|
+
});
|
|
149
|
+
it('returns false if catalog.json does not exist', async () => {
|
|
150
|
+
const exists = await index.isCatalogCreated();
|
|
151
|
+
assert.equal(exists, false);
|
|
152
|
+
});
|
|
153
|
+
});
|
|
154
|
+
|
|
155
|
+
describe('loadIndexData', () => {
|
|
156
|
+
it('early returns if catalog already loaded (no catalog read)', async () => {
|
|
157
|
+
(index as any)._catalog = { version: 1, count: 0, uriToId: {}, idToUri: {} };
|
|
158
|
+
await (index as any).loadIndexData(); // base will read index.json; catalog should not be read
|
|
159
|
+
const catalogPath = path.join(folderPath, 'catalog.json');
|
|
160
|
+
const catalogReads = storage.readFileCalls.filter(p => p === catalogPath).length;
|
|
161
|
+
assert.equal(catalogReads, 0);
|
|
162
|
+
});
|
|
163
|
+
|
|
164
|
+
it('loads existing catalog from storage', async () => {
|
|
165
|
+
const catalogPath = path.join(folderPath, 'catalog.json');
|
|
166
|
+
const catalogData = JSON.stringify({ version: 1, count: 2, uriToId: { a: 'id1' }, idToUri: { id1: 'a' } });
|
|
167
|
+
storage.files.set(catalogPath, catalogData);
|
|
168
|
+
const loaded = new LocalDocumentIndex({ folderPath, storage, tokenizer });
|
|
169
|
+
(storage.files).set(indexJsonPath, JSON.stringify({ version: 1, metadata_config: {}, items: [] }));
|
|
170
|
+
await (loaded as any).loadIndexData();
|
|
171
|
+
assert.deepEqual((loaded as any)._catalog, JSON.parse(catalogData));
|
|
172
|
+
});
|
|
173
|
+
|
|
174
|
+
it('creates new catalog if none exists and persists it', async () => {
|
|
175
|
+
const newIndex = new LocalDocumentIndex({ folderPath, storage, tokenizer });
|
|
176
|
+
await (newIndex as any).loadIndexData();
|
|
177
|
+
const catalogPath = path.join(folderPath, 'catalog.json');
|
|
178
|
+
assert(storage.upsertFileCalls.some(c => c.path === catalogPath));
|
|
179
|
+
const catalog = (newIndex as any)._catalog;
|
|
180
|
+
assert.equal(catalog.version, 1);
|
|
181
|
+
assert.equal(catalog.count, 0);
|
|
182
|
+
assert.deepEqual(catalog.uriToId, {});
|
|
183
|
+
assert.deepEqual(catalog.idToUri, {});
|
|
184
|
+
});
|
|
185
|
+
|
|
186
|
+
it('wraps error when failing to create catalog', async () => {
|
|
187
|
+
const failing = new LocalDocumentIndex({ folderPath, storage, tokenizer });
|
|
188
|
+
const orig = storage.upsertFile.bind(storage);
|
|
189
|
+
storage.upsertFile = async (p, c) => {
|
|
190
|
+
if (p.endsWith('catalog.json')) throw new Error('disk error');
|
|
191
|
+
return orig(p, c);
|
|
192
|
+
};
|
|
193
|
+
await assert.rejects((failing as any).loadIndexData(), /Error creating document catalog: Error: disk error/);
|
|
194
|
+
});
|
|
195
|
+
});
|
|
196
|
+
|
|
197
|
+
describe('getDocumentId and getDocumentUri', () => {
|
|
198
|
+
beforeEach(() => {
|
|
199
|
+
(index as any)._catalog = { version: 1, count: 1, uriToId: { doc1: 'id1' }, idToUri: { id1: 'doc1' } };
|
|
200
|
+
});
|
|
201
|
+
it('returns id for known uri', async () => {
|
|
202
|
+
assert.equal(await index.getDocumentId('doc1'), 'id1');
|
|
203
|
+
});
|
|
204
|
+
it('returns undefined for unknown uri', async () => {
|
|
205
|
+
assert.equal(await index.getDocumentId('nope'), undefined);
|
|
206
|
+
});
|
|
207
|
+
it('returns uri for known id', async () => {
|
|
208
|
+
assert.equal(await index.getDocumentUri('id1'), 'doc1');
|
|
209
|
+
});
|
|
210
|
+
it('returns undefined for unknown id', async () => {
|
|
211
|
+
assert.equal(await index.getDocumentUri('nope'), undefined);
|
|
212
|
+
});
|
|
213
|
+
});
|
|
214
|
+
|
|
215
|
+
describe('getCatalogStats', () => {
|
|
216
|
+
it('returns catalog stats merged with index stats', async () => {
|
|
217
|
+
(index as any)._catalog = { version: 1, count: 2, uriToId: {}, idToUri: {} };
|
|
218
|
+
(index as any).getIndexStats = async () => ({ items: 5, metadata_config: { indexed: ['a'] } });
|
|
219
|
+
const stats = await index.getCatalogStats();
|
|
220
|
+
assert.equal(stats.version, 1);
|
|
221
|
+
assert.equal(stats.documents, 2);
|
|
222
|
+
assert.equal(stats.chunks, 5);
|
|
223
|
+
assert.deepEqual(stats.metadata_config, { indexed: ['a'] });
|
|
224
|
+
});
|
|
225
|
+
});
|
|
226
|
+
|
|
227
|
+
describe('deleteDocument', () => {
|
|
228
|
+
it('returns early if document not found', async () => {
|
|
229
|
+
const result = await index.deleteDocument('missing');
|
|
230
|
+
assert.equal(result, undefined);
|
|
231
|
+
});
|
|
232
|
+
|
|
233
|
+
it('successfully deletes chunks, updates catalog, and removes files', async () => {
|
|
234
|
+
const uri = 'doc://one';
|
|
235
|
+
const documentId = 'id-1';
|
|
236
|
+
(index as any)._catalog = { version: 1, count: 1, uriToId: { [uri]: documentId }, idToUri: { [documentId]: uri } };
|
|
237
|
+
storage.files.set(path.join(folderPath, `${documentId}.txt`), 'hello');
|
|
238
|
+
storage.files.set(path.join(folderPath, `${documentId}.json`), '{"k":"v"}');
|
|
239
|
+
(index as any).listItemsByMetadata = async () => [{ id: 'chunk-1', metadata: { documentId } }];
|
|
240
|
+
(index as any).deleteItem = async () => { };
|
|
241
|
+
await index.deleteDocument(uri);
|
|
242
|
+
assert(!storage.files.has(path.join(folderPath, `${documentId}.txt`)));
|
|
243
|
+
assert(!storage.files.has(path.join(folderPath, `${documentId}.json`)));
|
|
244
|
+
assert.equal((index as any)._catalog.count, 0);
|
|
245
|
+
assert.equal((index as any)._catalog.uriToId[uri], undefined);
|
|
246
|
+
assert.equal((index as any)._catalog.idToUri[documentId], undefined);
|
|
247
|
+
});
|
|
248
|
+
|
|
249
|
+
it('cancels update and throws when deleteItem fails', async () => {
|
|
250
|
+
const uri = 'doc://err';
|
|
251
|
+
const documentId = 'id-err';
|
|
252
|
+
(index as any)._catalog = { version: 1, count: 1, uriToId: { [uri]: documentId }, idToUri: { [documentId]: uri } };
|
|
253
|
+
(index as any).listItemsByMetadata = async () => [{ id: 'chunk-1', metadata: { documentId } }];
|
|
254
|
+
(index as any).deleteItem = async () => { throw new Error('delete error'); };
|
|
255
|
+
await assert.rejects(index.deleteDocument(uri), /Error deleting document "doc:\/\/err": Error: delete error/);
|
|
256
|
+
});
|
|
257
|
+
|
|
258
|
+
it('wraps error when deleting text file fails and ignores metadata deletion failures', async () => {
    const uri = 'doc://bad-delete';
    const documentId = 'id-text-err';
    const internals = index as any;

    // Resets the catalog to a single entry for the document under test.
    const seedCatalog = () => {
        internals._catalog = { version: 1, count: 1, uriToId: { [uri]: documentId }, idToUri: { [documentId]: uri } };
    };

    // Phase 1: deleting the text file fails, and the error surfaces wrapped.
    seedCatalog();
    internals.listItemsByMetadata = async () => [];
    internals.deleteItem = async () => { };

    const originalDelete = storage.deleteFile.bind(storage);

    // Replaces storage.deleteFile with a version that throws for paths ending in
    // `suffix` and delegates to the original implementation for everything else.
    const failDeletesEndingWith = (suffix: string, message: string) => {
        storage.deleteFile = async (filePath: string) => {
            if (filePath.endsWith(suffix)) {
                throw new Error(message);
            }
            return originalDelete(filePath);
        };
    };

    failDeletesEndingWith('.txt', 'delete text error');
    await assert.rejects(
        index.deleteDocument(uri),
        /Error removing text file for document "doc:\/\/bad-delete" from disk: Error: delete text error/
    );

    // Phase 2: deleting the metadata file fails, yet the call still resolves.
    seedCatalog();
    const textFile = path.join(folderPath, `${documentId}.txt`);
    storage.files.set(textFile, 'x');
    storage.files.set(path.join(folderPath, `${documentId}.json`), '{}');
    failDeletesEndingWith('.json', 'delete json error');

    await index.deleteDocument(uri);
    assert(!storage.files.has(textFile));
});
|
|
281
|
+
});
|
|
282
|
+
|
|
283
|
+
describe('upsertDocument', () => {
    it('throws if embeddings not configured', async () => {
        // Built without an `embeddings` option.
        const unconfigured = new LocalDocumentIndex({ folderPath, storage, tokenizer });
        const pending = unconfigured.upsertDocument('uri', 'text');
        await assert.rejects(pending, /Embeddings model not configured/);
    });

    it('deletes existing document when re-inserting same uri', async () => {
        const existingId = 'doc-1';
        const uri = 'doc://exists';
        const internals = index as any;
        internals._catalog = { version: 1, count: 1, uriToId: { [uri]: existingId }, idToUri: { [existingId]: uri } };

        // Swap deleteDocument for a spy and make chunk insertion a no-op.
        let deleteCalled = false;
        internals.deleteDocument = async () => {
            deleteCalled = true;
        };
        internals.insertItem = async () => { };
        embeddings.createEmbeddingsResponses.push({ status: 'success', output: [[0.01, 0.02]] });

        const upserted = await index.upsertDocument(uri, 'hello world');

        assert(deleteCalled);
        assert.equal(upserted.uri, uri);
    });

    it('infers docType from uri when not provided and writes files, updates catalog', async () => {
        const internals = index as any;
        internals.insertItem = async () => { };
        embeddings.createEmbeddingsResponses.push({ status: 'success', output: [[0.1, 0.2]] });

        // docType is passed as undefined; only uri, text and metadata are supplied.
        const metadata = { author: 'test' };
        const upserted = await index.upsertDocument('file.md', 'content here', undefined, metadata);

        // A text file and a metadata file named after the new id exist in storage.
        const textPath = path.join(folderPath, `${upserted.id}.txt`);
        const metaPath = path.join(folderPath, `${upserted.id}.json`);
        assert(storage.files.has(textPath));
        assert(storage.files.has(metaPath));

        // The catalog maps uri and id to each other.
        assert.equal(internals._catalog.count, 1);
        assert.equal(internals._catalog.idToUri[upserted.id], 'file.md');
        assert.equal(internals._catalog.uriToId['file.md'], upserted.id);
    });

    it('batches chunk embeddings using embeddings.maxTokens and calls createEmbeddings per batch', async () => {
        const longText = 'a '.repeat(30);

        // Record every createEmbeddings invocation and answer with one vector per input.
        const calls: (string | string[])[] = [];
        embeddings.createEmbeddings = async (inputs: string | string[]) => {
            calls.push(inputs);
            const batch = Array.isArray(inputs) ? inputs : [inputs];
            return { status: 'success', output: batch.map(() => [0.1, 0.2]) };
        };
        (index as any).insertItem = async () => { };

        await index.upsertDocument('file.txt', longText);

        assert(calls.length > 1);
    });

    it('wraps embedding generation errors and non-success statuses', async () => {
        // Case 1: the embeddings call throws.
        embeddings.createEmbeddings = async () => {
            throw new Error('fail');
        };
        await assert.rejects(index.upsertDocument('u', 't'), /Error generating embeddings: Error: fail/);

        // Case 2: the embeddings call resolves with an error status.
        embeddings.createEmbeddings = async () => ({ status: 'error', message: 'bad' } as any);
        await assert.rejects(index.upsertDocument('u', 't'), /Error generating embeddings: bad/);
    });

    it('cancels update and throws when insertItem fails', async () => {
        embeddings.createEmbeddingsResponses.push({ status: 'success', output: [[0.1, 0.2]] });
        (index as any).insertItem = async () => {
            throw new Error('insert error');
        };

        const pending = index.upsertDocument('u', 't');
        await assert.rejects(pending, /Error adding document "u": Error: insert error/);
    });
});
|
|
342
|
+
|
|
343
|
+
describe('listDocuments', () => {
    it('returns empty list when no chunks', async () => {
        (index as any).listItems = async () => [];

        const listed = await index.listDocuments();

        assert.deepEqual(listed, []);
    });

    it('groups chunks per document and produces LocalDocumentResult', async () => {
        const docId = 'idx-1';
        const uri = 'doc://1';
        const internals = index as any;
        internals._catalog = { version: 1, count: 1, uriToId: { [uri]: docId }, idToUri: { [docId]: uri } };

        // Two chunks that both carry the same documentId.
        const spans: Array<[string, number, number]> = [
            ['c1', 0, 9],
            ['c2', 10, 19],
        ];
        internals.listItems = async () =>
            spans.map(([id, startPos, endPos]) => ({
                id,
                metadata: { documentId: docId, startPos, endPos },
                vector: [0],
                norm: 1,
            }));

        const listed = await index.listDocuments();

        assert.equal(listed.length, 1);
        const [only] = listed;
        assert.equal(only.id, docId);
        assert.equal(only.uri, uri);
        assert.ok(only.score >= 0);
    });
});
|
|
365
|
+
|
|
366
|
+
describe('queryDocuments', () => {
    it('throws if embeddings not configured', async () => {
        // Built without an `embeddings` option.
        const unconfigured = new LocalDocumentIndex({ folderPath, storage, tokenizer });
        const pending = unconfigured.queryDocuments('q');
        await assert.rejects(pending, /Embeddings model not configured/);
    });

    it('uses default options, normalizes query for embeddings, and forwards to queryItems', async () => {
        // Filled in by the queryItems stub with the arguments it was called with.
        let captured: { emb: number[]; query: string; maxChunks: number; filter: any; isBm25?: boolean } | undefined;
        embeddings.createEmbeddingsResponses.push({ status: 'success', output: [[0.5, 0.6]] });
        (index as any).queryItems = async (emb: number[], q: string, maxChunks: number, filter: any, isBm25?: boolean) => {
            captured = { emb, query: q, maxChunks, filter, isBm25 };
            return [];
        };

        await index.queryDocuments('a\nb\nc');

        // queryItems sees the query text unchanged, with the values used when no options are given.
        assert(captured);
        assert.equal(captured!.query, 'a\nb\nc');
        assert.equal(captured!.maxChunks, 50);
        assert.equal(captured!.filter, undefined);
        assert.equal(captured!.isBm25, undefined);

        // The most recent recorded embeddings input has spaces where the query had newlines.
        const lastInput = embeddings.createEmbeddingsCalls.pop() as string;
        assert.equal(lastInput.toString(), 'a b c');
    });

    it('wraps errors in query embedding generation and non-success status', async () => {
        // Case 1: the embeddings call throws.
        embeddings.createEmbeddings = async () => {
            throw new Error('fail');
        };
        await assert.rejects(index.queryDocuments('q'), /Error generating embeddings for query: Error: fail/);

        // Case 2: the embeddings call resolves with an error status.
        embeddings.createEmbeddings = async () => ({ status: 'error', message: 'bad' } as any);
        await assert.rejects(index.queryDocuments('q'), /Error generating embeddings for query: bad/);
    });

    it('groups results by document, sorts by score, returns top N, and forwards filter/isBm25', async () => {
        embeddings.createEmbeddingsResponses.push({ status: 'success', output: [[0.9, 0.8]] });
        const docId1 = 'd1';
        const docId2 = 'd2';
        (index as any)._catalog = { version: 1, count: 2, uriToId: {}, idToUri: { [docId1]: 'u1', [docId2]: 'u2' } };

        // Builds one scored chunk result belonging to the given document.
        const hit = (id: string, documentId: string, score: number) => ({
            item: { id, metadata: { documentId }, vector: [0], norm: 1 },
            score,
        });

        // Remember the filter and isBm25 arguments handed to queryItems.
        const forwarded = { filter: undefined as any, isBm25: undefined as any };
        (index as any).queryItems = async (_vector: number[], _query: string, _maxChunks: number, filter: any, isBm25?: boolean) => {
            forwarded.filter = filter;
            forwarded.isBm25 = isBm25;
            return [
                hit('c1', docId1, 0.85),
                hit('c2', docId2, 0.95),
                hit('c3', docId1, 0.8),
            ];
        };

        const results = await index.queryDocuments('q', { maxDocuments: 1, filter: { key: 'v' } as any, isBm25: true });

        // With maxDocuments: 1 only the document owning the 0.95 chunk is returned.
        assert.equal(results.length, 1);
        assert.equal(results[0].id, docId2);
        assert.deepEqual(forwarded.filter, { key: 'v' });
        assert.equal(forwarded.isBm25, true);
    });
});
|
|
417
|
+
|
|
418
|
+
describe('update transaction overrides', () => {
    it('beginUpdate clones catalog; cancelUpdate clears; endUpdate persists and clears new catalog', async () => {
        const internals = index as any;
        internals._catalog = { version: 1, count: 0, uriToId: {}, idToUri: {} };

        // beginUpdate: the pending catalog is a distinct object with equal contents.
        await index.beginUpdate();
        assert.notEqual(internals._newCatalog, internals._catalog);
        assert.deepEqual(internals._newCatalog, internals._catalog);

        // cancelUpdate: the pending catalog is dropped.
        index.cancelUpdate();
        assert.equal(internals._newCatalog, undefined);

        // endUpdate: a change made to the pending catalog is written and becomes current.
        await index.beginUpdate();
        internals._newCatalog.count = 1;
        await index.endUpdate();

        const catalogPath = path.join(folderPath, 'catalog.json');
        const wroteCatalog = storage.upsertFileCalls.some(call => call.path === catalogPath);
        assert(wroteCatalog);
        assert.equal(internals._newCatalog, undefined);
        assert.equal(internals._catalog.count, 1);
    });

    it('endUpdate wraps storage errors when saving catalog', async () => {
        const internals = index as any;
        internals._catalog = { version: 1, count: 0, uriToId: {}, idToUri: {} };
        await index.beginUpdate();

        // Only writes to catalog.json fail; every other path, index.json included,
        // is passed through to the original upsertFile.
        const passThrough = storage.upsertFile.bind(storage);
        storage.upsertFile = async (p: string, c: any) => {
            if (p.endsWith('catalog.json')) {
                throw new Error('fail');
            }
            return passThrough(p, c);
        };

        await assert.rejects(index.endUpdate(), /Error saving document catalog: Error: fail/);

        // The pending catalog is still set after the failed save.
        assert(internals._newCatalog);
    });

    it('createIndex calls super then loads catalog', async () => {
        // Replace loadIndexData with a spy that records that it ran.
        let loadCalled = false;
        (index as any).loadIndexData = async () => {
            loadCalled = true;
        };

        // deleteIfExists is set so that an already-present index.json does not make this fail.
        await index.createIndex({ version: 1, deleteIfExists: true });

        assert(loadCalled);
    });
});
|
|
455
|
+
|
|
456
|
+
describe('file path correctness across operations', () => {
    it('uses correct joined paths for catalog, text, and metadata files', async () => {
        const uri = 'doc://paths.txt';
        const docId = 'id-paths';
        const internals = index as any;
        const existingText = path.join(folderPath, `${docId}.txt`);
        const existingMeta = path.join(folderPath, `${docId}.json`);

        // Delete: both of the document's files are removed via their folder-joined paths.
        internals._catalog = { version: 1, count: 1, uriToId: { [uri]: docId }, idToUri: { [docId]: uri } };
        storage.files.set(existingText, 'text');
        storage.files.set(existingMeta, '{}');
        internals.listItemsByMetadata = async () => [];
        internals.deleteItem = async () => { };

        await index.deleteDocument(uri);

        assert(storage.deleteFileCalls.includes(existingText));
        assert(storage.deleteFileCalls.includes(existingMeta));

        // Upsert: text and metadata are written under paths derived from the new id.
        internals.insertItem = async () => { };
        embeddings.createEmbeddingsResponses.push({ status: 'success', output: [[0.1, 0.2]] });

        const newDoc = await index.upsertDocument(uri, 'text', 'txt', { key: 'value' });

        const metaPath = path.join(folderPath, `${newDoc.id}.json`);
        const textPath = path.join(folderPath, `${newDoc.id}.txt`);
        assert(storage.upsertFileCalls.some(call => call.path === metaPath));
        assert(storage.upsertFileCalls.some(call => call.path === textPath));

        // Update cycle: the catalog is written to catalog.json inside the index folder.
        await index.beginUpdate();
        await index.endUpdate();

        const catalogPath = path.join(folderPath, 'catalog.json');
        assert(storage.upsertFileCalls.some(call => call.path === catalogPath));
    });
});
|
|
481
|
+
});
|