vectra 0.12.2 → 0.14.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +1 -1
- package/README.md +92 -100
- package/bin/vectra.js +3 -0
- package/lib/BrowserWebFetcher.d.ts +75 -0
- package/lib/BrowserWebFetcher.d.ts.map +1 -0
- package/lib/BrowserWebFetcher.js +290 -0
- package/lib/BrowserWebFetcher.js.map +1 -0
- package/lib/FileFetcher.d.ts +5 -0
- package/lib/FileFetcher.d.ts.map +1 -0
- package/lib/FileFetcher.js +89 -0
- package/lib/FileFetcher.js.map +1 -0
- package/lib/FileFetcher.spec.d.ts +2 -0
- package/lib/FileFetcher.spec.d.ts.map +1 -0
- package/lib/FileFetcher.spec.js +244 -0
- package/lib/FileFetcher.spec.js.map +1 -0
- package/lib/FolderWatcher.d.ts +91 -0
- package/lib/FolderWatcher.d.ts.map +1 -0
- package/lib/FolderWatcher.js +304 -0
- package/lib/FolderWatcher.js.map +1 -0
- package/lib/FolderWatcher.spec.d.ts +2 -0
- package/lib/FolderWatcher.spec.d.ts.map +1 -0
- package/lib/FolderWatcher.spec.js +308 -0
- package/lib/FolderWatcher.spec.js.map +1 -0
- package/lib/GPT3Tokenizer.d.ts +9 -0
- package/lib/GPT3Tokenizer.spec.d.ts +2 -0
- package/lib/GPT3Tokenizer.spec.d.ts.map +1 -0
- package/lib/GPT3Tokenizer.spec.js +45 -0
- package/lib/GPT3Tokenizer.spec.js.map +1 -0
- package/lib/ItemSelector.d.ts +41 -0
- package/lib/ItemSelector.d.ts.map +1 -0
- package/lib/ItemSelector.js +179 -0
- package/lib/ItemSelector.js.map +1 -0
- package/lib/ItemSelector.spec.d.ts +2 -0
- package/lib/ItemSelector.spec.d.ts.map +1 -0
- package/lib/ItemSelector.spec.js +204 -0
- package/lib/ItemSelector.spec.js.map +1 -0
- package/lib/LocalDocument.d.ts +54 -0
- package/lib/LocalDocument.d.ts.map +1 -1
- package/lib/LocalDocument.js +116 -0
- package/lib/LocalDocument.js.map +1 -0
- package/lib/LocalDocument.spec.d.ts +2 -0
- package/lib/LocalDocument.spec.d.ts.map +1 -0
- package/lib/LocalDocument.spec.js +214 -0
- package/lib/LocalDocument.spec.js.map +1 -0
- package/lib/LocalDocumentIndex.d.ts +152 -0
- package/lib/LocalDocumentIndex.d.ts.map +1 -1
- package/lib/LocalDocumentIndex.js +420 -0
- package/lib/LocalDocumentIndex.js.map +1 -0
- package/lib/LocalDocumentIndex.spec.d.ts +2 -0
- package/lib/LocalDocumentIndex.spec.d.ts.map +1 -0
- package/lib/LocalDocumentIndex.spec.js +494 -0
- package/lib/LocalDocumentIndex.spec.js.map +1 -0
- package/lib/LocalDocumentResult.d.ts +66 -0
- package/lib/LocalDocumentResult.d.ts.map +1 -1
- package/lib/LocalDocumentResult.js +376 -0
- package/lib/LocalDocumentResult.js.map +1 -0
- package/lib/LocalDocumentResult.spec.d.ts +2 -0
- package/lib/LocalDocumentResult.spec.d.ts.map +1 -0
- package/lib/LocalDocumentResult.spec.js +373 -0
- package/lib/LocalDocumentResult.spec.js.map +1 -0
- package/lib/LocalEmbeddings.d.ts +59 -0
- package/lib/LocalEmbeddings.d.ts.map +1 -0
- package/lib/LocalEmbeddings.js +101 -0
- package/lib/LocalEmbeddings.js.map +1 -0
- package/lib/LocalEmbeddings.spec.d.ts +2 -0
- package/lib/LocalEmbeddings.spec.d.ts.map +1 -0
- package/lib/LocalEmbeddings.spec.js +155 -0
- package/lib/LocalEmbeddings.spec.js.map +1 -0
- package/lib/LocalIndex.d.ts +159 -0
- package/lib/LocalIndex.d.ts.map +1 -1
- package/lib/LocalIndex.js +519 -0
- package/lib/LocalIndex.js.map +1 -0
- package/lib/LocalIndex.spec.d.ts +2 -0
- package/lib/LocalIndex.spec.js +611 -9
- package/lib/LocalIndex.spec.js.map +1 -1
- package/lib/OpenAIEmbeddings.d.ts +124 -0
- package/lib/OpenAIEmbeddings.d.ts.map +1 -0
- package/lib/OpenAIEmbeddings.js +166 -0
- package/lib/OpenAIEmbeddings.js.map +1 -0
- package/lib/OpenAIEmbeddings.spec.d.ts +2 -0
- package/lib/OpenAIEmbeddings.spec.d.ts.map +1 -0
- package/lib/OpenAIEmbeddings.spec.js +298 -0
- package/lib/OpenAIEmbeddings.spec.js.map +1 -0
- package/lib/TextSplitter.d.ts +21 -0
- package/lib/TextSplitter.d.ts.map +1 -1
- package/lib/TextSplitter.js +500 -0
- package/lib/TextSplitter.js.map +1 -0
- package/lib/TextSplitter.spec.d.ts +2 -0
- package/lib/TextSplitter.spec.d.ts.map +1 -0
- package/lib/TextSplitter.spec.js +337 -0
- package/lib/TextSplitter.spec.js.map +1 -0
- package/lib/TransformersEmbeddings.d.ts +121 -0
- package/lib/TransformersEmbeddings.d.ts.map +1 -0
- package/lib/TransformersEmbeddings.js +176 -0
- package/lib/TransformersEmbeddings.js.map +1 -0
- package/lib/TransformersEmbeddings.spec.d.ts +2 -0
- package/lib/TransformersEmbeddings.spec.d.ts.map +1 -0
- package/lib/TransformersEmbeddings.spec.js +198 -0
- package/lib/TransformersEmbeddings.spec.js.map +1 -0
- package/lib/TransformersTokenizer.d.ts +33 -0
- package/lib/TransformersTokenizer.d.ts.map +1 -0
- package/lib/TransformersTokenizer.js +44 -0
- package/lib/TransformersTokenizer.js.map +1 -0
- package/lib/TransformersTokenizer.spec.d.ts +2 -0
- package/lib/TransformersTokenizer.spec.d.ts.map +1 -0
- package/lib/TransformersTokenizer.spec.js +112 -0
- package/lib/TransformersTokenizer.spec.js.map +1 -0
- package/lib/WebFetcher.d.ts +14 -0
- package/lib/WebFetcher.d.ts.map +1 -0
- package/lib/WebFetcher.js +238 -0
- package/lib/WebFetcher.js.map +1 -0
- package/lib/WebFetcher.spec.d.ts +2 -0
- package/lib/WebFetcher.spec.d.ts.map +1 -0
- package/lib/WebFetcher.spec.js +263 -0
- package/lib/WebFetcher.spec.js.map +1 -0
- package/lib/browser.d.ts +30 -0
- package/lib/browser.d.ts.map +1 -0
- package/lib/browser.js +52 -0
- package/lib/browser.js.map +1 -0
- package/lib/codecs/IndexCodec.d.ts +37 -0
- package/lib/codecs/IndexCodec.d.ts.map +1 -0
- package/lib/codecs/IndexCodec.js +3 -0
- package/lib/codecs/IndexCodec.js.map +1 -0
- package/lib/codecs/JsonCodec.d.ts +19 -0
- package/lib/codecs/JsonCodec.d.ts.map +1 -0
- package/lib/codecs/JsonCodec.js +35 -0
- package/lib/codecs/JsonCodec.js.map +1 -0
- package/lib/codecs/JsonCodec.spec.d.ts +2 -0
- package/lib/codecs/JsonCodec.spec.d.ts.map +1 -0
- package/lib/codecs/JsonCodec.spec.js +66 -0
- package/lib/codecs/JsonCodec.spec.js.map +1 -0
- package/lib/codecs/LocalIndex.protobuf.spec.d.ts +2 -0
- package/lib/codecs/LocalIndex.protobuf.spec.d.ts.map +1 -0
- package/lib/codecs/LocalIndex.protobuf.spec.js +108 -0
- package/lib/codecs/LocalIndex.protobuf.spec.js.map +1 -0
- package/lib/codecs/ProtobufCodec.d.ts +20 -0
- package/lib/codecs/ProtobufCodec.d.ts.map +1 -0
- package/lib/codecs/ProtobufCodec.js +225 -0
- package/lib/codecs/ProtobufCodec.js.map +1 -0
- package/lib/codecs/ProtobufCodec.spec.d.ts +2 -0
- package/lib/codecs/ProtobufCodec.spec.d.ts.map +1 -0
- package/lib/codecs/ProtobufCodec.spec.js +155 -0
- package/lib/codecs/ProtobufCodec.spec.js.map +1 -0
- package/lib/codecs/index.d.ts +5 -0
- package/lib/codecs/index.d.ts.map +1 -0
- package/lib/codecs/index.js +21 -0
- package/lib/codecs/index.js.map +1 -0
- package/lib/codecs/migrateIndex.d.ts +24 -0
- package/lib/codecs/migrateIndex.d.ts.map +1 -0
- package/lib/codecs/migrateIndex.js +119 -0
- package/lib/codecs/migrateIndex.js.map +1 -0
- package/lib/codecs/migrateIndex.spec.d.ts +2 -0
- package/lib/codecs/migrateIndex.spec.d.ts.map +1 -0
- package/lib/codecs/migrateIndex.spec.js +151 -0
- package/lib/codecs/migrateIndex.spec.js.map +1 -0
- package/lib/codecs/schemas/index.proto +34 -0
- package/lib/index.d.ts +20 -0
- package/lib/index.d.ts.map +1 -1
- package/lib/index.js +36 -0
- package/lib/index.js.map +1 -0
- package/lib/internals/Colorize.d.ts +14 -0
- package/lib/internals/Colorize.d.ts.map +1 -0
- package/lib/internals/Colorize.js +69 -0
- package/lib/internals/Colorize.js.map +1 -0
- package/lib/internals/index.d.ts +3 -0
- package/lib/internals/index.d.ts.map +1 -0
- package/lib/internals/index.js +19 -0
- package/lib/internals/index.js.map +1 -0
- package/lib/internals/types.d.ts +43 -0
- package/lib/internals/types.d.ts.map +1 -0
- package/lib/internals/types.js +3 -0
- package/lib/internals/types.js.map +1 -0
- package/lib/server/IndexManager.d.ts +78 -0
- package/lib/server/IndexManager.d.ts.map +1 -0
- package/lib/server/IndexManager.js +259 -0
- package/lib/server/IndexManager.js.map +1 -0
- package/lib/server/VectraServer.d.ts +40 -0
- package/lib/server/VectraServer.d.ts.map +1 -0
- package/lib/server/VectraServer.js +151 -0
- package/lib/server/VectraServer.js.map +1 -0
- package/lib/server/VectraServer.spec.d.ts +2 -0
- package/lib/server/VectraServer.spec.d.ts.map +1 -0
- package/lib/server/VectraServer.spec.js +322 -0
- package/lib/server/VectraServer.spec.js.map +1 -0
- package/lib/server/handlers/documentHandlers.d.ts +15 -0
- package/lib/server/handlers/documentHandlers.d.ts.map +1 -0
- package/lib/server/handlers/documentHandlers.js +95 -0
- package/lib/server/handlers/documentHandlers.js.map +1 -0
- package/lib/server/handlers/helpers.d.ts +23 -0
- package/lib/server/handlers/helpers.d.ts.map +1 -0
- package/lib/server/handlers/helpers.js +138 -0
- package/lib/server/handlers/helpers.js.map +1 -0
- package/lib/server/handlers/index.d.ts +8 -0
- package/lib/server/handlers/index.d.ts.map +1 -0
- package/lib/server/handlers/index.js +22 -0
- package/lib/server/handlers/index.js.map +1 -0
- package/lib/server/handlers/indexHandlers.d.ts +14 -0
- package/lib/server/handlers/indexHandlers.d.ts.map +1 -0
- package/lib/server/handlers/indexHandlers.js +85 -0
- package/lib/server/handlers/indexHandlers.js.map +1 -0
- package/lib/server/handlers/itemHandlers.d.ts +34 -0
- package/lib/server/handlers/itemHandlers.d.ts.map +1 -0
- package/lib/server/handlers/itemHandlers.js +166 -0
- package/lib/server/handlers/itemHandlers.js.map +1 -0
- package/lib/server/handlers/lifecycleHandlers.d.ts +11 -0
- package/lib/server/handlers/lifecycleHandlers.d.ts.map +1 -0
- package/lib/server/handlers/lifecycleHandlers.js +31 -0
- package/lib/server/handlers/lifecycleHandlers.js.map +1 -0
- package/lib/server/handlers/queryHandlers.d.ts +27 -0
- package/lib/server/handlers/queryHandlers.d.ts.map +1 -0
- package/lib/server/handlers/queryHandlers.js +135 -0
- package/lib/server/handlers/queryHandlers.js.map +1 -0
- package/lib/server/handlers/statsHandlers.d.ts +17 -0
- package/lib/server/handlers/statsHandlers.d.ts.map +1 -0
- package/lib/server/handlers/statsHandlers.js +81 -0
- package/lib/server/handlers/statsHandlers.js.map +1 -0
- package/lib/server/index.d.ts +4 -0
- package/lib/server/index.d.ts.map +1 -0
- package/lib/server/index.js +23 -0
- package/lib/server/index.js.map +1 -0
- package/lib/storage/FileStorage.d.ts +92 -0
- package/lib/storage/FileStorage.d.ts.map +1 -0
- package/lib/storage/FileStorage.js +3 -0
- package/lib/storage/FileStorage.js.map +1 -0
- package/lib/storage/FileStorageUtilities.d.ts +36 -0
- package/lib/storage/FileStorageUtilities.d.ts.map +1 -0
- package/lib/storage/FileStorageUtilities.js +91 -0
- package/lib/storage/FileStorageUtilities.js.map +1 -0
- package/lib/storage/FileStorageUtilities.spec.d.ts +2 -0
- package/lib/storage/FileStorageUtilities.spec.d.ts.map +1 -0
- package/lib/storage/FileStorageUtilities.spec.js +98 -0
- package/lib/storage/FileStorageUtilities.spec.js.map +1 -0
- package/lib/storage/FileType.d.ts +29 -0
- package/lib/storage/FileType.d.ts.map +1 -0
- package/lib/storage/FileType.js +38 -0
- package/lib/storage/FileType.js.map +1 -0
- package/lib/storage/IndexedDBStorage.d.ts +47 -0
- package/lib/storage/IndexedDBStorage.d.ts.map +1 -0
- package/lib/storage/IndexedDBStorage.js +347 -0
- package/lib/storage/IndexedDBStorage.js.map +1 -0
- package/lib/storage/LocalFileStorage.browser.d.ts +19 -0
- package/lib/storage/LocalFileStorage.browser.d.ts.map +1 -0
- package/lib/storage/LocalFileStorage.browser.js +43 -0
- package/lib/storage/LocalFileStorage.browser.js.map +1 -0
- package/lib/storage/LocalFileStorage.d.ts +23 -0
- package/lib/storage/LocalFileStorage.d.ts.map +1 -0
- package/lib/storage/LocalFileStorage.js +152 -0
- package/lib/storage/LocalFileStorage.js.map +1 -0
- package/lib/storage/LocalFileStorage.spec.d.ts +2 -0
- package/lib/storage/LocalFileStorage.spec.d.ts.map +1 -0
- package/lib/storage/LocalFileStorage.spec.js +249 -0
- package/lib/storage/LocalFileStorage.spec.js.map +1 -0
- package/lib/storage/VirtualFileStorage.d.ts +18 -0
- package/lib/storage/VirtualFileStorage.d.ts.map +1 -0
- package/lib/storage/VirtualFileStorage.js +178 -0
- package/lib/storage/VirtualFileStorage.js.map +1 -0
- package/lib/storage/VirtualFileStorage.spec.d.ts +2 -0
- package/lib/storage/VirtualFileStorage.spec.d.ts.map +1 -0
- package/lib/storage/VirtualFileStorage.spec.js +302 -0
- package/lib/storage/VirtualFileStorage.spec.js.map +1 -0
- package/lib/storage/index.d.ts +6 -0
- package/lib/storage/index.d.ts.map +1 -0
- package/lib/storage/index.js +22 -0
- package/lib/storage/index.js.map +1 -0
- package/lib/templates/templates/csharp/README.md +48 -0
- package/lib/templates/templates/csharp/VectraClient.cs +234 -0
- package/lib/templates/templates/go/README.md +71 -0
- package/lib/templates/templates/go/vectra_client.go +322 -0
- package/lib/templates/templates/java/README.md +81 -0
- package/lib/templates/templates/java/VectraClient.java +232 -0
- package/lib/templates/templates/python/README.md +37 -0
- package/lib/templates/templates/python/vectra_client.py +279 -0
- package/lib/templates/templates/rust/Cargo.toml +14 -0
- package/lib/templates/templates/rust/README.md +39 -0
- package/lib/templates/templates/rust/build.rs +4 -0
- package/lib/templates/templates/rust/lib.rs +284 -0
- package/lib/templates/templates/typescript/README.md +96 -0
- package/lib/templates/templates/typescript/VectraClient.ts +374 -0
- package/lib/templates/typescript/VectraClient.d.ts +114 -0
- package/lib/templates/typescript/VectraClient.d.ts.map +1 -0
- package/lib/templates/typescript/VectraClient.js +328 -0
- package/lib/templates/typescript/VectraClient.js.map +1 -0
- package/lib/types.d.ts +153 -0
- package/lib/types.d.ts.map +1 -0
- package/lib/types.js +3 -0
- package/lib/types.js.map +1 -0
- package/lib/utils/index.d.ts +2 -0
- package/lib/utils/index.d.ts.map +1 -0
- package/lib/utils/index.js +18 -0
- package/lib/utils/index.js.map +1 -0
- package/lib/utils/pathUtils.d.ts +40 -0
- package/lib/utils/pathUtils.d.ts.map +1 -0
- package/lib/utils/pathUtils.js +98 -0
- package/lib/utils/pathUtils.js.map +1 -0
- package/lib/vectra-cli.d.ts +2 -0
- package/lib/vectra-cli.d.ts.map +1 -1
- package/lib/vectra-cli.generate.spec.d.ts +2 -0
- package/lib/vectra-cli.generate.spec.d.ts.map +1 -0
- package/lib/vectra-cli.generate.spec.js +112 -0
- package/lib/vectra-cli.generate.spec.js.map +1 -0
- package/lib/vectra-cli.js +760 -0
- package/lib/vectra-cli.js.map +1 -0
- package/lib/vectra-cli.spec.d.ts +1 -0
- package/lib/vectra-cli.spec.d.ts.map +1 -0
- package/lib/vectra-cli.spec.js +2 -0
- package/lib/vectra-cli.spec.js.map +1 -0
- package/package.json +91 -16
- package/proto/vectra_service.proto +276 -0
- package/src/BrowserWebFetcher.ts +345 -0
- package/src/FileFetcher.spec.ts +234 -0
- package/src/FileFetcher.ts +37 -25
- package/src/FolderWatcher.spec.ts +288 -0
- package/src/FolderWatcher.ts +304 -0
- package/src/GPT3Tokenizer.spec.ts +50 -0
- package/src/ItemSelector.spec.ts +252 -0
- package/src/ItemSelector.ts +163 -150
- package/src/LocalDocument.spec.ts +211 -0
- package/src/LocalDocument.ts +88 -94
- package/src/LocalDocumentIndex.spec.ts +481 -0
- package/src/LocalDocumentIndex.ts +39 -40
- package/src/LocalDocumentResult.spec.ts +373 -0
- package/src/LocalDocumentResult.ts +489 -319
- package/src/LocalEmbeddings.spec.ts +138 -0
- package/src/LocalEmbeddings.ts +120 -0
- package/src/LocalIndex.spec.ts +808 -66
- package/src/LocalIndex.ts +479 -429
- package/src/OpenAIEmbeddings.spec.ts +354 -0
- package/src/OpenAIEmbeddings.ts +26 -27
- package/src/TextSplitter.spec.ts +342 -0
- package/src/TextSplitter.ts +517 -532
- package/src/TransformersEmbeddings.spec.ts +188 -0
- package/src/TransformersEmbeddings.ts +232 -0
- package/src/TransformersTokenizer.spec.ts +143 -0
- package/src/TransformersTokenizer.ts +45 -0
- package/src/WebFetcher.spec.ts +288 -0
- package/src/WebFetcher.ts +184 -186
- package/src/browser.ts +69 -0
- package/src/codecs/IndexCodec.ts +40 -0
- package/src/codecs/JsonCodec.spec.ts +70 -0
- package/src/codecs/JsonCodec.ts +37 -0
- package/src/codecs/LocalIndex.protobuf.spec.ts +115 -0
- package/src/codecs/ProtobufCodec.spec.ts +166 -0
- package/src/codecs/ProtobufCodec.ts +193 -0
- package/src/codecs/index.ts +4 -0
- package/src/codecs/migrateIndex.spec.ts +176 -0
- package/src/codecs/migrateIndex.ts +125 -0
- package/src/codecs/schemas/index.proto +34 -0
- package/src/index.ts +9 -1
- package/src/internals/Colorize.ts +19 -16
- package/src/server/IndexManager.ts +243 -0
- package/src/server/VectraServer.spec.ts +303 -0
- package/src/server/VectraServer.ts +156 -0
- package/src/server/handlers/documentHandlers.ts +59 -0
- package/src/server/handlers/helpers.ts +93 -0
- package/src/server/handlers/index.ts +7 -0
- package/src/server/handlers/indexHandlers.ts +44 -0
- package/src/server/handlers/itemHandlers.ts +140 -0
- package/src/server/handlers/lifecycleHandlers.ts +26 -0
- package/src/server/handlers/queryHandlers.ts +96 -0
- package/src/server/handlers/statsHandlers.ts +38 -0
- package/src/server/index.ts +3 -0
- package/src/storage/FileStorage.ts +105 -0
- package/src/storage/FileStorageUtilities.spec.ts +106 -0
- package/src/storage/FileStorageUtilities.ts +77 -0
- package/src/storage/FileType.ts +61 -0
- package/src/storage/IndexedDBStorage.ts +365 -0
- package/src/storage/LocalFileStorage.browser.ts +52 -0
- package/src/storage/LocalFileStorage.spec.ts +292 -0
- package/src/storage/LocalFileStorage.ts +98 -0
- package/src/storage/VirtualFileStorage.spec.ts +307 -0
- package/src/storage/VirtualFileStorage.ts +169 -0
- package/src/storage/index.ts +5 -0
- package/src/templates/csharp/README.md +48 -0
- package/src/templates/csharp/VectraClient.cs +234 -0
- package/src/templates/go/README.md +71 -0
- package/src/templates/go/vectra_client.go +322 -0
- package/src/templates/java/README.md +81 -0
- package/src/templates/java/VectraClient.java +232 -0
- package/src/templates/python/README.md +37 -0
- package/src/templates/python/vectra_client.py +279 -0
- package/src/templates/rust/Cargo.toml +14 -0
- package/src/templates/rust/README.md +39 -0
- package/src/templates/rust/build.rs +4 -0
- package/src/templates/rust/lib.rs +284 -0
- package/src/templates/typescript/README.md +96 -0
- package/src/templates/typescript/VectraClient.ts +374 -0
- package/src/types.ts +131 -123
- package/src/utils/index.ts +1 -0
- package/src/utils/pathUtils.ts +106 -0
- package/src/vectra-cli.generate.spec.ts +72 -0
- package/src/vectra-cli.spec.ts +0 -0
- package/src/vectra-cli.ts +687 -246
|
@@ -0,0 +1,115 @@
|
|
|
1
|
+
import assert from 'node:assert';
|
|
2
|
+
import { LocalIndex } from '../LocalIndex';
|
|
3
|
+
import { VirtualFileStorage } from '../storage';
|
|
4
|
+
import { ProtobufCodec } from './ProtobufCodec';
|
|
5
|
+
|
|
6
|
+
/**
 * Integration tests that drive LocalIndex through ProtobufCodec on top of an
 * in-memory VirtualFileStorage.
 */
describe('LocalIndex with ProtobufCodec', () => {
    const codec = new ProtobufCodec();

    /**
     * Build a LocalIndex at 'mem://idx' that uses the shared codec.
     * A fresh VirtualFileStorage is created unless one is supplied.
     */
    const openIndex = (storage = new VirtualFileStorage()) =>
        new LocalIndex('mem://idx', undefined, storage, codec);

    it('creates index with .pb extension', async () => {
        const idx = openIndex();

        assert.equal(idx.indexName, 'index.pb');
        await idx.createIndex();

        const created = await idx.isIndexCreated();
        assert.equal(created, true);
    });

    it('full CRUD operations produce correct results', async () => {
        const idx = openIndex();
        await idx.createIndex();

        // Insert two items with distinct vectors and metadata
        await idx.insertItem({ id: 'a', vector: [1, 0, 0], metadata: { cat: 'x' } });
        await idx.insertItem({ id: 'b', vector: [0, 1, 0], metadata: { cat: 'y' } });

        // List
        const listed = await idx.listItems();
        assert.equal(listed.length, 2);
        assert.equal(listed[0].id, 'a');

        // Get
        const fetched = await idx.getItem('a');
        assert.ok(fetched);
        assert.equal(fetched.metadata.cat, 'x');

        // Query
        const matches = await idx.queryItems([1, 0, 0], '', 1);
        assert.equal(matches.length, 1);
        assert.equal(matches[0].item.id, 'a');

        // Upsert
        await idx.upsertItem({ id: 'a', vector: [0, 0, 1], metadata: { cat: 'z' } });
        const afterUpsert = await idx.getItem('a');
        assert.equal(afterUpsert?.metadata.cat, 'z');

        // Delete
        await idx.deleteItem('b');
        const afterDelete = await idx.listItems();
        assert.equal(afterDelete.length, 1);
        assert.equal(afterDelete[0].id, 'a');

        // Stats
        const indexStats = await idx.getIndexStats();
        assert.equal(indexStats.items, 1);
    });

    it('batch insert works', async () => {
        const idx = openIndex();
        await idx.createIndex();

        const batch = [
            { id: '1', vector: [1, 0, 0] },
            { id: '2', vector: [0, 1, 0] },
            { id: '3', vector: [0, 0, 1] },
        ];
        await idx.batchInsertItems(batch);

        const listed = await idx.listItems();
        assert.equal(listed.length, 3);
    });

    it('metadata filtering works with protobuf storage', async () => {
        const idx = openIndex();
        await idx.createIndex();

        await idx.batchInsertItems([
            { id: '1', vector: [1, 0], metadata: { category: 'food' } },
            { id: '2', vector: [0, 1], metadata: { category: 'drink' } },
        ]);

        const onlyFood = await idx.listItemsByMetadata({ category: { $eq: 'food' } });
        assert.equal(onlyFood.length, 1);
        assert.equal(onlyFood[0].id, '1');
    });

    it('external metadata files use .pb extension', async () => {
        const idx = openIndex();
        // Only 'keep' is configured as indexed; the inserted item also carries an 'extra' field.
        await idx.createIndex({ version: 1, metadata_config: { indexed: ['keep'] } });

        await idx.insertItem({ id: 'm1', vector: [1], metadata: { keep: 'x', extra: 'y' } });

        const listed = await idx.listItems();
        const stored = listed.find(entry => entry.id === 'm1')!;
        assert.ok(stored.metadataFile);
        assert.ok(stored.metadataFile.endsWith('.pb'), `Expected .pb extension, got: ${stored.metadataFile}`);
    });

    it('persists and reloads across instances', async () => {
        // Both instances share this storage so the second can see what the first wrote.
        const sharedStorage = new VirtualFileStorage();

        // First instance: create and populate
        const writer = openIndex(sharedStorage);
        await writer.createIndex();
        await writer.insertItem({ id: 'persist', vector: [1, 2, 3], metadata: { key: 'val' } });

        // Second instance: should read persisted data
        const reader = openIndex(sharedStorage);
        const reloaded = await reader.getItem('persist');
        assert.ok(reloaded);
        assert.equal(reloaded.id, 'persist');
        assert.equal(reloaded.metadata.key, 'val');
    });
});
|
|
@@ -0,0 +1,166 @@
|
|
|
1
|
+
import assert from 'node:assert';
|
|
2
|
+
import { ProtobufCodec } from './ProtobufCodec';
|
|
3
|
+
import { JsonCodec } from './JsonCodec';
|
|
4
|
+
import { IndexData } from '../types';
|
|
5
|
+
import { DocumentCatalog } from './IndexCodec';
|
|
6
|
+
|
|
7
|
+
/**
 * Unit tests for ProtobufCodec: index, catalog and metadata round trips,
 * interoperability with JsonCodec, and a size comparison against JSON.
 */
describe('ProtobufCodec', () => {
    const codec = new ProtobufCodec();

    it('has .pb extension', () => {
        assert.equal(codec.extension, '.pb');
    });

    describe('serializeIndex / deserializeIndex', () => {
        it('round-trips IndexData', () => {
            const data: IndexData = {
                version: 1,
                metadata_config: { indexed: ['category'] },
                items: [
                    { id: 'a', metadata: { category: 'food', score: 42 }, vector: [1.5, 2.5, 3.5], norm: 4.5 },
                    { id: 'b', metadata: {}, vector: [0, 0, 0], norm: 0, metadataFile: 'ext.pb' },
                ],
            };
            const buf = codec.serializeIndex(data);
            assert.ok(Buffer.isBuffer(buf));
            const result = codec.deserializeIndex(buf);
            assert.equal(result.version, data.version);
            assert.deepStrictEqual(result.metadata_config, data.metadata_config);
            assert.equal(result.items.length, 2);
            assert.equal(result.items[0].id, 'a');
            assert.deepStrictEqual(result.items[0].metadata, { category: 'food', score: 42 });
            // Float32 precision: vectors should be close but not exact
            assert.ok(Math.abs(result.items[0].vector[0] - 1.5) < 0.001);
            assert.ok(Math.abs(result.items[0].vector[1] - 2.5) < 0.001);
            assert.ok(Math.abs(result.items[0].vector[2] - 3.5) < 0.001);
            assert.equal(result.items[0].norm, 4.5); // norm is double, exact
            assert.equal(result.items[1].metadataFile, 'ext.pb');
        });

        it('handles empty items', () => {
            const data: IndexData = { version: 2, metadata_config: {}, items: [] };
            const result = codec.deserializeIndex(codec.serializeIndex(data));
            assert.equal(result.version, 2);
            assert.deepStrictEqual(result.items, []);
        });

        it('handles boolean metadata values', () => {
            const data: IndexData = {
                version: 1,
                metadata_config: {},
                items: [
                    { id: 'x', metadata: { active: true, deleted: false }, vector: [1], norm: 1 },
                ],
            };
            // Note: false/0/"" are proto3 default values — test the tricky case
            const result = codec.deserializeIndex(codec.serializeIndex(data));
            assert.equal(result.items[0].metadata.active, true);
            // The default-valued member must come back as the boolean false.
            // strictEqual is required: loose equality would also accept '' or 0.
            assert.strictEqual(result.items[0].metadata.deleted, false);
        });

        it('handles string metadata', () => {
            const data: IndexData = {
                version: 1,
                metadata_config: {},
                items: [
                    { id: 'x', metadata: { name: 'hello', tag: 'world' }, vector: [1], norm: 1 },
                ],
            };
            const result = codec.deserializeIndex(codec.serializeIndex(data));
            assert.equal(result.items[0].metadata.name, 'hello');
            assert.equal(result.items[0].metadata.tag, 'world');
        });
    });

    describe('serializeCatalog / deserializeCatalog', () => {
        it('round-trips DocumentCatalog', () => {
            const catalog: DocumentCatalog = {
                version: 1,
                count: 2,
                uriToId: { 'http://a': 'id-a', 'http://b': 'id-b' },
                idToUri: { 'id-a': 'http://a', 'id-b': 'http://b' },
            };
            const buf = codec.serializeCatalog(catalog);
            assert.ok(Buffer.isBuffer(buf));
            const result = codec.deserializeCatalog(buf);
            assert.deepStrictEqual(result, catalog);
        });

        it('handles empty catalog', () => {
            const catalog: DocumentCatalog = { version: 1, count: 0, uriToId: {}, idToUri: {} };
            const result = codec.deserializeCatalog(codec.serializeCatalog(catalog));
            assert.deepStrictEqual(result, catalog);
        });
    });

    describe('serializeMetadata / deserializeMetadata', () => {
        it('round-trips metadata (uses JSON for external files)', () => {
            const metadata = { name: 'test', score: 42, active: true };
            const buf = codec.serializeMetadata(metadata);
            const result = codec.deserializeMetadata(buf);
            assert.deepStrictEqual(result, metadata);
        });
    });

    describe('cross-codec compatibility', () => {
        const json = new JsonCodec();
        const pb = new ProtobufCodec();

        it('data serialized with JSON can be re-serialized with Protobuf and back (logical equivalence)', () => {
            const data: IndexData = {
                version: 1,
                metadata_config: { indexed: ['cat'] },
                items: [
                    { id: 'item1', metadata: { cat: 'a', num: 7 }, vector: [0.1, 0.2, 0.3], norm: 0.374 },
                ],
            };
            // JSON -> Protobuf
            const jsonBuf = json.serializeIndex(data);
            const fromJson = json.deserializeIndex(jsonBuf);
            const pbBuf = pb.serializeIndex(fromJson);
            const fromPb = pb.deserializeIndex(pbBuf);

            assert.equal(fromPb.version, data.version);
            assert.deepStrictEqual(fromPb.metadata_config, data.metadata_config);
            assert.equal(fromPb.items[0].id, data.items[0].id);
            assert.deepStrictEqual(fromPb.items[0].metadata, data.items[0].metadata);

            // Protobuf -> JSON
            const jsonBuf2 = json.serializeIndex(fromPb);
            const final = json.deserializeIndex(jsonBuf2);
            assert.equal(final.items[0].id, 'item1');
            assert.deepStrictEqual(final.items[0].metadata, { cat: 'a', num: 7 });
        });

        it('catalog round-trips across codecs', () => {
            const catalog: DocumentCatalog = {
                version: 1,
                count: 1,
                uriToId: { 'file://doc.txt': 'uuid-1' },
                idToUri: { 'uuid-1': 'file://doc.txt' },
            };
            const pbBuf = pb.serializeCatalog(catalog);
            const fromPb = pb.deserializeCatalog(pbBuf);
            const jsonBuf = json.serializeCatalog(fromPb);
            const final = json.deserializeCatalog(jsonBuf);
            assert.deepStrictEqual(final, catalog);
        });
    });

    describe('protobuf produces smaller output than JSON for vector data', () => {
        it('binary is smaller than JSON for a 1536-dim vector', () => {
            // Deterministic components in [-1, 1] keep the measured sizes identical on every run.
            const vector = Array.from({ length: 1536 }, (_, i) => Math.sin(i + 1));
            const norm = Math.sqrt(vector.reduce((s, v) => s + v * v, 0));
            const data: IndexData = {
                version: 1,
                metadata_config: {},
                items: [{ id: 'vec', metadata: {}, vector, norm }],
            };
            const jsonSize = new JsonCodec().serializeIndex(data).length;
            const pbSize = codec.serializeIndex(data).length;
            // Protobuf should be significantly smaller
            assert.ok(pbSize < jsonSize, `Expected pb (${pbSize}) < json (${jsonSize})`);
            // Spec says ~40-50% savings
            assert.ok(pbSize < jsonSize * 0.7, `Expected at least 30% savings, got ${((1 - pbSize / jsonSize) * 100).toFixed(1)}%`);
        });
    });
});
|
|
@@ -0,0 +1,193 @@
|
|
|
1
|
+
import * as path from 'path';
|
|
2
|
+
import { IndexData, IndexItem, MetadataTypes } from '../types';
|
|
3
|
+
import { DocumentCatalog, IndexCodec } from './IndexCodec';
|
|
4
|
+
|
|
5
|
+
/** Holds the 'protobufjs' module once it has been required; undefined until then. */
let protobuf: typeof import('protobufjs') | undefined;

/**
 * Return the 'protobufjs' module, requiring it on first use and reusing the
 * module-level reference on later calls.
 *
 * @returns The loaded 'protobufjs' module.
 * @throws Error with an installation hint when requiring 'protobufjs' fails.
 */
function loadProtobuf(): typeof import('protobufjs') {
    // Fast path: a previous call already resolved the module.
    if (protobuf) {
        return protobuf;
    }

    try {
        protobuf = require('protobufjs');
    } catch {
        // Any failure while requiring is reported as a missing package.
        const hint =
            "ProtobufCodec requires the 'protobufjs' package. Install it with: npm install protobufjs";
        throw new Error(hint);
    }

    return protobuf!;
}
|
|
19
|
+
|
|
20
|
+
// Parsed schema root, populated by the first getRoot() call and reused after.
let cachedRoot: any;

/**
 * Returns the protobuf root parsed from `schemas/index.proto` (resolved
 * relative to this module's directory), loading it synchronously on first use.
 *
 * @returns The cached root object produced by `protobufjs`'s `loadSync`.
 */
function getRoot(): any {
  if (cachedRoot) {
    return cachedRoot;
  }

  // Resolve the dependency first so a missing package is reported before
  // any attempt to read the schema.
  const pbjs = loadProtobuf();
  const schemaFile = path.join(__dirname, 'schemas', 'index.proto');
  cachedRoot = pbjs.loadSync(schemaFile);
  return cachedRoot;
}
|
|
30
|
+
|
|
31
|
+
/**
 * Wraps a metadata value in the single-key object used for the protobuf
 * MetadataValue message: `stringValue`, `numberValue` or `boolValue`
 * depending on the runtime type.
 *
 * Any other runtime type is stringified with `String(value)` and stored
 * under `stringValue`.
 */
function toProtoMetadataValue(value: MetadataTypes): Record<string, any> {
  switch (typeof value) {
    case 'string':
      return { stringValue: value };
    case 'number':
      return { numberValue: value };
    case 'boolean':
      return { boolValue: value };
    default:
      return { stringValue: String(value) };
  }
}
|
|
38
|
+
|
|
39
|
+
/**
 * Unwraps a decoded MetadataValue object back into a plain metadata value.
 *
 * Resolution order:
 *  1. the first of `stringValue`, `numberValue`, `boolValue` that is defined
 *     and not its zero value (`''`, `0`, `false`);
 *  2. otherwise `mv.value` is read as the name of the populated field, so a
 *     genuine `false` or `0` can be told apart from an absent field
 *     (presumably the oneof discriminator emitted by `toObject` with
 *     `oneofs: true`; confirm against the schema);
 *  3. otherwise the empty string.
 */
function fromProtoMetadataValue(mv: any): MetadataTypes {
  const { stringValue, numberValue, boolValue } = mv;

  if (stringValue !== undefined && stringValue !== '') {
    return stringValue;
  }
  if (numberValue !== undefined && numberValue !== 0) {
    return numberValue;
  }
  if (boolValue !== undefined && boolValue !== false) {
    return boolValue;
  }

  // Only zero values (or nothing) remain: let the discriminator decide.
  switch (mv.value) {
    case 'boolValue':
      return false;
    case 'numberValue':
      return 0;
    default:
      // Covers 'stringValue' as well as the "nothing set" fallback.
      return '';
  }
}
|
|
51
|
+
|
|
52
|
+
/**
 * Builds the plain object handed to protobuf for one index item.
 *
 * Every metadata entry is wrapped via `toProtoMetadataValue`; `id`, `vector`
 * and `norm` are copied through as-is. `metadataFile` is included only when
 * the item has a truthy value for it.
 */
function toProtoItem(item: IndexItem): Record<string, any> {
  const encoded: Record<string, any> = {};
  if (item.metadata) {
    Object.entries(item.metadata).forEach(([key, raw]) => {
      encoded[key] = toProtoMetadataValue(raw as MetadataTypes);
    });
  }

  const base: Record<string, any> = {
    id: item.id,
    metadata: encoded,
    vector: item.vector,
    norm: item.norm,
  };

  return item.metadataFile ? { ...base, metadataFile: item.metadataFile } : base;
}
|
|
71
|
+
|
|
72
|
+
/**
 * Rebuilds an `IndexItem` from a decoded protobuf item object.
 *
 * Metadata entries are unwrapped via `fromProtoMetadataValue`. The vector is
 * copied with `Array.from` so the result is a plain `number[]` (empty when
 * the decoded item has no vector). `metadataFile` is set only when the
 * decoded value is truthy.
 */
function fromProtoItem(pi: any): IndexItem {
  const decoded: Record<string, MetadataTypes> = {};
  if (pi.metadata) {
    const wrapped = pi.metadata as Record<string, any>;
    for (const key of Object.keys(wrapped)) {
      decoded[key] = fromProtoMetadataValue(wrapped[key]);
    }
  }

  const result: IndexItem = {
    id: pi.id,
    metadata: decoded,
    // proto packed float → JS number[]; ensure plain array
    vector: pi.vector ? Array.from(pi.vector as number[]) : [],
    norm: pi.norm,
  };

  if (pi.metadataFile) {
    result.metadataFile = pi.metadataFile;
  }
  return result;
}
|
|
93
|
+
|
|
94
|
+
/**
 * Protocol Buffers codec — opt-in binary format using the `.pb` extension.
 * @remarks
 * Index and catalog payloads are encoded with the `IndexData` and
 * `DocumentCatalog` message types from the bundled schema. Field encodings
 * are defined by that schema; the original notes state vectors are packed
 * float32 (~50% smaller than JSON) and norms are float64 to avoid
 * compounding rounding error (confirm against `schemas/index.proto`).
 * External metadata files are written as plain UTF-8 JSON.
 * Requires the `protobufjs` package to be installed.
 */
export class ProtobufCodec implements IndexCodec {
  readonly extension = '.pb';

  /**
   * @throws Error when `protobufjs` cannot be loaded, so a missing dependency
   * is reported at construction time rather than on first use.
   */
  constructor() {
    loadProtobuf();
  }

  /**
   * Encodes index data as an `IndexData` protobuf message.
   * @param data Index to encode; a missing `metadata_config.indexed` is written as an empty list.
   * @returns The encoded bytes.
   * @throws Error when the payload fails protobuf verification.
   */
  serializeIndex(data: IndexData): Buffer {
    const indexType = getRoot().lookupType('IndexData');
    const payload = {
      version: data.version,
      metadataConfig: { indexed: data.metadata_config?.indexed ?? [] },
      items: data.items.map(toProtoItem),
    };

    const problem = indexType.verify(payload);
    if (problem) {
      throw new Error(`Protobuf verify error: ${problem}`);
    }

    const encoded = indexType.encode(indexType.create(payload)).finish();
    return Buffer.from(encoded);
  }

  /**
   * Decodes bytes produced by {@link ProtobufCodec.serializeIndex}.
   * @param buffer Encoded `IndexData` message.
   * @returns Index data; `metadata_config.indexed` is always present (possibly empty).
   */
  deserializeIndex(buffer: Buffer): IndexData {
    const indexType = getRoot().lookupType('IndexData');
    const message = indexType.decode(new Uint8Array(buffer));
    // `oneofs: true` adds the discriminator that metadata decoding relies on.
    const plain = indexType.toObject(message, {
      longs: Number,
      enums: String,
      defaults: true,
      oneofs: true,
    });

    const indexed = plain.metadataConfig?.indexed ?? [];
    const items = (plain.items || []).map(fromProtoItem);
    return {
      version: plain.version,
      metadata_config: { indexed },
      items,
    };
  }

  /**
   * Encodes a document catalog as a `DocumentCatalog` protobuf message.
   * @param catalog Catalog to encode.
   * @returns The encoded bytes.
   * @throws Error when the payload fails protobuf verification.
   */
  serializeCatalog(catalog: DocumentCatalog): Buffer {
    const catalogType = getRoot().lookupType('DocumentCatalog');
    const { version, count, uriToId, idToUri } = catalog;
    const payload = { version, count, uriToId, idToUri };

    const problem = catalogType.verify(payload);
    if (problem) {
      throw new Error(`Protobuf verify error: ${problem}`);
    }

    const encoded = catalogType.encode(catalogType.create(payload)).finish();
    return Buffer.from(encoded);
  }

  /**
   * Decodes bytes produced by {@link ProtobufCodec.serializeCatalog}.
   * @param buffer Encoded `DocumentCatalog` message.
   * @returns The catalog; missing maps are returned as empty objects.
   */
  deserializeCatalog(buffer: Buffer): DocumentCatalog {
    const catalogType = getRoot().lookupType('DocumentCatalog');
    const message = catalogType.decode(new Uint8Array(buffer));
    const plain = catalogType.toObject(message, {
      longs: Number,
      enums: String,
      defaults: true,
      oneofs: true,
    });

    return {
      version: plain.version,
      count: plain.count,
      uriToId: plain.uriToId || {},
      idToUri: plain.idToUri || {},
    };
  }

  /**
   * Encodes an external metadata map.
   *
   * There is no standalone proto message for a bare metadata map, and the
   * size savings on small metadata objects are negligible, so this writes
   * plain UTF-8 JSON with no envelope.
   * @param metadata Flat key/value metadata.
   * @returns UTF-8 bytes of `JSON.stringify(metadata)`.
   */
  serializeMetadata(metadata: Record<string, MetadataTypes>): Buffer {
    const text = JSON.stringify(metadata);
    return Buffer.from(text, 'utf-8');
  }

  /**
   * Decodes an external metadata map written by {@link ProtobufCodec.serializeMetadata}.
   * @param buffer UTF-8 JSON bytes.
   * @returns The parsed value, returned without further validation.
   */
  deserializeMetadata(buffer: Buffer): Record<string, MetadataTypes> {
    const text = buffer.toString('utf-8');
    return JSON.parse(text);
  }
}
|
|
@@ -0,0 +1,176 @@
|
|
|
1
|
+
import assert from 'node:assert';
|
|
2
|
+
import { VirtualFileStorage } from '../storage';
|
|
3
|
+
import { JsonCodec } from './JsonCodec';
|
|
4
|
+
import { ProtobufCodec } from './ProtobufCodec';
|
|
5
|
+
import { detectCodec, migrateIndex } from './migrateIndex';
|
|
6
|
+
import { IndexData } from '../types';
|
|
7
|
+
import { DocumentCatalog } from './IndexCodec';
|
|
8
|
+
|
|
9
|
+
// detectCodec picks a codec from which index file exists in the folder and
// rejects when the folder holds both formats or neither.
describe('detectCodec', () => {
  /** Creates in-memory storage that already contains an empty `/idx` folder. */
  async function emptyIndexFolder(): Promise<VirtualFileStorage> {
    const storage = new VirtualFileStorage();
    await storage.createFolder('/idx');
    return storage;
  }

  it('detects JSON format', async () => {
    const storage = await emptyIndexFolder();
    await storage.upsertFile('/idx/index.json', '{}');

    const detected = await detectCodec('/idx', storage);

    assert.equal(detected.extension, '.json');
  });

  it('detects Protobuf format', async () => {
    const storage = await emptyIndexFolder();
    await storage.upsertFile('/idx/index.pb', Buffer.from([0]));

    const detected = await detectCodec('/idx', storage);

    assert.equal(detected.extension, '.pb');
  });

  it('throws when both formats exist', async () => {
    const storage = await emptyIndexFolder();
    await storage.upsertFile('/idx/index.json', '{}');
    await storage.upsertFile('/idx/index.pb', Buffer.from([0]));

    const expectedError = /Both index\.json and index\.pb found/;
    await assert.rejects(() => detectCodec('/idx', storage), expectedError);
  });

  it('throws when no index file exists', async () => {
    const storage = await emptyIndexFolder();

    const expectedError = /No index file found/;
    await assert.rejects(() => detectCodec('/idx', storage), expectedError);
  });
});
|
|
46
|
+
|
|
47
|
+
// End-to-end checks for migrateIndex against in-memory storage: the index
// file, the catalog, and externally stored metadata files are converted
// between the JSON and Protobuf codecs.
describe('migrateIndex', () => {
  const json = new JsonCodec();
  const pb = new ProtobufCodec();

  /** Two-item index; the second item points at an external metadata file. */
  function makeIndexData(): IndexData {
    return {
      version: 1,
      metadata_config: { indexed: ['cat'] },
      items: [
        { id: 'item1', metadata: { cat: 'food' }, vector: [0.1, 0.2, 0.3], norm: 0.374 },
        { id: 'item2', metadata: { cat: 'drink' }, vector: [0.4, 0.5, 0.6], norm: 0.877, metadataFile: 'abc.json' },
      ],
    };
  }

  /** Catalog with a single document mapping. */
  function makeCatalog(): DocumentCatalog {
    return {
      version: 1,
      count: 1,
      uriToId: { 'doc.txt': 'doc-id-1' },
      idToUri: { 'doc-id-1': 'doc.txt' },
    };
  }

  it('migrates JSON -> Protobuf', async () => {
    const storage = new VirtualFileStorage();
    await storage.createFolder('/idx');
    const data = makeIndexData();
    await storage.upsertFile('/idx/index.json', json.serializeIndex(data));
    await storage.upsertFile('/idx/abc.json', json.serializeMetadata({ cat: 'drink', extra: 'data' }));

    await migrateIndex('/idx', { to: 'protobuf', storage });

    // Old files should be gone
    assert.equal(await storage.pathExists('/idx/index.json'), false);
    assert.equal(await storage.pathExists('/idx/abc.json'), false);

    // New files should exist
    assert.equal(await storage.pathExists('/idx/index.pb'), true);
    assert.equal(await storage.pathExists('/idx/abc.pb'), true);

    // Data should be intact, with the metadata file reference renamed
    const result = pb.deserializeIndex(await storage.readFile('/idx/index.pb'));
    assert.equal(result.items.length, 2);
    assert.equal(result.items[0].id, 'item1');
    assert.equal(result.items[1].metadataFile, 'abc.pb');

    // External metadata should be readable
    const meta = pb.deserializeMetadata(await storage.readFile('/idx/abc.pb'));
    assert.equal(meta.cat, 'drink');
    assert.equal(meta.extra, 'data');
  });

  it('migrates Protobuf -> JSON', async () => {
    const storage = new VirtualFileStorage();
    await storage.createFolder('/idx');
    const data = makeIndexData();
    // Adjust metadataFile extension so the fixture matches the source format
    data.items[1].metadataFile = 'abc.pb';
    await storage.upsertFile('/idx/index.pb', pb.serializeIndex(data));
    await storage.upsertFile('/idx/abc.pb', pb.serializeMetadata({ cat: 'drink', extra: 'data' }));

    await migrateIndex('/idx', { to: 'json', storage });

    assert.equal(await storage.pathExists('/idx/index.pb'), false);
    assert.equal(await storage.pathExists('/idx/index.json'), true);

    const result = json.deserializeIndex(await storage.readFile('/idx/index.json'));
    assert.equal(result.items.length, 2);
    assert.equal(result.items[1].metadataFile, 'abc.json');
  });

  it('migrates catalog along with index', async () => {
    const storage = new VirtualFileStorage();
    await storage.createFolder('/idx');
    const data: IndexData = { version: 1, metadata_config: {}, items: [] };
    const catalog = makeCatalog();
    await storage.upsertFile('/idx/index.json', json.serializeIndex(data));
    await storage.upsertFile('/idx/catalog.json', json.serializeCatalog(catalog));

    await migrateIndex('/idx', { to: 'protobuf', storage });

    assert.equal(await storage.pathExists('/idx/catalog.json'), false);
    assert.equal(await storage.pathExists('/idx/catalog.pb'), true);

    const result = pb.deserializeCatalog(await storage.readFile('/idx/catalog.pb'));
    assert.deepStrictEqual(result, catalog);
  });

  it('migrates document metadata files referenced by catalog', async () => {
    const storage = new VirtualFileStorage();
    await storage.createFolder('/idx');
    const data: IndexData = { version: 1, metadata_config: {}, items: [] };
    const catalog = makeCatalog();
    await storage.upsertFile('/idx/index.json', json.serializeIndex(data));
    await storage.upsertFile('/idx/catalog.json', json.serializeCatalog(catalog));
    // Metadata file named after the document id listed in the catalog
    await storage.upsertFile('/idx/doc-id-1.json', json.serializeMetadata({ author: 'test' }));

    await migrateIndex('/idx', { to: 'protobuf', storage });

    assert.equal(await storage.pathExists('/idx/doc-id-1.json'), false);
    assert.equal(await storage.pathExists('/idx/doc-id-1.pb'), true);
    const meta = pb.deserializeMetadata(await storage.readFile('/idx/doc-id-1.pb'));
    assert.equal(meta.author, 'test');
  });

  it('no-op when already in target format', async () => {
    const storage = new VirtualFileStorage();
    await storage.createFolder('/idx');
    const data: IndexData = { version: 1, metadata_config: {}, items: [] };
    const originalBytes = json.serializeIndex(data);
    await storage.upsertFile('/idx/index.json', originalBytes);

    await migrateIndex('/idx', { to: 'json', storage });

    // Should still be there, unchanged: same file, same bytes, and no
    // file in the other format created alongside it.
    assert.equal(await storage.pathExists('/idx/index.json'), true);
    assert.equal(await storage.pathExists('/idx/index.pb'), false);
    const bytesAfter = await storage.readFile('/idx/index.json');
    assert.ok(bytesAfter.equals(originalBytes), 'index.json content changed during no-op migration');
  });

  it('interruption detection: dual format raises error on detectCodec', async () => {
    // Both index files present models a migration that stopped part-way.
    const storage = new VirtualFileStorage();
    await storage.createFolder('/idx');
    await storage.upsertFile('/idx/index.json', '{}');
    await storage.upsertFile('/idx/index.pb', Buffer.from([0]));

    await assert.rejects(
      () => detectCodec('/idx', storage),
      /Both index\.json and index\.pb found/
    );
  });
});
|