vectra 0.12.2 → 0.14.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +1 -1
- package/README.md +92 -100
- package/bin/vectra.js +3 -0
- package/lib/BrowserWebFetcher.d.ts +75 -0
- package/lib/BrowserWebFetcher.d.ts.map +1 -0
- package/lib/BrowserWebFetcher.js +290 -0
- package/lib/BrowserWebFetcher.js.map +1 -0
- package/lib/FileFetcher.d.ts +5 -0
- package/lib/FileFetcher.d.ts.map +1 -0
- package/lib/FileFetcher.js +89 -0
- package/lib/FileFetcher.js.map +1 -0
- package/lib/FileFetcher.spec.d.ts +2 -0
- package/lib/FileFetcher.spec.d.ts.map +1 -0
- package/lib/FileFetcher.spec.js +244 -0
- package/lib/FileFetcher.spec.js.map +1 -0
- package/lib/FolderWatcher.d.ts +91 -0
- package/lib/FolderWatcher.d.ts.map +1 -0
- package/lib/FolderWatcher.js +304 -0
- package/lib/FolderWatcher.js.map +1 -0
- package/lib/FolderWatcher.spec.d.ts +2 -0
- package/lib/FolderWatcher.spec.d.ts.map +1 -0
- package/lib/FolderWatcher.spec.js +308 -0
- package/lib/FolderWatcher.spec.js.map +1 -0
- package/lib/GPT3Tokenizer.d.ts +9 -0
- package/lib/GPT3Tokenizer.spec.d.ts +2 -0
- package/lib/GPT3Tokenizer.spec.d.ts.map +1 -0
- package/lib/GPT3Tokenizer.spec.js +45 -0
- package/lib/GPT3Tokenizer.spec.js.map +1 -0
- package/lib/ItemSelector.d.ts +41 -0
- package/lib/ItemSelector.d.ts.map +1 -0
- package/lib/ItemSelector.js +179 -0
- package/lib/ItemSelector.js.map +1 -0
- package/lib/ItemSelector.spec.d.ts +2 -0
- package/lib/ItemSelector.spec.d.ts.map +1 -0
- package/lib/ItemSelector.spec.js +204 -0
- package/lib/ItemSelector.spec.js.map +1 -0
- package/lib/LocalDocument.d.ts +54 -0
- package/lib/LocalDocument.d.ts.map +1 -1
- package/lib/LocalDocument.js +116 -0
- package/lib/LocalDocument.js.map +1 -0
- package/lib/LocalDocument.spec.d.ts +2 -0
- package/lib/LocalDocument.spec.d.ts.map +1 -0
- package/lib/LocalDocument.spec.js +214 -0
- package/lib/LocalDocument.spec.js.map +1 -0
- package/lib/LocalDocumentIndex.d.ts +152 -0
- package/lib/LocalDocumentIndex.d.ts.map +1 -1
- package/lib/LocalDocumentIndex.js +420 -0
- package/lib/LocalDocumentIndex.js.map +1 -0
- package/lib/LocalDocumentIndex.spec.d.ts +2 -0
- package/lib/LocalDocumentIndex.spec.d.ts.map +1 -0
- package/lib/LocalDocumentIndex.spec.js +494 -0
- package/lib/LocalDocumentIndex.spec.js.map +1 -0
- package/lib/LocalDocumentResult.d.ts +66 -0
- package/lib/LocalDocumentResult.d.ts.map +1 -1
- package/lib/LocalDocumentResult.js +376 -0
- package/lib/LocalDocumentResult.js.map +1 -0
- package/lib/LocalDocumentResult.spec.d.ts +2 -0
- package/lib/LocalDocumentResult.spec.d.ts.map +1 -0
- package/lib/LocalDocumentResult.spec.js +373 -0
- package/lib/LocalDocumentResult.spec.js.map +1 -0
- package/lib/LocalEmbeddings.d.ts +59 -0
- package/lib/LocalEmbeddings.d.ts.map +1 -0
- package/lib/LocalEmbeddings.js +101 -0
- package/lib/LocalEmbeddings.js.map +1 -0
- package/lib/LocalEmbeddings.spec.d.ts +2 -0
- package/lib/LocalEmbeddings.spec.d.ts.map +1 -0
- package/lib/LocalEmbeddings.spec.js +155 -0
- package/lib/LocalEmbeddings.spec.js.map +1 -0
- package/lib/LocalIndex.d.ts +159 -0
- package/lib/LocalIndex.d.ts.map +1 -1
- package/lib/LocalIndex.js +519 -0
- package/lib/LocalIndex.js.map +1 -0
- package/lib/LocalIndex.spec.d.ts +2 -0
- package/lib/LocalIndex.spec.js +611 -9
- package/lib/LocalIndex.spec.js.map +1 -1
- package/lib/OpenAIEmbeddings.d.ts +124 -0
- package/lib/OpenAIEmbeddings.d.ts.map +1 -0
- package/lib/OpenAIEmbeddings.js +166 -0
- package/lib/OpenAIEmbeddings.js.map +1 -0
- package/lib/OpenAIEmbeddings.spec.d.ts +2 -0
- package/lib/OpenAIEmbeddings.spec.d.ts.map +1 -0
- package/lib/OpenAIEmbeddings.spec.js +298 -0
- package/lib/OpenAIEmbeddings.spec.js.map +1 -0
- package/lib/TextSplitter.d.ts +21 -0
- package/lib/TextSplitter.d.ts.map +1 -1
- package/lib/TextSplitter.js +500 -0
- package/lib/TextSplitter.js.map +1 -0
- package/lib/TextSplitter.spec.d.ts +2 -0
- package/lib/TextSplitter.spec.d.ts.map +1 -0
- package/lib/TextSplitter.spec.js +337 -0
- package/lib/TextSplitter.spec.js.map +1 -0
- package/lib/TransformersEmbeddings.d.ts +121 -0
- package/lib/TransformersEmbeddings.d.ts.map +1 -0
- package/lib/TransformersEmbeddings.js +176 -0
- package/lib/TransformersEmbeddings.js.map +1 -0
- package/lib/TransformersEmbeddings.spec.d.ts +2 -0
- package/lib/TransformersEmbeddings.spec.d.ts.map +1 -0
- package/lib/TransformersEmbeddings.spec.js +198 -0
- package/lib/TransformersEmbeddings.spec.js.map +1 -0
- package/lib/TransformersTokenizer.d.ts +33 -0
- package/lib/TransformersTokenizer.d.ts.map +1 -0
- package/lib/TransformersTokenizer.js +44 -0
- package/lib/TransformersTokenizer.js.map +1 -0
- package/lib/TransformersTokenizer.spec.d.ts +2 -0
- package/lib/TransformersTokenizer.spec.d.ts.map +1 -0
- package/lib/TransformersTokenizer.spec.js +112 -0
- package/lib/TransformersTokenizer.spec.js.map +1 -0
- package/lib/WebFetcher.d.ts +14 -0
- package/lib/WebFetcher.d.ts.map +1 -0
- package/lib/WebFetcher.js +238 -0
- package/lib/WebFetcher.js.map +1 -0
- package/lib/WebFetcher.spec.d.ts +2 -0
- package/lib/WebFetcher.spec.d.ts.map +1 -0
- package/lib/WebFetcher.spec.js +263 -0
- package/lib/WebFetcher.spec.js.map +1 -0
- package/lib/browser.d.ts +30 -0
- package/lib/browser.d.ts.map +1 -0
- package/lib/browser.js +52 -0
- package/lib/browser.js.map +1 -0
- package/lib/codecs/IndexCodec.d.ts +37 -0
- package/lib/codecs/IndexCodec.d.ts.map +1 -0
- package/lib/codecs/IndexCodec.js +3 -0
- package/lib/codecs/IndexCodec.js.map +1 -0
- package/lib/codecs/JsonCodec.d.ts +19 -0
- package/lib/codecs/JsonCodec.d.ts.map +1 -0
- package/lib/codecs/JsonCodec.js +35 -0
- package/lib/codecs/JsonCodec.js.map +1 -0
- package/lib/codecs/JsonCodec.spec.d.ts +2 -0
- package/lib/codecs/JsonCodec.spec.d.ts.map +1 -0
- package/lib/codecs/JsonCodec.spec.js +66 -0
- package/lib/codecs/JsonCodec.spec.js.map +1 -0
- package/lib/codecs/LocalIndex.protobuf.spec.d.ts +2 -0
- package/lib/codecs/LocalIndex.protobuf.spec.d.ts.map +1 -0
- package/lib/codecs/LocalIndex.protobuf.spec.js +108 -0
- package/lib/codecs/LocalIndex.protobuf.spec.js.map +1 -0
- package/lib/codecs/ProtobufCodec.d.ts +20 -0
- package/lib/codecs/ProtobufCodec.d.ts.map +1 -0
- package/lib/codecs/ProtobufCodec.js +225 -0
- package/lib/codecs/ProtobufCodec.js.map +1 -0
- package/lib/codecs/ProtobufCodec.spec.d.ts +2 -0
- package/lib/codecs/ProtobufCodec.spec.d.ts.map +1 -0
- package/lib/codecs/ProtobufCodec.spec.js +155 -0
- package/lib/codecs/ProtobufCodec.spec.js.map +1 -0
- package/lib/codecs/index.d.ts +5 -0
- package/lib/codecs/index.d.ts.map +1 -0
- package/lib/codecs/index.js +21 -0
- package/lib/codecs/index.js.map +1 -0
- package/lib/codecs/migrateIndex.d.ts +24 -0
- package/lib/codecs/migrateIndex.d.ts.map +1 -0
- package/lib/codecs/migrateIndex.js +119 -0
- package/lib/codecs/migrateIndex.js.map +1 -0
- package/lib/codecs/migrateIndex.spec.d.ts +2 -0
- package/lib/codecs/migrateIndex.spec.d.ts.map +1 -0
- package/lib/codecs/migrateIndex.spec.js +151 -0
- package/lib/codecs/migrateIndex.spec.js.map +1 -0
- package/lib/codecs/schemas/index.proto +34 -0
- package/lib/index.d.ts +20 -0
- package/lib/index.d.ts.map +1 -1
- package/lib/index.js +36 -0
- package/lib/index.js.map +1 -0
- package/lib/internals/Colorize.d.ts +14 -0
- package/lib/internals/Colorize.d.ts.map +1 -0
- package/lib/internals/Colorize.js +69 -0
- package/lib/internals/Colorize.js.map +1 -0
- package/lib/internals/index.d.ts +3 -0
- package/lib/internals/index.d.ts.map +1 -0
- package/lib/internals/index.js +19 -0
- package/lib/internals/index.js.map +1 -0
- package/lib/internals/types.d.ts +43 -0
- package/lib/internals/types.d.ts.map +1 -0
- package/lib/internals/types.js +3 -0
- package/lib/internals/types.js.map +1 -0
- package/lib/server/IndexManager.d.ts +78 -0
- package/lib/server/IndexManager.d.ts.map +1 -0
- package/lib/server/IndexManager.js +259 -0
- package/lib/server/IndexManager.js.map +1 -0
- package/lib/server/VectraServer.d.ts +40 -0
- package/lib/server/VectraServer.d.ts.map +1 -0
- package/lib/server/VectraServer.js +151 -0
- package/lib/server/VectraServer.js.map +1 -0
- package/lib/server/VectraServer.spec.d.ts +2 -0
- package/lib/server/VectraServer.spec.d.ts.map +1 -0
- package/lib/server/VectraServer.spec.js +322 -0
- package/lib/server/VectraServer.spec.js.map +1 -0
- package/lib/server/handlers/documentHandlers.d.ts +15 -0
- package/lib/server/handlers/documentHandlers.d.ts.map +1 -0
- package/lib/server/handlers/documentHandlers.js +95 -0
- package/lib/server/handlers/documentHandlers.js.map +1 -0
- package/lib/server/handlers/helpers.d.ts +23 -0
- package/lib/server/handlers/helpers.d.ts.map +1 -0
- package/lib/server/handlers/helpers.js +138 -0
- package/lib/server/handlers/helpers.js.map +1 -0
- package/lib/server/handlers/index.d.ts +8 -0
- package/lib/server/handlers/index.d.ts.map +1 -0
- package/lib/server/handlers/index.js +22 -0
- package/lib/server/handlers/index.js.map +1 -0
- package/lib/server/handlers/indexHandlers.d.ts +14 -0
- package/lib/server/handlers/indexHandlers.d.ts.map +1 -0
- package/lib/server/handlers/indexHandlers.js +85 -0
- package/lib/server/handlers/indexHandlers.js.map +1 -0
- package/lib/server/handlers/itemHandlers.d.ts +34 -0
- package/lib/server/handlers/itemHandlers.d.ts.map +1 -0
- package/lib/server/handlers/itemHandlers.js +166 -0
- package/lib/server/handlers/itemHandlers.js.map +1 -0
- package/lib/server/handlers/lifecycleHandlers.d.ts +11 -0
- package/lib/server/handlers/lifecycleHandlers.d.ts.map +1 -0
- package/lib/server/handlers/lifecycleHandlers.js +31 -0
- package/lib/server/handlers/lifecycleHandlers.js.map +1 -0
- package/lib/server/handlers/queryHandlers.d.ts +27 -0
- package/lib/server/handlers/queryHandlers.d.ts.map +1 -0
- package/lib/server/handlers/queryHandlers.js +135 -0
- package/lib/server/handlers/queryHandlers.js.map +1 -0
- package/lib/server/handlers/statsHandlers.d.ts +17 -0
- package/lib/server/handlers/statsHandlers.d.ts.map +1 -0
- package/lib/server/handlers/statsHandlers.js +81 -0
- package/lib/server/handlers/statsHandlers.js.map +1 -0
- package/lib/server/index.d.ts +4 -0
- package/lib/server/index.d.ts.map +1 -0
- package/lib/server/index.js +23 -0
- package/lib/server/index.js.map +1 -0
- package/lib/storage/FileStorage.d.ts +92 -0
- package/lib/storage/FileStorage.d.ts.map +1 -0
- package/lib/storage/FileStorage.js +3 -0
- package/lib/storage/FileStorage.js.map +1 -0
- package/lib/storage/FileStorageUtilities.d.ts +36 -0
- package/lib/storage/FileStorageUtilities.d.ts.map +1 -0
- package/lib/storage/FileStorageUtilities.js +91 -0
- package/lib/storage/FileStorageUtilities.js.map +1 -0
- package/lib/storage/FileStorageUtilities.spec.d.ts +2 -0
- package/lib/storage/FileStorageUtilities.spec.d.ts.map +1 -0
- package/lib/storage/FileStorageUtilities.spec.js +98 -0
- package/lib/storage/FileStorageUtilities.spec.js.map +1 -0
- package/lib/storage/FileType.d.ts +29 -0
- package/lib/storage/FileType.d.ts.map +1 -0
- package/lib/storage/FileType.js +38 -0
- package/lib/storage/FileType.js.map +1 -0
- package/lib/storage/IndexedDBStorage.d.ts +47 -0
- package/lib/storage/IndexedDBStorage.d.ts.map +1 -0
- package/lib/storage/IndexedDBStorage.js +347 -0
- package/lib/storage/IndexedDBStorage.js.map +1 -0
- package/lib/storage/LocalFileStorage.browser.d.ts +19 -0
- package/lib/storage/LocalFileStorage.browser.d.ts.map +1 -0
- package/lib/storage/LocalFileStorage.browser.js +43 -0
- package/lib/storage/LocalFileStorage.browser.js.map +1 -0
- package/lib/storage/LocalFileStorage.d.ts +23 -0
- package/lib/storage/LocalFileStorage.d.ts.map +1 -0
- package/lib/storage/LocalFileStorage.js +152 -0
- package/lib/storage/LocalFileStorage.js.map +1 -0
- package/lib/storage/LocalFileStorage.spec.d.ts +2 -0
- package/lib/storage/LocalFileStorage.spec.d.ts.map +1 -0
- package/lib/storage/LocalFileStorage.spec.js +249 -0
- package/lib/storage/LocalFileStorage.spec.js.map +1 -0
- package/lib/storage/VirtualFileStorage.d.ts +18 -0
- package/lib/storage/VirtualFileStorage.d.ts.map +1 -0
- package/lib/storage/VirtualFileStorage.js +178 -0
- package/lib/storage/VirtualFileStorage.js.map +1 -0
- package/lib/storage/VirtualFileStorage.spec.d.ts +2 -0
- package/lib/storage/VirtualFileStorage.spec.d.ts.map +1 -0
- package/lib/storage/VirtualFileStorage.spec.js +302 -0
- package/lib/storage/VirtualFileStorage.spec.js.map +1 -0
- package/lib/storage/index.d.ts +6 -0
- package/lib/storage/index.d.ts.map +1 -0
- package/lib/storage/index.js +22 -0
- package/lib/storage/index.js.map +1 -0
- package/lib/templates/templates/csharp/README.md +48 -0
- package/lib/templates/templates/csharp/VectraClient.cs +234 -0
- package/lib/templates/templates/go/README.md +71 -0
- package/lib/templates/templates/go/vectra_client.go +322 -0
- package/lib/templates/templates/java/README.md +81 -0
- package/lib/templates/templates/java/VectraClient.java +232 -0
- package/lib/templates/templates/python/README.md +37 -0
- package/lib/templates/templates/python/vectra_client.py +279 -0
- package/lib/templates/templates/rust/Cargo.toml +14 -0
- package/lib/templates/templates/rust/README.md +39 -0
- package/lib/templates/templates/rust/build.rs +4 -0
- package/lib/templates/templates/rust/lib.rs +284 -0
- package/lib/templates/templates/typescript/README.md +96 -0
- package/lib/templates/templates/typescript/VectraClient.ts +374 -0
- package/lib/templates/typescript/VectraClient.d.ts +114 -0
- package/lib/templates/typescript/VectraClient.d.ts.map +1 -0
- package/lib/templates/typescript/VectraClient.js +328 -0
- package/lib/templates/typescript/VectraClient.js.map +1 -0
- package/lib/types.d.ts +153 -0
- package/lib/types.d.ts.map +1 -0
- package/lib/types.js +3 -0
- package/lib/types.js.map +1 -0
- package/lib/utils/index.d.ts +2 -0
- package/lib/utils/index.d.ts.map +1 -0
- package/lib/utils/index.js +18 -0
- package/lib/utils/index.js.map +1 -0
- package/lib/utils/pathUtils.d.ts +40 -0
- package/lib/utils/pathUtils.d.ts.map +1 -0
- package/lib/utils/pathUtils.js +98 -0
- package/lib/utils/pathUtils.js.map +1 -0
- package/lib/vectra-cli.d.ts +2 -0
- package/lib/vectra-cli.d.ts.map +1 -1
- package/lib/vectra-cli.generate.spec.d.ts +2 -0
- package/lib/vectra-cli.generate.spec.d.ts.map +1 -0
- package/lib/vectra-cli.generate.spec.js +112 -0
- package/lib/vectra-cli.generate.spec.js.map +1 -0
- package/lib/vectra-cli.js +760 -0
- package/lib/vectra-cli.js.map +1 -0
- package/lib/vectra-cli.spec.d.ts +1 -0
- package/lib/vectra-cli.spec.d.ts.map +1 -0
- package/lib/vectra-cli.spec.js +2 -0
- package/lib/vectra-cli.spec.js.map +1 -0
- package/package.json +91 -16
- package/proto/vectra_service.proto +276 -0
- package/src/BrowserWebFetcher.ts +345 -0
- package/src/FileFetcher.spec.ts +234 -0
- package/src/FileFetcher.ts +37 -25
- package/src/FolderWatcher.spec.ts +288 -0
- package/src/FolderWatcher.ts +304 -0
- package/src/GPT3Tokenizer.spec.ts +50 -0
- package/src/ItemSelector.spec.ts +252 -0
- package/src/ItemSelector.ts +163 -150
- package/src/LocalDocument.spec.ts +211 -0
- package/src/LocalDocument.ts +88 -94
- package/src/LocalDocumentIndex.spec.ts +481 -0
- package/src/LocalDocumentIndex.ts +39 -40
- package/src/LocalDocumentResult.spec.ts +373 -0
- package/src/LocalDocumentResult.ts +489 -319
- package/src/LocalEmbeddings.spec.ts +138 -0
- package/src/LocalEmbeddings.ts +120 -0
- package/src/LocalIndex.spec.ts +808 -66
- package/src/LocalIndex.ts +479 -429
- package/src/OpenAIEmbeddings.spec.ts +354 -0
- package/src/OpenAIEmbeddings.ts +26 -27
- package/src/TextSplitter.spec.ts +342 -0
- package/src/TextSplitter.ts +517 -532
- package/src/TransformersEmbeddings.spec.ts +188 -0
- package/src/TransformersEmbeddings.ts +232 -0
- package/src/TransformersTokenizer.spec.ts +143 -0
- package/src/TransformersTokenizer.ts +45 -0
- package/src/WebFetcher.spec.ts +288 -0
- package/src/WebFetcher.ts +184 -186
- package/src/browser.ts +69 -0
- package/src/codecs/IndexCodec.ts +40 -0
- package/src/codecs/JsonCodec.spec.ts +70 -0
- package/src/codecs/JsonCodec.ts +37 -0
- package/src/codecs/LocalIndex.protobuf.spec.ts +115 -0
- package/src/codecs/ProtobufCodec.spec.ts +166 -0
- package/src/codecs/ProtobufCodec.ts +193 -0
- package/src/codecs/index.ts +4 -0
- package/src/codecs/migrateIndex.spec.ts +176 -0
- package/src/codecs/migrateIndex.ts +125 -0
- package/src/codecs/schemas/index.proto +34 -0
- package/src/index.ts +9 -1
- package/src/internals/Colorize.ts +19 -16
- package/src/server/IndexManager.ts +243 -0
- package/src/server/VectraServer.spec.ts +303 -0
- package/src/server/VectraServer.ts +156 -0
- package/src/server/handlers/documentHandlers.ts +59 -0
- package/src/server/handlers/helpers.ts +93 -0
- package/src/server/handlers/index.ts +7 -0
- package/src/server/handlers/indexHandlers.ts +44 -0
- package/src/server/handlers/itemHandlers.ts +140 -0
- package/src/server/handlers/lifecycleHandlers.ts +26 -0
- package/src/server/handlers/queryHandlers.ts +96 -0
- package/src/server/handlers/statsHandlers.ts +38 -0
- package/src/server/index.ts +3 -0
- package/src/storage/FileStorage.ts +105 -0
- package/src/storage/FileStorageUtilities.spec.ts +106 -0
- package/src/storage/FileStorageUtilities.ts +77 -0
- package/src/storage/FileType.ts +61 -0
- package/src/storage/IndexedDBStorage.ts +365 -0
- package/src/storage/LocalFileStorage.browser.ts +52 -0
- package/src/storage/LocalFileStorage.spec.ts +292 -0
- package/src/storage/LocalFileStorage.ts +98 -0
- package/src/storage/VirtualFileStorage.spec.ts +307 -0
- package/src/storage/VirtualFileStorage.ts +169 -0
- package/src/storage/index.ts +5 -0
- package/src/templates/csharp/README.md +48 -0
- package/src/templates/csharp/VectraClient.cs +234 -0
- package/src/templates/go/README.md +71 -0
- package/src/templates/go/vectra_client.go +322 -0
- package/src/templates/java/README.md +81 -0
- package/src/templates/java/VectraClient.java +232 -0
- package/src/templates/python/README.md +37 -0
- package/src/templates/python/vectra_client.py +279 -0
- package/src/templates/rust/Cargo.toml +14 -0
- package/src/templates/rust/README.md +39 -0
- package/src/templates/rust/build.rs +4 -0
- package/src/templates/rust/lib.rs +284 -0
- package/src/templates/typescript/README.md +96 -0
- package/src/templates/typescript/VectraClient.ts +374 -0
- package/src/types.ts +131 -123
- package/src/utils/index.ts +1 -0
- package/src/utils/pathUtils.ts +106 -0
- package/src/vectra-cli.generate.spec.ts +72 -0
- package/src/vectra-cli.spec.ts +0 -0
- package/src/vectra-cli.ts +687 -246
package/src/vectra-cli.ts
CHANGED
|
@@ -1,4 +1,6 @@
|
|
|
1
1
|
import * as fs from 'fs/promises';
|
|
2
|
+
import * as fsSync from 'fs';
|
|
3
|
+
import * as path from 'path';
|
|
2
4
|
import yargs from "yargs/yargs";
|
|
3
5
|
import { hideBin } from "yargs/helpers";
|
|
4
6
|
import { LocalDocumentIndex } from "./LocalDocumentIndex";
|
|
@@ -6,272 +8,711 @@ import { WebFetcher } from './WebFetcher';
|
|
|
6
8
|
import { AzureOpenAIEmbeddingsOptions, OSSEmbeddingsOptions, OpenAIEmbeddings, OpenAIEmbeddingsOptions } from './OpenAIEmbeddings';
|
|
7
9
|
import { Colorize } from './internals';
|
|
8
10
|
import { FileFetcher } from './FileFetcher';
|
|
11
|
+
import { LocalFileStorage } from './storage/LocalFileStorage';
|
|
12
|
+
import { VirtualFileStorage } from './storage/VirtualFileStorage';
|
|
13
|
+
import { IndexCodec, JsonCodec, ProtobufCodec, detectCodec, migrateIndex, FormatName } from './codecs';
|
|
14
|
+
import { VectraServer } from './server/VectraServer';
|
|
15
|
+
import { FolderWatcher } from './FolderWatcher';
|
|
16
|
+
|
|
17
|
+
/**
 * Builds the file storage backend selected by the parsed CLI arguments.
 *
 * @param args Parsed yargs arguments. Only `args.storage` and
 * `args.storageRoot` are read here.
 * @returns A `VirtualFileStorage` when `args.storage` is exactly `'virtual'`;
 * otherwise a `LocalFileStorage` constructed with `args.storageRoot`.
 */
function getStorage(args: any) {
    // Any value other than 'virtual' (including a missing one) selects local storage.
    // `storageRoot` is passed through untouched and may be undefined, since the
    // `storage-root` option declares no default.
    return args.storage === 'virtual'
        ? new VirtualFileStorage()
        : new LocalFileStorage(args.storageRoot);
}
|
|
24
|
+
|
|
25
|
+
/**
 * Translates a serialization format name into the matching index codec.
 *
 * @param format Format name, e.g. the value of the `--format` option.
 * @returns A new `ProtobufCodec` for `'protobuf'`, a new `JsonCodec` for
 * `'json'`, and `undefined` for any other value (including no value).
 */
function getCodecFromFormat(format?: string): IndexCodec | undefined {
    switch (format) {
        case 'protobuf':
            return new ProtobufCodec();
        case 'json':
            return new JsonCodec();
        default:
            // No explicit codec is chosen here. Presumably the consumer applies
            // its own default when given undefined; confirm against LocalDocumentIndex.
            return undefined;
    }
}
|
|
9
30
|
|
|
10
31
|
export async function run() {
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
32
|
+
// prettier-ignore
|
|
33
|
+
const args = await yargs(hideBin(process.argv))
|
|
34
|
+
.scriptName('vectra')
|
|
35
|
+
.option('storage', {
|
|
36
|
+
describe: 'storage backend to use',
|
|
37
|
+
choices: ['local', 'virtual'],
|
|
38
|
+
default: 'local'
|
|
39
|
+
})
|
|
40
|
+
.option('storage-root', {
|
|
41
|
+
describe: 'root folder for local storage (only applies if storage=local)',
|
|
42
|
+
type: 'string'
|
|
43
|
+
})
|
|
44
|
+
.command('create <index>', `create a new local index`, (yargs) => {
|
|
45
|
+
return yargs.option('format', {
|
|
46
|
+
describe: 'serialization format for the index',
|
|
47
|
+
choices: ['json', 'protobuf'] as const,
|
|
48
|
+
default: 'json' as const
|
|
49
|
+
});
|
|
50
|
+
}, async (args) => {
|
|
51
|
+
const folderPath = args.index as string;
|
|
52
|
+
const storage = getStorage(args);
|
|
53
|
+
const codec = getCodecFromFormat(args.format);
|
|
54
|
+
const index = new LocalDocumentIndex({ folderPath, storage, codec });
|
|
55
|
+
const formatLabel = args.format === 'protobuf' ? 'protobuf' : 'json';
|
|
56
|
+
console.log(Colorize.output(`creating ${formatLabel} index at ${folderPath}`));
|
|
57
|
+
await index.createIndex({ version: 1, deleteIfExists: true });
|
|
58
|
+
})
|
|
59
|
+
.command('delete <index>', `delete an existing local index`, {}, async (args) => {
|
|
60
|
+
const folderPath = args.index as string;
|
|
61
|
+
console.log(Colorize.output(`deleting index at ${folderPath}`));
|
|
62
|
+
const storage = getStorage(args);
|
|
63
|
+
const codec = await detectCodec(folderPath, storage).catch(() => undefined);
|
|
64
|
+
const index = new LocalDocumentIndex({ folderPath, storage, codec });
|
|
65
|
+
await index.deleteIndex();
|
|
66
|
+
})
|
|
67
|
+
.command('add <index>', `adds one or more web pages to an index`, (yargs) => {
|
|
68
|
+
return yargs
|
|
69
|
+
.option('keys', {
|
|
70
|
+
alias: 'k',
|
|
71
|
+
describe: 'path of a JSON file containing the model keys to use for generating embeddings',
|
|
72
|
+
type: 'string'
|
|
73
|
+
})
|
|
74
|
+
.option('uri', {
|
|
75
|
+
alias: 'u',
|
|
76
|
+
array: true,
|
|
77
|
+
describe: 'http/https link to a web page to add',
|
|
78
|
+
type: 'string'
|
|
79
|
+
})
|
|
80
|
+
.option('list', {
|
|
81
|
+
alias: 'l',
|
|
82
|
+
describe: 'path to a file containing a list of web pages to add',
|
|
83
|
+
type: 'string'
|
|
84
|
+
})
|
|
85
|
+
.option('cookie', {
|
|
86
|
+
alias: 'c',
|
|
87
|
+
describe: 'optional cookies to add to web fetch requests',
|
|
88
|
+
type: 'string'
|
|
89
|
+
})
|
|
90
|
+
.option('chunk-size', {
|
|
91
|
+
alias: 'cs',
|
|
92
|
+
describe: 'size of the generated chunks in tokens (defaults to 512)',
|
|
93
|
+
type: 'number',
|
|
94
|
+
default: 512
|
|
95
|
+
})
|
|
96
|
+
.check((argv) => {
|
|
97
|
+
if (Array.isArray(argv.uri) && argv.uri.length > 0) {
|
|
98
|
+
return true;
|
|
99
|
+
} else if (typeof argv.list == 'string' && argv.list.trim().length > 0) {
|
|
100
|
+
return true;
|
|
101
|
+
} else {
|
|
102
|
+
throw new Error(`you must specify either one or more "--uri <link>" for the pages to add or a "--list <file path>" for a file containing the list of pages to add.`);
|
|
103
|
+
}
|
|
104
|
+
})
|
|
105
|
+
.demandOption(['keys']);
|
|
106
|
+
}, async (args) => {
|
|
107
|
+
console.log(Colorize.title('Adding Web Pages to Index'));
|
|
108
|
+
// Get embedding options
|
|
109
|
+
const options: OpenAIEmbeddingsOptions | AzureOpenAIEmbeddingsOptions | OSSEmbeddingsOptions = JSON.parse(await fs.readFile(args.keys as string, 'utf-8'));
|
|
110
|
+
if ((options as OpenAIEmbeddingsOptions).apiKey && !(options as OpenAIEmbeddingsOptions).model) {
|
|
111
|
+
(options as OpenAIEmbeddingsOptions).model = 'text-embedding-ada-002';
|
|
112
|
+
(options as OpenAIEmbeddingsOptions).maxTokens = 8000;
|
|
113
|
+
}
|
|
114
|
+
// Create embeddings
|
|
115
|
+
const embeddings = new OpenAIEmbeddings(options);
|
|
116
|
+
// Initialize index
|
|
117
|
+
const folderPath = args.index as string;
|
|
118
|
+
const storage = getStorage(args);
|
|
119
|
+
const codec = await detectCodec(folderPath, storage).catch(() => undefined);
|
|
120
|
+
const index = new LocalDocumentIndex({
|
|
121
|
+
folderPath,
|
|
122
|
+
embeddings,
|
|
123
|
+
chunkingConfig: {
|
|
124
|
+
chunkSize: args.chunkSize
|
|
125
|
+
},
|
|
126
|
+
storage,
|
|
127
|
+
codec
|
|
128
|
+
});
|
|
129
|
+
// Get list of url's
|
|
130
|
+
const uris = await getItemList(args.uri as string[], args.list as string, 'web page');
|
|
131
|
+
// Fetch documents
|
|
132
|
+
const fileFetcher = new FileFetcher();
|
|
133
|
+
const webFetcher = args.cookie ? new WebFetcher({ headers: { "cookie": args.cookie } }) : new WebFetcher();
|
|
134
|
+
for (const path of uris) {
|
|
135
|
+
try {
|
|
136
|
+
console.log(Colorize.progress(`fetching ${path}`));
|
|
137
|
+
const fetcher = path.startsWith('http') ? webFetcher : fileFetcher;
|
|
138
|
+
await fetcher.fetch(path, async (uri, text, docType) => {
|
|
139
|
+
console.log(Colorize.replaceLine(Colorize.progress(`indexing ${uri}`)));
|
|
140
|
+
await index.upsertDocument(uri, text, docType);
|
|
141
|
+
console.log(Colorize.replaceLine(Colorize.success(`added ${uri}`)));
|
|
142
|
+
return true;
|
|
143
|
+
});
|
|
144
|
+
} catch (err: unknown) {
|
|
145
|
+
console.log(Colorize.replaceLine(Colorize.error(`Error adding: ${path}\n${(err as Error).message}`)));
|
|
146
|
+
}
|
|
147
|
+
}
|
|
148
|
+
})
|
|
149
|
+
.command('remove <index>', `removes one or more documents from an index`, (yargs) => {
|
|
150
|
+
return yargs
|
|
151
|
+
.option('uri', {
|
|
152
|
+
alias: 'u',
|
|
153
|
+
array: true,
|
|
154
|
+
describe: 'uri of a document to remove',
|
|
155
|
+
type: 'string'
|
|
156
|
+
})
|
|
157
|
+
.option('list', {
|
|
158
|
+
alias: 'l',
|
|
159
|
+
describe: 'path to a file containing a list of documents to remove',
|
|
160
|
+
type: 'string'
|
|
161
|
+
})
|
|
162
|
+
.check((argv) => {
|
|
163
|
+
if (Array.isArray(argv.uri) && argv.uri.length > 0) {
|
|
164
|
+
return true;
|
|
165
|
+
} else if (typeof argv.list == 'string' && argv.list.trim().length > 0) {
|
|
166
|
+
return true;
|
|
167
|
+
} else {
|
|
168
|
+
throw new Error(`you must specify either one or more "--uri <link>" for the pages to add or a "--list <file path>" for a file containing the list of pages to add.`);
|
|
169
|
+
}
|
|
170
|
+
});
|
|
171
|
+
}, async (args) => {
|
|
172
|
+
// Initialize index
|
|
173
|
+
const folderPath = args.index as string;
|
|
174
|
+
const storage = getStorage(args);
|
|
175
|
+
const codec = await detectCodec(folderPath, storage).catch(() => undefined);
|
|
176
|
+
const index = new LocalDocumentIndex({ folderPath, storage, codec });
|
|
177
|
+
// Get list of uri's
|
|
178
|
+
const uris = await getItemList(args.uri as string[], args.list as string, 'document');
|
|
179
|
+
// Remove documents
|
|
180
|
+
for (const uri of uris) {
|
|
181
|
+
console.log(`removing ${uri}`);
|
|
182
|
+
await index.deleteDocument(uri);
|
|
183
|
+
}
|
|
184
|
+
})
|
|
185
|
+
.command('stats <index>', `prints the stats for a local index`, (yargs) => {
|
|
186
|
+
return yargs;
|
|
187
|
+
}, async (args) => {
|
|
188
|
+
const folderPath = args.index as string;
|
|
189
|
+
const storage = getStorage(args);
|
|
190
|
+
// Auto-detect format from files on disk
|
|
191
|
+
const codec = await detectCodec(folderPath, storage);
|
|
192
|
+
const index = new LocalDocumentIndex({ folderPath, storage, codec });
|
|
193
|
+
const stats = await index.getCatalogStats();
|
|
194
|
+
console.log(Colorize.title('Index Stats'));
|
|
195
|
+
console.log(Colorize.output(stats));
|
|
196
|
+
})
|
|
197
|
+
.command('migrate <index>', `migrate an index between serialization formats`, (yargs) => {
|
|
198
|
+
return yargs.option('to', {
|
|
199
|
+
describe: 'target format',
|
|
200
|
+
choices: ['json', 'protobuf'] as const,
|
|
201
|
+
demandOption: true
|
|
202
|
+
});
|
|
203
|
+
}, async (args) => {
|
|
204
|
+
const folderPath = args.index as string;
|
|
205
|
+
const storage = getStorage(args);
|
|
206
|
+
const to = args.to as FormatName;
|
|
207
|
+
console.log(Colorize.output(`migrating index at ${folderPath} to ${to} format`));
|
|
208
|
+
await migrateIndex(folderPath, { to, storage });
|
|
209
|
+
console.log(Colorize.output(`migration complete`));
|
|
210
|
+
})
|
|
211
|
+
.command('query <index> <query>', `queries a local index`, (yargs) => {
|
|
212
|
+
return yargs
|
|
213
|
+
.option('keys', {
|
|
214
|
+
alias: 'k',
|
|
215
|
+
describe: 'path of a JSON file containing the model keys to use for generating embeddings'
|
|
216
|
+
})
|
|
217
|
+
.option('document-count', {
|
|
218
|
+
alias: 'dc',
|
|
219
|
+
describe: 'max number of documents to return (defaults to 10)',
|
|
220
|
+
type: 'number',
|
|
221
|
+
default: 10
|
|
222
|
+
})
|
|
223
|
+
.option('chunk-count', {
|
|
224
|
+
alias: 'cc',
|
|
225
|
+
describe: 'max number of chunks to return (defaults to 50)',
|
|
226
|
+
type: 'number',
|
|
227
|
+
default: 50
|
|
228
|
+
})
|
|
229
|
+
.option('section-count', {
|
|
230
|
+
alias: 'sc',
|
|
231
|
+
describe: 'max number of document sections to render (defaults to 1)',
|
|
232
|
+
type: 'number',
|
|
233
|
+
default: 1
|
|
234
|
+
})
|
|
235
|
+
.option('tokens', {
|
|
236
|
+
alias: 't',
|
|
237
|
+
describe: 'max number of tokens to render for each document section (defaults to 2000)',
|
|
238
|
+
type: 'number',
|
|
239
|
+
default: 2000
|
|
240
|
+
})
|
|
241
|
+
.option('format', {
|
|
242
|
+
alias: 'f',
|
|
243
|
+
describe: `format of the rendered results. Defaults to 'sections'`,
|
|
244
|
+
choices: ['sections', 'stats', 'chunks'],
|
|
245
|
+
default: 'sections'
|
|
246
|
+
})
|
|
247
|
+
.option('overlap', {
|
|
248
|
+
alias: 'o',
|
|
249
|
+
describe: `whether to add overlapping chunks to sections.`,
|
|
250
|
+
type: 'boolean',
|
|
251
|
+
default: true
|
|
252
|
+
})
|
|
253
|
+
.option('bm25', {
|
|
254
|
+
alias: 'b',
|
|
255
|
+
describe: 'Use Okapi-bm25 keyword search alogrithm to perform hybrid search - semantic + keyword. Displayed in blue during search.',
|
|
256
|
+
type: 'boolean',
|
|
257
|
+
default: false
|
|
258
|
+
})
|
|
259
|
+
.demandOption(['keys']);
|
|
260
|
+
}, async (args) => {
|
|
261
|
+
console.log(Colorize.title('Querying Index'));
|
|
262
|
+
// Get embedding options
|
|
263
|
+
const options: OpenAIEmbeddingsOptions | AzureOpenAIEmbeddingsOptions | OSSEmbeddingsOptions = JSON.parse(await fs.readFile(args.keys as string, 'utf-8'));
|
|
264
|
+
if ((options as OpenAIEmbeddingsOptions).apiKey && !(options as OpenAIEmbeddingsOptions).model) {
|
|
265
|
+
(options as OpenAIEmbeddingsOptions).model = 'text-embedding-ada-002';
|
|
266
|
+
(options as OpenAIEmbeddingsOptions).maxTokens = 8000;
|
|
267
|
+
}
|
|
268
|
+
// Create embeddings
|
|
269
|
+
const embeddings = new OpenAIEmbeddings(options);
|
|
270
|
+
// Initialize index
|
|
271
|
+
const folderPath = args.index as string;
|
|
272
|
+
const storage = getStorage(args);
|
|
273
|
+
const codec = await detectCodec(folderPath, storage).catch(() => undefined);
|
|
274
|
+
const index = new LocalDocumentIndex({
|
|
275
|
+
folderPath,
|
|
276
|
+
embeddings,
|
|
277
|
+
storage,
|
|
278
|
+
codec
|
|
279
|
+
});
|
|
280
|
+
// Query index
|
|
281
|
+
const query = args.query as string;
|
|
282
|
+
const results = await index.queryDocuments(query, {
|
|
283
|
+
maxDocuments: args.documentCount,
|
|
284
|
+
maxChunks: args.chunkCount,
|
|
285
|
+
isBm25: args.bm25 as boolean,
|
|
286
|
+
});
|
|
287
|
+
// Render results
|
|
288
|
+
for (const result of results) {
|
|
289
|
+
console.log(Colorize.output(result.uri));
|
|
290
|
+
console.log(Colorize.value('score', result.score));
|
|
291
|
+
console.log(Colorize.value('chunks', result.chunks.length));
|
|
292
|
+
if (args.format == 'sections') {
|
|
293
|
+
const sections = await result.renderSections(args.tokens, args.sectionCount, args.overlap);
|
|
294
|
+
console.log(sections.length);
|
|
295
|
+
for (let i = 0; i < sections.length; i++) {
|
|
296
|
+
const section = sections[i];
|
|
297
|
+
const isBm25 = sections[i].isBm25;
|
|
298
|
+
console.log(isBm25);
|
|
299
|
+
console.log(Colorize.title(args.sectionCount == 1 ? 'Section' : `Section ${i + 1}`));
|
|
300
|
+
console.log(Colorize.value('score', section.score));
|
|
301
|
+
console.log(Colorize.value('tokens', section.tokenCount));
|
|
302
|
+
console.log(Colorize.output(section.text, isBm25));
|
|
303
|
+
}
|
|
304
|
+
} else if (args.format == 'chunks') {
|
|
305
|
+
const text = await result.loadText();
|
|
306
|
+
for (let i = 0; i < result.chunks.length; i++) {
|
|
307
|
+
const chunk = result.chunks[i];
|
|
308
|
+
const startPos = chunk.item.metadata.startPos;
|
|
309
|
+
const endPos = chunk.item.metadata.endPos;
|
|
310
|
+
const isBm25 = Boolean(chunk.item.metadata.isBm25);
|
|
311
|
+
console.log(Colorize.title(`Chunk ${i + 1}`));
|
|
312
|
+
console.log(Colorize.value('score', chunk.score));
|
|
313
|
+
console.log(Colorize.value('startPos', startPos));
|
|
314
|
+
console.log(Colorize.value('endPos', endPos));
|
|
315
|
+
console.log(Colorize.output(text.substring(startPos, endPos + 1), isBm25));
|
|
316
|
+
}
|
|
317
|
+
}
|
|
318
|
+
}
|
|
319
|
+
})
|
|
320
|
+
.command('watch <index>', 'watch folders and automatically sync file changes into the index', (yargs) => {
|
|
321
|
+
return yargs
|
|
322
|
+
.option('keys', {
|
|
323
|
+
alias: 'k',
|
|
324
|
+
describe: 'path of a JSON file containing the model keys to use for generating embeddings',
|
|
325
|
+
type: 'string'
|
|
326
|
+
})
|
|
327
|
+
.option('uri', {
|
|
328
|
+
alias: 'u',
|
|
329
|
+
array: true,
|
|
330
|
+
describe: 'folder or file path to watch',
|
|
331
|
+
type: 'string'
|
|
332
|
+
})
|
|
333
|
+
.option('list', {
|
|
334
|
+
alias: 'l',
|
|
335
|
+
describe: 'path to a file containing a list of folders/files to watch',
|
|
336
|
+
type: 'string'
|
|
19
337
|
})
|
|
20
|
-
.
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
338
|
+
.option('extensions', {
|
|
339
|
+
alias: 'e',
|
|
340
|
+
array: true,
|
|
341
|
+
describe: 'file extensions to include (e.g., .txt .md .html)',
|
|
342
|
+
type: 'string'
|
|
25
343
|
})
|
|
26
|
-
.
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
344
|
+
.option('chunk-size', {
|
|
345
|
+
alias: 'cs',
|
|
346
|
+
describe: 'size of the generated chunks in tokens (defaults to 512)',
|
|
347
|
+
type: 'number',
|
|
348
|
+
default: 512
|
|
349
|
+
})
|
|
350
|
+
.option('debounce', {
|
|
351
|
+
describe: 'debounce interval in milliseconds (defaults to 500)',
|
|
352
|
+
type: 'number',
|
|
353
|
+
default: 500
|
|
354
|
+
})
|
|
355
|
+
.check((argv) => {
|
|
356
|
+
if (Array.isArray(argv.uri) && argv.uri.length > 0) {
|
|
357
|
+
return true;
|
|
358
|
+
} else if (typeof argv.list == 'string' && argv.list.trim().length > 0) {
|
|
359
|
+
return true;
|
|
360
|
+
} else {
|
|
361
|
+
throw new Error(`you must specify either one or more "--uri <path>" for the folders/files to watch or a "--list <file path>" for a file containing the paths.`);
|
|
362
|
+
}
|
|
363
|
+
})
|
|
364
|
+
.demandOption(['keys']);
|
|
365
|
+
}, async (args) => {
|
|
366
|
+
console.log(Colorize.title('Vectra Watch Mode'));
|
|
367
|
+
|
|
368
|
+
// Get embedding options
|
|
369
|
+
const options: OpenAIEmbeddingsOptions | AzureOpenAIEmbeddingsOptions | OSSEmbeddingsOptions = JSON.parse(await fs.readFile(args.keys as string, 'utf-8'));
|
|
370
|
+
if ((options as OpenAIEmbeddingsOptions).apiKey && !(options as OpenAIEmbeddingsOptions).model) {
|
|
371
|
+
(options as OpenAIEmbeddingsOptions).model = 'text-embedding-ada-002';
|
|
372
|
+
(options as OpenAIEmbeddingsOptions).maxTokens = 8000;
|
|
373
|
+
}
|
|
374
|
+
|
|
375
|
+
// Create embeddings
|
|
376
|
+
const embeddings = new OpenAIEmbeddings(options);
|
|
377
|
+
|
|
378
|
+
// Initialize index
|
|
379
|
+
const folderPath = args.index as string;
|
|
380
|
+
const storage = getStorage(args);
|
|
381
|
+
const codec = await detectCodec(folderPath, storage).catch(() => undefined);
|
|
382
|
+
const index = new LocalDocumentIndex({
|
|
383
|
+
folderPath,
|
|
384
|
+
embeddings,
|
|
385
|
+
chunkingConfig: {
|
|
386
|
+
chunkSize: args.chunkSize
|
|
387
|
+
},
|
|
388
|
+
storage,
|
|
389
|
+
codec
|
|
390
|
+
});
|
|
67
391
|
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
if ((options as OpenAIEmbeddingsOptions).apiKey && !(options as OpenAIEmbeddingsOptions).model) {
|
|
71
|
-
(options as OpenAIEmbeddingsOptions).model = 'text-embedding-ada-002';
|
|
72
|
-
(options as OpenAIEmbeddingsOptions).maxTokens = 8000;
|
|
73
|
-
}
|
|
392
|
+
// Get list of paths to watch
|
|
393
|
+
const watchPaths = await getItemList(args.uri as string[], args.list as string, 'path');
|
|
74
394
|
|
|
75
|
-
|
|
76
|
-
|
|
395
|
+
// Create watcher
|
|
396
|
+
const watcher = new FolderWatcher({
|
|
397
|
+
index,
|
|
398
|
+
paths: watchPaths,
|
|
399
|
+
extensions: args.extensions as string[] | undefined,
|
|
400
|
+
debounceMs: args.debounce
|
|
401
|
+
});
|
|
77
402
|
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
403
|
+
// Wire up events
|
|
404
|
+
watcher.on('sync', (uri: string, action: string) => {
|
|
405
|
+
if (action === 'deleted') {
|
|
406
|
+
console.log(Colorize.warning(`removed ${uri}`));
|
|
407
|
+
} else {
|
|
408
|
+
console.log(Colorize.success(`${action} ${uri}`));
|
|
409
|
+
}
|
|
410
|
+
});
|
|
411
|
+
watcher.on('error', (err: Error, uri: string) => {
|
|
412
|
+
console.log(Colorize.error(`Error syncing ${uri}: ${err.message}`));
|
|
413
|
+
});
|
|
87
414
|
|
|
88
|
-
|
|
89
|
-
|
|
415
|
+
// Start watching
|
|
416
|
+
console.log(Colorize.progress(`performing initial sync...`));
|
|
417
|
+
await watcher.start();
|
|
418
|
+
console.log(Colorize.success(`initial sync complete (${watcher.trackedFileCount} files tracked)`));
|
|
419
|
+
console.log(Colorize.output(`watching for changes... (press Ctrl+C to stop)`));
|
|
90
420
|
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
}
|
|
421
|
+
// Handle graceful shutdown
|
|
422
|
+
const handleSignal = async () => {
|
|
423
|
+
console.log(Colorize.output('\nStopping watcher...'));
|
|
424
|
+
await watcher.stop();
|
|
425
|
+
process.exit(0);
|
|
426
|
+
};
|
|
427
|
+
process.on('SIGINT', handleSignal);
|
|
428
|
+
process.on('SIGTERM', handleSignal);
|
|
429
|
+
})
|
|
430
|
+
.command('generate', 'generate language bindings for the gRPC service', (yargs) => {
|
|
431
|
+
return yargs
|
|
432
|
+
.option('language', {
|
|
433
|
+
alias: 'l',
|
|
434
|
+
describe: 'target language for the generated bindings',
|
|
435
|
+
choices: ['python', 'csharp', 'rust', 'go', 'java', 'typescript'] as const,
|
|
436
|
+
demandOption: true
|
|
108
437
|
})
|
|
109
|
-
.
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
438
|
+
.option('output', {
|
|
439
|
+
alias: 'o',
|
|
440
|
+
describe: 'output directory for the generated files',
|
|
441
|
+
type: 'string',
|
|
442
|
+
demandOption: true
|
|
443
|
+
});
|
|
444
|
+
}, async (args) => {
|
|
445
|
+
const language = args.language as string;
|
|
446
|
+
const outputDir = path.resolve(args.output as string);
|
|
447
|
+
|
|
448
|
+
// Locate the proto file — check lib/ first (installed package), then project root
|
|
449
|
+
const protoSearchPaths = [
|
|
450
|
+
path.join(__dirname, '..', 'proto', 'vectra_service.proto'),
|
|
451
|
+
path.join(__dirname, '..', '..', 'proto', 'vectra_service.proto'),
|
|
452
|
+
];
|
|
453
|
+
let protoSource: string | undefined;
|
|
454
|
+
for (const p of protoSearchPaths) {
|
|
455
|
+
if (fsSync.existsSync(p)) {
|
|
456
|
+
protoSource = p;
|
|
457
|
+
break;
|
|
458
|
+
}
|
|
459
|
+
}
|
|
460
|
+
if (!protoSource) {
|
|
461
|
+
console.error(Colorize.error('Could not locate vectra_service.proto'));
|
|
462
|
+
process.exit(1);
|
|
463
|
+
}
|
|
464
|
+
|
|
465
|
+
// Locate the template directory
|
|
466
|
+
const templateSearchPaths = [
|
|
467
|
+
path.join(__dirname, '..', 'src', 'templates', language),
|
|
468
|
+
path.join(__dirname, 'templates', language),
|
|
469
|
+
];
|
|
470
|
+
let templateDir: string | undefined;
|
|
471
|
+
for (const p of templateSearchPaths) {
|
|
472
|
+
if (fsSync.existsSync(p)) {
|
|
473
|
+
templateDir = p;
|
|
474
|
+
break;
|
|
475
|
+
}
|
|
476
|
+
}
|
|
477
|
+
if (!templateDir) {
|
|
478
|
+
console.error(Colorize.error(`Could not locate template for language: ${language}`));
|
|
479
|
+
process.exit(1);
|
|
480
|
+
}
|
|
481
|
+
|
|
482
|
+
// Create output directory
|
|
483
|
+
await fs.mkdir(outputDir, { recursive: true });
|
|
484
|
+
|
|
485
|
+
// Copy proto file
|
|
486
|
+
const protoDest = path.join(outputDir, 'vectra_service.proto');
|
|
487
|
+
await fs.copyFile(protoSource, protoDest);
|
|
488
|
+
console.log(Colorize.success(`copied vectra_service.proto`));
|
|
489
|
+
|
|
490
|
+
// Copy all template files
|
|
491
|
+
const templateFiles = await fs.readdir(templateDir);
|
|
492
|
+
for (const file of templateFiles) {
|
|
493
|
+
const src = path.join(templateDir, file);
|
|
494
|
+
const stat = await fs.stat(src);
|
|
495
|
+
if (stat.isFile()) {
|
|
496
|
+
const dest = path.join(outputDir, file);
|
|
497
|
+
await fs.copyFile(src, dest);
|
|
498
|
+
console.log(Colorize.success(`copied ${file}`));
|
|
499
|
+
}
|
|
500
|
+
}
|
|
135
501
|
|
|
136
|
-
|
|
137
|
-
const uris = await getItemList(args.uri as string[], args.list as string, 'document');
|
|
502
|
+
console.log(Colorize.output(`\nGenerated ${language} bindings in ${outputDir}`));
|
|
138
503
|
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
504
|
+
// Print next steps
|
|
505
|
+
const nextSteps: Record<string, string> = {
|
|
506
|
+
python: [
|
|
507
|
+
'Next steps:',
|
|
508
|
+
' pip install grpcio grpcio-tools',
|
|
509
|
+
' python -m grpc_tools.protoc -I. --python_out=. --grpc_python_out=. vectra_service.proto',
|
|
510
|
+
].join('\n'),
|
|
511
|
+
csharp: [
|
|
512
|
+
'Next steps:',
|
|
513
|
+
' dotnet add package Grpc.Net.Client',
|
|
514
|
+
' dotnet add package Google.Protobuf',
|
|
515
|
+
' dotnet add package Grpc.Tools',
|
|
516
|
+
' Add <Protobuf Include="vectra_service.proto" GrpcServices="Client" /> to your .csproj',
|
|
517
|
+
].join('\n'),
|
|
518
|
+
rust: [
|
|
519
|
+
'Next steps:',
|
|
520
|
+
' Ensure protoc is installed (apt install protobuf-compiler / brew install protobuf)',
|
|
521
|
+
' cargo build (tonic-build generates stubs automatically)',
|
|
522
|
+
].join('\n'),
|
|
523
|
+
go: [
|
|
524
|
+
'Next steps:',
|
|
525
|
+
' go install google.golang.org/protobuf/cmd/protoc-gen-go@latest',
|
|
526
|
+
' go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@latest',
|
|
527
|
+
' protoc --go_out=. --go-grpc_out=. vectra_service.proto',
|
|
528
|
+
' Update the import path in vectra_client.go to match your module',
|
|
529
|
+
].join('\n'),
|
|
530
|
+
java: [
|
|
531
|
+
'Next steps:',
|
|
532
|
+
' Place vectra_service.proto in src/main/proto/',
|
|
533
|
+
' Add gRPC dependencies to your build tool (see README.md for Gradle/Maven)',
|
|
534
|
+
' Build to generate stubs automatically',
|
|
535
|
+
].join('\n'),
|
|
536
|
+
typescript: [
|
|
537
|
+
'Next steps:',
|
|
538
|
+
' npm install @grpc/grpc-js @grpc/proto-loader',
|
|
539
|
+
' No codegen needed — proto is loaded dynamically at runtime',
|
|
540
|
+
' import { VectraClient } from \'./VectraClient\';',
|
|
541
|
+
].join('\n'),
|
|
542
|
+
};
|
|
543
|
+
console.log(Colorize.output(nextSteps[language]));
|
|
544
|
+
})
|
|
545
|
+
.command('serve [index]', 'start the gRPC server to serve indexes', (yargs) => {
|
|
546
|
+
return yargs
|
|
547
|
+
.positional('index', {
|
|
548
|
+
describe: 'path to a single index directory (mutually exclusive with --root)',
|
|
549
|
+
type: 'string'
|
|
144
550
|
})
|
|
145
|
-
.
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
const stats = await index.getCatalogStats();
|
|
149
|
-
console.log(Colorize.title('Index Stats'));
|
|
150
|
-
console.log(Colorize.output(stats));
|
|
551
|
+
.option('root', {
|
|
552
|
+
describe: 'directory containing multiple index subdirectories',
|
|
553
|
+
type: 'string'
|
|
151
554
|
})
|
|
152
|
-
.
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
default: true
|
|
193
|
-
})
|
|
194
|
-
.option('bm25', {
|
|
195
|
-
alias: 'b',
|
|
196
|
-
describe: 'Use Okapi-bm25 keyword search alogrithm to perform hybrid search - semantic + keyword. Displayed in blue during search.',
|
|
197
|
-
type: 'boolean',
|
|
198
|
-
default: false
|
|
199
|
-
})
|
|
200
|
-
.demandOption(['keys']);
|
|
201
|
-
}, async (args) => {
|
|
202
|
-
console.log(Colorize.title('Querying Index'));
|
|
555
|
+
.option('port', {
|
|
556
|
+
alias: 'p',
|
|
557
|
+
describe: 'port to bind the gRPC server on',
|
|
558
|
+
type: 'number',
|
|
559
|
+
default: 50051
|
|
560
|
+
})
|
|
561
|
+
.option('daemon', {
|
|
562
|
+
describe: 'fork to background as a daemon process',
|
|
563
|
+
type: 'boolean',
|
|
564
|
+
default: false
|
|
565
|
+
})
|
|
566
|
+
.option('pid-file', {
|
|
567
|
+
describe: 'path to PID file (daemon mode only)',
|
|
568
|
+
type: 'string'
|
|
569
|
+
})
|
|
570
|
+
.option('keys', {
|
|
571
|
+
alias: 'k',
|
|
572
|
+
describe: 'path to a JSON file containing the model keys for embeddings',
|
|
573
|
+
type: 'string'
|
|
574
|
+
})
|
|
575
|
+
.check((argv) => {
|
|
576
|
+
if (!argv.index && !argv.root) {
|
|
577
|
+
throw new Error('You must provide either an <index> path or --root <dir>');
|
|
578
|
+
}
|
|
579
|
+
if (argv.index && argv.root) {
|
|
580
|
+
throw new Error('<index> and --root are mutually exclusive');
|
|
581
|
+
}
|
|
582
|
+
return true;
|
|
583
|
+
});
|
|
584
|
+
}, async (args) => {
|
|
585
|
+
// Load embeddings if keys provided
|
|
586
|
+
let embeddings: OpenAIEmbeddings | undefined;
|
|
587
|
+
if (args.keys) {
|
|
588
|
+
const options: OpenAIEmbeddingsOptions | AzureOpenAIEmbeddingsOptions | OSSEmbeddingsOptions = JSON.parse(await fs.readFile(args.keys as string, 'utf-8'));
|
|
589
|
+
if ((options as OpenAIEmbeddingsOptions).apiKey && !(options as OpenAIEmbeddingsOptions).model) {
|
|
590
|
+
(options as OpenAIEmbeddingsOptions).model = 'text-embedding-ada-002';
|
|
591
|
+
(options as OpenAIEmbeddingsOptions).maxTokens = 8000;
|
|
592
|
+
}
|
|
593
|
+
embeddings = new OpenAIEmbeddings(options);
|
|
594
|
+
}
|
|
203
595
|
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
596
|
+
const server = new VectraServer({
|
|
597
|
+
port: args.port,
|
|
598
|
+
indexPath: args.index as string | undefined,
|
|
599
|
+
rootDir: args.root as string | undefined,
|
|
600
|
+
embeddings,
|
|
601
|
+
});
|
|
210
602
|
|
|
211
|
-
|
|
212
|
-
|
|
603
|
+
if (args.daemon) {
|
|
604
|
+
// Daemon mode: fork a child process
|
|
605
|
+
const { spawn } = require('child_process');
|
|
606
|
+
const cliArgs = process.argv.slice(2).filter(a => a !== '--daemon');
|
|
607
|
+
const child = spawn(process.execPath, [process.argv[1], ...cliArgs], {
|
|
608
|
+
detached: true,
|
|
609
|
+
stdio: 'ignore',
|
|
610
|
+
});
|
|
611
|
+
child.unref();
|
|
213
612
|
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
613
|
+
// Write PID file
|
|
614
|
+
const pidFile = args.pidFile as string || path.join(
|
|
615
|
+
(args.root as string) || path.dirname(args.index as string),
|
|
616
|
+
'.vectra.pid'
|
|
617
|
+
);
|
|
618
|
+
await fs.writeFile(pidFile, String(child.pid));
|
|
619
|
+
console.log(Colorize.output(`Vectra server started as daemon (PID: ${child.pid})`));
|
|
620
|
+
console.log(Colorize.output(`PID file: ${pidFile}`));
|
|
621
|
+
process.exit(0);
|
|
622
|
+
} else {
|
|
623
|
+
// Foreground mode
|
|
624
|
+
const port = await server.start();
|
|
625
|
+
console.log(Colorize.output(`Vectra gRPC server listening on 127.0.0.1:${port}`));
|
|
220
626
|
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
627
|
+
const loaded = server.indexManager.listIndexes();
|
|
628
|
+
if (loaded.length > 0) {
|
|
629
|
+
console.log(Colorize.output(`Loaded indexes:`));
|
|
630
|
+
for (const idx of loaded) {
|
|
631
|
+
console.log(Colorize.output(` - ${idx.name} (${idx.format}, ${idx.isDocumentIndex ? 'document' : 'item'})`));
|
|
632
|
+
}
|
|
633
|
+
} else {
|
|
634
|
+
console.log(Colorize.output(`No indexes loaded yet. Use CreateIndex RPC or add index directories.`));
|
|
635
|
+
}
|
|
228
636
|
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
.
|
|
264
|
-
.
|
|
265
|
-
|
|
637
|
+
// Handle graceful shutdown
|
|
638
|
+
const handleSignal = async () => {
|
|
639
|
+
console.log(Colorize.output('\nShutting down...'));
|
|
640
|
+
await server.shutdown();
|
|
641
|
+
process.exit(0);
|
|
642
|
+
};
|
|
643
|
+
process.on('SIGINT', handleSignal);
|
|
644
|
+
process.on('SIGTERM', handleSignal);
|
|
645
|
+
}
|
|
646
|
+
})
|
|
647
|
+
.command('stop', 'stop a running Vectra daemon', (yargs) => {
|
|
648
|
+
return yargs.option('pid-file', {
|
|
649
|
+
describe: 'path to PID file',
|
|
650
|
+
type: 'string',
|
|
651
|
+
demandOption: true
|
|
652
|
+
});
|
|
653
|
+
}, async (args) => {
|
|
654
|
+
const pidFile = args.pidFile as string;
|
|
655
|
+
if (!fsSync.existsSync(pidFile)) {
|
|
656
|
+
console.log(Colorize.error(`PID file not found: ${pidFile}`));
|
|
657
|
+
process.exit(1);
|
|
658
|
+
}
|
|
659
|
+
const pid = parseInt(await fs.readFile(pidFile, 'utf-8'), 10);
|
|
660
|
+
if (isNaN(pid)) {
|
|
661
|
+
console.log(Colorize.error(`Invalid PID in file: ${pidFile}`));
|
|
662
|
+
process.exit(1);
|
|
663
|
+
}
|
|
664
|
+
|
|
665
|
+
try {
|
|
666
|
+
// Send SIGTERM for graceful shutdown
|
|
667
|
+
process.kill(pid, 'SIGTERM');
|
|
668
|
+
console.log(Colorize.output(`Sent SIGTERM to PID ${pid}`));
|
|
669
|
+
|
|
670
|
+
// Wait up to 10s for process to exit
|
|
671
|
+
const deadline = Date.now() + 10000;
|
|
672
|
+
while (Date.now() < deadline) {
|
|
673
|
+
try {
|
|
674
|
+
process.kill(pid, 0); // check if process exists
|
|
675
|
+
await new Promise(r => setTimeout(r, 500));
|
|
676
|
+
} catch {
|
|
677
|
+
// Process no longer exists
|
|
678
|
+
break;
|
|
679
|
+
}
|
|
680
|
+
}
|
|
266
681
|
|
|
682
|
+
// Check if still alive and force kill
|
|
683
|
+
try {
|
|
684
|
+
process.kill(pid, 0);
|
|
685
|
+
process.kill(pid, 'SIGKILL');
|
|
686
|
+
console.log(Colorize.output(`Force-killed PID ${pid}`));
|
|
687
|
+
} catch {
|
|
688
|
+
// Already dead
|
|
689
|
+
}
|
|
690
|
+
|
|
691
|
+
// Remove PID file
|
|
692
|
+
await fs.unlink(pidFile).catch(() => {});
|
|
693
|
+
console.log(Colorize.output('Vectra server stopped'));
|
|
694
|
+
} catch (err: any) {
|
|
695
|
+
if (err.code === 'ESRCH') {
|
|
696
|
+
console.log(Colorize.output(`Process ${pid} not running. Cleaning up PID file.`));
|
|
697
|
+
await fs.unlink(pidFile).catch(() => {});
|
|
698
|
+
} else {
|
|
699
|
+
console.log(Colorize.error(`Failed to stop server: ${err.message}`));
|
|
700
|
+
process.exit(1);
|
|
701
|
+
}
|
|
702
|
+
}
|
|
703
|
+
})
|
|
704
|
+
.help()
|
|
705
|
+
.demandCommand()
|
|
706
|
+
.parseAsync();
|
|
707
|
+
}
|
|
267
708
|
|
|
268
709
|
/**
 * Resolves the list of items (document URIs or watch paths) a CLI command should operate on.
 *
 * Items given directly take precedence; the list file is only read when no items were passed.
 *
 * @param items Items passed on the command line (the `--uri` option). Callers cast the raw
 * yargs value, so this may be `undefined` or empty when the option was omitted.
 * @param listFile Path of a UTF-8 text file holding one item per line (the `--list` option).
 * May be `undefined` or blank when the option was omitted.
 * @param uriType Noun describing a single item (callers pass e.g. `'path'`); only used to
 * build the error message.
 * @returns `items` unchanged when it is a non-empty array; otherwise the trimmed, non-blank
 * lines of `listFile` (which can be an empty array if the file has no non-blank lines).
 * @throws Error when neither a non-empty `items` array nor a non-blank `listFile` is supplied.
 * The returned promise also rejects with the underlying `readFile` error if the list file
 * cannot be read.
 */
async function getItemList(items: string[] | undefined, listFile: string | undefined, uriType: string): Promise<string[]> {
    // Explicit items win over the list file.
    if (Array.isArray(items) && items.length > 0) {
        return items;
    }

    if (typeof listFile === 'string' && listFile.trim().length > 0) {
        const contents = await fs.readFile(listFile, 'utf-8');
        // trim() also strips the trailing '\r' left behind by CRLF line endings.
        return contents
            .split('\n')
            .map((line) => line.trim())
            .filter((line) => line.length > 0);
    }

    throw new Error(`you must specify either one or more "--uri <${uriType}>" for the items or a "--list <file path>" for a file containing the items.`);
}
|