vectra 0.12.2 → 0.14.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +1 -1
- package/README.md +92 -100
- package/bin/vectra.js +3 -0
- package/lib/BrowserWebFetcher.d.ts +75 -0
- package/lib/BrowserWebFetcher.d.ts.map +1 -0
- package/lib/BrowserWebFetcher.js +290 -0
- package/lib/BrowserWebFetcher.js.map +1 -0
- package/lib/FileFetcher.d.ts +5 -0
- package/lib/FileFetcher.d.ts.map +1 -0
- package/lib/FileFetcher.js +89 -0
- package/lib/FileFetcher.js.map +1 -0
- package/lib/FileFetcher.spec.d.ts +2 -0
- package/lib/FileFetcher.spec.d.ts.map +1 -0
- package/lib/FileFetcher.spec.js +244 -0
- package/lib/FileFetcher.spec.js.map +1 -0
- package/lib/FolderWatcher.d.ts +91 -0
- package/lib/FolderWatcher.d.ts.map +1 -0
- package/lib/FolderWatcher.js +304 -0
- package/lib/FolderWatcher.js.map +1 -0
- package/lib/FolderWatcher.spec.d.ts +2 -0
- package/lib/FolderWatcher.spec.d.ts.map +1 -0
- package/lib/FolderWatcher.spec.js +308 -0
- package/lib/FolderWatcher.spec.js.map +1 -0
- package/lib/GPT3Tokenizer.d.ts +9 -0
- package/lib/GPT3Tokenizer.spec.d.ts +2 -0
- package/lib/GPT3Tokenizer.spec.d.ts.map +1 -0
- package/lib/GPT3Tokenizer.spec.js +45 -0
- package/lib/GPT3Tokenizer.spec.js.map +1 -0
- package/lib/ItemSelector.d.ts +41 -0
- package/lib/ItemSelector.d.ts.map +1 -0
- package/lib/ItemSelector.js +179 -0
- package/lib/ItemSelector.js.map +1 -0
- package/lib/ItemSelector.spec.d.ts +2 -0
- package/lib/ItemSelector.spec.d.ts.map +1 -0
- package/lib/ItemSelector.spec.js +204 -0
- package/lib/ItemSelector.spec.js.map +1 -0
- package/lib/LocalDocument.d.ts +54 -0
- package/lib/LocalDocument.d.ts.map +1 -1
- package/lib/LocalDocument.js +116 -0
- package/lib/LocalDocument.js.map +1 -0
- package/lib/LocalDocument.spec.d.ts +2 -0
- package/lib/LocalDocument.spec.d.ts.map +1 -0
- package/lib/LocalDocument.spec.js +214 -0
- package/lib/LocalDocument.spec.js.map +1 -0
- package/lib/LocalDocumentIndex.d.ts +152 -0
- package/lib/LocalDocumentIndex.d.ts.map +1 -1
- package/lib/LocalDocumentIndex.js +420 -0
- package/lib/LocalDocumentIndex.js.map +1 -0
- package/lib/LocalDocumentIndex.spec.d.ts +2 -0
- package/lib/LocalDocumentIndex.spec.d.ts.map +1 -0
- package/lib/LocalDocumentIndex.spec.js +494 -0
- package/lib/LocalDocumentIndex.spec.js.map +1 -0
- package/lib/LocalDocumentResult.d.ts +66 -0
- package/lib/LocalDocumentResult.d.ts.map +1 -1
- package/lib/LocalDocumentResult.js +376 -0
- package/lib/LocalDocumentResult.js.map +1 -0
- package/lib/LocalDocumentResult.spec.d.ts +2 -0
- package/lib/LocalDocumentResult.spec.d.ts.map +1 -0
- package/lib/LocalDocumentResult.spec.js +373 -0
- package/lib/LocalDocumentResult.spec.js.map +1 -0
- package/lib/LocalEmbeddings.d.ts +59 -0
- package/lib/LocalEmbeddings.d.ts.map +1 -0
- package/lib/LocalEmbeddings.js +101 -0
- package/lib/LocalEmbeddings.js.map +1 -0
- package/lib/LocalEmbeddings.spec.d.ts +2 -0
- package/lib/LocalEmbeddings.spec.d.ts.map +1 -0
- package/lib/LocalEmbeddings.spec.js +155 -0
- package/lib/LocalEmbeddings.spec.js.map +1 -0
- package/lib/LocalIndex.d.ts +159 -0
- package/lib/LocalIndex.d.ts.map +1 -1
- package/lib/LocalIndex.js +519 -0
- package/lib/LocalIndex.js.map +1 -0
- package/lib/LocalIndex.spec.d.ts +2 -0
- package/lib/LocalIndex.spec.js +611 -9
- package/lib/LocalIndex.spec.js.map +1 -1
- package/lib/OpenAIEmbeddings.d.ts +124 -0
- package/lib/OpenAIEmbeddings.d.ts.map +1 -0
- package/lib/OpenAIEmbeddings.js +166 -0
- package/lib/OpenAIEmbeddings.js.map +1 -0
- package/lib/OpenAIEmbeddings.spec.d.ts +2 -0
- package/lib/OpenAIEmbeddings.spec.d.ts.map +1 -0
- package/lib/OpenAIEmbeddings.spec.js +298 -0
- package/lib/OpenAIEmbeddings.spec.js.map +1 -0
- package/lib/TextSplitter.d.ts +21 -0
- package/lib/TextSplitter.d.ts.map +1 -1
- package/lib/TextSplitter.js +500 -0
- package/lib/TextSplitter.js.map +1 -0
- package/lib/TextSplitter.spec.d.ts +2 -0
- package/lib/TextSplitter.spec.d.ts.map +1 -0
- package/lib/TextSplitter.spec.js +337 -0
- package/lib/TextSplitter.spec.js.map +1 -0
- package/lib/TransformersEmbeddings.d.ts +121 -0
- package/lib/TransformersEmbeddings.d.ts.map +1 -0
- package/lib/TransformersEmbeddings.js +176 -0
- package/lib/TransformersEmbeddings.js.map +1 -0
- package/lib/TransformersEmbeddings.spec.d.ts +2 -0
- package/lib/TransformersEmbeddings.spec.d.ts.map +1 -0
- package/lib/TransformersEmbeddings.spec.js +198 -0
- package/lib/TransformersEmbeddings.spec.js.map +1 -0
- package/lib/TransformersTokenizer.d.ts +33 -0
- package/lib/TransformersTokenizer.d.ts.map +1 -0
- package/lib/TransformersTokenizer.js +44 -0
- package/lib/TransformersTokenizer.js.map +1 -0
- package/lib/TransformersTokenizer.spec.d.ts +2 -0
- package/lib/TransformersTokenizer.spec.d.ts.map +1 -0
- package/lib/TransformersTokenizer.spec.js +112 -0
- package/lib/TransformersTokenizer.spec.js.map +1 -0
- package/lib/WebFetcher.d.ts +14 -0
- package/lib/WebFetcher.d.ts.map +1 -0
- package/lib/WebFetcher.js +238 -0
- package/lib/WebFetcher.js.map +1 -0
- package/lib/WebFetcher.spec.d.ts +2 -0
- package/lib/WebFetcher.spec.d.ts.map +1 -0
- package/lib/WebFetcher.spec.js +263 -0
- package/lib/WebFetcher.spec.js.map +1 -0
- package/lib/browser.d.ts +30 -0
- package/lib/browser.d.ts.map +1 -0
- package/lib/browser.js +52 -0
- package/lib/browser.js.map +1 -0
- package/lib/codecs/IndexCodec.d.ts +37 -0
- package/lib/codecs/IndexCodec.d.ts.map +1 -0
- package/lib/codecs/IndexCodec.js +3 -0
- package/lib/codecs/IndexCodec.js.map +1 -0
- package/lib/codecs/JsonCodec.d.ts +19 -0
- package/lib/codecs/JsonCodec.d.ts.map +1 -0
- package/lib/codecs/JsonCodec.js +35 -0
- package/lib/codecs/JsonCodec.js.map +1 -0
- package/lib/codecs/JsonCodec.spec.d.ts +2 -0
- package/lib/codecs/JsonCodec.spec.d.ts.map +1 -0
- package/lib/codecs/JsonCodec.spec.js +66 -0
- package/lib/codecs/JsonCodec.spec.js.map +1 -0
- package/lib/codecs/LocalIndex.protobuf.spec.d.ts +2 -0
- package/lib/codecs/LocalIndex.protobuf.spec.d.ts.map +1 -0
- package/lib/codecs/LocalIndex.protobuf.spec.js +108 -0
- package/lib/codecs/LocalIndex.protobuf.spec.js.map +1 -0
- package/lib/codecs/ProtobufCodec.d.ts +20 -0
- package/lib/codecs/ProtobufCodec.d.ts.map +1 -0
- package/lib/codecs/ProtobufCodec.js +225 -0
- package/lib/codecs/ProtobufCodec.js.map +1 -0
- package/lib/codecs/ProtobufCodec.spec.d.ts +2 -0
- package/lib/codecs/ProtobufCodec.spec.d.ts.map +1 -0
- package/lib/codecs/ProtobufCodec.spec.js +155 -0
- package/lib/codecs/ProtobufCodec.spec.js.map +1 -0
- package/lib/codecs/index.d.ts +5 -0
- package/lib/codecs/index.d.ts.map +1 -0
- package/lib/codecs/index.js +21 -0
- package/lib/codecs/index.js.map +1 -0
- package/lib/codecs/migrateIndex.d.ts +24 -0
- package/lib/codecs/migrateIndex.d.ts.map +1 -0
- package/lib/codecs/migrateIndex.js +119 -0
- package/lib/codecs/migrateIndex.js.map +1 -0
- package/lib/codecs/migrateIndex.spec.d.ts +2 -0
- package/lib/codecs/migrateIndex.spec.d.ts.map +1 -0
- package/lib/codecs/migrateIndex.spec.js +151 -0
- package/lib/codecs/migrateIndex.spec.js.map +1 -0
- package/lib/codecs/schemas/index.proto +34 -0
- package/lib/index.d.ts +20 -0
- package/lib/index.d.ts.map +1 -1
- package/lib/index.js +36 -0
- package/lib/index.js.map +1 -0
- package/lib/internals/Colorize.d.ts +14 -0
- package/lib/internals/Colorize.d.ts.map +1 -0
- package/lib/internals/Colorize.js +69 -0
- package/lib/internals/Colorize.js.map +1 -0
- package/lib/internals/index.d.ts +3 -0
- package/lib/internals/index.d.ts.map +1 -0
- package/lib/internals/index.js +19 -0
- package/lib/internals/index.js.map +1 -0
- package/lib/internals/types.d.ts +43 -0
- package/lib/internals/types.d.ts.map +1 -0
- package/lib/internals/types.js +3 -0
- package/lib/internals/types.js.map +1 -0
- package/lib/server/IndexManager.d.ts +78 -0
- package/lib/server/IndexManager.d.ts.map +1 -0
- package/lib/server/IndexManager.js +259 -0
- package/lib/server/IndexManager.js.map +1 -0
- package/lib/server/VectraServer.d.ts +40 -0
- package/lib/server/VectraServer.d.ts.map +1 -0
- package/lib/server/VectraServer.js +151 -0
- package/lib/server/VectraServer.js.map +1 -0
- package/lib/server/VectraServer.spec.d.ts +2 -0
- package/lib/server/VectraServer.spec.d.ts.map +1 -0
- package/lib/server/VectraServer.spec.js +322 -0
- package/lib/server/VectraServer.spec.js.map +1 -0
- package/lib/server/handlers/documentHandlers.d.ts +15 -0
- package/lib/server/handlers/documentHandlers.d.ts.map +1 -0
- package/lib/server/handlers/documentHandlers.js +95 -0
- package/lib/server/handlers/documentHandlers.js.map +1 -0
- package/lib/server/handlers/helpers.d.ts +23 -0
- package/lib/server/handlers/helpers.d.ts.map +1 -0
- package/lib/server/handlers/helpers.js +138 -0
- package/lib/server/handlers/helpers.js.map +1 -0
- package/lib/server/handlers/index.d.ts +8 -0
- package/lib/server/handlers/index.d.ts.map +1 -0
- package/lib/server/handlers/index.js +22 -0
- package/lib/server/handlers/index.js.map +1 -0
- package/lib/server/handlers/indexHandlers.d.ts +14 -0
- package/lib/server/handlers/indexHandlers.d.ts.map +1 -0
- package/lib/server/handlers/indexHandlers.js +85 -0
- package/lib/server/handlers/indexHandlers.js.map +1 -0
- package/lib/server/handlers/itemHandlers.d.ts +34 -0
- package/lib/server/handlers/itemHandlers.d.ts.map +1 -0
- package/lib/server/handlers/itemHandlers.js +166 -0
- package/lib/server/handlers/itemHandlers.js.map +1 -0
- package/lib/server/handlers/lifecycleHandlers.d.ts +11 -0
- package/lib/server/handlers/lifecycleHandlers.d.ts.map +1 -0
- package/lib/server/handlers/lifecycleHandlers.js +31 -0
- package/lib/server/handlers/lifecycleHandlers.js.map +1 -0
- package/lib/server/handlers/queryHandlers.d.ts +27 -0
- package/lib/server/handlers/queryHandlers.d.ts.map +1 -0
- package/lib/server/handlers/queryHandlers.js +135 -0
- package/lib/server/handlers/queryHandlers.js.map +1 -0
- package/lib/server/handlers/statsHandlers.d.ts +17 -0
- package/lib/server/handlers/statsHandlers.d.ts.map +1 -0
- package/lib/server/handlers/statsHandlers.js +81 -0
- package/lib/server/handlers/statsHandlers.js.map +1 -0
- package/lib/server/index.d.ts +4 -0
- package/lib/server/index.d.ts.map +1 -0
- package/lib/server/index.js +23 -0
- package/lib/server/index.js.map +1 -0
- package/lib/storage/FileStorage.d.ts +92 -0
- package/lib/storage/FileStorage.d.ts.map +1 -0
- package/lib/storage/FileStorage.js +3 -0
- package/lib/storage/FileStorage.js.map +1 -0
- package/lib/storage/FileStorageUtilities.d.ts +36 -0
- package/lib/storage/FileStorageUtilities.d.ts.map +1 -0
- package/lib/storage/FileStorageUtilities.js +91 -0
- package/lib/storage/FileStorageUtilities.js.map +1 -0
- package/lib/storage/FileStorageUtilities.spec.d.ts +2 -0
- package/lib/storage/FileStorageUtilities.spec.d.ts.map +1 -0
- package/lib/storage/FileStorageUtilities.spec.js +98 -0
- package/lib/storage/FileStorageUtilities.spec.js.map +1 -0
- package/lib/storage/FileType.d.ts +29 -0
- package/lib/storage/FileType.d.ts.map +1 -0
- package/lib/storage/FileType.js +38 -0
- package/lib/storage/FileType.js.map +1 -0
- package/lib/storage/IndexedDBStorage.d.ts +47 -0
- package/lib/storage/IndexedDBStorage.d.ts.map +1 -0
- package/lib/storage/IndexedDBStorage.js +347 -0
- package/lib/storage/IndexedDBStorage.js.map +1 -0
- package/lib/storage/LocalFileStorage.browser.d.ts +19 -0
- package/lib/storage/LocalFileStorage.browser.d.ts.map +1 -0
- package/lib/storage/LocalFileStorage.browser.js +43 -0
- package/lib/storage/LocalFileStorage.browser.js.map +1 -0
- package/lib/storage/LocalFileStorage.d.ts +23 -0
- package/lib/storage/LocalFileStorage.d.ts.map +1 -0
- package/lib/storage/LocalFileStorage.js +152 -0
- package/lib/storage/LocalFileStorage.js.map +1 -0
- package/lib/storage/LocalFileStorage.spec.d.ts +2 -0
- package/lib/storage/LocalFileStorage.spec.d.ts.map +1 -0
- package/lib/storage/LocalFileStorage.spec.js +249 -0
- package/lib/storage/LocalFileStorage.spec.js.map +1 -0
- package/lib/storage/VirtualFileStorage.d.ts +18 -0
- package/lib/storage/VirtualFileStorage.d.ts.map +1 -0
- package/lib/storage/VirtualFileStorage.js +178 -0
- package/lib/storage/VirtualFileStorage.js.map +1 -0
- package/lib/storage/VirtualFileStorage.spec.d.ts +2 -0
- package/lib/storage/VirtualFileStorage.spec.d.ts.map +1 -0
- package/lib/storage/VirtualFileStorage.spec.js +302 -0
- package/lib/storage/VirtualFileStorage.spec.js.map +1 -0
- package/lib/storage/index.d.ts +6 -0
- package/lib/storage/index.d.ts.map +1 -0
- package/lib/storage/index.js +22 -0
- package/lib/storage/index.js.map +1 -0
- package/lib/templates/templates/csharp/README.md +48 -0
- package/lib/templates/templates/csharp/VectraClient.cs +234 -0
- package/lib/templates/templates/go/README.md +71 -0
- package/lib/templates/templates/go/vectra_client.go +322 -0
- package/lib/templates/templates/java/README.md +81 -0
- package/lib/templates/templates/java/VectraClient.java +232 -0
- package/lib/templates/templates/python/README.md +37 -0
- package/lib/templates/templates/python/vectra_client.py +279 -0
- package/lib/templates/templates/rust/Cargo.toml +14 -0
- package/lib/templates/templates/rust/README.md +39 -0
- package/lib/templates/templates/rust/build.rs +4 -0
- package/lib/templates/templates/rust/lib.rs +284 -0
- package/lib/templates/templates/typescript/README.md +96 -0
- package/lib/templates/templates/typescript/VectraClient.ts +374 -0
- package/lib/templates/typescript/VectraClient.d.ts +114 -0
- package/lib/templates/typescript/VectraClient.d.ts.map +1 -0
- package/lib/templates/typescript/VectraClient.js +328 -0
- package/lib/templates/typescript/VectraClient.js.map +1 -0
- package/lib/types.d.ts +153 -0
- package/lib/types.d.ts.map +1 -0
- package/lib/types.js +3 -0
- package/lib/types.js.map +1 -0
- package/lib/utils/index.d.ts +2 -0
- package/lib/utils/index.d.ts.map +1 -0
- package/lib/utils/index.js +18 -0
- package/lib/utils/index.js.map +1 -0
- package/lib/utils/pathUtils.d.ts +40 -0
- package/lib/utils/pathUtils.d.ts.map +1 -0
- package/lib/utils/pathUtils.js +98 -0
- package/lib/utils/pathUtils.js.map +1 -0
- package/lib/vectra-cli.d.ts +2 -0
- package/lib/vectra-cli.d.ts.map +1 -1
- package/lib/vectra-cli.generate.spec.d.ts +2 -0
- package/lib/vectra-cli.generate.spec.d.ts.map +1 -0
- package/lib/vectra-cli.generate.spec.js +112 -0
- package/lib/vectra-cli.generate.spec.js.map +1 -0
- package/lib/vectra-cli.js +760 -0
- package/lib/vectra-cli.js.map +1 -0
- package/lib/vectra-cli.spec.d.ts +1 -0
- package/lib/vectra-cli.spec.d.ts.map +1 -0
- package/lib/vectra-cli.spec.js +2 -0
- package/lib/vectra-cli.spec.js.map +1 -0
- package/package.json +91 -16
- package/proto/vectra_service.proto +276 -0
- package/src/BrowserWebFetcher.ts +345 -0
- package/src/FileFetcher.spec.ts +234 -0
- package/src/FileFetcher.ts +37 -25
- package/src/FolderWatcher.spec.ts +288 -0
- package/src/FolderWatcher.ts +304 -0
- package/src/GPT3Tokenizer.spec.ts +50 -0
- package/src/ItemSelector.spec.ts +252 -0
- package/src/ItemSelector.ts +163 -150
- package/src/LocalDocument.spec.ts +211 -0
- package/src/LocalDocument.ts +88 -94
- package/src/LocalDocumentIndex.spec.ts +481 -0
- package/src/LocalDocumentIndex.ts +39 -40
- package/src/LocalDocumentResult.spec.ts +373 -0
- package/src/LocalDocumentResult.ts +489 -319
- package/src/LocalEmbeddings.spec.ts +138 -0
- package/src/LocalEmbeddings.ts +120 -0
- package/src/LocalIndex.spec.ts +808 -66
- package/src/LocalIndex.ts +479 -429
- package/src/OpenAIEmbeddings.spec.ts +354 -0
- package/src/OpenAIEmbeddings.ts +26 -27
- package/src/TextSplitter.spec.ts +342 -0
- package/src/TextSplitter.ts +517 -532
- package/src/TransformersEmbeddings.spec.ts +188 -0
- package/src/TransformersEmbeddings.ts +232 -0
- package/src/TransformersTokenizer.spec.ts +143 -0
- package/src/TransformersTokenizer.ts +45 -0
- package/src/WebFetcher.spec.ts +288 -0
- package/src/WebFetcher.ts +184 -186
- package/src/browser.ts +69 -0
- package/src/codecs/IndexCodec.ts +40 -0
- package/src/codecs/JsonCodec.spec.ts +70 -0
- package/src/codecs/JsonCodec.ts +37 -0
- package/src/codecs/LocalIndex.protobuf.spec.ts +115 -0
- package/src/codecs/ProtobufCodec.spec.ts +166 -0
- package/src/codecs/ProtobufCodec.ts +193 -0
- package/src/codecs/index.ts +4 -0
- package/src/codecs/migrateIndex.spec.ts +176 -0
- package/src/codecs/migrateIndex.ts +125 -0
- package/src/codecs/schemas/index.proto +34 -0
- package/src/index.ts +9 -1
- package/src/internals/Colorize.ts +19 -16
- package/src/server/IndexManager.ts +243 -0
- package/src/server/VectraServer.spec.ts +303 -0
- package/src/server/VectraServer.ts +156 -0
- package/src/server/handlers/documentHandlers.ts +59 -0
- package/src/server/handlers/helpers.ts +93 -0
- package/src/server/handlers/index.ts +7 -0
- package/src/server/handlers/indexHandlers.ts +44 -0
- package/src/server/handlers/itemHandlers.ts +140 -0
- package/src/server/handlers/lifecycleHandlers.ts +26 -0
- package/src/server/handlers/queryHandlers.ts +96 -0
- package/src/server/handlers/statsHandlers.ts +38 -0
- package/src/server/index.ts +3 -0
- package/src/storage/FileStorage.ts +105 -0
- package/src/storage/FileStorageUtilities.spec.ts +106 -0
- package/src/storage/FileStorageUtilities.ts +77 -0
- package/src/storage/FileType.ts +61 -0
- package/src/storage/IndexedDBStorage.ts +365 -0
- package/src/storage/LocalFileStorage.browser.ts +52 -0
- package/src/storage/LocalFileStorage.spec.ts +292 -0
- package/src/storage/LocalFileStorage.ts +98 -0
- package/src/storage/VirtualFileStorage.spec.ts +307 -0
- package/src/storage/VirtualFileStorage.ts +169 -0
- package/src/storage/index.ts +5 -0
- package/src/templates/csharp/README.md +48 -0
- package/src/templates/csharp/VectraClient.cs +234 -0
- package/src/templates/go/README.md +71 -0
- package/src/templates/go/vectra_client.go +322 -0
- package/src/templates/java/README.md +81 -0
- package/src/templates/java/VectraClient.java +232 -0
- package/src/templates/python/README.md +37 -0
- package/src/templates/python/vectra_client.py +279 -0
- package/src/templates/rust/Cargo.toml +14 -0
- package/src/templates/rust/README.md +39 -0
- package/src/templates/rust/build.rs +4 -0
- package/src/templates/rust/lib.rs +284 -0
- package/src/templates/typescript/README.md +96 -0
- package/src/templates/typescript/VectraClient.ts +374 -0
- package/src/types.ts +131 -123
- package/src/utils/index.ts +1 -0
- package/src/utils/pathUtils.ts +106 -0
- package/src/vectra-cli.generate.spec.ts +72 -0
- package/src/vectra-cli.spec.ts +0 -0
- package/src/vectra-cli.ts +687 -246
|
@@ -0,0 +1,288 @@
|
|
|
1
|
+
import { strict as assert } from 'assert';
|
|
2
|
+
import * as sinon from 'sinon';
|
|
3
|
+
import * as os from 'os';
|
|
4
|
+
import * as path from 'path';
|
|
5
|
+
import fs from 'node:fs';
|
|
6
|
+
import { FolderWatcher } from './FolderWatcher';
|
|
7
|
+
import { LocalDocumentIndex } from './LocalDocumentIndex';
|
|
8
|
+
import { EmbeddingsModel, EmbeddingsResponse } from './types';
|
|
9
|
+
import { LocalFileStorage } from './storage/LocalFileStorage';
|
|
10
|
+
|
|
11
|
+
/**
 * Test double for an embeddings model.
 *
 * Produces the same deterministic vector for every input so tests never
 * depend on a real embeddings backend.
 */
class StubEmbeddings implements EmbeddingsModel {
    public readonly maxTokens = 8000;

    /**
     * Builds one 384-element vector per input, with a 1 in slot 0 and 0 elsewhere.
     * @param inputs A single text or a list of texts; only the count matters.
     * @returns A `success` response whose `output` holds one vector per input.
     */
    public async createEmbeddings(inputs: string | string[]): Promise<EmbeddingsResponse> {
        const count = Array.isArray(inputs) ? inputs.length : 1;
        const output: number[][] = [];
        for (let i = 0; i < count; i++) {
            const unit = new Array(384).fill(0);
            unit[0] = 1; // unit vector
            output.push(unit);
        }
        return { status: 'success', output };
    }
}
|
|
24
|
+
|
|
25
|
+
/**
 * Integration tests for FolderWatcher against a real temporary directory tree.
 *
 * Every test that starts a watcher stops it in a `finally` block, and all work
 * performed after `start()` lives inside the matching `try`. A failed assertion
 * therefore can never leave a running watcher behind to keep the test process
 * alive or interfere with later tests.
 */
describe('FolderWatcher', () => {
    let tmpDir: string;
    let indexDir: string;
    let watchDir: string;
    let index: LocalDocumentIndex;
    let sandbox: sinon.SinonSandbox;

    // Fresh temp tree per test: <tmp>/index holds the document index, <tmp>/watch is the watched folder.
    beforeEach(async () => {
        sandbox = sinon.createSandbox();
        tmpDir = await fs.promises.mkdtemp(path.join(os.tmpdir(), 'vectra-watch-'));
        indexDir = path.join(tmpDir, 'index');
        watchDir = path.join(tmpDir, 'watch');
        await fs.promises.mkdir(indexDir, { recursive: true });
        await fs.promises.mkdir(watchDir, { recursive: true });

        index = new LocalDocumentIndex({
            folderPath: indexDir,
            embeddings: new StubEmbeddings(),
            storage: new LocalFileStorage(),
        });
        await index.createIndex({ version: 1, deleteIfExists: true });
    });

    // Restore any sinon fakes and remove the whole temp tree.
    afterEach(async () => {
        sandbox.restore();
        await fs.promises.rm(tmpDir, { recursive: true, force: true });
    });

    it('should perform initial sync of existing files', async () => {
        // Create files before starting watcher
        await fs.promises.writeFile(path.join(watchDir, 'file1.txt'), 'hello world');
        await fs.promises.writeFile(path.join(watchDir, 'file2.txt'), 'goodbye world');

        const watcher = new FolderWatcher({ index, paths: [watchDir] });
        const synced: string[] = [];
        watcher.on('sync', (uri: string) => synced.push(uri));

        await watcher.start();
        try {
            assert.equal(watcher.trackedFileCount, 2);
            assert.equal(synced.length, 2);
            // Verify documents exist in index
            const id1 = await index.getDocumentId(path.join(watchDir, 'file1.txt'));
            const id2 = await index.getDocumentId(path.join(watchDir, 'file2.txt'));
            assert.ok(id1, 'file1.txt should be indexed');
            assert.ok(id2, 'file2.txt should be indexed');
        } finally {
            await watcher.stop();
        }
    });

    it('should emit ready event after initial sync', async () => {
        const watcher = new FolderWatcher({ index, paths: [watchDir] });
        let ready = false;
        watcher.on('ready', () => { ready = true; });

        await watcher.start();
        try {
            assert.equal(ready, true);
        } finally {
            await watcher.stop();
        }
    });

    it('should filter by extensions', async () => {
        await fs.promises.writeFile(path.join(watchDir, 'include.txt'), 'included');
        await fs.promises.writeFile(path.join(watchDir, 'exclude.js'), 'excluded');
        await fs.promises.writeFile(path.join(watchDir, 'include.md'), 'also included');

        const watcher = new FolderWatcher({
            index,
            paths: [watchDir],
            extensions: ['.txt', '.md']
        });

        await watcher.start();
        try {
            assert.equal(watcher.trackedFileCount, 2);
            const idTxt = await index.getDocumentId(path.join(watchDir, 'include.txt'));
            const idMd = await index.getDocumentId(path.join(watchDir, 'include.md'));
            const idJs = await index.getDocumentId(path.join(watchDir, 'exclude.js'));
            assert.ok(idTxt, 'include.txt should be indexed');
            assert.ok(idMd, 'include.md should be indexed');
            assert.equal(idJs, undefined, 'exclude.js should not be indexed');
        } finally {
            await watcher.stop();
        }
    });

    it('should handle extensions without leading dot', async () => {
        await fs.promises.writeFile(path.join(watchDir, 'test.txt'), 'hello');

        const watcher = new FolderWatcher({
            index,
            paths: [watchDir],
            extensions: ['txt']
        });

        await watcher.start();
        try {
            assert.equal(watcher.trackedFileCount, 1);
        } finally {
            await watcher.stop();
        }
    });

    it('should recurse into subdirectories', async () => {
        const subDir = path.join(watchDir, 'sub');
        await fs.promises.mkdir(subDir, { recursive: true });
        await fs.promises.writeFile(path.join(watchDir, 'root.txt'), 'root');
        await fs.promises.writeFile(path.join(subDir, 'nested.txt'), 'nested');

        const watcher = new FolderWatcher({ index, paths: [watchDir] });

        await watcher.start();
        try {
            assert.equal(watcher.trackedFileCount, 2);
            const idRoot = await index.getDocumentId(path.join(watchDir, 'root.txt'));
            const idNested = await index.getDocumentId(path.join(subDir, 'nested.txt'));
            assert.ok(idRoot, 'root.txt should be indexed');
            assert.ok(idNested, 'nested.txt should be indexed');
        } finally {
            await watcher.stop();
        }
    });

    it('should watch individual files', async () => {
        const singleFile = path.join(tmpDir, 'single.txt');
        await fs.promises.writeFile(singleFile, 'single file');

        const watcher = new FolderWatcher({ index, paths: [singleFile] });

        await watcher.start();
        try {
            assert.equal(watcher.trackedFileCount, 1);
            const id = await index.getDocumentId(singleFile);
            assert.ok(id, 'single.txt should be indexed');
        } finally {
            await watcher.stop();
        }
    });

    it('should watch multiple paths', async () => {
        const dir2 = path.join(tmpDir, 'watch2');
        await fs.promises.mkdir(dir2, { recursive: true });
        await fs.promises.writeFile(path.join(watchDir, 'a.txt'), 'a');
        await fs.promises.writeFile(path.join(dir2, 'b.txt'), 'b');

        const watcher = new FolderWatcher({ index, paths: [watchDir, dir2] });

        await watcher.start();
        try {
            assert.equal(watcher.trackedFileCount, 2);
        } finally {
            await watcher.stop();
        }
    });

    it('should handle sync() detecting deleted files', async () => {
        await fs.promises.writeFile(path.join(watchDir, 'ephemeral.txt'), 'temporary');

        const watcher = new FolderWatcher({ index, paths: [watchDir] });
        await watcher.start();
        // Everything after start() runs inside try so the watcher is stopped even if a step fails.
        try {
            assert.equal(watcher.trackedFileCount, 1);

            // Delete the file
            await fs.promises.unlink(path.join(watchDir, 'ephemeral.txt'));

            // Manual sync should detect deletion
            const synced: Array<{ uri: string; action: string }> = [];
            watcher.on('sync', (uri: string, action: string) => synced.push({ uri, action }));
            await watcher.sync();

            assert.equal(watcher.trackedFileCount, 0);
            const deleted = synced.find(s => s.action === 'deleted');
            assert.ok(deleted, 'should have emitted a delete event');
        } finally {
            await watcher.stop();
        }
    });

    it('should handle sync() detecting updated files', async () => {
        const filePath = path.join(watchDir, 'mutable.txt');
        await fs.promises.writeFile(filePath, 'version 1');

        const watcher = new FolderWatcher({ index, paths: [watchDir] });
        await watcher.start();
        // Everything after start() runs inside try so the watcher is stopped even if a step fails.
        try {
            assert.equal(watcher.trackedFileCount, 1);

            // Update the file (ensure mtime changes)
            await new Promise(r => setTimeout(r, 50));
            await fs.promises.writeFile(filePath, 'version 2');

            // Manual sync should detect update
            const synced: Array<{ uri: string; action: string }> = [];
            watcher.on('sync', (uri: string, action: string) => synced.push({ uri, action }));
            await watcher.sync();

            assert.equal(watcher.trackedFileCount, 1);
            const updated = synced.find(s => s.action === 'updated');
            assert.ok(updated, 'should have emitted an update event');
        } finally {
            await watcher.stop();
        }
    });

    it('should not throw if path does not exist', async () => {
        const watcher = new FolderWatcher({
            index,
            paths: [path.join(tmpDir, 'nonexistent')]
        });

        await watcher.start();
        try {
            assert.equal(watcher.trackedFileCount, 0);
        } finally {
            await watcher.stop();
        }
    });

    it('should throw if started twice', async () => {
        const watcher = new FolderWatcher({ index, paths: [watchDir] });
        await watcher.start();
        try {
            await assert.rejects(() => watcher.start(), /already running/);
        } finally {
            await watcher.stop();
        }
    });

    it('should report isRunning correctly', async () => {
        const watcher = new FolderWatcher({ index, paths: [watchDir] });
        assert.equal(watcher.isRunning, false);
        await watcher.start();
        // Stop in finally so a failed assertion cannot leave the watcher running.
        try {
            assert.equal(watcher.isRunning, true);
        } finally {
            await watcher.stop();
        }
        assert.equal(watcher.isRunning, false);
    });

    it('should emit error events for sync failures', async () => {
        await fs.promises.writeFile(path.join(watchDir, 'bad.txt'), 'content');

        // Create watcher with no embeddings to force error
        const badIndex = new LocalDocumentIndex({
            folderPath: indexDir,
            // no embeddings — will throw on upsertDocument
            storage: new LocalFileStorage(),
        });

        const watcher = new FolderWatcher({ index: badIndex, paths: [watchDir] });
        const errors: Array<{ err: Error; uri: string }> = [];
        watcher.on('error', (err: Error, uri: string) => errors.push({ err, uri }));

        await watcher.start();
        try {
            assert.equal(errors.length, 1);
            assert.ok(errors[0].err.message.includes('Embeddings model not configured'));
        } finally {
            await watcher.stop();
        }
    });
});
|
|
@@ -0,0 +1,304 @@
|
|
|
1
|
+
import { EventEmitter } from 'events';
|
|
2
|
+
import fs from 'node:fs';
|
|
3
|
+
import * as path from 'path';
|
|
4
|
+
import { LocalDocumentIndex } from './LocalDocumentIndex';
|
|
5
|
+
|
|
6
|
+
/**
 * Options accepted by the FolderWatcher constructor.
 */
export interface FolderWatcherConfig {
    /** Document index that watched files are synced into. */
    index: LocalDocumentIndex;

    /** Folder paths and/or individual file paths to watch. */
    paths: string[];

    /**
     * Optional allow-list of file extensions, e.g. ['.txt', '.md', '.html'].
     * @remarks
     * When omitted, every file is included.
     */
    extensions?: string[];

    /**
     * Optional debounce window, in milliseconds, applied to file change events.
     * @remarks
     * Defaults to 500ms. Rapid successive changes to the same file are collapsed into a single sync.
     */
    debounceMs?: number;
}
|
|
34
|
+
|
|
35
|
+
/**
 * Event map for FolderWatcher: each key is an event name and each tuple
 * lists the arguments passed to listeners of that event.
 *
 * - `sync` — a file has been synced. Args: `(uri: string, action: 'added' | 'updated' | 'deleted')`
 * - `error` — a sync operation failed. Args: `(error: Error, uri: string)`
 * - `ready` — the initial sync has completed. No args.
 */
export interface FolderWatcherEvents {
    sync: [
        uri: string,
        action: 'added' | 'updated' | 'deleted',
    ];
    error: [
        error: Error,
        uri: string,
    ];
    ready: [];
}
|
|
47
|
+
|
|
48
|
+
/**
 * Bookkeeping record for one file that has been synced into the index.
 */
interface TrackedFile {
    /** URI the document was stored under in the index. */
    uri: string;

    /** Modification time (`fs.Stats.mtimeMs`) recorded when the file was last synced. */
    mtimeMs: number;
}
|
|
52
|
+
|
|
53
|
+
/**
 * Watches folders for file changes and automatically syncs them into a LocalDocumentIndex.
 *
 * @remarks
 * Uses Node.js `fs.watch` for filesystem monitoring, with per-file debouncing.
 * Performs an initial full sync on start, then watches for incremental changes.
 *
 * Failures are reported through the `error` event. As with any `EventEmitter`, emitting
 * `error` while no listener is registered throws, so register an `error` listener before
 * calling `start()` or `sync()`.
 */
export class FolderWatcher extends EventEmitter {
    private readonly _index: LocalDocumentIndex;
    private readonly _paths: string[];
    private readonly _extensions?: Set<string>;
    private readonly _debounceMs: number;
    // Files currently known to be in the index, keyed by absolute file path.
    private readonly _tracked: Map<string, TrackedFile> = new Map();
    // Debounce timers for files with a change notification waiting to be processed.
    private readonly _pending: Map<string, NodeJS.Timeout> = new Map();
    private readonly _watchers: fs.FSWatcher[] = [];
    private _running: boolean = false;

    /**
     * Creates a new FolderWatcher instance.
     * @param config Configuration for the watcher.
     */
    public constructor(config: FolderWatcherConfig) {
        super();
        this._index = config.index;
        this._paths = config.paths.map(p => path.resolve(p));
        // Normalise to lower-case, dot-prefixed extensions so they compare directly
        // against path.extname() output in _shouldInclude().
        this._extensions = config.extensions
            ? new Set(config.extensions.map(e => e.startsWith('.') ? e.toLowerCase() : `.${e.toLowerCase()}`))
            : undefined;
        this._debounceMs = config.debounceMs ?? 500;
    }

    /**
     * Returns true if the watcher is currently running.
     */
    public get isRunning(): boolean {
        return this._running;
    }

    /**
     * Returns the number of tracked files.
     */
    public get trackedFileCount(): number {
        return this._tracked.size;
    }

    /**
     * Starts the watcher: performs an initial sync and then watches for changes.
     *
     * @remarks
     * Emits `ready` once the initial sync has completed. If the initial sync rejects, the
     * watcher is returned to the stopped state and the error is rethrown, so `start()` can
     * be called again. If `stop()` is called while start-up is still in progress, no
     * further filesystem watchers are installed.
     * @throws Error if the watcher is already running.
     */
    public async start(): Promise<void> {
        if (this._running) {
            throw new Error('FolderWatcher is already running');
        }
        this._running = true;

        // Initial sync
        try {
            await this._initialSync();
        } catch (err: unknown) {
            // Roll back so isRunning stays truthful and a later start() is not rejected
            // with "already running".
            this._running = false;
            throw err;
        }
        this.emit('ready');

        // Set up watchers
        for (const watchPath of this._paths) {
            // stop() may have run while we were awaiting; watchers installed now would
            // never be closed.
            if (!this._running) {
                return;
            }
            try {
                const stat = await fs.promises.stat(watchPath);
                if (!this._running) {
                    return;
                }
                if (stat.isDirectory()) {
                    this._watchDirectory(watchPath);
                } else if (stat.isFile()) {
                    this._watchFile(watchPath);
                }
            } catch {
                // Path doesn't exist — skip
            }
        }
    }

    /**
     * Stops the watcher and cleans up all resources.
     *
     * @remarks
     * Closes every filesystem watcher and cancels all pending debounced syncs.
     * Tracked-file bookkeeping is kept.
     */
    public async stop(): Promise<void> {
        this._running = false;

        // Close all watchers
        for (const watcher of this._watchers) {
            watcher.close();
        }
        this._watchers.length = 0;

        // Clear pending debounced operations
        for (const timeout of this._pending.values()) {
            clearTimeout(timeout);
        }
        this._pending.clear();
    }

    /**
     * Performs a full sync: scans all watched paths and upserts/deletes as needed.
     *
     * @remarks
     * A file is re-synced whenever its modification time differs from the recorded one,
     * not only when it is newer, so files restored with an older timestamp are picked up.
     * @returns Number of files synced (added + updated + deleted).
     */
    public async sync(): Promise<number> {
        let count = 0;

        // Collect current files on disk
        const currentFiles = new Map<string, number>();
        for (const watchPath of this._paths) {
            await this._collectFiles(watchPath, currentFiles);
        }

        // Upsert new or changed files
        for (const [filePath, mtimeMs] of currentFiles) {
            const tracked = this._tracked.get(filePath);
            if (!tracked || tracked.mtimeMs !== mtimeMs) {
                const ok = await this._syncFile(filePath, mtimeMs);
                if (ok) count++;
            }
        }

        // Delete files that no longer exist
        for (const [filePath, tracked] of this._tracked) {
            if (!currentFiles.has(filePath)) {
                const ok = await this._deleteFile(tracked.uri);
                if (ok) count++;
            }
        }

        return count;
    }

    // --- Private methods ---

    /**
     * Runs the full sync performed by start().
     */
    private async _initialSync(): Promise<void> {
        await this.sync();
    }

    /**
     * Returns true if the file passes the configured extension filter.
     * Always true when no extensions were configured.
     */
    private _shouldInclude(filePath: string): boolean {
        if (!this._extensions) return true;
        const ext = path.extname(filePath).toLowerCase();
        return this._extensions.has(ext);
    }

    /**
     * Recursively collects included files under a path.
     * @param dirOrFile File or directory to scan. Paths that cannot be stat'ed are skipped.
     * @param out Receives one `file path -> mtimeMs` entry per included file.
     */
    private async _collectFiles(dirOrFile: string, out: Map<string, number>): Promise<void> {
        let stat: fs.Stats;
        try {
            stat = await fs.promises.stat(dirOrFile);
        } catch {
            return;
        }

        if (stat.isFile()) {
            if (this._shouldInclude(dirOrFile)) {
                out.set(dirOrFile, stat.mtimeMs);
            }
        } else if (stat.isDirectory()) {
            const entries = await fs.promises.readdir(dirOrFile);
            for (const entry of entries) {
                await this._collectFiles(path.join(dirOrFile, entry), out);
            }
        }
    }

    /**
     * Reads a file and upserts it into the index, using the file path as the document URI.
     * Emits `sync` on success and `error` on failure.
     * @param filePath File to sync.
     * @param mtimeMs Modification time to record for the file.
     * @returns True if the file was synced, false if an error was emitted instead.
     */
    private async _syncFile(filePath: string, mtimeMs: number): Promise<boolean> {
        const wasTracked = this._tracked.has(filePath);
        const action = wasTracked ? 'updated' : 'added';
        try {
            const text = await fs.promises.readFile(filePath, 'utf-8');
            const ext = path.extname(filePath);
            // Document type is the lower-cased extension without its leading dot.
            const docType = ext ? ext.slice(1).toLowerCase() : undefined;
            await this._index.upsertDocument(filePath, text, docType);
            this._tracked.set(filePath, { uri: filePath, mtimeMs });
            this.emit('sync', filePath, action);
            return true;
        } catch (err: unknown) {
            this.emit('error', err instanceof Error ? err : new Error(String(err)), filePath);
            return false;
        }
    }

    /**
     * Deletes a document from the index and stops tracking it.
     * Emits `sync` with action `deleted` on success and `error` on failure.
     * @param uri URI of the document to delete.
     * @returns True if the document was deleted, false if an error was emitted instead.
     */
    private async _deleteFile(uri: string): Promise<boolean> {
        try {
            await this._index.deleteDocument(uri);
            // Find and remove from tracked by URI
            for (const [filePath, tracked] of this._tracked) {
                if (tracked.uri === uri) {
                    this._tracked.delete(filePath);
                    break;
                }
            }
            this.emit('sync', uri, 'deleted');
            return true;
        } catch (err: unknown) {
            this.emit('error', err instanceof Error ? err : new Error(String(err)), uri);
            return false;
        }
    }

    /**
     * Installs a recursive `fs.watch` on a directory and schedules a debounced sync for
     * every reported file that passes the extension filter.
     */
    private _watchDirectory(dirPath: string): void {
        try {
            const watcher = fs.watch(dirPath, { recursive: true }, (eventType, filename) => {
                if (!this._running || !filename) return;
                const fullPath = path.join(dirPath, filename);
                if (this._shouldInclude(fullPath)) {
                    this._debouncedSync(fullPath);
                }
            });
            watcher.on('error', (err) => {
                this.emit('error', err, dirPath);
            });
            this._watchers.push(watcher);
        } catch (err: unknown) {
            this.emit('error', err instanceof Error ? err : new Error(String(err)), dirPath);
        }
    }

    /**
     * Installs an `fs.watch` on a single file and schedules a debounced sync on every event.
     */
    private _watchFile(filePath: string): void {
        try {
            const watcher = fs.watch(filePath, (eventType) => {
                if (!this._running) return;
                this._debouncedSync(filePath);
            });
            watcher.on('error', (err) => {
                this.emit('error', err, filePath);
            });
            this._watchers.push(watcher);
        } catch (err: unknown) {
            this.emit('error', err instanceof Error ? err : new Error(String(err)), filePath);
        }
    }

    /**
     * Schedules a sync of one file after the debounce interval, replacing any sync that
     * is already pending for that file.
     */
    private _debouncedSync(filePath: string): void {
        // Cancel any pending sync for this file
        const existing = this._pending.get(filePath);
        if (existing) {
            clearTimeout(existing);
        }

        const timeout = setTimeout(() => {
            this._pending.delete(filePath);
            this._handleChange(filePath).catch(() => {
                // _handleChange reports failures through the `error` event itself. It can
                // only reject when an event listener throws (including `error` having no
                // listener). Nothing awaits this timer, so the rejection is handled here
                // rather than left as an unhandled rejection.
            });
        }, this._debounceMs);

        this._pending.set(filePath, timeout);
    }

    /**
     * Applies one debounced change notification: re-syncs the file if it still exists,
     * or removes it from the index if it is gone.
     *
     * @remarks
     * Only `ENOENT`/`ENOTDIR` from `stat` are treated as a deletion. Any other `stat`
     * failure is emitted as `error` and leaves the indexed document untouched.
     */
    private async _handleChange(filePath: string): Promise<void> {
        if (!this._running) return;

        let stat: fs.Stats;
        try {
            stat = await fs.promises.stat(filePath);
        } catch (err: unknown) {
            const code = (err as NodeJS.ErrnoException | undefined)?.code;
            if (code === 'ENOENT' || code === 'ENOTDIR') {
                // File was deleted
                if (this._running && this._tracked.has(filePath)) {
                    await this._deleteFile(filePath);
                }
            } else {
                this.emit('error', err instanceof Error ? err : new Error(String(err)), filePath);
            }
            return;
        }

        // Re-check: stop() may have been called while stat() was in flight.
        if (this._running && stat.isFile()) {
            await this._syncFile(filePath, stat.mtimeMs);
        }
    }
}
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
import { strict as assert } from 'node:assert';
|
|
2
|
+
import { describe, it } from 'mocha';
|
|
3
|
+
import { GPT3Tokenizer } from '../src/GPT3Tokenizer';
|
|
4
|
+
|
|
5
|
+
describe('GPT3Tokenizer', () => {
    // One tokenizer instance is shared by every case below.
    const tokenizer = new GPT3Tokenizer();

    it('encodes empty string to [] and decodes [] to empty string', () => {
        assert.deepEqual(tokenizer.encode(''), [], 'encode("") should return []');
        assert.equal(tokenizer.decode([]), '', 'decode([]) should return empty string');
    });

    it('round-trips various strings including unicode and punctuation', () => {
        const samples = [
            'Hello, world!',
            'Café 😊 こんにちは 𠜎𠜱𠝹𠱓',
            'Newlines\nand\ttabs with multiple spaces.',
            '--- *** ===='
        ];

        samples.forEach((s) => {
            const encoded = tokenizer.encode(s);
            const roundTripped = tokenizer.decode(encoded);
            assert.equal(roundTripped, s, `decode(encode(s)) should equal s for: ${JSON.stringify(s)}`);

            // Token array shape: an array of non-negative integers.
            assert.ok(Array.isArray(encoded), 'encode should return an array');
            encoded.forEach((token) => {
                assert.equal(typeof token, 'number', 'each token should be a number');
                assert.ok(Number.isInteger(token), 'each token should be an integer');
                assert.ok(token >= 0, 'each token should be non-negative');
            });

            // Encoding the same input again must give the same tokens.
            assert.deepEqual(tokenizer.encode(s), encoded, 'encode should be deterministic for the same input');
        });
    });

    it('produces non-empty tokens for typical non-empty input', () => {
        const input = 'This is a simple test.';
        const encoded = tokenizer.encode(input);
        assert.ok(encoded.length > 0, 'expected some tokens for non-empty input');
        assert.equal(tokenizer.decode(encoded), input, 'decoded text should match original input');
    });
});
|