vectra 0.12.2 → 0.14.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +1 -1
- package/README.md +92 -100
- package/bin/vectra.js +3 -0
- package/lib/BrowserWebFetcher.d.ts +75 -0
- package/lib/BrowserWebFetcher.d.ts.map +1 -0
- package/lib/BrowserWebFetcher.js +290 -0
- package/lib/BrowserWebFetcher.js.map +1 -0
- package/lib/FileFetcher.d.ts +5 -0
- package/lib/FileFetcher.d.ts.map +1 -0
- package/lib/FileFetcher.js +89 -0
- package/lib/FileFetcher.js.map +1 -0
- package/lib/FileFetcher.spec.d.ts +2 -0
- package/lib/FileFetcher.spec.d.ts.map +1 -0
- package/lib/FileFetcher.spec.js +244 -0
- package/lib/FileFetcher.spec.js.map +1 -0
- package/lib/FolderWatcher.d.ts +91 -0
- package/lib/FolderWatcher.d.ts.map +1 -0
- package/lib/FolderWatcher.js +304 -0
- package/lib/FolderWatcher.js.map +1 -0
- package/lib/FolderWatcher.spec.d.ts +2 -0
- package/lib/FolderWatcher.spec.d.ts.map +1 -0
- package/lib/FolderWatcher.spec.js +308 -0
- package/lib/FolderWatcher.spec.js.map +1 -0
- package/lib/GPT3Tokenizer.d.ts +9 -0
- package/lib/GPT3Tokenizer.spec.d.ts +2 -0
- package/lib/GPT3Tokenizer.spec.d.ts.map +1 -0
- package/lib/GPT3Tokenizer.spec.js +45 -0
- package/lib/GPT3Tokenizer.spec.js.map +1 -0
- package/lib/ItemSelector.d.ts +41 -0
- package/lib/ItemSelector.d.ts.map +1 -0
- package/lib/ItemSelector.js +179 -0
- package/lib/ItemSelector.js.map +1 -0
- package/lib/ItemSelector.spec.d.ts +2 -0
- package/lib/ItemSelector.spec.d.ts.map +1 -0
- package/lib/ItemSelector.spec.js +204 -0
- package/lib/ItemSelector.spec.js.map +1 -0
- package/lib/LocalDocument.d.ts +54 -0
- package/lib/LocalDocument.d.ts.map +1 -1
- package/lib/LocalDocument.js +116 -0
- package/lib/LocalDocument.js.map +1 -0
- package/lib/LocalDocument.spec.d.ts +2 -0
- package/lib/LocalDocument.spec.d.ts.map +1 -0
- package/lib/LocalDocument.spec.js +214 -0
- package/lib/LocalDocument.spec.js.map +1 -0
- package/lib/LocalDocumentIndex.d.ts +152 -0
- package/lib/LocalDocumentIndex.d.ts.map +1 -1
- package/lib/LocalDocumentIndex.js +420 -0
- package/lib/LocalDocumentIndex.js.map +1 -0
- package/lib/LocalDocumentIndex.spec.d.ts +2 -0
- package/lib/LocalDocumentIndex.spec.d.ts.map +1 -0
- package/lib/LocalDocumentIndex.spec.js +494 -0
- package/lib/LocalDocumentIndex.spec.js.map +1 -0
- package/lib/LocalDocumentResult.d.ts +66 -0
- package/lib/LocalDocumentResult.d.ts.map +1 -1
- package/lib/LocalDocumentResult.js +376 -0
- package/lib/LocalDocumentResult.js.map +1 -0
- package/lib/LocalDocumentResult.spec.d.ts +2 -0
- package/lib/LocalDocumentResult.spec.d.ts.map +1 -0
- package/lib/LocalDocumentResult.spec.js +373 -0
- package/lib/LocalDocumentResult.spec.js.map +1 -0
- package/lib/LocalEmbeddings.d.ts +59 -0
- package/lib/LocalEmbeddings.d.ts.map +1 -0
- package/lib/LocalEmbeddings.js +101 -0
- package/lib/LocalEmbeddings.js.map +1 -0
- package/lib/LocalEmbeddings.spec.d.ts +2 -0
- package/lib/LocalEmbeddings.spec.d.ts.map +1 -0
- package/lib/LocalEmbeddings.spec.js +155 -0
- package/lib/LocalEmbeddings.spec.js.map +1 -0
- package/lib/LocalIndex.d.ts +159 -0
- package/lib/LocalIndex.d.ts.map +1 -1
- package/lib/LocalIndex.js +519 -0
- package/lib/LocalIndex.js.map +1 -0
- package/lib/LocalIndex.spec.d.ts +2 -0
- package/lib/LocalIndex.spec.js +611 -9
- package/lib/LocalIndex.spec.js.map +1 -1
- package/lib/OpenAIEmbeddings.d.ts +124 -0
- package/lib/OpenAIEmbeddings.d.ts.map +1 -0
- package/lib/OpenAIEmbeddings.js +166 -0
- package/lib/OpenAIEmbeddings.js.map +1 -0
- package/lib/OpenAIEmbeddings.spec.d.ts +2 -0
- package/lib/OpenAIEmbeddings.spec.d.ts.map +1 -0
- package/lib/OpenAIEmbeddings.spec.js +298 -0
- package/lib/OpenAIEmbeddings.spec.js.map +1 -0
- package/lib/TextSplitter.d.ts +21 -0
- package/lib/TextSplitter.d.ts.map +1 -1
- package/lib/TextSplitter.js +500 -0
- package/lib/TextSplitter.js.map +1 -0
- package/lib/TextSplitter.spec.d.ts +2 -0
- package/lib/TextSplitter.spec.d.ts.map +1 -0
- package/lib/TextSplitter.spec.js +337 -0
- package/lib/TextSplitter.spec.js.map +1 -0
- package/lib/TransformersEmbeddings.d.ts +121 -0
- package/lib/TransformersEmbeddings.d.ts.map +1 -0
- package/lib/TransformersEmbeddings.js +176 -0
- package/lib/TransformersEmbeddings.js.map +1 -0
- package/lib/TransformersEmbeddings.spec.d.ts +2 -0
- package/lib/TransformersEmbeddings.spec.d.ts.map +1 -0
- package/lib/TransformersEmbeddings.spec.js +198 -0
- package/lib/TransformersEmbeddings.spec.js.map +1 -0
- package/lib/TransformersTokenizer.d.ts +33 -0
- package/lib/TransformersTokenizer.d.ts.map +1 -0
- package/lib/TransformersTokenizer.js +44 -0
- package/lib/TransformersTokenizer.js.map +1 -0
- package/lib/TransformersTokenizer.spec.d.ts +2 -0
- package/lib/TransformersTokenizer.spec.d.ts.map +1 -0
- package/lib/TransformersTokenizer.spec.js +112 -0
- package/lib/TransformersTokenizer.spec.js.map +1 -0
- package/lib/WebFetcher.d.ts +14 -0
- package/lib/WebFetcher.d.ts.map +1 -0
- package/lib/WebFetcher.js +238 -0
- package/lib/WebFetcher.js.map +1 -0
- package/lib/WebFetcher.spec.d.ts +2 -0
- package/lib/WebFetcher.spec.d.ts.map +1 -0
- package/lib/WebFetcher.spec.js +263 -0
- package/lib/WebFetcher.spec.js.map +1 -0
- package/lib/browser.d.ts +30 -0
- package/lib/browser.d.ts.map +1 -0
- package/lib/browser.js +52 -0
- package/lib/browser.js.map +1 -0
- package/lib/codecs/IndexCodec.d.ts +37 -0
- package/lib/codecs/IndexCodec.d.ts.map +1 -0
- package/lib/codecs/IndexCodec.js +3 -0
- package/lib/codecs/IndexCodec.js.map +1 -0
- package/lib/codecs/JsonCodec.d.ts +19 -0
- package/lib/codecs/JsonCodec.d.ts.map +1 -0
- package/lib/codecs/JsonCodec.js +35 -0
- package/lib/codecs/JsonCodec.js.map +1 -0
- package/lib/codecs/JsonCodec.spec.d.ts +2 -0
- package/lib/codecs/JsonCodec.spec.d.ts.map +1 -0
- package/lib/codecs/JsonCodec.spec.js +66 -0
- package/lib/codecs/JsonCodec.spec.js.map +1 -0
- package/lib/codecs/LocalIndex.protobuf.spec.d.ts +2 -0
- package/lib/codecs/LocalIndex.protobuf.spec.d.ts.map +1 -0
- package/lib/codecs/LocalIndex.protobuf.spec.js +108 -0
- package/lib/codecs/LocalIndex.protobuf.spec.js.map +1 -0
- package/lib/codecs/ProtobufCodec.d.ts +20 -0
- package/lib/codecs/ProtobufCodec.d.ts.map +1 -0
- package/lib/codecs/ProtobufCodec.js +225 -0
- package/lib/codecs/ProtobufCodec.js.map +1 -0
- package/lib/codecs/ProtobufCodec.spec.d.ts +2 -0
- package/lib/codecs/ProtobufCodec.spec.d.ts.map +1 -0
- package/lib/codecs/ProtobufCodec.spec.js +155 -0
- package/lib/codecs/ProtobufCodec.spec.js.map +1 -0
- package/lib/codecs/index.d.ts +5 -0
- package/lib/codecs/index.d.ts.map +1 -0
- package/lib/codecs/index.js +21 -0
- package/lib/codecs/index.js.map +1 -0
- package/lib/codecs/migrateIndex.d.ts +24 -0
- package/lib/codecs/migrateIndex.d.ts.map +1 -0
- package/lib/codecs/migrateIndex.js +119 -0
- package/lib/codecs/migrateIndex.js.map +1 -0
- package/lib/codecs/migrateIndex.spec.d.ts +2 -0
- package/lib/codecs/migrateIndex.spec.d.ts.map +1 -0
- package/lib/codecs/migrateIndex.spec.js +151 -0
- package/lib/codecs/migrateIndex.spec.js.map +1 -0
- package/lib/codecs/schemas/index.proto +34 -0
- package/lib/index.d.ts +20 -0
- package/lib/index.d.ts.map +1 -1
- package/lib/index.js +36 -0
- package/lib/index.js.map +1 -0
- package/lib/internals/Colorize.d.ts +14 -0
- package/lib/internals/Colorize.d.ts.map +1 -0
- package/lib/internals/Colorize.js +69 -0
- package/lib/internals/Colorize.js.map +1 -0
- package/lib/internals/index.d.ts +3 -0
- package/lib/internals/index.d.ts.map +1 -0
- package/lib/internals/index.js +19 -0
- package/lib/internals/index.js.map +1 -0
- package/lib/internals/types.d.ts +43 -0
- package/lib/internals/types.d.ts.map +1 -0
- package/lib/internals/types.js +3 -0
- package/lib/internals/types.js.map +1 -0
- package/lib/server/IndexManager.d.ts +78 -0
- package/lib/server/IndexManager.d.ts.map +1 -0
- package/lib/server/IndexManager.js +259 -0
- package/lib/server/IndexManager.js.map +1 -0
- package/lib/server/VectraServer.d.ts +40 -0
- package/lib/server/VectraServer.d.ts.map +1 -0
- package/lib/server/VectraServer.js +151 -0
- package/lib/server/VectraServer.js.map +1 -0
- package/lib/server/VectraServer.spec.d.ts +2 -0
- package/lib/server/VectraServer.spec.d.ts.map +1 -0
- package/lib/server/VectraServer.spec.js +322 -0
- package/lib/server/VectraServer.spec.js.map +1 -0
- package/lib/server/handlers/documentHandlers.d.ts +15 -0
- package/lib/server/handlers/documentHandlers.d.ts.map +1 -0
- package/lib/server/handlers/documentHandlers.js +95 -0
- package/lib/server/handlers/documentHandlers.js.map +1 -0
- package/lib/server/handlers/helpers.d.ts +23 -0
- package/lib/server/handlers/helpers.d.ts.map +1 -0
- package/lib/server/handlers/helpers.js +138 -0
- package/lib/server/handlers/helpers.js.map +1 -0
- package/lib/server/handlers/index.d.ts +8 -0
- package/lib/server/handlers/index.d.ts.map +1 -0
- package/lib/server/handlers/index.js +22 -0
- package/lib/server/handlers/index.js.map +1 -0
- package/lib/server/handlers/indexHandlers.d.ts +14 -0
- package/lib/server/handlers/indexHandlers.d.ts.map +1 -0
- package/lib/server/handlers/indexHandlers.js +85 -0
- package/lib/server/handlers/indexHandlers.js.map +1 -0
- package/lib/server/handlers/itemHandlers.d.ts +34 -0
- package/lib/server/handlers/itemHandlers.d.ts.map +1 -0
- package/lib/server/handlers/itemHandlers.js +166 -0
- package/lib/server/handlers/itemHandlers.js.map +1 -0
- package/lib/server/handlers/lifecycleHandlers.d.ts +11 -0
- package/lib/server/handlers/lifecycleHandlers.d.ts.map +1 -0
- package/lib/server/handlers/lifecycleHandlers.js +31 -0
- package/lib/server/handlers/lifecycleHandlers.js.map +1 -0
- package/lib/server/handlers/queryHandlers.d.ts +27 -0
- package/lib/server/handlers/queryHandlers.d.ts.map +1 -0
- package/lib/server/handlers/queryHandlers.js +135 -0
- package/lib/server/handlers/queryHandlers.js.map +1 -0
- package/lib/server/handlers/statsHandlers.d.ts +17 -0
- package/lib/server/handlers/statsHandlers.d.ts.map +1 -0
- package/lib/server/handlers/statsHandlers.js +81 -0
- package/lib/server/handlers/statsHandlers.js.map +1 -0
- package/lib/server/index.d.ts +4 -0
- package/lib/server/index.d.ts.map +1 -0
- package/lib/server/index.js +23 -0
- package/lib/server/index.js.map +1 -0
- package/lib/storage/FileStorage.d.ts +92 -0
- package/lib/storage/FileStorage.d.ts.map +1 -0
- package/lib/storage/FileStorage.js +3 -0
- package/lib/storage/FileStorage.js.map +1 -0
- package/lib/storage/FileStorageUtilities.d.ts +36 -0
- package/lib/storage/FileStorageUtilities.d.ts.map +1 -0
- package/lib/storage/FileStorageUtilities.js +91 -0
- package/lib/storage/FileStorageUtilities.js.map +1 -0
- package/lib/storage/FileStorageUtilities.spec.d.ts +2 -0
- package/lib/storage/FileStorageUtilities.spec.d.ts.map +1 -0
- package/lib/storage/FileStorageUtilities.spec.js +98 -0
- package/lib/storage/FileStorageUtilities.spec.js.map +1 -0
- package/lib/storage/FileType.d.ts +29 -0
- package/lib/storage/FileType.d.ts.map +1 -0
- package/lib/storage/FileType.js +38 -0
- package/lib/storage/FileType.js.map +1 -0
- package/lib/storage/IndexedDBStorage.d.ts +47 -0
- package/lib/storage/IndexedDBStorage.d.ts.map +1 -0
- package/lib/storage/IndexedDBStorage.js +347 -0
- package/lib/storage/IndexedDBStorage.js.map +1 -0
- package/lib/storage/LocalFileStorage.browser.d.ts +19 -0
- package/lib/storage/LocalFileStorage.browser.d.ts.map +1 -0
- package/lib/storage/LocalFileStorage.browser.js +43 -0
- package/lib/storage/LocalFileStorage.browser.js.map +1 -0
- package/lib/storage/LocalFileStorage.d.ts +23 -0
- package/lib/storage/LocalFileStorage.d.ts.map +1 -0
- package/lib/storage/LocalFileStorage.js +152 -0
- package/lib/storage/LocalFileStorage.js.map +1 -0
- package/lib/storage/LocalFileStorage.spec.d.ts +2 -0
- package/lib/storage/LocalFileStorage.spec.d.ts.map +1 -0
- package/lib/storage/LocalFileStorage.spec.js +249 -0
- package/lib/storage/LocalFileStorage.spec.js.map +1 -0
- package/lib/storage/VirtualFileStorage.d.ts +18 -0
- package/lib/storage/VirtualFileStorage.d.ts.map +1 -0
- package/lib/storage/VirtualFileStorage.js +178 -0
- package/lib/storage/VirtualFileStorage.js.map +1 -0
- package/lib/storage/VirtualFileStorage.spec.d.ts +2 -0
- package/lib/storage/VirtualFileStorage.spec.d.ts.map +1 -0
- package/lib/storage/VirtualFileStorage.spec.js +302 -0
- package/lib/storage/VirtualFileStorage.spec.js.map +1 -0
- package/lib/storage/index.d.ts +6 -0
- package/lib/storage/index.d.ts.map +1 -0
- package/lib/storage/index.js +22 -0
- package/lib/storage/index.js.map +1 -0
- package/lib/templates/templates/csharp/README.md +48 -0
- package/lib/templates/templates/csharp/VectraClient.cs +234 -0
- package/lib/templates/templates/go/README.md +71 -0
- package/lib/templates/templates/go/vectra_client.go +322 -0
- package/lib/templates/templates/java/README.md +81 -0
- package/lib/templates/templates/java/VectraClient.java +232 -0
- package/lib/templates/templates/python/README.md +37 -0
- package/lib/templates/templates/python/vectra_client.py +279 -0
- package/lib/templates/templates/rust/Cargo.toml +14 -0
- package/lib/templates/templates/rust/README.md +39 -0
- package/lib/templates/templates/rust/build.rs +4 -0
- package/lib/templates/templates/rust/lib.rs +284 -0
- package/lib/templates/templates/typescript/README.md +96 -0
- package/lib/templates/templates/typescript/VectraClient.ts +374 -0
- package/lib/templates/typescript/VectraClient.d.ts +114 -0
- package/lib/templates/typescript/VectraClient.d.ts.map +1 -0
- package/lib/templates/typescript/VectraClient.js +328 -0
- package/lib/templates/typescript/VectraClient.js.map +1 -0
- package/lib/types.d.ts +153 -0
- package/lib/types.d.ts.map +1 -0
- package/lib/types.js +3 -0
- package/lib/types.js.map +1 -0
- package/lib/utils/index.d.ts +2 -0
- package/lib/utils/index.d.ts.map +1 -0
- package/lib/utils/index.js +18 -0
- package/lib/utils/index.js.map +1 -0
- package/lib/utils/pathUtils.d.ts +40 -0
- package/lib/utils/pathUtils.d.ts.map +1 -0
- package/lib/utils/pathUtils.js +98 -0
- package/lib/utils/pathUtils.js.map +1 -0
- package/lib/vectra-cli.d.ts +2 -0
- package/lib/vectra-cli.d.ts.map +1 -1
- package/lib/vectra-cli.generate.spec.d.ts +2 -0
- package/lib/vectra-cli.generate.spec.d.ts.map +1 -0
- package/lib/vectra-cli.generate.spec.js +112 -0
- package/lib/vectra-cli.generate.spec.js.map +1 -0
- package/lib/vectra-cli.js +760 -0
- package/lib/vectra-cli.js.map +1 -0
- package/lib/vectra-cli.spec.d.ts +1 -0
- package/lib/vectra-cli.spec.d.ts.map +1 -0
- package/lib/vectra-cli.spec.js +2 -0
- package/lib/vectra-cli.spec.js.map +1 -0
- package/package.json +91 -16
- package/proto/vectra_service.proto +276 -0
- package/src/BrowserWebFetcher.ts +345 -0
- package/src/FileFetcher.spec.ts +234 -0
- package/src/FileFetcher.ts +37 -25
- package/src/FolderWatcher.spec.ts +288 -0
- package/src/FolderWatcher.ts +304 -0
- package/src/GPT3Tokenizer.spec.ts +50 -0
- package/src/ItemSelector.spec.ts +252 -0
- package/src/ItemSelector.ts +163 -150
- package/src/LocalDocument.spec.ts +211 -0
- package/src/LocalDocument.ts +88 -94
- package/src/LocalDocumentIndex.spec.ts +481 -0
- package/src/LocalDocumentIndex.ts +39 -40
- package/src/LocalDocumentResult.spec.ts +373 -0
- package/src/LocalDocumentResult.ts +489 -319
- package/src/LocalEmbeddings.spec.ts +138 -0
- package/src/LocalEmbeddings.ts +120 -0
- package/src/LocalIndex.spec.ts +808 -66
- package/src/LocalIndex.ts +479 -429
- package/src/OpenAIEmbeddings.spec.ts +354 -0
- package/src/OpenAIEmbeddings.ts +26 -27
- package/src/TextSplitter.spec.ts +342 -0
- package/src/TextSplitter.ts +517 -532
- package/src/TransformersEmbeddings.spec.ts +188 -0
- package/src/TransformersEmbeddings.ts +232 -0
- package/src/TransformersTokenizer.spec.ts +143 -0
- package/src/TransformersTokenizer.ts +45 -0
- package/src/WebFetcher.spec.ts +288 -0
- package/src/WebFetcher.ts +184 -186
- package/src/browser.ts +69 -0
- package/src/codecs/IndexCodec.ts +40 -0
- package/src/codecs/JsonCodec.spec.ts +70 -0
- package/src/codecs/JsonCodec.ts +37 -0
- package/src/codecs/LocalIndex.protobuf.spec.ts +115 -0
- package/src/codecs/ProtobufCodec.spec.ts +166 -0
- package/src/codecs/ProtobufCodec.ts +193 -0
- package/src/codecs/index.ts +4 -0
- package/src/codecs/migrateIndex.spec.ts +176 -0
- package/src/codecs/migrateIndex.ts +125 -0
- package/src/codecs/schemas/index.proto +34 -0
- package/src/index.ts +9 -1
- package/src/internals/Colorize.ts +19 -16
- package/src/server/IndexManager.ts +243 -0
- package/src/server/VectraServer.spec.ts +303 -0
- package/src/server/VectraServer.ts +156 -0
- package/src/server/handlers/documentHandlers.ts +59 -0
- package/src/server/handlers/helpers.ts +93 -0
- package/src/server/handlers/index.ts +7 -0
- package/src/server/handlers/indexHandlers.ts +44 -0
- package/src/server/handlers/itemHandlers.ts +140 -0
- package/src/server/handlers/lifecycleHandlers.ts +26 -0
- package/src/server/handlers/queryHandlers.ts +96 -0
- package/src/server/handlers/statsHandlers.ts +38 -0
- package/src/server/index.ts +3 -0
- package/src/storage/FileStorage.ts +105 -0
- package/src/storage/FileStorageUtilities.spec.ts +106 -0
- package/src/storage/FileStorageUtilities.ts +77 -0
- package/src/storage/FileType.ts +61 -0
- package/src/storage/IndexedDBStorage.ts +365 -0
- package/src/storage/LocalFileStorage.browser.ts +52 -0
- package/src/storage/LocalFileStorage.spec.ts +292 -0
- package/src/storage/LocalFileStorage.ts +98 -0
- package/src/storage/VirtualFileStorage.spec.ts +307 -0
- package/src/storage/VirtualFileStorage.ts +169 -0
- package/src/storage/index.ts +5 -0
- package/src/templates/csharp/README.md +48 -0
- package/src/templates/csharp/VectraClient.cs +234 -0
- package/src/templates/go/README.md +71 -0
- package/src/templates/go/vectra_client.go +322 -0
- package/src/templates/java/README.md +81 -0
- package/src/templates/java/VectraClient.java +232 -0
- package/src/templates/python/README.md +37 -0
- package/src/templates/python/vectra_client.py +279 -0
- package/src/templates/rust/Cargo.toml +14 -0
- package/src/templates/rust/README.md +39 -0
- package/src/templates/rust/build.rs +4 -0
- package/src/templates/rust/lib.rs +284 -0
- package/src/templates/typescript/README.md +96 -0
- package/src/templates/typescript/VectraClient.ts +374 -0
- package/src/types.ts +131 -123
- package/src/utils/index.ts +1 -0
- package/src/utils/pathUtils.ts +106 -0
- package/src/vectra-cli.generate.spec.ts +72 -0
- package/src/vectra-cli.spec.ts +0 -0
- package/src/vectra-cli.ts +687 -246
|
@@ -0,0 +1,345 @@
|
|
|
1
|
+
import { TextFetcher } from './types';
|
|
2
|
+
|
|
3
|
+
/**
 * Options accepted by `BrowserWebFetcher`.
 * @remarks
 * Every field is optional; the fetcher's constructor fills in the documented
 * defaults for any field the caller leaves out.
 */
export interface BrowserWebFetcherConfig {
    /**
     * Optional. When enabled, fetched HTML is reduced to a simplified
     * text/markdown representation before being handed to the caller.
     * @remarks
     * Defaults to `true`.
     */
    htmlToMarkdown?: boolean;

    /**
     * Optional. Extra HTTP headers sent with every request.
     */
    headers?: Record<string, string>;

    /**
     * Optional. The `mode` option passed to `fetch`.
     * @remarks
     * Defaults to 'cors'.
     */
    mode?: RequestMode;

    /**
     * Optional. The `credentials` option passed to `fetch`.
     * @remarks
     * Defaults to 'same-origin'.
     */
    credentials?: RequestCredentials;
}
|
|
33
|
+
|
|
34
|
+
/**
 * Browser-compatible web fetcher built on the native Fetch API.
 * @remarks
 * Intended for browsers and Electron renderer processes. HTML is parsed with
 * the platform `DOMParser` (not cheerio), so the `DOMParser` and `Node`
 * globals must exist wherever HTML conversion is used.
 */
export class BrowserWebFetcher implements TextFetcher {
    private readonly _config: BrowserWebFetcherConfig;

    /** MIME types (matched by substring against the response type) that may be ingested. */
    private static readonly ALLOWED_CONTENT_TYPES = [
        'text/html',
        'application/json',
        'application/xml',
        'application/javascript',
        'text/plain',
        'text/markdown',
        'text/xml'
    ];

    /**
     * Creates a new `BrowserWebFetcher` instance.
     * @param config Optional configuration options. Fields that are omitted fall
     * back to `htmlToMarkdown: true`, `mode: 'cors'`, `credentials: 'same-origin'`.
     */
    constructor(config?: BrowserWebFetcherConfig) {
        this._config = {
            htmlToMarkdown: true,
            mode: 'cors',
            credentials: 'same-origin',
            ...config
        };
    }

    /**
     * Fetches content from a URL and passes it to the document handler.
     * @param uri URL to fetch.
     * @param onDocument Callback that receives the URL, the (possibly converted)
     * text and a document type hint.
     * @returns Promise that resolves to the return value of `onDocument`.
     * @throws Error when the response status is not OK, or when the response
     * content type is not one of the allowed text-like types.
     */
    async fetch(
        uri: string,
        onDocument: (uri: string, text: string, docType?: string) => Promise<boolean>
    ): Promise<boolean> {
        const response = await fetch(uri, {
            method: 'GET',
            headers: this._config.headers,
            mode: this._config.mode,
            credentials: this._config.credentials
        });

        if (!response.ok) {
            throw new Error(`HTTP ${response.status}: ${response.statusText}`);
        }

        // A missing Content-Type header is treated as plain text.
        const contentType = response.headers.get('content-type') || 'text/plain';
        // Drop parameters such as "; charset=utf-8" before comparing.
        const mimeType = contentType.split(';')[0].trim().toLowerCase();

        // Validate content type
        if (!BrowserWebFetcher.ALLOWED_CONTENT_TYPES.some(allowed => mimeType.includes(allowed))) {
            throw new Error(`Unsupported content type: ${contentType}`);
        }

        const text = await response.text();

        // HTML is converted to markdown unless the caller opted out.
        if (mimeType.includes('text/html') && this._config.htmlToMarkdown) {
            const markdown = this.htmlToMarkdown(text, uri);
            return onDocument(uri, markdown, 'md');
        }

        // Everything else is passed through with a type hint derived from the MIME type.
        const docType = this.getDocTypeFromMime(mimeType);
        return onDocument(uri, text, docType);
    }

    /**
     * Converts HTML to a simplified markdown-like format using DOMParser.
     * @param html Raw HTML markup.
     * @param baseUrl URL the markup was fetched from; used to resolve relative links.
     * @returns The converted text, or the original `html` when the parsed
     * document has no body.
     */
    private htmlToMarkdown(html: string, baseUrl: string): string {
        const parser = new DOMParser();
        const doc = parser.parseFromString(html, 'text/html');

        // Remove elements that carry no readable text.
        const removeSelectors = ['script', 'style', 'noscript', 'iframe', 'svg', 'canvas'];
        removeSelectors.forEach(selector => {
            doc.querySelectorAll(selector).forEach(el => el.remove());
        });

        // Convert relative URLs to absolute. Absolute http(s), protocol-relative
        // and fragment-only links are left untouched.
        doc.querySelectorAll('a[href]').forEach(el => {
            const href = el.getAttribute('href');
            if (href && !href.startsWith('http') && !href.startsWith('//') && !href.startsWith('#')) {
                try {
                    el.setAttribute('href', new URL(href, baseUrl).toString());
                } catch {
                    // Leave as-is if URL parsing fails
                }
            }
        });

        const body = doc.body;
        if (!body) {
            return html;
        }

        const lines: string[] = [];
        this.processNode(body, lines);

        // Collapse runs of blank lines and trim the ends. Note this also
        // collapses multiple blank lines inside fenced code blocks.
        return lines.join('\n').replace(/\n{3,}/g, '\n\n').trim();
    }

    /**
     * Recursively processes DOM nodes to extract text content.
     * @param node Node to convert; only text and element nodes produce output.
     * @param lines Output buffer; converted lines are appended to it.
     */
    private processNode(node: Node, lines: string[]): void {
        if (node.nodeType === Node.TEXT_NODE) {
            const text = node.textContent?.trim();
            if (text) {
                lines.push(text);
            }
            return;
        }

        if (node.nodeType !== Node.ELEMENT_NODE) {
            return;
        }

        const el = node as Element;
        const tagName = el.tagName.toLowerCase();

        // h1..h6 map to 1..6 leading '#' characters.
        const heading = /^h([1-6])$/.exec(tagName);
        if (heading) {
            lines.push('', `${'#'.repeat(Number(heading[1]))} ${this.getTextContent(el)}`, '');
            return;
        }

        switch (tagName) {
            case 'p':
                lines.push('');
                this.processChildren(el, lines);
                lines.push('');
                return;
            case 'br':
                lines.push('');
                return;
            case 'hr':
                lines.push('', '---', '');
                return;
            case 'a': {
                const href = el.getAttribute('href');
                const text = this.getTextContent(el);
                if (href && text) {
                    lines.push(`[${text}](${href})`);
                } else if (text) {
                    lines.push(text);
                }
                return;
            }
            case 'strong':
            case 'b':
                lines.push(`**${this.getTextContent(el)}**`);
                return;
            case 'em':
            case 'i':
                lines.push(`*${this.getTextContent(el)}*`);
                return;
            case 'code':
                lines.push(`\`${this.getTextContent(el)}\``);
                return;
            case 'pre': {
                // Preformatted text must keep its line breaks and indentation, so it
                // bypasses getTextContent (which collapses all whitespace). Only
                // leading blank lines and trailing whitespace are stripped.
                const code = (el.textContent ?? '').replace(/^(?:\r?\n)+|\s+$/g, '');
                lines.push('', '```', code, '```', '');
                return;
            }
            case 'blockquote':
                // getTextContent collapses every newline to a space, so the quote
                // is always a single line.
                lines.push('', `> ${this.getTextContent(el)}`, '');
                return;
            case 'ul':
            case 'ol':
                lines.push('');
                // Only direct <li> children; nested list text is flattened into its item.
                el.querySelectorAll(':scope > li').forEach((li, index) => {
                    const prefix = tagName === 'ol' ? `${index + 1}.` : '-';
                    lines.push(`${prefix} ${this.getTextContent(li)}`);
                });
                lines.push('');
                return;
            case 'table':
                lines.push('');
                this.processTable(el, lines);
                lines.push('');
                return;
            case 'img': {
                const alt = el.getAttribute('alt') || 'image';
                const src = el.getAttribute('src');
                if (src) {
                    lines.push(`![${alt}](${src})`);
                }
                return;
            }
            default:
                // For other elements, process children
                this.processChildren(el, lines);
        }
    }

    /**
     * Processes child nodes of an element.
     * @param el Element whose children are converted in document order.
     * @param lines Output buffer shared with `processNode`.
     */
    private processChildren(el: Element, lines: string[]): void {
        el.childNodes.forEach(child => {
            this.processNode(child, lines);
        });
    }

    /**
     * Gets clean text content from an element.
     * @param el Element to read.
     * @returns The element's text with every whitespace run collapsed to a
     * single space and the ends trimmed; empty string when there is no text.
     */
    private getTextContent(el: Element): string {
        return (el.textContent || '').replace(/\s+/g, ' ').trim();
    }

    /**
     * Processes a table element to markdown format.
     * @param table The table element.
     * @param lines Output buffer; one line is appended per non-empty row, plus
     * a `---` separator line after the first non-empty row.
     */
    private processTable(table: Element, lines: string[]): void {
        let isFirstRow = true;

        table.querySelectorAll('tr').forEach(row => {
            const cellContents: string[] = [];
            row.querySelectorAll('th, td').forEach(cell => {
                cellContents.push(this.getTextContent(cell));
            });

            if (cellContents.length === 0) {
                return;
            }

            lines.push(`| ${cellContents.join(' | ')} |`);

            // The first emitted row is treated as the header row.
            if (isFirstRow) {
                lines.push(`| ${cellContents.map(() => '---').join(' | ')} |`);
                isFirstRow = false;
            }
        });
    }

    /**
     * Maps MIME type to document type.
     * @param mimeType Lower-cased MIME type without parameters.
     * @returns The document type hint, or `undefined` for plain text and for
     * any type not in the table.
     */
    private getDocTypeFromMime(mimeType: string): string | undefined {
        // Ordered table; the first entry contained in mimeType wins.
        const mimeMap: ReadonlyArray<readonly [string, string | undefined]> = [
            ['text/html', 'html'],
            ['text/plain', undefined],
            ['text/markdown', 'md'],
            ['text/xml', 'xml'],
            ['application/json', 'json'],
            ['application/xml', 'xml'],
            ['application/javascript', 'js']
        ];

        for (const [mime, docType] of mimeMap) {
            if (mimeType.includes(mime)) {
                return docType;
            }
        }

        return undefined;
    }
}
|
|
@@ -0,0 +1,234 @@
|
|
|
1
|
+
import { strict as assert } from 'assert';
|
|
2
|
+
import * as sinon from 'sinon';
|
|
3
|
+
import fs from 'node:fs';
|
|
4
|
+
import * as path from 'path';
|
|
5
|
+
import { FileFetcher } from './FileFetcher';
|
|
6
|
+
|
|
7
|
+
describe('FileFetcher', () => {
    let fileFetcher: FileFetcher;

    // Stubs for the three fs.promises calls the fetcher relies on.
    let stat: sinon.SinonStub;
    let readdir: sinon.SinonStub;
    let readFile: sinon.SinonStub;

    // Minimal stand-ins for the stat results the fetcher inspects.
    const directoryStat = () => ({ isDirectory: () => true });
    const fileStat = () => ({ isDirectory: () => false });

    /** Makes stat report the listed paths as directories and every other path as a file. */
    const stubDirectories = (...directories: string[]): void => {
        stat.callsFake(async (uri: string) => (directories.includes(uri) ? directoryStat() : fileStat()));
    };

    /** Makes readdir return the given listing per directory and an empty listing otherwise. */
    const stubListings = (listings: Array<[string, string[]]>): void => {
        const byDirectory = new Map<string, string[]>(listings);
        readdir.callsFake(async (uri: string) => {
            const entries = byDirectory.get(uri);
            return entries === undefined ? [] : entries;
        });
    };

    beforeEach(() => {
        stat = sinon.stub(fs.promises, 'stat') as sinon.SinonStub;
        readdir = sinon.stub(fs.promises, 'readdir') as sinon.SinonStub;
        readFile = sinon.stub(fs.promises, 'readFile') as sinon.SinonStub;

        fileFetcher = new FileFetcher();
    });

    afterEach(() => {
        sinon.restore();
    });

    it('resolves true when the path does not exist and does not call onDocument', async () => {
        stat.rejects(new Error('not found'));
        const onDocument = sinon.fake.resolves(true);

        const outcome = await fileFetcher.fetch('/nonexistent', onDocument);

        assert.equal(outcome, true);
        assert.equal(onDocument.callCount, 0);
    });

    it('recurses a flat directory and calls fetch for each file', async () => {
        const firstFile = path.join('/dir', 'file1.txt');
        const secondFile = path.join('/dir', 'file2.md');
        stubDirectories('/dir');
        stubListings([['/dir', ['file1.txt', 'file2.md']]]);
        readFile.resolves('content');

        const fetchSpy = sinon.spy(fileFetcher, 'fetch');
        const onDocument = sinon.fake.resolves(true);

        const outcome = await fileFetcher.fetch('/dir', onDocument);

        assert.equal(outcome, true);
        assert.equal(fetchSpy.callCount, 3); // root + 2 files
        assert.ok(fetchSpy.calledWith(firstFile, sinon.match.func));
        assert.ok(fetchSpy.calledWith(secondFile, sinon.match.func));
        assert.equal(onDocument.callCount, 2);
        assert.ok(onDocument.calledWith(firstFile, 'content', 'txt'));
        assert.ok(onDocument.calledWith(secondFile, 'content', 'md'));
    });

    it('reads a file and passes correct args to onDocument (uri, text, docType)', async () => {
        stat.resolves(fileStat());
        readFile.resolves('file content');
        const onDocument = sinon.fake.resolves(true);

        const outcome = await fileFetcher.fetch('/file.txt', onDocument);

        assert.equal(outcome, true);
        assert.equal(onDocument.callCount, 1);

        const received = onDocument.firstCall.args;
        assert.equal(received[0], '/file.txt');
        assert.equal(received[1], 'file content');
        assert.equal(received[2], 'txt');
    });

    it('handles file with no extension by using last path segment as docType', async () => {
        stat.resolves(fileStat());
        readFile.resolves('content');
        const onDocument = sinon.fake.resolves(true);

        const outcome = await fileFetcher.fetch('/file', onDocument);

        assert.equal(outcome, true);
        assert.equal(onDocument.callCount, 1);

        const received = onDocument.firstCall.args;
        assert.equal(received[0], '/file');
        assert.equal(received[1], 'content');
        assert.equal(received[2], 'file');
    });

    it('awaits onDocument and returns its boolean result (true)', async () => {
        stat.resolves(fileStat());
        readFile.resolves('text');
        const acceptingCallback = sinon.fake.resolves(true);

        const outcome = await fileFetcher.fetch('/file.txt', acceptingCallback);

        assert.equal(outcome, true);
    });

    it('awaits onDocument and returns its boolean result (false)', async () => {
        stat.resolves(fileStat());
        readFile.resolves('text');
        const refusingCallback = sinon.fake.resolves(false);

        const outcome = await fileFetcher.fetch('/file.txt', refusingCallback);

        assert.equal(outcome, false);
    });

    it('propagates a rejection from onDocument as a rejected promise', async () => {
        stat.resolves(fileStat());
        readFile.resolves('text');
        const onDocument = sinon.fake.rejects(new Error('fail'));

        await assert.rejects(() => fileFetcher.fetch('/file.txt', onDocument), /fail/);
    });

    it('recurses nested directories and processes files at multiple depths', async () => {
        const nestedDir = path.join('/dir', 'subdir');
        stubDirectories('/dir', nestedDir);
        stubListings([
            ['/dir', ['file1.txt', 'subdir']],
            [nestedDir, ['file2.md']]
        ]);
        readFile.resolves('content');
        const onDocument = sinon.fake.resolves(true);

        const outcome = await fileFetcher.fetch('/dir', onDocument);

        assert.equal(outcome, true);
        assert.equal(onDocument.callCount, 2);
        assert.ok(onDocument.calledWith(path.join('/dir', 'file1.txt'), 'content', 'txt'));
        assert.ok(onDocument.calledWith(path.join('/dir', 'subdir', 'file2.md'), 'content', 'md'));
    });

    it('handles an empty directory (no onDocument calls, resolves true)', async () => {
        stat.resolves(directoryStat());
        readdir.resolves([]);
        const onDocument = sinon.fake.resolves(true);

        const outcome = await fileFetcher.fetch('/emptydir', onDocument);

        assert.equal(outcome, true);
        assert.equal(onDocument.callCount, 0);
    });

    it('recurses directories that contain only subdirectories (no onDocument calls)', async () => {
        const firstChild = path.join('/dir', 'subdir1');
        const secondChild = path.join('/dir', 'subdir2');
        stubDirectories('/dir', firstChild, secondChild);
        stubListings([
            ['/dir', ['subdir1', 'subdir2']],
            [firstChild, []],
            [secondChild, []]
        ]);
        const onDocument = sinon.fake.resolves(true);

        const outcome = await fileFetcher.fetch('/dir', onDocument);

        assert.equal(outcome, true);
        assert.equal(onDocument.callCount, 0);
    });

    it('rejects when readdir fails for a directory', async () => {
        stat.resolves(directoryStat());
        readdir.rejects(new Error('fail'));
        const onDocument = sinon.fake.resolves(true);

        await assert.rejects(() => fileFetcher.fetch('/dir', onDocument), /fail/);
    });

    it('rejects when readFile fails for a file', async () => {
        stat.resolves(fileStat());
        readFile.rejects(new Error('fail'));
        const onDocument = sinon.fake.resolves(true);

        await assert.rejects(() => fileFetcher.fetch('/file.txt', onDocument), /fail/);
    });

    it('extracts docType from multi-part extensions (file.tar.gz -> gz)', async () => {
        stat.resolves(fileStat());
        readFile.resolves('content');
        const onDocument = sinon.fake.resolves(true);

        const outcome = await fileFetcher.fetch('/file.tar.gz', onDocument);

        assert.equal(outcome, true);
        assert.equal(onDocument.callCount, 1);
        assert.equal(onDocument.firstCall.args[2], 'gz');
    });

    it('returns false when any child in a directory returns false (aggregates to allOk=false)', async () => {
        const goodFile = path.join('/dir', 'good.txt');
        const badFile = path.join('/dir', 'bad.txt');
        stubDirectories('/dir');
        stubListings([['/dir', ['good.txt', 'bad.txt']]]);
        readFile.resolves('content');

        // Only the "bad" file is refused; every other document is accepted.
        const onDocument = sinon.stub();
        onDocument.callsFake(async (uri: string) => uri !== badFile);

        const outcome = await fileFetcher.fetch('/dir', onDocument);

        assert.equal(outcome, false);
        assert.equal(onDocument.callCount, 2);
        assert.ok(onDocument.calledWith(goodFile, 'content', 'txt'));
        assert.ok(onDocument.calledWith(badFile, 'content', 'txt'));
    });
});
|
package/src/FileFetcher.ts
CHANGED
|
@@ -1,31 +1,43 @@
|
|
|
1
1
|
import { TextFetcher } from './types';
|
|
2
|
-
import
|
|
3
|
-
import * as path from 'path';
|
|
2
|
+
import fs from 'node:fs';
|
|
3
|
+
import * as path from 'node:path';
|
|
4
4
|
|
|
5
5
|
export class FileFetcher implements TextFetcher {
    /**
     * Reads the file at `uri`, or every file beneath it when `uri` is a
     * directory, and hands each file's text to `onDocument`.
     *
     * @param uri File or directory path to fetch.
     * @param onDocument Callback invoked once per file with the file path, its
     * UTF-8 text, and a lower-cased document type: the extension without its
     * dot, or the base name when the path has no extension.
     * @returns `true` when `uri` cannot be stat-ed, otherwise the callback's
     * result for a file, or `false` for a directory if any entry beneath it
     * yielded `false`.
     * @throws Whatever `readdir`, `readFile` or `onDocument` rejects with; only
     * the initial `stat` failure is swallowed.
     */
    public async fetch(
        uri: string,
        onDocument: (uri: string, text: string, docType?: string | undefined) => Promise<boolean>
    ): Promise<boolean> {
        let info;
        try {
            info = await fs.promises.stat(uri);
        } catch {
            // A path that cannot be stat-ed is treated as a successful no-op.
            return true;
        }

        if (!info.isDirectory()) {
            // Plain file: read it and derive the document type from its name.
            const contents = await fs.promises.readFile(uri, 'utf8');
            const suffix = path.extname(uri);
            const hasExtension = Boolean(suffix) && suffix.length > 1;
            const docType = hasExtension
                ? suffix.slice(1).toLowerCase()
                : path.basename(uri).toLowerCase();

            return await onDocument(uri, contents, docType);
        }

        // Directory: visit entries one at a time. A false result is remembered
        // but does not stop the walk, so every entry is still processed.
        const children = await fs.promises.readdir(uri);
        let everyChildOk = true;
        for (const child of children) {
            const childOk = await this.fetch(path.join(uri, child), onDocument);
            everyChildOk = everyChildOk && childOk;
        }
        return everyChildOk;
    }
}
|