vectra 0.12.2 → 0.14.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +1 -1
- package/README.md +92 -100
- package/bin/vectra.js +3 -0
- package/lib/BrowserWebFetcher.d.ts +75 -0
- package/lib/BrowserWebFetcher.d.ts.map +1 -0
- package/lib/BrowserWebFetcher.js +290 -0
- package/lib/BrowserWebFetcher.js.map +1 -0
- package/lib/FileFetcher.d.ts +5 -0
- package/lib/FileFetcher.d.ts.map +1 -0
- package/lib/FileFetcher.js +89 -0
- package/lib/FileFetcher.js.map +1 -0
- package/lib/FileFetcher.spec.d.ts +2 -0
- package/lib/FileFetcher.spec.d.ts.map +1 -0
- package/lib/FileFetcher.spec.js +244 -0
- package/lib/FileFetcher.spec.js.map +1 -0
- package/lib/FolderWatcher.d.ts +91 -0
- package/lib/FolderWatcher.d.ts.map +1 -0
- package/lib/FolderWatcher.js +304 -0
- package/lib/FolderWatcher.js.map +1 -0
- package/lib/FolderWatcher.spec.d.ts +2 -0
- package/lib/FolderWatcher.spec.d.ts.map +1 -0
- package/lib/FolderWatcher.spec.js +308 -0
- package/lib/FolderWatcher.spec.js.map +1 -0
- package/lib/GPT3Tokenizer.d.ts +9 -0
- package/lib/GPT3Tokenizer.spec.d.ts +2 -0
- package/lib/GPT3Tokenizer.spec.d.ts.map +1 -0
- package/lib/GPT3Tokenizer.spec.js +45 -0
- package/lib/GPT3Tokenizer.spec.js.map +1 -0
- package/lib/ItemSelector.d.ts +41 -0
- package/lib/ItemSelector.d.ts.map +1 -0
- package/lib/ItemSelector.js +179 -0
- package/lib/ItemSelector.js.map +1 -0
- package/lib/ItemSelector.spec.d.ts +2 -0
- package/lib/ItemSelector.spec.d.ts.map +1 -0
- package/lib/ItemSelector.spec.js +204 -0
- package/lib/ItemSelector.spec.js.map +1 -0
- package/lib/LocalDocument.d.ts +54 -0
- package/lib/LocalDocument.d.ts.map +1 -1
- package/lib/LocalDocument.js +116 -0
- package/lib/LocalDocument.js.map +1 -0
- package/lib/LocalDocument.spec.d.ts +2 -0
- package/lib/LocalDocument.spec.d.ts.map +1 -0
- package/lib/LocalDocument.spec.js +214 -0
- package/lib/LocalDocument.spec.js.map +1 -0
- package/lib/LocalDocumentIndex.d.ts +152 -0
- package/lib/LocalDocumentIndex.d.ts.map +1 -1
- package/lib/LocalDocumentIndex.js +420 -0
- package/lib/LocalDocumentIndex.js.map +1 -0
- package/lib/LocalDocumentIndex.spec.d.ts +2 -0
- package/lib/LocalDocumentIndex.spec.d.ts.map +1 -0
- package/lib/LocalDocumentIndex.spec.js +494 -0
- package/lib/LocalDocumentIndex.spec.js.map +1 -0
- package/lib/LocalDocumentResult.d.ts +66 -0
- package/lib/LocalDocumentResult.d.ts.map +1 -1
- package/lib/LocalDocumentResult.js +376 -0
- package/lib/LocalDocumentResult.js.map +1 -0
- package/lib/LocalDocumentResult.spec.d.ts +2 -0
- package/lib/LocalDocumentResult.spec.d.ts.map +1 -0
- package/lib/LocalDocumentResult.spec.js +373 -0
- package/lib/LocalDocumentResult.spec.js.map +1 -0
- package/lib/LocalEmbeddings.d.ts +59 -0
- package/lib/LocalEmbeddings.d.ts.map +1 -0
- package/lib/LocalEmbeddings.js +101 -0
- package/lib/LocalEmbeddings.js.map +1 -0
- package/lib/LocalEmbeddings.spec.d.ts +2 -0
- package/lib/LocalEmbeddings.spec.d.ts.map +1 -0
- package/lib/LocalEmbeddings.spec.js +155 -0
- package/lib/LocalEmbeddings.spec.js.map +1 -0
- package/lib/LocalIndex.d.ts +159 -0
- package/lib/LocalIndex.d.ts.map +1 -1
- package/lib/LocalIndex.js +519 -0
- package/lib/LocalIndex.js.map +1 -0
- package/lib/LocalIndex.spec.d.ts +2 -0
- package/lib/LocalIndex.spec.js +611 -9
- package/lib/LocalIndex.spec.js.map +1 -1
- package/lib/OpenAIEmbeddings.d.ts +124 -0
- package/lib/OpenAIEmbeddings.d.ts.map +1 -0
- package/lib/OpenAIEmbeddings.js +166 -0
- package/lib/OpenAIEmbeddings.js.map +1 -0
- package/lib/OpenAIEmbeddings.spec.d.ts +2 -0
- package/lib/OpenAIEmbeddings.spec.d.ts.map +1 -0
- package/lib/OpenAIEmbeddings.spec.js +298 -0
- package/lib/OpenAIEmbeddings.spec.js.map +1 -0
- package/lib/TextSplitter.d.ts +21 -0
- package/lib/TextSplitter.d.ts.map +1 -1
- package/lib/TextSplitter.js +500 -0
- package/lib/TextSplitter.js.map +1 -0
- package/lib/TextSplitter.spec.d.ts +2 -0
- package/lib/TextSplitter.spec.d.ts.map +1 -0
- package/lib/TextSplitter.spec.js +337 -0
- package/lib/TextSplitter.spec.js.map +1 -0
- package/lib/TransformersEmbeddings.d.ts +121 -0
- package/lib/TransformersEmbeddings.d.ts.map +1 -0
- package/lib/TransformersEmbeddings.js +176 -0
- package/lib/TransformersEmbeddings.js.map +1 -0
- package/lib/TransformersEmbeddings.spec.d.ts +2 -0
- package/lib/TransformersEmbeddings.spec.d.ts.map +1 -0
- package/lib/TransformersEmbeddings.spec.js +198 -0
- package/lib/TransformersEmbeddings.spec.js.map +1 -0
- package/lib/TransformersTokenizer.d.ts +33 -0
- package/lib/TransformersTokenizer.d.ts.map +1 -0
- package/lib/TransformersTokenizer.js +44 -0
- package/lib/TransformersTokenizer.js.map +1 -0
- package/lib/TransformersTokenizer.spec.d.ts +2 -0
- package/lib/TransformersTokenizer.spec.d.ts.map +1 -0
- package/lib/TransformersTokenizer.spec.js +112 -0
- package/lib/TransformersTokenizer.spec.js.map +1 -0
- package/lib/WebFetcher.d.ts +14 -0
- package/lib/WebFetcher.d.ts.map +1 -0
- package/lib/WebFetcher.js +238 -0
- package/lib/WebFetcher.js.map +1 -0
- package/lib/WebFetcher.spec.d.ts +2 -0
- package/lib/WebFetcher.spec.d.ts.map +1 -0
- package/lib/WebFetcher.spec.js +263 -0
- package/lib/WebFetcher.spec.js.map +1 -0
- package/lib/browser.d.ts +30 -0
- package/lib/browser.d.ts.map +1 -0
- package/lib/browser.js +52 -0
- package/lib/browser.js.map +1 -0
- package/lib/codecs/IndexCodec.d.ts +37 -0
- package/lib/codecs/IndexCodec.d.ts.map +1 -0
- package/lib/codecs/IndexCodec.js +3 -0
- package/lib/codecs/IndexCodec.js.map +1 -0
- package/lib/codecs/JsonCodec.d.ts +19 -0
- package/lib/codecs/JsonCodec.d.ts.map +1 -0
- package/lib/codecs/JsonCodec.js +35 -0
- package/lib/codecs/JsonCodec.js.map +1 -0
- package/lib/codecs/JsonCodec.spec.d.ts +2 -0
- package/lib/codecs/JsonCodec.spec.d.ts.map +1 -0
- package/lib/codecs/JsonCodec.spec.js +66 -0
- package/lib/codecs/JsonCodec.spec.js.map +1 -0
- package/lib/codecs/LocalIndex.protobuf.spec.d.ts +2 -0
- package/lib/codecs/LocalIndex.protobuf.spec.d.ts.map +1 -0
- package/lib/codecs/LocalIndex.protobuf.spec.js +108 -0
- package/lib/codecs/LocalIndex.protobuf.spec.js.map +1 -0
- package/lib/codecs/ProtobufCodec.d.ts +20 -0
- package/lib/codecs/ProtobufCodec.d.ts.map +1 -0
- package/lib/codecs/ProtobufCodec.js +225 -0
- package/lib/codecs/ProtobufCodec.js.map +1 -0
- package/lib/codecs/ProtobufCodec.spec.d.ts +2 -0
- package/lib/codecs/ProtobufCodec.spec.d.ts.map +1 -0
- package/lib/codecs/ProtobufCodec.spec.js +155 -0
- package/lib/codecs/ProtobufCodec.spec.js.map +1 -0
- package/lib/codecs/index.d.ts +5 -0
- package/lib/codecs/index.d.ts.map +1 -0
- package/lib/codecs/index.js +21 -0
- package/lib/codecs/index.js.map +1 -0
- package/lib/codecs/migrateIndex.d.ts +24 -0
- package/lib/codecs/migrateIndex.d.ts.map +1 -0
- package/lib/codecs/migrateIndex.js +119 -0
- package/lib/codecs/migrateIndex.js.map +1 -0
- package/lib/codecs/migrateIndex.spec.d.ts +2 -0
- package/lib/codecs/migrateIndex.spec.d.ts.map +1 -0
- package/lib/codecs/migrateIndex.spec.js +151 -0
- package/lib/codecs/migrateIndex.spec.js.map +1 -0
- package/lib/codecs/schemas/index.proto +34 -0
- package/lib/index.d.ts +20 -0
- package/lib/index.d.ts.map +1 -1
- package/lib/index.js +36 -0
- package/lib/index.js.map +1 -0
- package/lib/internals/Colorize.d.ts +14 -0
- package/lib/internals/Colorize.d.ts.map +1 -0
- package/lib/internals/Colorize.js +69 -0
- package/lib/internals/Colorize.js.map +1 -0
- package/lib/internals/index.d.ts +3 -0
- package/lib/internals/index.d.ts.map +1 -0
- package/lib/internals/index.js +19 -0
- package/lib/internals/index.js.map +1 -0
- package/lib/internals/types.d.ts +43 -0
- package/lib/internals/types.d.ts.map +1 -0
- package/lib/internals/types.js +3 -0
- package/lib/internals/types.js.map +1 -0
- package/lib/server/IndexManager.d.ts +78 -0
- package/lib/server/IndexManager.d.ts.map +1 -0
- package/lib/server/IndexManager.js +259 -0
- package/lib/server/IndexManager.js.map +1 -0
- package/lib/server/VectraServer.d.ts +40 -0
- package/lib/server/VectraServer.d.ts.map +1 -0
- package/lib/server/VectraServer.js +151 -0
- package/lib/server/VectraServer.js.map +1 -0
- package/lib/server/VectraServer.spec.d.ts +2 -0
- package/lib/server/VectraServer.spec.d.ts.map +1 -0
- package/lib/server/VectraServer.spec.js +322 -0
- package/lib/server/VectraServer.spec.js.map +1 -0
- package/lib/server/handlers/documentHandlers.d.ts +15 -0
- package/lib/server/handlers/documentHandlers.d.ts.map +1 -0
- package/lib/server/handlers/documentHandlers.js +95 -0
- package/lib/server/handlers/documentHandlers.js.map +1 -0
- package/lib/server/handlers/helpers.d.ts +23 -0
- package/lib/server/handlers/helpers.d.ts.map +1 -0
- package/lib/server/handlers/helpers.js +138 -0
- package/lib/server/handlers/helpers.js.map +1 -0
- package/lib/server/handlers/index.d.ts +8 -0
- package/lib/server/handlers/index.d.ts.map +1 -0
- package/lib/server/handlers/index.js +22 -0
- package/lib/server/handlers/index.js.map +1 -0
- package/lib/server/handlers/indexHandlers.d.ts +14 -0
- package/lib/server/handlers/indexHandlers.d.ts.map +1 -0
- package/lib/server/handlers/indexHandlers.js +85 -0
- package/lib/server/handlers/indexHandlers.js.map +1 -0
- package/lib/server/handlers/itemHandlers.d.ts +34 -0
- package/lib/server/handlers/itemHandlers.d.ts.map +1 -0
- package/lib/server/handlers/itemHandlers.js +166 -0
- package/lib/server/handlers/itemHandlers.js.map +1 -0
- package/lib/server/handlers/lifecycleHandlers.d.ts +11 -0
- package/lib/server/handlers/lifecycleHandlers.d.ts.map +1 -0
- package/lib/server/handlers/lifecycleHandlers.js +31 -0
- package/lib/server/handlers/lifecycleHandlers.js.map +1 -0
- package/lib/server/handlers/queryHandlers.d.ts +27 -0
- package/lib/server/handlers/queryHandlers.d.ts.map +1 -0
- package/lib/server/handlers/queryHandlers.js +135 -0
- package/lib/server/handlers/queryHandlers.js.map +1 -0
- package/lib/server/handlers/statsHandlers.d.ts +17 -0
- package/lib/server/handlers/statsHandlers.d.ts.map +1 -0
- package/lib/server/handlers/statsHandlers.js +81 -0
- package/lib/server/handlers/statsHandlers.js.map +1 -0
- package/lib/server/index.d.ts +4 -0
- package/lib/server/index.d.ts.map +1 -0
- package/lib/server/index.js +23 -0
- package/lib/server/index.js.map +1 -0
- package/lib/storage/FileStorage.d.ts +92 -0
- package/lib/storage/FileStorage.d.ts.map +1 -0
- package/lib/storage/FileStorage.js +3 -0
- package/lib/storage/FileStorage.js.map +1 -0
- package/lib/storage/FileStorageUtilities.d.ts +36 -0
- package/lib/storage/FileStorageUtilities.d.ts.map +1 -0
- package/lib/storage/FileStorageUtilities.js +91 -0
- package/lib/storage/FileStorageUtilities.js.map +1 -0
- package/lib/storage/FileStorageUtilities.spec.d.ts +2 -0
- package/lib/storage/FileStorageUtilities.spec.d.ts.map +1 -0
- package/lib/storage/FileStorageUtilities.spec.js +98 -0
- package/lib/storage/FileStorageUtilities.spec.js.map +1 -0
- package/lib/storage/FileType.d.ts +29 -0
- package/lib/storage/FileType.d.ts.map +1 -0
- package/lib/storage/FileType.js +38 -0
- package/lib/storage/FileType.js.map +1 -0
- package/lib/storage/IndexedDBStorage.d.ts +47 -0
- package/lib/storage/IndexedDBStorage.d.ts.map +1 -0
- package/lib/storage/IndexedDBStorage.js +347 -0
- package/lib/storage/IndexedDBStorage.js.map +1 -0
- package/lib/storage/LocalFileStorage.browser.d.ts +19 -0
- package/lib/storage/LocalFileStorage.browser.d.ts.map +1 -0
- package/lib/storage/LocalFileStorage.browser.js +43 -0
- package/lib/storage/LocalFileStorage.browser.js.map +1 -0
- package/lib/storage/LocalFileStorage.d.ts +23 -0
- package/lib/storage/LocalFileStorage.d.ts.map +1 -0
- package/lib/storage/LocalFileStorage.js +152 -0
- package/lib/storage/LocalFileStorage.js.map +1 -0
- package/lib/storage/LocalFileStorage.spec.d.ts +2 -0
- package/lib/storage/LocalFileStorage.spec.d.ts.map +1 -0
- package/lib/storage/LocalFileStorage.spec.js +249 -0
- package/lib/storage/LocalFileStorage.spec.js.map +1 -0
- package/lib/storage/VirtualFileStorage.d.ts +18 -0
- package/lib/storage/VirtualFileStorage.d.ts.map +1 -0
- package/lib/storage/VirtualFileStorage.js +178 -0
- package/lib/storage/VirtualFileStorage.js.map +1 -0
- package/lib/storage/VirtualFileStorage.spec.d.ts +2 -0
- package/lib/storage/VirtualFileStorage.spec.d.ts.map +1 -0
- package/lib/storage/VirtualFileStorage.spec.js +302 -0
- package/lib/storage/VirtualFileStorage.spec.js.map +1 -0
- package/lib/storage/index.d.ts +6 -0
- package/lib/storage/index.d.ts.map +1 -0
- package/lib/storage/index.js +22 -0
- package/lib/storage/index.js.map +1 -0
- package/lib/templates/templates/csharp/README.md +48 -0
- package/lib/templates/templates/csharp/VectraClient.cs +234 -0
- package/lib/templates/templates/go/README.md +71 -0
- package/lib/templates/templates/go/vectra_client.go +322 -0
- package/lib/templates/templates/java/README.md +81 -0
- package/lib/templates/templates/java/VectraClient.java +232 -0
- package/lib/templates/templates/python/README.md +37 -0
- package/lib/templates/templates/python/vectra_client.py +279 -0
- package/lib/templates/templates/rust/Cargo.toml +14 -0
- package/lib/templates/templates/rust/README.md +39 -0
- package/lib/templates/templates/rust/build.rs +4 -0
- package/lib/templates/templates/rust/lib.rs +284 -0
- package/lib/templates/templates/typescript/README.md +96 -0
- package/lib/templates/templates/typescript/VectraClient.ts +374 -0
- package/lib/templates/typescript/VectraClient.d.ts +114 -0
- package/lib/templates/typescript/VectraClient.d.ts.map +1 -0
- package/lib/templates/typescript/VectraClient.js +328 -0
- package/lib/templates/typescript/VectraClient.js.map +1 -0
- package/lib/types.d.ts +153 -0
- package/lib/types.d.ts.map +1 -0
- package/lib/types.js +3 -0
- package/lib/types.js.map +1 -0
- package/lib/utils/index.d.ts +2 -0
- package/lib/utils/index.d.ts.map +1 -0
- package/lib/utils/index.js +18 -0
- package/lib/utils/index.js.map +1 -0
- package/lib/utils/pathUtils.d.ts +40 -0
- package/lib/utils/pathUtils.d.ts.map +1 -0
- package/lib/utils/pathUtils.js +98 -0
- package/lib/utils/pathUtils.js.map +1 -0
- package/lib/vectra-cli.d.ts +2 -0
- package/lib/vectra-cli.d.ts.map +1 -1
- package/lib/vectra-cli.generate.spec.d.ts +2 -0
- package/lib/vectra-cli.generate.spec.d.ts.map +1 -0
- package/lib/vectra-cli.generate.spec.js +112 -0
- package/lib/vectra-cli.generate.spec.js.map +1 -0
- package/lib/vectra-cli.js +760 -0
- package/lib/vectra-cli.js.map +1 -0
- package/lib/vectra-cli.spec.d.ts +1 -0
- package/lib/vectra-cli.spec.d.ts.map +1 -0
- package/lib/vectra-cli.spec.js +2 -0
- package/lib/vectra-cli.spec.js.map +1 -0
- package/package.json +91 -16
- package/proto/vectra_service.proto +276 -0
- package/src/BrowserWebFetcher.ts +345 -0
- package/src/FileFetcher.spec.ts +234 -0
- package/src/FileFetcher.ts +37 -25
- package/src/FolderWatcher.spec.ts +288 -0
- package/src/FolderWatcher.ts +304 -0
- package/src/GPT3Tokenizer.spec.ts +50 -0
- package/src/ItemSelector.spec.ts +252 -0
- package/src/ItemSelector.ts +163 -150
- package/src/LocalDocument.spec.ts +211 -0
- package/src/LocalDocument.ts +88 -94
- package/src/LocalDocumentIndex.spec.ts +481 -0
- package/src/LocalDocumentIndex.ts +39 -40
- package/src/LocalDocumentResult.spec.ts +373 -0
- package/src/LocalDocumentResult.ts +489 -319
- package/src/LocalEmbeddings.spec.ts +138 -0
- package/src/LocalEmbeddings.ts +120 -0
- package/src/LocalIndex.spec.ts +808 -66
- package/src/LocalIndex.ts +479 -429
- package/src/OpenAIEmbeddings.spec.ts +354 -0
- package/src/OpenAIEmbeddings.ts +26 -27
- package/src/TextSplitter.spec.ts +342 -0
- package/src/TextSplitter.ts +517 -532
- package/src/TransformersEmbeddings.spec.ts +188 -0
- package/src/TransformersEmbeddings.ts +232 -0
- package/src/TransformersTokenizer.spec.ts +143 -0
- package/src/TransformersTokenizer.ts +45 -0
- package/src/WebFetcher.spec.ts +288 -0
- package/src/WebFetcher.ts +184 -186
- package/src/browser.ts +69 -0
- package/src/codecs/IndexCodec.ts +40 -0
- package/src/codecs/JsonCodec.spec.ts +70 -0
- package/src/codecs/JsonCodec.ts +37 -0
- package/src/codecs/LocalIndex.protobuf.spec.ts +115 -0
- package/src/codecs/ProtobufCodec.spec.ts +166 -0
- package/src/codecs/ProtobufCodec.ts +193 -0
- package/src/codecs/index.ts +4 -0
- package/src/codecs/migrateIndex.spec.ts +176 -0
- package/src/codecs/migrateIndex.ts +125 -0
- package/src/codecs/schemas/index.proto +34 -0
- package/src/index.ts +9 -1
- package/src/internals/Colorize.ts +19 -16
- package/src/server/IndexManager.ts +243 -0
- package/src/server/VectraServer.spec.ts +303 -0
- package/src/server/VectraServer.ts +156 -0
- package/src/server/handlers/documentHandlers.ts +59 -0
- package/src/server/handlers/helpers.ts +93 -0
- package/src/server/handlers/index.ts +7 -0
- package/src/server/handlers/indexHandlers.ts +44 -0
- package/src/server/handlers/itemHandlers.ts +140 -0
- package/src/server/handlers/lifecycleHandlers.ts +26 -0
- package/src/server/handlers/queryHandlers.ts +96 -0
- package/src/server/handlers/statsHandlers.ts +38 -0
- package/src/server/index.ts +3 -0
- package/src/storage/FileStorage.ts +105 -0
- package/src/storage/FileStorageUtilities.spec.ts +106 -0
- package/src/storage/FileStorageUtilities.ts +77 -0
- package/src/storage/FileType.ts +61 -0
- package/src/storage/IndexedDBStorage.ts +365 -0
- package/src/storage/LocalFileStorage.browser.ts +52 -0
- package/src/storage/LocalFileStorage.spec.ts +292 -0
- package/src/storage/LocalFileStorage.ts +98 -0
- package/src/storage/VirtualFileStorage.spec.ts +307 -0
- package/src/storage/VirtualFileStorage.ts +169 -0
- package/src/storage/index.ts +5 -0
- package/src/templates/csharp/README.md +48 -0
- package/src/templates/csharp/VectraClient.cs +234 -0
- package/src/templates/go/README.md +71 -0
- package/src/templates/go/vectra_client.go +322 -0
- package/src/templates/java/README.md +81 -0
- package/src/templates/java/VectraClient.java +232 -0
- package/src/templates/python/README.md +37 -0
- package/src/templates/python/vectra_client.py +279 -0
- package/src/templates/rust/Cargo.toml +14 -0
- package/src/templates/rust/README.md +39 -0
- package/src/templates/rust/build.rs +4 -0
- package/src/templates/rust/lib.rs +284 -0
- package/src/templates/typescript/README.md +96 -0
- package/src/templates/typescript/VectraClient.ts +374 -0
- package/src/types.ts +131 -123
- package/src/utils/index.ts +1 -0
- package/src/utils/pathUtils.ts +106 -0
- package/src/vectra-cli.generate.spec.ts +72 -0
- package/src/vectra-cli.spec.ts +0 -0
- package/src/vectra-cli.ts +687 -246
|
@@ -0,0 +1,176 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
|
|
3
|
+
if (k2 === undefined) k2 = k;
|
|
4
|
+
var desc = Object.getOwnPropertyDescriptor(m, k);
|
|
5
|
+
if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
|
|
6
|
+
desc = { enumerable: true, get: function() { return m[k]; } };
|
|
7
|
+
}
|
|
8
|
+
Object.defineProperty(o, k2, desc);
|
|
9
|
+
}) : (function(o, m, k, k2) {
|
|
10
|
+
if (k2 === undefined) k2 = k;
|
|
11
|
+
o[k2] = m[k];
|
|
12
|
+
}));
|
|
13
|
+
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
|
|
14
|
+
Object.defineProperty(o, "default", { enumerable: true, value: v });
|
|
15
|
+
}) : function(o, v) {
|
|
16
|
+
o["default"] = v;
|
|
17
|
+
});
|
|
18
|
+
/**
 * TypeScript emit helper backing `import * as ns` / dynamic `import()`.
 *
 * Objects flagged `__esModule` are returned untouched. Anything else is
 * wrapped in a fresh namespace object: every own property except
 * `default` is bound through `__createBinding`, and the original value
 * itself is attached as `default` through `__setModuleDefault`.
 */
var __importStar = (this && this.__importStar) || (function () {
    // Resolved on first use: native getOwnPropertyNames or a for-in fallback.
    var listOwnKeys = null;
    function ownKeys(o) {
        if (listOwnKeys === null) {
            listOwnKeys = Object.getOwnPropertyNames || function (o) {
                var names = [];
                for (var key in o) {
                    if (Object.prototype.hasOwnProperty.call(o, key)) {
                        names.push(key);
                    }
                }
                return names;
            };
        }
        return listOwnKeys(o);
    }
    return function (mod) {
        if (mod && mod.__esModule) {
            return mod;
        }
        var result = {};
        if (mod != null) {
            var keys = ownKeys(mod);
            for (var i = 0; i < keys.length; i++) {
                if (keys[i] !== "default") {
                    __createBinding(result, mod, keys[i]);
                }
            }
        }
        __setModuleDefault(result, mod);
        return result;
    };
})();
|
|
35
|
+
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
|
|
36
|
+
function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
|
|
37
|
+
return new (P || (P = Promise))(function (resolve, reject) {
|
|
38
|
+
function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
|
|
39
|
+
function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
|
|
40
|
+
function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
|
|
41
|
+
step((generator = generator.apply(thisArg, _arguments || [])).next());
|
|
42
|
+
});
|
|
43
|
+
};
|
|
44
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
45
|
+
exports.TransformersEmbeddings = void 0;
|
|
46
|
+
const TransformersTokenizer_1 = require("./TransformersTokenizer");
|
|
47
|
+
const DEFAULT_MODEL = 'Xenova/all-MiniLM-L6-v2';
|
|
48
|
+
/**
 * An embeddings model using Transformers.js for local, offline inference.
 * @remarks
 * Requires @huggingface/transformers as a peer dependency.
 * Use the static `create()` method to instantiate.
 *
 * @example
 * ```typescript
 * const embeddings = await TransformersEmbeddings.create({
 *   model: 'Xenova/all-MiniLM-L6-v2'
 * });
 *
 * const index = new LocalDocumentIndex({
 *   folderPath: 'my-index',
 *   embeddings: embeddings,
 *   tokenizer: embeddings.getTokenizer()
 * });
 * ```
 */
class TransformersEmbeddings {
    /**
     * Private constructor - use TransformersEmbeddings.create() instead.
     * @param extractor Feature-extraction pipeline invoked by `createEmbeddings()`.
     * @param tokenizer Tokenizer object wrapped by `getTokenizer()`.
     * @param options Options with defaults already applied; `maxTokens` is
     * also exposed directly on the instance.
     */
    constructor(extractor, tokenizer, options) {
        this._extractor = extractor;
        this._tokenizer = tokenizer;
        this._options = options;
        this.maxTokens = options.maxTokens;
    }
    /**
     * Creates a new TransformersEmbeddings instance.
     * @param options Configuration options. Missing values default to:
     * model `'Xenova/all-MiniLM-L6-v2'`, maxTokens `512`, device `'auto'`,
     * dtype `'fp32'`, normalize `true`, pooling `'mean'`.
     * @returns Promise resolving to initialized TransformersEmbeddings instance.
     * @throws Error if @huggingface/transformers cannot be loaded. The
     * underlying failure is attached as the error's `cause` property.
     */
    static create(options) {
        return __awaiter(this, void 0, void 0, function* () {
            // Dynamically import to allow optional dependency
            let transformers;
            try {
                transformers = yield Promise.resolve().then(() => __importStar(require('@huggingface/transformers')));
            }
            catch (e) {
                // Keep the root cause: the package may be installed but fail
                // while loading, which the fixed message alone would hide.
                throw Object.assign(new Error('TransformersEmbeddings requires @huggingface/transformers. ' +
                    'Install it with: npm install @huggingface/transformers'), { cause: e });
            }
            const { pipeline } = transformers;
            // Apply defaults (only null/undefined fall back, so `false`/`0` are kept)
            const pick = (value, fallback) => (value !== null && value !== void 0 ? value : fallback);
            const given = pick(options, {});
            const opts = {
                model: pick(given.model, DEFAULT_MODEL),
                maxTokens: pick(given.maxTokens, 512),
                device: pick(given.device, 'auto'),
                dtype: pick(given.dtype, 'fp32'),
                normalize: pick(given.normalize, true),
                pooling: pick(given.pooling, 'mean'),
                progressCallback: given.progressCallback
            };
            // Build pipeline options
            const pipelineOptions = {
                device: opts.device,
                dtype: opts.dtype
            };
            if (opts.progressCallback) {
                pipelineOptions.progress_callback = opts.progressCallback;
            }
            // Load the feature extraction pipeline
            const extractor = yield pipeline('feature-extraction', opts.model, pipelineOptions);
            // Reuse the pipeline's own tokenizer so TextSplitter counts tokens
            // the same way the model does.
            const tokenizer = extractor.tokenizer;
            return new TransformersEmbeddings(extractor, tokenizer, opts);
        });
    }
    /**
     * Returns a tokenizer that uses the same tokenization as this embedding model.
     * @remarks
     * Use this tokenizer with LocalDocumentIndex to ensure text chunking
     * aligns with the embedding model's token boundaries.
     * @returns TransformersTokenizer instance.
     */
    getTokenizer() {
        return new TransformersTokenizer_1.TransformersTokenizer(this._tokenizer);
    }
    /**
     * Creates embeddings for the given inputs.
     * @remarks
     * Never rejects: any failure is reported as a response with
     * `status: 'error'` and a `message`.
     * @param inputs Text inputs to create embeddings for (a single string or an array).
     * @returns EmbeddingsResponse with status and generated embeddings.
     */
    createEmbeddings(inputs) {
        return __awaiter(this, void 0, void 0, function* () {
            try {
                const inputArray = Array.isArray(inputs) ? inputs : [inputs];
                // Process all inputs in a single batch
                const output = yield this._extractor(inputArray, {
                    pooling: this._options.pooling,
                    normalize: this._options.normalize
                });
                const dims = output.dims;
                // The slicing below is only valid for a [batch, dimension] tensor;
                // any other rank would yield wrong-length vectors reported as success.
                if (!dims || dims.length !== 2) {
                    const rank = dims ? dims.length : 'unknown';
                    throw new Error(`expected a 2-D [batch, dimension] output but received a ${rank}-D tensor; ` +
                        `check the 'pooling' option`);
                }
                const [batchSize, embeddingDim] = dims;
                const data = output.data;
                // Slice the flat array into individual embeddings
                const embeddings = [];
                for (let i = 0; i < batchSize; i++) {
                    const start = i * embeddingDim;
                    const end = start + embeddingDim;
                    embeddings.push(Array.from(data.slice(start, end)));
                }
                return {
                    status: 'success',
                    output: embeddings,
                    model: this._options.model
                };
            }
            catch (error) {
                // Anything can be thrown; avoid reporting "undefined" for non-Error values.
                const reason = error instanceof Error ? error.message : String(error);
                return {
                    status: 'error',
                    message: `Error generating embeddings: ${reason}`
                };
            }
        });
    }
    /**
     * Returns the model name being used.
     */
    get model() {
        return this._options.model;
    }
}
|
|
175
|
+
exports.TransformersEmbeddings = TransformersEmbeddings;
|
|
176
|
+
//# sourceMappingURL=TransformersEmbeddings.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"TransformersEmbeddings.js","sourceRoot":"","sources":["../src/TransformersEmbeddings.ts"],"names":[],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AACA,mEAAgE;AAIhE,MAAM,aAAa,GAAG,yBAAyB,CAAC;AA2EhD;;;;;;;;;;;;;;;;;;GAkBG;AACH,MAAa,sBAAsB;IAO/B;;OAEG;IACH,YACI,SAAoC,EACpC,SAA8B,EAC9B,OAAoI;QAEpI,IAAI,CAAC,UAAU,GAAG,SAAS,CAAC;QAC5B,IAAI,CAAC,UAAU,GAAG,SAAS,CAAC;QAC5B,IAAI,CAAC,QAAQ,GAAG,OAAO,CAAC;QACxB,IAAI,CAAC,SAAS,GAAG,OAAO,CAAC,SAAS,CAAC;IACvC,CAAC;IAED;;;;;OAKG;IACI,MAAM,CAAO,MAAM,CAAC,OAAuC;;;YAC9D,kDAAkD;YAClD,IAAI,YAAiC,CAAC;YAEtC,IAAI,CAAC;gBACD,YAAY,GAAG,wDAAa,2BAA2B,GAAC,CAAC;YAC7D,CAAC;YAAC,OAAO,CAAC,EAAE,CAAC;gBACT,MAAM,IAAI,KAAK,CACX,6DAA6D;oBAC7D,wDAAwD,CAC3D,CAAC;YACN,CAAC;YAED,MAAM,EAAE,QAAQ,EAAE,GAAG,YAAY,CAAC;YAElC,iBAAiB;YACjB,MAAM,IAAI,GAAG;gBACT,KAAK,EAAE,MAAA,OAAO,aAAP,OAAO,uBAAP,OAAO,CAAE,KAAK,mCAAI,aAAa;gBACtC,SAAS,EAAE,MAAA,OAAO,aAAP,OAAO,uBAAP,OAAO,CAAE,SAAS,mCAAI,GAAG;gBACpC,MAAM,EAAE,MAAA,OAAO,aAAP,OAAO,uBAAP,OAAO,CAAE,MAAM,mCAAI,MAAM;gBACjC,KAAK,EAAE,MAAA,OAAO,aAAP,OAAO,uBAAP,OAAO,CAAE,KAAK,mCAAI,MAAM;gBAC/B,SAAS,EAAE,MAAA,OAAO,aAAP,OAAO,uBAAP,OAAO,CAAE,SAAS,mCAAI,IAAI;gBACrC,OAAO,EAAE,MAAA,OAAO,aAAP,OAAO,uBAAP,OAAO,CAAE,OAAO,mCAAI,MAAM;gBACnC,gBAAgB,EAAE,OAAO,aAAP,OAAO,uBAAP,OAAO,CAAE,gBAAgB;aAC9C,CAAC;YAEF,yBAAyB;YACzB,MAAM,eAAe,GAAQ;gBACzB,MAAM,EAAE,IAAI,CAAC,MAAM;gBACnB,KAAK,EAAE,IAAI,CAAC,KAAK;aACpB,CAAC;YAEF,IAAI,IAAI,CAAC,gBAAgB,EAAE,CAAC;gBACxB,eAAe,CAAC,iBAAiB,GAAG,IAAI,CAAC,gBAAgB,CAAC;YAC9D,CAAC;YAED,uCAAuC;YACvC,MAAM,SAAS,GAAG,MAAM,QAAQ,CAC5B,oBAAoB,EACpB,IAAI,CAAC,KAAK,EACV,eAAe,CAClB,CAAC;YAEF,0DAA0D;YAC1D,MAAM,SAAS,GAAG,SAAS,CAAC,SAAS,CAAC;YAEtC,OAAO,IAAI,sBAAsB,CAAC,SAAS,EAAE,SAAS,EAAE,IAAI,CAAC,CAAC;QAClE,CAAC;KAAA;IAED;;;;;;OAMG;IACI,YAAY;QACf,OAAO,IAAI,6CAAqB,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC;IACtD,CAAC;IAED;;;;OAIG;IACU,gBAAgB,CAAC,MAAyB;;YACnD,IAAI,CAAC;gBACD,MAAM,UAAU,GAAG,KAAK,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC;gBAE7D,uCAAuC;gBACvC,MAAM,MAAM,G
AAG,MAAM,IAAI,CAAC,UAAU,CAAC,UAAU,EAAE;oBAC7C,OAAO,EAAE,IAAI,CAAC,QAAQ,CAAC,OAAO;oBAC9B,SAAS,EAAE,IAAI,CAAC,QAAQ,CAAC,SAAS;iBACrC,CAAC,CAAC;gBAEH,MAAM,CAAC,SAAS,EAAE,YAAY,CAAC,GAAG,MAAM,CAAC,IAAI,CAAC;gBAC9C,MAAM,IAAI,GAAG,MAAM,CAAC,IAAoB,CAAC;gBAEzC,kDAAkD;gBAClD,MAAM,UAAU,GAAe,EAAE,CAAC;gBAClC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,SAAS,EAAE,CAAC,EAAE,EAAE,CAAC;oBACjC,MAAM,KAAK,GAAG,CAAC,GAAG,YAAY,CAAC;oBAC/B,MAAM,GAAG,GAAG,KAAK,GAAG,YAAY,CAAC;oBACjC,UAAU,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,KAAK,EAAE,GAAG,CAAC,CAAC,CAAC,CAAC;gBACxD,CAAC;gBAED,OAAO;oBACH,MAAM,EAAE,SAAS;oBACjB,MAAM,EAAE,UAAU;oBAClB,KAAK,EAAE,IAAI,CAAC,QAAQ,CAAC,KAAK;iBAC7B,CAAC;YACN,CAAC;YAAC,OAAO,KAAc,EAAE,CAAC;gBACtB,OAAO;oBACH,MAAM,EAAE,OAAO;oBACf,OAAO,EAAE,gCAAiC,KAAe,CAAC,OAAO,EAAE;iBACtE,CAAC;YACN,CAAC;QACL,CAAC;KAAA;IAED;;OAEG;IACH,IAAW,KAAK;QACZ,OAAO,IAAI,CAAC,QAAQ,CAAC,KAAK,CAAC;IAC/B,CAAC;CACJ;AApID,wDAoIC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"TransformersEmbeddings.spec.d.ts","sourceRoot":"","sources":["../src/TransformersEmbeddings.spec.ts"],"names":[],"mappings":""}
|
|
@@ -0,0 +1,198 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
|
|
3
|
+
if (k2 === undefined) k2 = k;
|
|
4
|
+
var desc = Object.getOwnPropertyDescriptor(m, k);
|
|
5
|
+
if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
|
|
6
|
+
desc = { enumerable: true, get: function() { return m[k]; } };
|
|
7
|
+
}
|
|
8
|
+
Object.defineProperty(o, k2, desc);
|
|
9
|
+
}) : (function(o, m, k, k2) {
|
|
10
|
+
if (k2 === undefined) k2 = k;
|
|
11
|
+
o[k2] = m[k];
|
|
12
|
+
}));
|
|
13
|
+
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
|
|
14
|
+
Object.defineProperty(o, "default", { enumerable: true, value: v });
|
|
15
|
+
}) : function(o, v) {
|
|
16
|
+
o["default"] = v;
|
|
17
|
+
});
|
|
18
|
+
/**
 * TypeScript emit helper backing `import * as ns`.
 *
 * Objects flagged `__esModule` are returned untouched. Anything else is
 * wrapped in a fresh namespace object: every own property except
 * `default` is bound through `__createBinding`, and the original value
 * itself is attached as `default` through `__setModuleDefault`.
 */
var __importStar = (this && this.__importStar) || (function () {
    // Resolved on first use: native getOwnPropertyNames or a for-in fallback.
    var listOwnKeys = null;
    function ownKeys(o) {
        if (listOwnKeys === null) {
            listOwnKeys = Object.getOwnPropertyNames || function (o) {
                var names = [];
                for (var key in o) {
                    if (Object.prototype.hasOwnProperty.call(o, key)) {
                        names.push(key);
                    }
                }
                return names;
            };
        }
        return listOwnKeys(o);
    }
    return function (mod) {
        if (mod && mod.__esModule) {
            return mod;
        }
        var result = {};
        if (mod != null) {
            var keys = ownKeys(mod);
            for (var i = 0; i < keys.length; i++) {
                if (keys[i] !== "default") {
                    __createBinding(result, mod, keys[i]);
                }
            }
        }
        __setModuleDefault(result, mod);
        return result;
    };
})();
|
|
35
|
+
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
|
|
36
|
+
function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
|
|
37
|
+
return new (P || (P = Promise))(function (resolve, reject) {
|
|
38
|
+
function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
|
|
39
|
+
function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
|
|
40
|
+
function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
|
|
41
|
+
step((generator = generator.apply(thisArg, _arguments || [])).next());
|
|
42
|
+
});
|
|
43
|
+
};
|
|
44
|
+
var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
45
|
+
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
46
|
+
};
|
|
47
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
48
|
+
const node_assert_1 = require("node:assert");
|
|
49
|
+
const mocha_1 = require("mocha");
|
|
50
|
+
const sinon_1 = __importDefault(require("sinon"));
|
|
51
|
+
const transformersModule = __importStar(require("@huggingface/transformers"));
|
|
52
|
+
(0, mocha_1.describe)('TransformersEmbeddings', () => {
|
|
53
|
+
let TransformersEmbeddings;
|
|
54
|
+
let mockExtractor;
|
|
55
|
+
let mockTokenizer;
|
|
56
|
+
let sandbox;
|
|
57
|
+
let pipelineStub;
|
|
58
|
+
(0, mocha_1.beforeEach)(async () => {
    sandbox = sinon_1.default.createSandbox();

    // Tokenizer double: __call__ always yields the same three BigInt ids and
    // decode always yields the same string, whatever the arguments.
    mockTokenizer = {
        __call__: sandbox.stub().returns({
            input_ids: { data: BigInt64Array.from([BigInt(1), BigInt(2), BigInt(3)]) }
        }),
        decode: sandbox.stub().returns('decoded text')
    };
    // The tokenizer is used as a function, so forward plain calls to __call__
    // and copy the stubbed members onto that function.
    const forwardToCall = (...args) => mockTokenizer.__call__(...args);
    const callableTokenizer = Object.assign(forwardToCall, mockTokenizer);

    // Feature-extraction double: returns a flat Float32Array holding one
    // [0.1, 0.2, 0.3, 0.4] row per input, together with its [rows, width] dims.
    const embeddingDim = 4;
    const rowValues = [0.1, 0.2, 0.3, 0.4];
    mockExtractor = sandbox.stub().callsFake(async (inputs) => {
        const rowCount = Array.isArray(inputs) ? inputs.length : 1;
        const flat = new Float32Array(rowCount * embeddingDim);
        for (let row = 0; row < rowCount; row++) {
            flat.set(rowValues, row * embeddingDim);
        }
        return { data: flat, dims: [rowCount, embeddingDim] };
    });
    // Expose the tokenizer on the extractor so the pipeline result has .tokenizer
    mockExtractor.tokenizer = callableTokenizer;

    // Replace pipeline() from @huggingface/transformers with a stub resolving to the double
    pipelineStub = sandbox.stub(transformersModule, 'pipeline').resolves(mockExtractor);

    // Load the module under test only after the stub is in place
    const loaded = await Promise.resolve().then(() => __importStar(require('./TransformersEmbeddings')));
    TransformersEmbeddings = loaded.TransformersEmbeddings;
});
(0, mocha_1.afterEach)(() => {
    // Undo every stub created through the sandbox
    sandbox.restore();
});
|
|
97
|
+
(0, mocha_1.describe)('create()', () => {
    (0, mocha_1.it)('creates instance with default options', async () => {
        const instance = await TransformersEmbeddings.create();
        node_assert_1.strict.equal(instance.maxTokens, 512, 'default maxTokens should be 512');
        node_assert_1.strict.equal(instance.model, 'Xenova/all-MiniLM-L6-v2', 'default model should be all-MiniLM-L6-v2');
        // The stubbed pipeline() must have been invoked exactly once, with the
        // task name first and the model id second.
        node_assert_1.strict.ok(pipelineStub.calledOnce, 'pipeline should be called once');
        const pipelineArgs = pipelineStub.firstCall.args;
        node_assert_1.strict.equal(pipelineArgs[0], 'feature-extraction');
        node_assert_1.strict.equal(pipelineArgs[1], 'Xenova/all-MiniLM-L6-v2');
    });
    (0, mocha_1.it)('creates instance with custom options', async () => {
        const customOptions = {
            model: 'Xenova/bge-small-en-v1.5',
            maxTokens: 256,
            device: 'cpu',
            normalize: false,
            pooling: 'cls'
        };
        const instance = await TransformersEmbeddings.create(customOptions);
        node_assert_1.strict.equal(instance.maxTokens, 256);
        node_assert_1.strict.equal(instance.model, 'Xenova/bge-small-en-v1.5');
    });
    (0, mocha_1.it)('implements EmbeddingsModel interface', async () => {
        // Runtime shape check only: a numeric maxTokens and a createEmbeddings method.
        const instance = await TransformersEmbeddings.create();
        node_assert_1.strict.equal(typeof instance.maxTokens, 'number');
        node_assert_1.strict.equal(typeof instance.createEmbeddings, 'function');
    });
});
|
|
124
|
+
(0, mocha_1.describe)('createEmbeddings()', () => {
    (0, mocha_1.it)('generates embeddings for single string', async () => {
        const model = await TransformersEmbeddings.create();
        const response = await model.createEmbeddings('hello world');
        node_assert_1.strict.equal(response.status, 'success');
        node_assert_1.strict.ok(response.output, 'output should be defined');
        node_assert_1.strict.equal(response.output.length, 1, 'should have one embedding');
        node_assert_1.strict.equal(response.output[0].length, 4, 'embedding should have 4 dimensions');
        // The extractor double emits Float32 values, so compare with a tolerance.
        const expected = [0.1, 0.2, 0.3, 0.4];
        response.output[0].forEach((val, i) => {
            const delta = Math.abs(val - expected[i]);
            node_assert_1.strict.ok(delta < 0.001, `value ${val} should be close to ${expected[i]}`);
        });
        node_assert_1.strict.equal(response.model, 'Xenova/all-MiniLM-L6-v2');
    });
    (0, mocha_1.it)('generates embeddings for string array', async () => {
        const model = await TransformersEmbeddings.create();
        const response = await model.createEmbeddings(['hello', 'world']);
        node_assert_1.strict.equal(response.status, 'success');
        node_assert_1.strict.ok(response.output, 'output should be defined');
        node_assert_1.strict.equal(response.output.length, 2, 'should have two embeddings');
        // Both strings must reach the extractor in a single batched call.
        node_assert_1.strict.equal(mockExtractor.callCount, 1);
        node_assert_1.strict.deepEqual(mockExtractor.firstCall.args[0], ['hello', 'world']);
    });
    (0, mocha_1.it)('passes pooling and normalize options to extractor', async () => {
        const model = await TransformersEmbeddings.create({
            pooling: 'cls',
            normalize: false
        });
        await model.createEmbeddings('test');
        node_assert_1.strict.ok(mockExtractor.calledOnce);
        // The second extractor argument carries the options.
        const extractorOptions = mockExtractor.firstCall.args[1];
        node_assert_1.strict.equal(extractorOptions.pooling, 'cls');
        node_assert_1.strict.equal(extractorOptions.normalize, false);
    });
    (0, mocha_1.it)('returns error status on failure', async () => {
        // Make the extractor reject so the error path is exercised.
        mockExtractor.rejects(new Error('Model inference failed'));
        const model = await TransformersEmbeddings.create();
        const response = await model.createEmbeddings('test');
        node_assert_1.strict.equal(response.status, 'error');
        const message = response.message;
        node_assert_1.strict.ok(message === null || message === undefined ? undefined : message.includes('Model inference failed'));
    });
    (0, mocha_1.it)('handles empty string input', async () => {
        const model = await TransformersEmbeddings.create();
        const response = await model.createEmbeddings('');
        node_assert_1.strict.equal(response.status, 'success');
        node_assert_1.strict.ok(response.output);
        node_assert_1.strict.equal(response.output.length, 1);
    });
    (0, mocha_1.it)('handles empty array input', async () => {
        const model = await TransformersEmbeddings.create();
        const response = await model.createEmbeddings([]);
        node_assert_1.strict.equal(response.status, 'success');
        node_assert_1.strict.ok(response.output);
        node_assert_1.strict.equal(response.output.length, 0);
    });
});
|
|
181
|
+
(0, mocha_1.describe)('getTokenizer()', () => {
    (0, mocha_1.it)('returns a TransformersTokenizer instance', async () => {
        const embeddings = await TransformersEmbeddings.create();
        const tokenizer = embeddings.getTokenizer();
        // Shape check: the returned object must expose encode() and decode().
        node_assert_1.strict.ok(tokenizer, 'tokenizer should be defined');
        node_assert_1.strict.equal(typeof tokenizer.encode, 'function');
        node_assert_1.strict.equal(typeof tokenizer.decode, 'function');
    });
    (0, mocha_1.it)('returns consistent tokenizer across calls', async () => {
        const embeddings = await TransformersEmbeddings.create();
        const tokenizer1 = embeddings.getTokenizer();
        const tokenizer2 = embeddings.getTokenizer();
        node_assert_1.strict.ok(tokenizer1);
        node_assert_1.strict.ok(tokenizer2);
        // Truthiness alone does not show consistency: also require that both
        // tokenizers produce the same output for the same input.
        // NOTE(review): assumes getTokenizer() wraps the tokenizer attached to the
        // stubbed pipeline result; confirm against TransformersEmbeddings.
        node_assert_1.strict.deepEqual(tokenizer1.encode('hello world'), tokenizer2.encode('hello world'), 'both tokenizers should encode identically');
        node_assert_1.strict.equal(tokenizer1.decode([1, 2, 3]), tokenizer2.decode([1, 2, 3]), 'both tokenizers should decode identically');
    });
});
|
|
197
|
+
});
|
|
198
|
+
//# sourceMappingURL=TransformersEmbeddings.spec.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"TransformersEmbeddings.spec.js","sourceRoot":"","sources":["../src/TransformersEmbeddings.spec.ts"],"names":[],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAAA,6CAA+C;AAC/C,iCAA4D;AAC5D,kDAA0B;AAE1B,8EAAgE;AAEhE,IAAA,gBAAQ,EAAC,wBAAwB,EAAE,GAAG,EAAE;IACpC,IAAI,sBAA2B,CAAC;IAChC,IAAI,aAA8B,CAAC;IACnC,IAAI,aAAkB,CAAC;IACvB,IAAI,OAA2B,CAAC;IAChC,IAAI,YAA6B,CAAC;IAElC,IAAA,kBAAU,EAAC,GAAS,EAAE;QAClB,OAAO,GAAG,eAAK,CAAC,aAAa,EAAE,CAAC;QAEhC,wBAAwB;QACxB,aAAa,GAAG;YACZ,QAAQ,EAAE,OAAO,CAAC,IAAI,EAAE,CAAC,OAAO,CAAC;gBAC7B,SAAS,EAAE,EAAE,IAAI,EAAE,aAAa,CAAC,IAAI,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,MAAM,CAAC,CAAC,CAAC,EAAE,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,EAAE;aAC7E,CAAC;YACF,MAAM,EAAE,OAAO,CAAC,IAAI,EAAE,CAAC,OAAO,CAAC,cAAc,CAAC;SACjD,CAAC;QACF,mBAAmB;QACnB,MAAM,iBAAiB,GAAG,MAAM,CAAC,MAAM,CACnC,CAAC,GAAG,IAAW,EAAE,EAAE,CAAC,aAAa,CAAC,QAAQ,CAAC,GAAG,IAAI,CAAC,EACnD,aAAa,CAChB,CAAC;QAEF,sDAAsD;QACtD,aAAa,GAAG,OAAO,CAAC,IAAI,EAAE,CAAC,SAAS,CAAC,CAAO,MAAyB,EAAE,EAAE;YACzE,MAAM,UAAU,GAAG,KAAK,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC;YAC7D,MAAM,SAAS,GAAG,UAAU,CAAC,MAAM,CAAC;YACpC,MAAM,YAAY,GAAG,CAAC,CAAC;YAEvB,MAAM,IAAI,GAAG,IAAI,YAAY,CAAC,SAAS,GAAG,YAAY,CAAC,CAAC;YACxD,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,SAAS,EAAE,CAAC,EAAE,EAAE,CAAC;gBACjC,IAAI,CAAC,CAAC,GAAG,YAAY,CAAC,GAAG,GAAG,CAAC;gBAC7B,IAAI,CAAC,CAAC,GAAG,YAAY,GAAG,CAAC,CAAC,GAAG,GAAG,CAAC;gBACjC,IAAI,CAAC,CAAC,GAAG,YAAY,GAAG,CAAC,CAAC,GAAG,GAAG,CAAC;gBACjC,IAAI,CAAC,CAAC,GAAG,YAAY,GAAG,CAAC,CAAC,GAAG,GAAG,CAAC;YACrC,CAAC;YAED,OAAO;gBACH,IAAI,EAAE,IAAI;gBACV,IAAI,EAAE,CAAC,SAAS,EAAE,YAAY,CAAC;aAClC,CAAC;QACN,CAAC,CAAA,CAAC,CAAC;QAEH,2EAA2E;QAC1E,aAAqB,CAAC,SAAS,GAAG,iBAAiB,CAAC;QAErD,4DAA4D;QAC5D,YAAY,GAAG,OAAO,CAAC,IAAI,CAAC,kBAAkB,EAAE,UAAiB,CAAC,CAAC,QAAQ,CAAC,aAAa,CAAC,CAAC;QAE3F,qFAAqF;QACrF,MAAM,GAAG,GAAG,wDAAa,0BAA0B,GAAC,CAAC;QACrD,sBAAsB,GAAG,GAAG,CAAC,sBAAsB,CAAC;IACxD,CAAC,CAAA,CAAC,CAAC;IAEH,IAAA,iBAAS,EAAC,GAAG,EAAE;QACX,OAAO,CAAC,OAAO,E
AAE,CAAC;IACtB,CAAC,CAAC,CAAC;IAEH,IAAA,gBAAQ,EAAC,UAAU,EAAE,GAAG,EAAE;QACtB,IAAA,UAAE,EAAC,uCAAuC,EAAE,GAAS,EAAE;YACnD,MAAM,UAAU,GAAG,MAAM,sBAAsB,CAAC,MAAM,EAAE,CAAC;YAEzD,oBAAM,CAAC,KAAK,CAAC,UAAU,CAAC,SAAS,EAAE,GAAG,EAAE,iCAAiC,CAAC,CAAC;YAC3E,oBAAM,CAAC,KAAK,CAAC,UAAU,CAAC,KAAK,EAAE,yBAAyB,EAAE,0CAA0C,CAAC,CAAC;YAEtG,oDAAoD;YACpD,oBAAM,CAAC,EAAE,CAAC,YAAY,CAAC,UAAU,EAAE,gCAAgC,CAAC,CAAC;YACrE,oBAAM,CAAC,KAAK,CAAC,YAAY,CAAC,SAAS,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,oBAAoB,CAAC,CAAC;YACnE,oBAAM,CAAC,KAAK,CAAC,YAAY,CAAC,SAAS,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,yBAAyB,CAAC,CAAC;QAC5E,CAAC,CAAA,CAAC,CAAC;QAEH,IAAA,UAAE,EAAC,sCAAsC,EAAE,GAAS,EAAE;YAClD,MAAM,UAAU,GAAG,MAAM,sBAAsB,CAAC,MAAM,CAAC;gBACnD,KAAK,EAAE,0BAA0B;gBACjC,SAAS,EAAE,GAAG;gBACd,MAAM,EAAE,KAAK;gBACb,SAAS,EAAE,KAAK;gBAChB,OAAO,EAAE,KAAK;aACjB,CAAC,CAAC;YAEH,oBAAM,CAAC,KAAK,CAAC,UAAU,CAAC,SAAS,EAAE,GAAG,CAAC,CAAC;YACxC,oBAAM,CAAC,KAAK,CAAC,UAAU,CAAC,KAAK,EAAE,0BAA0B,CAAC,CAAC;QAC/D,CAAC,CAAA,CAAC,CAAC;QAEH,IAAA,UAAE,EAAC,sCAAsC,EAAE,GAAS,EAAE;YAClD,MAAM,UAAU,GAAoB,MAAM,sBAAsB,CAAC,MAAM,EAAE,CAAC;YAE1E,oBAAM,CAAC,KAAK,CAAC,OAAO,UAAU,CAAC,SAAS,EAAE,QAAQ,CAAC,CAAC;YACpD,oBAAM,CAAC,KAAK,CAAC,OAAO,UAAU,CAAC,gBAAgB,EAAE,UAAU,CAAC,CAAC;QACjE,CAAC,CAAA,CAAC,CAAC;IACP,CAAC,CAAC,CAAC;IAEH,IAAA,gBAAQ,EAAC,oBAAoB,EAAE,GAAG,EAAE;QAChC,IAAA,UAAE,EAAC,wCAAwC,EAAE,GAAS,EAAE;YACpD,MAAM,UAAU,GAAG,MAAM,sBAAsB,CAAC,MAAM,EAAE,CAAC;YACzD,MAAM,MAAM,GAAG,MAAM,UAAU,CAAC,gBAAgB,CAAC,aAAa,CAAC,CAAC;YAEhE,oBAAM,CAAC,KAAK,CAAC,MAAM,CAAC,MAAM,EAAE,SAAS,CAAC,CAAC;YACvC,oBAAM,CAAC,EAAE,CAAC,MAAM,CAAC,MAAM,EAAE,0BAA0B,CAAC,CAAC;YACrD,oBAAM,CAAC,KAAK,CAAC,MAAM,CAAC,MAAO,CAAC,MAAM,EAAE,CAAC,EAAE,2BAA2B,CAAC,CAAC;YACpE,oBAAM,CAAC,KAAK,CAAC,MAAM,CAAC,MAAO,CAAC,CAAC,CAAC,CAAC,MAAM,EAAE,CAAC,EAAE,oCAAoC,CAAC,CAAC;YAChF,MAAM,QAAQ,GAAG,CAAC,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,CAAC,CAAC;YACtC,MAAM,CAAC,MAAO,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,GAAW,EAAE,CAAS,EAAE,EAAE;gBACjD,oBAAM,CAAC,EAAE,CAAC,IAAI,CAAC,GAAG,CAAC,GAAG,GAAG,QAAQ,CAAC,CAAC,CAAC,CAAC,GAAG,KAAK,E
AAE,SAAS,GAAG,uBAAuB,QAAQ,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC;YACrG,CAAC,CAAC,CAAC;YACH,oBAAM,CAAC,KAAK,CAAC,MAAM,CAAC,KAAK,EAAE,yBAAyB,CAAC,CAAC;QAC1D,CAAC,CAAA,CAAC,CAAC;QAEH,IAAA,UAAE,EAAC,uCAAuC,EAAE,GAAS,EAAE;YACnD,MAAM,UAAU,GAAG,MAAM,sBAAsB,CAAC,MAAM,EAAE,CAAC;YACzD,MAAM,MAAM,GAAG,MAAM,UAAU,CAAC,gBAAgB,CAAC,CAAC,OAAO,EAAE,OAAO,CAAC,CAAC,CAAC;YAErE,oBAAM,CAAC,KAAK,CAAC,MAAM,CAAC,MAAM,EAAE,SAAS,CAAC,CAAC;YACvC,oBAAM,CAAC,EAAE,CAAC,MAAM,CAAC,MAAM,EAAE,0BAA0B,CAAC,CAAC;YACrD,oBAAM,CAAC,KAAK,CAAC,MAAM,CAAC,MAAO,CAAC,MAAM,EAAE,CAAC,EAAE,4BAA4B,CAAC,CAAC;YAErE,oBAAM,CAAC,KAAK,CAAC,aAAa,CAAC,SAAS,EAAE,CAAC,CAAC,CAAC;YACzC,oBAAM,CAAC,SAAS,CAAC,aAAa,CAAC,SAAS,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,OAAO,EAAE,OAAO,CAAC,CAAC,CAAC;QAC1E,CAAC,CAAA,CAAC,CAAC;QAEH,IAAA,UAAE,EAAC,mDAAmD,EAAE,GAAS,EAAE;YAC/D,MAAM,UAAU,GAAG,MAAM,sBAAsB,CAAC,MAAM,CAAC;gBACnD,OAAO,EAAE,KAAK;gBACd,SAAS,EAAE,KAAK;aACnB,CAAC,CAAC;YACH,MAAM,UAAU,CAAC,gBAAgB,CAAC,MAAM,CAAC,CAAC;YAE1C,oBAAM,CAAC,EAAE,CAAC,aAAa,CAAC,UAAU,CAAC,CAAC;YACpC,MAAM,OAAO,GAAG,aAAa,CAAC,SAAS,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;YAChD,oBAAM,CAAC,KAAK,CAAC,OAAO,CAAC,OAAO,EAAE,KAAK,CAAC,CAAC;YACrC,oBAAM,CAAC,KAAK,CAAC,OAAO,CAAC,SAAS,EAAE,KAAK,CAAC,CAAC;QAC3C,CAAC,CAAA,CAAC,CAAC;QAEH,IAAA,UAAE,EAAC,iCAAiC,EAAE,GAAS,EAAE;;YAC7C,aAAa,CAAC,OAAO,CAAC,IAAI,KAAK,CAAC,wBAAwB,CAAC,CAAC,CAAC;YAE3D,MAAM,UAAU,GAAG,MAAM,sBAAsB,CAAC,MAAM,EAAE,CAAC;YACzD,MAAM,MAAM,GAAG,MAAM,UAAU,CAAC,gBAAgB,CAAC,MAAM,CAAC,CAAC;YAEzD,oBAAM,CAAC,KAAK,CAAC,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;YACrC,oBAAM,CAAC,EAAE,CAAC,MAAA,MAAM,CAAC,OAAO,0CAAE,QAAQ,CAAC,wBAAwB,CAAC,CAAC,CAAC;QAClE,CAAC,CAAA,CAAC,CAAC;QAEH,IAAA,UAAE,EAAC,4BAA4B,EAAE,GAAS,EAAE;YACxC,MAAM,UAAU,GAAG,MAAM,sBAAsB,CAAC,MAAM,EAAE,CAAC;YACzD,MAAM,MAAM,GAAG,MAAM,UAAU,CAAC,gBAAgB,CAAC,EAAE,CAAC,CAAC;YAErD,oBAAM,CAAC,KAAK,CAAC,MAAM,CAAC,MAAM,EAAE,SAAS,CAAC,CAAC;YACvC,oBAAM,CAAC,EAAE,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC;YACzB,oBAAM,CAAC,KAAK,CAAC,MAAM,CAAC,MAAO,CAAC,MAAM,EAAE,CAAC,CAAC,CAAC;QAC3C,CAAC,CAAA,CAAC,CAAC;QAEH,IAAA,UAAE,EAAC,2B
AA2B,EAAE,GAAS,EAAE;YACvC,MAAM,UAAU,GAAG,MAAM,sBAAsB,CAAC,MAAM,EAAE,CAAC;YACzD,MAAM,MAAM,GAAG,MAAM,UAAU,CAAC,gBAAgB,CAAC,EAAE,CAAC,CAAC;YAErD,oBAAM,CAAC,KAAK,CAAC,MAAM,CAAC,MAAM,EAAE,SAAS,CAAC,CAAC;YACvC,oBAAM,CAAC,EAAE,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC;YACzB,oBAAM,CAAC,KAAK,CAAC,MAAM,CAAC,MAAO,CAAC,MAAM,EAAE,CAAC,CAAC,CAAC;QAC3C,CAAC,CAAA,CAAC,CAAC;IACP,CAAC,CAAC,CAAC;IAEH,IAAA,gBAAQ,EAAC,gBAAgB,EAAE,GAAG,EAAE;QAC5B,IAAA,UAAE,EAAC,0CAA0C,EAAE,GAAS,EAAE;YACtD,MAAM,UAAU,GAAG,MAAM,sBAAsB,CAAC,MAAM,EAAE,CAAC;YACzD,MAAM,SAAS,GAAG,UAAU,CAAC,YAAY,EAAE,CAAC;YAE5C,oBAAM,CAAC,EAAE,CAAC,SAAS,EAAE,6BAA6B,CAAC,CAAC;YACpD,oBAAM,CAAC,KAAK,CAAC,OAAO,SAAS,CAAC,MAAM,EAAE,UAAU,CAAC,CAAC;YAClD,oBAAM,CAAC,KAAK,CAAC,OAAO,SAAS,CAAC,MAAM,EAAE,UAAU,CAAC,CAAC;QACtD,CAAC,CAAA,CAAC,CAAC;QAEH,IAAA,UAAE,EAAC,2CAA2C,EAAE,GAAS,EAAE;YACvD,MAAM,UAAU,GAAG,MAAM,sBAAsB,CAAC,MAAM,EAAE,CAAC;YACzD,MAAM,UAAU,GAAG,UAAU,CAAC,YAAY,EAAE,CAAC;YAC7C,MAAM,UAAU,GAAG,UAAU,CAAC,YAAY,EAAE,CAAC;YAE7C,oBAAM,CAAC,EAAE,CAAC,UAAU,CAAC,CAAC;YACtB,oBAAM,CAAC,EAAE,CAAC,UAAU,CAAC,CAAC;QAC1B,CAAC,CAAA,CAAC,CAAC;IACP,CAAC,CAAC,CAAC;AACP,CAAC,CAAC,CAAC"}
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
import { PreTrainedTokenizer } from "@huggingface/transformers";
|
|
2
|
+
import { Tokenizer } from "./types";
|
|
3
|
+
/**
 * Adapter that exposes a Transformers.js tokenizer through the Tokenizer interface.
 * @remarks
 * This tokenizer uses the same tokenizer as the embedding model,
 * ensuring consistency between text splitting and embedding generation.
 *
 * Obtain an instance via TransformersEmbeddings.getTokenizer().
 */
|
|
11
|
+
export declare class TransformersTokenizer implements Tokenizer {
|
|
12
|
+
/** The wrapped Transformers.js tokenizer that was passed to the constructor. */
private readonly _tokenizer;
|
|
13
|
+
/**
 * Creates a new TransformersTokenizer.
 * @param tokenizer The underlying Transformers.js tokenizer to delegate to.
 * @remarks
 * Typically created via TransformersEmbeddings.getTokenizer() rather than directly.
 */
|
|
19
|
+
constructor(tokenizer: PreTrainedTokenizer);
|
|
20
|
+
/**
 * Encodes text into token IDs.
 * @param text The text to encode.
 * @returns Array of token IDs as plain numbers.
 */
|
|
25
|
+
encode(text: string): number[];
|
|
26
|
+
/**
 * Decodes token IDs back into text.
 * @param tokens Array of token IDs.
 * @returns Decoded text string.
 */
|
|
31
|
+
decode(tokens: number[]): string;
|
|
32
|
+
}
|
|
33
|
+
//# sourceMappingURL=TransformersTokenizer.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"TransformersTokenizer.d.ts","sourceRoot":"","sources":["../src/TransformersTokenizer.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,mBAAmB,EAAE,MAAM,2BAA2B,CAAC;AAChE,OAAO,EAAE,SAAS,EAAE,MAAM,SAAS,CAAC;AAEpC;;;;;;;GAOG;AACH,qBAAa,qBAAsB,YAAW,SAAS;IACnD,OAAO,CAAC,QAAQ,CAAC,UAAU,CAAsB;IAEjD;;;;;OAKG;gBACgB,SAAS,EAAE,mBAAmB;IAIjD;;;;OAIG;IACI,MAAM,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,EAAE;IAOrC;;;;OAIG;IACI,MAAM,CAAC,MAAM,EAAE,MAAM,EAAE,GAAG,MAAM;CAG1C"}
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.TransformersTokenizer = void 0;
|
|
4
|
+
/**
|
|
5
|
+
* A tokenizer wrapper for Transformers.js models.
|
|
6
|
+
* @remarks
|
|
7
|
+
* This tokenizer uses the same tokenizer as the embedding model,
|
|
8
|
+
* ensuring consistency between text splitting and embedding generation.
|
|
9
|
+
*
|
|
10
|
+
* Obtain an instance via TransformersEmbeddings.getTokenizer().
|
|
11
|
+
*/
|
|
12
|
+
class TransformersTokenizer {
|
|
13
|
+
/**
|
|
14
|
+
* Creates a new TransformersTokenizer.
|
|
15
|
+
* @param tokenizer The underlying Transformers.js tokenizer.
|
|
16
|
+
* @remarks
|
|
17
|
+
* Typically created via TransformersEmbeddings.getTokenizer().
|
|
18
|
+
*/
|
|
19
|
+
constructor(tokenizer) {
|
|
20
|
+
this._tokenizer = tokenizer;
|
|
21
|
+
}
|
|
22
|
+
/**
|
|
23
|
+
* Encodes text into token IDs.
|
|
24
|
+
* @param text The text to encode.
|
|
25
|
+
* @returns Array of token IDs.
|
|
26
|
+
*/
|
|
27
|
+
encode(text) {
|
|
28
|
+
var _a, _b, _c;
|
|
29
|
+
const encoded = this._tokenizer(text);
|
|
30
|
+
// Transformers.js returns an object with input_ids as BigInt64Array or similar
|
|
31
|
+
const inputIds = (_c = (_b = (_a = encoded.input_ids) === null || _a === void 0 ? void 0 : _a.data) !== null && _b !== void 0 ? _b : encoded.input_ids) !== null && _c !== void 0 ? _c : encoded;
|
|
32
|
+
return Array.from(inputIds).map((id) => Number(id));
|
|
33
|
+
}
|
|
34
|
+
/**
|
|
35
|
+
* Decodes token IDs back into text.
|
|
36
|
+
* @param tokens Array of token IDs.
|
|
37
|
+
* @returns Decoded text string.
|
|
38
|
+
*/
|
|
39
|
+
decode(tokens) {
|
|
40
|
+
return this._tokenizer.decode(tokens, { skip_special_tokens: true });
|
|
41
|
+
}
|
|
42
|
+
}
|
|
43
|
+
exports.TransformersTokenizer = TransformersTokenizer;
|
|
44
|
+
//# sourceMappingURL=TransformersTokenizer.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"TransformersTokenizer.js","sourceRoot":"","sources":["../src/TransformersTokenizer.ts"],"names":[],"mappings":";;;AAGA;;;;;;;GAOG;AACH,MAAa,qBAAqB;IAG9B;;;;;OAKG;IACH,YAAmB,SAA8B;QAC7C,IAAI,CAAC,UAAU,GAAG,SAAS,CAAC;IAChC,CAAC;IAED;;;;OAIG;IACI,MAAM,CAAC,IAAY;;QACtB,MAAM,OAAO,GAAG,IAAI,CAAC,UAAU,CAAC,IAAI,CAAC,CAAC;QACtC,+EAA+E;QAC/E,MAAM,QAAQ,GAAG,MAAA,MAAA,MAAA,OAAO,CAAC,SAAS,0CAAE,IAAI,mCAAI,OAAO,CAAC,SAAS,mCAAI,OAAO,CAAC;QACzE,OAAO,KAAK,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC,GAAG,CAAC,CAAC,EAAO,EAAE,EAAE,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC,CAAC;IAC7D,CAAC;IAED;;;;OAIG;IACI,MAAM,CAAC,MAAgB;QAC1B,OAAO,IAAI,CAAC,UAAU,CAAC,MAAM,CAAC,MAAM,EAAE,EAAE,mBAAmB,EAAE,IAAI,EAAE,CAAC,CAAC;IACzE,CAAC;CACJ;AAjCD,sDAiCC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"TransformersTokenizer.spec.d.ts","sourceRoot":"","sources":["../src/TransformersTokenizer.spec.ts"],"names":[],"mappings":""}
|
|
@@ -0,0 +1,112 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
const node_assert_1 = require("node:assert");
|
|
4
|
+
const mocha_1 = require("mocha");
|
|
5
|
+
const TransformersTokenizer_1 = require("./TransformersTokenizer");
|
|
6
|
+
(0, mocha_1.describe)('TransformersTokenizer', () => {
|
|
7
|
+
// Create a mock tokenizer that mimics Transformers.js behavior
|
|
8
|
+
function createMockTokenizer() {
|
|
9
|
+
const vocab = new Map([
|
|
10
|
+
['hello', 101],
|
|
11
|
+
['world', 102],
|
|
12
|
+
['test', 103],
|
|
13
|
+
['[CLS]', 1],
|
|
14
|
+
['[SEP]', 2]
|
|
15
|
+
]);
|
|
16
|
+
const reverseVocab = new Map();
|
|
17
|
+
vocab.forEach((v, k) => reverseVocab.set(v, k));
|
|
18
|
+
return {
|
|
19
|
+
// Mimics the callable tokenizer behavior
|
|
20
|
+
__call__: (text) => {
|
|
21
|
+
const words = text.toLowerCase().split(/\s+/).filter(w => w);
|
|
22
|
+
const ids = words.map(w => { var _a; return (_a = vocab.get(w)) !== null && _a !== void 0 ? _a : 100; });
|
|
23
|
+
return {
|
|
24
|
+
input_ids: {
|
|
25
|
+
data: BigInt64Array.from(ids.map(id => BigInt(id)))
|
|
26
|
+
}
|
|
27
|
+
};
|
|
28
|
+
},
|
|
29
|
+
decode: (tokens, options) => {
|
|
30
|
+
const words = tokens
|
|
31
|
+
.filter(t => !(options === null || options === void 0 ? void 0 : options.skip_special_tokens) || (t !== 1 && t !== 2))
|
|
32
|
+
.map(t => { var _a; return (_a = reverseVocab.get(t)) !== null && _a !== void 0 ? _a : '[UNK]'; });
|
|
33
|
+
return words.join(' ');
|
|
34
|
+
}
|
|
35
|
+
};
|
|
36
|
+
}
|
|
37
|
+
(0, mocha_1.it)('encodes text to token array using callable tokenizer', () => {
    const mock = createMockTokenizer();
    // The class under test calls its tokenizer as a function, so wrap __call__
    // in a function and attach decode to it.
    const invoke = (text) => mock.__call__(text);
    const callableTokenizer = Object.assign(invoke, { decode: mock.decode });
    const subject = new TransformersTokenizer_1.TransformersTokenizer(callableTokenizer);
    const ids = subject.encode('hello world');
    node_assert_1.strict.ok(Array.isArray(ids), 'encode should return an array');
    node_assert_1.strict.equal(ids.length, 2, 'should have 2 tokens');
    node_assert_1.strict.deepEqual(ids, [101, 102], 'tokens should match expected values');
});
|
|
47
|
+
(0, mocha_1.it)('handles BigInt64Array conversion correctly', () => {
    // Tokenizer double whose call result always carries ids 1, 2, 3 as BigInts.
    const mock = {
        __call__: () => {
            const data = BigInt64Array.from([BigInt(1), BigInt(2), BigInt(3)]);
            return { input_ids: { data } };
        },
        decode: () => 'decoded'
    };
    const invoke = () => mock.__call__();
    const callableTokenizer = Object.assign(invoke, { decode: mock.decode });
    const subject = new TransformersTokenizer_1.TransformersTokenizer(callableTokenizer);
    const ids = subject.encode('any text');
    node_assert_1.strict.deepEqual(ids, [1, 2, 3], 'should convert BigInt to number');
    for (const id of ids) {
        node_assert_1.strict.equal(typeof id, 'number', 'each token should be a number');
    }
});
|
|
64
|
+
(0, mocha_1.it)('decodes tokens back to text', () => {
    // decode() answers differently depending on skip_special_tokens, which lets
    // the assertion below detect whether the option was passed as true.
    const mock = {
        __call__: () => ({ input_ids: { data: BigInt64Array.from([]) } }),
        decode: (tokens, opts) => {
            const skipSpecial = opts === null || opts === void 0 ? void 0 : opts.skip_special_tokens;
            return skipSpecial ? 'hello world' : '[CLS] hello world [SEP]';
        }
    };
    const invoke = () => mock.__call__();
    const callableTokenizer = Object.assign(invoke, { decode: mock.decode });
    const subject = new TransformersTokenizer_1.TransformersTokenizer(callableTokenizer);
    const decoded = subject.decode([1, 101, 102, 2]);
    node_assert_1.strict.equal(decoded, 'hello world', 'should decode with skip_special_tokens=true');
});
|
|
79
|
+
(0, mocha_1.it)('handles empty input', () => {
    // Tokenizer double that yields no ids and decodes to the empty string.
    const mock = {
        __call__: () => {
            const data = BigInt64Array.from([]);
            return { input_ids: { data } };
        },
        decode: () => ''
    };
    const invoke = () => mock.__call__();
    const callableTokenizer = Object.assign(invoke, { decode: mock.decode });
    const subject = new TransformersTokenizer_1.TransformersTokenizer(callableTokenizer);
    const ids = subject.encode('');
    node_assert_1.strict.deepEqual(ids, [], 'empty input should return empty array');
    const decoded = subject.decode([]);
    node_assert_1.strict.equal(decoded, '', 'empty tokens should return empty string');
});
|
|
93
|
+
(0, mocha_1.it)('returns consistent results for same input', () => {
    // Count invocations of the underlying tokenizer via a closure counter.
    let invocations = 0;
    const mock = {
        __call__: () => {
            invocations++;
            const data = BigInt64Array.from([BigInt(101), BigInt(102)]);
            return { input_ids: { data } };
        },
        decode: () => 'hello world'
    };
    const invoke = () => mock.__call__();
    const callableTokenizer = Object.assign(invoke, { decode: mock.decode });
    const subject = new TransformersTokenizer_1.TransformersTokenizer(callableTokenizer);
    const first = subject.encode('hello world');
    const second = subject.encode('hello world');
    node_assert_1.strict.deepEqual(first, second, 'encode should be deterministic');
    node_assert_1.strict.equal(invocations, 2, 'should call underlying tokenizer each time');
});
|
|
111
|
+
});
|
|
112
|
+
//# sourceMappingURL=TransformersTokenizer.spec.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"TransformersTokenizer.spec.js","sourceRoot":"","sources":["../src/TransformersTokenizer.spec.ts"],"names":[],"mappings":";;AAAA,6CAA+C;AAC/C,iCAAqC;AACrC,mEAAgE;AAEhE,IAAA,gBAAQ,EAAC,uBAAuB,EAAE,GAAG,EAAE;IACnC,+DAA+D;IAC/D,SAAS,mBAAmB;QACxB,MAAM,KAAK,GAAwB,IAAI,GAAG,CAAC;YACvC,CAAC,OAAO,EAAE,GAAG,CAAC;YACd,CAAC,OAAO,EAAE,GAAG,CAAC;YACd,CAAC,MAAM,EAAE,GAAG,CAAC;YACb,CAAC,OAAO,EAAE,CAAC,CAAC;YACZ,CAAC,OAAO,EAAE,CAAC,CAAC;SACf,CAAC,CAAC;QACH,MAAM,YAAY,GAAwB,IAAI,GAAG,EAAE,CAAC;QACpD,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,YAAY,CAAC,GAAG,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC;QAEhD,OAAO;YACH,yCAAyC;YACzC,QAAQ,EAAE,CAAC,IAAY,EAAE,EAAE;gBACvB,MAAM,KAAK,GAAG,IAAI,CAAC,WAAW,EAAE,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC;gBAC7D,MAAM,GAAG,GAAG,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,WAAC,OAAA,MAAA,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,mCAAI,GAAG,CAAA,EAAA,CAAC,CAAC;gBAChD,OAAO;oBACH,SAAS,EAAE;wBACP,IAAI,EAAE,aAAa,CAAC,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC,EAAE,CAAC,EAAE,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC,CAAC;qBACtD;iBACJ,CAAC;YACN,CAAC;YACD,MAAM,EAAE,CAAC,MAAgB,EAAE,OAA2C,EAAE,EAAE;gBACtE,MAAM,KAAK,GAAG,MAAM;qBACf,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAA,OAAO,aAAP,OAAO,uBAAP,OAAO,CAAE,mBAAmB,CAAA,IAAI,CAAC,CAAC,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC;qBAClE,GAAG,CAAC,CAAC,CAAC,EAAE,WAAC,OAAA,MAAA,YAAY,CAAC,GAAG,CAAC,CAAC,CAAC,mCAAI,OAAO,CAAA,EAAA,CAAC,CAAC;gBAC9C,OAAO,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;YAC3B,CAAC;SACJ,CAAC;IACN,CAAC;IAED,IAAA,UAAE,EAAC,sDAAsD,EAAE,GAAG,EAAE;QAC5D,MAAM,aAAa,GAAG,mBAAmB,EAAE,CAAC;QAC5C,mBAAmB;QACnB,MAAM,iBAAiB,GAAG,MAAM,CAAC,MAAM,CACnC,CAAC,IAAY,EAAE,EAAE,CAAC,aAAa,CAAC,QAAQ,CAAC,IAAI,CAAC,EAC9C,EAAE,MAAM,EAAE,aAAa,CAAC,MAAM,EAAE,CAC5B,CAAC;QAET,MAAM,SAAS,GAAG,IAAI,6CAAqB,CAAC,iBAAiB,CAAC,CAAC;QAC/D,MAAM,MAAM,GAAG,SAAS,CAAC,MAAM,CAAC,aAAa,CAAC,CAAC;QAE/C,oBAAM,CAAC,EAAE,CAAC,KAAK,CAAC,OAAO,CAAC,MAAM,CAAC,EAAE,+BAA+B,CAAC,CAAC;QAClE,oBAAM,CAAC,KAAK,CAAC,MAAM,CAAC,MAAM,EAAE,CAAC,EAAE,sBAAsB,CAAC,CAAC;QACvD,
oBAAM,CAAC,SAAS,CAAC,MAAM,EAAE,CAAC,GAAG,EAAE,GAAG,CAAC,EAAE,qCAAqC,CAAC,CAAC;IAChF,CAAC,CAAC,CAAC;IAEH,IAAA,UAAE,EAAC,4CAA4C,EAAE,GAAG,EAAE;QAClD,MAAM,aAAa,GAAG;YAClB,QAAQ,EAAE,GAAG,EAAE,CAAC,CAAC;gBACb,SAAS,EAAE;oBACP,IAAI,EAAE,aAAa,CAAC,IAAI,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,MAAM,CAAC,CAAC,CAAC,EAAE,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC;iBAC9D;aACJ,CAAC;YACF,MAAM,EAAE,GAAG,EAAE,CAAC,SAAS;SAC1B,CAAC;QACF,MAAM,iBAAiB,GAAG,MAAM,CAAC,MAAM,CACnC,GAAG,EAAE,CAAC,aAAa,CAAC,QAAQ,EAAE,EAC9B,EAAE,MAAM,EAAE,aAAa,CAAC,MAAM,EAAE,CAC5B,CAAC;QAET,MAAM,SAAS,GAAG,IAAI,6CAAqB,CAAC,iBAAiB,CAAC,CAAC;QAC/D,MAAM,MAAM,GAAG,SAAS,CAAC,MAAM,CAAC,UAAU,CAAC,CAAC;QAE5C,oBAAM,CAAC,SAAS,CAAC,MAAM,EAAE,CAAC,CAAC,EAAE,CAAC,EAAE,CAAC,CAAC,EAAE,iCAAiC,CAAC,CAAC;QACvE,MAAM,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE;YACf,oBAAM,CAAC,KAAK,CAAC,OAAO,CAAC,EAAE,QAAQ,EAAE,+BAA+B,CAAC,CAAC;QACtE,CAAC,CAAC,CAAC;IACP,CAAC,CAAC,CAAC;IAEH,IAAA,UAAE,EAAC,6BAA6B,EAAE,GAAG,EAAE;QACnC,MAAM,aAAa,GAAG;YAClB,QAAQ,EAAE,GAAG,EAAE,CAAC,CAAC,EAAE,SAAS,EAAE,EAAE,IAAI,EAAE,aAAa,CAAC,IAAI,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC;YACjE,MAAM,EAAE,CAAC,MAAgB,EAAE,IAAwC,EAAE,EAAE;gBACnE,IAAI,IAAI,aAAJ,IAAI,uBAAJ,IAAI,CAAE,mBAAmB,EAAE,CAAC;oBAC5B,OAAO,aAAa,CAAC;gBACzB,CAAC;gBACD,OAAO,yBAAyB,CAAC;YACrC,CAAC;SACJ,CAAC;QACF,MAAM,iBAAiB,GAAG,MAAM,CAAC,MAAM,CACnC,GAAG,EAAE,CAAC,aAAa,CAAC,QAAQ,EAAE,EAC9B,EAAE,MAAM,EAAE,aAAa,CAAC,MAAM,EAAE,CAC5B,CAAC;QAET,MAAM,SAAS,GAAG,IAAI,6CAAqB,CAAC,iBAAiB,CAAC,CAAC;QAC/D,MAAM,IAAI,GAAG,SAAS,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,GAAG,EAAE,GAAG,EAAE,CAAC,CAAC,CAAC,CAAC;QAEhD,oBAAM,CAAC,KAAK,CAAC,IAAI,EAAE,aAAa,EAAE,6CAA6C,CAAC,CAAC;IACrF,CAAC,CAAC,CAAC;IAEH,IAAA,UAAE,EAAC,qBAAqB,EAAE,GAAG,EAAE;QAC3B,MAAM,aAAa,GAAG;YAClB,QAAQ,EAAE,GAAG,EAAE,CAAC,CAAC;gBACb,SAAS,EAAE,EAAE,IAAI,EAAE,aAAa,CAAC,IAAI,CAAC,EAAE,CAAC,EAAE;aAC9C,CAAC;YACF,MAAM,EAAE,GAAG,EAAE,CAAC,EAAE;SACnB,CAAC;QACF,MAAM,iBAAiB,GAAG,MAAM,CAAC,MAAM,CACnC,GAAG,EAAE,CAAC,aAAa,CAAC,QAAQ,EAAE,EAC9B,EAAE,MAAM,EAAE,aAAa,CAAC,MAAM,EAAE,CAC5B,CAAC;QAET,MAAM,SAAS,GAAG,IAAI,6CAAqB,CAAC,iBAA
iB,CAAC,CAAC;QAE/D,MAAM,MAAM,GAAG,SAAS,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC;QACpC,oBAAM,CAAC,SAAS,CAAC,MAAM,EAAE,EAAE,EAAE,uCAAuC,CAAC,CAAC;QAEtE,MAAM,IAAI,GAAG,SAAS,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC;QAClC,oBAAM,CAAC,KAAK,CAAC,IAAI,EAAE,EAAE,EAAE,yCAAyC,CAAC,CAAC;IACtE,CAAC,CAAC,CAAC;IAEH,IAAA,UAAE,EAAC,2CAA2C,EAAE,GAAG,EAAE;QACjD,IAAI,SAAS,GAAG,CAAC,CAAC;QAClB,MAAM,aAAa,GAAG;YAClB,QAAQ,EAAE,GAAG,EAAE;gBACX,SAAS,EAAE,CAAC;gBACZ,OAAO;oBACH,SAAS,EAAE,EAAE,IAAI,EAAE,aAAa,CAAC,IAAI,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,EAAE,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE;iBACtE,CAAC;YACN,CAAC;YACD,MAAM,EAAE,GAAG,EAAE,CAAC,aAAa;SAC9B,CAAC;QACF,MAAM,iBAAiB,GAAG,MAAM,CAAC,MAAM,CACnC,GAAG,EAAE,CAAC,aAAa,CAAC,QAAQ,EAAE,EAC9B,EAAE,MAAM,EAAE,aAAa,CAAC,MAAM,EAAE,CAC5B,CAAC;QAET,MAAM,SAAS,GAAG,IAAI,6CAAqB,CAAC,iBAAiB,CAAC,CAAC;QAE/D,MAAM,OAAO,GAAG,SAAS,CAAC,MAAM,CAAC,aAAa,CAAC,CAAC;QAChD,MAAM,OAAO,GAAG,SAAS,CAAC,MAAM,CAAC,aAAa,CAAC,CAAC;QAEhD,oBAAM,CAAC,SAAS,CAAC,OAAO,EAAE,OAAO,EAAE,gCAAgC,CAAC,CAAC;QACrE,oBAAM,CAAC,KAAK,CAAC,SAAS,EAAE,CAAC,EAAE,4CAA4C,CAAC,CAAC;IAC7E,CAAC,CAAC,CAAC;AACP,CAAC,CAAC,CAAC"}
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
import { TextFetcher } from './types';
|
|
2
|
+
/**
 * Configuration shape for WebFetcher.
 * `headers` and `requestConfig` are optional; `htmlToMarkdown` and
 * `summarizeHtml` are required on this type, although the WebFetcher
 * constructor accepts a `Partial<WebFetcherConfig>`.
 */
export interface WebFetcherConfig {
|
|
3
|
+
/** NOTE(review): presumably extra HTTP headers for the request; confirm in WebFetcher.ts. */
headers?: Record<string, string>;
|
|
4
|
+
/** NOTE(review): presumably additional fetch `RequestInit` options; confirm in WebFetcher.ts. */
requestConfig?: RequestInit;
|
|
5
|
+
/** NOTE(review): presumably enables HTML-to-Markdown conversion of fetched pages; confirm. */
htmlToMarkdown: boolean;
|
|
6
|
+
/** NOTE(review): effect is not visible from this declaration; see WebFetcher.ts. */
summarizeHtml: boolean;
|
|
7
|
+
}
|
|
8
|
+
/**
 * TextFetcher implementation configured through WebFetcherConfig.
 * NOTE(review): presumably retrieves documents over HTTP; the implementation
 * is not visible in this declaration file.
 */
export declare class WebFetcher implements TextFetcher {
|
|
9
|
+
/** Configuration held by the instance. */
private readonly _config;
|
|
10
|
+
/**
 * @param config Optional configuration; any subset of WebFetcherConfig may be supplied.
 */
constructor(config?: Partial<WebFetcherConfig>);
|
|
11
|
+
/**
 * @param uri Location of the document to fetch.
 * @param onDocument Async callback receiving the uri, the text and an optional document type.
 * @returns A promise resolving to a boolean. NOTE(review): the meaning of the boolean is
 * not visible here; presumably the callback's result is passed through; confirm.
 */
fetch(uri: string, onDocument: (uri: string, text: string, docType?: string) => Promise<boolean>): Promise<boolean>;
|
|
12
|
+
/** Private helper; signature is erased in the declaration output. */
private htmlToMarkdown;
|
|
13
|
+
}
|
|
14
|
+
//# sourceMappingURL=WebFetcher.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"WebFetcher.d.ts","sourceRoot":"","sources":["../src/WebFetcher.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,WAAW,EAAE,MAAM,SAAS,CAAC;AA2BtC,MAAM,WAAW,gBAAgB;IAC/B,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IACjC,aAAa,CAAC,EAAE,WAAW,CAAC;IAC5B,cAAc,EAAE,OAAO,CAAC;IACxB,aAAa,EAAE,OAAO,CAAC;CACxB;AAED,qBAAa,UAAW,YAAW,WAAW;IAC5C,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAmB;gBAExB,MAAM,CAAC,EAAE,OAAO,CAAC,gBAAgB,CAAC;IAUxC,KAAK,CAChB,GAAG,EAAE,MAAM,EACX,UAAU,EAAE,CAAC,GAAG,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,EAAE,OAAO,CAAC,EAAE,MAAM,KAAK,OAAO,CAAC,OAAO,CAAC,GAC5E,OAAO,CAAC,OAAO,CAAC;IAsCnB,OAAO,CAAC,cAAc;CAsCvB"}
|