vectra 0.12.3 → 0.14.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +92 -100
- package/lib/BrowserWebFetcher.d.ts +75 -0
- package/lib/BrowserWebFetcher.d.ts.map +1 -0
- package/lib/BrowserWebFetcher.js +290 -0
- package/lib/BrowserWebFetcher.js.map +1 -0
- package/lib/FileFetcher.d.ts.map +1 -1
- package/lib/FileFetcher.js +25 -15
- package/lib/FileFetcher.js.map +1 -1
- package/lib/FileFetcher.spec.d.ts +2 -0
- package/lib/FileFetcher.spec.d.ts.map +1 -0
- package/lib/FileFetcher.spec.js +244 -0
- package/lib/FileFetcher.spec.js.map +1 -0
- package/lib/FolderWatcher.d.ts +91 -0
- package/lib/FolderWatcher.d.ts.map +1 -0
- package/lib/FolderWatcher.js +304 -0
- package/lib/FolderWatcher.js.map +1 -0
- package/lib/FolderWatcher.spec.d.ts +2 -0
- package/lib/FolderWatcher.spec.d.ts.map +1 -0
- package/lib/FolderWatcher.spec.js +308 -0
- package/lib/FolderWatcher.spec.js.map +1 -0
- package/lib/GPT3Tokenizer.spec.d.ts +2 -0
- package/lib/GPT3Tokenizer.spec.d.ts.map +1 -0
- package/lib/GPT3Tokenizer.spec.js +45 -0
- package/lib/GPT3Tokenizer.spec.js.map +1 -0
- package/lib/ItemSelector.d.ts.map +1 -1
- package/lib/ItemSelector.js +19 -8
- package/lib/ItemSelector.js.map +1 -1
- package/lib/ItemSelector.spec.d.ts +2 -0
- package/lib/ItemSelector.spec.d.ts.map +1 -0
- package/lib/ItemSelector.spec.js +204 -0
- package/lib/ItemSelector.spec.js.map +1 -0
- package/lib/LocalDocument.d.ts +1 -1
- package/lib/LocalDocument.d.ts.map +1 -1
- package/lib/LocalDocument.js +5 -45
- package/lib/LocalDocument.js.map +1 -1
- package/lib/LocalDocument.spec.d.ts +2 -0
- package/lib/LocalDocument.spec.d.ts.map +1 -0
- package/lib/LocalDocument.spec.js +214 -0
- package/lib/LocalDocument.spec.js.map +1 -0
- package/lib/LocalDocumentIndex.d.ts +20 -0
- package/lib/LocalDocumentIndex.d.ts.map +1 -1
- package/lib/LocalDocumentIndex.js +16 -52
- package/lib/LocalDocumentIndex.js.map +1 -1
- package/lib/LocalDocumentIndex.spec.d.ts +2 -0
- package/lib/LocalDocumentIndex.spec.d.ts.map +1 -0
- package/lib/LocalDocumentIndex.spec.js +494 -0
- package/lib/LocalDocumentIndex.spec.js.map +1 -0
- package/lib/LocalDocumentResult.d.ts +32 -11
- package/lib/LocalDocumentResult.d.ts.map +1 -1
- package/lib/LocalDocumentResult.js +305 -257
- package/lib/LocalDocumentResult.js.map +1 -1
- package/lib/LocalDocumentResult.spec.d.ts +2 -0
- package/lib/LocalDocumentResult.spec.d.ts.map +1 -0
- package/lib/LocalDocumentResult.spec.js +373 -0
- package/lib/LocalDocumentResult.spec.js.map +1 -0
- package/lib/LocalEmbeddings.d.ts +59 -0
- package/lib/LocalEmbeddings.d.ts.map +1 -0
- package/lib/LocalEmbeddings.js +101 -0
- package/lib/LocalEmbeddings.js.map +1 -0
- package/lib/LocalEmbeddings.spec.d.ts +2 -0
- package/lib/LocalEmbeddings.spec.d.ts.map +1 -0
- package/lib/LocalEmbeddings.spec.js +155 -0
- package/lib/LocalEmbeddings.spec.js.map +1 -0
- package/lib/LocalIndex.d.ts +27 -18
- package/lib/LocalIndex.d.ts.map +1 -1
- package/lib/LocalIndex.js +109 -105
- package/lib/LocalIndex.js.map +1 -1
- package/lib/LocalIndex.spec.js +434 -43
- package/lib/LocalIndex.spec.js.map +1 -1
- package/lib/OpenAIEmbeddings.d.ts +4 -6
- package/lib/OpenAIEmbeddings.d.ts.map +1 -1
- package/lib/OpenAIEmbeddings.js +16 -24
- package/lib/OpenAIEmbeddings.js.map +1 -1
- package/lib/OpenAIEmbeddings.spec.d.ts +2 -0
- package/lib/OpenAIEmbeddings.spec.d.ts.map +1 -0
- package/lib/OpenAIEmbeddings.spec.js +298 -0
- package/lib/OpenAIEmbeddings.spec.js.map +1 -0
- package/lib/TextSplitter.d.ts +2 -0
- package/lib/TextSplitter.d.ts.map +1 -1
- package/lib/TextSplitter.js +154 -111
- package/lib/TextSplitter.js.map +1 -1
- package/lib/TextSplitter.spec.js +289 -61
- package/lib/TextSplitter.spec.js.map +1 -1
- package/lib/TransformersEmbeddings.d.ts +121 -0
- package/lib/TransformersEmbeddings.d.ts.map +1 -0
- package/lib/TransformersEmbeddings.js +176 -0
- package/lib/TransformersEmbeddings.js.map +1 -0
- package/lib/TransformersEmbeddings.spec.d.ts +2 -0
- package/lib/TransformersEmbeddings.spec.d.ts.map +1 -0
- package/lib/TransformersEmbeddings.spec.js +198 -0
- package/lib/TransformersEmbeddings.spec.js.map +1 -0
- package/lib/TransformersTokenizer.d.ts +33 -0
- package/lib/TransformersTokenizer.d.ts.map +1 -0
- package/lib/TransformersTokenizer.js +44 -0
- package/lib/TransformersTokenizer.js.map +1 -0
- package/lib/TransformersTokenizer.spec.d.ts +2 -0
- package/lib/TransformersTokenizer.spec.d.ts.map +1 -0
- package/lib/TransformersTokenizer.spec.js +112 -0
- package/lib/TransformersTokenizer.spec.js.map +1 -0
- package/lib/WebFetcher.d.ts +1 -2
- package/lib/WebFetcher.d.ts.map +1 -1
- package/lib/WebFetcher.js +58 -54
- package/lib/WebFetcher.js.map +1 -1
- package/lib/WebFetcher.spec.d.ts +2 -0
- package/lib/WebFetcher.spec.d.ts.map +1 -0
- package/lib/WebFetcher.spec.js +263 -0
- package/lib/WebFetcher.spec.js.map +1 -0
- package/lib/browser.d.ts +30 -0
- package/lib/browser.d.ts.map +1 -0
- package/lib/browser.js +52 -0
- package/lib/browser.js.map +1 -0
- package/lib/codecs/IndexCodec.d.ts +37 -0
- package/lib/codecs/IndexCodec.d.ts.map +1 -0
- package/lib/codecs/IndexCodec.js +3 -0
- package/lib/codecs/IndexCodec.js.map +1 -0
- package/lib/codecs/JsonCodec.d.ts +19 -0
- package/lib/codecs/JsonCodec.d.ts.map +1 -0
- package/lib/codecs/JsonCodec.js +35 -0
- package/lib/codecs/JsonCodec.js.map +1 -0
- package/lib/codecs/JsonCodec.spec.d.ts +2 -0
- package/lib/codecs/JsonCodec.spec.d.ts.map +1 -0
- package/lib/codecs/JsonCodec.spec.js +66 -0
- package/lib/codecs/JsonCodec.spec.js.map +1 -0
- package/lib/codecs/LocalIndex.protobuf.spec.d.ts +2 -0
- package/lib/codecs/LocalIndex.protobuf.spec.d.ts.map +1 -0
- package/lib/codecs/LocalIndex.protobuf.spec.js +108 -0
- package/lib/codecs/LocalIndex.protobuf.spec.js.map +1 -0
- package/lib/codecs/ProtobufCodec.d.ts +20 -0
- package/lib/codecs/ProtobufCodec.d.ts.map +1 -0
- package/lib/codecs/ProtobufCodec.js +225 -0
- package/lib/codecs/ProtobufCodec.js.map +1 -0
- package/lib/codecs/ProtobufCodec.spec.d.ts +2 -0
- package/lib/codecs/ProtobufCodec.spec.d.ts.map +1 -0
- package/lib/codecs/ProtobufCodec.spec.js +155 -0
- package/lib/codecs/ProtobufCodec.spec.js.map +1 -0
- package/lib/codecs/index.d.ts +5 -0
- package/lib/codecs/index.d.ts.map +1 -0
- package/lib/codecs/index.js +21 -0
- package/lib/codecs/index.js.map +1 -0
- package/lib/codecs/migrateIndex.d.ts +24 -0
- package/lib/codecs/migrateIndex.d.ts.map +1 -0
- package/lib/codecs/migrateIndex.js +119 -0
- package/lib/codecs/migrateIndex.js.map +1 -0
- package/lib/codecs/migrateIndex.spec.d.ts +2 -0
- package/lib/codecs/migrateIndex.spec.d.ts.map +1 -0
- package/lib/codecs/migrateIndex.spec.js +151 -0
- package/lib/codecs/migrateIndex.spec.js.map +1 -0
- package/lib/codecs/schemas/index.proto +34 -0
- package/lib/index.d.ts +9 -1
- package/lib/index.d.ts.map +1 -1
- package/lib/index.js +9 -1
- package/lib/index.js.map +1 -1
- package/lib/internals/Colorize.d.ts.map +1 -1
- package/lib/internals/Colorize.js +20 -15
- package/lib/internals/Colorize.js.map +1 -1
- package/lib/server/IndexManager.d.ts +78 -0
- package/lib/server/IndexManager.d.ts.map +1 -0
- package/lib/server/IndexManager.js +259 -0
- package/lib/server/IndexManager.js.map +1 -0
- package/lib/server/VectraServer.d.ts +40 -0
- package/lib/server/VectraServer.d.ts.map +1 -0
- package/lib/server/VectraServer.js +151 -0
- package/lib/server/VectraServer.js.map +1 -0
- package/lib/server/VectraServer.spec.d.ts +2 -0
- package/lib/server/VectraServer.spec.d.ts.map +1 -0
- package/lib/server/VectraServer.spec.js +322 -0
- package/lib/server/VectraServer.spec.js.map +1 -0
- package/lib/server/handlers/documentHandlers.d.ts +15 -0
- package/lib/server/handlers/documentHandlers.d.ts.map +1 -0
- package/lib/server/handlers/documentHandlers.js +95 -0
- package/lib/server/handlers/documentHandlers.js.map +1 -0
- package/lib/server/handlers/helpers.d.ts +23 -0
- package/lib/server/handlers/helpers.d.ts.map +1 -0
- package/lib/server/handlers/helpers.js +138 -0
- package/lib/server/handlers/helpers.js.map +1 -0
- package/lib/server/handlers/index.d.ts +8 -0
- package/lib/server/handlers/index.d.ts.map +1 -0
- package/lib/server/handlers/index.js +22 -0
- package/lib/server/handlers/index.js.map +1 -0
- package/lib/server/handlers/indexHandlers.d.ts +14 -0
- package/lib/server/handlers/indexHandlers.d.ts.map +1 -0
- package/lib/server/handlers/indexHandlers.js +85 -0
- package/lib/server/handlers/indexHandlers.js.map +1 -0
- package/lib/server/handlers/itemHandlers.d.ts +34 -0
- package/lib/server/handlers/itemHandlers.d.ts.map +1 -0
- package/lib/server/handlers/itemHandlers.js +166 -0
- package/lib/server/handlers/itemHandlers.js.map +1 -0
- package/lib/server/handlers/lifecycleHandlers.d.ts +11 -0
- package/lib/server/handlers/lifecycleHandlers.d.ts.map +1 -0
- package/lib/server/handlers/lifecycleHandlers.js +31 -0
- package/lib/server/handlers/lifecycleHandlers.js.map +1 -0
- package/lib/server/handlers/queryHandlers.d.ts +27 -0
- package/lib/server/handlers/queryHandlers.d.ts.map +1 -0
- package/lib/server/handlers/queryHandlers.js +135 -0
- package/lib/server/handlers/queryHandlers.js.map +1 -0
- package/lib/server/handlers/statsHandlers.d.ts +17 -0
- package/lib/server/handlers/statsHandlers.d.ts.map +1 -0
- package/lib/server/handlers/statsHandlers.js +81 -0
- package/lib/server/handlers/statsHandlers.js.map +1 -0
- package/lib/server/index.d.ts +4 -0
- package/lib/server/index.d.ts.map +1 -0
- package/lib/server/index.js +23 -0
- package/lib/server/index.js.map +1 -0
- package/lib/storage/FileStorage.d.ts +92 -0
- package/lib/storage/FileStorage.d.ts.map +1 -0
- package/lib/storage/FileStorage.js +3 -0
- package/lib/storage/FileStorage.js.map +1 -0
- package/lib/storage/FileStorageUtilities.d.ts +36 -0
- package/lib/storage/FileStorageUtilities.d.ts.map +1 -0
- package/lib/storage/FileStorageUtilities.js +91 -0
- package/lib/storage/FileStorageUtilities.js.map +1 -0
- package/lib/storage/FileStorageUtilities.spec.d.ts +2 -0
- package/lib/storage/FileStorageUtilities.spec.d.ts.map +1 -0
- package/lib/storage/FileStorageUtilities.spec.js +98 -0
- package/lib/storage/FileStorageUtilities.spec.js.map +1 -0
- package/lib/storage/FileType.d.ts +29 -0
- package/lib/storage/FileType.d.ts.map +1 -0
- package/lib/storage/FileType.js +38 -0
- package/lib/storage/FileType.js.map +1 -0
- package/lib/storage/IndexedDBStorage.d.ts +47 -0
- package/lib/storage/IndexedDBStorage.d.ts.map +1 -0
- package/lib/storage/IndexedDBStorage.js +347 -0
- package/lib/storage/IndexedDBStorage.js.map +1 -0
- package/lib/storage/LocalFileStorage.browser.d.ts +19 -0
- package/lib/storage/LocalFileStorage.browser.d.ts.map +1 -0
- package/lib/storage/LocalFileStorage.browser.js +43 -0
- package/lib/storage/LocalFileStorage.browser.js.map +1 -0
- package/lib/storage/LocalFileStorage.d.ts +23 -0
- package/lib/storage/LocalFileStorage.d.ts.map +1 -0
- package/lib/storage/LocalFileStorage.js +152 -0
- package/lib/storage/LocalFileStorage.js.map +1 -0
- package/lib/storage/LocalFileStorage.spec.d.ts +2 -0
- package/lib/storage/LocalFileStorage.spec.d.ts.map +1 -0
- package/lib/storage/LocalFileStorage.spec.js +249 -0
- package/lib/storage/LocalFileStorage.spec.js.map +1 -0
- package/lib/storage/VirtualFileStorage.d.ts +18 -0
- package/lib/storage/VirtualFileStorage.d.ts.map +1 -0
- package/lib/storage/VirtualFileStorage.js +178 -0
- package/lib/storage/VirtualFileStorage.js.map +1 -0
- package/lib/storage/VirtualFileStorage.spec.d.ts +2 -0
- package/lib/storage/VirtualFileStorage.spec.d.ts.map +1 -0
- package/lib/storage/VirtualFileStorage.spec.js +302 -0
- package/lib/storage/VirtualFileStorage.spec.js.map +1 -0
- package/lib/storage/index.d.ts +6 -0
- package/lib/storage/index.d.ts.map +1 -0
- package/lib/storage/index.js +22 -0
- package/lib/storage/index.js.map +1 -0
- package/lib/templates/templates/csharp/README.md +48 -0
- package/lib/templates/templates/csharp/VectraClient.cs +234 -0
- package/lib/templates/templates/go/README.md +71 -0
- package/lib/templates/templates/go/vectra_client.go +322 -0
- package/lib/templates/templates/java/README.md +81 -0
- package/lib/templates/templates/java/VectraClient.java +232 -0
- package/lib/templates/templates/python/README.md +37 -0
- package/lib/templates/templates/python/vectra_client.py +279 -0
- package/lib/templates/templates/rust/Cargo.toml +14 -0
- package/lib/templates/templates/rust/README.md +39 -0
- package/lib/templates/templates/rust/build.rs +4 -0
- package/lib/templates/templates/rust/lib.rs +284 -0
- package/lib/templates/templates/typescript/README.md +96 -0
- package/lib/templates/templates/typescript/VectraClient.ts +374 -0
- package/lib/templates/typescript/VectraClient.d.ts +114 -0
- package/lib/templates/typescript/VectraClient.d.ts.map +1 -0
- package/lib/templates/typescript/VectraClient.js +328 -0
- package/lib/templates/typescript/VectraClient.js.map +1 -0
- package/lib/types.d.ts +7 -0
- package/lib/types.d.ts.map +1 -1
- package/lib/utils/index.d.ts +2 -0
- package/lib/utils/index.d.ts.map +1 -0
- package/lib/utils/index.js +18 -0
- package/lib/utils/index.js.map +1 -0
- package/lib/utils/pathUtils.d.ts +40 -0
- package/lib/utils/pathUtils.d.ts.map +1 -0
- package/lib/utils/pathUtils.js +98 -0
- package/lib/utils/pathUtils.js.map +1 -0
- package/lib/vectra-cli.d.ts.map +1 -1
- package/lib/vectra-cli.generate.spec.d.ts +2 -0
- package/lib/vectra-cli.generate.spec.d.ts.map +1 -0
- package/lib/vectra-cli.generate.spec.js +112 -0
- package/lib/vectra-cli.generate.spec.js.map +1 -0
- package/lib/vectra-cli.js +446 -9
- package/lib/vectra-cli.js.map +1 -1
- package/lib/vectra-cli.spec.d.ts +1 -0
- package/lib/vectra-cli.spec.d.ts.map +1 -0
- package/lib/vectra-cli.spec.js +2 -0
- package/lib/vectra-cli.spec.js.map +1 -0
- package/package.json +89 -16
- package/proto/vectra_service.proto +276 -0
- package/src/BrowserWebFetcher.ts +345 -0
- package/src/FileFetcher.spec.ts +234 -0
- package/src/FileFetcher.ts +37 -25
- package/src/FolderWatcher.spec.ts +288 -0
- package/src/FolderWatcher.ts +304 -0
- package/src/GPT3Tokenizer.spec.ts +50 -0
- package/src/ItemSelector.spec.ts +252 -0
- package/src/ItemSelector.ts +163 -150
- package/src/LocalDocument.spec.ts +211 -0
- package/src/LocalDocument.ts +88 -94
- package/src/LocalDocumentIndex.spec.ts +481 -0
- package/src/LocalDocumentIndex.ts +39 -40
- package/src/LocalDocumentResult.spec.ts +373 -0
- package/src/LocalDocumentResult.ts +489 -319
- package/src/LocalEmbeddings.spec.ts +138 -0
- package/src/LocalEmbeddings.ts +120 -0
- package/src/LocalIndex.spec.ts +808 -323
- package/src/LocalIndex.ts +479 -430
- package/src/OpenAIEmbeddings.spec.ts +354 -0
- package/src/OpenAIEmbeddings.ts +26 -27
- package/src/TextSplitter.spec.ts +320 -65
- package/src/TextSplitter.ts +172 -115
- package/src/TransformersEmbeddings.spec.ts +188 -0
- package/src/TransformersEmbeddings.ts +232 -0
- package/src/TransformersTokenizer.spec.ts +143 -0
- package/src/TransformersTokenizer.ts +45 -0
- package/src/WebFetcher.spec.ts +288 -0
- package/src/WebFetcher.ts +184 -186
- package/src/browser.ts +69 -0
- package/src/codecs/IndexCodec.ts +40 -0
- package/src/codecs/JsonCodec.spec.ts +70 -0
- package/src/codecs/JsonCodec.ts +37 -0
- package/src/codecs/LocalIndex.protobuf.spec.ts +115 -0
- package/src/codecs/ProtobufCodec.spec.ts +166 -0
- package/src/codecs/ProtobufCodec.ts +193 -0
- package/src/codecs/index.ts +4 -0
- package/src/codecs/migrateIndex.spec.ts +176 -0
- package/src/codecs/migrateIndex.ts +125 -0
- package/src/codecs/schemas/index.proto +34 -0
- package/src/index.ts +9 -1
- package/src/internals/Colorize.ts +19 -16
- package/src/server/IndexManager.ts +243 -0
- package/src/server/VectraServer.spec.ts +303 -0
- package/src/server/VectraServer.ts +156 -0
- package/src/server/handlers/documentHandlers.ts +59 -0
- package/src/server/handlers/helpers.ts +93 -0
- package/src/server/handlers/index.ts +7 -0
- package/src/server/handlers/indexHandlers.ts +44 -0
- package/src/server/handlers/itemHandlers.ts +140 -0
- package/src/server/handlers/lifecycleHandlers.ts +26 -0
- package/src/server/handlers/queryHandlers.ts +96 -0
- package/src/server/handlers/statsHandlers.ts +38 -0
- package/src/server/index.ts +3 -0
- package/src/storage/FileStorage.ts +105 -0
- package/src/storage/FileStorageUtilities.spec.ts +106 -0
- package/src/storage/FileStorageUtilities.ts +77 -0
- package/src/storage/FileType.ts +61 -0
- package/src/storage/IndexedDBStorage.ts +365 -0
- package/src/storage/LocalFileStorage.browser.ts +52 -0
- package/src/storage/LocalFileStorage.spec.ts +292 -0
- package/src/storage/LocalFileStorage.ts +98 -0
- package/src/storage/VirtualFileStorage.spec.ts +307 -0
- package/src/storage/VirtualFileStorage.ts +169 -0
- package/src/storage/index.ts +5 -0
- package/src/templates/csharp/README.md +48 -0
- package/src/templates/csharp/VectraClient.cs +234 -0
- package/src/templates/go/README.md +71 -0
- package/src/templates/go/vectra_client.go +322 -0
- package/src/templates/java/README.md +81 -0
- package/src/templates/java/VectraClient.java +232 -0
- package/src/templates/python/README.md +37 -0
- package/src/templates/python/vectra_client.py +279 -0
- package/src/templates/rust/Cargo.toml +14 -0
- package/src/templates/rust/README.md +39 -0
- package/src/templates/rust/build.rs +4 -0
- package/src/templates/rust/lib.rs +284 -0
- package/src/templates/typescript/README.md +96 -0
- package/src/templates/typescript/VectraClient.ts +374 -0
- package/src/types.ts +131 -123
- package/src/utils/index.ts +1 -0
- package/src/utils/pathUtils.ts +106 -0
- package/src/vectra-cli.generate.spec.ts +72 -0
- package/src/vectra-cli.spec.ts +0 -0
- package/src/vectra-cli.ts +687 -246
- package/README.draft.md +0 -499
- package/README.draft.outline.md +0 -160
- package/README.research.md +0 -2159
|
@@ -23,10 +23,10 @@ class LocalDocumentResult extends LocalDocument_1.LocalDocument {
|
|
|
23
23
|
super(index, id, uri);
|
|
24
24
|
this._chunks = chunks;
|
|
25
25
|
this._tokenizer = tokenizer;
|
|
26
|
-
//
|
|
26
|
+
// Average score across chunks
|
|
27
27
|
let score = 0;
|
|
28
|
-
this._chunks.forEach(chunk => score += chunk.score);
|
|
29
|
-
this._score = score / this._chunks.length;
|
|
28
|
+
this._chunks.forEach((chunk) => (score += chunk.score));
|
|
29
|
+
this._score = this._chunks.length > 0 ? score / this._chunks.length : 0;
|
|
30
30
|
}
|
|
31
31
|
/**
|
|
32
32
|
* Returns the chunks of the document that matched the query.
|
|
@@ -40,289 +40,337 @@ class LocalDocumentResult extends LocalDocument_1.LocalDocument {
|
|
|
40
40
|
get score() {
|
|
41
41
|
return this._score;
|
|
42
42
|
}
|
|
43
|
+
/**
|
|
44
|
+
* Helper: robust check for BM25-marked chunks.
|
|
45
|
+
*/
|
|
46
|
+
isBm25Chunk(chunk) {
|
|
47
|
+
var _a;
|
|
48
|
+
const val = (_a = chunk.item.metadata) === null || _a === void 0 ? void 0 : _a.isBm25;
|
|
49
|
+
return val === true || val === "true" || val === 1 || val === "1";
|
|
50
|
+
}
|
|
51
|
+
/**
|
|
52
|
+
* A small, testable packer that mimics the old `renderAllSections()` behavior
|
|
53
|
+
* but exposes the internal flush logic so all branches are coverable.
|
|
54
|
+
*/
|
|
55
|
+
createAllSectionsPacker() {
|
|
56
|
+
const sections = [];
|
|
57
|
+
let currentTokens = [];
|
|
58
|
+
let currentScores = [];
|
|
59
|
+
let currentIsBm25AllTrue = true;
|
|
60
|
+
const flush = () => {
|
|
61
|
+
// Branch 1: empty flush
|
|
62
|
+
if (currentTokens.length === 0)
|
|
63
|
+
return;
|
|
64
|
+
// Branch 2: avgScore fallback when scores are missing
|
|
65
|
+
const avgScore = currentScores.length > 0
|
|
66
|
+
? currentScores.reduce((a, b) => a + b, 0) / currentScores.length
|
|
67
|
+
: 0;
|
|
68
|
+
// Branch 3: isBm25 depends on "all bm25" and "has scores"
|
|
69
|
+
const isBm25 = currentIsBm25AllTrue && currentScores.length > 0 ? true : false;
|
|
70
|
+
sections.push({
|
|
71
|
+
text: this._tokenizer.decode(currentTokens),
|
|
72
|
+
tokenCount: currentTokens.length,
|
|
73
|
+
score: avgScore,
|
|
74
|
+
isBm25,
|
|
75
|
+
});
|
|
76
|
+
currentTokens = [];
|
|
77
|
+
currentScores = [];
|
|
78
|
+
currentIsBm25AllTrue = true;
|
|
79
|
+
};
|
|
80
|
+
const pushChunkTokens = (tokens, score, isBm25Chunk) => {
|
|
81
|
+
currentTokens.push(...tokens);
|
|
82
|
+
currentScores.push(score);
|
|
83
|
+
currentIsBm25AllTrue = currentIsBm25AllTrue && isBm25Chunk;
|
|
84
|
+
};
|
|
85
|
+
/**
|
|
86
|
+
* Test-only escape hatch: force internal state to cover otherwise-invariant branches.
|
|
87
|
+
* Not used by production code.
|
|
88
|
+
*/
|
|
89
|
+
const __testSetState = (state) => {
|
|
90
|
+
if (state.currentTokens)
|
|
91
|
+
currentTokens = state.currentTokens.slice();
|
|
92
|
+
if (state.currentScores)
|
|
93
|
+
currentScores = state.currentScores.slice();
|
|
94
|
+
if (typeof state.currentIsBm25AllTrue === "boolean") {
|
|
95
|
+
currentIsBm25AllTrue = state.currentIsBm25AllTrue;
|
|
96
|
+
}
|
|
97
|
+
};
|
|
98
|
+
const getSections = () => sections;
|
|
99
|
+
return { flush, pushChunkTokens, getSections, __testSetState };
|
|
100
|
+
}
|
|
43
101
|
/**
|
|
44
102
|
* Renders all of the results chunks as spans of text (sections.)
|
|
45
103
|
* @remarks
|
|
46
|
-
*
|
|
47
|
-
*
|
|
48
|
-
*
|
|
104
|
+
* - Chunks are sorted by document order.
|
|
105
|
+
* - Multiple small chunks are packed into a single section up to maxTokens.
|
|
106
|
+
* - Oversized chunks are split into multiple sections, each carrying the chunk's score.
|
|
107
|
+
* - When multiple chunks are packed, section score is the arithmetic mean of packed chunks' scores.
|
|
49
108
|
*/
|
|
50
109
|
renderAllSections(maxTokens) {
|
|
51
110
|
return __awaiter(this, void 0, void 0, function* () {
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
111
|
+
const docText = yield this.loadText();
|
|
112
|
+
// Sort by document order
|
|
113
|
+
const sorted = this._chunks
|
|
114
|
+
.slice()
|
|
115
|
+
.sort((a, b) => Number(a.item.metadata.startPos) - Number(b.item.metadata.startPos));
|
|
116
|
+
const packer = this.createAllSectionsPacker();
|
|
117
|
+
// We'll keep a local "current length" mirror, to avoid re-encoding just to compute lengths.
|
|
118
|
+
let currentLen = 0;
|
|
119
|
+
const flushAndReset = () => {
|
|
120
|
+
packer.flush();
|
|
121
|
+
currentLen = 0;
|
|
122
|
+
};
|
|
123
|
+
for (const chunk of sorted) {
|
|
124
|
+
const startPos = Number(chunk.item.metadata.startPos);
|
|
125
|
+
const endPos = Number(chunk.item.metadata.endPos);
|
|
126
|
+
const chunkText = docText.substring(startPos, endPos + 1);
|
|
61
127
|
const tokens = this._tokenizer.encode(chunkText);
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
128
|
+
// Oversized chunk: split
|
|
129
|
+
if (tokens.length > maxTokens) {
|
|
130
|
+
// flush pending packed group
|
|
131
|
+
flushAndReset();
|
|
132
|
+
let offset = 0;
|
|
133
|
+
while (offset < tokens.length) {
|
|
134
|
+
const part = tokens.slice(offset, offset + maxTokens);
|
|
135
|
+
// Each split part is its own section (force packer state then flush)
|
|
136
|
+
packer.__testSetState({
|
|
137
|
+
currentTokens: part,
|
|
138
|
+
currentScores: [chunk.score],
|
|
139
|
+
currentIsBm25AllTrue: this.isBm25Chunk(chunk),
|
|
140
|
+
});
|
|
141
|
+
packer.flush();
|
|
142
|
+
offset += part.length;
|
|
143
|
+
}
|
|
144
|
+
continue;
|
|
74
145
|
}
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
chunks: [],
|
|
86
|
-
score: 0,
|
|
87
|
-
tokenCount: 0
|
|
88
|
-
};
|
|
89
|
-
sections.push(section);
|
|
146
|
+
// Pack if it fits
|
|
147
|
+
if (currentLen + tokens.length <= maxTokens) {
|
|
148
|
+
packer.pushChunkTokens(tokens, chunk.score, this.isBm25Chunk(chunk));
|
|
149
|
+
currentLen += tokens.length;
|
|
150
|
+
}
|
|
151
|
+
else {
|
|
152
|
+
// overflow: flush, then start new group
|
|
153
|
+
flushAndReset();
|
|
154
|
+
packer.pushChunkTokens(tokens, chunk.score, this.isBm25Chunk(chunk));
|
|
155
|
+
currentLen = tokens.length;
|
|
90
156
|
}
|
|
91
|
-
section.chunks.push(chunk);
|
|
92
|
-
section.score += chunk.score;
|
|
93
|
-
section.tokenCount += chunk.tokenCount;
|
|
94
157
|
}
|
|
95
|
-
//
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
return sections.map(section => {
|
|
99
|
-
let text = '';
|
|
100
|
-
section.chunks.forEach(chunk => text += chunk.text);
|
|
101
|
-
return {
|
|
102
|
-
text: text,
|
|
103
|
-
tokenCount: section.tokenCount,
|
|
104
|
-
score: section.score,
|
|
105
|
-
isBm25: false,
|
|
106
|
-
};
|
|
107
|
-
});
|
|
158
|
+
// final flush
|
|
159
|
+
packer.flush();
|
|
160
|
+
return packer.getSections();
|
|
108
161
|
});
|
|
109
162
|
}
|
|
110
163
|
/**
|
|
111
|
-
*
|
|
112
|
-
*
|
|
113
|
-
* The returned sections will be sorted by relevance and limited to the top `maxSections`.
|
|
114
|
-
* @param maxTokens Maximum number of tokens per section.
|
|
115
|
-
* @param maxSections Maximum number of sections to return.
|
|
116
|
-
* @param overlappingChunks Optional. If true, overlapping chunks of text will be added to each section until the maxTokens is reached.
|
|
117
|
-
* @returns Array of rendered text sections.
|
|
164
|
+
* Testable helper: build a single fallback section from the top-scoring chunk,
|
|
165
|
+
* truncated to exactly maxTokens tokens.
|
|
118
166
|
*/
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
167
|
+
buildFallbackTopChunkSection(docText, chunks, isBm25, maxTokens) {
|
|
168
|
+
if (chunks.length === 0)
|
|
169
|
+
return [];
|
|
170
|
+
const topChunk = chunks.reduce((prev, curr) => (curr.score > prev.score ? curr : prev), chunks[0]);
|
|
171
|
+
const start = Number(topChunk.item.metadata.startPos);
|
|
172
|
+
const end = Number(topChunk.item.metadata.endPos);
|
|
173
|
+
const chunkText = docText.substring(start, end + 1);
|
|
174
|
+
const chunkTokens = this._tokenizer.encode(chunkText);
|
|
175
|
+
const truncatedTokens = chunkTokens.slice(0, maxTokens);
|
|
176
|
+
return [
|
|
177
|
+
{
|
|
178
|
+
text: this._tokenizer.decode(truncatedTokens),
|
|
179
|
+
tokenCount: maxTokens,
|
|
180
|
+
score: topChunk.score,
|
|
181
|
+
isBm25,
|
|
182
|
+
},
|
|
183
|
+
];
|
|
184
|
+
}
|
|
185
|
+
/**
|
|
186
|
+
* Internal helper: builds sections for either semantic or BM25 chunk lists using a heatmap.
|
|
187
|
+
*/
|
|
188
|
+
buildSectionsFor(docText, chunks, isBm25, maxTokens, maxSections, overlappingChunks) {
|
|
189
|
+
if (chunks.length === 0)
|
|
190
|
+
return [];
|
|
191
|
+
const connector = LocalDocumentResult.CONNECTOR;
|
|
192
|
+
const connectorTokens = this._tokenizer.encode(connector);
|
|
193
|
+
// Build heatmap: map each character position to accumulated score
|
|
194
|
+
const heatmap = new Map();
|
|
195
|
+
for (const chunk of chunks) {
|
|
196
|
+
const start = Number(chunk.item.metadata.startPos);
|
|
197
|
+
const end = Number(chunk.item.metadata.endPos);
|
|
198
|
+
for (let pos = start; pos <= end; pos++) {
|
|
199
|
+
heatmap.set(pos, (heatmap.get(pos) || 0) + chunk.score);
|
|
132
200
|
}
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
text: chunkText,
|
|
147
|
-
startPos,
|
|
148
|
-
endPos,
|
|
149
|
-
score: chunk.score,
|
|
150
|
-
tokenCount: this._tokenizer.encode(chunkText).length,
|
|
151
|
-
isBm25: Boolean(chunk.item.metadata.isBm25),
|
|
152
|
-
};
|
|
153
|
-
}).filter(chunk => chunk.tokenCount <= maxTokens).sort((a, b) => a.startPos - b.startPos);
|
|
154
|
-
// Check for no chunks
|
|
155
|
-
if (chunks.length === 0) {
|
|
156
|
-
// Take the top chunk and return a subset of its text
|
|
157
|
-
const topChunk = this._chunks[0];
|
|
158
|
-
const startPos = topChunk.item.metadata.startPos;
|
|
159
|
-
const endPos = topChunk.item.metadata.endPos;
|
|
160
|
-
const chunkText = text.substring(startPos, endPos + 1);
|
|
161
|
-
const tokens = this._tokenizer.encode(chunkText);
|
|
162
|
-
return [{
|
|
163
|
-
text: this._tokenizer.decode(tokens.slice(0, maxTokens)),
|
|
164
|
-
tokenCount: maxTokens,
|
|
165
|
-
score: topChunk.score,
|
|
166
|
-
isBm25: false,
|
|
167
|
-
}];
|
|
201
|
+
}
|
|
202
|
+
const peaks = [];
|
|
203
|
+
const sortedPositions = Array.from(heatmap.keys()).sort((a, b) => a - b);
|
|
204
|
+
let currentPeak = null;
|
|
205
|
+
const PEAK_THRESHOLD = 0.1;
|
|
206
|
+
for (const pos of sortedPositions) {
|
|
207
|
+
const score = heatmap.get(pos);
|
|
208
|
+
if (score < PEAK_THRESHOLD) {
|
|
209
|
+
if (currentPeak) {
|
|
210
|
+
peaks.push(currentPeak);
|
|
211
|
+
currentPeak = null;
|
|
212
|
+
}
|
|
213
|
+
continue;
|
|
168
214
|
}
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
section = {
|
|
177
|
-
chunks: [],
|
|
178
|
-
score: 0,
|
|
179
|
-
tokenCount: 0
|
|
180
|
-
};
|
|
181
|
-
sections.push(section);
|
|
182
|
-
}
|
|
183
|
-
section.chunks.push(chunk);
|
|
184
|
-
section.score += chunk.score;
|
|
185
|
-
section.tokenCount += chunk.tokenCount;
|
|
215
|
+
if (!currentPeak) {
|
|
216
|
+
currentPeak = { position: pos, score, chunks: [] };
|
|
217
|
+
}
|
|
218
|
+
else {
|
|
219
|
+
if (score > currentPeak.score) {
|
|
220
|
+
currentPeak.position = pos;
|
|
221
|
+
currentPeak.score = score;
|
|
186
222
|
}
|
|
187
223
|
}
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
224
|
+
}
|
|
225
|
+
if (currentPeak)
|
|
226
|
+
peaks.push(currentPeak);
|
|
227
|
+
// No-peaks fallback: create one at center of top chunk
|
|
228
|
+
if (peaks.length === 0) {
|
|
229
|
+
const topChunk = chunks.reduce((prev, curr) => (curr.score > prev.score ? curr : prev), chunks[0]);
|
|
230
|
+
const start = Number(topChunk.item.metadata.startPos);
|
|
231
|
+
const end = Number(topChunk.item.metadata.endPos);
|
|
232
|
+
const center = Math.floor((start + end) / 2);
|
|
233
|
+
peaks.push({ position: center, score: topChunk.score, chunks: [] });
|
|
234
|
+
}
|
|
235
|
+
// Associate chunks to nearest peak
|
|
236
|
+
for (const chunk of chunks) {
|
|
237
|
+
const start = Number(chunk.item.metadata.startPos);
|
|
238
|
+
const end = Number(chunk.item.metadata.endPos);
|
|
239
|
+
const center = Math.floor((start + end) / 2);
|
|
240
|
+
let closestPeak = peaks[0];
|
|
241
|
+
let minDist = Math.abs(center - closestPeak.position);
|
|
242
|
+
for (const peak of peaks) {
|
|
243
|
+
const dist = Math.abs(center - peak.position);
|
|
244
|
+
if (dist < minDist) {
|
|
245
|
+
minDist = dist;
|
|
246
|
+
closestPeak = peak;
|
|
205
247
|
}
|
|
206
248
|
}
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
249
|
+
closestPeak.chunks.push(chunk);
|
|
250
|
+
}
|
|
251
|
+
// Sort peaks by score desc
|
|
252
|
+
peaks.sort((a, b) => b.score - a.score);
|
|
253
|
+
const topPeaks = peaks.slice(0, maxSections);
|
|
254
|
+
const sections = [];
|
|
255
|
+
for (const peak of topPeaks) {
|
|
256
|
+
const sortedChunks = peak.chunks.slice().sort((a, b) => {
|
|
257
|
+
const aCenter = Math.floor((Number(a.item.metadata.startPos) + Number(a.item.metadata.endPos)) / 2);
|
|
258
|
+
const bCenter = Math.floor((Number(b.item.metadata.startPos) + Number(b.item.metadata.endPos)) / 2);
|
|
259
|
+
return (Math.abs(aCenter - peak.position) - Math.abs(bCenter - peak.position));
|
|
260
|
+
});
|
|
261
|
+
const selected = [];
|
|
262
|
+
let currentTokenCount = 0;
|
|
263
|
+
for (const chunk of sortedChunks) {
|
|
264
|
+
const start = Number(chunk.item.metadata.startPos);
|
|
265
|
+
const end = Number(chunk.item.metadata.endPos);
|
|
266
|
+
const chunkText = docText.substring(start, end + 1);
|
|
267
|
+
const chunkTokens = this._tokenizer.encode(chunkText);
|
|
268
|
+
// Whole-chunk preference, skip oversize
|
|
269
|
+
if (chunkTokens.length > maxTokens)
|
|
270
|
+
continue;
|
|
271
|
+
let tokensNeeded = chunkTokens.length;
|
|
272
|
+
if (selected.length > 0 && overlappingChunks) {
|
|
273
|
+
const isAdjacent = selected.some((s) => {
|
|
274
|
+
const sEnd = Number(s.item.metadata.endPos);
|
|
275
|
+
const sStart = Number(s.item.metadata.startPos);
|
|
276
|
+
return sEnd + 1 === start || end + 1 === sStart;
|
|
277
|
+
});
|
|
278
|
+
if (!isAdjacent) {
|
|
279
|
+
tokensNeeded += connectorTokens.length;
|
|
280
|
+
}
|
|
281
|
+
}
|
|
282
|
+
if (currentTokenCount + tokensNeeded <= maxTokens) {
|
|
283
|
+
selected.push(chunk);
|
|
284
|
+
currentTokenCount += tokensNeeded;
|
|
285
|
+
}
|
|
215
286
|
}
|
|
216
|
-
|
|
217
|
-
|
|
287
|
+
// If nothing selected, fall back (required by contract)
|
|
288
|
+
if (selected.length === 0) {
|
|
289
|
+
return this.buildFallbackTopChunkSection(docText, chunks, isBm25, maxTokens);
|
|
218
290
|
}
|
|
219
|
-
//
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
291
|
+
// Assemble selected in document order with connectors
|
|
292
|
+
const ordered = selected
|
|
293
|
+
.slice()
|
|
294
|
+
.sort((a, b) => Number(a.item.metadata.startPos) - Number(b.item.metadata.startPos));
|
|
295
|
+
let sectionText = "";
|
|
296
|
+
let sectionTokens = [];
|
|
297
|
+
for (let i = 0; i < ordered.length; i++) {
|
|
298
|
+
const curr = ordered[i];
|
|
299
|
+
const start = Number(curr.item.metadata.startPos);
|
|
300
|
+
const end = Number(curr.item.metadata.endPos);
|
|
301
|
+
const chunkText = docText.substring(start, end + 1);
|
|
302
|
+
if (i > 0 && overlappingChunks) {
|
|
303
|
+
const prev = ordered[i - 1];
|
|
304
|
+
const prevEnd = Number(prev.item.metadata.endPos);
|
|
305
|
+
if (prevEnd + 1 < start) {
|
|
306
|
+
sectionText += connector;
|
|
307
|
+
sectionTokens.push(...connectorTokens);
|
|
230
308
|
}
|
|
231
309
|
}
|
|
232
|
-
|
|
233
|
-
|
|
310
|
+
sectionText += chunkText;
|
|
311
|
+
sectionTokens.push(...this._tokenizer.encode(chunkText));
|
|
312
|
+
}
|
|
313
|
+
// Optional expansion if budget remains
|
|
234
314
|
if (overlappingChunks) {
|
|
235
|
-
const
|
|
236
|
-
|
|
237
|
-
startPos
|
|
238
|
-
endPos
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
let budget = maxTokens - section.tokenCount;
|
|
254
|
-
if (budget > 40) {
|
|
255
|
-
const sectionStart = section.chunks[0].startPos;
|
|
256
|
-
const sectionEnd = section.chunks[section.chunks.length - 1].endPos;
|
|
257
|
-
if (sectionStart > 0) {
|
|
258
|
-
const beforeTex = text.substring(0, section.chunks[0].startPos);
|
|
259
|
-
const beforeTokens = this.encodeBeforeText(beforeTex, Math.ceil(budget / 2));
|
|
260
|
-
const beforeBudget = sectionEnd < text.length - 1 ? Math.min(beforeTokens.length, Math.ceil(budget / 2)) : Math.min(beforeTokens.length, budget);
|
|
261
|
-
const chunk = {
|
|
262
|
-
text: this._tokenizer.decode(beforeTokens.slice(-beforeBudget)),
|
|
263
|
-
startPos: sectionStart - beforeBudget,
|
|
264
|
-
endPos: sectionStart - 1,
|
|
265
|
-
score: 0,
|
|
266
|
-
tokenCount: beforeBudget,
|
|
267
|
-
isBm25: false,
|
|
268
|
-
};
|
|
269
|
-
section.chunks.unshift(chunk);
|
|
270
|
-
section.tokenCount += chunk.tokenCount;
|
|
271
|
-
budget -= chunk.tokenCount;
|
|
272
|
-
}
|
|
273
|
-
if (sectionEnd < text.length - 1) {
|
|
274
|
-
const afterText = text.substring(sectionEnd + 1);
|
|
275
|
-
const afterTokens = this.encodeAfterText(afterText, budget);
|
|
276
|
-
const afterBudget = Math.min(afterTokens.length, budget);
|
|
277
|
-
const chunk = {
|
|
278
|
-
text: this._tokenizer.decode(afterTokens.slice(0, afterBudget)),
|
|
279
|
-
startPos: sectionEnd + 1,
|
|
280
|
-
endPos: sectionEnd + afterBudget,
|
|
281
|
-
score: 0,
|
|
282
|
-
tokenCount: afterBudget,
|
|
283
|
-
isBm25: false,
|
|
284
|
-
};
|
|
285
|
-
section.chunks.push(chunk);
|
|
286
|
-
section.tokenCount += chunk.tokenCount;
|
|
287
|
-
budget -= chunk.tokenCount;
|
|
288
|
-
}
|
|
289
|
-
}
|
|
290
|
-
});
|
|
315
|
+
const budgetRemain = maxTokens - sectionTokens.length;
|
|
316
|
+
if (budgetRemain > 40) {
|
|
317
|
+
const firstStart = Math.min(...selected.map((c) => Number(c.item.metadata.startPos)));
|
|
318
|
+
const lastEnd = Math.max(...selected.map((c) => Number(c.item.metadata.endPos)));
|
|
319
|
+
const beforeRegion = docText.slice(0, firstStart);
|
|
320
|
+
const afterRegion = docText.slice(lastEnd + 1);
|
|
321
|
+
const beforeToksAll = this._tokenizer.encode(beforeRegion);
|
|
322
|
+
const afterToksAll = this._tokenizer.encode(afterRegion);
|
|
323
|
+
const beforeBudget = Math.min(Math.ceil(budgetRemain / 2), beforeToksAll.length);
|
|
324
|
+
const afterBudget = Math.min(budgetRemain - beforeBudget, afterToksAll.length);
|
|
325
|
+
const beforeTail = beforeToksAll.slice(beforeToksAll.length - beforeBudget);
|
|
326
|
+
const afterHead = afterToksAll.slice(0, afterBudget);
|
|
327
|
+
sectionText =
|
|
328
|
+
this._tokenizer.decode(beforeTail) +
|
|
329
|
+
sectionText +
|
|
330
|
+
this._tokenizer.decode(afterHead);
|
|
331
|
+
sectionTokens = [...beforeTail, ...sectionTokens, ...afterHead];
|
|
332
|
+
}
|
|
291
333
|
}
|
|
292
|
-
const
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
score: section.score,
|
|
299
|
-
isBm25: false,
|
|
300
|
-
};
|
|
301
|
-
});
|
|
302
|
-
const bm25DocTextSections = bm25Sections.map(section => {
|
|
303
|
-
let text = '';
|
|
304
|
-
section.chunks.forEach(chunk => text += chunk.text);
|
|
305
|
-
return {
|
|
306
|
-
text: text,
|
|
307
|
-
tokenCount: section.tokenCount,
|
|
308
|
-
score: section.score,
|
|
309
|
-
isBm25: true,
|
|
310
|
-
};
|
|
334
|
+
const avgScore = selected.reduce((sum, c) => sum + c.score, 0) / selected.length;
|
|
335
|
+
sections.push({
|
|
336
|
+
text: sectionText,
|
|
337
|
+
tokenCount: sectionTokens.length,
|
|
338
|
+
score: avgScore,
|
|
339
|
+
isBm25,
|
|
311
340
|
});
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
|
|
341
|
+
}
|
|
342
|
+
// If maxSections=0 (or slice emptied), this is reachable:
|
|
343
|
+
if (sections.length === 0) {
|
|
344
|
+
return this.buildFallbackTopChunkSection(docText, chunks, isBm25, maxTokens);
|
|
345
|
+
}
|
|
346
|
+
return sections;
|
|
315
347
|
}
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
|
|
324
|
-
|
|
348
|
+
/**
|
|
349
|
+
* Renders the top spans of text (sections) of the document based on the query result.
|
|
350
|
+
*/
|
|
351
|
+
renderSections(maxTokens_1, maxSections_1) {
|
|
352
|
+
return __awaiter(this, arguments, void 0, function* (maxTokens, maxSections, overlappingChunks = true) {
|
|
353
|
+
const length = yield this.getLength();
|
|
354
|
+
if (length <= maxTokens) {
|
|
355
|
+
const text = yield this.loadText();
|
|
356
|
+
return [
|
|
357
|
+
{
|
|
358
|
+
text,
|
|
359
|
+
tokenCount: length,
|
|
360
|
+
score: 1.0,
|
|
361
|
+
isBm25: false,
|
|
362
|
+
},
|
|
363
|
+
];
|
|
364
|
+
}
|
|
365
|
+
const docText = yield this.loadText();
|
|
366
|
+
const semanticChunks = this._chunks.filter((c) => !this.isBm25Chunk(c));
|
|
367
|
+
const bm25Chunks = this._chunks.filter((c) => this.isBm25Chunk(c));
|
|
368
|
+
const semSections = this.buildSectionsFor(docText, semanticChunks, false, maxTokens, maxSections, overlappingChunks).slice(0, maxSections);
|
|
369
|
+
const bmSections = this.buildSectionsFor(docText, bm25Chunks, true, maxTokens, maxSections, overlappingChunks).slice(0, maxSections);
|
|
370
|
+
return [...semSections, ...bmSections];
|
|
371
|
+
});
|
|
325
372
|
}
|
|
326
373
|
}
|
|
327
374
|
exports.LocalDocumentResult = LocalDocumentResult;
|
|
375
|
+
LocalDocumentResult.CONNECTOR = "\n\n...\n\n";
|
|
328
376
|
//# sourceMappingURL=LocalDocumentResult.js.map
|