@mastra/rag 2.0.0-beta.2 → 2.0.0-beta.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +58 -0
- package/dist/document/document.d.ts.map +1 -1
- package/dist/document/extractors/keywords.d.ts +2 -2
- package/dist/document/extractors/keywords.d.ts.map +1 -1
- package/dist/document/extractors/questions.d.ts +2 -2
- package/dist/document/extractors/questions.d.ts.map +1 -1
- package/dist/document/extractors/summary.d.ts.map +1 -1
- package/dist/document/extractors/title.d.ts +2 -2
- package/dist/document/extractors/title.d.ts.map +1 -1
- package/dist/document/extractors/types.d.ts +6 -6
- package/dist/document/extractors/types.d.ts.map +1 -1
- package/dist/document/transformers/html.d.ts +1 -0
- package/dist/document/transformers/html.d.ts.map +1 -1
- package/dist/index.cjs +68 -17
- package/dist/index.cjs.map +1 -1
- package/dist/index.js +70 -19
- package/dist/index.js.map +1 -1
- package/dist/rerank/relevance/mastra-agent/index.d.ts +2 -2
- package/dist/rerank/relevance/mastra-agent/index.d.ts.map +1 -1
- package/dist/utils/vector-search.d.ts.map +1 -1
- package/package.json +3 -4
package/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,63 @@
|
|
|
1
1
|
# @mastra/rag
|
|
2
2
|
|
|
3
|
+
## 2.0.0-beta.4
|
|
4
|
+
|
|
5
|
+
### Patch Changes
|
|
6
|
+
|
|
7
|
+
- Add support for AI SDK v6 (LanguageModelV3) ([#11191](https://github.com/mastra-ai/mastra/pull/11191))
|
|
8
|
+
|
|
9
|
+
Agents can now use `LanguageModelV3` models from AI SDK v6 beta providers like `@ai-sdk/openai@^3.0.0-beta`.
|
|
10
|
+
|
|
11
|
+
**New features:**
|
|
12
|
+
- Usage normalization: V3's nested usage format is normalized to Mastra's flat format with `reasoningTokens`, `cachedInputTokens`, and raw data preserved in a `raw` field
|
|
13
|
+
|
|
14
|
+
**Backward compatible:** All existing V1 and V2 models continue to work unchanged.
|
|
15
|
+
|
|
16
|
+
- Updated dependencies [[`4f94ed8`](https://github.com/mastra-ai/mastra/commit/4f94ed8177abfde3ec536e3574883e075423350c), [`ac3cc23`](https://github.com/mastra-ai/mastra/commit/ac3cc2397d1966bc0fc2736a223abc449d3c7719), [`a86f4df`](https://github.com/mastra-ai/mastra/commit/a86f4df0407311e0d2ea49b9a541f0938810d6a9), [`029540c`](https://github.com/mastra-ai/mastra/commit/029540ca1e582fc2dd8d288ecd4a9b0f31a954ef), [`66741d1`](https://github.com/mastra-ai/mastra/commit/66741d1a99c4f42cf23a16109939e8348ac6852e), [`01b20fe`](https://github.com/mastra-ai/mastra/commit/01b20fefb7c67c2b7d79417598ef4e60256d1225), [`0dbf199`](https://github.com/mastra-ai/mastra/commit/0dbf199110f22192ce5c95b1c8148d4872b4d119), [`a7ce182`](https://github.com/mastra-ai/mastra/commit/a7ce1822a8785ce45d62dd5c911af465e144f7d7)]:
|
|
17
|
+
- @mastra/core@1.0.0-beta.14
|
|
18
|
+
|
|
19
|
+
## 2.0.0-beta.3
|
|
20
|
+
|
|
21
|
+
### Patch Changes
|
|
22
|
+
|
|
23
|
+
- Add maxSize support for HTML chunking strategies ([#10654](https://github.com/mastra-ai/mastra/pull/10654))
|
|
24
|
+
|
|
25
|
+
Added support for the `maxSize` option in HTML chunking strategies (`headers` and `sections`), allowing users to control the maximum chunk size when chunking HTML documents. Previously, HTML chunks could be excessively large when sections contained substantial content.
|
|
26
|
+
|
|
27
|
+
**Changes:**
|
|
28
|
+
- Added `maxSize` support to `headers` strategy - applies `RecursiveCharacterTransformer` after header-based splitting
|
|
29
|
+
- Added `maxSize` support to `sections` strategy - applies `RecursiveCharacterTransformer` after section-based splitting
|
|
30
|
+
- Fixed `splitHtmlByHeaders` content extraction bug - changed from broken `nextElementSibling` to working `parentNode.childNodes` approach
|
|
31
|
+
- Added comprehensive test coverage including integration test with real arXiv paper
|
|
32
|
+
|
|
33
|
+
**Usage:**
|
|
34
|
+
|
|
35
|
+
```typescript
|
|
36
|
+
import { MDocument } from '@mastra/rag';
|
|
37
|
+
|
|
38
|
+
const doc = MDocument.fromHTML(htmlContent);
|
|
39
|
+
|
|
40
|
+
const chunks = await doc.chunk({
|
|
41
|
+
strategy: 'html',
|
|
42
|
+
headers: [
|
|
43
|
+
['h1', 'Header 1'],
|
|
44
|
+
['h2', 'Header 2'],
|
|
45
|
+
['h3', 'Header 3'],
|
|
46
|
+
],
|
|
47
|
+
maxSize: 512, // Control chunk size
|
|
48
|
+
overlap: 50, // Optional overlap for context
|
|
49
|
+
});
|
|
50
|
+
```
|
|
51
|
+
|
|
52
|
+
**Results from real arXiv paper test:**
|
|
53
|
+
- Without maxSize: 22 chunks, max 45,531 chars (too big!)
|
|
54
|
+
- With maxSize=512: 499 chunks, max 512 chars (properly sized)
|
|
55
|
+
|
|
56
|
+
Fixes #7942
|
|
57
|
+
|
|
58
|
+
- Updated dependencies [[`ac0d2f4`](https://github.com/mastra-ai/mastra/commit/ac0d2f4ff8831f72c1c66c2be809706d17f65789), [`1a0d3fc`](https://github.com/mastra-ai/mastra/commit/1a0d3fc811482c9c376cdf79ee615c23bae9b2d6), [`85a628b`](https://github.com/mastra-ai/mastra/commit/85a628b1224a8f64cd82ea7f033774bf22df7a7e), [`c237233`](https://github.com/mastra-ai/mastra/commit/c23723399ccedf7f5744b3f40997b79246bfbe64), [`15f9e21`](https://github.com/mastra-ai/mastra/commit/15f9e216177201ea6e3f6d0bfb063fcc0953444f), [`ff94dea`](https://github.com/mastra-ai/mastra/commit/ff94dea935f4e34545c63bcb6c29804732698809), [`5b2ff46`](https://github.com/mastra-ai/mastra/commit/5b2ff4651df70c146523a7fca773f8eb0a2272f8), [`db41688`](https://github.com/mastra-ai/mastra/commit/db4168806d007417e2e60b4f68656dca4e5f40c9), [`5ca599d`](https://github.com/mastra-ai/mastra/commit/5ca599d0bb59a1595f19f58473fcd67cc71cef58), [`bff1145`](https://github.com/mastra-ai/mastra/commit/bff114556b3cbadad9b2768488708f8ad0e91475), [`5c8ca24`](https://github.com/mastra-ai/mastra/commit/5c8ca247094e0cc2cdbd7137822fb47241f86e77), [`e191844`](https://github.com/mastra-ai/mastra/commit/e1918444ca3f80e82feef1dad506cd4ec6e2875f), [`22553f1`](https://github.com/mastra-ai/mastra/commit/22553f11c63ee5e966a9c034a349822249584691), [`7237163`](https://github.com/mastra-ai/mastra/commit/72371635dbf96a87df4b073cc48fc655afbdce3d), [`2500740`](https://github.com/mastra-ai/mastra/commit/2500740ea23da067d6e50ec71c625ab3ce275e64), [`873ecbb`](https://github.com/mastra-ai/mastra/commit/873ecbb517586aa17d2f1e99283755b3ebb2863f), [`4f9bbe5`](https://github.com/mastra-ai/mastra/commit/4f9bbe5968f42c86f4930b8193de3c3c17e5bd36), [`02e51fe`](https://github.com/mastra-ai/mastra/commit/02e51feddb3d4155cfbcc42624fd0d0970d032c0), [`8f3fa3a`](https://github.com/mastra-ai/mastra/commit/8f3fa3a652bb77da092f913ec51ae46e3a7e27dc), [`cd29ad2`](https://github.com/mastra-ai/mastra/commit/cd29ad23a255534e8191f249593849ed29160886), [`bdf4d8c`](https://github.com/mastra-ai/mastra/commit/bdf4d8cdc656d8a2c21d81834bfa3bfa70f56c16), [`854e3da`](https://github.com/mastra-ai/mastra/commit/854e3dad5daac17a91a20986399d3a51f54bf68b), [`ce18d38`](https://github.com/mastra-ai/mastra/commit/ce18d38678c65870350d123955014a8432075fd9), [`cccf9c8`](https://github.com/mastra-ai/mastra/commit/cccf9c8b2d2dfc1a5e63919395b83d78c89682a0), [`61a5705`](https://github.com/mastra-ai/mastra/commit/61a570551278b6743e64243b3ce7d73de915ca8a), [`db70a48`](https://github.com/mastra-ai/mastra/commit/db70a48aeeeeb8e5f92007e8ede52c364ce15287), [`f0fdc14`](https://github.com/mastra-ai/mastra/commit/f0fdc14ee233d619266b3d2bbdeea7d25cfc6d13), [`db18bc9`](https://github.com/mastra-ai/mastra/commit/db18bc9c3825e2c1a0ad9a183cc9935f6691bfa1), [`9b37b56`](https://github.com/mastra-ai/mastra/commit/9b37b565e1f2a76c24f728945cc740c2b09be9da), [`41a23c3`](https://github.com/mastra-ai/mastra/commit/41a23c32f9877d71810f37e24930515df2ff7a0f), [`5d171ad`](https://github.com/mastra-ai/mastra/commit/5d171ad9ef340387276b77c2bb3e83e83332d729), [`f03ae60`](https://github.com/mastra-ai/mastra/commit/f03ae60500fe350c9d828621006cdafe1975fdd8), [`d1e74a0`](https://github.com/mastra-ai/mastra/commit/d1e74a0a293866dece31022047f5dbab65a304d0), [`39e7869`](https://github.com/mastra-ai/mastra/commit/39e7869bc7d0ee391077ce291474d8a84eedccff), [`5761926`](https://github.com/mastra-ai/mastra/commit/57619260c4a2cdd598763abbacd90de594c6bc76), [`c900fdd`](https://github.com/mastra-ai/mastra/commit/c900fdd504c41348efdffb205cfe80d48c38fa33), [`604a79f`](https://github.com/mastra-ai/mastra/commit/604a79fecf276e26a54a3fe01bb94e65315d2e0e), [`887f0b4`](https://github.com/mastra-ai/mastra/commit/887f0b4746cdbd7cb7d6b17ac9f82aeb58037ea5), [`2562143`](https://github.com/mastra-ai/mastra/commit/256214336b4faa78646c9c1776612393790d8784), [`ef11a61`](https://github.com/mastra-ai/mastra/commit/ef11a61920fa0ed08a5b7ceedd192875af119749)]:
|
|
59
|
+
- @mastra/core@1.0.0-beta.6
|
|
60
|
+
|
|
3
61
|
## 2.0.0-beta.2
|
|
4
62
|
|
|
5
63
|
### Patch Changes
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"document.d.ts","sourceRoot":"","sources":["../../src/document/document.ts"],"names":[],"mappings":"AAEA,OAAO,EAAE,QAAQ,IAAI,KAAK,EAAgC,MAAM,UAAU,CAAC;AAU3E,OAAO,KAAK,EACV,WAAW,EAEX,aAAa,EACb,gBAAgB,EAChB,qBAAqB,EACrB,qBAAqB,EACrB,iBAAiB,EACjB,oBAAoB,EACpB,4BAA4B,EAC5B,gBAAgB,EAChB,iBAAiB,EACjB,oBAAoB,EAErB,MAAM,SAAS,CAAC;AAGjB,qBAAa,SAAS;IACpB,OAAO,CAAC,MAAM,CAAU;IACxB,OAAO,CAAC,IAAI,CAAS;gBAET,EAAE,IAAI,EAAE,IAAI,EAAE,EAAE;QAAE,IAAI,EAAE;YAAE,IAAI,EAAE,MAAM,CAAC;YAAC,QAAQ,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAA;SAAE,EAAE,CAAC;QAAC,IAAI,EAAE,MAAM,CAAA;KAAE;IAOhG,eAAe,CAAC,EAAE,KAAK,EAAE,OAAO,EAAE,SAAS,EAAE,QAAQ,EAAE,EAAE,aAAa,GAAG,OAAO,CAAC,SAAS,CAAC;IAmDjG,MAAM,CAAC,QAAQ,CAAC,IAAI,EAAE,MAAM,EAAE,QAAQ,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,GAAG,SAAS;IAYxE,MAAM,CAAC,QAAQ,CAAC,IAAI,EAAE,MAAM,EAAE,QAAQ,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,GAAG,SAAS;IAYxE,MAAM,CAAC,YAAY,CAAC,QAAQ,EAAE,MAAM,EAAE,QAAQ,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,GAAG,SAAS;IAYhF,MAAM,CAAC,QAAQ,CAAC,UAAU,EAAE,MAAM,EAAE,QAAQ,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,GAAG,SAAS;IAY9E,OAAO,CAAC,eAAe;IAevB,OAAO,CAAC,YAAY,CAAC,CAA4E;IAEjG,OAAO,KAAK,WAAW,GAetB;YAEa,OAAO;IASf,cAAc,CAAC,OAAO,CAAC,EAAE,qBAAqB,GAAG,OAAO,CAAC,IAAI,CAAC;IAa9D,cAAc,CAAC,OAAO,CAAC,EAAE,qBAAqB,GAAG,OAAO,CAAC,IAAI,CAAC;IAU9D,SAAS,CAAC,OAAO,CAAC,EAAE,gBAAgB,GAAG,OAAO,CAAC,IAAI,CAAC;
|
|
1
|
+
{"version":3,"file":"document.d.ts","sourceRoot":"","sources":["../../src/document/document.ts"],"names":[],"mappings":"AAEA,OAAO,EAAE,QAAQ,IAAI,KAAK,EAAgC,MAAM,UAAU,CAAC;AAU3E,OAAO,KAAK,EACV,WAAW,EAEX,aAAa,EACb,gBAAgB,EAChB,qBAAqB,EACrB,qBAAqB,EACrB,iBAAiB,EACjB,oBAAoB,EACpB,4BAA4B,EAC5B,gBAAgB,EAChB,iBAAiB,EACjB,oBAAoB,EAErB,MAAM,SAAS,CAAC;AAGjB,qBAAa,SAAS;IACpB,OAAO,CAAC,MAAM,CAAU;IACxB,OAAO,CAAC,IAAI,CAAS;gBAET,EAAE,IAAI,EAAE,IAAI,EAAE,EAAE;QAAE,IAAI,EAAE;YAAE,IAAI,EAAE,MAAM,CAAC;YAAC,QAAQ,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAA;SAAE,EAAE,CAAC;QAAC,IAAI,EAAE,MAAM,CAAA;KAAE;IAOhG,eAAe,CAAC,EAAE,KAAK,EAAE,OAAO,EAAE,SAAS,EAAE,QAAQ,EAAE,EAAE,aAAa,GAAG,OAAO,CAAC,SAAS,CAAC;IAmDjG,MAAM,CAAC,QAAQ,CAAC,IAAI,EAAE,MAAM,EAAE,QAAQ,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,GAAG,SAAS;IAYxE,MAAM,CAAC,QAAQ,CAAC,IAAI,EAAE,MAAM,EAAE,QAAQ,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,GAAG,SAAS;IAYxE,MAAM,CAAC,YAAY,CAAC,QAAQ,EAAE,MAAM,EAAE,QAAQ,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,GAAG,SAAS;IAYhF,MAAM,CAAC,QAAQ,CAAC,UAAU,EAAE,MAAM,EAAE,QAAQ,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,GAAG,SAAS;IAY9E,OAAO,CAAC,eAAe;IAevB,OAAO,CAAC,YAAY,CAAC,CAA4E;IAEjG,OAAO,KAAK,WAAW,GAetB;YAEa,OAAO;IASf,cAAc,CAAC,OAAO,CAAC,EAAE,qBAAqB,GAAG,OAAO,CAAC,IAAI,CAAC;IAa9D,cAAc,CAAC,OAAO,CAAC,EAAE,qBAAqB,GAAG,OAAO,CAAC,IAAI,CAAC;IAU9D,SAAS,CAAC,OAAO,CAAC,EAAE,gBAAgB,GAAG,OAAO,CAAC,IAAI,CAAC;IA8CpD,SAAS,CAAC,OAAO,CAAC,EAAE,gBAAgB,GAAG,OAAO,CAAC,IAAI,CAAC;IAmBpD,UAAU,CAAC,OAAO,CAAC,EAAE,iBAAiB,GAAG,OAAO,CAAC,IAAI,CAAC;IAMtD,UAAU,CAAC,OAAO,CAAC,EAAE,iBAAiB,GAAG,OAAO,CAAC,IAAI,CAAC;IAUtD,aAAa,CAAC,OAAO,CAAC,EAAE,oBAAoB,GAAG,OAAO,CAAC,IAAI,CAAC;IAa5D,aAAa,CAAC,OAAO,CAAC,EAAE,oBAAoB,GAAG,OAAO,CAAC,IAAI,CAAC;IAuB5D,qBAAqB,CAAC,OAAO,CAAC,EAAE,4BAA4B,GAAG,OAAO,CAAC,IAAI,CAAC;IAU5E,KAAK,CAAC,MAAM,CAAC,EAAE,WAAW,GAAG,OAAO,CAAC,KAAK,EAAE,CAAC;IAiBnD,OAAO,IAAI,KAAK,EAAE;IAIlB,OAAO,IAAI,MAAM,EAAE;IAInB,WAAW,IAAI,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,EAAE;CAGrC"}
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import type { MastraLanguageModel } from '@mastra/core/agent';
|
|
1
|
+
import type { MastraLanguageModel, MastraLegacyLanguageModel } from '@mastra/core/agent';
|
|
2
2
|
import type { KeywordExtractPrompt } from '../prompts/index.js';
|
|
3
3
|
import type { BaseNode } from '../schema/index.js';
|
|
4
4
|
import { BaseExtractor } from './base.js';
|
|
@@ -13,7 +13,7 @@ type ExtractKeyword = {
|
|
|
13
13
|
* Extract keywords from a list of nodes.
|
|
14
14
|
*/
|
|
15
15
|
export declare class KeywordExtractor extends BaseExtractor {
|
|
16
|
-
llm: MastraLanguageModel;
|
|
16
|
+
llm: MastraLanguageModel | MastraLegacyLanguageModel;
|
|
17
17
|
keywords: number;
|
|
18
18
|
promptTemplate: KeywordExtractPrompt;
|
|
19
19
|
/**
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"keywords.d.ts","sourceRoot":"","sources":["../../../src/document/extractors/keywords.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,mBAAmB,EAAE,MAAM,oBAAoB,CAAC;
|
|
1
|
+
{"version":3,"file":"keywords.d.ts","sourceRoot":"","sources":["../../../src/document/extractors/keywords.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,mBAAmB,EAAE,yBAAyB,EAAE,MAAM,oBAAoB,CAAC;AAEzF,OAAO,KAAK,EAAE,oBAAoB,EAAE,MAAM,YAAY,CAAC;AACvD,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,WAAW,CAAC;AAE1C,OAAO,EAAE,aAAa,EAAE,MAAM,QAAQ,CAAC;AAEvC,OAAO,KAAK,EAAE,kBAAkB,EAAE,MAAM,SAAS,CAAC;AAElD,KAAK,cAAc,GAAG;IACpB;;OAEG;IACH,eAAe,EAAE,MAAM,CAAC;CACzB,CAAC;AAEF;;GAEG;AACH,qBAAa,gBAAiB,SAAQ,aAAa;IACjD,GAAG,EAAE,mBAAmB,GAAG,yBAAyB,CAAC;IACrD,QAAQ,EAAE,MAAM,CAAK;IACrB,cAAc,EAAE,oBAAoB,CAAC;IAErC;;;;;;OAMG;gBACS,OAAO,CAAC,EAAE,kBAAkB;IAexC;;;;OAIG;IACH;;;OAGG;IACG,wBAAwB,CAAC,IAAI,EAAE,QAAQ,GAAG,OAAO,CAAC,cAAc,CAAC;IAoDvE;;;;OAIG;IACH;;;;OAIG;IACG,OAAO,CAAC,KAAK,EAAE,QAAQ,EAAE,GAAG,OAAO,CAAC,KAAK,CAAC,cAAc,CAAC,CAAC;CAKjE"}
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import type { MastraLanguageModel } from '@mastra/core/agent';
|
|
1
|
+
import type { MastraLanguageModel, MastraLegacyLanguageModel } from '@mastra/core/agent';
|
|
2
2
|
import type { QuestionExtractPrompt } from '../prompts/index.js';
|
|
3
3
|
import type { BaseNode } from '../schema/index.js';
|
|
4
4
|
import { BaseExtractor } from './base.js';
|
|
@@ -13,7 +13,7 @@ type ExtractQuestion = {
|
|
|
13
13
|
* Extract questions from a list of nodes.
|
|
14
14
|
*/
|
|
15
15
|
export declare class QuestionsAnsweredExtractor extends BaseExtractor {
|
|
16
|
-
llm: MastraLanguageModel;
|
|
16
|
+
llm: MastraLanguageModel | MastraLegacyLanguageModel;
|
|
17
17
|
questions: number;
|
|
18
18
|
promptTemplate: QuestionExtractPrompt;
|
|
19
19
|
embeddingOnly: boolean;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"questions.d.ts","sourceRoot":"","sources":["../../../src/document/extractors/questions.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,mBAAmB,EAAE,MAAM,oBAAoB,CAAC;
|
|
1
|
+
{"version":3,"file":"questions.d.ts","sourceRoot":"","sources":["../../../src/document/extractors/questions.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,mBAAmB,EAAE,yBAAyB,EAAE,MAAM,oBAAoB,CAAC;AAEzF,OAAO,KAAK,EAAE,qBAAqB,EAAE,MAAM,YAAY,CAAC;AACxD,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,WAAW,CAAC;AAE1C,OAAO,EAAE,aAAa,EAAE,MAAM,QAAQ,CAAC;AAEvC,OAAO,KAAK,EAAE,yBAAyB,EAAE,MAAM,SAAS,CAAC;AAEzD,KAAK,eAAe,GAAG;IACrB;;OAEG;IACH,6BAA6B,EAAE,MAAM,CAAC;CACvC,CAAC;AAEF;;GAEG;AACH,qBAAa,0BAA2B,SAAQ,aAAa;IAC3D,GAAG,EAAE,mBAAmB,GAAG,yBAAyB,CAAC;IACrD,SAAS,EAAE,MAAM,CAAK;IACtB,cAAc,EAAE,qBAAqB,CAAC;IACtC,aAAa,EAAE,OAAO,CAAS;IAE/B;;;;;;OAMG;gBACS,OAAO,CAAC,EAAE,yBAAyB;IAkB/C;;;;OAIG;IACG,wBAAwB,CAAC,IAAI,EAAE,QAAQ,GAAG,OAAO,CAAC,eAAe,CAAC;IA6CxE;;;;OAIG;IACG,OAAO,CAAC,KAAK,EAAE,QAAQ,EAAE,GAAG,OAAO,CAAC,KAAK,CAAC,eAAe,CAAC,GAAG,KAAK,CAAC,MAAM,CAAC,CAAC;CAKlF"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"summary.d.ts","sourceRoot":"","sources":["../../../src/document/extractors/summary.ts"],"names":[],"mappings":"AAGA,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,YAAY,CAAC;AAChD,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,WAAW,CAAC;AAE1C,OAAO,EAAE,aAAa,EAAE,MAAM,QAAQ,CAAC;AAEvC,OAAO,KAAK,EAAE,kBAAkB,EAAE,MAAM,SAAS,CAAC;AAElD,KAAK,cAAc,GAAG;IACpB,cAAc,CAAC,EAAE,MAAM,CAAC;IACxB,kBAAkB,CAAC,EAAE,MAAM,CAAC;IAC5B,kBAAkB,CAAC,EAAE,MAAM,CAAC;CAC7B,CAAC;AAEF;;;;;;GAMG;AACH,qBAAa,gBAAiB,SAAQ,aAAa;IACjD,OAAO,CAAC,GAAG,
|
|
1
|
+
{"version":3,"file":"summary.d.ts","sourceRoot":"","sources":["../../../src/document/extractors/summary.ts"],"names":[],"mappings":"AAGA,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,YAAY,CAAC;AAChD,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,WAAW,CAAC;AAE1C,OAAO,EAAE,aAAa,EAAE,MAAM,QAAQ,CAAC;AAEvC,OAAO,KAAK,EAAE,kBAAkB,EAAE,MAAM,SAAS,CAAC;AAElD,KAAK,cAAc,GAAG;IACpB,cAAc,CAAC,EAAE,MAAM,CAAC;IACxB,kBAAkB,CAAC,EAAE,MAAM,CAAC;IAC5B,kBAAkB,CAAC,EAAE,MAAM,CAAC;CAC7B,CAAC;AAEF;;;;;;GAMG;AACH,qBAAa,gBAAiB,SAAQ,aAAa;IACjD,OAAO,CAAC,GAAG,CAAkD;IAC7D,SAAS,EAAE,MAAM,EAAE,CAAC;IACpB,cAAc,EAAE,aAAa,CAAC;IAC9B,OAAO,CAAC,WAAW,CAAU;IAC7B,OAAO,CAAC,WAAW,CAAU;IAC7B,OAAO,CAAC,WAAW,CAAU;gBACjB,OAAO,CAAC,EAAE,kBAAkB;IAsBxC;;;;OAIG;IACG,mBAAmB,CAAC,IAAI,EAAE,QAAQ,GAAG,OAAO,CAAC,MAAM,CAAC;IAuC1D;;;;OAIG;IACG,OAAO,CAAC,KAAK,EAAE,QAAQ,EAAE,GAAG,OAAO,CAAC,cAAc,EAAE,CAAC;CAqB5D"}
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import type { MastraLanguageModel } from '@mastra/core/agent';
|
|
1
|
+
import type { MastraLanguageModel, MastraLegacyLanguageModel } from '@mastra/core/agent';
|
|
2
2
|
import type { TitleCombinePrompt, TitleExtractorPrompt } from '../prompts/index.js';
|
|
3
3
|
import type { BaseNode } from '../schema/index.js';
|
|
4
4
|
import { BaseExtractor } from './base.js';
|
|
@@ -10,7 +10,7 @@ type ExtractTitle = {
|
|
|
10
10
|
* Extract title from a list of nodes.
|
|
11
11
|
*/
|
|
12
12
|
export declare class TitleExtractor extends BaseExtractor {
|
|
13
|
-
llm: MastraLanguageModel;
|
|
13
|
+
llm: MastraLegacyLanguageModel | MastraLanguageModel;
|
|
14
14
|
isTextNodeOnly: boolean;
|
|
15
15
|
nodes: number;
|
|
16
16
|
nodeTemplate: TitleExtractorPrompt;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"title.d.ts","sourceRoot":"","sources":["../../../src/document/extractors/title.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,mBAAmB,EAAE,MAAM,oBAAoB,CAAC;
|
|
1
|
+
{"version":3,"file":"title.d.ts","sourceRoot":"","sources":["../../../src/document/extractors/title.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,mBAAmB,EAAE,yBAAyB,EAAE,MAAM,oBAAoB,CAAC;AAEzF,OAAO,KAAK,EAAE,kBAAkB,EAAE,oBAAoB,EAAE,MAAM,YAAY,CAAC;AAE3E,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,WAAW,CAAC;AAC1C,OAAO,EAAE,aAAa,EAAE,MAAM,QAAQ,CAAC;AAEvC,OAAO,KAAK,EAAE,mBAAmB,EAAE,MAAM,SAAS,CAAC;AAEnD,KAAK,YAAY,GAAG;IAClB,aAAa,EAAE,MAAM,CAAC;CACvB,CAAC;AAEF;;GAEG;AACH,qBAAa,cAAe,SAAQ,aAAa;IAC/C,GAAG,EAAE,yBAAyB,GAAG,mBAAmB,CAAC;IACrD,cAAc,EAAE,OAAO,CAAS;IAChC,KAAK,EAAE,MAAM,CAAK;IAClB,YAAY,EAAE,oBAAoB,CAAC;IACnC,eAAe,EAAE,kBAAkB,CAAC;gBAExB,OAAO,CAAC,EAAE,mBAAmB;IAqBzC;;;;OAIG;IACG,OAAO,CAAC,KAAK,EAAE,QAAQ,EAAE,GAAG,OAAO,CAAC,KAAK,CAAC,YAAY,CAAC,CAAC;IAoC9D,OAAO,CAAC,WAAW;IASnB,OAAO,CAAC,uBAAuB;YAYjB,aAAa;YA6Cb,mBAAmB;CAiClC"}
|
|
@@ -1,27 +1,27 @@
|
|
|
1
|
-
import type { MastraLanguageModel } from '@mastra/core/agent';
|
|
1
|
+
import type { MastraLanguageModel, MastraLegacyLanguageModel } from '@mastra/core/agent';
|
|
2
2
|
import type { KeywordExtractPrompt, QuestionExtractPrompt, SummaryPrompt, TitleExtractorPrompt, TitleCombinePrompt } from '../prompts/index.js';
|
|
3
3
|
export type KeywordExtractArgs = {
|
|
4
|
-
llm?: MastraLanguageModel;
|
|
4
|
+
llm?: MastraLegacyLanguageModel | MastraLanguageModel;
|
|
5
5
|
keywords?: number;
|
|
6
6
|
promptTemplate?: KeywordExtractPrompt['template'];
|
|
7
7
|
};
|
|
8
8
|
export type QuestionAnswerExtractArgs = {
|
|
9
|
-
llm?: MastraLanguageModel;
|
|
9
|
+
llm?: MastraLegacyLanguageModel | MastraLanguageModel;
|
|
10
10
|
questions?: number;
|
|
11
11
|
promptTemplate?: QuestionExtractPrompt['template'];
|
|
12
12
|
embeddingOnly?: boolean;
|
|
13
13
|
};
|
|
14
14
|
export type SummaryExtractArgs = {
|
|
15
|
-
llm?: MastraLanguageModel;
|
|
15
|
+
llm?: MastraLegacyLanguageModel | MastraLanguageModel;
|
|
16
16
|
summaries?: string[];
|
|
17
17
|
promptTemplate?: SummaryPrompt['template'];
|
|
18
18
|
};
|
|
19
19
|
export type TitleExtractorsArgs = {
|
|
20
|
-
llm?: MastraLanguageModel;
|
|
20
|
+
llm?: MastraLegacyLanguageModel | MastraLanguageModel;
|
|
21
21
|
nodes?: number;
|
|
22
22
|
nodeTemplate?: TitleExtractorPrompt['template'];
|
|
23
23
|
combineTemplate?: TitleCombinePrompt['template'];
|
|
24
24
|
};
|
|
25
25
|
export declare const STRIP_REGEX: RegExp;
|
|
26
|
-
export declare const baseLLM: MastraLanguageModel;
|
|
26
|
+
export declare const baseLLM: MastraLegacyLanguageModel | MastraLanguageModel;
|
|
27
27
|
//# sourceMappingURL=types.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../../../src/document/extractors/types.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,mBAAmB,EAAE,MAAM,oBAAoB,CAAC;
|
|
1
|
+
{"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../../../src/document/extractors/types.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,mBAAmB,EAAE,yBAAyB,EAAE,MAAM,oBAAoB,CAAC;AACzF,OAAO,KAAK,EACV,oBAAoB,EACpB,qBAAqB,EACrB,aAAa,EACb,oBAAoB,EACpB,kBAAkB,EACnB,MAAM,YAAY,CAAC;AAEpB,MAAM,MAAM,kBAAkB,GAAG;IAC/B,GAAG,CAAC,EAAE,yBAAyB,GAAG,mBAAmB,CAAC;IACtD,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,cAAc,CAAC,EAAE,oBAAoB,CAAC,UAAU,CAAC,CAAC;CACnD,CAAC;AAEF,MAAM,MAAM,yBAAyB,GAAG;IACtC,GAAG,CAAC,EAAE,yBAAyB,GAAG,mBAAmB,CAAC;IACtD,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,cAAc,CAAC,EAAE,qBAAqB,CAAC,UAAU,CAAC,CAAC;IACnD,aAAa,CAAC,EAAE,OAAO,CAAC;CACzB,CAAC;AAEF,MAAM,MAAM,kBAAkB,GAAG;IAC/B,GAAG,CAAC,EAAE,yBAAyB,GAAG,mBAAmB,CAAC;IACtD,SAAS,CAAC,EAAE,MAAM,EAAE,CAAC;IACrB,cAAc,CAAC,EAAE,aAAa,CAAC,UAAU,CAAC,CAAC;CAC5C,CAAC;AAEF,MAAM,MAAM,mBAAmB,GAAG;IAChC,GAAG,CAAC,EAAE,yBAAyB,GAAG,mBAAmB,CAAC;IACtD,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,YAAY,CAAC,EAAE,oBAAoB,CAAC,UAAU,CAAC,CAAC;IAChD,eAAe,CAAC,EAAE,kBAAkB,CAAC,UAAU,CAAC,CAAC;CAClD,CAAC;AAEF,eAAO,MAAM,WAAW,QAAmB,CAAC;AAG5C,eAAO,MAAM,OAAO,EAAE,yBAAyB,GAAG,mBAAsC,CAAC"}
|
|
@@ -23,6 +23,7 @@ export declare class HTMLSectionTransformer {
|
|
|
23
23
|
});
|
|
24
24
|
splitText(text: string): Document[];
|
|
25
25
|
private getXPath;
|
|
26
|
+
private getTextContent;
|
|
26
27
|
private splitHtmlByHeaders;
|
|
27
28
|
splitDocuments(documents: Document[]): Promise<Document[]>;
|
|
28
29
|
createDocuments(texts: string[], metadatas?: Record<string, any>[]): Document[];
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"html.d.ts","sourceRoot":"","sources":["../../../src/document/transformers/html.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,QAAQ,EAAE,MAAM,WAAW,CAAC;AACrC,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,UAAU,CAAC;AAWjD,qBAAa,qBAAqB;IAChC,OAAO,CAAC,gBAAgB,CAAqB;IAC7C,OAAO,CAAC,iBAAiB,CAAU;gBAEvB,OAAO,EAAE,gBAAgB,GAAG;QAAE,OAAO,EAAE,CAAC,MAAM,EAAE,MAAM,CAAC,EAAE,CAAA;KAAE;IAKvE,SAAS,CAAC,EAAE,IAAI,EAAE,EAAE;QAAE,IAAI,EAAE,MAAM,CAAA;KAAE,GAAG,QAAQ,EAAE;IAwDjD,OAAO,CAAC,QAAQ;IA2BhB,OAAO,CAAC,cAAc;IAuBtB,OAAO,CAAC,yBAAyB;IAyBjC,eAAe,CAAC,KAAK,EAAE,MAAM,EAAE,EAAE,SAAS,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,EAAE,GAAG,QAAQ,EAAE;IA8B/E,kBAAkB,CAAC,SAAS,EAAE,QAAQ,EAAE,GAAG,QAAQ,EAAE;CAWtD;AAED,qBAAa,sBAAsB;IACjC,OAAO,CAAC,gBAAgB,CAAyB;IACjD,OAAO,CAAC,YAAY,CAAgC;gBAExC,OAAO,EAAE,gBAAgB,GAAG;QAAE,QAAQ,EAAE,CAAC,MAAM,EAAE,MAAM,CAAC,EAAE,CAAA;KAAE;IAKxE,SAAS,CAAC,IAAI,EAAE,MAAM,GAAG,QAAQ,EAAE;IAenC,OAAO,CAAC,QAAQ;IAwBhB,OAAO,CAAC,kBAAkB;
|
|
1
|
+
{"version":3,"file":"html.d.ts","sourceRoot":"","sources":["../../../src/document/transformers/html.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,QAAQ,EAAE,MAAM,WAAW,CAAC;AACrC,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,UAAU,CAAC;AAWjD,qBAAa,qBAAqB;IAChC,OAAO,CAAC,gBAAgB,CAAqB;IAC7C,OAAO,CAAC,iBAAiB,CAAU;gBAEvB,OAAO,EAAE,gBAAgB,GAAG;QAAE,OAAO,EAAE,CAAC,MAAM,EAAE,MAAM,CAAC,EAAE,CAAA;KAAE;IAKvE,SAAS,CAAC,EAAE,IAAI,EAAE,EAAE;QAAE,IAAI,EAAE,MAAM,CAAA;KAAE,GAAG,QAAQ,EAAE;IAwDjD,OAAO,CAAC,QAAQ;IA2BhB,OAAO,CAAC,cAAc;IAuBtB,OAAO,CAAC,yBAAyB;IAyBjC,eAAe,CAAC,KAAK,EAAE,MAAM,EAAE,EAAE,SAAS,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,EAAE,GAAG,QAAQ,EAAE;IA8B/E,kBAAkB,CAAC,SAAS,EAAE,QAAQ,EAAE,GAAG,QAAQ,EAAE;CAWtD;AAED,qBAAa,sBAAsB;IACjC,OAAO,CAAC,gBAAgB,CAAyB;IACjD,OAAO,CAAC,YAAY,CAAgC;gBAExC,OAAO,EAAE,gBAAgB,GAAG;QAAE,QAAQ,EAAE,CAAC,MAAM,EAAE,MAAM,CAAC,EAAE,CAAA;KAAE;IAKxE,SAAS,CAAC,IAAI,EAAE,MAAM,GAAG,QAAQ,EAAE;IAenC,OAAO,CAAC,QAAQ;IAwBhB,OAAO,CAAC,cAAc;IAuBtB,OAAO,CAAC,kBAAkB;IA6DpB,cAAc,CAAC,SAAS,EAAE,QAAQ,EAAE,GAAG,OAAO,CAAC,QAAQ,EAAE,CAAC;IAahE,eAAe,CAAC,KAAK,EAAE,MAAM,EAAE,EAAE,SAAS,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,EAAE,GAAG,QAAQ,EAAE;IA+B/E,kBAAkB,CAAC,SAAS,EAAE,QAAQ,EAAE,GAAG,QAAQ,EAAE;CAWtD"}
|
package/dist/index.cjs
CHANGED
|
@@ -272,7 +272,7 @@ Provide keywords in the following comma-separated format: 'KEYWORDS: <keywords>'
|
|
|
272
272
|
var defaultQuestionExtractPrompt = new PromptTemplate({
|
|
273
273
|
templateVars: ["numQuestions", "context"],
|
|
274
274
|
template: `(
|
|
275
|
-
"Given the contextual
|
|
275
|
+
"Given the contextual information below, generate {numQuestions} questions this context can provide specific answers to which are unlikely to be found elsewhere. Higher-level summaries of surrounding context may be provided as well. "
|
|
276
276
|
"Try using these summaries to generate better questions that this context can answer."
|
|
277
277
|
"---------------------"
|
|
278
278
|
"{context}"
|
|
@@ -4141,7 +4141,7 @@ var TitleExtractor = class extends BaseExtractor {
|
|
|
4141
4141
|
const titleCandidates = await this.getTitlesCandidates(nodes);
|
|
4142
4142
|
const combinedTitles = titleCandidates.join(", ");
|
|
4143
4143
|
let title = "";
|
|
4144
|
-
if (this.llm
|
|
4144
|
+
if (agent.isSupportedLanguageModel(this.llm)) {
|
|
4145
4145
|
const miniAgent = new agent.Agent({
|
|
4146
4146
|
id: "title-extractor",
|
|
4147
4147
|
model: this.llm,
|
|
@@ -4180,7 +4180,7 @@ var TitleExtractor = class extends BaseExtractor {
|
|
|
4180
4180
|
});
|
|
4181
4181
|
const titleJobs = nodes.map(async (node) => {
|
|
4182
4182
|
let completion;
|
|
4183
|
-
if (this.llm
|
|
4183
|
+
if (agent.isSupportedLanguageModel(this.llm)) {
|
|
4184
4184
|
const result = await miniAgent.generate([
|
|
4185
4185
|
{ role: "user", content: this.nodeTemplate.format({ context: node.getContent() }) }
|
|
4186
4186
|
]);
|
|
@@ -4246,7 +4246,7 @@ var SummaryExtractor = class extends BaseExtractor {
|
|
|
4246
4246
|
instructions: "You are a summary extractor. You are given a node and you need to extract the summary from the node."
|
|
4247
4247
|
});
|
|
4248
4248
|
let summary = "";
|
|
4249
|
-
if (this.llm
|
|
4249
|
+
if (agent.isSupportedLanguageModel(this.llm)) {
|
|
4250
4250
|
const result = await miniAgent.generate([{ role: "user", content: prompt }]);
|
|
4251
4251
|
summary = result.text;
|
|
4252
4252
|
} else {
|
|
@@ -4332,7 +4332,7 @@ var QuestionsAnsweredExtractor = class extends BaseExtractor {
|
|
|
4332
4332
|
instructions: "You are a question extractor. You are given a node and you need to extract the questions from the node."
|
|
4333
4333
|
});
|
|
4334
4334
|
let questionsText = "";
|
|
4335
|
-
if (this.llm
|
|
4335
|
+
if (agent.isSupportedLanguageModel(this.llm)) {
|
|
4336
4336
|
const result2 = await miniAgent.generate([{ role: "user", content: prompt }]);
|
|
4337
4337
|
questionsText = result2.text;
|
|
4338
4338
|
} else {
|
|
@@ -4404,7 +4404,7 @@ var KeywordExtractor = class extends BaseExtractor {
|
|
|
4404
4404
|
name: "keyword-extractor",
|
|
4405
4405
|
instructions: "You are a keyword extractor. You are given a node and you need to extract the keywords from the node."
|
|
4406
4406
|
});
|
|
4407
|
-
if (this.llm
|
|
4407
|
+
if (agent.isSupportedLanguageModel(this.llm)) {
|
|
4408
4408
|
const result = await miniAgent.generate([
|
|
4409
4409
|
{
|
|
4410
4410
|
role: "user",
|
|
@@ -4999,23 +4999,47 @@ var HTMLSectionTransformer = class {
|
|
|
4999
4999
|
}
|
|
5000
5000
|
return "/" + parts.join("/");
|
|
5001
5001
|
}
|
|
5002
|
+
getTextContent(element) {
|
|
5003
|
+
if (!element) return "";
|
|
5004
|
+
if (!element.tagName) {
|
|
5005
|
+
return element.text || "";
|
|
5006
|
+
}
|
|
5007
|
+
let content = element.text || "";
|
|
5008
|
+
if (element.childNodes) {
|
|
5009
|
+
for (const child of element.childNodes) {
|
|
5010
|
+
const childText = this.getTextContent(child);
|
|
5011
|
+
if (childText) {
|
|
5012
|
+
content += " " + childText;
|
|
5013
|
+
}
|
|
5014
|
+
}
|
|
5015
|
+
}
|
|
5016
|
+
return content.trim();
|
|
5017
|
+
}
|
|
5002
5018
|
splitHtmlByHeaders(htmlDoc) {
|
|
5003
5019
|
const sections = [];
|
|
5004
5020
|
const root = nodeHtmlBetterParser.parse(htmlDoc);
|
|
5005
5021
|
const headers = Object.keys(this.headersToSplitOn);
|
|
5006
5022
|
const headerElements = root.querySelectorAll(headers.join(","));
|
|
5007
|
-
headerElements.forEach((headerElement
|
|
5023
|
+
headerElements.forEach((headerElement) => {
|
|
5008
5024
|
const header = headerElement.text?.trim() || "";
|
|
5009
5025
|
const tagName = headerElement.tagName;
|
|
5010
5026
|
const xpath = this.getXPath(headerElement);
|
|
5011
5027
|
let content = "";
|
|
5012
|
-
|
|
5013
|
-
|
|
5014
|
-
|
|
5015
|
-
|
|
5016
|
-
|
|
5028
|
+
const parentNode = headerElement.parentNode;
|
|
5029
|
+
if (parentNode && parentNode.childNodes) {
|
|
5030
|
+
let foundHeader = false;
|
|
5031
|
+
for (const node of parentNode.childNodes) {
|
|
5032
|
+
if (node === headerElement) {
|
|
5033
|
+
foundHeader = true;
|
|
5034
|
+
continue;
|
|
5035
|
+
}
|
|
5036
|
+
if (foundHeader && node.tagName && headers.includes(node.tagName.toLowerCase())) {
|
|
5037
|
+
break;
|
|
5038
|
+
}
|
|
5039
|
+
if (foundHeader) {
|
|
5040
|
+
content += this.getTextContent(node) + " ";
|
|
5041
|
+
}
|
|
5017
5042
|
}
|
|
5018
|
-
currentElement = currentElement.nextElementSibling;
|
|
5019
5043
|
}
|
|
5020
5044
|
content = content.trim();
|
|
5021
5045
|
sections.push({
|
|
@@ -6392,13 +6416,33 @@ var MDocument = class _MDocument {
|
|
|
6392
6416
|
async chunkHTML(options) {
|
|
6393
6417
|
if (options?.headers?.length) {
|
|
6394
6418
|
const rt = new HTMLHeaderTransformer(options);
|
|
6395
|
-
|
|
6419
|
+
let textSplit = rt.transformDocuments(this.chunks);
|
|
6420
|
+
if (options?.maxSize) {
|
|
6421
|
+
const textSplitter = new RecursiveCharacterTransformer({
|
|
6422
|
+
maxSize: options.maxSize,
|
|
6423
|
+
overlap: options.overlap,
|
|
6424
|
+
keepSeparator: options.keepSeparator,
|
|
6425
|
+
addStartIndex: options.addStartIndex,
|
|
6426
|
+
stripWhitespace: options.stripWhitespace
|
|
6427
|
+
});
|
|
6428
|
+
textSplit = textSplitter.splitDocuments(textSplit);
|
|
6429
|
+
}
|
|
6396
6430
|
this.chunks = textSplit;
|
|
6397
6431
|
return;
|
|
6398
6432
|
}
|
|
6399
6433
|
if (options?.sections?.length) {
|
|
6400
6434
|
const rt = new HTMLSectionTransformer(options);
|
|
6401
|
-
|
|
6435
|
+
let textSplit = rt.transformDocuments(this.chunks);
|
|
6436
|
+
if (options?.maxSize) {
|
|
6437
|
+
const textSplitter = new RecursiveCharacterTransformer({
|
|
6438
|
+
maxSize: options.maxSize,
|
|
6439
|
+
overlap: options.overlap,
|
|
6440
|
+
keepSeparator: options.keepSeparator,
|
|
6441
|
+
addStartIndex: options.addStartIndex,
|
|
6442
|
+
stripWhitespace: options.stripWhitespace
|
|
6443
|
+
});
|
|
6444
|
+
textSplit = textSplitter.splitDocuments(textSplit);
|
|
6445
|
+
}
|
|
6402
6446
|
this.chunks = textSplit;
|
|
6403
6447
|
return;
|
|
6404
6448
|
}
|
|
@@ -6550,7 +6594,7 @@ Always return just the number, no explanation.`,
|
|
|
6550
6594
|
const prompt = relevance.createSimilarityPrompt(query, text);
|
|
6551
6595
|
const model = await this.agent.getModel();
|
|
6552
6596
|
let response;
|
|
6553
|
-
if (model
|
|
6597
|
+
if (agent.isSupportedLanguageModel(model)) {
|
|
6554
6598
|
response = await this.agent.generate(prompt);
|
|
6555
6599
|
} else {
|
|
6556
6600
|
response = await this.agent.generateLegacy(prompt);
|
|
@@ -6936,7 +6980,14 @@ var vectorQuerySearch = async ({
|
|
|
6936
6980
|
providerOptions
|
|
6937
6981
|
}) => {
|
|
6938
6982
|
let embeddingResult;
|
|
6939
|
-
if (model.specificationVersion === "
|
|
6983
|
+
if (model.specificationVersion === "v3") {
|
|
6984
|
+
embeddingResult = await vector.embedV3({
|
|
6985
|
+
model,
|
|
6986
|
+
value: queryText,
|
|
6987
|
+
maxRetries,
|
|
6988
|
+
...providerOptions && { providerOptions }
|
|
6989
|
+
});
|
|
6990
|
+
} else if (model.specificationVersion === "v2") {
|
|
6940
6991
|
embeddingResult = await vector.embedV2({
|
|
6941
6992
|
model,
|
|
6942
6993
|
value: queryText,
|