@mastra/rag 2.0.0-beta.2 → 2.0.0-beta.4

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -1,5 +1,63 @@
1
1
  # @mastra/rag
2
2
 
3
+ ## 2.0.0-beta.4
4
+
5
+ ### Patch Changes
6
+
7
+ - Add support for AI SDK v6 (LanguageModelV3) ([#11191](https://github.com/mastra-ai/mastra/pull/11191))
8
+
9
+ Agents can now use `LanguageModelV3` models from AI SDK v6 beta providers like `@ai-sdk/openai@^3.0.0-beta`.
10
+
11
+ **New features:**
12
+ - Usage normalization: V3's nested usage format is normalized to Mastra's flat format with `reasoningTokens`, `cachedInputTokens`, and raw data preserved in a `raw` field
13
+
14
+ **Backward compatible:** All existing V1 and V2 models continue to work unchanged.
15
+
16
+ - Updated dependencies [[`4f94ed8`](https://github.com/mastra-ai/mastra/commit/4f94ed8177abfde3ec536e3574883e075423350c), [`ac3cc23`](https://github.com/mastra-ai/mastra/commit/ac3cc2397d1966bc0fc2736a223abc449d3c7719), [`a86f4df`](https://github.com/mastra-ai/mastra/commit/a86f4df0407311e0d2ea49b9a541f0938810d6a9), [`029540c`](https://github.com/mastra-ai/mastra/commit/029540ca1e582fc2dd8d288ecd4a9b0f31a954ef), [`66741d1`](https://github.com/mastra-ai/mastra/commit/66741d1a99c4f42cf23a16109939e8348ac6852e), [`01b20fe`](https://github.com/mastra-ai/mastra/commit/01b20fefb7c67c2b7d79417598ef4e60256d1225), [`0dbf199`](https://github.com/mastra-ai/mastra/commit/0dbf199110f22192ce5c95b1c8148d4872b4d119), [`a7ce182`](https://github.com/mastra-ai/mastra/commit/a7ce1822a8785ce45d62dd5c911af465e144f7d7)]:
17
+ - @mastra/core@1.0.0-beta.14
18
+
19
+ ## 2.0.0-beta.3
20
+
21
+ ### Patch Changes
22
+
23
+ - Add maxSize support for HTML chunking strategies ([#10654](https://github.com/mastra-ai/mastra/pull/10654))
24
+
25
+ Added support for the `maxSize` option in HTML chunking strategies (`headers` and `sections`), allowing users to control the maximum chunk size when chunking HTML documents. Previously, HTML chunks could be excessively large when sections contained substantial content.
26
+
27
+ **Changes:**
28
+ - Added `maxSize` support to `headers` strategy - applies `RecursiveCharacterTransformer` after header-based splitting
29
+ - Added `maxSize` support to `sections` strategy - applies `RecursiveCharacterTransformer` after section-based splitting
30
+ - Fixed `splitHtmlByHeaders` content extraction bug - changed from broken `nextElementSibling` to working `parentNode.childNodes` approach
31
+ - Added comprehensive test coverage, including an integration test with a real arXiv paper
32
+
33
+ **Usage:**
34
+
35
+ ```typescript
36
+ import { MDocument } from '@mastra/rag';
37
+
38
+ const doc = MDocument.fromHTML(htmlContent);
39
+
40
+ const chunks = await doc.chunk({
41
+ strategy: 'html',
42
+ headers: [
43
+ ['h1', 'Header 1'],
44
+ ['h2', 'Header 2'],
45
+ ['h3', 'Header 3'],
46
+ ],
47
+ maxSize: 512, // Control chunk size
48
+ overlap: 50, // Optional overlap for context
49
+ });
50
+ ```
51
+
52
+ **Results from real arXiv paper test:**
53
+ - Without maxSize: 22 chunks, max 45,531 chars (too big!)
54
+ - With maxSize=512: 499 chunks, max 512 chars (properly sized)
55
+
56
+ Fixes #7942
57
+
58
+ - Updated dependencies [[`ac0d2f4`](https://github.com/mastra-ai/mastra/commit/ac0d2f4ff8831f72c1c66c2be809706d17f65789), [`1a0d3fc`](https://github.com/mastra-ai/mastra/commit/1a0d3fc811482c9c376cdf79ee615c23bae9b2d6), [`85a628b`](https://github.com/mastra-ai/mastra/commit/85a628b1224a8f64cd82ea7f033774bf22df7a7e), [`c237233`](https://github.com/mastra-ai/mastra/commit/c23723399ccedf7f5744b3f40997b79246bfbe64), [`15f9e21`](https://github.com/mastra-ai/mastra/commit/15f9e216177201ea6e3f6d0bfb063fcc0953444f), [`ff94dea`](https://github.com/mastra-ai/mastra/commit/ff94dea935f4e34545c63bcb6c29804732698809), [`5b2ff46`](https://github.com/mastra-ai/mastra/commit/5b2ff4651df70c146523a7fca773f8eb0a2272f8), [`db41688`](https://github.com/mastra-ai/mastra/commit/db4168806d007417e2e60b4f68656dca4e5f40c9), [`5ca599d`](https://github.com/mastra-ai/mastra/commit/5ca599d0bb59a1595f19f58473fcd67cc71cef58), [`bff1145`](https://github.com/mastra-ai/mastra/commit/bff114556b3cbadad9b2768488708f8ad0e91475), [`5c8ca24`](https://github.com/mastra-ai/mastra/commit/5c8ca247094e0cc2cdbd7137822fb47241f86e77), [`e191844`](https://github.com/mastra-ai/mastra/commit/e1918444ca3f80e82feef1dad506cd4ec6e2875f), [`22553f1`](https://github.com/mastra-ai/mastra/commit/22553f11c63ee5e966a9c034a349822249584691), [`7237163`](https://github.com/mastra-ai/mastra/commit/72371635dbf96a87df4b073cc48fc655afbdce3d), [`2500740`](https://github.com/mastra-ai/mastra/commit/2500740ea23da067d6e50ec71c625ab3ce275e64), [`873ecbb`](https://github.com/mastra-ai/mastra/commit/873ecbb517586aa17d2f1e99283755b3ebb2863f), [`4f9bbe5`](https://github.com/mastra-ai/mastra/commit/4f9bbe5968f42c86f4930b8193de3c3c17e5bd36), [`02e51fe`](https://github.com/mastra-ai/mastra/commit/02e51feddb3d4155cfbcc42624fd0d0970d032c0), [`8f3fa3a`](https://github.com/mastra-ai/mastra/commit/8f3fa3a652bb77da092f913ec51ae46e3a7e27dc), [`cd29ad2`](https://github.com/mastra-ai/mastra/commit/cd29ad23a255534e8191f249593849ed29160886), 
[`bdf4d8c`](https://github.com/mastra-ai/mastra/commit/bdf4d8cdc656d8a2c21d81834bfa3bfa70f56c16), [`854e3da`](https://github.com/mastra-ai/mastra/commit/854e3dad5daac17a91a20986399d3a51f54bf68b), [`ce18d38`](https://github.com/mastra-ai/mastra/commit/ce18d38678c65870350d123955014a8432075fd9), [`cccf9c8`](https://github.com/mastra-ai/mastra/commit/cccf9c8b2d2dfc1a5e63919395b83d78c89682a0), [`61a5705`](https://github.com/mastra-ai/mastra/commit/61a570551278b6743e64243b3ce7d73de915ca8a), [`db70a48`](https://github.com/mastra-ai/mastra/commit/db70a48aeeeeb8e5f92007e8ede52c364ce15287), [`f0fdc14`](https://github.com/mastra-ai/mastra/commit/f0fdc14ee233d619266b3d2bbdeea7d25cfc6d13), [`db18bc9`](https://github.com/mastra-ai/mastra/commit/db18bc9c3825e2c1a0ad9a183cc9935f6691bfa1), [`9b37b56`](https://github.com/mastra-ai/mastra/commit/9b37b565e1f2a76c24f728945cc740c2b09be9da), [`41a23c3`](https://github.com/mastra-ai/mastra/commit/41a23c32f9877d71810f37e24930515df2ff7a0f), [`5d171ad`](https://github.com/mastra-ai/mastra/commit/5d171ad9ef340387276b77c2bb3e83e83332d729), [`f03ae60`](https://github.com/mastra-ai/mastra/commit/f03ae60500fe350c9d828621006cdafe1975fdd8), [`d1e74a0`](https://github.com/mastra-ai/mastra/commit/d1e74a0a293866dece31022047f5dbab65a304d0), [`39e7869`](https://github.com/mastra-ai/mastra/commit/39e7869bc7d0ee391077ce291474d8a84eedccff), [`5761926`](https://github.com/mastra-ai/mastra/commit/57619260c4a2cdd598763abbacd90de594c6bc76), [`c900fdd`](https://github.com/mastra-ai/mastra/commit/c900fdd504c41348efdffb205cfe80d48c38fa33), [`604a79f`](https://github.com/mastra-ai/mastra/commit/604a79fecf276e26a54a3fe01bb94e65315d2e0e), [`887f0b4`](https://github.com/mastra-ai/mastra/commit/887f0b4746cdbd7cb7d6b17ac9f82aeb58037ea5), [`2562143`](https://github.com/mastra-ai/mastra/commit/256214336b4faa78646c9c1776612393790d8784), [`ef11a61`](https://github.com/mastra-ai/mastra/commit/ef11a61920fa0ed08a5b7ceedd192875af119749)]:
59
+ - @mastra/core@1.0.0-beta.6
60
+
3
61
  ## 2.0.0-beta.2
4
62
 
5
63
  ### Patch Changes
@@ -1 +1 @@
1
- {"version":3,"file":"document.d.ts","sourceRoot":"","sources":["../../src/document/document.ts"],"names":[],"mappings":"AAEA,OAAO,EAAE,QAAQ,IAAI,KAAK,EAAgC,MAAM,UAAU,CAAC;AAU3E,OAAO,KAAK,EACV,WAAW,EAEX,aAAa,EACb,gBAAgB,EAChB,qBAAqB,EACrB,qBAAqB,EACrB,iBAAiB,EACjB,oBAAoB,EACpB,4BAA4B,EAC5B,gBAAgB,EAChB,iBAAiB,EACjB,oBAAoB,EAErB,MAAM,SAAS,CAAC;AAGjB,qBAAa,SAAS;IACpB,OAAO,CAAC,MAAM,CAAU;IACxB,OAAO,CAAC,IAAI,CAAS;gBAET,EAAE,IAAI,EAAE,IAAI,EAAE,EAAE;QAAE,IAAI,EAAE;YAAE,IAAI,EAAE,MAAM,CAAC;YAAC,QAAQ,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAA;SAAE,EAAE,CAAC;QAAC,IAAI,EAAE,MAAM,CAAA;KAAE;IAOhG,eAAe,CAAC,EAAE,KAAK,EAAE,OAAO,EAAE,SAAS,EAAE,QAAQ,EAAE,EAAE,aAAa,GAAG,OAAO,CAAC,SAAS,CAAC;IAmDjG,MAAM,CAAC,QAAQ,CAAC,IAAI,EAAE,MAAM,EAAE,QAAQ,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,GAAG,SAAS;IAYxE,MAAM,CAAC,QAAQ,CAAC,IAAI,EAAE,MAAM,EAAE,QAAQ,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,GAAG,SAAS;IAYxE,MAAM,CAAC,YAAY,CAAC,QAAQ,EAAE,MAAM,EAAE,QAAQ,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,GAAG,SAAS;IAYhF,MAAM,CAAC,QAAQ,CAAC,UAAU,EAAE,MAAM,EAAE,QAAQ,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,GAAG,SAAS;IAY9E,OAAO,CAAC,eAAe;IAevB,OAAO,CAAC,YAAY,CAAC,CAA4E;IAEjG,OAAO,KAAK,WAAW,GAetB;YAEa,OAAO;IASf,cAAc,CAAC,OAAO,CAAC,EAAE,qBAAqB,GAAG,OAAO,CAAC,IAAI,CAAC;IAa9D,cAAc,CAAC,OAAO,CAAC,EAAE,qBAAqB,GAAG,OAAO,CAAC,IAAI,CAAC;IAU9D,SAAS,CAAC,OAAO,CAAC,EAAE,gBAAgB,GAAG,OAAO,CAAC,IAAI,CAAC;IAoBpD,SAAS,CAAC,OAAO,CAAC,EAAE,gBAAgB,GAAG,OAAO,CAAC,IAAI,CAAC;IAmBpD,UAAU,CAAC,OAAO,CAAC,EAAE,iBAAiB,GAAG,OAAO,CAAC,IAAI,CAAC;IAMtD,UAAU,CAAC,OAAO,CAAC,EAAE,iBAAiB,GAAG,OAAO,CAAC,IAAI,CAAC;IAUtD,aAAa,CAAC,OAAO,CAAC,EAAE,oBAAoB,GAAG,OAAO,CAAC,IAAI,CAAC;IAa5D,aAAa,CAAC,OAAO,CAAC,EAAE,oBAAoB,GAAG,OAAO,CAAC,IAAI,CAAC;IAuB5D,qBAAqB,CAAC,OAAO,CAAC,EAAE,4BAA4B,GAAG,OAAO,CAAC,IAAI,CAAC;IAU5E,KAAK,CAAC,MAAM,CAAC,EAAE,WAAW,GAAG,OAAO,CAAC,KAAK,EAAE,CAAC;IAiBnD,OAAO,IAAI,KAAK,EAAE;IAIlB,OAAO,IAAI,MAAM,EAAE;IAInB,WAAW,IAAI,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,EAAE;CAGrC"}
1
+ {"version":3,"file":"document.d.ts","sourceRoot":"","sources":["../../src/document/document.ts"],"names":[],"mappings":"AAEA,OAAO,EAAE,QAAQ,IAAI,KAAK,EAAgC,MAAM,UAAU,CAAC;AAU3E,OAAO,KAAK,EACV,WAAW,EAEX,aAAa,EACb,gBAAgB,EAChB,qBAAqB,EACrB,qBAAqB,EACrB,iBAAiB,EACjB,oBAAoB,EACpB,4BAA4B,EAC5B,gBAAgB,EAChB,iBAAiB,EACjB,oBAAoB,EAErB,MAAM,SAAS,CAAC;AAGjB,qBAAa,SAAS;IACpB,OAAO,CAAC,MAAM,CAAU;IACxB,OAAO,CAAC,IAAI,CAAS;gBAET,EAAE,IAAI,EAAE,IAAI,EAAE,EAAE;QAAE,IAAI,EAAE;YAAE,IAAI,EAAE,MAAM,CAAC;YAAC,QAAQ,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAA;SAAE,EAAE,CAAC;QAAC,IAAI,EAAE,MAAM,CAAA;KAAE;IAOhG,eAAe,CAAC,EAAE,KAAK,EAAE,OAAO,EAAE,SAAS,EAAE,QAAQ,EAAE,EAAE,aAAa,GAAG,OAAO,CAAC,SAAS,CAAC;IAmDjG,MAAM,CAAC,QAAQ,CAAC,IAAI,EAAE,MAAM,EAAE,QAAQ,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,GAAG,SAAS;IAYxE,MAAM,CAAC,QAAQ,CAAC,IAAI,EAAE,MAAM,EAAE,QAAQ,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,GAAG,SAAS;IAYxE,MAAM,CAAC,YAAY,CAAC,QAAQ,EAAE,MAAM,EAAE,QAAQ,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,GAAG,SAAS;IAYhF,MAAM,CAAC,QAAQ,CAAC,UAAU,EAAE,MAAM,EAAE,QAAQ,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,GAAG,SAAS;IAY9E,OAAO,CAAC,eAAe;IAevB,OAAO,CAAC,YAAY,CAAC,CAA4E;IAEjG,OAAO,KAAK,WAAW,GAetB;YAEa,OAAO;IASf,cAAc,CAAC,OAAO,CAAC,EAAE,qBAAqB,GAAG,OAAO,CAAC,IAAI,CAAC;IAa9D,cAAc,CAAC,OAAO,CAAC,EAAE,qBAAqB,GAAG,OAAO,CAAC,IAAI,CAAC;IAU9D,SAAS,CAAC,OAAO,CAAC,EAAE,gBAAgB,GAAG,OAAO,CAAC,IAAI,CAAC;IA8CpD,SAAS,CAAC,OAAO,CAAC,EAAE,gBAAgB,GAAG,OAAO,CAAC,IAAI,CAAC;IAmBpD,UAAU,CAAC,OAAO,CAAC,EAAE,iBAAiB,GAAG,OAAO,CAAC,IAAI,CAAC;IAMtD,UAAU,CAAC,OAAO,CAAC,EAAE,iBAAiB,GAAG,OAAO,CAAC,IAAI,CAAC;IAUtD,aAAa,CAAC,OAAO,CAAC,EAAE,oBAAoB,GAAG,OAAO,CAAC,IAAI,CAAC;IAa5D,aAAa,CAAC,OAAO,CAAC,EAAE,oBAAoB,GAAG,OAAO,CAAC,IAAI,CAAC;IAuB5D,qBAAqB,CAAC,OAAO,CAAC,EAAE,4BAA4B,GAAG,OAAO,CAAC,IAAI,CAAC;IAU5E,KAAK,CAAC,MAAM,CAAC,EAAE,WAAW,GAAG,OAAO,CAAC,KAAK,EAAE,CAAC;IAiBnD,OAAO,IAAI,KAAK,EAAE;IAIlB,OAAO,IAAI,MAAM,EAAE;IAInB,WAAW,IAAI,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,EAAE;CAGrC"}
@@ -1,4 +1,4 @@
1
- import type { MastraLanguageModel } from '@mastra/core/agent';
1
+ import type { MastraLanguageModel, MastraLegacyLanguageModel } from '@mastra/core/agent';
2
2
  import type { KeywordExtractPrompt } from '../prompts/index.js';
3
3
  import type { BaseNode } from '../schema/index.js';
4
4
  import { BaseExtractor } from './base.js';
@@ -13,7 +13,7 @@ type ExtractKeyword = {
13
13
  * Extract keywords from a list of nodes.
14
14
  */
15
15
  export declare class KeywordExtractor extends BaseExtractor {
16
- llm: MastraLanguageModel;
16
+ llm: MastraLanguageModel | MastraLegacyLanguageModel;
17
17
  keywords: number;
18
18
  promptTemplate: KeywordExtractPrompt;
19
19
  /**
@@ -1 +1 @@
1
- {"version":3,"file":"keywords.d.ts","sourceRoot":"","sources":["../../../src/document/extractors/keywords.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,mBAAmB,EAAE,MAAM,oBAAoB,CAAC;AAE9D,OAAO,KAAK,EAAE,oBAAoB,EAAE,MAAM,YAAY,CAAC;AACvD,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,WAAW,CAAC;AAE1C,OAAO,EAAE,aAAa,EAAE,MAAM,QAAQ,CAAC;AAEvC,OAAO,KAAK,EAAE,kBAAkB,EAAE,MAAM,SAAS,CAAC;AAElD,KAAK,cAAc,GAAG;IACpB;;OAEG;IACH,eAAe,EAAE,MAAM,CAAC;CACzB,CAAC;AAEF;;GAEG;AACH,qBAAa,gBAAiB,SAAQ,aAAa;IACjD,GAAG,EAAE,mBAAmB,CAAC;IACzB,QAAQ,EAAE,MAAM,CAAK;IACrB,cAAc,EAAE,oBAAoB,CAAC;IAErC;;;;;;OAMG;gBACS,OAAO,CAAC,EAAE,kBAAkB;IAexC;;;;OAIG;IACH;;;OAGG;IACG,wBAAwB,CAAC,IAAI,EAAE,QAAQ,GAAG,OAAO,CAAC,cAAc,CAAC;IAoDvE;;;;OAIG;IACH;;;;OAIG;IACG,OAAO,CAAC,KAAK,EAAE,QAAQ,EAAE,GAAG,OAAO,CAAC,KAAK,CAAC,cAAc,CAAC,CAAC;CAKjE"}
1
+ {"version":3,"file":"keywords.d.ts","sourceRoot":"","sources":["../../../src/document/extractors/keywords.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,mBAAmB,EAAE,yBAAyB,EAAE,MAAM,oBAAoB,CAAC;AAEzF,OAAO,KAAK,EAAE,oBAAoB,EAAE,MAAM,YAAY,CAAC;AACvD,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,WAAW,CAAC;AAE1C,OAAO,EAAE,aAAa,EAAE,MAAM,QAAQ,CAAC;AAEvC,OAAO,KAAK,EAAE,kBAAkB,EAAE,MAAM,SAAS,CAAC;AAElD,KAAK,cAAc,GAAG;IACpB;;OAEG;IACH,eAAe,EAAE,MAAM,CAAC;CACzB,CAAC;AAEF;;GAEG;AACH,qBAAa,gBAAiB,SAAQ,aAAa;IACjD,GAAG,EAAE,mBAAmB,GAAG,yBAAyB,CAAC;IACrD,QAAQ,EAAE,MAAM,CAAK;IACrB,cAAc,EAAE,oBAAoB,CAAC;IAErC;;;;;;OAMG;gBACS,OAAO,CAAC,EAAE,kBAAkB;IAexC;;;;OAIG;IACH;;;OAGG;IACG,wBAAwB,CAAC,IAAI,EAAE,QAAQ,GAAG,OAAO,CAAC,cAAc,CAAC;IAoDvE;;;;OAIG;IACH;;;;OAIG;IACG,OAAO,CAAC,KAAK,EAAE,QAAQ,EAAE,GAAG,OAAO,CAAC,KAAK,CAAC,cAAc,CAAC,CAAC;CAKjE"}
@@ -1,4 +1,4 @@
1
- import type { MastraLanguageModel } from '@mastra/core/agent';
1
+ import type { MastraLanguageModel, MastraLegacyLanguageModel } from '@mastra/core/agent';
2
2
  import type { QuestionExtractPrompt } from '../prompts/index.js';
3
3
  import type { BaseNode } from '../schema/index.js';
4
4
  import { BaseExtractor } from './base.js';
@@ -13,7 +13,7 @@ type ExtractQuestion = {
13
13
  * Extract questions from a list of nodes.
14
14
  */
15
15
  export declare class QuestionsAnsweredExtractor extends BaseExtractor {
16
- llm: MastraLanguageModel;
16
+ llm: MastraLanguageModel | MastraLegacyLanguageModel;
17
17
  questions: number;
18
18
  promptTemplate: QuestionExtractPrompt;
19
19
  embeddingOnly: boolean;
@@ -1 +1 @@
1
- {"version":3,"file":"questions.d.ts","sourceRoot":"","sources":["../../../src/document/extractors/questions.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,mBAAmB,EAAE,MAAM,oBAAoB,CAAC;AAE9D,OAAO,KAAK,EAAE,qBAAqB,EAAE,MAAM,YAAY,CAAC;AACxD,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,WAAW,CAAC;AAE1C,OAAO,EAAE,aAAa,EAAE,MAAM,QAAQ,CAAC;AAEvC,OAAO,KAAK,EAAE,yBAAyB,EAAE,MAAM,SAAS,CAAC;AAEzD,KAAK,eAAe,GAAG;IACrB;;OAEG;IACH,6BAA6B,EAAE,MAAM,CAAC;CACvC,CAAC;AAEF;;GAEG;AACH,qBAAa,0BAA2B,SAAQ,aAAa;IAC3D,GAAG,EAAE,mBAAmB,CAAC;IACzB,SAAS,EAAE,MAAM,CAAK;IACtB,cAAc,EAAE,qBAAqB,CAAC;IACtC,aAAa,EAAE,OAAO,CAAS;IAE/B;;;;;;OAMG;gBACS,OAAO,CAAC,EAAE,yBAAyB;IAkB/C;;;;OAIG;IACG,wBAAwB,CAAC,IAAI,EAAE,QAAQ,GAAG,OAAO,CAAC,eAAe,CAAC;IA6CxE;;;;OAIG;IACG,OAAO,CAAC,KAAK,EAAE,QAAQ,EAAE,GAAG,OAAO,CAAC,KAAK,CAAC,eAAe,CAAC,GAAG,KAAK,CAAC,MAAM,CAAC,CAAC;CAKlF"}
1
+ {"version":3,"file":"questions.d.ts","sourceRoot":"","sources":["../../../src/document/extractors/questions.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,mBAAmB,EAAE,yBAAyB,EAAE,MAAM,oBAAoB,CAAC;AAEzF,OAAO,KAAK,EAAE,qBAAqB,EAAE,MAAM,YAAY,CAAC;AACxD,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,WAAW,CAAC;AAE1C,OAAO,EAAE,aAAa,EAAE,MAAM,QAAQ,CAAC;AAEvC,OAAO,KAAK,EAAE,yBAAyB,EAAE,MAAM,SAAS,CAAC;AAEzD,KAAK,eAAe,GAAG;IACrB;;OAEG;IACH,6BAA6B,EAAE,MAAM,CAAC;CACvC,CAAC;AAEF;;GAEG;AACH,qBAAa,0BAA2B,SAAQ,aAAa;IAC3D,GAAG,EAAE,mBAAmB,GAAG,yBAAyB,CAAC;IACrD,SAAS,EAAE,MAAM,CAAK;IACtB,cAAc,EAAE,qBAAqB,CAAC;IACtC,aAAa,EAAE,OAAO,CAAS;IAE/B;;;;;;OAMG;gBACS,OAAO,CAAC,EAAE,yBAAyB;IAkB/C;;;;OAIG;IACG,wBAAwB,CAAC,IAAI,EAAE,QAAQ,GAAG,OAAO,CAAC,eAAe,CAAC;IA6CxE;;;;OAIG;IACG,OAAO,CAAC,KAAK,EAAE,QAAQ,EAAE,GAAG,OAAO,CAAC,KAAK,CAAC,eAAe,CAAC,GAAG,KAAK,CAAC,MAAM,CAAC,CAAC;CAKlF"}
@@ -1 +1 @@
1
- {"version":3,"file":"summary.d.ts","sourceRoot":"","sources":["../../../src/document/extractors/summary.ts"],"names":[],"mappings":"AAGA,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,YAAY,CAAC;AAChD,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,WAAW,CAAC;AAE1C,OAAO,EAAE,aAAa,EAAE,MAAM,QAAQ,CAAC;AAEvC,OAAO,KAAK,EAAE,kBAAkB,EAAE,MAAM,SAAS,CAAC;AAElD,KAAK,cAAc,GAAG;IACpB,cAAc,CAAC,EAAE,MAAM,CAAC;IACxB,kBAAkB,CAAC,EAAE,MAAM,CAAC;IAC5B,kBAAkB,CAAC,EAAE,MAAM,CAAC;CAC7B,CAAC;AAEF;;;;;;GAMG;AACH,qBAAa,gBAAiB,SAAQ,aAAa;IACjD,OAAO,CAAC,GAAG,CAAsB;IACjC,SAAS,EAAE,MAAM,EAAE,CAAC;IACpB,cAAc,EAAE,aAAa,CAAC;IAC9B,OAAO,CAAC,WAAW,CAAU;IAC7B,OAAO,CAAC,WAAW,CAAU;IAC7B,OAAO,CAAC,WAAW,CAAU;gBACjB,OAAO,CAAC,EAAE,kBAAkB;IAsBxC;;;;OAIG;IACG,mBAAmB,CAAC,IAAI,EAAE,QAAQ,GAAG,OAAO,CAAC,MAAM,CAAC;IAuC1D;;;;OAIG;IACG,OAAO,CAAC,KAAK,EAAE,QAAQ,EAAE,GAAG,OAAO,CAAC,cAAc,EAAE,CAAC;CAqB5D"}
1
+ {"version":3,"file":"summary.d.ts","sourceRoot":"","sources":["../../../src/document/extractors/summary.ts"],"names":[],"mappings":"AAGA,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,YAAY,CAAC;AAChD,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,WAAW,CAAC;AAE1C,OAAO,EAAE,aAAa,EAAE,MAAM,QAAQ,CAAC;AAEvC,OAAO,KAAK,EAAE,kBAAkB,EAAE,MAAM,SAAS,CAAC;AAElD,KAAK,cAAc,GAAG;IACpB,cAAc,CAAC,EAAE,MAAM,CAAC;IACxB,kBAAkB,CAAC,EAAE,MAAM,CAAC;IAC5B,kBAAkB,CAAC,EAAE,MAAM,CAAC;CAC7B,CAAC;AAEF;;;;;;GAMG;AACH,qBAAa,gBAAiB,SAAQ,aAAa;IACjD,OAAO,CAAC,GAAG,CAAkD;IAC7D,SAAS,EAAE,MAAM,EAAE,CAAC;IACpB,cAAc,EAAE,aAAa,CAAC;IAC9B,OAAO,CAAC,WAAW,CAAU;IAC7B,OAAO,CAAC,WAAW,CAAU;IAC7B,OAAO,CAAC,WAAW,CAAU;gBACjB,OAAO,CAAC,EAAE,kBAAkB;IAsBxC;;;;OAIG;IACG,mBAAmB,CAAC,IAAI,EAAE,QAAQ,GAAG,OAAO,CAAC,MAAM,CAAC;IAuC1D;;;;OAIG;IACG,OAAO,CAAC,KAAK,EAAE,QAAQ,EAAE,GAAG,OAAO,CAAC,cAAc,EAAE,CAAC;CAqB5D"}
@@ -1,4 +1,4 @@
1
- import type { MastraLanguageModel } from '@mastra/core/agent';
1
+ import type { MastraLanguageModel, MastraLegacyLanguageModel } from '@mastra/core/agent';
2
2
  import type { TitleCombinePrompt, TitleExtractorPrompt } from '../prompts/index.js';
3
3
  import type { BaseNode } from '../schema/index.js';
4
4
  import { BaseExtractor } from './base.js';
@@ -10,7 +10,7 @@ type ExtractTitle = {
10
10
  * Extract title from a list of nodes.
11
11
  */
12
12
  export declare class TitleExtractor extends BaseExtractor {
13
- llm: MastraLanguageModel;
13
+ llm: MastraLegacyLanguageModel | MastraLanguageModel;
14
14
  isTextNodeOnly: boolean;
15
15
  nodes: number;
16
16
  nodeTemplate: TitleExtractorPrompt;
@@ -1 +1 @@
1
- {"version":3,"file":"title.d.ts","sourceRoot":"","sources":["../../../src/document/extractors/title.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,mBAAmB,EAAE,MAAM,oBAAoB,CAAC;AAE9D,OAAO,KAAK,EAAE,kBAAkB,EAAE,oBAAoB,EAAE,MAAM,YAAY,CAAC;AAE3E,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,WAAW,CAAC;AAC1C,OAAO,EAAE,aAAa,EAAE,MAAM,QAAQ,CAAC;AAEvC,OAAO,KAAK,EAAE,mBAAmB,EAAE,MAAM,SAAS,CAAC;AAEnD,KAAK,YAAY,GAAG;IAClB,aAAa,EAAE,MAAM,CAAC;CACvB,CAAC;AAEF;;GAEG;AACH,qBAAa,cAAe,SAAQ,aAAa;IAC/C,GAAG,EAAE,mBAAmB,CAAC;IACzB,cAAc,EAAE,OAAO,CAAS;IAChC,KAAK,EAAE,MAAM,CAAK;IAClB,YAAY,EAAE,oBAAoB,CAAC;IACnC,eAAe,EAAE,kBAAkB,CAAC;gBAExB,OAAO,CAAC,EAAE,mBAAmB;IAqBzC;;;;OAIG;IACG,OAAO,CAAC,KAAK,EAAE,QAAQ,EAAE,GAAG,OAAO,CAAC,KAAK,CAAC,YAAY,CAAC,CAAC;IAoC9D,OAAO,CAAC,WAAW;IASnB,OAAO,CAAC,uBAAuB;YAYjB,aAAa;YA6Cb,mBAAmB;CAiClC"}
1
+ {"version":3,"file":"title.d.ts","sourceRoot":"","sources":["../../../src/document/extractors/title.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,mBAAmB,EAAE,yBAAyB,EAAE,MAAM,oBAAoB,CAAC;AAEzF,OAAO,KAAK,EAAE,kBAAkB,EAAE,oBAAoB,EAAE,MAAM,YAAY,CAAC;AAE3E,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,WAAW,CAAC;AAC1C,OAAO,EAAE,aAAa,EAAE,MAAM,QAAQ,CAAC;AAEvC,OAAO,KAAK,EAAE,mBAAmB,EAAE,MAAM,SAAS,CAAC;AAEnD,KAAK,YAAY,GAAG;IAClB,aAAa,EAAE,MAAM,CAAC;CACvB,CAAC;AAEF;;GAEG;AACH,qBAAa,cAAe,SAAQ,aAAa;IAC/C,GAAG,EAAE,yBAAyB,GAAG,mBAAmB,CAAC;IACrD,cAAc,EAAE,OAAO,CAAS;IAChC,KAAK,EAAE,MAAM,CAAK;IAClB,YAAY,EAAE,oBAAoB,CAAC;IACnC,eAAe,EAAE,kBAAkB,CAAC;gBAExB,OAAO,CAAC,EAAE,mBAAmB;IAqBzC;;;;OAIG;IACG,OAAO,CAAC,KAAK,EAAE,QAAQ,EAAE,GAAG,OAAO,CAAC,KAAK,CAAC,YAAY,CAAC,CAAC;IAoC9D,OAAO,CAAC,WAAW;IASnB,OAAO,CAAC,uBAAuB;YAYjB,aAAa;YA6Cb,mBAAmB;CAiClC"}
@@ -1,27 +1,27 @@
1
- import type { MastraLanguageModel } from '@mastra/core/agent';
1
+ import type { MastraLanguageModel, MastraLegacyLanguageModel } from '@mastra/core/agent';
2
2
  import type { KeywordExtractPrompt, QuestionExtractPrompt, SummaryPrompt, TitleExtractorPrompt, TitleCombinePrompt } from '../prompts/index.js';
3
3
  export type KeywordExtractArgs = {
4
- llm?: MastraLanguageModel;
4
+ llm?: MastraLegacyLanguageModel | MastraLanguageModel;
5
5
  keywords?: number;
6
6
  promptTemplate?: KeywordExtractPrompt['template'];
7
7
  };
8
8
  export type QuestionAnswerExtractArgs = {
9
- llm?: MastraLanguageModel;
9
+ llm?: MastraLegacyLanguageModel | MastraLanguageModel;
10
10
  questions?: number;
11
11
  promptTemplate?: QuestionExtractPrompt['template'];
12
12
  embeddingOnly?: boolean;
13
13
  };
14
14
  export type SummaryExtractArgs = {
15
- llm?: MastraLanguageModel;
15
+ llm?: MastraLegacyLanguageModel | MastraLanguageModel;
16
16
  summaries?: string[];
17
17
  promptTemplate?: SummaryPrompt['template'];
18
18
  };
19
19
  export type TitleExtractorsArgs = {
20
- llm?: MastraLanguageModel;
20
+ llm?: MastraLegacyLanguageModel | MastraLanguageModel;
21
21
  nodes?: number;
22
22
  nodeTemplate?: TitleExtractorPrompt['template'];
23
23
  combineTemplate?: TitleCombinePrompt['template'];
24
24
  };
25
25
  export declare const STRIP_REGEX: RegExp;
26
- export declare const baseLLM: MastraLanguageModel;
26
+ export declare const baseLLM: MastraLegacyLanguageModel | MastraLanguageModel;
27
27
  //# sourceMappingURL=types.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../../../src/document/extractors/types.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,mBAAmB,EAAE,MAAM,oBAAoB,CAAC;AAC9D,OAAO,KAAK,EACV,oBAAoB,EACpB,qBAAqB,EACrB,aAAa,EACb,oBAAoB,EACpB,kBAAkB,EACnB,MAAM,YAAY,CAAC;AAEpB,MAAM,MAAM,kBAAkB,GAAG;IAC/B,GAAG,CAAC,EAAE,mBAAmB,CAAC;IAC1B,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,cAAc,CAAC,EAAE,oBAAoB,CAAC,UAAU,CAAC,CAAC;CACnD,CAAC;AAEF,MAAM,MAAM,yBAAyB,GAAG;IACtC,GAAG,CAAC,EAAE,mBAAmB,CAAC;IAC1B,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,cAAc,CAAC,EAAE,qBAAqB,CAAC,UAAU,CAAC,CAAC;IACnD,aAAa,CAAC,EAAE,OAAO,CAAC;CACzB,CAAC;AAEF,MAAM,MAAM,kBAAkB,GAAG;IAC/B,GAAG,CAAC,EAAE,mBAAmB,CAAC;IAC1B,SAAS,CAAC,EAAE,MAAM,EAAE,CAAC;IACrB,cAAc,CAAC,EAAE,aAAa,CAAC,UAAU,CAAC,CAAC;CAC5C,CAAC;AAEF,MAAM,MAAM,mBAAmB,GAAG;IAChC,GAAG,CAAC,EAAE,mBAAmB,CAAC;IAC1B,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,YAAY,CAAC,EAAE,oBAAoB,CAAC,UAAU,CAAC,CAAC;IAChD,eAAe,CAAC,EAAE,kBAAkB,CAAC,UAAU,CAAC,CAAC;CAClD,CAAC;AAEF,eAAO,MAAM,WAAW,QAAmB,CAAC;AAG5C,eAAO,MAAM,OAAO,EAAE,mBAAsC,CAAC"}
1
+ {"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../../../src/document/extractors/types.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,mBAAmB,EAAE,yBAAyB,EAAE,MAAM,oBAAoB,CAAC;AACzF,OAAO,KAAK,EACV,oBAAoB,EACpB,qBAAqB,EACrB,aAAa,EACb,oBAAoB,EACpB,kBAAkB,EACnB,MAAM,YAAY,CAAC;AAEpB,MAAM,MAAM,kBAAkB,GAAG;IAC/B,GAAG,CAAC,EAAE,yBAAyB,GAAG,mBAAmB,CAAC;IACtD,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,cAAc,CAAC,EAAE,oBAAoB,CAAC,UAAU,CAAC,CAAC;CACnD,CAAC;AAEF,MAAM,MAAM,yBAAyB,GAAG;IACtC,GAAG,CAAC,EAAE,yBAAyB,GAAG,mBAAmB,CAAC;IACtD,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,cAAc,CAAC,EAAE,qBAAqB,CAAC,UAAU,CAAC,CAAC;IACnD,aAAa,CAAC,EAAE,OAAO,CAAC;CACzB,CAAC;AAEF,MAAM,MAAM,kBAAkB,GAAG;IAC/B,GAAG,CAAC,EAAE,yBAAyB,GAAG,mBAAmB,CAAC;IACtD,SAAS,CAAC,EAAE,MAAM,EAAE,CAAC;IACrB,cAAc,CAAC,EAAE,aAAa,CAAC,UAAU,CAAC,CAAC;CAC5C,CAAC;AAEF,MAAM,MAAM,mBAAmB,GAAG;IAChC,GAAG,CAAC,EAAE,yBAAyB,GAAG,mBAAmB,CAAC;IACtD,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,YAAY,CAAC,EAAE,oBAAoB,CAAC,UAAU,CAAC,CAAC;IAChD,eAAe,CAAC,EAAE,kBAAkB,CAAC,UAAU,CAAC,CAAC;CAClD,CAAC;AAEF,eAAO,MAAM,WAAW,QAAmB,CAAC;AAG5C,eAAO,MAAM,OAAO,EAAE,yBAAyB,GAAG,mBAAsC,CAAC"}
@@ -23,6 +23,7 @@ export declare class HTMLSectionTransformer {
23
23
  });
24
24
  splitText(text: string): Document[];
25
25
  private getXPath;
26
+ private getTextContent;
26
27
  private splitHtmlByHeaders;
27
28
  splitDocuments(documents: Document[]): Promise<Document[]>;
28
29
  createDocuments(texts: string[], metadatas?: Record<string, any>[]): Document[];
@@ -1 +1 @@
1
- {"version":3,"file":"html.d.ts","sourceRoot":"","sources":["../../../src/document/transformers/html.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,QAAQ,EAAE,MAAM,WAAW,CAAC;AACrC,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,UAAU,CAAC;AAWjD,qBAAa,qBAAqB;IAChC,OAAO,CAAC,gBAAgB,CAAqB;IAC7C,OAAO,CAAC,iBAAiB,CAAU;gBAEvB,OAAO,EAAE,gBAAgB,GAAG;QAAE,OAAO,EAAE,CAAC,MAAM,EAAE,MAAM,CAAC,EAAE,CAAA;KAAE;IAKvE,SAAS,CAAC,EAAE,IAAI,EAAE,EAAE;QAAE,IAAI,EAAE,MAAM,CAAA;KAAE,GAAG,QAAQ,EAAE;IAwDjD,OAAO,CAAC,QAAQ;IA2BhB,OAAO,CAAC,cAAc;IAuBtB,OAAO,CAAC,yBAAyB;IAyBjC,eAAe,CAAC,KAAK,EAAE,MAAM,EAAE,EAAE,SAAS,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,EAAE,GAAG,QAAQ,EAAE;IA8B/E,kBAAkB,CAAC,SAAS,EAAE,QAAQ,EAAE,GAAG,QAAQ,EAAE;CAWtD;AAED,qBAAa,sBAAsB;IACjC,OAAO,CAAC,gBAAgB,CAAyB;IACjD,OAAO,CAAC,YAAY,CAAgC;gBAExC,OAAO,EAAE,gBAAgB,GAAG;QAAE,QAAQ,EAAE,CAAC,MAAM,EAAE,MAAM,CAAC,EAAE,CAAA;KAAE;IAKxE,SAAS,CAAC,IAAI,EAAE,MAAM,GAAG,QAAQ,EAAE;IAenC,OAAO,CAAC,QAAQ;IAwBhB,OAAO,CAAC,kBAAkB;IA8CpB,cAAc,CAAC,SAAS,EAAE,QAAQ,EAAE,GAAG,OAAO,CAAC,QAAQ,EAAE,CAAC;IAahE,eAAe,CAAC,KAAK,EAAE,MAAM,EAAE,EAAE,SAAS,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,EAAE,GAAG,QAAQ,EAAE;IA+B/E,kBAAkB,CAAC,SAAS,EAAE,QAAQ,EAAE,GAAG,QAAQ,EAAE;CAWtD"}
1
+ {"version":3,"file":"html.d.ts","sourceRoot":"","sources":["../../../src/document/transformers/html.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,QAAQ,EAAE,MAAM,WAAW,CAAC;AACrC,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,UAAU,CAAC;AAWjD,qBAAa,qBAAqB;IAChC,OAAO,CAAC,gBAAgB,CAAqB;IAC7C,OAAO,CAAC,iBAAiB,CAAU;gBAEvB,OAAO,EAAE,gBAAgB,GAAG;QAAE,OAAO,EAAE,CAAC,MAAM,EAAE,MAAM,CAAC,EAAE,CAAA;KAAE;IAKvE,SAAS,CAAC,EAAE,IAAI,EAAE,EAAE;QAAE,IAAI,EAAE,MAAM,CAAA;KAAE,GAAG,QAAQ,EAAE;IAwDjD,OAAO,CAAC,QAAQ;IA2BhB,OAAO,CAAC,cAAc;IAuBtB,OAAO,CAAC,yBAAyB;IAyBjC,eAAe,CAAC,KAAK,EAAE,MAAM,EAAE,EAAE,SAAS,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,EAAE,GAAG,QAAQ,EAAE;IA8B/E,kBAAkB,CAAC,SAAS,EAAE,QAAQ,EAAE,GAAG,QAAQ,EAAE;CAWtD;AAED,qBAAa,sBAAsB;IACjC,OAAO,CAAC,gBAAgB,CAAyB;IACjD,OAAO,CAAC,YAAY,CAAgC;gBAExC,OAAO,EAAE,gBAAgB,GAAG;QAAE,QAAQ,EAAE,CAAC,MAAM,EAAE,MAAM,CAAC,EAAE,CAAA;KAAE;IAKxE,SAAS,CAAC,IAAI,EAAE,MAAM,GAAG,QAAQ,EAAE;IAenC,OAAO,CAAC,QAAQ;IAwBhB,OAAO,CAAC,cAAc;IAuBtB,OAAO,CAAC,kBAAkB;IA6DpB,cAAc,CAAC,SAAS,EAAE,QAAQ,EAAE,GAAG,OAAO,CAAC,QAAQ,EAAE,CAAC;IAahE,eAAe,CAAC,KAAK,EAAE,MAAM,EAAE,EAAE,SAAS,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,EAAE,GAAG,QAAQ,EAAE;IA+B/E,kBAAkB,CAAC,SAAS,EAAE,QAAQ,EAAE,GAAG,QAAQ,EAAE;CAWtD"}
package/dist/index.cjs CHANGED
@@ -272,7 +272,7 @@ Provide keywords in the following comma-separated format: 'KEYWORDS: <keywords>'
272
272
  var defaultQuestionExtractPrompt = new PromptTemplate({
273
273
  templateVars: ["numQuestions", "context"],
274
274
  template: `(
275
- "Given the contextual informations below, generate {numQuestions} questions this context can provides specific answers to which are unlikely to be found else where. Higher-level summaries of surrounding context may be provided as well. "
275
+ "Given the contextual information below, generate {numQuestions} questions this context can provide specific answers to which are unlikely to be found elsewhere. Higher-level summaries of surrounding context may be provided as well. "
276
276
  "Try using these summaries to generate better questions that this context can answer."
277
277
  "---------------------"
278
278
  "{context}"
@@ -4141,7 +4141,7 @@ var TitleExtractor = class extends BaseExtractor {
4141
4141
  const titleCandidates = await this.getTitlesCandidates(nodes);
4142
4142
  const combinedTitles = titleCandidates.join(", ");
4143
4143
  let title = "";
4144
- if (this.llm.specificationVersion === "v2") {
4144
+ if (agent.isSupportedLanguageModel(this.llm)) {
4145
4145
  const miniAgent = new agent.Agent({
4146
4146
  id: "title-extractor",
4147
4147
  model: this.llm,
@@ -4180,7 +4180,7 @@ var TitleExtractor = class extends BaseExtractor {
4180
4180
  });
4181
4181
  const titleJobs = nodes.map(async (node) => {
4182
4182
  let completion;
4183
- if (this.llm.specificationVersion === "v2") {
4183
+ if (agent.isSupportedLanguageModel(this.llm)) {
4184
4184
  const result = await miniAgent.generate([
4185
4185
  { role: "user", content: this.nodeTemplate.format({ context: node.getContent() }) }
4186
4186
  ]);
@@ -4246,7 +4246,7 @@ var SummaryExtractor = class extends BaseExtractor {
4246
4246
  instructions: "You are a summary extractor. You are given a node and you need to extract the summary from the node."
4247
4247
  });
4248
4248
  let summary = "";
4249
- if (this.llm.specificationVersion === "v2") {
4249
+ if (agent.isSupportedLanguageModel(this.llm)) {
4250
4250
  const result = await miniAgent.generate([{ role: "user", content: prompt }]);
4251
4251
  summary = result.text;
4252
4252
  } else {
@@ -4332,7 +4332,7 @@ var QuestionsAnsweredExtractor = class extends BaseExtractor {
4332
4332
  instructions: "You are a question extractor. You are given a node and you need to extract the questions from the node."
4333
4333
  });
4334
4334
  let questionsText = "";
4335
- if (this.llm.specificationVersion === "v2") {
4335
+ if (agent.isSupportedLanguageModel(this.llm)) {
4336
4336
  const result2 = await miniAgent.generate([{ role: "user", content: prompt }]);
4337
4337
  questionsText = result2.text;
4338
4338
  } else {
@@ -4404,7 +4404,7 @@ var KeywordExtractor = class extends BaseExtractor {
4404
4404
  name: "keyword-extractor",
4405
4405
  instructions: "You are a keyword extractor. You are given a node and you need to extract the keywords from the node."
4406
4406
  });
4407
- if (this.llm.specificationVersion === "v2") {
4407
+ if (agent.isSupportedLanguageModel(this.llm)) {
4408
4408
  const result = await miniAgent.generate([
4409
4409
  {
4410
4410
  role: "user",
@@ -4999,23 +4999,47 @@ var HTMLSectionTransformer = class {
4999
4999
  }
5000
5000
  return "/" + parts.join("/");
5001
5001
  }
5002
+ getTextContent(element) {
5003
+ if (!element) return "";
5004
+ if (!element.tagName) {
5005
+ return element.text || "";
5006
+ }
5007
+ let content = element.text || "";
5008
+ if (element.childNodes) {
5009
+ for (const child of element.childNodes) {
5010
+ const childText = this.getTextContent(child);
5011
+ if (childText) {
5012
+ content += " " + childText;
5013
+ }
5014
+ }
5015
+ }
5016
+ return content.trim();
5017
+ }
5002
5018
  splitHtmlByHeaders(htmlDoc) {
5003
5019
  const sections = [];
5004
5020
  const root = nodeHtmlBetterParser.parse(htmlDoc);
5005
5021
  const headers = Object.keys(this.headersToSplitOn);
5006
5022
  const headerElements = root.querySelectorAll(headers.join(","));
5007
- headerElements.forEach((headerElement, index) => {
5023
+ headerElements.forEach((headerElement) => {
5008
5024
  const header = headerElement.text?.trim() || "";
5009
5025
  const tagName = headerElement.tagName;
5010
5026
  const xpath = this.getXPath(headerElement);
5011
5027
  let content = "";
5012
- let currentElement = headerElement.nextElementSibling;
5013
- const nextHeader = headerElements[index + 1];
5014
- while (currentElement && (!nextHeader || currentElement !== nextHeader)) {
5015
- if (currentElement.text) {
5016
- content += currentElement.text.trim() + " ";
5028
+ const parentNode = headerElement.parentNode;
5029
+ if (parentNode && parentNode.childNodes) {
5030
+ let foundHeader = false;
5031
+ for (const node of parentNode.childNodes) {
5032
+ if (node === headerElement) {
5033
+ foundHeader = true;
5034
+ continue;
5035
+ }
5036
+ if (foundHeader && node.tagName && headers.includes(node.tagName.toLowerCase())) {
5037
+ break;
5038
+ }
5039
+ if (foundHeader) {
5040
+ content += this.getTextContent(node) + " ";
5041
+ }
5017
5042
  }
5018
- currentElement = currentElement.nextElementSibling;
5019
5043
  }
5020
5044
  content = content.trim();
5021
5045
  sections.push({
@@ -6392,13 +6416,33 @@ var MDocument = class _MDocument {
6392
6416
  async chunkHTML(options) {
6393
6417
  if (options?.headers?.length) {
6394
6418
  const rt = new HTMLHeaderTransformer(options);
6395
- const textSplit = rt.transformDocuments(this.chunks);
6419
+ let textSplit = rt.transformDocuments(this.chunks);
6420
+ if (options?.maxSize) {
6421
+ const textSplitter = new RecursiveCharacterTransformer({
6422
+ maxSize: options.maxSize,
6423
+ overlap: options.overlap,
6424
+ keepSeparator: options.keepSeparator,
6425
+ addStartIndex: options.addStartIndex,
6426
+ stripWhitespace: options.stripWhitespace
6427
+ });
6428
+ textSplit = textSplitter.splitDocuments(textSplit);
6429
+ }
6396
6430
  this.chunks = textSplit;
6397
6431
  return;
6398
6432
  }
6399
6433
  if (options?.sections?.length) {
6400
6434
  const rt = new HTMLSectionTransformer(options);
6401
- const textSplit = rt.transformDocuments(this.chunks);
6435
+ let textSplit = rt.transformDocuments(this.chunks);
6436
+ if (options?.maxSize) {
6437
+ const textSplitter = new RecursiveCharacterTransformer({
6438
+ maxSize: options.maxSize,
6439
+ overlap: options.overlap,
6440
+ keepSeparator: options.keepSeparator,
6441
+ addStartIndex: options.addStartIndex,
6442
+ stripWhitespace: options.stripWhitespace
6443
+ });
6444
+ textSplit = textSplitter.splitDocuments(textSplit);
6445
+ }
6402
6446
  this.chunks = textSplit;
6403
6447
  return;
6404
6448
  }
@@ -6550,7 +6594,7 @@ Always return just the number, no explanation.`,
6550
6594
  const prompt = relevance.createSimilarityPrompt(query, text);
6551
6595
  const model = await this.agent.getModel();
6552
6596
  let response;
6553
- if (model.specificationVersion === "v2") {
6597
+ if (agent.isSupportedLanguageModel(model)) {
6554
6598
  response = await this.agent.generate(prompt);
6555
6599
  } else {
6556
6600
  response = await this.agent.generateLegacy(prompt);
@@ -6936,7 +6980,14 @@ var vectorQuerySearch = async ({
6936
6980
  providerOptions
6937
6981
  }) => {
6938
6982
  let embeddingResult;
6939
- if (model.specificationVersion === "v2") {
6983
+ if (model.specificationVersion === "v3") {
6984
+ embeddingResult = await vector.embedV3({
6985
+ model,
6986
+ value: queryText,
6987
+ maxRetries,
6988
+ ...providerOptions && { providerOptions }
6989
+ });
6990
+ } else if (model.specificationVersion === "v2") {
6940
6991
  embeddingResult = await vector.embedV2({
6941
6992
  model,
6942
6993
  value: queryText,