@mastra/rag 2.0.0-beta.5 → 2.0.0-beta.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34)
  1. package/CHANGELOG.md +157 -0
  2. package/dist/docs/README.md +1 -1
  3. package/dist/docs/SKILL.md +1 -1
  4. package/dist/docs/SOURCE_MAP.json +1 -1
  5. package/dist/docs/rag/02-chunking-and-embedding.md +0 -1
  6. package/dist/docs/rag/03-retrieval.md +5 -6
  7. package/dist/docs/rag/05-reference.md +22 -8
  8. package/dist/document/document.d.ts +1 -1
  9. package/dist/document/document.d.ts.map +1 -1
  10. package/dist/document/extractors/index.d.ts +2 -1
  11. package/dist/document/extractors/index.d.ts.map +1 -1
  12. package/dist/document/extractors/schema.d.ts +13 -0
  13. package/dist/document/extractors/schema.d.ts.map +1 -0
  14. package/dist/document/extractors/types.d.ts +7 -0
  15. package/dist/document/extractors/types.d.ts.map +1 -1
  16. package/dist/document/transformers/sentence.d.ts +0 -1
  17. package/dist/document/transformers/sentence.d.ts.map +1 -1
  18. package/dist/document/transformers/text.d.ts +2 -2
  19. package/dist/document/transformers/text.d.ts.map +1 -1
  20. package/dist/document/types.d.ts +3 -2
  21. package/dist/document/types.d.ts.map +1 -1
  22. package/dist/index.cjs +56 -17
  23. package/dist/index.cjs.map +1 -1
  24. package/dist/index.js +56 -17
  25. package/dist/index.js.map +1 -1
  26. package/dist/tools/document-chunker.d.ts +3 -2
  27. package/dist/tools/document-chunker.d.ts.map +1 -1
  28. package/dist/tools/graph-rag.d.ts +2 -2
  29. package/dist/tools/graph-rag.d.ts.map +1 -1
  30. package/dist/tools/vector-query.d.ts +2 -2
  31. package/dist/tools/vector-query.d.ts.map +1 -1
  32. package/dist/utils/tool-schemas.d.ts +2 -2
  33. package/dist/utils/tool-schemas.d.ts.map +1 -1
  34. package/package.json +2 -3
package/CHANGELOG.md CHANGED
@@ -1,5 +1,162 @@
1
1
  # @mastra/rag
2
2
 
3
+ ## 2.0.0-beta.7
4
+
5
+ ### Major Changes
6
+
7
+ - Refactor workflow and tool types to remove Zod-specific constraints ([#11814](https://github.com/mastra-ai/mastra/pull/11814))
8
+
9
+ Removed Zod-specific type constraints across all workflow implementations and tool types, replacing them with generic types. This ensures type consistency across default, evented, and inngest workflows while preparing for Zod v4 migration.
10
+
11
+ **Workflow Changes:**
12
+ - Removed `z.ZodObject<any>` and `z.ZodType<any>` constraints from all workflow generic types
13
+ - Updated method signatures to use `TInput` and `TState` directly instead of `z.infer<TInput>` and `z.infer<TState>`
14
+ - Aligned conditional types across all workflow implementations using `TInput extends unknown` pattern
15
+ - Fixed `TSteps` generic to properly use `TEngineType` instead of `any`
16
+
17
+ **Tool Changes:**
18
+ - Removed Zod schema constraints from `ToolExecutionContext` and related interfaces
19
+ - Simplified type parameters from `TSuspendSchema extends ZodLikeSchema` to `TSuspend` and `TResume`
20
+ - Updated tool execution context types to use generic types
21
+
22
+ **Type Utilities:**
23
+ - Refactored type helpers to work with generic schemas instead of Zod-specific types
24
+ - Updated type extraction utilities for better compatibility
25
+
26
+ This change maintains backward compatibility while improving type consistency and preparing for Zod v4 support across all affected packages.
27
+
28
+ ### Minor Changes
29
+
30
+ - Add schema-driven metadata extraction with Zod support ([#11833](https://github.com/mastra-ai/mastra/pull/11833))
31
+
32
+ Introduces a new `SchemaExtractor` that enables extraction of custom structured metadata from document chunks using user-defined Zod schemas. This allows for domain-specific metadata structures (e.g., product details, legal entities, sentiment analysis) to be reliably extracted via LLM structured output.
33
+ - Extract domain-specific metadata using your own Zod schemas (e.g., product details, legal entities, sentiment)
34
+ - Customize extraction behavior with your own LLM model and instructions
35
+ - Organize extracted data by nesting it under custom metadata keys
36
+ - Existing extractors (title, summary, keywords, questions) remain unchanged and fully compatible
37
+
38
+ **Before** (limited to built-in extractors):
39
+
40
+ ```typescript
41
+ await document.extractMetadata({
42
+ extract: {
43
+ title: true,
44
+ summary: true,
45
+ },
46
+ });
47
+ ```
48
+
49
+ **After** (with custom Zod schema):
50
+
51
+ ```typescript
52
+ import { z } from 'zod';
53
+
54
+ const productSchema = z.object({
55
+ name: z.string(),
56
+ price: z.number(),
57
+ category: z.string(),
58
+ });
59
+
60
+ await document.extractMetadata({
61
+ extract: {
62
+ title: true,
63
+ schema: {
64
+ schema: productSchema,
65
+ instructions: 'Extract product details from the document',
66
+ metadataKey: 'product',
67
+ },
68
+ },
69
+ });
70
+ ```
71
+
72
+ With `metadataKey`, extracted data is nested under the key:
73
+
74
+ ```typescript
75
+ {
76
+ title: "Product Document",
77
+ summary: "A comprehensive guide",
78
+ product: {
79
+ name: "Wireless Headphones",
80
+ price: 149.99,
81
+ category: "Electronics"
82
+ }
83
+ }
84
+ ```
85
+
86
+ Without `metadataKey`, extracted data is returned inline:
87
+
88
+ ```typescript
89
+ {
90
+ title: "Product Document",
91
+ summary: "A comprehensive guide",
92
+ name: "Wireless Headphones",
93
+ price: 149.99,
94
+ category: "Electronics"
95
+ }
96
+ ```
97
+
98
+ Fixes #11799
99
+
100
+ - Renamed `keepSeparator` parameter to `separatorPosition` with a cleaner type. ([#11802](https://github.com/mastra-ai/mastra/pull/11802))
101
+
102
+ The `keepSeparator` parameter had a confusing `boolean | 'start' | 'end'` type where `true` was secretly an alias for `'start'`. The new `separatorPosition` parameter uses explicit `'start' | 'end'` values, and omitting the parameter discards the separator (previous default behavior).
103
+
104
+ **Migration**
105
+
106
+ ```typescript
107
+ // Before
108
+ await doc.chunk({
109
+ strategy: 'character',
110
+ separator: '.',
111
+ keepSeparator: true, // or 'start'
112
+ });
113
+
114
+ await doc.chunk({
115
+ strategy: 'character',
116
+ separator: '.',
117
+ keepSeparator: 'end',
118
+ });
119
+
120
+ await doc.chunk({
121
+ strategy: 'character',
122
+ separator: '.',
123
+ keepSeparator: false, // or omit entirely
124
+ });
125
+
126
+ // After
127
+ await doc.chunk({
128
+ strategy: 'character',
129
+ separator: '.',
130
+ separatorPosition: 'start',
131
+ });
132
+
133
+ await doc.chunk({
134
+ strategy: 'character',
135
+ separator: '.',
136
+ separatorPosition: 'end',
137
+ });
138
+
139
+ await doc.chunk({
140
+ strategy: 'character',
141
+ separator: '.',
142
+ // omit separatorPosition to discard separator
143
+ });
144
+ ```
145
+
146
+ ### Patch Changes
147
+
148
+ - Updated dependencies [[`ebae12a`](https://github.com/mastra-ai/mastra/commit/ebae12a2dd0212e75478981053b148a2c246962d), [`c61a0a5`](https://github.com/mastra-ai/mastra/commit/c61a0a5de4904c88fd8b3718bc26d1be1c2ec6e7), [`69136e7`](https://github.com/mastra-ai/mastra/commit/69136e748e32f57297728a4e0f9a75988462f1a7), [`449aed2`](https://github.com/mastra-ai/mastra/commit/449aed2ba9d507b75bf93d427646ea94f734dfd1), [`eb648a2`](https://github.com/mastra-ai/mastra/commit/eb648a2cc1728f7678768dd70cd77619b448dab9), [`0131105`](https://github.com/mastra-ai/mastra/commit/0131105532e83bdcbb73352fc7d0879eebf140dc), [`9d5059e`](https://github.com/mastra-ai/mastra/commit/9d5059eae810829935fb08e81a9bb7ecd5b144a7), [`ef756c6`](https://github.com/mastra-ai/mastra/commit/ef756c65f82d16531c43f49a27290a416611e526), [`b00ccd3`](https://github.com/mastra-ai/mastra/commit/b00ccd325ebd5d9e37e34dd0a105caae67eb568f), [`3bdfa75`](https://github.com/mastra-ai/mastra/commit/3bdfa7507a91db66f176ba8221aa28dd546e464a), [`e770de9`](https://github.com/mastra-ai/mastra/commit/e770de941a287a49b1964d44db5a5763d19890a6), [`52e2716`](https://github.com/mastra-ai/mastra/commit/52e2716b42df6eff443de72360ae83e86ec23993), [`27b4040`](https://github.com/mastra-ai/mastra/commit/27b4040bfa1a95d92546f420a02a626b1419a1d6), [`610a70b`](https://github.com/mastra-ai/mastra/commit/610a70bdad282079f0c630e0d7bb284578f20151), [`8dc7f55`](https://github.com/mastra-ai/mastra/commit/8dc7f55900395771da851dc7d78d53ae84fe34ec), [`8379099`](https://github.com/mastra-ai/mastra/commit/8379099fc467af6bef54dd7f80c9bd75bf8bbddf), [`8c0ec25`](https://github.com/mastra-ai/mastra/commit/8c0ec25646c8a7df253ed1e5ff4863a0d3f1316c), [`ff4d9a6`](https://github.com/mastra-ai/mastra/commit/ff4d9a6704fc87b31a380a76ed22736fdedbba5a), [`69821ef`](https://github.com/mastra-ai/mastra/commit/69821ef806482e2c44e2197ac0b050c3fe3a5285), [`1ed5716`](https://github.com/mastra-ai/mastra/commit/1ed5716830867b3774c4a1b43cc0d82935f32b96), 
[`4186bdd`](https://github.com/mastra-ai/mastra/commit/4186bdd00731305726fa06adba0b076a1d50b49f), [`7aaf973`](https://github.com/mastra-ai/mastra/commit/7aaf973f83fbbe9521f1f9e7a4fd99b8de464617)]:
149
+ - @mastra/core@1.0.0-beta.22
150
+
151
+ ## 2.0.0-beta.6
152
+
153
+ ### Patch Changes
154
+
155
+ - Remove unnecessary `ai` package peer dependency to enable compatibility with AI SDK v6. The rag package doesn't directly use the ai package, so this peer dependency was unnecessarily constraining version compatibility. ([#11724](https://github.com/mastra-ai/mastra/pull/11724))
156
+
157
+ - Updated dependencies [[`08766f1`](https://github.com/mastra-ai/mastra/commit/08766f15e13ac0692fde2a8bd366c2e16e4321df), [`ae8baf7`](https://github.com/mastra-ai/mastra/commit/ae8baf7d8adcb0ff9dac11880400452bc49b33ff), [`cfabdd4`](https://github.com/mastra-ai/mastra/commit/cfabdd4aae7a726b706942d6836eeca110fb6267), [`a0e437f`](https://github.com/mastra-ai/mastra/commit/a0e437fac561b28ee719e0302d72b2f9b4c138f0), [`bec5efd`](https://github.com/mastra-ai/mastra/commit/bec5efde96653ccae6604e68c696d1bc6c1a0bf5), [`9eedf7d`](https://github.com/mastra-ai/mastra/commit/9eedf7de1d6e0022a2f4e5e9e6fe1ec468f9b43c)]:
158
+ - @mastra/core@1.0.0-beta.21
159
+
3
160
  ## 2.0.0-beta.5
4
161
 
5
162
  ### Minor Changes
@@ -29,4 +29,4 @@ docs/
29
29
  ## Version
30
30
 
31
31
  Package: @mastra/rag
32
- Version: 2.0.0-beta.5
32
+ Version: 2.0.0-beta.7
@@ -5,7 +5,7 @@ description: Documentation for @mastra/rag. Includes links to type definitions a
5
5
 
6
6
  # @mastra/rag Documentation
7
7
 
8
- > **Version**: 2.0.0-beta.5
8
+ > **Version**: 2.0.0-beta.7
9
9
  > **Package**: @mastra/rag
10
10
 
11
11
  ## Quick Navigation
@@ -1,5 +1,5 @@
1
1
  {
2
- "version": "2.0.0-beta.5",
2
+ "version": "2.0.0-beta.7",
3
3
  "package": "@mastra/rag",
4
4
  "exports": {},
5
5
  "modules": {}
@@ -51,7 +51,6 @@ const chunks = await doc.chunk({
51
51
  minSize: 50,
52
52
  overlap: 0,
53
53
  sentenceEnders: ["."],
54
- keepSeparator: true,
55
54
  });
56
55
  ```
57
56
 
@@ -171,7 +171,7 @@ The Vector Query Tool supports database-specific configurations that enable you
171
171
  > **Note:**
172
172
  These configurations are for **query-time options** like namespaces, performance tuning, and filtering—not for database connection setup.
173
173
 
174
- Connection credentials (URLs, auth tokens) are configured when you instantiate the vector store class (e.g., `new LibSQLVector({ connectionUrl: '...' })`).
174
+ Connection credentials (URLs, auth tokens) are configured when you instantiate the vector store class (e.g., `new LibSQLVector({ url: '...' })`).
175
175
 
176
176
  ```ts
177
177
  import { createVectorQueryTool } from "@mastra/rag";
@@ -258,11 +258,10 @@ requestContext.set("databaseConfig", {
258
258
  },
259
259
  });
260
260
 
261
- await pineconeQueryTool.execute({
262
- context: { queryText: "search query" },
263
- mastra,
264
- requestContext,
265
- });
261
+ await pineconeQueryTool.execute(
262
+ { queryText: "search query" },
263
+ { mastra, requestContext }
264
+ );
266
265
  ```
267
266
 
268
267
  For detailed configuration options and advanced usage, see the [Vector Query Tool Reference](https://mastra.ai/reference/v1/tools/vector-query-tool).
@@ -97,7 +97,7 @@ const results = await graphRag.query({
97
97
 
98
98
  ---
99
99
 
100
- ## Reference: Reference: .chunk()
100
+ ## Reference: .chunk()
101
101
 
102
102
  > Documentation for the chunk function in Mastra, which splits documents into smaller segments using various strategies.
103
103
 
@@ -171,7 +171,6 @@ const chunks = await doc.chunk({
171
171
  minSize: 50, // Sentence-specific option
172
172
  sentenceEnders: ["."], // Sentence-specific option
173
173
  fallbackToCharacters: false, // Sentence-specific option
174
- keepSeparator: true, // general option
175
174
  });
176
175
 
177
176
  // HTML strategy example
@@ -373,11 +372,10 @@ whereDocument: { "$contains": "API documentation" }
373
372
  }
374
373
  });
375
374
 
376
- await vectorTool.execute({
377
- context: { queryText: 'search query' },
378
- mastra,
379
- requestContext
380
- });
375
+ await vectorTool.execute(
376
+ { queryText: 'search query' },
377
+ { mastra, requestContext }
378
+ );
381
379
  ```
382
380
 
383
381
 
@@ -672,6 +670,8 @@ The `extract` parameter accepts the following fields:
672
670
 
673
671
  ### KeywordExtractArgs
674
672
 
673
+ ### SchemaExtractArgs
674
+
675
675
  ## Advanced Example
676
676
 
677
677
  ```typescript
@@ -705,6 +705,16 @@ const chunks = await doc.chunk({
705
705
  keywords: 5, // Extract 5 keywords
706
706
  promptTemplate: "Extract {maxKeywords} key terms from: {context}",
707
707
  },
708
+
709
+ // Schema extraction with Zod
710
+ schema: {
711
+ schema: z.object({
712
+ productName: z.string(),
713
+ category: z.enum(["electronics", "clothing"]),
714
+ }),
715
+ instructions: "Extract product information.",
716
+ metadataKey: "product",
717
+ },
708
718
  },
709
719
  });
710
720
 
@@ -713,7 +723,11 @@ const chunks = await doc.chunk({
713
723
  // documentTitle: "AI in Modern Computing",
714
724
  // sectionSummary: "Overview of AI concepts and their applications in computing",
715
725
  // questionsThisExcerptCanAnswer: "1. What is machine learning?\n2. How do neural networks work?",
716
- // excerptKeywords: "1. Machine learning\n2. Neural networks\n3. Training data"
726
+ // excerptKeywords: "1. Machine learning\n2. Neural networks\n3. Training data",
727
+ // product: {
728
+ // productName: "Neural Net 2000",
729
+ // category: "electronics"
730
+ // }
717
731
  // }
718
732
  ```
719
733
 
@@ -10,7 +10,7 @@ export declare class MDocument {
10
10
  }[];
11
11
  type: string;
12
12
  });
13
- extractMetadata({ title, summary, questions, keywords }: ExtractParams): Promise<MDocument>;
13
+ extractMetadata({ title, summary, questions, keywords, schema }: ExtractParams): Promise<MDocument>;
14
14
  static fromText(text: string, metadata?: Record<string, any>): MDocument;
15
15
  static fromHTML(html: string, metadata?: Record<string, any>): MDocument;
16
16
  static fromMarkdown(markdown: string, metadata?: Record<string, any>): MDocument;
@@ -1 +1 @@
1
- {"version":3,"file":"document.d.ts","sourceRoot":"","sources":["../../src/document/document.ts"],"names":[],"mappings":"AAEA,OAAO,EAAE,QAAQ,IAAI,KAAK,EAAgC,MAAM,UAAU,CAAC;AAU3E,OAAO,KAAK,EACV,WAAW,EAEX,aAAa,EACb,gBAAgB,EAChB,qBAAqB,EACrB,qBAAqB,EACrB,iBAAiB,EACjB,oBAAoB,EACpB,4BAA4B,EAC5B,gBAAgB,EAChB,iBAAiB,EACjB,oBAAoB,EAErB,MAAM,SAAS,CAAC;AAGjB,qBAAa,SAAS;IACpB,OAAO,CAAC,MAAM,CAAU;IACxB,OAAO,CAAC,IAAI,CAAS;gBAET,EAAE,IAAI,EAAE,IAAI,EAAE,EAAE;QAAE,IAAI,EAAE;YAAE,IAAI,EAAE,MAAM,CAAC;YAAC,QAAQ,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAA;SAAE,EAAE,CAAC;QAAC,IAAI,EAAE,MAAM,CAAA;KAAE;IAOhG,eAAe,CAAC,EAAE,KAAK,EAAE,OAAO,EAAE,SAAS,EAAE,QAAQ,EAAE,EAAE,aAAa,GAAG,OAAO,CAAC,SAAS,CAAC;IAmDjG,MAAM,CAAC,QAAQ,CAAC,IAAI,EAAE,MAAM,EAAE,QAAQ,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,GAAG,SAAS;IAYxE,MAAM,CAAC,QAAQ,CAAC,IAAI,EAAE,MAAM,EAAE,QAAQ,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,GAAG,SAAS;IAYxE,MAAM,CAAC,YAAY,CAAC,QAAQ,EAAE,MAAM,EAAE,QAAQ,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,GAAG,SAAS;IAYhF,MAAM,CAAC,QAAQ,CAAC,UAAU,EAAE,MAAM,EAAE,QAAQ,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,GAAG,SAAS;IAY9E,OAAO,CAAC,eAAe;IAevB,OAAO,CAAC,YAAY,CAAC,CAA4E;IAEjG,OAAO,KAAK,WAAW,GAetB;YAEa,OAAO;IASf,cAAc,CAAC,OAAO,CAAC,EAAE,qBAAqB,GAAG,OAAO,CAAC,IAAI,CAAC;IAa9D,cAAc,CAAC,OAAO,CAAC,EAAE,qBAAqB,GAAG,OAAO,CAAC,IAAI,CAAC;IAU9D,SAAS,CAAC,OAAO,CAAC,EAAE,gBAAgB,GAAG,OAAO,CAAC,IAAI,CAAC;IA8CpD,SAAS,CAAC,OAAO,CAAC,EAAE,gBAAgB,GAAG,OAAO,CAAC,IAAI,CAAC;IAmBpD,UAAU,CAAC,OAAO,CAAC,EAAE,iBAAiB,GAAG,OAAO,CAAC,IAAI,CAAC;IAMtD,UAAU,CAAC,OAAO,CAAC,EAAE,iBAAiB,GAAG,OAAO,CAAC,IAAI,CAAC;IAUtD,aAAa,CAAC,OAAO,CAAC,EAAE,oBAAoB,GAAG,OAAO,CAAC,IAAI,CAAC;IAa5D,aAAa,CAAC,OAAO,CAAC,EAAE,oBAAoB,GAAG,OAAO,CAAC,IAAI,CAAC;IAuB5D,qBAAqB,CAAC,OAAO,CAAC,EAAE,4BAA4B,GAAG,OAAO,CAAC,IAAI,CAAC;IAU5E,KAAK,CAAC,MAAM,CAAC,EAAE,WAAW,GAAG,OAAO,CAAC,KAAK,EAAE,CAAC;IAiBnD,OAAO,IAAI,KAAK,EAAE;IAIlB,OAAO,IAAI,MAAM,EAAE;IAInB,WAAW,IAAI,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,EAAE;CAGrC"}
1
+ {"version":3,"file":"document.d.ts","sourceRoot":"","sources":["../../src/document/document.ts"],"names":[],"mappings":"AAQA,OAAO,EAAE,QAAQ,IAAI,KAAK,EAAgC,MAAM,UAAU,CAAC;AAU3E,OAAO,KAAK,EACV,WAAW,EAEX,aAAa,EACb,gBAAgB,EAChB,qBAAqB,EACrB,qBAAqB,EACrB,iBAAiB,EACjB,oBAAoB,EACpB,4BAA4B,EAC5B,gBAAgB,EAChB,iBAAiB,EACjB,oBAAoB,EAErB,MAAM,SAAS,CAAC;AAGjB,qBAAa,SAAS;IACpB,OAAO,CAAC,MAAM,CAAU;IACxB,OAAO,CAAC,IAAI,CAAS;gBAET,EAAE,IAAI,EAAE,IAAI,EAAE,EAAE;QAAE,IAAI,EAAE;YAAE,IAAI,EAAE,MAAM,CAAC;YAAC,QAAQ,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAA;SAAE,EAAE,CAAC;QAAC,IAAI,EAAE,MAAM,CAAA;KAAE;IAOhG,eAAe,CAAC,EAAE,KAAK,EAAE,OAAO,EAAE,SAAS,EAAE,QAAQ,EAAE,MAAM,EAAE,EAAE,aAAa,GAAG,OAAO,CAAC,SAAS,CAAC;IAuDzG,MAAM,CAAC,QAAQ,CAAC,IAAI,EAAE,MAAM,EAAE,QAAQ,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,GAAG,SAAS;IAYxE,MAAM,CAAC,QAAQ,CAAC,IAAI,EAAE,MAAM,EAAE,QAAQ,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,GAAG,SAAS;IAYxE,MAAM,CAAC,YAAY,CAAC,QAAQ,EAAE,MAAM,EAAE,QAAQ,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,GAAG,SAAS;IAYhF,MAAM,CAAC,QAAQ,CAAC,UAAU,EAAE,MAAM,EAAE,QAAQ,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,GAAG,SAAS;IAY9E,OAAO,CAAC,eAAe;IAevB,OAAO,CAAC,YAAY,CAAC,CAA4E;IAEjG,OAAO,KAAK,WAAW,GAetB;YAEa,OAAO;IASf,cAAc,CAAC,OAAO,CAAC,EAAE,qBAAqB,GAAG,OAAO,CAAC,IAAI,CAAC;IAa9D,cAAc,CAAC,OAAO,CAAC,EAAE,qBAAqB,GAAG,OAAO,CAAC,IAAI,CAAC;IAU9D,SAAS,CAAC,OAAO,CAAC,EAAE,gBAAgB,GAAG,OAAO,CAAC,IAAI,CAAC;IA8CpD,SAAS,CAAC,OAAO,CAAC,EAAE,gBAAgB,GAAG,OAAO,CAAC,IAAI,CAAC;IAmBpD,UAAU,CAAC,OAAO,CAAC,EAAE,iBAAiB,GAAG,OAAO,CAAC,IAAI,CAAC;IAMtD,UAAU,CAAC,OAAO,CAAC,EAAE,iBAAiB,GAAG,OAAO,CAAC,IAAI,CAAC;IAUtD,aAAa,CAAC,OAAO,CAAC,EAAE,oBAAoB,GAAG,OAAO,CAAC,IAAI,CAAC;IAa5D,aAAa,CAAC,OAAO,CAAC,EAAE,oBAAoB,GAAG,OAAO,CAAC,IAAI,CAAC;IAuB5D,qBAAqB,CAAC,OAAO,CAAC,EAAE,4BAA4B,GAAG,OAAO,CAAC,IAAI,CAAC;IAU5E,KAAK,CAAC,MAAM,CAAC,EAAE,WAAW,GAAG,OAAO,CAAC,KAAK,EAAE,CAAC;IAiBnD,OAAO,IAAI,KAAK,EAAE;IAIlB,OAAO,IAAI,MAAM,EAAE;IAInB,WAAW,IAAI,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,EAAE;CAGrC"}
@@ -2,5 +2,6 @@ export { TitleExtractor } from './title.js';
2
2
  export { SummaryExtractor } from './summary.js';
3
3
  export { QuestionsAnsweredExtractor } from './questions.js';
4
4
  export { KeywordExtractor } from './keywords.js';
5
- export type { KeywordExtractArgs, QuestionAnswerExtractArgs, SummaryExtractArgs, TitleExtractorsArgs } from './types.js';
5
+ export { SchemaExtractor } from './schema.js';
6
+ export type { KeywordExtractArgs, QuestionAnswerExtractArgs, SummaryExtractArgs, TitleExtractorsArgs, SchemaExtractArgs, } from './types.js';
6
7
  //# sourceMappingURL=index.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/document/extractors/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,cAAc,EAAE,MAAM,SAAS,CAAC;AACzC,OAAO,EAAE,gBAAgB,EAAE,MAAM,WAAW,CAAC;AAC7C,OAAO,EAAE,0BAA0B,EAAE,MAAM,aAAa,CAAC;AACzD,OAAO,EAAE,gBAAgB,EAAE,MAAM,YAAY,CAAC;AAC9C,YAAY,EAAE,kBAAkB,EAAE,yBAAyB,EAAE,kBAAkB,EAAE,mBAAmB,EAAE,MAAM,SAAS,CAAC"}
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/document/extractors/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,cAAc,EAAE,MAAM,SAAS,CAAC;AACzC,OAAO,EAAE,gBAAgB,EAAE,MAAM,WAAW,CAAC;AAC7C,OAAO,EAAE,0BAA0B,EAAE,MAAM,aAAa,CAAC;AACzD,OAAO,EAAE,gBAAgB,EAAE,MAAM,YAAY,CAAC;AAC9C,OAAO,EAAE,eAAe,EAAE,MAAM,UAAU,CAAC;AAC3C,YAAY,EACV,kBAAkB,EAClB,yBAAyB,EACzB,kBAAkB,EAClB,mBAAmB,EACnB,iBAAiB,GAClB,MAAM,SAAS,CAAC"}
@@ -0,0 +1,13 @@
1
+ import type { z } from 'zod';
2
+ import type { BaseNode } from '../schema/index.js';
3
+ import { BaseExtractor } from './base.js';
4
+ import type { SchemaExtractArgs } from './types.js';
5
+ export declare class SchemaExtractor<T extends z.ZodType> extends BaseExtractor {
6
+ private schema;
7
+ private llm?;
8
+ private instructions?;
9
+ private metadataKey?;
10
+ constructor(options: SchemaExtractArgs<T>);
11
+ extract(nodes: BaseNode[]): Promise<Record<string, any>[]>;
12
+ }
13
+ //# sourceMappingURL=schema.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"schema.d.ts","sourceRoot":"","sources":["../../../src/document/extractors/schema.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AAE7B,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,WAAW,CAAC;AAE1C,OAAO,EAAE,aAAa,EAAE,MAAM,QAAQ,CAAC;AAEvC,OAAO,KAAK,EAAE,iBAAiB,EAAE,MAAM,SAAS,CAAC;AAEjD,qBAAa,eAAe,CAAC,CAAC,SAAS,CAAC,CAAC,OAAO,CAAE,SAAQ,aAAa;IACrE,OAAO,CAAC,MAAM,CAAI;IAClB,OAAO,CAAC,GAAG,CAAC,CAAkD;IAC9D,OAAO,CAAC,YAAY,CAAC,CAAS;IAC9B,OAAO,CAAC,WAAW,CAAC,CAAS;gBAEjB,OAAO,EAAE,iBAAiB,CAAC,CAAC,CAAC;IAQnC,OAAO,CAAC,KAAK,EAAE,QAAQ,EAAE,GAAG,OAAO,CAAC,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,EAAE,CAAC;CA6BjE"}
@@ -1,4 +1,5 @@
1
1
  import type { MastraLanguageModel, MastraLegacyLanguageModel } from '@mastra/core/agent';
2
+ import type { z } from 'zod';
2
3
  import type { KeywordExtractPrompt, QuestionExtractPrompt, SummaryPrompt, TitleExtractorPrompt, TitleCombinePrompt } from '../prompts/index.js';
3
4
  export type KeywordExtractArgs = {
4
5
  llm?: MastraLegacyLanguageModel | MastraLanguageModel;
@@ -22,6 +23,12 @@ export type TitleExtractorsArgs = {
22
23
  nodeTemplate?: TitleExtractorPrompt['template'];
23
24
  combineTemplate?: TitleCombinePrompt['template'];
24
25
  };
26
+ export type SchemaExtractArgs<T extends z.ZodType = z.ZodType> = {
27
+ schema: T;
28
+ llm?: MastraLegacyLanguageModel | MastraLanguageModel;
29
+ instructions?: string;
30
+ metadataKey?: string;
31
+ };
25
32
  export declare const STRIP_REGEX: RegExp;
26
33
  export declare const baseLLM: MastraLegacyLanguageModel | MastraLanguageModel;
27
34
  //# sourceMappingURL=types.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../../../src/document/extractors/types.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,mBAAmB,EAAE,yBAAyB,EAAE,MAAM,oBAAoB,CAAC;AACzF,OAAO,KAAK,EACV,oBAAoB,EACpB,qBAAqB,EACrB,aAAa,EACb,oBAAoB,EACpB,kBAAkB,EACnB,MAAM,YAAY,CAAC;AAEpB,MAAM,MAAM,kBAAkB,GAAG;IAC/B,GAAG,CAAC,EAAE,yBAAyB,GAAG,mBAAmB,CAAC;IACtD,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,cAAc,CAAC,EAAE,oBAAoB,CAAC,UAAU,CAAC,CAAC;CACnD,CAAC;AAEF,MAAM,MAAM,yBAAyB,GAAG;IACtC,GAAG,CAAC,EAAE,yBAAyB,GAAG,mBAAmB,CAAC;IACtD,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,cAAc,CAAC,EAAE,qBAAqB,CAAC,UAAU,CAAC,CAAC;IACnD,aAAa,CAAC,EAAE,OAAO,CAAC;CACzB,CAAC;AAEF,MAAM,MAAM,kBAAkB,GAAG;IAC/B,GAAG,CAAC,EAAE,yBAAyB,GAAG,mBAAmB,CAAC;IACtD,SAAS,CAAC,EAAE,MAAM,EAAE,CAAC;IACrB,cAAc,CAAC,EAAE,aAAa,CAAC,UAAU,CAAC,CAAC;CAC5C,CAAC;AAEF,MAAM,MAAM,mBAAmB,GAAG;IAChC,GAAG,CAAC,EAAE,yBAAyB,GAAG,mBAAmB,CAAC;IACtD,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,YAAY,CAAC,EAAE,oBAAoB,CAAC,UAAU,CAAC,CAAC;IAChD,eAAe,CAAC,EAAE,kBAAkB,CAAC,UAAU,CAAC,CAAC;CAClD,CAAC;AAEF,eAAO,MAAM,WAAW,QAAmB,CAAC;AAG5C,eAAO,MAAM,OAAO,EAAE,yBAAyB,GAAG,mBAAsC,CAAC"}
1
+ {"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../../../src/document/extractors/types.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,mBAAmB,EAAE,yBAAyB,EAAE,MAAM,oBAAoB,CAAC;AACzF,OAAO,KAAK,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AAC7B,OAAO,KAAK,EACV,oBAAoB,EACpB,qBAAqB,EACrB,aAAa,EACb,oBAAoB,EACpB,kBAAkB,EACnB,MAAM,YAAY,CAAC;AAEpB,MAAM,MAAM,kBAAkB,GAAG;IAC/B,GAAG,CAAC,EAAE,yBAAyB,GAAG,mBAAmB,CAAC;IACtD,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,cAAc,CAAC,EAAE,oBAAoB,CAAC,UAAU,CAAC,CAAC;CACnD,CAAC;AAEF,MAAM,MAAM,yBAAyB,GAAG;IACtC,GAAG,CAAC,EAAE,yBAAyB,GAAG,mBAAmB,CAAC;IACtD,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,cAAc,CAAC,EAAE,qBAAqB,CAAC,UAAU,CAAC,CAAC;IACnD,aAAa,CAAC,EAAE,OAAO,CAAC;CACzB,CAAC;AAEF,MAAM,MAAM,kBAAkB,GAAG;IAC/B,GAAG,CAAC,EAAE,yBAAyB,GAAG,mBAAmB,CAAC;IACtD,SAAS,CAAC,EAAE,MAAM,EAAE,CAAC;IACrB,cAAc,CAAC,EAAE,aAAa,CAAC,UAAU,CAAC,CAAC;CAC5C,CAAC;AAEF,MAAM,MAAM,mBAAmB,GAAG;IAChC,GAAG,CAAC,EAAE,yBAAyB,GAAG,mBAAmB,CAAC;IACtD,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,YAAY,CAAC,EAAE,oBAAoB,CAAC,UAAU,CAAC,CAAC;IAChD,eAAe,CAAC,EAAE,kBAAkB,CAAC,UAAU,CAAC,CAAC;CAClD,CAAC;AAEF,MAAM,MAAM,iBAAiB,CAAC,CAAC,SAAS,CAAC,CAAC,OAAO,GAAG,CAAC,CAAC,OAAO,IAAI;IAC/D,MAAM,EAAE,CAAC,CAAC;IACV,GAAG,CAAC,EAAE,yBAAyB,GAAG,mBAAmB,CAAC;IACtD,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,WAAW,CAAC,EAAE,MAAM,CAAC;CACtB,CAAC;AAEF,eAAO,MAAM,WAAW,QAAmB,CAAC;AAG5C,eAAO,MAAM,OAAO,EAAE,yBAAyB,GAAG,mBAAsC,CAAC"}
@@ -7,7 +7,6 @@ export declare class SentenceTransformer extends TextTransformer {
7
7
  protected sentenceEnders: string[];
8
8
  protected fallbackToWords: boolean;
9
9
  protected fallbackToCharacters: boolean;
10
- protected keepSeparator: boolean | 'start' | 'end';
11
10
  constructor(options: SentenceChunkOptions);
12
11
  private detectSentenceBoundaries;
13
12
  private isRealSentenceBoundary;
@@ -1 +1 @@
1
- {"version":3,"file":"sentence.d.ts","sourceRoot":"","sources":["../../../src/document/transformers/sentence.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,oBAAoB,EAAE,MAAM,UAAU,CAAC;AACrD,OAAO,EAAE,eAAe,EAAE,MAAM,QAAQ,CAAC;AAEzC,qBAAa,mBAAoB,SAAQ,eAAe;IACtD,SAAS,CAAC,OAAO,EAAE,MAAM,CAAC;IAC1B,SAAS,CAAC,OAAO,EAAE,MAAM,CAAC;IAC1B,SAAS,CAAC,UAAU,EAAE,MAAM,CAAC;IAC7B,SAAS,CAAC,cAAc,EAAE,MAAM,EAAE,CAAC;IACnC,SAAS,CAAC,eAAe,EAAE,OAAO,CAAC;IACnC,SAAS,CAAC,oBAAoB,EAAE,OAAO,CAAC;IACxC,SAAS,CAAC,aAAa,EAAE,OAAO,GAAG,OAAO,GAAG,KAAK,CAAC;gBAEvC,OAAO,EAAE,oBAAoB;IAuBzC,OAAO,CAAC,wBAAwB;IA+BhC,OAAO,CAAC,sBAAsB;IAqB9B,OAAO,CAAC,oBAAoB;IA8B5B;;OAEG;IACH,OAAO,CAAC,wBAAwB;IAsDhC;;OAEG;IACH,OAAO,CAAC,uBAAuB;IAqB/B,OAAO,CAAC,sBAAsB;IAmC9B,OAAO,CAAC,2BAA2B;IAsBnC,OAAO,CAAC,wBAAwB;IA4BhC,OAAO,CAAC,kBAAkB;IAqB1B,SAAS,CAAC,EAAE,IAAI,EAAE,EAAE;QAAE,IAAI,EAAE,MAAM,CAAA;KAAE,GAAG,MAAM,EAAE;CAShD"}
1
+ {"version":3,"file":"sentence.d.ts","sourceRoot":"","sources":["../../../src/document/transformers/sentence.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,oBAAoB,EAAE,MAAM,UAAU,CAAC;AACrD,OAAO,EAAE,eAAe,EAAE,MAAM,QAAQ,CAAC;AAEzC,qBAAa,mBAAoB,SAAQ,eAAe;IACtD,SAAS,CAAC,OAAO,EAAE,MAAM,CAAC;IAC1B,SAAS,CAAC,OAAO,EAAE,MAAM,CAAC;IAC1B,SAAS,CAAC,UAAU,EAAE,MAAM,CAAC;IAC7B,SAAS,CAAC,cAAc,EAAE,MAAM,EAAE,CAAC;IACnC,SAAS,CAAC,eAAe,EAAE,OAAO,CAAC;IACnC,SAAS,CAAC,oBAAoB,EAAE,OAAO,CAAC;gBAE5B,OAAO,EAAE,oBAAoB;IAsBzC,OAAO,CAAC,wBAAwB;IA+BhC,OAAO,CAAC,sBAAsB;IAqB9B,OAAO,CAAC,oBAAoB;IA8B5B;;OAEG;IACH,OAAO,CAAC,wBAAwB;IAsDhC;;OAEG;IACH,OAAO,CAAC,uBAAuB;IAqB/B,OAAO,CAAC,sBAAsB;IAmC9B,OAAO,CAAC,2BAA2B;IAsBnC,OAAO,CAAC,wBAAwB;IA4BhC,OAAO,CAAC,kBAAkB;IAqB1B,SAAS,CAAC,EAAE,IAAI,EAAE,EAAE;QAAE,IAAI,EAAE,MAAM,CAAA;KAAE,GAAG,MAAM,EAAE;CAShD"}
@@ -5,10 +5,10 @@ export declare abstract class TextTransformer implements Transformer {
5
5
  protected maxSize: number;
6
6
  protected overlap: number;
7
7
  protected lengthFunction: (text: string) => number;
8
- protected keepSeparator: boolean | 'start' | 'end';
8
+ protected separatorPosition?: 'start' | 'end';
9
9
  protected addStartIndex: boolean;
10
10
  protected stripWhitespace: boolean;
11
- constructor({ maxSize, overlap, lengthFunction, keepSeparator, addStartIndex, stripWhitespace, }: BaseChunkOptions);
11
+ constructor({ maxSize, overlap, lengthFunction, separatorPosition, addStartIndex, stripWhitespace, }: BaseChunkOptions);
12
12
  setAddStartIndex(value: boolean): void;
13
13
  abstract splitText({ text }: {
14
14
  text: string;
@@ -1 +1 @@
1
- {"version":3,"file":"text.d.ts","sourceRoot":"","sources":["../../../src/document/transformers/text.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,MAAM,WAAW,CAAC;AAErC,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,UAAU,CAAC;AAEjD,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,eAAe,CAAC;AAEjD,8BAAsB,eAAgB,YAAW,WAAW;IAC1D,SAAS,CAAC,OAAO,EAAE,MAAM,CAAC;IAC1B,SAAS,CAAC,OAAO,EAAE,MAAM,CAAC;IAC1B,SAAS,CAAC,cAAc,EAAE,CAAC,IAAI,EAAE,MAAM,KAAK,MAAM,CAAC;IACnD,SAAS,CAAC,aAAa,EAAE,OAAO,GAAG,OAAO,GAAG,KAAK,CAAC;IACnD,SAAS,CAAC,aAAa,EAAE,OAAO,CAAC;IACjC,SAAS,CAAC,eAAe,EAAE,OAAO,CAAC;gBAEvB,EACV,OAAc,EACd,OAAa,EACb,cAA8C,EAC9C,aAAqB,EACrB,aAAqB,EACrB,eAAsB,GACvB,EAAE,gBAAgB;IAYnB,gBAAgB,CAAC,KAAK,EAAE,OAAO,GAAG,IAAI;IAItC,QAAQ,CAAC,SAAS,CAAC,EAAE,IAAI,EAAE,EAAE;QAAE,IAAI,EAAE,MAAM,CAAA;KAAE,GAAG,MAAM,EAAE;IAExD,eAAe,CAAC,KAAK,EAAE,MAAM,EAAE,EAAE,SAAS,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,EAAE,GAAG,QAAQ,EAAE;IA4B/E,cAAc,CAAC,SAAS,EAAE,QAAQ,EAAE,GAAG,QAAQ,EAAE;IAUjD,kBAAkB,CAAC,SAAS,EAAE,QAAQ,EAAE,GAAG,QAAQ,EAAE;IAYrD,SAAS,CAAC,QAAQ,CAAC,IAAI,EAAE,MAAM,EAAE,EAAE,SAAS,EAAE,MAAM,GAAG,MAAM,GAAG,IAAI;IAQpE,SAAS,CAAC,WAAW,CAAC,MAAM,EAAE,MAAM,EAAE,EAAE,SAAS,EAAE,MAAM,GAAG,MAAM,EAAE;CA4DrE"}
1
+ {"version":3,"file":"text.d.ts","sourceRoot":"","sources":["../../../src/document/transformers/text.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,MAAM,WAAW,CAAC;AAErC,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,UAAU,CAAC;AAEjD,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,eAAe,CAAC;AAEjD,8BAAsB,eAAgB,YAAW,WAAW;IAC1D,SAAS,CAAC,OAAO,EAAE,MAAM,CAAC;IAC1B,SAAS,CAAC,OAAO,EAAE,MAAM,CAAC;IAC1B,SAAS,CAAC,cAAc,EAAE,CAAC,IAAI,EAAE,MAAM,KAAK,MAAM,CAAC;IACnD,SAAS,CAAC,iBAAiB,CAAC,EAAE,OAAO,GAAG,KAAK,CAAC;IAC9C,SAAS,CAAC,aAAa,EAAE,OAAO,CAAC;IACjC,SAAS,CAAC,eAAe,EAAE,OAAO,CAAC;gBAEvB,EACV,OAAc,EACd,OAAa,EACb,cAA8C,EAC9C,iBAAiB,EACjB,aAAqB,EACrB,eAAsB,GACvB,EAAE,gBAAgB;IAYnB,gBAAgB,CAAC,KAAK,EAAE,OAAO,GAAG,IAAI;IAItC,QAAQ,CAAC,SAAS,CAAC,EAAE,IAAI,EAAE,EAAE;QAAE,IAAI,EAAE,MAAM,CAAA;KAAE,GAAG,MAAM,EAAE;IAExD,eAAe,CAAC,KAAK,EAAE,MAAM,EAAE,EAAE,SAAS,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,EAAE,GAAG,QAAQ,EAAE;IA4B/E,cAAc,CAAC,SAAS,EAAE,QAAQ,EAAE,GAAG,QAAQ,EAAE;IAUjD,kBAAkB,CAAC,SAAS,EAAE,QAAQ,EAAE,GAAG,QAAQ,EAAE;IAYrD,SAAS,CAAC,QAAQ,CAAC,IAAI,EAAE,MAAM,EAAE,EAAE,SAAS,EAAE,MAAM,GAAG,MAAM,GAAG,IAAI;IAQpE,SAAS,CAAC,WAAW,CAAC,MAAM,EAAE,MAAM,EAAE,EAAE,SAAS,EAAE,MAAM,GAAG,MAAM,EAAE;CA4DrE"}
@@ -1,5 +1,5 @@
1
1
  import type { TiktokenEncoding, TiktokenModel } from 'js-tiktoken';
2
- import type { TitleExtractorsArgs, SummaryExtractArgs, QuestionAnswerExtractArgs, KeywordExtractArgs } from './extractors/index.js';
2
+ import type { TitleExtractorsArgs, SummaryExtractArgs, QuestionAnswerExtractArgs, KeywordExtractArgs, SchemaExtractArgs } from './extractors/index.js';
3
3
  export declare enum Language {
4
4
  CPP = "cpp",
5
5
  GO = "go",
@@ -33,12 +33,13 @@ export type ExtractParams = {
33
33
  summary?: SummaryExtractArgs | boolean;
34
34
  questions?: QuestionAnswerExtractArgs | boolean;
35
35
  keywords?: KeywordExtractArgs | boolean;
36
+ schema?: SchemaExtractArgs;
36
37
  };
37
38
  export type BaseChunkOptions = {
38
39
  maxSize?: number;
39
40
  overlap?: number;
40
41
  lengthFunction?: (text: string) => number;
41
- keepSeparator?: boolean | 'start' | 'end';
42
+ separatorPosition?: 'start' | 'end';
42
43
  addStartIndex?: boolean;
43
44
  stripWhitespace?: boolean;
44
45
  };
@@ -1 +1 @@
1
- {"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../../src/document/types.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,gBAAgB,EAAE,aAAa,EAAE,MAAM,aAAa,CAAC;AACnE,OAAO,KAAK,EACV,mBAAmB,EACnB,kBAAkB,EAClB,yBAAyB,EACzB,kBAAkB,EACnB,MAAM,cAAc,CAAC;AAEtB,oBAAY,QAAQ;IAClB,GAAG,QAAQ;IACX,EAAE,OAAO;IACT,IAAI,SAAS;IACb,MAAM,WAAW;IACjB,EAAE,OAAO;IACT,EAAE,OAAO;IACT,GAAG,QAAQ;IACX,KAAK,UAAU;IACf,MAAM,WAAW;IACjB,GAAG,QAAQ;IACX,IAAI,SAAS;IACb,IAAI,SAAS;IACb,KAAK,UAAU;IACf,KAAK,UAAU;IACf,QAAQ,aAAa;IACrB,KAAK,UAAU;IACf,IAAI,SAAS;IACb,GAAG,QAAQ;IACX,MAAM,WAAW;IACjB,KAAK,UAAU;IACf,CAAC,MAAM;IACP,GAAG,QAAQ;IACX,IAAI,SAAS;IACb,OAAO,YAAY;IACnB,MAAM,WAAW;IACjB,UAAU,eAAe;CAC1B;AAED,MAAM,MAAM,aAAa,GAAG;IAC1B,KAAK,CAAC,EAAE,mBAAmB,GAAG,OAAO,CAAC;IACtC,OAAO,CAAC,EAAE,kBAAkB,GAAG,OAAO,CAAC;IACvC,SAAS,CAAC,EAAE,yBAAyB,GAAG,OAAO,CAAC;IAChD,QAAQ,CAAC,EAAE,kBAAkB,GAAG,OAAO,CAAC;CACzC,CAAC;AAEF,MAAM,MAAM,gBAAgB,GAAG;IAC7B,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,cAAc,CAAC,EAAE,CAAC,IAAI,EAAE,MAAM,KAAK,MAAM,CAAC;IAC1C,aAAa,CAAC,EAAE,OAAO,GAAG,OAAO,GAAG,KAAK,CAAC;IAC1C,aAAa,CAAC,EAAE,OAAO,CAAC;IACxB,eAAe,CAAC,EAAE,OAAO,CAAC;CAC3B,CAAC;AAEF,MAAM,MAAM,qBAAqB,GAAG,gBAAgB,GAAG;IACrD,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,gBAAgB,CAAC,EAAE,OAAO,CAAC;CAC5B,CAAC;AAEF,MAAM,MAAM,qBAAqB,GAAG,gBAAgB,GAAG;IACrD,UAAU,CAAC,EAAE,MAAM,EAAE,CAAC;IACtB,gBAAgB,CAAC,EAAE,OAAO,CAAC;IAC3B,QAAQ,CAAC,EAAE,QAAQ,CAAC;CACrB,CAAC;AAEF,MAAM,MAAM,iBAAiB,GAAG,gBAAgB,GAAG;IACjD,YAAY,CAAC,EAAE,gBAAgB,CAAC;IAChC,SAAS,CAAC,EAAE,aAAa,CAAC;IAC1B,cAAc,CAAC,EAAE,GAAG,CAAC,MAAM,CAAC,GAAG,KAAK,CAAC;IACrC,iBAAiB,CAAC,EAAE,GAAG,CAAC,MAAM,CAAC,GAAG,KAAK,CAAC;CACzC,CAAC;AAEF,MAAM,MAAM,oBAAoB,GAAG,gBAAgB,GAAG;IACpD,OAAO,CAAC,EAAE,CAAC,MAAM,EAAE,MAAM,CAAC,EAAE,CAAC;IAC7B,cAAc,CAAC,EAAE,OAAO,CAAC;IACzB,YAAY,CAAC,EAAE,OAAO,CAAC;CACxB,CAAC;AAEF,MAAM,MAAM,4BAA4B,GAAG,gBAAgB,GAAG;IAC5D,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,YAAY,CAAC,EAAE,gBAAgB,CAAC;IAChC,SAAS,CAAC,EAAE,aAAa,CAAC;IAC1B,cAAc,CAAC,EAAE,GAAG,CAAC,MAAM,CAAC,GAAG,KA
AK,CAAC;IACrC,iBAAiB,CAAC,EAAE,GAAG,CAAC,MAAM,CAAC,GAAG,KAAK,CAAC;CACzC,CAAC;AAEF,MAAM,MAAM,gBAAgB,GAAG,gBAAgB,GAC7C,CACI;IAAE,OAAO,EAAE,CAAC,MAAM,EAAE,MAAM,CAAC,EAAE,CAAC;IAAC,QAAQ,CAAC,EAAE,KAAK,CAAC;IAAC,cAAc,CAAC,EAAE,OAAO,CAAA;CAAE,GAC3E;IAAE,QAAQ,EAAE,CAAC,MAAM,EAAE,MAAM,CAAC,EAAE,CAAC;IAAC,OAAO,CAAC,EAAE,KAAK,CAAA;CAAE,CACpD,GAAG;IAAE,cAAc,CAAC,EAAE,OAAO,CAAA;CAAE,CAAC;AAEnC,MAAM,MAAM,gBAAgB,GAAG,gBAAgB,GAAG;IAChD,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,WAAW,CAAC,EAAE,OAAO,CAAC;IACtB,YAAY,CAAC,EAAE,OAAO,CAAC;CACxB,CAAC;AAEF,MAAM,MAAM,iBAAiB,GAAG,gBAAgB,GAAG,EAAE,CAAC;AAEtD,MAAM,MAAM,oBAAoB,GAAG,gBAAgB,GAAG;IACpD,OAAO,EAAE,MAAM,CAAC;IAChB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,cAAc,CAAC,EAAE,MAAM,EAAE,CAAC;IAC1B,eAAe,CAAC,EAAE,OAAO,CAAC;IAC1B,oBAAoB,CAAC,EAAE,OAAO,CAAC;CAChC,CAAC;AAEF,MAAM,MAAM,eAAe,GAAG;IAC5B,SAAS,EAAE,qBAAqB,CAAC;IACjC,SAAS,EAAE,qBAAqB,CAAC;IACjC,KAAK,EAAE,iBAAiB,CAAC;IACzB,QAAQ,EAAE,oBAAoB,CAAC;IAC/B,IAAI,EAAE,gBAAgB,CAAC;IACvB,IAAI,EAAE,gBAAgB,CAAC;IACvB,KAAK,EAAE,iBAAiB,CAAC;IACzB,QAAQ,EAAE,oBAAoB,CAAC;IAC/B,mBAAmB,EAAE,4BAA4B,CAAC;CACnD,CAAC;AAEF,MAAM,MAAM,aAAa,GACrB,WAAW,GACX,WAAW,GACX,OAAO,GACP,UAAU,GACV,MAAM,GACN,MAAM,GACN,OAAO,GACP,UAAU,GACV,mBAAmB,CAAC;AAExB,MAAM,MAAM,WAAW,GACnB,CAAC;IAAE,QAAQ,CAAC,EAAE,WAAW,CAAA;CAAE,GAAG,qBAAqB,GAAG;IAAE,OAAO,CAAC,EAAE,aAAa,CAAA;CAAE,CAAC,GAClF,CAAC;IAAE,QAAQ,EAAE,WAAW,CAAA;CAAE,GAAG,qBAAqB,GAAG;IAAE,OAAO,CAAC,EAAE,aAAa,CAAA;CAAE,CAAC,GACjF,CAAC;IAAE,QAAQ,EAAE,OAAO,CAAA;CAAE,GAAG,iBAAiB,GAAG;IAAE,OAAO,CAAC,EAAE,aAAa,CAAA;CAAE,CAAC,GACzE,CAAC;IAAE,QAAQ,EAAE,UAAU,CAAA;CAAE,GAAG,oBAAoB,GAAG;IAAE,OAAO,CAAC,EAAE,aAAa,CAAA;CAAE,CAAC,GAC/E,CAAC;IAAE,QAAQ,EAAE,MAAM,CAAA;CAAE,GAAG,gBAAgB,GAAG;IAAE,OAAO,CAAC,EAAE,aAAa,CAAA;CAAE,CAAC,GACvE,CAAC;IAAE,QAAQ,EAAE,MAAM,CAAA;CAAE,GAAG,gBAAgB,GAAG;IAAE,OAAO,CAAC,EAAE,aAAa,CAAA;CAAE,CAAC,GACvE,CAAC;IAAE,QAAQ,EAAE,OAAO,CAAA;CAAE,GAAG,iBAAiB,GAAG;IAAE,OAAO,CAAC,EAAE,aAAa,CAAA;CAAE,CAAC,GACzE,CAAC;IAAE,QAAQ,EAAE,UAAU,CAAA;CAAE,GAAG,oBAAoB,GAAG;IAAE,OAAO,C
AAC,EAAE,aAAa,CAAA;CAAE,CAAC,GAC/E,CAAC;IAAE,QAAQ,EAAE,mBAAmB,CAAA;CAAE,GAAG,4BAA4B,GAAG;IAAE,OAAO,CAAC,EAAE,aAAa,CAAA;CAAE,CAAC,CAAC"}
1
+ {"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../../src/document/types.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,gBAAgB,EAAE,aAAa,EAAE,MAAM,aAAa,CAAC;AACnE,OAAO,KAAK,EACV,mBAAmB,EACnB,kBAAkB,EAClB,yBAAyB,EACzB,kBAAkB,EAClB,iBAAiB,EAClB,MAAM,cAAc,CAAC;AAEtB,oBAAY,QAAQ;IAClB,GAAG,QAAQ;IACX,EAAE,OAAO;IACT,IAAI,SAAS;IACb,MAAM,WAAW;IACjB,EAAE,OAAO;IACT,EAAE,OAAO;IACT,GAAG,QAAQ;IACX,KAAK,UAAU;IACf,MAAM,WAAW;IACjB,GAAG,QAAQ;IACX,IAAI,SAAS;IACb,IAAI,SAAS;IACb,KAAK,UAAU;IACf,KAAK,UAAU;IACf,QAAQ,aAAa;IACrB,KAAK,UAAU;IACf,IAAI,SAAS;IACb,GAAG,QAAQ;IACX,MAAM,WAAW;IACjB,KAAK,UAAU;IACf,CAAC,MAAM;IACP,GAAG,QAAQ;IACX,IAAI,SAAS;IACb,OAAO,YAAY;IACnB,MAAM,WAAW;IACjB,UAAU,eAAe;CAC1B;AAED,MAAM,MAAM,aAAa,GAAG;IAC1B,KAAK,CAAC,EAAE,mBAAmB,GAAG,OAAO,CAAC;IACtC,OAAO,CAAC,EAAE,kBAAkB,GAAG,OAAO,CAAC;IACvC,SAAS,CAAC,EAAE,yBAAyB,GAAG,OAAO,CAAC;IAChD,QAAQ,CAAC,EAAE,kBAAkB,GAAG,OAAO,CAAC;IACxC,MAAM,CAAC,EAAE,iBAAiB,CAAC;CAC5B,CAAC;AAEF,MAAM,MAAM,gBAAgB,GAAG;IAC7B,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,cAAc,CAAC,EAAE,CAAC,IAAI,EAAE,MAAM,KAAK,MAAM,CAAC;IAC1C,iBAAiB,CAAC,EAAE,OAAO,GAAG,KAAK,CAAC;IACpC,aAAa,CAAC,EAAE,OAAO,CAAC;IACxB,eAAe,CAAC,EAAE,OAAO,CAAC;CAC3B,CAAC;AAEF,MAAM,MAAM,qBAAqB,GAAG,gBAAgB,GAAG;IACrD,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,gBAAgB,CAAC,EAAE,OAAO,CAAC;CAC5B,CAAC;AAEF,MAAM,MAAM,qBAAqB,GAAG,gBAAgB,GAAG;IACrD,UAAU,CAAC,EAAE,MAAM,EAAE,CAAC;IACtB,gBAAgB,CAAC,EAAE,OAAO,CAAC;IAC3B,QAAQ,CAAC,EAAE,QAAQ,CAAC;CACrB,CAAC;AAEF,MAAM,MAAM,iBAAiB,GAAG,gBAAgB,GAAG;IACjD,YAAY,CAAC,EAAE,gBAAgB,CAAC;IAChC,SAAS,CAAC,EAAE,aAAa,CAAC;IAC1B,cAAc,CAAC,EAAE,GAAG,CAAC,MAAM,CAAC,GAAG,KAAK,CAAC;IACrC,iBAAiB,CAAC,EAAE,GAAG,CAAC,MAAM,CAAC,GAAG,KAAK,CAAC;CACzC,CAAC;AAEF,MAAM,MAAM,oBAAoB,GAAG,gBAAgB,GAAG;IACpD,OAAO,CAAC,EAAE,CAAC,MAAM,EAAE,MAAM,CAAC,EAAE,CAAC;IAC7B,cAAc,CAAC,EAAE,OAAO,CAAC;IACzB,YAAY,CAAC,EAAE,OAAO,CAAC;CACxB,CAAC;AAEF,MAAM,MAAM,4BAA4B,GAAG,gBAAgB,GAAG;IAC5D,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,YAAY,CAAC,EAAE,gBAAgB,CAAC;IAChC,SAAS,CAAC,EAAE,aAAa,CAAC;IAC1B,cAAc
,CAAC,EAAE,GAAG,CAAC,MAAM,CAAC,GAAG,KAAK,CAAC;IACrC,iBAAiB,CAAC,EAAE,GAAG,CAAC,MAAM,CAAC,GAAG,KAAK,CAAC;CACzC,CAAC;AAEF,MAAM,MAAM,gBAAgB,GAAG,gBAAgB,GAC7C,CACI;IAAE,OAAO,EAAE,CAAC,MAAM,EAAE,MAAM,CAAC,EAAE,CAAC;IAAC,QAAQ,CAAC,EAAE,KAAK,CAAC;IAAC,cAAc,CAAC,EAAE,OAAO,CAAA;CAAE,GAC3E;IAAE,QAAQ,EAAE,CAAC,MAAM,EAAE,MAAM,CAAC,EAAE,CAAC;IAAC,OAAO,CAAC,EAAE,KAAK,CAAA;CAAE,CACpD,GAAG;IAAE,cAAc,CAAC,EAAE,OAAO,CAAA;CAAE,CAAC;AAEnC,MAAM,MAAM,gBAAgB,GAAG,gBAAgB,GAAG;IAChD,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,WAAW,CAAC,EAAE,OAAO,CAAC;IACtB,YAAY,CAAC,EAAE,OAAO,CAAC;CACxB,CAAC;AAEF,MAAM,MAAM,iBAAiB,GAAG,gBAAgB,GAAG,EAAE,CAAC;AAEtD,MAAM,MAAM,oBAAoB,GAAG,gBAAgB,GAAG;IACpD,OAAO,EAAE,MAAM,CAAC;IAChB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,cAAc,CAAC,EAAE,MAAM,EAAE,CAAC;IAC1B,eAAe,CAAC,EAAE,OAAO,CAAC;IAC1B,oBAAoB,CAAC,EAAE,OAAO,CAAC;CAChC,CAAC;AAEF,MAAM,MAAM,eAAe,GAAG;IAC5B,SAAS,EAAE,qBAAqB,CAAC;IACjC,SAAS,EAAE,qBAAqB,CAAC;IACjC,KAAK,EAAE,iBAAiB,CAAC;IACzB,QAAQ,EAAE,oBAAoB,CAAC;IAC/B,IAAI,EAAE,gBAAgB,CAAC;IACvB,IAAI,EAAE,gBAAgB,CAAC;IACvB,KAAK,EAAE,iBAAiB,CAAC;IACzB,QAAQ,EAAE,oBAAoB,CAAC;IAC/B,mBAAmB,EAAE,4BAA4B,CAAC;CACnD,CAAC;AAEF,MAAM,MAAM,aAAa,GACrB,WAAW,GACX,WAAW,GACX,OAAO,GACP,UAAU,GACV,MAAM,GACN,MAAM,GACN,OAAO,GACP,UAAU,GACV,mBAAmB,CAAC;AAExB,MAAM,MAAM,WAAW,GACnB,CAAC;IAAE,QAAQ,CAAC,EAAE,WAAW,CAAA;CAAE,GAAG,qBAAqB,GAAG;IAAE,OAAO,CAAC,EAAE,aAAa,CAAA;CAAE,CAAC,GAClF,CAAC;IAAE,QAAQ,EAAE,WAAW,CAAA;CAAE,GAAG,qBAAqB,GAAG;IAAE,OAAO,CAAC,EAAE,aAAa,CAAA;CAAE,CAAC,GACjF,CAAC;IAAE,QAAQ,EAAE,OAAO,CAAA;CAAE,GAAG,iBAAiB,GAAG;IAAE,OAAO,CAAC,EAAE,aAAa,CAAA;CAAE,CAAC,GACzE,CAAC;IAAE,QAAQ,EAAE,UAAU,CAAA;CAAE,GAAG,oBAAoB,GAAG;IAAE,OAAO,CAAC,EAAE,aAAa,CAAA;CAAE,CAAC,GAC/E,CAAC;IAAE,QAAQ,EAAE,MAAM,CAAA;CAAE,GAAG,gBAAgB,GAAG;IAAE,OAAO,CAAC,EAAE,aAAa,CAAA;CAAE,CAAC,GACvE,CAAC;IAAE,QAAQ,EAAE,MAAM,CAAA;CAAE,GAAG,gBAAgB,GAAG;IAAE,OAAO,CAAC,EAAE,aAAa,CAAA;CAAE,CAAC,GACvE,CAAC;IAAE,QAAQ,EAAE,OAAO,CAAA;CAAE,GAAG,iBAAiB,GAAG;IAAE,OAAO,CAAC,EAAE,aAAa,CAAA;CAAE,CAAC,GACzE,CAAC;IAAE,QAAQ,EAAE,UAAU,
CAAA;CAAE,GAAG,oBAAoB,GAAG;IAAE,OAAO,CAAC,EAAE,aAAa,CAAA;CAAE,CAAC,GAC/E,CAAC;IAAE,QAAQ,EAAE,mBAAmB,CAAA;CAAE,GAAG,4BAA4B,GAAG;IAAE,OAAO,CAAC,EAAE,aAAa,CAAA;CAAE,CAAC,CAAC"}
package/dist/index.cjs CHANGED
@@ -4450,6 +4450,44 @@ var KeywordExtractor = class extends BaseExtractor {
4450
4450
  return results;
4451
4451
  }
4452
4452
  };
4453
+ var SchemaExtractor = class extends BaseExtractor {
4454
+ schema;
4455
+ llm;
4456
+ instructions;
4457
+ metadataKey;
4458
+ constructor(options) {
4459
+ super();
4460
+ this.schema = options.schema;
4461
+ this.llm = options.llm;
4462
+ this.instructions = options.instructions;
4463
+ this.metadataKey = options.metadataKey;
4464
+ }
4465
+ async extract(nodes) {
4466
+ const agent$1 = new agent.Agent({
4467
+ name: "schema-extractor",
4468
+ id: "schema-extractor",
4469
+ instructions: this.instructions ?? "Extract structured data from the provided text.",
4470
+ model: this.llm ?? baseLLM
4471
+ });
4472
+ const results = await Promise.all(
4473
+ nodes.map(async (node) => {
4474
+ try {
4475
+ const result = await agent$1.generate([{ role: "user", content: node.getContent() }], {
4476
+ structuredOutput: { schema: this.schema }
4477
+ });
4478
+ if (this.metadataKey) {
4479
+ return { [this.metadataKey]: result.object };
4480
+ }
4481
+ return result.object;
4482
+ } catch (error) {
4483
+ console.error("Schema extraction failed:", error);
4484
+ return {};
4485
+ }
4486
+ })
4487
+ );
4488
+ return results;
4489
+ }
4490
+ };
4453
4491
 
4454
4492
  // src/document/types.ts
4455
4493
  var Language = /* @__PURE__ */ ((Language2) => {
@@ -4487,14 +4525,14 @@ var TextTransformer = class {
4487
4525
  maxSize;
4488
4526
  overlap;
4489
4527
  lengthFunction;
4490
- keepSeparator;
4528
+ separatorPosition;
4491
4529
  addStartIndex;
4492
4530
  stripWhitespace;
4493
4531
  constructor({
4494
4532
  maxSize = 4e3,
4495
4533
  overlap = 200,
4496
4534
  lengthFunction = (text) => text.length,
4497
- keepSeparator = false,
4535
+ separatorPosition,
4498
4536
  addStartIndex = false,
4499
4537
  stripWhitespace = true
4500
4538
  }) {
@@ -4504,7 +4542,7 @@ var TextTransformer = class {
4504
4542
  this.maxSize = maxSize;
4505
4543
  this.overlap = overlap;
4506
4544
  this.lengthFunction = lengthFunction;
4507
- this.keepSeparator = keepSeparator;
4545
+ this.separatorPosition = separatorPosition;
4508
4546
  this.addStartIndex = addStartIndex;
4509
4547
  this.stripWhitespace = stripWhitespace;
4510
4548
  }
@@ -4610,11 +4648,11 @@ var TextTransformer = class {
4610
4648
  };
4611
4649
 
4612
4650
  // src/document/transformers/character.ts
4613
- function splitTextWithRegex(text, separator, keepSeparator) {
4651
+ function splitTextWithRegex(text, separator, separatorPosition) {
4614
4652
  if (!separator) {
4615
4653
  return text.split("");
4616
4654
  }
4617
- if (!keepSeparator) {
4655
+ if (!separatorPosition) {
4618
4656
  return text.split(new RegExp(separator)).filter((s) => s !== "");
4619
4657
  }
4620
4658
  if (!text) {
@@ -4622,7 +4660,7 @@ function splitTextWithRegex(text, separator, keepSeparator) {
4622
4660
  }
4623
4661
  const splits = text.split(new RegExp(`(${separator})`));
4624
4662
  const result = [];
4625
- if (keepSeparator === "end") {
4663
+ if (separatorPosition === "end") {
4626
4664
  for (let i = 0; i < splits.length - 1; i += 2) {
4627
4665
  if (i + 1 < splits.length) {
4628
4666
  const chunk = splits[i] + (splits[i + 1] || "");
@@ -4654,7 +4692,7 @@ var CharacterTransformer = class extends TextTransformer {
4654
4692
  }
4655
4693
  splitText({ text }) {
4656
4694
  const separator = this.isSeparatorRegex ? this.separator : this.separator.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
4657
- const initialSplits = splitTextWithRegex(text, separator, this.keepSeparator);
4695
+ const initialSplits = splitTextWithRegex(text, separator, this.separatorPosition);
4658
4696
  const chunks = [];
4659
4697
  for (const split of initialSplits) {
4660
4698
  if (this.lengthFunction(split) <= this.maxSize) {
@@ -4709,9 +4747,9 @@ var RecursiveCharacterTransformer = class _RecursiveCharacterTransformer extends
4709
4747
  }
4710
4748
  }
4711
4749
  const _separator = this.isSeparatorRegex ? separator : separator?.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
4712
- const splits = splitTextWithRegex(text, _separator, this.keepSeparator);
4750
+ const splits = splitTextWithRegex(text, _separator, this.separatorPosition);
4713
4751
  const goodSplits = [];
4714
- const mergeSeparator = this.keepSeparator ? "" : separator;
4752
+ const mergeSeparator = this.separatorPosition ? "" : separator;
4715
4753
  for (const s of splits) {
4716
4754
  if (this.lengthFunction(s) < this.maxSize) {
4717
4755
  goodSplits.push(s);
@@ -5857,7 +5895,6 @@ var SentenceTransformer = class extends TextTransformer {
5857
5895
  sentenceEnders;
5858
5896
  fallbackToWords;
5859
5897
  fallbackToCharacters;
5860
- keepSeparator;
5861
5898
  constructor(options) {
5862
5899
  const parentOverlap = Math.min(options.overlap ?? 0, options.maxSize - 1);
5863
5900
  const baseOptions = {
@@ -5872,7 +5909,6 @@ var SentenceTransformer = class extends TextTransformer {
5872
5909
  this.sentenceEnders = options.sentenceEnders ?? [".", "!", "?"];
5873
5910
  this.fallbackToWords = options.fallbackToWords ?? true;
5874
5911
  this.fallbackToCharacters = options.fallbackToCharacters ?? true;
5875
- this.keepSeparator = options.keepSeparator ?? false;
5876
5912
  this.overlap = options.overlap ?? 0;
5877
5913
  }
5878
5914
  detectSentenceBoundaries(text) {
@@ -6173,8 +6209,8 @@ var TokenTransformer = class _TokenTransformer extends TextTransformer {
6173
6209
  var baseChunkOptionsSchema = zod.z.object({
6174
6210
  maxSize: zod.z.number().positive().optional(),
6175
6211
  overlap: zod.z.number().min(0).optional(),
6176
- lengthFunction: zod.z.function().optional(),
6177
- keepSeparator: zod.z.union([zod.z.boolean(), zod.z.literal("start"), zod.z.literal("end")]).optional(),
6212
+ lengthFunction: zod.z.optional(zod.z.function()),
6213
+ separatorPosition: zod.z.enum(["start", "end"]).optional(),
6178
6214
  addStartIndex: zod.z.boolean().optional(),
6179
6215
  stripWhitespace: zod.z.boolean().optional()
6180
6216
  });
@@ -6269,8 +6305,11 @@ var MDocument = class _MDocument {
6269
6305
  });
6270
6306
  this.type = type;
6271
6307
  }
6272
- async extractMetadata({ title, summary, questions, keywords }) {
6308
+ async extractMetadata({ title, summary, questions, keywords, schema }) {
6273
6309
  const transformations = [];
6310
+ if (schema) {
6311
+ transformations.push(new SchemaExtractor(schema));
6312
+ }
6274
6313
  if (typeof summary !== "undefined") {
6275
6314
  transformations.push(new SummaryExtractor(typeof summary === "boolean" ? {} : summary));
6276
6315
  }
@@ -6421,7 +6460,7 @@ var MDocument = class _MDocument {
6421
6460
  const textSplitter = new RecursiveCharacterTransformer({
6422
6461
  maxSize: options.maxSize,
6423
6462
  overlap: options.overlap,
6424
- keepSeparator: options.keepSeparator,
6463
+ separatorPosition: options.separatorPosition,
6425
6464
  addStartIndex: options.addStartIndex,
6426
6465
  stripWhitespace: options.stripWhitespace
6427
6466
  });
@@ -6437,7 +6476,7 @@ var MDocument = class _MDocument {
6437
6476
  const textSplitter = new RecursiveCharacterTransformer({
6438
6477
  maxSize: options.maxSize,
6439
6478
  overlap: options.overlap,
6440
- keepSeparator: options.keepSeparator,
6479
+ separatorPosition: options.separatorPosition,
6441
6480
  addStartIndex: options.addStartIndex,
6442
6481
  stripWhitespace: options.stripWhitespace
6443
6482
  });
@@ -6500,7 +6539,7 @@ var MDocument = class _MDocument {
6500
6539
  sentenceEnders: options?.sentenceEnders,
6501
6540
  fallbackToWords: options?.fallbackToWords,
6502
6541
  fallbackToCharacters: options?.fallbackToCharacters,
6503
- keepSeparator: options?.keepSeparator,
6542
+ separatorPosition: options?.separatorPosition,
6504
6543
  lengthFunction: options?.lengthFunction,
6505
6544
  addStartIndex: options?.addStartIndex,
6506
6545
  stripWhitespace: options?.stripWhitespace