@mastra/rag 2.0.0-beta.5 → 2.0.0-beta.7
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +157 -0
- package/dist/docs/README.md +1 -1
- package/dist/docs/SKILL.md +1 -1
- package/dist/docs/SOURCE_MAP.json +1 -1
- package/dist/docs/rag/02-chunking-and-embedding.md +0 -1
- package/dist/docs/rag/03-retrieval.md +5 -6
- package/dist/docs/rag/05-reference.md +22 -8
- package/dist/document/document.d.ts +1 -1
- package/dist/document/document.d.ts.map +1 -1
- package/dist/document/extractors/index.d.ts +2 -1
- package/dist/document/extractors/index.d.ts.map +1 -1
- package/dist/document/extractors/schema.d.ts +13 -0
- package/dist/document/extractors/schema.d.ts.map +1 -0
- package/dist/document/extractors/types.d.ts +7 -0
- package/dist/document/extractors/types.d.ts.map +1 -1
- package/dist/document/transformers/sentence.d.ts +0 -1
- package/dist/document/transformers/sentence.d.ts.map +1 -1
- package/dist/document/transformers/text.d.ts +2 -2
- package/dist/document/transformers/text.d.ts.map +1 -1
- package/dist/document/types.d.ts +3 -2
- package/dist/document/types.d.ts.map +1 -1
- package/dist/index.cjs +56 -17
- package/dist/index.cjs.map +1 -1
- package/dist/index.js +56 -17
- package/dist/index.js.map +1 -1
- package/dist/tools/document-chunker.d.ts +3 -2
- package/dist/tools/document-chunker.d.ts.map +1 -1
- package/dist/tools/graph-rag.d.ts +2 -2
- package/dist/tools/graph-rag.d.ts.map +1 -1
- package/dist/tools/vector-query.d.ts +2 -2
- package/dist/tools/vector-query.d.ts.map +1 -1
- package/dist/utils/tool-schemas.d.ts +2 -2
- package/dist/utils/tool-schemas.d.ts.map +1 -1
- package/package.json +2 -3
package/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,162 @@
|
|
|
1
1
|
# @mastra/rag
|
|
2
2
|
|
|
3
|
+
## 2.0.0-beta.7
|
|
4
|
+
|
|
5
|
+
### Major Changes
|
|
6
|
+
|
|
7
|
+
- Refactor workflow and tool types to remove Zod-specific constraints ([#11814](https://github.com/mastra-ai/mastra/pull/11814))
|
|
8
|
+
|
|
9
|
+
Removed Zod-specific type constraints across all workflow implementations and tool types, replacing them with generic types. This ensures type consistency across default, evented, and inngest workflows while preparing for Zod v4 migration.
|
|
10
|
+
|
|
11
|
+
**Workflow Changes:**
|
|
12
|
+
- Removed `z.ZodObject<any>` and `z.ZodType<any>` constraints from all workflow generic types
|
|
13
|
+
- Updated method signatures to use `TInput` and `TState` directly instead of `z.infer<TInput>` and `z.infer<TState>`
|
|
14
|
+
- Aligned conditional types across all workflow implementations using `TInput extends unknown` pattern
|
|
15
|
+
- Fixed `TSteps` generic to properly use `TEngineType` instead of `any`
|
|
16
|
+
|
|
17
|
+
**Tool Changes:**
|
|
18
|
+
- Removed Zod schema constraints from `ToolExecutionContext` and related interfaces
|
|
19
|
+
- Simplified type parameters from `TSuspendSchema extends ZodLikeSchema` to `TSuspend` and `TResume`
|
|
20
|
+
- Updated tool execution context types to use generic types
|
|
21
|
+
|
|
22
|
+
**Type Utilities:**
|
|
23
|
+
- Refactored type helpers to work with generic schemas instead of Zod-specific types
|
|
24
|
+
- Updated type extraction utilities for better compatibility
|
|
25
|
+
|
|
26
|
+
This change maintains backward compatibility while improving type consistency and preparing for Zod v4 support across all affected packages.
|
|
27
|
+
|
|
28
|
+
### Minor Changes
|
|
29
|
+
|
|
30
|
+
- Add schema-driven metadata extraction with Zod support ([#11833](https://github.com/mastra-ai/mastra/pull/11833))
|
|
31
|
+
|
|
32
|
+
Introduces a new `SchemaExtractor` that enables extraction of custom structured metadata from document chunks using user-defined Zod schemas. This allows for domain-specific metadata structures (e.g., product details, legal entities, sentiment analysis) to be reliably extracted via LLM structured output.
|
|
33
|
+
- Extract domain-specific metadata using your own Zod schemas (e.g., product details, legal entities, sentiment)
|
|
34
|
+
- Customize extraction behavior with your own LLM model and instructions
|
|
35
|
+
- Organize extracted data by nesting it under custom metadata keys
|
|
36
|
+
- Existing extractors (title, summary, keywords, questions) remain unchanged and fully compatible
|
|
37
|
+
|
|
38
|
+
**Before** (limited to built-in extractors):
|
|
39
|
+
|
|
40
|
+
```typescript
|
|
41
|
+
await document.extractMetadata({
|
|
42
|
+
extract: {
|
|
43
|
+
title: true,
|
|
44
|
+
summary: true,
|
|
45
|
+
},
|
|
46
|
+
});
|
|
47
|
+
```
|
|
48
|
+
|
|
49
|
+
**After** (with custom Zod schema):
|
|
50
|
+
|
|
51
|
+
```typescript
|
|
52
|
+
import { z } from 'zod';
|
|
53
|
+
|
|
54
|
+
const productSchema = z.object({
|
|
55
|
+
name: z.string(),
|
|
56
|
+
price: z.number(),
|
|
57
|
+
category: z.string(),
|
|
58
|
+
});
|
|
59
|
+
|
|
60
|
+
await document.extractMetadata({
|
|
61
|
+
extract: {
|
|
62
|
+
title: true,
|
|
63
|
+
schema: {
|
|
64
|
+
schema: productSchema,
|
|
65
|
+
instructions: 'Extract product details from the document',
|
|
66
|
+
metadataKey: 'product',
|
|
67
|
+
},
|
|
68
|
+
},
|
|
69
|
+
});
|
|
70
|
+
```
|
|
71
|
+
|
|
72
|
+
With `metadataKey`, extracted data is nested under the key:
|
|
73
|
+
|
|
74
|
+
```typescript
|
|
75
|
+
{
|
|
76
|
+
title: "Product Document",
|
|
77
|
+
summary: "A comprehensive guide",
|
|
78
|
+
product: {
|
|
79
|
+
name: "Wireless Headphones",
|
|
80
|
+
price: 149.99,
|
|
81
|
+
category: "Electronics"
|
|
82
|
+
}
|
|
83
|
+
}
|
|
84
|
+
```
|
|
85
|
+
|
|
86
|
+
Without `metadataKey`, extracted data is returned inline:
|
|
87
|
+
|
|
88
|
+
```typescript
|
|
89
|
+
{
|
|
90
|
+
title: "Product Document",
|
|
91
|
+
summary: "A comprehensive guide",
|
|
92
|
+
name: "Wireless Headphones",
|
|
93
|
+
price: 149.99,
|
|
94
|
+
category: "Electronics"
|
|
95
|
+
}
|
|
96
|
+
```
|
|
97
|
+
|
|
98
|
+
Fixes #11799
|
|
99
|
+
|
|
100
|
+
- Renamed `keepSeparator` parameter to `separatorPosition` with a cleaner type. ([#11802](https://github.com/mastra-ai/mastra/pull/11802))
|
|
101
|
+
|
|
102
|
+
The `keepSeparator` parameter had a confusing `boolean | 'start' | 'end'` type in which `true` was an implicit alias for `'start'`. The new `separatorPosition` parameter accepts only the explicit values `'start'` or `'end'`; omitting it discards the separator, which matches the previous default behavior.
|
|
103
|
+
|
|
104
|
+
**Migration**
|
|
105
|
+
|
|
106
|
+
```typescript
|
|
107
|
+
// Before
|
|
108
|
+
await doc.chunk({
|
|
109
|
+
strategy: 'character',
|
|
110
|
+
separator: '.',
|
|
111
|
+
keepSeparator: true, // or 'start'
|
|
112
|
+
});
|
|
113
|
+
|
|
114
|
+
await doc.chunk({
|
|
115
|
+
strategy: 'character',
|
|
116
|
+
separator: '.',
|
|
117
|
+
keepSeparator: 'end',
|
|
118
|
+
});
|
|
119
|
+
|
|
120
|
+
await doc.chunk({
|
|
121
|
+
strategy: 'character',
|
|
122
|
+
separator: '.',
|
|
123
|
+
keepSeparator: false, // or omit entirely
|
|
124
|
+
});
|
|
125
|
+
|
|
126
|
+
// After
|
|
127
|
+
await doc.chunk({
|
|
128
|
+
strategy: 'character',
|
|
129
|
+
separator: '.',
|
|
130
|
+
separatorPosition: 'start',
|
|
131
|
+
});
|
|
132
|
+
|
|
133
|
+
await doc.chunk({
|
|
134
|
+
strategy: 'character',
|
|
135
|
+
separator: '.',
|
|
136
|
+
separatorPosition: 'end',
|
|
137
|
+
});
|
|
138
|
+
|
|
139
|
+
await doc.chunk({
|
|
140
|
+
strategy: 'character',
|
|
141
|
+
separator: '.',
|
|
142
|
+
// omit separatorPosition to discard separator
|
|
143
|
+
});
|
|
144
|
+
```
|
|
145
|
+
|
|
146
|
+
### Patch Changes
|
|
147
|
+
|
|
148
|
+
- Updated dependencies [[`ebae12a`](https://github.com/mastra-ai/mastra/commit/ebae12a2dd0212e75478981053b148a2c246962d), [`c61a0a5`](https://github.com/mastra-ai/mastra/commit/c61a0a5de4904c88fd8b3718bc26d1be1c2ec6e7), [`69136e7`](https://github.com/mastra-ai/mastra/commit/69136e748e32f57297728a4e0f9a75988462f1a7), [`449aed2`](https://github.com/mastra-ai/mastra/commit/449aed2ba9d507b75bf93d427646ea94f734dfd1), [`eb648a2`](https://github.com/mastra-ai/mastra/commit/eb648a2cc1728f7678768dd70cd77619b448dab9), [`0131105`](https://github.com/mastra-ai/mastra/commit/0131105532e83bdcbb73352fc7d0879eebf140dc), [`9d5059e`](https://github.com/mastra-ai/mastra/commit/9d5059eae810829935fb08e81a9bb7ecd5b144a7), [`ef756c6`](https://github.com/mastra-ai/mastra/commit/ef756c65f82d16531c43f49a27290a416611e526), [`b00ccd3`](https://github.com/mastra-ai/mastra/commit/b00ccd325ebd5d9e37e34dd0a105caae67eb568f), [`3bdfa75`](https://github.com/mastra-ai/mastra/commit/3bdfa7507a91db66f176ba8221aa28dd546e464a), [`e770de9`](https://github.com/mastra-ai/mastra/commit/e770de941a287a49b1964d44db5a5763d19890a6), [`52e2716`](https://github.com/mastra-ai/mastra/commit/52e2716b42df6eff443de72360ae83e86ec23993), [`27b4040`](https://github.com/mastra-ai/mastra/commit/27b4040bfa1a95d92546f420a02a626b1419a1d6), [`610a70b`](https://github.com/mastra-ai/mastra/commit/610a70bdad282079f0c630e0d7bb284578f20151), [`8dc7f55`](https://github.com/mastra-ai/mastra/commit/8dc7f55900395771da851dc7d78d53ae84fe34ec), [`8379099`](https://github.com/mastra-ai/mastra/commit/8379099fc467af6bef54dd7f80c9bd75bf8bbddf), [`8c0ec25`](https://github.com/mastra-ai/mastra/commit/8c0ec25646c8a7df253ed1e5ff4863a0d3f1316c), [`ff4d9a6`](https://github.com/mastra-ai/mastra/commit/ff4d9a6704fc87b31a380a76ed22736fdedbba5a), [`69821ef`](https://github.com/mastra-ai/mastra/commit/69821ef806482e2c44e2197ac0b050c3fe3a5285), [`1ed5716`](https://github.com/mastra-ai/mastra/commit/1ed5716830867b3774c4a1b43cc0d82935f32b96), 
[`4186bdd`](https://github.com/mastra-ai/mastra/commit/4186bdd00731305726fa06adba0b076a1d50b49f), [`7aaf973`](https://github.com/mastra-ai/mastra/commit/7aaf973f83fbbe9521f1f9e7a4fd99b8de464617)]:
|
|
149
|
+
- @mastra/core@1.0.0-beta.22
|
|
150
|
+
|
|
151
|
+
## 2.0.0-beta.6
|
|
152
|
+
|
|
153
|
+
### Patch Changes
|
|
154
|
+
|
|
155
|
+
- Remove unnecessary `ai` package peer dependency to enable compatibility with AI SDK v6. The rag package doesn't directly use the ai package, so this peer dependency was unnecessarily constraining version compatibility. ([#11724](https://github.com/mastra-ai/mastra/pull/11724))
|
|
156
|
+
|
|
157
|
+
- Updated dependencies [[`08766f1`](https://github.com/mastra-ai/mastra/commit/08766f15e13ac0692fde2a8bd366c2e16e4321df), [`ae8baf7`](https://github.com/mastra-ai/mastra/commit/ae8baf7d8adcb0ff9dac11880400452bc49b33ff), [`cfabdd4`](https://github.com/mastra-ai/mastra/commit/cfabdd4aae7a726b706942d6836eeca110fb6267), [`a0e437f`](https://github.com/mastra-ai/mastra/commit/a0e437fac561b28ee719e0302d72b2f9b4c138f0), [`bec5efd`](https://github.com/mastra-ai/mastra/commit/bec5efde96653ccae6604e68c696d1bc6c1a0bf5), [`9eedf7d`](https://github.com/mastra-ai/mastra/commit/9eedf7de1d6e0022a2f4e5e9e6fe1ec468f9b43c)]:
|
|
158
|
+
- @mastra/core@1.0.0-beta.21
|
|
159
|
+
|
|
3
160
|
## 2.0.0-beta.5
|
|
4
161
|
|
|
5
162
|
### Minor Changes
|
package/dist/docs/README.md
CHANGED
package/dist/docs/SKILL.md
CHANGED
|
@@ -171,7 +171,7 @@ The Vector Query Tool supports database-specific configurations that enable you
|
|
|
171
171
|
> **Note:**
|
|
172
172
|
These configurations are for **query-time options** like namespaces, performance tuning, and filtering—not for database connection setup.
|
|
173
173
|
|
|
174
|
-
Connection credentials (URLs, auth tokens) are configured when you instantiate the vector store class (e.g., `new LibSQLVector({
|
|
174
|
+
Connection credentials (URLs, auth tokens) are configured when you instantiate the vector store class (e.g., `new LibSQLVector({ url: '...' })`).
|
|
175
175
|
|
|
176
176
|
```ts
|
|
177
177
|
import { createVectorQueryTool } from "@mastra/rag";
|
|
@@ -258,11 +258,10 @@ requestContext.set("databaseConfig", {
|
|
|
258
258
|
},
|
|
259
259
|
});
|
|
260
260
|
|
|
261
|
-
await pineconeQueryTool.execute(
|
|
262
|
-
|
|
263
|
-
mastra,
|
|
264
|
-
|
|
265
|
-
});
|
|
261
|
+
await pineconeQueryTool.execute(
|
|
262
|
+
{ queryText: "search query" },
|
|
263
|
+
{ mastra, requestContext }
|
|
264
|
+
);
|
|
266
265
|
```
|
|
267
266
|
|
|
268
267
|
For detailed configuration options and advanced usage, see the [Vector Query Tool Reference](https://mastra.ai/reference/v1/tools/vector-query-tool).
|
|
@@ -97,7 +97,7 @@ const results = await graphRag.query({
|
|
|
97
97
|
|
|
98
98
|
---
|
|
99
99
|
|
|
100
|
-
## Reference:
|
|
100
|
+
## Reference: .chunk()
|
|
101
101
|
|
|
102
102
|
> Documentation for the chunk function in Mastra, which splits documents into smaller segments using various strategies.
|
|
103
103
|
|
|
@@ -171,7 +171,6 @@ const chunks = await doc.chunk({
|
|
|
171
171
|
minSize: 50, // Sentence-specific option
|
|
172
172
|
sentenceEnders: ["."], // Sentence-specific option
|
|
173
173
|
fallbackToCharacters: false, // Sentence-specific option
|
|
174
|
-
keepSeparator: true, // general option
|
|
175
174
|
});
|
|
176
175
|
|
|
177
176
|
// HTML strategy example
|
|
@@ -373,11 +372,10 @@ whereDocument: { "$contains": "API documentation" }
|
|
|
373
372
|
}
|
|
374
373
|
});
|
|
375
374
|
|
|
376
|
-
await vectorTool.execute(
|
|
377
|
-
|
|
378
|
-
mastra,
|
|
379
|
-
|
|
380
|
-
});
|
|
375
|
+
await vectorTool.execute(
|
|
376
|
+
{ queryText: 'search query' },
|
|
377
|
+
{ mastra, requestContext }
|
|
378
|
+
);
|
|
381
379
|
```
|
|
382
380
|
|
|
383
381
|
|
|
@@ -672,6 +670,8 @@ The `extract` parameter accepts the following fields:
|
|
|
672
670
|
|
|
673
671
|
### KeywordExtractArgs
|
|
674
672
|
|
|
673
|
+
### SchemaExtractArgs
|
|
674
|
+
|
|
675
675
|
## Advanced Example
|
|
676
676
|
|
|
677
677
|
```typescript
|
|
@@ -705,6 +705,16 @@ const chunks = await doc.chunk({
|
|
|
705
705
|
keywords: 5, // Extract 5 keywords
|
|
706
706
|
promptTemplate: "Extract {maxKeywords} key terms from: {context}",
|
|
707
707
|
},
|
|
708
|
+
|
|
709
|
+
// Schema extraction with Zod
|
|
710
|
+
schema: {
|
|
711
|
+
schema: z.object({
|
|
712
|
+
productName: z.string(),
|
|
713
|
+
category: z.enum(["electronics", "clothing"]),
|
|
714
|
+
}),
|
|
715
|
+
instructions: "Extract product information.",
|
|
716
|
+
metadataKey: "product",
|
|
717
|
+
},
|
|
708
718
|
},
|
|
709
719
|
});
|
|
710
720
|
|
|
@@ -713,7 +723,11 @@ const chunks = await doc.chunk({
|
|
|
713
723
|
// documentTitle: "AI in Modern Computing",
|
|
714
724
|
// sectionSummary: "Overview of AI concepts and their applications in computing",
|
|
715
725
|
// questionsThisExcerptCanAnswer: "1. What is machine learning?\n2. How do neural networks work?",
|
|
716
|
-
// excerptKeywords: "1. Machine learning\n2. Neural networks\n3. Training data"
|
|
726
|
+
// excerptKeywords: "1. Machine learning\n2. Neural networks\n3. Training data",
|
|
727
|
+
// product: {
|
|
728
|
+
// productName: "Neural Net 2000",
|
|
729
|
+
// category: "electronics"
|
|
730
|
+
// }
|
|
717
731
|
// }
|
|
718
732
|
```
|
|
719
733
|
|
|
@@ -10,7 +10,7 @@ export declare class MDocument {
|
|
|
10
10
|
}[];
|
|
11
11
|
type: string;
|
|
12
12
|
});
|
|
13
|
-
extractMetadata({ title, summary, questions, keywords }: ExtractParams): Promise<MDocument>;
|
|
13
|
+
extractMetadata({ title, summary, questions, keywords, schema }: ExtractParams): Promise<MDocument>;
|
|
14
14
|
static fromText(text: string, metadata?: Record<string, any>): MDocument;
|
|
15
15
|
static fromHTML(html: string, metadata?: Record<string, any>): MDocument;
|
|
16
16
|
static fromMarkdown(markdown: string, metadata?: Record<string, any>): MDocument;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"document.d.ts","sourceRoot":"","sources":["../../src/document/document.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"document.d.ts","sourceRoot":"","sources":["../../src/document/document.ts"],"names":[],"mappings":"AAQA,OAAO,EAAE,QAAQ,IAAI,KAAK,EAAgC,MAAM,UAAU,CAAC;AAU3E,OAAO,KAAK,EACV,WAAW,EAEX,aAAa,EACb,gBAAgB,EAChB,qBAAqB,EACrB,qBAAqB,EACrB,iBAAiB,EACjB,oBAAoB,EACpB,4BAA4B,EAC5B,gBAAgB,EAChB,iBAAiB,EACjB,oBAAoB,EAErB,MAAM,SAAS,CAAC;AAGjB,qBAAa,SAAS;IACpB,OAAO,CAAC,MAAM,CAAU;IACxB,OAAO,CAAC,IAAI,CAAS;gBAET,EAAE,IAAI,EAAE,IAAI,EAAE,EAAE;QAAE,IAAI,EAAE;YAAE,IAAI,EAAE,MAAM,CAAC;YAAC,QAAQ,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAA;SAAE,EAAE,CAAC;QAAC,IAAI,EAAE,MAAM,CAAA;KAAE;IAOhG,eAAe,CAAC,EAAE,KAAK,EAAE,OAAO,EAAE,SAAS,EAAE,QAAQ,EAAE,MAAM,EAAE,EAAE,aAAa,GAAG,OAAO,CAAC,SAAS,CAAC;IAuDzG,MAAM,CAAC,QAAQ,CAAC,IAAI,EAAE,MAAM,EAAE,QAAQ,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,GAAG,SAAS;IAYxE,MAAM,CAAC,QAAQ,CAAC,IAAI,EAAE,MAAM,EAAE,QAAQ,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,GAAG,SAAS;IAYxE,MAAM,CAAC,YAAY,CAAC,QAAQ,EAAE,MAAM,EAAE,QAAQ,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,GAAG,SAAS;IAYhF,MAAM,CAAC,QAAQ,CAAC,UAAU,EAAE,MAAM,EAAE,QAAQ,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,GAAG,SAAS;IAY9E,OAAO,CAAC,eAAe;IAevB,OAAO,CAAC,YAAY,CAAC,CAA4E;IAEjG,OAAO,KAAK,WAAW,GAetB;YAEa,OAAO;IASf,cAAc,CAAC,OAAO,CAAC,EAAE,qBAAqB,GAAG,OAAO,CAAC,IAAI,CAAC;IAa9D,cAAc,CAAC,OAAO,CAAC,EAAE,qBAAqB,GAAG,OAAO,CAAC,IAAI,CAAC;IAU9D,SAAS,CAAC,OAAO,CAAC,EAAE,gBAAgB,GAAG,OAAO,CAAC,IAAI,CAAC;IA8CpD,SAAS,CAAC,OAAO,CAAC,EAAE,gBAAgB,GAAG,OAAO,CAAC,IAAI,CAAC;IAmBpD,UAAU,CAAC,OAAO,CAAC,EAAE,iBAAiB,GAAG,OAAO,CAAC,IAAI,CAAC;IAMtD,UAAU,CAAC,OAAO,CAAC,EAAE,iBAAiB,GAAG,OAAO,CAAC,IAAI,CAAC;IAUtD,aAAa,CAAC,OAAO,CAAC,EAAE,oBAAoB,GAAG,OAAO,CAAC,IAAI,CAAC;IAa5D,aAAa,CAAC,OAAO,CAAC,EAAE,oBAAoB,GAAG,OAAO,CAAC,IAAI,CAAC;IAuB5D,qBAAqB,CAAC,OAAO,CAAC,EAAE,4BAA4B,GAAG,OAAO,CAAC,IAAI,CAAC;IAU5E,KAAK,CAAC,MAAM,CAAC,EAAE,WAAW,GAAG,OAAO,CAAC,KAAK,EAAE,CAAC;IAiBnD,OAAO,IAAI,KAAK,EAAE;IAIlB,OAAO,IAAI,MAAM,EAAE;IAInB,WAAW,IAAI,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,EAAE;CAGrC"}
|
|
@@ -2,5 +2,6 @@ export { TitleExtractor } from './title.js';
|
|
|
2
2
|
export { SummaryExtractor } from './summary.js';
|
|
3
3
|
export { QuestionsAnsweredExtractor } from './questions.js';
|
|
4
4
|
export { KeywordExtractor } from './keywords.js';
|
|
5
|
-
export
|
|
5
|
+
export { SchemaExtractor } from './schema.js';
|
|
6
|
+
export type { KeywordExtractArgs, QuestionAnswerExtractArgs, SummaryExtractArgs, TitleExtractorsArgs, SchemaExtractArgs, } from './types.js';
|
|
6
7
|
//# sourceMappingURL=index.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/document/extractors/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,cAAc,EAAE,MAAM,SAAS,CAAC;AACzC,OAAO,EAAE,gBAAgB,EAAE,MAAM,WAAW,CAAC;AAC7C,OAAO,EAAE,0BAA0B,EAAE,MAAM,aAAa,CAAC;AACzD,OAAO,EAAE,gBAAgB,EAAE,MAAM,YAAY,CAAC;AAC9C,
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/document/extractors/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,cAAc,EAAE,MAAM,SAAS,CAAC;AACzC,OAAO,EAAE,gBAAgB,EAAE,MAAM,WAAW,CAAC;AAC7C,OAAO,EAAE,0BAA0B,EAAE,MAAM,aAAa,CAAC;AACzD,OAAO,EAAE,gBAAgB,EAAE,MAAM,YAAY,CAAC;AAC9C,OAAO,EAAE,eAAe,EAAE,MAAM,UAAU,CAAC;AAC3C,YAAY,EACV,kBAAkB,EAClB,yBAAyB,EACzB,kBAAkB,EAClB,mBAAmB,EACnB,iBAAiB,GAClB,MAAM,SAAS,CAAC"}
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
import type { z } from 'zod';
|
|
2
|
+
import type { BaseNode } from '../schema/index.js';
|
|
3
|
+
import { BaseExtractor } from './base.js';
|
|
4
|
+
import type { SchemaExtractArgs } from './types.js';
|
|
5
|
+
export declare class SchemaExtractor<T extends z.ZodType> extends BaseExtractor {
|
|
6
|
+
private schema;
|
|
7
|
+
private llm?;
|
|
8
|
+
private instructions?;
|
|
9
|
+
private metadataKey?;
|
|
10
|
+
constructor(options: SchemaExtractArgs<T>);
|
|
11
|
+
extract(nodes: BaseNode[]): Promise<Record<string, any>[]>;
|
|
12
|
+
}
|
|
13
|
+
//# sourceMappingURL=schema.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"schema.d.ts","sourceRoot":"","sources":["../../../src/document/extractors/schema.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AAE7B,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,WAAW,CAAC;AAE1C,OAAO,EAAE,aAAa,EAAE,MAAM,QAAQ,CAAC;AAEvC,OAAO,KAAK,EAAE,iBAAiB,EAAE,MAAM,SAAS,CAAC;AAEjD,qBAAa,eAAe,CAAC,CAAC,SAAS,CAAC,CAAC,OAAO,CAAE,SAAQ,aAAa;IACrE,OAAO,CAAC,MAAM,CAAI;IAClB,OAAO,CAAC,GAAG,CAAC,CAAkD;IAC9D,OAAO,CAAC,YAAY,CAAC,CAAS;IAC9B,OAAO,CAAC,WAAW,CAAC,CAAS;gBAEjB,OAAO,EAAE,iBAAiB,CAAC,CAAC,CAAC;IAQnC,OAAO,CAAC,KAAK,EAAE,QAAQ,EAAE,GAAG,OAAO,CAAC,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,EAAE,CAAC;CA6BjE"}
|
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import type { MastraLanguageModel, MastraLegacyLanguageModel } from '@mastra/core/agent';
|
|
2
|
+
import type { z } from 'zod';
|
|
2
3
|
import type { KeywordExtractPrompt, QuestionExtractPrompt, SummaryPrompt, TitleExtractorPrompt, TitleCombinePrompt } from '../prompts/index.js';
|
|
3
4
|
export type KeywordExtractArgs = {
|
|
4
5
|
llm?: MastraLegacyLanguageModel | MastraLanguageModel;
|
|
@@ -22,6 +23,12 @@ export type TitleExtractorsArgs = {
|
|
|
22
23
|
nodeTemplate?: TitleExtractorPrompt['template'];
|
|
23
24
|
combineTemplate?: TitleCombinePrompt['template'];
|
|
24
25
|
};
|
|
26
|
+
export type SchemaExtractArgs<T extends z.ZodType = z.ZodType> = {
|
|
27
|
+
schema: T;
|
|
28
|
+
llm?: MastraLegacyLanguageModel | MastraLanguageModel;
|
|
29
|
+
instructions?: string;
|
|
30
|
+
metadataKey?: string;
|
|
31
|
+
};
|
|
25
32
|
export declare const STRIP_REGEX: RegExp;
|
|
26
33
|
export declare const baseLLM: MastraLegacyLanguageModel | MastraLanguageModel;
|
|
27
34
|
//# sourceMappingURL=types.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../../../src/document/extractors/types.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,mBAAmB,EAAE,yBAAyB,EAAE,MAAM,oBAAoB,CAAC;AACzF,OAAO,KAAK,EACV,oBAAoB,EACpB,qBAAqB,EACrB,aAAa,EACb,oBAAoB,EACpB,kBAAkB,EACnB,MAAM,YAAY,CAAC;AAEpB,MAAM,MAAM,kBAAkB,GAAG;IAC/B,GAAG,CAAC,EAAE,yBAAyB,GAAG,mBAAmB,CAAC;IACtD,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,cAAc,CAAC,EAAE,oBAAoB,CAAC,UAAU,CAAC,CAAC;CACnD,CAAC;AAEF,MAAM,MAAM,yBAAyB,GAAG;IACtC,GAAG,CAAC,EAAE,yBAAyB,GAAG,mBAAmB,CAAC;IACtD,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,cAAc,CAAC,EAAE,qBAAqB,CAAC,UAAU,CAAC,CAAC;IACnD,aAAa,CAAC,EAAE,OAAO,CAAC;CACzB,CAAC;AAEF,MAAM,MAAM,kBAAkB,GAAG;IAC/B,GAAG,CAAC,EAAE,yBAAyB,GAAG,mBAAmB,CAAC;IACtD,SAAS,CAAC,EAAE,MAAM,EAAE,CAAC;IACrB,cAAc,CAAC,EAAE,aAAa,CAAC,UAAU,CAAC,CAAC;CAC5C,CAAC;AAEF,MAAM,MAAM,mBAAmB,GAAG;IAChC,GAAG,CAAC,EAAE,yBAAyB,GAAG,mBAAmB,CAAC;IACtD,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,YAAY,CAAC,EAAE,oBAAoB,CAAC,UAAU,CAAC,CAAC;IAChD,eAAe,CAAC,EAAE,kBAAkB,CAAC,UAAU,CAAC,CAAC;CAClD,CAAC;AAEF,eAAO,MAAM,WAAW,QAAmB,CAAC;AAG5C,eAAO,MAAM,OAAO,EAAE,yBAAyB,GAAG,mBAAsC,CAAC"}
|
|
1
|
+
{"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../../../src/document/extractors/types.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,mBAAmB,EAAE,yBAAyB,EAAE,MAAM,oBAAoB,CAAC;AACzF,OAAO,KAAK,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AAC7B,OAAO,KAAK,EACV,oBAAoB,EACpB,qBAAqB,EACrB,aAAa,EACb,oBAAoB,EACpB,kBAAkB,EACnB,MAAM,YAAY,CAAC;AAEpB,MAAM,MAAM,kBAAkB,GAAG;IAC/B,GAAG,CAAC,EAAE,yBAAyB,GAAG,mBAAmB,CAAC;IACtD,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,cAAc,CAAC,EAAE,oBAAoB,CAAC,UAAU,CAAC,CAAC;CACnD,CAAC;AAEF,MAAM,MAAM,yBAAyB,GAAG;IACtC,GAAG,CAAC,EAAE,yBAAyB,GAAG,mBAAmB,CAAC;IACtD,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,cAAc,CAAC,EAAE,qBAAqB,CAAC,UAAU,CAAC,CAAC;IACnD,aAAa,CAAC,EAAE,OAAO,CAAC;CACzB,CAAC;AAEF,MAAM,MAAM,kBAAkB,GAAG;IAC/B,GAAG,CAAC,EAAE,yBAAyB,GAAG,mBAAmB,CAAC;IACtD,SAAS,CAAC,EAAE,MAAM,EAAE,CAAC;IACrB,cAAc,CAAC,EAAE,aAAa,CAAC,UAAU,CAAC,CAAC;CAC5C,CAAC;AAEF,MAAM,MAAM,mBAAmB,GAAG;IAChC,GAAG,CAAC,EAAE,yBAAyB,GAAG,mBAAmB,CAAC;IACtD,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,YAAY,CAAC,EAAE,oBAAoB,CAAC,UAAU,CAAC,CAAC;IAChD,eAAe,CAAC,EAAE,kBAAkB,CAAC,UAAU,CAAC,CAAC;CAClD,CAAC;AAEF,MAAM,MAAM,iBAAiB,CAAC,CAAC,SAAS,CAAC,CAAC,OAAO,GAAG,CAAC,CAAC,OAAO,IAAI;IAC/D,MAAM,EAAE,CAAC,CAAC;IACV,GAAG,CAAC,EAAE,yBAAyB,GAAG,mBAAmB,CAAC;IACtD,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,WAAW,CAAC,EAAE,MAAM,CAAC;CACtB,CAAC;AAEF,eAAO,MAAM,WAAW,QAAmB,CAAC;AAG5C,eAAO,MAAM,OAAO,EAAE,yBAAyB,GAAG,mBAAsC,CAAC"}
|
|
@@ -7,7 +7,6 @@ export declare class SentenceTransformer extends TextTransformer {
|
|
|
7
7
|
protected sentenceEnders: string[];
|
|
8
8
|
protected fallbackToWords: boolean;
|
|
9
9
|
protected fallbackToCharacters: boolean;
|
|
10
|
-
protected keepSeparator: boolean | 'start' | 'end';
|
|
11
10
|
constructor(options: SentenceChunkOptions);
|
|
12
11
|
private detectSentenceBoundaries;
|
|
13
12
|
private isRealSentenceBoundary;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"sentence.d.ts","sourceRoot":"","sources":["../../../src/document/transformers/sentence.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,oBAAoB,EAAE,MAAM,UAAU,CAAC;AACrD,OAAO,EAAE,eAAe,EAAE,MAAM,QAAQ,CAAC;AAEzC,qBAAa,mBAAoB,SAAQ,eAAe;IACtD,SAAS,CAAC,OAAO,EAAE,MAAM,CAAC;IAC1B,SAAS,CAAC,OAAO,EAAE,MAAM,CAAC;IAC1B,SAAS,CAAC,UAAU,EAAE,MAAM,CAAC;IAC7B,SAAS,CAAC,cAAc,EAAE,MAAM,EAAE,CAAC;IACnC,SAAS,CAAC,eAAe,EAAE,OAAO,CAAC;IACnC,SAAS,CAAC,oBAAoB,EAAE,OAAO,CAAC;
|
|
1
|
+
{"version":3,"file":"sentence.d.ts","sourceRoot":"","sources":["../../../src/document/transformers/sentence.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,oBAAoB,EAAE,MAAM,UAAU,CAAC;AACrD,OAAO,EAAE,eAAe,EAAE,MAAM,QAAQ,CAAC;AAEzC,qBAAa,mBAAoB,SAAQ,eAAe;IACtD,SAAS,CAAC,OAAO,EAAE,MAAM,CAAC;IAC1B,SAAS,CAAC,OAAO,EAAE,MAAM,CAAC;IAC1B,SAAS,CAAC,UAAU,EAAE,MAAM,CAAC;IAC7B,SAAS,CAAC,cAAc,EAAE,MAAM,EAAE,CAAC;IACnC,SAAS,CAAC,eAAe,EAAE,OAAO,CAAC;IACnC,SAAS,CAAC,oBAAoB,EAAE,OAAO,CAAC;gBAE5B,OAAO,EAAE,oBAAoB;IAsBzC,OAAO,CAAC,wBAAwB;IA+BhC,OAAO,CAAC,sBAAsB;IAqB9B,OAAO,CAAC,oBAAoB;IA8B5B;;OAEG;IACH,OAAO,CAAC,wBAAwB;IAsDhC;;OAEG;IACH,OAAO,CAAC,uBAAuB;IAqB/B,OAAO,CAAC,sBAAsB;IAmC9B,OAAO,CAAC,2BAA2B;IAsBnC,OAAO,CAAC,wBAAwB;IA4BhC,OAAO,CAAC,kBAAkB;IAqB1B,SAAS,CAAC,EAAE,IAAI,EAAE,EAAE;QAAE,IAAI,EAAE,MAAM,CAAA;KAAE,GAAG,MAAM,EAAE;CAShD"}
|
|
@@ -5,10 +5,10 @@ export declare abstract class TextTransformer implements Transformer {
|
|
|
5
5
|
protected maxSize: number;
|
|
6
6
|
protected overlap: number;
|
|
7
7
|
protected lengthFunction: (text: string) => number;
|
|
8
|
-
protected
|
|
8
|
+
protected separatorPosition?: 'start' | 'end';
|
|
9
9
|
protected addStartIndex: boolean;
|
|
10
10
|
protected stripWhitespace: boolean;
|
|
11
|
-
constructor({ maxSize, overlap, lengthFunction,
|
|
11
|
+
constructor({ maxSize, overlap, lengthFunction, separatorPosition, addStartIndex, stripWhitespace, }: BaseChunkOptions);
|
|
12
12
|
setAddStartIndex(value: boolean): void;
|
|
13
13
|
abstract splitText({ text }: {
|
|
14
14
|
text: string;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"text.d.ts","sourceRoot":"","sources":["../../../src/document/transformers/text.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,MAAM,WAAW,CAAC;AAErC,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,UAAU,CAAC;AAEjD,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,eAAe,CAAC;AAEjD,8BAAsB,eAAgB,YAAW,WAAW;IAC1D,SAAS,CAAC,OAAO,EAAE,MAAM,CAAC;IAC1B,SAAS,CAAC,OAAO,EAAE,MAAM,CAAC;IAC1B,SAAS,CAAC,cAAc,EAAE,CAAC,IAAI,EAAE,MAAM,KAAK,MAAM,CAAC;IACnD,SAAS,CAAC,
|
|
1
|
+
{"version":3,"file":"text.d.ts","sourceRoot":"","sources":["../../../src/document/transformers/text.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,MAAM,WAAW,CAAC;AAErC,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,UAAU,CAAC;AAEjD,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,eAAe,CAAC;AAEjD,8BAAsB,eAAgB,YAAW,WAAW;IAC1D,SAAS,CAAC,OAAO,EAAE,MAAM,CAAC;IAC1B,SAAS,CAAC,OAAO,EAAE,MAAM,CAAC;IAC1B,SAAS,CAAC,cAAc,EAAE,CAAC,IAAI,EAAE,MAAM,KAAK,MAAM,CAAC;IACnD,SAAS,CAAC,iBAAiB,CAAC,EAAE,OAAO,GAAG,KAAK,CAAC;IAC9C,SAAS,CAAC,aAAa,EAAE,OAAO,CAAC;IACjC,SAAS,CAAC,eAAe,EAAE,OAAO,CAAC;gBAEvB,EACV,OAAc,EACd,OAAa,EACb,cAA8C,EAC9C,iBAAiB,EACjB,aAAqB,EACrB,eAAsB,GACvB,EAAE,gBAAgB;IAYnB,gBAAgB,CAAC,KAAK,EAAE,OAAO,GAAG,IAAI;IAItC,QAAQ,CAAC,SAAS,CAAC,EAAE,IAAI,EAAE,EAAE;QAAE,IAAI,EAAE,MAAM,CAAA;KAAE,GAAG,MAAM,EAAE;IAExD,eAAe,CAAC,KAAK,EAAE,MAAM,EAAE,EAAE,SAAS,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,EAAE,GAAG,QAAQ,EAAE;IA4B/E,cAAc,CAAC,SAAS,EAAE,QAAQ,EAAE,GAAG,QAAQ,EAAE;IAUjD,kBAAkB,CAAC,SAAS,EAAE,QAAQ,EAAE,GAAG,QAAQ,EAAE;IAYrD,SAAS,CAAC,QAAQ,CAAC,IAAI,EAAE,MAAM,EAAE,EAAE,SAAS,EAAE,MAAM,GAAG,MAAM,GAAG,IAAI;IAQpE,SAAS,CAAC,WAAW,CAAC,MAAM,EAAE,MAAM,EAAE,EAAE,SAAS,EAAE,MAAM,GAAG,MAAM,EAAE;CA4DrE"}
|
package/dist/document/types.d.ts
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import type { TiktokenEncoding, TiktokenModel } from 'js-tiktoken';
|
|
2
|
-
import type { TitleExtractorsArgs, SummaryExtractArgs, QuestionAnswerExtractArgs, KeywordExtractArgs } from './extractors/index.js';
|
|
2
|
+
import type { TitleExtractorsArgs, SummaryExtractArgs, QuestionAnswerExtractArgs, KeywordExtractArgs, SchemaExtractArgs } from './extractors/index.js';
|
|
3
3
|
export declare enum Language {
|
|
4
4
|
CPP = "cpp",
|
|
5
5
|
GO = "go",
|
|
@@ -33,12 +33,13 @@ export type ExtractParams = {
|
|
|
33
33
|
summary?: SummaryExtractArgs | boolean;
|
|
34
34
|
questions?: QuestionAnswerExtractArgs | boolean;
|
|
35
35
|
keywords?: KeywordExtractArgs | boolean;
|
|
36
|
+
schema?: SchemaExtractArgs;
|
|
36
37
|
};
|
|
37
38
|
export type BaseChunkOptions = {
|
|
38
39
|
maxSize?: number;
|
|
39
40
|
overlap?: number;
|
|
40
41
|
lengthFunction?: (text: string) => number;
|
|
41
|
-
|
|
42
|
+
separatorPosition?: 'start' | 'end';
|
|
42
43
|
addStartIndex?: boolean;
|
|
43
44
|
stripWhitespace?: boolean;
|
|
44
45
|
};
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../../src/document/types.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,gBAAgB,EAAE,aAAa,EAAE,MAAM,aAAa,CAAC;AACnE,OAAO,KAAK,EACV,mBAAmB,EACnB,kBAAkB,EAClB,yBAAyB,EACzB,kBAAkB,
|
|
1
|
+
{"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../../src/document/types.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,gBAAgB,EAAE,aAAa,EAAE,MAAM,aAAa,CAAC;AACnE,OAAO,KAAK,EACV,mBAAmB,EACnB,kBAAkB,EAClB,yBAAyB,EACzB,kBAAkB,EAClB,iBAAiB,EAClB,MAAM,cAAc,CAAC;AAEtB,oBAAY,QAAQ;IAClB,GAAG,QAAQ;IACX,EAAE,OAAO;IACT,IAAI,SAAS;IACb,MAAM,WAAW;IACjB,EAAE,OAAO;IACT,EAAE,OAAO;IACT,GAAG,QAAQ;IACX,KAAK,UAAU;IACf,MAAM,WAAW;IACjB,GAAG,QAAQ;IACX,IAAI,SAAS;IACb,IAAI,SAAS;IACb,KAAK,UAAU;IACf,KAAK,UAAU;IACf,QAAQ,aAAa;IACrB,KAAK,UAAU;IACf,IAAI,SAAS;IACb,GAAG,QAAQ;IACX,MAAM,WAAW;IACjB,KAAK,UAAU;IACf,CAAC,MAAM;IACP,GAAG,QAAQ;IACX,IAAI,SAAS;IACb,OAAO,YAAY;IACnB,MAAM,WAAW;IACjB,UAAU,eAAe;CAC1B;AAED,MAAM,MAAM,aAAa,GAAG;IAC1B,KAAK,CAAC,EAAE,mBAAmB,GAAG,OAAO,CAAC;IACtC,OAAO,CAAC,EAAE,kBAAkB,GAAG,OAAO,CAAC;IACvC,SAAS,CAAC,EAAE,yBAAyB,GAAG,OAAO,CAAC;IAChD,QAAQ,CAAC,EAAE,kBAAkB,GAAG,OAAO,CAAC;IACxC,MAAM,CAAC,EAAE,iBAAiB,CAAC;CAC5B,CAAC;AAEF,MAAM,MAAM,gBAAgB,GAAG;IAC7B,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,cAAc,CAAC,EAAE,CAAC,IAAI,EAAE,MAAM,KAAK,MAAM,CAAC;IAC1C,iBAAiB,CAAC,EAAE,OAAO,GAAG,KAAK,CAAC;IACpC,aAAa,CAAC,EAAE,OAAO,CAAC;IACxB,eAAe,CAAC,EAAE,OAAO,CAAC;CAC3B,CAAC;AAEF,MAAM,MAAM,qBAAqB,GAAG,gBAAgB,GAAG;IACrD,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,gBAAgB,CAAC,EAAE,OAAO,CAAC;CAC5B,CAAC;AAEF,MAAM,MAAM,qBAAqB,GAAG,gBAAgB,GAAG;IACrD,UAAU,CAAC,EAAE,MAAM,EAAE,CAAC;IACtB,gBAAgB,CAAC,EAAE,OAAO,CAAC;IAC3B,QAAQ,CAAC,EAAE,QAAQ,CAAC;CACrB,CAAC;AAEF,MAAM,MAAM,iBAAiB,GAAG,gBAAgB,GAAG;IACjD,YAAY,CAAC,EAAE,gBAAgB,CAAC;IAChC,SAAS,CAAC,EAAE,aAAa,CAAC;IAC1B,cAAc,CAAC,EAAE,GAAG,CAAC,MAAM,CAAC,GAAG,KAAK,CAAC;IACrC,iBAAiB,CAAC,EAAE,GAAG,CAAC,MAAM,CAAC,GAAG,KAAK,CAAC;CACzC,CAAC;AAEF,MAAM,MAAM,oBAAoB,GAAG,gBAAgB,GAAG;IACpD,OAAO,CAAC,EAAE,CAAC,MAAM,EAAE,MAAM,CAAC,EAAE,CAAC;IAC7B,cAAc,CAAC,EAAE,OAAO,CAAC;IACzB,YAAY,CAAC,EAAE,OAAO,CAAC;CACxB,CAAC;AAEF,MAAM,MAAM,4BAA4B,GAAG,gBAAgB,GAAG;IAC5D,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,YAAY,CAAC,EAAE,gBAAgB,CAAC;IAChC,SAAS,CAAC,EAAE,aAAa,CAAC;IAC1B,cAAc,C
AAC,EAAE,GAAG,CAAC,MAAM,CAAC,GAAG,KAAK,CAAC;IACrC,iBAAiB,CAAC,EAAE,GAAG,CAAC,MAAM,CAAC,GAAG,KAAK,CAAC;CACzC,CAAC;AAEF,MAAM,MAAM,gBAAgB,GAAG,gBAAgB,GAC7C,CACI;IAAE,OAAO,EAAE,CAAC,MAAM,EAAE,MAAM,CAAC,EAAE,CAAC;IAAC,QAAQ,CAAC,EAAE,KAAK,CAAC;IAAC,cAAc,CAAC,EAAE,OAAO,CAAA;CAAE,GAC3E;IAAE,QAAQ,EAAE,CAAC,MAAM,EAAE,MAAM,CAAC,EAAE,CAAC;IAAC,OAAO,CAAC,EAAE,KAAK,CAAA;CAAE,CACpD,GAAG;IAAE,cAAc,CAAC,EAAE,OAAO,CAAA;CAAE,CAAC;AAEnC,MAAM,MAAM,gBAAgB,GAAG,gBAAgB,GAAG;IAChD,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,WAAW,CAAC,EAAE,OAAO,CAAC;IACtB,YAAY,CAAC,EAAE,OAAO,CAAC;CACxB,CAAC;AAEF,MAAM,MAAM,iBAAiB,GAAG,gBAAgB,GAAG,EAAE,CAAC;AAEtD,MAAM,MAAM,oBAAoB,GAAG,gBAAgB,GAAG;IACpD,OAAO,EAAE,MAAM,CAAC;IAChB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,cAAc,CAAC,EAAE,MAAM,EAAE,CAAC;IAC1B,eAAe,CAAC,EAAE,OAAO,CAAC;IAC1B,oBAAoB,CAAC,EAAE,OAAO,CAAC;CAChC,CAAC;AAEF,MAAM,MAAM,eAAe,GAAG;IAC5B,SAAS,EAAE,qBAAqB,CAAC;IACjC,SAAS,EAAE,qBAAqB,CAAC;IACjC,KAAK,EAAE,iBAAiB,CAAC;IACzB,QAAQ,EAAE,oBAAoB,CAAC;IAC/B,IAAI,EAAE,gBAAgB,CAAC;IACvB,IAAI,EAAE,gBAAgB,CAAC;IACvB,KAAK,EAAE,iBAAiB,CAAC;IACzB,QAAQ,EAAE,oBAAoB,CAAC;IAC/B,mBAAmB,EAAE,4BAA4B,CAAC;CACnD,CAAC;AAEF,MAAM,MAAM,aAAa,GACrB,WAAW,GACX,WAAW,GACX,OAAO,GACP,UAAU,GACV,MAAM,GACN,MAAM,GACN,OAAO,GACP,UAAU,GACV,mBAAmB,CAAC;AAExB,MAAM,MAAM,WAAW,GACnB,CAAC;IAAE,QAAQ,CAAC,EAAE,WAAW,CAAA;CAAE,GAAG,qBAAqB,GAAG;IAAE,OAAO,CAAC,EAAE,aAAa,CAAA;CAAE,CAAC,GAClF,CAAC;IAAE,QAAQ,EAAE,WAAW,CAAA;CAAE,GAAG,qBAAqB,GAAG;IAAE,OAAO,CAAC,EAAE,aAAa,CAAA;CAAE,CAAC,GACjF,CAAC;IAAE,QAAQ,EAAE,OAAO,CAAA;CAAE,GAAG,iBAAiB,GAAG;IAAE,OAAO,CAAC,EAAE,aAAa,CAAA;CAAE,CAAC,GACzE,CAAC;IAAE,QAAQ,EAAE,UAAU,CAAA;CAAE,GAAG,oBAAoB,GAAG;IAAE,OAAO,CAAC,EAAE,aAAa,CAAA;CAAE,CAAC,GAC/E,CAAC;IAAE,QAAQ,EAAE,MAAM,CAAA;CAAE,GAAG,gBAAgB,GAAG;IAAE,OAAO,CAAC,EAAE,aAAa,CAAA;CAAE,CAAC,GACvE,CAAC;IAAE,QAAQ,EAAE,MAAM,CAAA;CAAE,GAAG,gBAAgB,GAAG;IAAE,OAAO,CAAC,EAAE,aAAa,CAAA;CAAE,CAAC,GACvE,CAAC;IAAE,QAAQ,EAAE,OAAO,CAAA;CAAE,GAAG,iBAAiB,GAAG;IAAE,OAAO,CAAC,EAAE,aAAa,CAAA;CAAE,CAAC,GACzE,CAAC;IAAE,QAAQ,EAAE,UAAU,CA
AA;CAAE,GAAG,oBAAoB,GAAG;IAAE,OAAO,CAAC,EAAE,aAAa,CAAA;CAAE,CAAC,GAC/E,CAAC;IAAE,QAAQ,EAAE,mBAAmB,CAAA;CAAE,GAAG,4BAA4B,GAAG;IAAE,OAAO,CAAC,EAAE,aAAa,CAAA;CAAE,CAAC,CAAC"}
|
package/dist/index.cjs
CHANGED
|
@@ -4450,6 +4450,44 @@ var KeywordExtractor = class extends BaseExtractor {
|
|
|
4450
4450
|
return results;
|
|
4451
4451
|
}
|
|
4452
4452
|
};
|
|
4453
|
+
var SchemaExtractor = class extends BaseExtractor {
|
|
4454
|
+
schema;
|
|
4455
|
+
llm;
|
|
4456
|
+
instructions;
|
|
4457
|
+
metadataKey;
|
|
4458
|
+
constructor(options) {
|
|
4459
|
+
super();
|
|
4460
|
+
this.schema = options.schema;
|
|
4461
|
+
this.llm = options.llm;
|
|
4462
|
+
this.instructions = options.instructions;
|
|
4463
|
+
this.metadataKey = options.metadataKey;
|
|
4464
|
+
}
|
|
4465
|
+
async extract(nodes) {
|
|
4466
|
+
const agent$1 = new agent.Agent({
|
|
4467
|
+
name: "schema-extractor",
|
|
4468
|
+
id: "schema-extractor",
|
|
4469
|
+
instructions: this.instructions ?? "Extract structured data from the provided text.",
|
|
4470
|
+
model: this.llm ?? baseLLM
|
|
4471
|
+
});
|
|
4472
|
+
const results = await Promise.all(
|
|
4473
|
+
nodes.map(async (node) => {
|
|
4474
|
+
try {
|
|
4475
|
+
const result = await agent$1.generate([{ role: "user", content: node.getContent() }], {
|
|
4476
|
+
structuredOutput: { schema: this.schema }
|
|
4477
|
+
});
|
|
4478
|
+
if (this.metadataKey) {
|
|
4479
|
+
return { [this.metadataKey]: result.object };
|
|
4480
|
+
}
|
|
4481
|
+
return result.object;
|
|
4482
|
+
} catch (error) {
|
|
4483
|
+
console.error("Schema extraction failed:", error);
|
|
4484
|
+
return {};
|
|
4485
|
+
}
|
|
4486
|
+
})
|
|
4487
|
+
);
|
|
4488
|
+
return results;
|
|
4489
|
+
}
|
|
4490
|
+
};
|
|
4453
4491
|
|
|
4454
4492
|
// src/document/types.ts
|
|
4455
4493
|
var Language = /* @__PURE__ */ ((Language2) => {
|
|
@@ -4487,14 +4525,14 @@ var TextTransformer = class {
|
|
|
4487
4525
|
maxSize;
|
|
4488
4526
|
overlap;
|
|
4489
4527
|
lengthFunction;
|
|
4490
|
-
|
|
4528
|
+
separatorPosition;
|
|
4491
4529
|
addStartIndex;
|
|
4492
4530
|
stripWhitespace;
|
|
4493
4531
|
constructor({
|
|
4494
4532
|
maxSize = 4e3,
|
|
4495
4533
|
overlap = 200,
|
|
4496
4534
|
lengthFunction = (text) => text.length,
|
|
4497
|
-
|
|
4535
|
+
separatorPosition,
|
|
4498
4536
|
addStartIndex = false,
|
|
4499
4537
|
stripWhitespace = true
|
|
4500
4538
|
}) {
|
|
@@ -4504,7 +4542,7 @@ var TextTransformer = class {
|
|
|
4504
4542
|
this.maxSize = maxSize;
|
|
4505
4543
|
this.overlap = overlap;
|
|
4506
4544
|
this.lengthFunction = lengthFunction;
|
|
4507
|
-
this.
|
|
4545
|
+
this.separatorPosition = separatorPosition;
|
|
4508
4546
|
this.addStartIndex = addStartIndex;
|
|
4509
4547
|
this.stripWhitespace = stripWhitespace;
|
|
4510
4548
|
}
|
|
@@ -4610,11 +4648,11 @@ var TextTransformer = class {
|
|
|
4610
4648
|
};
|
|
4611
4649
|
|
|
4612
4650
|
// src/document/transformers/character.ts
|
|
4613
|
-
function splitTextWithRegex(text, separator,
|
|
4651
|
+
function splitTextWithRegex(text, separator, separatorPosition) {
|
|
4614
4652
|
if (!separator) {
|
|
4615
4653
|
return text.split("");
|
|
4616
4654
|
}
|
|
4617
|
-
if (!
|
|
4655
|
+
if (!separatorPosition) {
|
|
4618
4656
|
return text.split(new RegExp(separator)).filter((s) => s !== "");
|
|
4619
4657
|
}
|
|
4620
4658
|
if (!text) {
|
|
@@ -4622,7 +4660,7 @@ function splitTextWithRegex(text, separator, keepSeparator) {
|
|
|
4622
4660
|
}
|
|
4623
4661
|
const splits = text.split(new RegExp(`(${separator})`));
|
|
4624
4662
|
const result = [];
|
|
4625
|
-
if (
|
|
4663
|
+
if (separatorPosition === "end") {
|
|
4626
4664
|
for (let i = 0; i < splits.length - 1; i += 2) {
|
|
4627
4665
|
if (i + 1 < splits.length) {
|
|
4628
4666
|
const chunk = splits[i] + (splits[i + 1] || "");
|
|
@@ -4654,7 +4692,7 @@ var CharacterTransformer = class extends TextTransformer {
|
|
|
4654
4692
|
}
|
|
4655
4693
|
splitText({ text }) {
|
|
4656
4694
|
const separator = this.isSeparatorRegex ? this.separator : this.separator.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
|
|
4657
|
-
const initialSplits = splitTextWithRegex(text, separator, this.
|
|
4695
|
+
const initialSplits = splitTextWithRegex(text, separator, this.separatorPosition);
|
|
4658
4696
|
const chunks = [];
|
|
4659
4697
|
for (const split of initialSplits) {
|
|
4660
4698
|
if (this.lengthFunction(split) <= this.maxSize) {
|
|
@@ -4709,9 +4747,9 @@ var RecursiveCharacterTransformer = class _RecursiveCharacterTransformer extends
|
|
|
4709
4747
|
}
|
|
4710
4748
|
}
|
|
4711
4749
|
const _separator = this.isSeparatorRegex ? separator : separator?.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
|
|
4712
|
-
const splits = splitTextWithRegex(text, _separator, this.
|
|
4750
|
+
const splits = splitTextWithRegex(text, _separator, this.separatorPosition);
|
|
4713
4751
|
const goodSplits = [];
|
|
4714
|
-
const mergeSeparator = this.
|
|
4752
|
+
const mergeSeparator = this.separatorPosition ? "" : separator;
|
|
4715
4753
|
for (const s of splits) {
|
|
4716
4754
|
if (this.lengthFunction(s) < this.maxSize) {
|
|
4717
4755
|
goodSplits.push(s);
|
|
@@ -5857,7 +5895,6 @@ var SentenceTransformer = class extends TextTransformer {
|
|
|
5857
5895
|
sentenceEnders;
|
|
5858
5896
|
fallbackToWords;
|
|
5859
5897
|
fallbackToCharacters;
|
|
5860
|
-
keepSeparator;
|
|
5861
5898
|
constructor(options) {
|
|
5862
5899
|
const parentOverlap = Math.min(options.overlap ?? 0, options.maxSize - 1);
|
|
5863
5900
|
const baseOptions = {
|
|
@@ -5872,7 +5909,6 @@ var SentenceTransformer = class extends TextTransformer {
|
|
|
5872
5909
|
this.sentenceEnders = options.sentenceEnders ?? [".", "!", "?"];
|
|
5873
5910
|
this.fallbackToWords = options.fallbackToWords ?? true;
|
|
5874
5911
|
this.fallbackToCharacters = options.fallbackToCharacters ?? true;
|
|
5875
|
-
this.keepSeparator = options.keepSeparator ?? false;
|
|
5876
5912
|
this.overlap = options.overlap ?? 0;
|
|
5877
5913
|
}
|
|
5878
5914
|
detectSentenceBoundaries(text) {
|
|
@@ -6173,8 +6209,8 @@ var TokenTransformer = class _TokenTransformer extends TextTransformer {
|
|
|
6173
6209
|
var baseChunkOptionsSchema = zod.z.object({
|
|
6174
6210
|
maxSize: zod.z.number().positive().optional(),
|
|
6175
6211
|
overlap: zod.z.number().min(0).optional(),
|
|
6176
|
-
lengthFunction: zod.z.
|
|
6177
|
-
|
|
6212
|
+
lengthFunction: zod.z.optional(zod.z.function()),
|
|
6213
|
+
separatorPosition: zod.z.enum(["start", "end"]).optional(),
|
|
6178
6214
|
addStartIndex: zod.z.boolean().optional(),
|
|
6179
6215
|
stripWhitespace: zod.z.boolean().optional()
|
|
6180
6216
|
});
|
|
@@ -6269,8 +6305,11 @@ var MDocument = class _MDocument {
|
|
|
6269
6305
|
});
|
|
6270
6306
|
this.type = type;
|
|
6271
6307
|
}
|
|
6272
|
-
async extractMetadata({ title, summary, questions, keywords }) {
|
|
6308
|
+
async extractMetadata({ title, summary, questions, keywords, schema }) {
|
|
6273
6309
|
const transformations = [];
|
|
6310
|
+
if (schema) {
|
|
6311
|
+
transformations.push(new SchemaExtractor(schema));
|
|
6312
|
+
}
|
|
6274
6313
|
if (typeof summary !== "undefined") {
|
|
6275
6314
|
transformations.push(new SummaryExtractor(typeof summary === "boolean" ? {} : summary));
|
|
6276
6315
|
}
|
|
@@ -6421,7 +6460,7 @@ var MDocument = class _MDocument {
|
|
|
6421
6460
|
const textSplitter = new RecursiveCharacterTransformer({
|
|
6422
6461
|
maxSize: options.maxSize,
|
|
6423
6462
|
overlap: options.overlap,
|
|
6424
|
-
|
|
6463
|
+
separatorPosition: options.separatorPosition,
|
|
6425
6464
|
addStartIndex: options.addStartIndex,
|
|
6426
6465
|
stripWhitespace: options.stripWhitespace
|
|
6427
6466
|
});
|
|
@@ -6437,7 +6476,7 @@ var MDocument = class _MDocument {
|
|
|
6437
6476
|
const textSplitter = new RecursiveCharacterTransformer({
|
|
6438
6477
|
maxSize: options.maxSize,
|
|
6439
6478
|
overlap: options.overlap,
|
|
6440
|
-
|
|
6479
|
+
separatorPosition: options.separatorPosition,
|
|
6441
6480
|
addStartIndex: options.addStartIndex,
|
|
6442
6481
|
stripWhitespace: options.stripWhitespace
|
|
6443
6482
|
});
|
|
@@ -6500,7 +6539,7 @@ var MDocument = class _MDocument {
|
|
|
6500
6539
|
sentenceEnders: options?.sentenceEnders,
|
|
6501
6540
|
fallbackToWords: options?.fallbackToWords,
|
|
6502
6541
|
fallbackToCharacters: options?.fallbackToCharacters,
|
|
6503
|
-
|
|
6542
|
+
separatorPosition: options?.separatorPosition,
|
|
6504
6543
|
lengthFunction: options?.lengthFunction,
|
|
6505
6544
|
addStartIndex: options?.addStartIndex,
|
|
6506
6545
|
stripWhitespace: options?.stripWhitespace
|