@mastra/rag 2.0.0-beta.6 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +472 -0
- package/dist/docs/README.md +1 -1
- package/dist/docs/SKILL.md +1 -1
- package/dist/docs/SOURCE_MAP.json +1 -1
- package/dist/docs/rag/02-chunking-and-embedding.md +0 -1
- package/dist/docs/rag/03-retrieval.md +5 -6
- package/dist/docs/rag/05-reference.md +22 -8
- package/dist/document/document.d.ts +1 -1
- package/dist/document/document.d.ts.map +1 -1
- package/dist/document/extractors/index.d.ts +2 -1
- package/dist/document/extractors/index.d.ts.map +1 -1
- package/dist/document/extractors/schema.d.ts +13 -0
- package/dist/document/extractors/schema.d.ts.map +1 -0
- package/dist/document/extractors/types.d.ts +7 -0
- package/dist/document/extractors/types.d.ts.map +1 -1
- package/dist/document/transformers/sentence.d.ts +0 -1
- package/dist/document/transformers/sentence.d.ts.map +1 -1
- package/dist/document/transformers/text.d.ts +2 -2
- package/dist/document/transformers/text.d.ts.map +1 -1
- package/dist/document/types.d.ts +3 -2
- package/dist/document/types.d.ts.map +1 -1
- package/dist/index.cjs +56 -17
- package/dist/index.cjs.map +1 -1
- package/dist/index.js +56 -17
- package/dist/index.js.map +1 -1
- package/dist/tools/document-chunker.d.ts +3 -2
- package/dist/tools/document-chunker.d.ts.map +1 -1
- package/dist/tools/graph-rag.d.ts +2 -2
- package/dist/tools/graph-rag.d.ts.map +1 -1
- package/dist/tools/vector-query.d.ts +2 -2
- package/dist/tools/vector-query.d.ts.map +1 -1
- package/dist/utils/tool-schemas.d.ts +2 -2
- package/dist/utils/tool-schemas.d.ts.map +1 -1
- package/package.json +5 -5
package/dist/index.js
CHANGED
|
@@ -4444,6 +4444,44 @@ var KeywordExtractor = class extends BaseExtractor {
|
|
|
4444
4444
|
return results;
|
|
4445
4445
|
}
|
|
4446
4446
|
};
|
|
4447
|
+
/**
 * Extractor that asks an agent for structured output conforming to a
 * caller-supplied schema, issuing one request per node.
 */
var SchemaExtractor = class extends BaseExtractor {
  schema;
  llm;
  instructions;
  metadataKey;
  /**
   * @param options - Object from which `schema`, `llm`, `instructions`
   *   and `metadataKey` are read and stored on the instance.
   */
  constructor(options) {
    super();
    const { schema, llm, instructions, metadataKey } = options;
    this.schema = schema;
    this.llm = llm;
    this.instructions = instructions;
    this.metadataKey = metadataKey;
  }
  /**
   * Runs schema extraction for every node concurrently.
   *
   * @param nodes - Items exposing `getContent()`; the content is sent as a
   *   single user message.
   * @returns One entry per node, in input order: the generated object
   *   (nested under `metadataKey` when that is set), or `{}` when the
   *   request for that node threw.
   */
  async extract(nodes) {
    const extractionAgent = new Agent({
      name: "schema-extractor",
      id: "schema-extractor",
      instructions: this.instructions ?? "Extract structured data from the provided text.",
      model: this.llm ?? baseLLM
    });
    const generationOptions = { structuredOutput: { schema: this.schema } };
    const extractFromNode = async (node) => {
      try {
        const messages = [{ role: "user", content: node.getContent() }];
        const response = await extractionAgent.generate(messages, generationOptions);
        // Only nest the payload when a metadata key was configured.
        return this.metadataKey ? { [this.metadataKey]: response.object } : response.object;
      } catch (error) {
        // Best effort: a failing node is logged and contributes an empty object.
        console.error("Schema extraction failed:", error);
        return {};
      }
    };
    return await Promise.all(nodes.map((node) => extractFromNode(node)));
  }
};
|
|
4447
4485
|
|
|
4448
4486
|
// src/document/types.ts
|
|
4449
4487
|
var Language = /* @__PURE__ */ ((Language2) => {
|
|
@@ -4481,14 +4519,14 @@ var TextTransformer = class {
|
|
|
4481
4519
|
maxSize;
|
|
4482
4520
|
overlap;
|
|
4483
4521
|
lengthFunction;
|
|
4484
|
-
|
|
4522
|
+
separatorPosition;
|
|
4485
4523
|
addStartIndex;
|
|
4486
4524
|
stripWhitespace;
|
|
4487
4525
|
constructor({
|
|
4488
4526
|
maxSize = 4e3,
|
|
4489
4527
|
overlap = 200,
|
|
4490
4528
|
lengthFunction = (text) => text.length,
|
|
4491
|
-
|
|
4529
|
+
separatorPosition,
|
|
4492
4530
|
addStartIndex = false,
|
|
4493
4531
|
stripWhitespace = true
|
|
4494
4532
|
}) {
|
|
@@ -4498,7 +4536,7 @@ var TextTransformer = class {
|
|
|
4498
4536
|
this.maxSize = maxSize;
|
|
4499
4537
|
this.overlap = overlap;
|
|
4500
4538
|
this.lengthFunction = lengthFunction;
|
|
4501
|
-
this.keepSeparator = keepSeparator;
|
|
4539
|
+
this.separatorPosition = separatorPosition;
|
|
4502
4540
|
this.addStartIndex = addStartIndex;
|
|
4503
4541
|
this.stripWhitespace = stripWhitespace;
|
|
4504
4542
|
}
|
|
@@ -4604,11 +4642,11 @@ var TextTransformer = class {
|
|
|
4604
4642
|
};
|
|
4605
4643
|
|
|
4606
4644
|
// src/document/transformers/character.ts
|
|
4607
|
-
function splitTextWithRegex(text, separator, keepSeparator) {
|
|
4645
|
+
function splitTextWithRegex(text, separator, separatorPosition) {
|
|
4608
4646
|
if (!separator) {
|
|
4609
4647
|
return text.split("");
|
|
4610
4648
|
}
|
|
4611
|
-
if (!keepSeparator) {
|
|
4649
|
+
if (!separatorPosition) {
|
|
4612
4650
|
return text.split(new RegExp(separator)).filter((s) => s !== "");
|
|
4613
4651
|
}
|
|
4614
4652
|
if (!text) {
|
|
@@ -4616,7 +4654,7 @@ function splitTextWithRegex(text, separator, keepSeparator) {
|
|
|
4616
4654
|
}
|
|
4617
4655
|
const splits = text.split(new RegExp(`(${separator})`));
|
|
4618
4656
|
const result = [];
|
|
4619
|
-
if (keepSeparator === "end") {
|
|
4657
|
+
if (separatorPosition === "end") {
|
|
4620
4658
|
for (let i = 0; i < splits.length - 1; i += 2) {
|
|
4621
4659
|
if (i + 1 < splits.length) {
|
|
4622
4660
|
const chunk = splits[i] + (splits[i + 1] || "");
|
|
@@ -4648,7 +4686,7 @@ var CharacterTransformer = class extends TextTransformer {
|
|
|
4648
4686
|
}
|
|
4649
4687
|
splitText({ text }) {
|
|
4650
4688
|
const separator = this.isSeparatorRegex ? this.separator : this.separator.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
|
|
4651
|
-
const initialSplits = splitTextWithRegex(text, separator, this.keepSeparator);
|
|
4689
|
+
const initialSplits = splitTextWithRegex(text, separator, this.separatorPosition);
|
|
4652
4690
|
const chunks = [];
|
|
4653
4691
|
for (const split of initialSplits) {
|
|
4654
4692
|
if (this.lengthFunction(split) <= this.maxSize) {
|
|
@@ -4703,9 +4741,9 @@ var RecursiveCharacterTransformer = class _RecursiveCharacterTransformer extends
|
|
|
4703
4741
|
}
|
|
4704
4742
|
}
|
|
4705
4743
|
const _separator = this.isSeparatorRegex ? separator : separator?.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
|
|
4706
|
-
const splits = splitTextWithRegex(text, _separator, this.keepSeparator);
|
|
4744
|
+
const splits = splitTextWithRegex(text, _separator, this.separatorPosition);
|
|
4707
4745
|
const goodSplits = [];
|
|
4708
|
-
const mergeSeparator = this.keepSeparator ? "" : separator;
|
|
4746
|
+
const mergeSeparator = this.separatorPosition ? "" : separator;
|
|
4709
4747
|
for (const s of splits) {
|
|
4710
4748
|
if (this.lengthFunction(s) < this.maxSize) {
|
|
4711
4749
|
goodSplits.push(s);
|
|
@@ -5851,7 +5889,6 @@ var SentenceTransformer = class extends TextTransformer {
|
|
|
5851
5889
|
sentenceEnders;
|
|
5852
5890
|
fallbackToWords;
|
|
5853
5891
|
fallbackToCharacters;
|
|
5854
|
-
keepSeparator;
|
|
5855
5892
|
constructor(options) {
|
|
5856
5893
|
const parentOverlap = Math.min(options.overlap ?? 0, options.maxSize - 1);
|
|
5857
5894
|
const baseOptions = {
|
|
@@ -5866,7 +5903,6 @@ var SentenceTransformer = class extends TextTransformer {
|
|
|
5866
5903
|
this.sentenceEnders = options.sentenceEnders ?? [".", "!", "?"];
|
|
5867
5904
|
this.fallbackToWords = options.fallbackToWords ?? true;
|
|
5868
5905
|
this.fallbackToCharacters = options.fallbackToCharacters ?? true;
|
|
5869
|
-
this.keepSeparator = options.keepSeparator ?? false;
|
|
5870
5906
|
this.overlap = options.overlap ?? 0;
|
|
5871
5907
|
}
|
|
5872
5908
|
detectSentenceBoundaries(text) {
|
|
@@ -6167,8 +6203,8 @@ var TokenTransformer = class _TokenTransformer extends TextTransformer {
|
|
|
6167
6203
|
var baseChunkOptionsSchema = z.object({
|
|
6168
6204
|
maxSize: z.number().positive().optional(),
|
|
6169
6205
|
overlap: z.number().min(0).optional(),
|
|
6170
|
-
lengthFunction: z.
|
|
6171
|
-
|
|
6206
|
+
lengthFunction: z.optional(z.function()),
|
|
6207
|
+
separatorPosition: z.enum(["start", "end"]).optional(),
|
|
6172
6208
|
addStartIndex: z.boolean().optional(),
|
|
6173
6209
|
stripWhitespace: z.boolean().optional()
|
|
6174
6210
|
});
|
|
@@ -6263,8 +6299,11 @@ var MDocument = class _MDocument {
|
|
|
6263
6299
|
});
|
|
6264
6300
|
this.type = type;
|
|
6265
6301
|
}
|
|
6266
|
-
async extractMetadata({ title, summary, questions, keywords }) {
|
|
6302
|
+
async extractMetadata({ title, summary, questions, keywords, schema }) {
|
|
6267
6303
|
const transformations = [];
|
|
6304
|
+
if (schema) {
|
|
6305
|
+
transformations.push(new SchemaExtractor(schema));
|
|
6306
|
+
}
|
|
6268
6307
|
if (typeof summary !== "undefined") {
|
|
6269
6308
|
transformations.push(new SummaryExtractor(typeof summary === "boolean" ? {} : summary));
|
|
6270
6309
|
}
|
|
@@ -6415,7 +6454,7 @@ var MDocument = class _MDocument {
|
|
|
6415
6454
|
const textSplitter = new RecursiveCharacterTransformer({
|
|
6416
6455
|
maxSize: options.maxSize,
|
|
6417
6456
|
overlap: options.overlap,
|
|
6418
|
-
|
|
6457
|
+
separatorPosition: options.separatorPosition,
|
|
6419
6458
|
addStartIndex: options.addStartIndex,
|
|
6420
6459
|
stripWhitespace: options.stripWhitespace
|
|
6421
6460
|
});
|
|
@@ -6431,7 +6470,7 @@ var MDocument = class _MDocument {
|
|
|
6431
6470
|
const textSplitter = new RecursiveCharacterTransformer({
|
|
6432
6471
|
maxSize: options.maxSize,
|
|
6433
6472
|
overlap: options.overlap,
|
|
6434
|
-
|
|
6473
|
+
separatorPosition: options.separatorPosition,
|
|
6435
6474
|
addStartIndex: options.addStartIndex,
|
|
6436
6475
|
stripWhitespace: options.stripWhitespace
|
|
6437
6476
|
});
|
|
@@ -6494,7 +6533,7 @@ var MDocument = class _MDocument {
|
|
|
6494
6533
|
sentenceEnders: options?.sentenceEnders,
|
|
6495
6534
|
fallbackToWords: options?.fallbackToWords,
|
|
6496
6535
|
fallbackToCharacters: options?.fallbackToCharacters,
|
|
6497
|
-
|
|
6536
|
+
separatorPosition: options?.separatorPosition,
|
|
6498
6537
|
lengthFunction: options?.lengthFunction,
|
|
6499
6538
|
addStartIndex: options?.addStartIndex,
|
|
6500
6539
|
stripWhitespace: options?.stripWhitespace
|