@mastra/rag 2.0.0-beta.6 → 2.0.0-beta.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34):
  1. package/CHANGELOG.md +148 -0
  2. package/dist/docs/README.md +1 -1
  3. package/dist/docs/SKILL.md +1 -1
  4. package/dist/docs/SOURCE_MAP.json +1 -1
  5. package/dist/docs/rag/02-chunking-and-embedding.md +0 -1
  6. package/dist/docs/rag/03-retrieval.md +5 -6
  7. package/dist/docs/rag/05-reference.md +22 -8
  8. package/dist/document/document.d.ts +1 -1
  9. package/dist/document/document.d.ts.map +1 -1
  10. package/dist/document/extractors/index.d.ts +2 -1
  11. package/dist/document/extractors/index.d.ts.map +1 -1
  12. package/dist/document/extractors/schema.d.ts +13 -0
  13. package/dist/document/extractors/schema.d.ts.map +1 -0
  14. package/dist/document/extractors/types.d.ts +7 -0
  15. package/dist/document/extractors/types.d.ts.map +1 -1
  16. package/dist/document/transformers/sentence.d.ts +0 -1
  17. package/dist/document/transformers/sentence.d.ts.map +1 -1
  18. package/dist/document/transformers/text.d.ts +2 -2
  19. package/dist/document/transformers/text.d.ts.map +1 -1
  20. package/dist/document/types.d.ts +3 -2
  21. package/dist/document/types.d.ts.map +1 -1
  22. package/dist/index.cjs +56 -17
  23. package/dist/index.cjs.map +1 -1
  24. package/dist/index.js +56 -17
  25. package/dist/index.js.map +1 -1
  26. package/dist/tools/document-chunker.d.ts +3 -2
  27. package/dist/tools/document-chunker.d.ts.map +1 -1
  28. package/dist/tools/graph-rag.d.ts +2 -2
  29. package/dist/tools/graph-rag.d.ts.map +1 -1
  30. package/dist/tools/vector-query.d.ts +2 -2
  31. package/dist/tools/vector-query.d.ts.map +1 -1
  32. package/dist/utils/tool-schemas.d.ts +2 -2
  33. package/dist/utils/tool-schemas.d.ts.map +1 -1
  34. package/package.json +2 -2
package/dist/index.js CHANGED
@@ -4444,6 +4444,44 @@ var KeywordExtractor = class extends BaseExtractor {
4444
4444
  return results;
4445
4445
  }
4446
4446
  };
4447
+ var SchemaExtractor = class extends BaseExtractor {
4448
+ schema;
4449
+ llm;
4450
+ instructions;
4451
+ metadataKey;
4452
+ constructor(options) {
4453
+ super();
4454
+ this.schema = options.schema;
4455
+ this.llm = options.llm;
4456
+ this.instructions = options.instructions;
4457
+ this.metadataKey = options.metadataKey;
4458
+ }
4459
+ async extract(nodes) {
4460
+ const agent = new Agent({
4461
+ name: "schema-extractor",
4462
+ id: "schema-extractor",
4463
+ instructions: this.instructions ?? "Extract structured data from the provided text.",
4464
+ model: this.llm ?? baseLLM
4465
+ });
4466
+ const results = await Promise.all(
4467
+ nodes.map(async (node) => {
4468
+ try {
4469
+ const result = await agent.generate([{ role: "user", content: node.getContent() }], {
4470
+ structuredOutput: { schema: this.schema }
4471
+ });
4472
+ if (this.metadataKey) {
4473
+ return { [this.metadataKey]: result.object };
4474
+ }
4475
+ return result.object;
4476
+ } catch (error) {
4477
+ console.error("Schema extraction failed:", error);
4478
+ return {};
4479
+ }
4480
+ })
4481
+ );
4482
+ return results;
4483
+ }
4484
+ };
4447
4485
 
4448
4486
  // src/document/types.ts
4449
4487
  var Language = /* @__PURE__ */ ((Language2) => {
@@ -4481,14 +4519,14 @@ var TextTransformer = class {
4481
4519
  maxSize;
4482
4520
  overlap;
4483
4521
  lengthFunction;
4484
- keepSeparator;
4522
+ separatorPosition;
4485
4523
  addStartIndex;
4486
4524
  stripWhitespace;
4487
4525
  constructor({
4488
4526
  maxSize = 4e3,
4489
4527
  overlap = 200,
4490
4528
  lengthFunction = (text) => text.length,
4491
- keepSeparator = false,
4529
+ separatorPosition,
4492
4530
  addStartIndex = false,
4493
4531
  stripWhitespace = true
4494
4532
  }) {
@@ -4498,7 +4536,7 @@ var TextTransformer = class {
4498
4536
  this.maxSize = maxSize;
4499
4537
  this.overlap = overlap;
4500
4538
  this.lengthFunction = lengthFunction;
4501
- this.keepSeparator = keepSeparator;
4539
+ this.separatorPosition = separatorPosition;
4502
4540
  this.addStartIndex = addStartIndex;
4503
4541
  this.stripWhitespace = stripWhitespace;
4504
4542
  }
@@ -4604,11 +4642,11 @@ var TextTransformer = class {
4604
4642
  };
4605
4643
 
4606
4644
  // src/document/transformers/character.ts
4607
- function splitTextWithRegex(text, separator, keepSeparator) {
4645
+ function splitTextWithRegex(text, separator, separatorPosition) {
4608
4646
  if (!separator) {
4609
4647
  return text.split("");
4610
4648
  }
4611
- if (!keepSeparator) {
4649
+ if (!separatorPosition) {
4612
4650
  return text.split(new RegExp(separator)).filter((s) => s !== "");
4613
4651
  }
4614
4652
  if (!text) {
@@ -4616,7 +4654,7 @@ function splitTextWithRegex(text, separator, keepSeparator) {
4616
4654
  }
4617
4655
  const splits = text.split(new RegExp(`(${separator})`));
4618
4656
  const result = [];
4619
- if (keepSeparator === "end") {
4657
+ if (separatorPosition === "end") {
4620
4658
  for (let i = 0; i < splits.length - 1; i += 2) {
4621
4659
  if (i + 1 < splits.length) {
4622
4660
  const chunk = splits[i] + (splits[i + 1] || "");
@@ -4648,7 +4686,7 @@ var CharacterTransformer = class extends TextTransformer {
4648
4686
  }
4649
4687
  splitText({ text }) {
4650
4688
  const separator = this.isSeparatorRegex ? this.separator : this.separator.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
4651
- const initialSplits = splitTextWithRegex(text, separator, this.keepSeparator);
4689
+ const initialSplits = splitTextWithRegex(text, separator, this.separatorPosition);
4652
4690
  const chunks = [];
4653
4691
  for (const split of initialSplits) {
4654
4692
  if (this.lengthFunction(split) <= this.maxSize) {
@@ -4703,9 +4741,9 @@ var RecursiveCharacterTransformer = class _RecursiveCharacterTransformer extends
4703
4741
  }
4704
4742
  }
4705
4743
  const _separator = this.isSeparatorRegex ? separator : separator?.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
4706
- const splits = splitTextWithRegex(text, _separator, this.keepSeparator);
4744
+ const splits = splitTextWithRegex(text, _separator, this.separatorPosition);
4707
4745
  const goodSplits = [];
4708
- const mergeSeparator = this.keepSeparator ? "" : separator;
4746
+ const mergeSeparator = this.separatorPosition ? "" : separator;
4709
4747
  for (const s of splits) {
4710
4748
  if (this.lengthFunction(s) < this.maxSize) {
4711
4749
  goodSplits.push(s);
@@ -5851,7 +5889,6 @@ var SentenceTransformer = class extends TextTransformer {
5851
5889
  sentenceEnders;
5852
5890
  fallbackToWords;
5853
5891
  fallbackToCharacters;
5854
- keepSeparator;
5855
5892
  constructor(options) {
5856
5893
  const parentOverlap = Math.min(options.overlap ?? 0, options.maxSize - 1);
5857
5894
  const baseOptions = {
@@ -5866,7 +5903,6 @@ var SentenceTransformer = class extends TextTransformer {
5866
5903
  this.sentenceEnders = options.sentenceEnders ?? [".", "!", "?"];
5867
5904
  this.fallbackToWords = options.fallbackToWords ?? true;
5868
5905
  this.fallbackToCharacters = options.fallbackToCharacters ?? true;
5869
- this.keepSeparator = options.keepSeparator ?? false;
5870
5906
  this.overlap = options.overlap ?? 0;
5871
5907
  }
5872
5908
  detectSentenceBoundaries(text) {
@@ -6167,8 +6203,8 @@ var TokenTransformer = class _TokenTransformer extends TextTransformer {
6167
6203
  var baseChunkOptionsSchema = z.object({
6168
6204
  maxSize: z.number().positive().optional(),
6169
6205
  overlap: z.number().min(0).optional(),
6170
- lengthFunction: z.function().optional(),
6171
- keepSeparator: z.union([z.boolean(), z.literal("start"), z.literal("end")]).optional(),
6206
+ lengthFunction: z.optional(z.function()),
6207
+ separatorPosition: z.enum(["start", "end"]).optional(),
6172
6208
  addStartIndex: z.boolean().optional(),
6173
6209
  stripWhitespace: z.boolean().optional()
6174
6210
  });
@@ -6263,8 +6299,11 @@ var MDocument = class _MDocument {
6263
6299
  });
6264
6300
  this.type = type;
6265
6301
  }
6266
- async extractMetadata({ title, summary, questions, keywords }) {
6302
+ async extractMetadata({ title, summary, questions, keywords, schema }) {
6267
6303
  const transformations = [];
6304
+ if (schema) {
6305
+ transformations.push(new SchemaExtractor(schema));
6306
+ }
6268
6307
  if (typeof summary !== "undefined") {
6269
6308
  transformations.push(new SummaryExtractor(typeof summary === "boolean" ? {} : summary));
6270
6309
  }
@@ -6415,7 +6454,7 @@ var MDocument = class _MDocument {
6415
6454
  const textSplitter = new RecursiveCharacterTransformer({
6416
6455
  maxSize: options.maxSize,
6417
6456
  overlap: options.overlap,
6418
- keepSeparator: options.keepSeparator,
6457
+ separatorPosition: options.separatorPosition,
6419
6458
  addStartIndex: options.addStartIndex,
6420
6459
  stripWhitespace: options.stripWhitespace
6421
6460
  });
@@ -6431,7 +6470,7 @@ var MDocument = class _MDocument {
6431
6470
  const textSplitter = new RecursiveCharacterTransformer({
6432
6471
  maxSize: options.maxSize,
6433
6472
  overlap: options.overlap,
6434
- keepSeparator: options.keepSeparator,
6473
+ separatorPosition: options.separatorPosition,
6435
6474
  addStartIndex: options.addStartIndex,
6436
6475
  stripWhitespace: options.stripWhitespace
6437
6476
  });
@@ -6494,7 +6533,7 @@ var MDocument = class _MDocument {
6494
6533
  sentenceEnders: options?.sentenceEnders,
6495
6534
  fallbackToWords: options?.fallbackToWords,
6496
6535
  fallbackToCharacters: options?.fallbackToCharacters,
6497
- keepSeparator: options?.keepSeparator,
6536
+ separatorPosition: options?.separatorPosition,
6498
6537
  lengthFunction: options?.lengthFunction,
6499
6538
  addStartIndex: options?.addStartIndex,
6500
6539
  stripWhitespace: options?.stripWhitespace