npm - @mastra/rag - Versions diffs - 0.1.19 → 0.1.20-alpha.1 - Mend

@mastra/rag 0.1.19 → 0.1.20-alpha.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (32) hide show

package/.turbo/turbo-build.log +7 -7
package/CHANGELOG.md +23 -0
package/dist/_tsup-dts-rollup.d.cts +225 -86
package/dist/_tsup-dts-rollup.d.ts +225 -86
package/dist/index.cjs +417 -131
package/dist/index.js +385 -99
package/package.json +2 -3
package/src/document/document.ts +6 -9
package/src/document/extractors/base.ts +30 -0
package/src/document/extractors/index.ts +1 -1
package/src/document/extractors/keywords.test.ts +1 -1
package/src/document/extractors/keywords.ts +7 -19
package/src/document/extractors/questions.test.ts +1 -1
package/src/document/extractors/questions.ts +7 -25
package/src/document/extractors/summary.test.ts +1 -1
package/src/document/extractors/summary.ts +7 -19
package/src/document/extractors/title.test.ts +1 -1
package/src/document/extractors/title.ts +7 -44
package/src/document/extractors/types.ts +1 -1
package/src/document/prompts/base.ts +77 -0
package/src/document/prompts/format.ts +9 -0
package/src/document/prompts/index.ts +15 -0
package/src/document/prompts/prompt.ts +60 -0
package/src/document/prompts/types.ts +29 -0
package/src/document/schema/index.ts +3 -0
package/src/document/schema/node.ts +187 -0
package/src/document/schema/types.ts +40 -0
package/src/document/transformers/html.ts +1 -1
package/src/document/transformers/json.ts +1 -1
package/src/document/transformers/markdown.ts +1 -1
package/src/document/transformers/text.ts +1 -1
package/src/document/transformers/transformer.ts +1 -1

package/dist/index.cjs CHANGED Viewed

@@ -1,6 +1,6 @@
 'use strict';
-var llamaindex = require('llamaindex');
+var crypto = require('crypto');
 var zod = require('zod');
 var nodeHtmlBetterParser = require('node-html-better-parser');
 var jsTiktoken = require('js-tiktoken');
@@ -14,6 +14,11 @@ var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
 var __getOwnPropNames = Object.getOwnPropertyNames;
 var __getProtoOf = Object.getPrototypeOf;
 var __hasOwnProp = Object.prototype.hasOwnProperty;
+var __knownSymbol = (name14, symbol15) => (symbol15 = Symbol[name14]) ? symbol15 : Symbol.for("Symbol." + name14);
+var __typeError = (msg) => {
+  throw TypeError(msg);
+};
+var __defNormalProp = (obj, key, value) => key in obj ? __defProp(obj, key, { enumerable: true, configurable: true, writable: true, value }) : obj[key] = value;
 var __commonJS = (cb, mod) => function __require() {
   return mod || (0, cb[__getOwnPropNames(cb)[0]])((mod = { exports: {} }).exports, mod), mod.exports;
 };
@@ -33,6 +38,43 @@ var __toESM = (mod, isNodeMode, target) => (target = mod != null ? __create(__ge
   __defProp(target, "default", { value: mod, enumerable: true }) ,
   mod
 ));
+var __decoratorStart = (base) => [, , , __create(null)];
+var __decoratorStrings = ["class", "method", "getter", "setter", "accessor", "field", "value", "get", "set"];
+var __expectFn = (fn) => fn !== void 0 && typeof fn !== "function" ? __typeError("Function expected") : fn;
+var __decoratorContext = (kind, name14, done, metadata, fns) => ({ kind: __decoratorStrings[kind], name: name14, metadata, addInitializer: (fn) => done._ ? __typeError("Already initialized") : fns.push(__expectFn(fn || null)) });
+var __decoratorMetadata = (array, target) => __defNormalProp(target, __knownSymbol("metadata"), array[3]);
+var __runInitializers = (array, flags, self, value) => {
+  for (var i = 0, fns = array[flags >> 1], n = fns && fns.length; i < n; i++) flags & 1 ? fns[i].call(self) : value = fns[i].call(self, value);
+  return value;
+};
+var __decorateElement = (array, flags, name14, decorators, target, extra) => {
+  var fn, it, done, ctx, access, k = flags & 7, s = false, p = false;
+  var j = array.length + 1 , key = __decoratorStrings[k + 5];
+  var initializers = (array[j - 1] = []), extraInitializers = array[j] || (array[j] = []);
+  var desc = ((target = target.prototype), __getOwnPropDesc({ get [name14]() {
+    return __privateGet(this, extra);
+  }, set [name14](x) {
+    return __privateSet(this, extra, x);
+  } }, name14));
+  for (var i = decorators.length - 1; i >= 0; i--) {
+    ctx = __decoratorContext(k, name14, done = {}, array[3], extraInitializers);
+    {
+      ctx.static = s, ctx.private = p, access = ctx.access = { has: (x) => name14 in x };
+      access.get = (x) => x[name14];
+      access.set = (x, y) => x[name14] = y;
+    }
+    it = (0, decorators[i])({ get: desc.get, set: desc.set } , ctx), done._ = 1;
+    if (it === void 0) __expectFn(it) && (desc[key] = it );
+    else if (typeof it !== "object" || it === null) __typeError("Object expected");
+    else __expectFn(fn = it.get) && (desc.get = fn), __expectFn(fn = it.set) && (desc.set = fn), __expectFn(fn = it.init) && initializers.unshift(fn);
+  }
+  return desc && __defProp(target, name14, desc), target;
+};
+var __publicField = (obj, key, value) => __defNormalProp(obj, typeof key !== "symbol" ? key + "" : key, value);
+var __accessCheck = (obj, member, msg) => member.has(obj) || __typeError("Cannot " + msg);
+var __privateGet = (obj, member, getter) => (__accessCheck(obj, member, "read from private field"), member.get(obj));
+var __privateAdd = (obj, member, value) => member.has(obj) ? __typeError("Cannot add the same private member more than once") : member instanceof WeakSet ? member.add(obj) : member.set(obj, value);
+var __privateSet = (obj, member, value, setter) => (__accessCheck(obj, member, "write to private field"), member.set(obj, value), value);
 // ../../node_modules/.pnpm/secure-json-parse@2.7.0/node_modules/secure-json-parse/index.js
 var require_secure_json_parse = __commonJS({
@@ -137,6 +179,275 @@ var require_secure_json_parse = __commonJS({
   }
 });
+// src/document/prompts/format.ts
+function format(str, params) {
+  return str.replace(/{(\w+)}/g, (_, k) => params[k] ?? "");
+}
+// src/document/prompts/base.ts
+var BasePromptTemplate = class {
+  templateVars = /* @__PURE__ */ new Set();
+  options = {};
+  constructor(options) {
+    const { templateVars } = options;
+    if (templateVars) {
+      this.templateVars = new Set(templateVars);
+    }
+    if (options.options) {
+      this.options = options.options;
+    }
+  }
+};
+var PromptTemplate = class _PromptTemplate extends BasePromptTemplate {
+  #template;
+  constructor(options) {
+    const { template, ...rest } = options;
+    super(rest);
+    this.#template = template;
+  }
+  partialFormat(options) {
+    const prompt = new _PromptTemplate({
+      template: this.template,
+      templateVars: [...this.templateVars],
+      options: this.options
+    });
+    prompt.options = {
+      ...prompt.options,
+      ...options
+    };
+    return prompt;
+  }
+  format(options) {
+    const allOptions = {
+      ...this.options,
+      ...options
+    };
+    return format(this.template, allOptions);
+  }
+  formatMessages(options) {
+    const prompt = this.format(options);
+    return [
+      {
+        role: "user",
+        content: prompt
+      }
+    ];
+  }
+  get template() {
+    return this.#template;
+  }
+};
+// src/document/prompts/prompt.ts
+var defaultSummaryPrompt = new PromptTemplate({
+  templateVars: ["context"],
+  template: `Write a summary of the following. Try to use only the information provided. Try to include as many key details as possible.
+{context}
+SUMMARY:"""
+`
+});
+var defaultKeywordExtractPrompt = new PromptTemplate({
+  templateVars: ["maxKeywords", "context"],
+  template: `
+Some text is provided below. Given the text, extract up to {maxKeywords} keywords from the text. Avoid stopwords.
+---------------------
+{context}
+---------------------
+Provide keywords in the following comma-separated format: 'KEYWORDS: <keywords>'
+`
+}).partialFormat({
+  maxKeywords: "10"
+});
+var defaultQuestionExtractPrompt = new PromptTemplate({
+  templateVars: ["numQuestions", "context"],
+  template: `(
+  "Given the contextual informations below, generate {numQuestions} questions this context can provides specific answers to which are unlikely to be found else where. Higher-level summaries of surrounding context may be provided as well. "
+  "Try using these summaries to generate better questions that this context can answer."
+  "---------------------"
+  "{context}"
+  "---------------------"
+  "Provide questions in the following format: 'QUESTIONS: <questions>'"
+)`
+}).partialFormat({
+  numQuestions: "5"
+});
+var defaultTitleExtractorPromptTemplate = new PromptTemplate({
+  templateVars: ["context"],
+  template: `{context}
+Give a title that summarizes all of the unique entities, titles or themes found in the context.
+Title: `
+});
+var defaultTitleCombinePromptTemplate = new PromptTemplate({
+  templateVars: ["context"],
+  template: `{context}
+Based on the above candidate titles and contents, what is the comprehensive title for this document?
+Title: `
+});
+var _hash_dec, _init, _hash;
+_hash_dec = [lazyInitHash];
+var BaseNode = class {
+  constructor(init) {
+    __publicField(this, "id_");
+    __publicField(this, "metadata");
+    __publicField(this, "relationships");
+    __privateAdd(this, _hash, __runInitializers(_init, 8, this, "")), __runInitializers(_init, 11, this);
+    const { id_, metadata, relationships } = init || {};
+    this.id_ = id_ ?? crypto.randomUUID();
+    this.metadata = metadata ?? {};
+    this.relationships = relationships ?? {};
+  }
+  get sourceNode() {
+    const relationship = this.relationships["SOURCE" /* SOURCE */];
+    if (Array.isArray(relationship)) {
+      throw new Error("Source object must be a single RelatedNodeInfo object");
+    }
+    return relationship;
+  }
+  get prevNode() {
+    const relationship = this.relationships["PREVIOUS" /* PREVIOUS */];
+    if (Array.isArray(relationship)) {
+      throw new Error("Previous object must be a single RelatedNodeInfo object");
+    }
+    return relationship;
+  }
+  get nextNode() {
+    const relationship = this.relationships["NEXT" /* NEXT */];
+    if (Array.isArray(relationship)) {
+      throw new Error("Next object must be a single RelatedNodeInfo object");
+    }
+    return relationship;
+  }
+  get parentNode() {
+    const relationship = this.relationships["PARENT" /* PARENT */];
+    if (Array.isArray(relationship)) {
+      throw new Error("Parent object must be a single RelatedNodeInfo object");
+    }
+    return relationship;
+  }
+  get childNodes() {
+    const relationship = this.relationships["CHILD" /* CHILD */];
+    if (!Array.isArray(relationship)) {
+      throw new Error("Child object must be a an array of RelatedNodeInfo objects");
+    }
+    return relationship;
+  }
+};
+_init = __decoratorStart();
+_hash = new WeakMap();
+__decorateElement(_init, 4, "hash", _hash_dec, BaseNode, _hash);
+__decoratorMetadata(_init, BaseNode);
+var TextNode = class extends BaseNode {
+  text;
+  startCharIdx;
+  endCharIdx;
+  metadataSeparator;
+  constructor(init = {}) {
+    super(init);
+    const { text, startCharIdx, endCharIdx, metadataSeparator } = init;
+    this.text = text ?? "";
+    if (startCharIdx) {
+      this.startCharIdx = startCharIdx;
+    }
+    if (endCharIdx) {
+      this.endCharIdx = endCharIdx;
+    }
+    this.metadataSeparator = metadataSeparator ?? "\n";
+  }
+  /**
+   * Generate a hash of the text node.
+   * The ID is not part of the hash as it can change independent of content.
+   * @returns
+   */
+  generateHash() {
+    const hashFunction = createSHA256();
+    hashFunction.update(`type=${this.type}`);
+    hashFunction.update(`startCharIdx=${this.startCharIdx} endCharIdx=${this.endCharIdx}`);
+    hashFunction.update(this.getContent());
+    return hashFunction.digest();
+  }
+  get type() {
+    return "TEXT" /* TEXT */;
+  }
+  getContent() {
+    const metadataStr = this.getMetadataStr().trim();
+    return `${metadataStr}
+${this.text}`.trim();
+  }
+  getMetadataStr() {
+    const usableMetadataKeys = new Set(Object.keys(this.metadata).sort());
+    return [...usableMetadataKeys].map((key) => `${key}: ${this.metadata[key]}`).join(this.metadataSeparator);
+  }
+  getNodeInfo() {
+    return { start: this.startCharIdx, end: this.endCharIdx };
+  }
+  getText() {
+    return this.text;
+  }
+};
+var Document = class extends TextNode {
+  constructor(init) {
+    super(init);
+  }
+  get type() {
+    return "DOCUMENT" /* DOCUMENT */;
+  }
+};
+function lazyInitHash(value, _context) {
+  return {
+    get() {
+      const oldValue = value.get.call(this);
+      if (oldValue === "") {
+        const hash = this.generateHash();
+        value.set.call(this, hash);
+      }
+      return value.get.call(this);
+    },
+    set(newValue) {
+      value.set.call(this, newValue);
+    },
+    init(value2) {
+      return value2;
+    }
+  };
+}
+function createSHA256() {
+  const hash = crypto.createHash("sha256");
+  return {
+    update(data) {
+      hash.update(data);
+    },
+    digest() {
+      return hash.digest("base64");
+    }
+  };
+}
+// src/document/extractors/base.ts
+var BaseExtractor = class {
+  isTextNodeOnly = true;
+  /**
+   *
+   * @param nodes Nodes to extract metadata from.
+   * @returns Metadata extracted from the nodes.
+   */
+  async processNodes(nodes) {
+    let newNodes = nodes;
+    const curMetadataList = await this.extract(newNodes);
+    for (const idx in newNodes) {
+      newNodes[idx].metadata = {
+        ...newNodes[idx].metadata,
+        ...curMetadataList[idx]
+      };
+    }
+    return newNodes;
+  }
+};
 // ../../node_modules/.pnpm/@ai-sdk+provider@1.1.3/node_modules/@ai-sdk/provider/dist/index.mjs
 var marker = "vercel.ai.error";
 var symbol = Symbol.for(marker);
@@ -2430,8 +2741,10 @@ var openaiTextEmbeddingResponseSchema = zod.z.object({
 });
 var modelMaxImagesPerCall = {
   "dall-e-3": 1,
-  "dall-e-2": 10
+  "dall-e-2": 10,
+  "gpt-image-1": 10
 };
+var hasDefaultResponseFormat = /* @__PURE__ */ new Set(["gpt-image-1"]);
 var OpenAIImageModel = class {
   constructor(modelId, settings, config) {
     this.modelId = modelId;
@@ -2481,7 +2794,7 @@ var OpenAIImageModel = class {
         n,
         size,
         ...(_d = providerOptions.openai) != null ? _d : {},
-        response_format: "b64_json"
+        ...!hasDefaultResponseFormat.has(this.modelId) ? { response_format: "b64_json" } : {}
       },
       failedResponseHandler: openaiFailedResponseHandler,
       successfulResponseHandler: createJsonResponseHandler(
@@ -2976,8 +3289,15 @@ var OpenAIResponsesLanguageModel = class {
       user: openaiOptions == null ? void 0 : openaiOptions.user,
       instructions: openaiOptions == null ? void 0 : openaiOptions.instructions,
       // model-specific settings:
-      ...modelConfig.isReasoningModel && (openaiOptions == null ? void 0 : openaiOptions.reasoningEffort) != null && {
-        reasoning: { effort: openaiOptions == null ? void 0 : openaiOptions.reasoningEffort }
+      ...modelConfig.isReasoningModel && ((openaiOptions == null ? void 0 : openaiOptions.reasoningEffort) != null || (openaiOptions == null ? void 0 : openaiOptions.reasoningSummary) != null) && {
+        reasoning: {
+          ...(openaiOptions == null ? void 0 : openaiOptions.reasoningEffort) != null && {
+            effort: openaiOptions.reasoningEffort
+          },
+          ...(openaiOptions == null ? void 0 : openaiOptions.reasoningSummary) != null && {
+            summary: openaiOptions.reasoningSummary
+          }
+        }
       },
       ...modelConfig.requiredAutoTruncation && {
         truncation: "auto"
@@ -3059,7 +3379,7 @@ var OpenAIResponsesLanguageModel = class {
     }
   }
   async doGenerate(options) {
-    var _a15, _b, _c, _d, _e;
+    var _a15, _b, _c, _d, _e, _f, _g;
     const { args: body, warnings } = this.getArgs(options);
     const {
       responseHeaders,
@@ -3112,7 +3432,13 @@ var OpenAIResponsesLanguageModel = class {
                 type: zod.z.literal("computer_call")
               }),
               zod.z.object({
-                type: zod.z.literal("reasoning")
+                type: zod.z.literal("reasoning"),
+                summary: zod.z.array(
+                  zod.z.object({
+                    type: zod.z.literal("summary_text"),
+                    text: zod.z.string()
+                  })
+                )
               })
             ])
           ),
@@ -3130,6 +3456,7 @@ var OpenAIResponsesLanguageModel = class {
       toolName: output.name,
       args: output.arguments
     }));
+    const reasoningSummary = (_b = (_a15 = response.output.find((item) => item.type === "reasoning")) == null ? void 0 : _a15.summary) != null ? _b : null;
     return {
       text: outputTextElements.map((content) => content.text).join("\n"),
       sources: outputTextElements.flatMap(
@@ -3144,10 +3471,14 @@ var OpenAIResponsesLanguageModel = class {
         })
       ),
       finishReason: mapOpenAIResponseFinishReason({
-        finishReason: (_a15 = response.incomplete_details) == null ? void 0 : _a15.reason,
+        finishReason: (_c = response.incomplete_details) == null ? void 0 : _c.reason,
         hasToolCalls: toolCalls.length > 0
       }),
       toolCalls: toolCalls.length > 0 ? toolCalls : void 0,
+      reasoning: reasoningSummary ? reasoningSummary.map((summary) => ({
+        type: "text",
+        text: summary.text
+      })) : void 0,
       usage: {
         promptTokens: response.usage.input_tokens,
         completionTokens: response.usage.output_tokens
@@ -3171,8 +3502,8 @@ var OpenAIResponsesLanguageModel = class {
       providerMetadata: {
         openai: {
           responseId: response.id,
-          cachedPromptTokens: (_c = (_b = response.usage.input_tokens_details) == null ? void 0 : _b.cached_tokens) != null ? _c : null,
-          reasoningTokens: (_e = (_d = response.usage.output_tokens_details) == null ? void 0 : _d.reasoning_tokens) != null ? _e : null
+          cachedPromptTokens: (_e = (_d = response.usage.input_tokens_details) == null ? void 0 : _d.cached_tokens) != null ? _e : null,
+          reasoningTokens: (_g = (_f = response.usage.output_tokens_details) == null ? void 0 : _f.reasoning_tokens) != null ? _g : null
         }
       },
       warnings
@@ -3255,6 +3586,11 @@ var OpenAIResponsesLanguageModel = class {
                 type: "text-delta",
                 textDelta: value.delta
               });
+            } else if (isResponseReasoningSummaryTextDeltaChunk(value)) {
+              controller.enqueue({
+                type: "reasoning",
+                textDelta: value.delta
+              });
             } else if (isResponseOutputItemDoneChunk(value) && value.item.type === "function_call") {
               ongoingToolCalls[value.output_index] = void 0;
               hasToolCalls = true;
@@ -3386,6 +3722,13 @@ var responseAnnotationAddedSchema = zod.z.object({
     title: zod.z.string()
   })
 });
+var responseReasoningSummaryTextDeltaSchema = zod.z.object({
+  type: zod.z.literal("response.reasoning_summary_text.delta"),
+  item_id: zod.z.string(),
+  output_index: zod.z.number(),
+  summary_index: zod.z.number(),
+  delta: zod.z.string()
+});
 var openaiResponsesChunkSchema = zod.z.union([
   textDeltaChunkSchema,
   responseFinishedChunkSchema,
@@ -3394,6 +3737,7 @@ var openaiResponsesChunkSchema = zod.z.union([
   responseFunctionCallArgumentsDeltaSchema,
   responseOutputItemAddedSchema,
   responseAnnotationAddedSchema,
+  responseReasoningSummaryTextDeltaSchema,
   zod.z.object({ type: zod.z.string() }).passthrough()
   // fallback for unknown chunks
 ]);
@@ -3418,6 +3762,9 @@ function isResponseOutputItemAddedChunk(chunk) {
 function isResponseAnnotationAddedChunk(chunk) {
   return chunk.type === "response.output_text.annotation.added";
 }
+function isResponseReasoningSummaryTextDeltaChunk(chunk) {
+  return chunk.type === "response.reasoning_summary_text.delta";
+}
 function getResponsesModelConfig(modelId) {
   if (modelId.startsWith("o")) {
     if (modelId.startsWith("o1-mini") || modelId.startsWith("o1-preview")) {
@@ -3447,7 +3794,8 @@ var openaiResponsesProviderOptionsSchema = zod.z.object({
   user: zod.z.string().nullish(),
   reasoningEffort: zod.z.string().nullish(),
   strictSchemas: zod.z.boolean().nullish(),
-  instructions: zod.z.string().nullish()
+  instructions: zod.z.string().nullish(),
+  reasoningSummary: zod.z.string().nullish()
 });
 var WebSearchPreviewParameters = zod.z.object({});
 function webSearchPreviewTool({
@@ -3667,53 +4015,24 @@ var openai2 = createOpenAI({ apiKey: process.env.OPENAI_API_KEY });
 var baseLLM = openai2("gpt-4o");
 // src/document/extractors/title.ts
-var TitleExtractor = class extends llamaindex.BaseExtractor {
-  /**
-   * MastraLanguageModel instance.
-   * @type {MastraLanguageModel}
-   */
+var TitleExtractor = class extends BaseExtractor {
   llm;
-  /**
-   * Can work for mixture of text and non-text nodes
-   * @type {boolean}
-   * @default false
-   */
   isTextNodeOnly = false;
-  /**
-   * Number of nodes to extrct titles from.
-   * @type {number}
-   * @default 5
-   */
   nodes = 5;
-  /**
-   * The prompt template to use for the title extractor.
-   * @type {string}
-   */
   nodeTemplate;
-  /**
-   * The prompt template to merge title with..
-   * @type {string}
-   */
   combineTemplate;
-  /**
-   * Constructor for the TitleExtractor class.
-   * @param {MastraLanguageModel} llm MastraLanguageModel instance.
-   * @param {number} nodes Number of nodes to extract titles from.
-   * @param {TitleExtractorPrompt} nodeTemplate The prompt template to use for the title extractor.
-   * @param {string} combineTemplate The prompt template to merge title with..
-   */
   constructor(options) {
     super();
     this.llm = options?.llm ?? baseLLM;
     this.nodes = options?.nodes ?? 5;
-    this.nodeTemplate = options?.nodeTemplate ? new llamaindex.PromptTemplate({
+    this.nodeTemplate = options?.nodeTemplate ? new PromptTemplate({
       templateVars: ["context"],
       template: options.nodeTemplate
-    }) : llamaindex.defaultTitleExtractorPromptTemplate;
-    this.combineTemplate = options?.combineTemplate ? new llamaindex.PromptTemplate({
+    }) : defaultTitleExtractorPromptTemplate;
+    this.combineTemplate = options?.combineTemplate ? new PromptTemplate({
       templateVars: ["context"],
       template: options.combineTemplate
-    }) : llamaindex.defaultTitleCombinePromptTemplate;
+    }) : defaultTitleCombinePromptTemplate;
   }
   /**
    * Extract titles from a list of nodes.
@@ -3725,7 +4044,7 @@ var TitleExtractor = class extends llamaindex.BaseExtractor {
     const nodesToExtractTitle = [];
     const nodeIndexes = [];
     nodes.forEach((node, idx) => {
-      const text = node.getContent(this.metadataMode);
+      const text = node.getContent();
       if (!text || text.trim() === "") {
         results[idx] = { documentTitle: "" };
       } else {
@@ -3753,7 +4072,7 @@ var TitleExtractor = class extends llamaindex.BaseExtractor {
   }
   filterNodes(nodes) {
     return nodes.filter((node) => {
-      if (this.isTextNodeOnly && !(node instanceof llamaindex.TextNode)) {
+      if (this.isTextNodeOnly && !(node instanceof TextNode)) {
         return false;
       }
       return true;
@@ -3812,7 +4131,7 @@ var TitleExtractor = class extends llamaindex.BaseExtractor {
               {
                 type: "text",
                 text: this.nodeTemplate.format({
-                  context: node.getContent(llamaindex.MetadataMode.ALL)
+                  context: node.getContent()
                 })
               }
             ]
@@ -3829,21 +4148,11 @@ var TitleExtractor = class extends llamaindex.BaseExtractor {
     return await Promise.all(titleJobs);
   }
 };
-var SummaryExtractor = class extends llamaindex.BaseExtractor {
-  /**
-   * MastraLanguageModel instance.
-   * @type {MastraLanguageModel}
-   */
+// src/document/extractors/summary.ts
+var SummaryExtractor = class extends BaseExtractor {
   llm;
-  /**
-   * List of summaries to extract: 'self', 'prev', 'next'
-   * @type {string[]}
-   */
   summaries;
-  /**
-   * The prompt template to use for the summary extractor.
-   * @type {string}
-   */
   promptTemplate;
   selfSummary;
   prevSummary;
@@ -3855,10 +4164,10 @@ var SummaryExtractor = class extends llamaindex.BaseExtractor {
     super();
     this.llm = options?.llm ?? baseLLM;
     this.summaries = summaries;
-    this.promptTemplate = options?.promptTemplate ? new llamaindex.PromptTemplate({
+    this.promptTemplate = options?.promptTemplate ? new PromptTemplate({
       templateVars: ["context"],
       template: options.promptTemplate
-    }) : llamaindex.defaultSummaryPrompt;
+    }) : defaultSummaryPrompt;
     this.selfSummary = summaries?.includes("self") ?? false;
     this.prevSummary = summaries?.includes("prev") ?? false;
     this.nextSummary = summaries?.includes("next") ?? false;
@@ -3869,14 +4178,14 @@ var SummaryExtractor = class extends llamaindex.BaseExtractor {
    * @returns {Promise<string>} Summary extracted from the node.
    */
   async generateNodeSummary(node) {
-    const text = node.getContent(this.metadataMode);
+    const text = node.getContent();
     if (!text || text.trim() === "") {
       return "";
     }
-    if (this.isTextNodeOnly && !(node instanceof llamaindex.TextNode)) {
+    if (this.isTextNodeOnly && !(node instanceof TextNode)) {
       return "";
     }
-    const context = node.getContent(this.metadataMode);
+    const context = node.getContent();
     const prompt = this.promptTemplate.format({
       context
     });
@@ -3904,7 +4213,7 @@ var SummaryExtractor = class extends llamaindex.BaseExtractor {
    * @returns {Promise<ExtractSummary[]>} Summaries extracted from the nodes.
    */
   async extract(nodes) {
-    if (!nodes.every((n) => n instanceof llamaindex.TextNode)) throw new Error("Only `TextNode` is allowed for `Summary` extractor");
+    if (!nodes.every((n) => n instanceof TextNode)) throw new Error("Only `TextNode` is allowed for `Summary` extractor");
     const nodeSummaries = await Promise.all(nodes.map((node) => this.generateNodeSummary(node)));
     const metadataList = nodes.map(() => ({}));
     for (let i = 0; i < nodes.length; i++) {
@@ -3921,28 +4230,12 @@ var SummaryExtractor = class extends llamaindex.BaseExtractor {
     return metadataList;
   }
 };
-var QuestionsAnsweredExtractor = class extends llamaindex.BaseExtractor {
-  /**
-   * MastraLanguageModel instance.
-   * @type {MastraLanguageModel}
-   */
+// src/document/extractors/questions.ts
+var QuestionsAnsweredExtractor = class extends BaseExtractor {
   llm;
-  /**
-   * Number of questions to generate.
-   * @type {number}
-   * @default 5
-   */
   questions = 5;
-  /**
-   * The prompt template to use for the question extractor.
-   * @type {string}
-   */
   promptTemplate;
-  /**
-   * Wheter to use metadata for embeddings only
-   * @type {boolean}
-   * @default false
-   */
   embeddingOnly = false;
   /**
    * Constructor for the QuestionsAnsweredExtractor class.
@@ -3956,12 +4249,12 @@ var QuestionsAnsweredExtractor = class extends llamaindex.BaseExtractor {
     super();
     this.llm = options?.llm ?? baseLLM;
     this.questions = options?.questions ?? 5;
-    this.promptTemplate = options?.promptTemplate ? new llamaindex.PromptTemplate({
+    this.promptTemplate = options?.promptTemplate ? new PromptTemplate({
       templateVars: ["numQuestions", "context"],
       template: options.promptTemplate
     }).partialFormat({
       numQuestions: "5"
-    }) : llamaindex.defaultQuestionExtractPrompt;
+    }) : defaultQuestionExtractPrompt;
     this.embeddingOnly = options?.embeddingOnly ?? false;
   }
   /**
@@ -3970,14 +4263,14 @@ var QuestionsAnsweredExtractor = class extends llamaindex.BaseExtractor {
    * @returns {Promise<Array<ExtractQuestion> | Array<{}>>} Questions extracted from the node.
    */
   async extractQuestionsFromNode(node) {
-    const text = node.getContent(this.metadataMode);
+    const text = node.getContent();
     if (!text || text.trim() === "") {
       return { questionsThisExcerptCanAnswer: "" };
     }
-    if (this.isTextNodeOnly && !(node instanceof llamaindex.TextNode)) {
+    if (this.isTextNodeOnly && !(node instanceof TextNode)) {
       return { questionsThisExcerptCanAnswer: "" };
     }
-    const contextStr = node.getContent(this.metadataMode);
+    const contextStr = node.getContent();
     const prompt = this.promptTemplate.format({
       context: contextStr,
       numQuestions: this.questions.toString()
@@ -4016,22 +4309,11 @@ var QuestionsAnsweredExtractor = class extends llamaindex.BaseExtractor {
     return results;
   }
 };
-var KeywordExtractor = class extends llamaindex.BaseExtractor {
-  /**
-   * MastraLanguageModel instance.
-   * @type {MastraLanguageModel}
-   */
+// src/document/extractors/keywords.ts
+var KeywordExtractor = class extends BaseExtractor {
   llm;
-  /**
-   * Number of keywords to extract.
-   * @type {number}
-   * @default 5
-   */
   keywords = 5;
-  /**
-   * The prompt template to use for the question extractor.
-   * @type {string}
-   */
   promptTemplate;
   /**
    * Constructor for the KeywordExtractor class.
@@ -4045,10 +4327,10 @@ var KeywordExtractor = class extends llamaindex.BaseExtractor {
     super();
     this.llm = options?.llm ?? baseLLM;
     this.keywords = options?.keywords ?? 5;
-    this.promptTemplate = options?.promptTemplate ? new llamaindex.PromptTemplate({
+    this.promptTemplate = options?.promptTemplate ? new PromptTemplate({
       templateVars: ["context", "maxKeywords"],
       template: options.promptTemplate
-    }) : llamaindex.defaultKeywordExtractPrompt;
+    }) : defaultKeywordExtractPrompt;
   }
   /**
    *
@@ -4060,11 +4342,11 @@ var KeywordExtractor = class extends llamaindex.BaseExtractor {
    * Adds error handling for malformed/empty LLM output.
    */
   async extractKeywordsFromNodes(node) {
-    const text = node.getContent(this.metadataMode);
+    const text = node.getContent();
     if (!text || text.trim() === "") {
       return { excerptKeywords: "" };
     }
-    if (this.isTextNodeOnly && !(node instanceof llamaindex.TextNode)) {
+    if (this.isTextNodeOnly && !(node instanceof TextNode)) {
       return { excerptKeywords: "" };
     }
     let keywords = "";
@@ -4079,7 +4361,7 @@ var KeywordExtractor = class extends llamaindex.BaseExtractor {
               {
                 type: "text",
                 text: this.promptTemplate.format({
-                  context: node.getContent(llamaindex.MetadataMode.ALL),
+                  context: node.getContent(),
                   maxKeywords: this.keywords.toString()
                 })
               }
@@ -4144,6 +4426,8 @@ var Language = /* @__PURE__ */ ((Language2) => {
   Language2["POWERSHELL"] = "powershell";
   return Language2;
 })(Language || {});
+// src/document/transformers/text.ts
 var TextTransformer = class {
   size;
   overlap;
@@ -4187,7 +4471,7 @@ var TextTransformer = class {
           previousChunkLen = chunk.length;
         }
         documents.push(
-          new llamaindex.Document({
+          new Document({
             text: chunk,
             metadata
           })
@@ -4517,7 +4801,7 @@ var HTMLHeaderTransformer = class {
       });
     });
     return this.returnEachElement ? elements.map(
-      (el) => new llamaindex.Document({
+      (el) => new Document({
         text: el.content,
         metadata: { ...el.metadata, xpath: el.xpath }
       })
@@ -4569,7 +4853,7 @@ var HTMLHeaderTransformer = class {
       }
     }
     return aggregatedChunks.map(
-      (chunk) => new llamaindex.Document({
+      (chunk) => new Document({
         text: chunk.content,
         metadata: { ...chunk.metadata, xpath: chunk.xpath }
       })
@@ -4591,7 +4875,7 @@ var HTMLHeaderTransformer = class {
           }
         }
         documents.push(
-          new llamaindex.Document({
+          new Document({
             text: chunk.text,
             metadata: { ...metadata, ...chunkMetadata }
           })
@@ -4620,7 +4904,7 @@ var HTMLSectionTransformer = class {
   splitText(text) {
     const sections = this.splitHtmlByHeaders(text);
     return sections.map(
-      (section) => new llamaindex.Document({
+      (section) => new Document({
         text: section.content,
         metadata: {
           [this.headersToSplitOn[section.tagName.toLowerCase()]]: section.header,
@@ -4703,7 +4987,7 @@ var HTMLSectionTransformer = class {
           }
         }
         documents.push(
-          new llamaindex.Document({
+          new Document({
             text: chunk.text,
             metadata: { ...metadata, ...chunkMetadata }
           })
@@ -4722,6 +5006,8 @@ var HTMLSectionTransformer = class {
     return this.createDocuments(texts, metadatas);
   }
 };
+// src/document/transformers/json.ts
 var RecursiveJsonTransformer = class _RecursiveJsonTransformer {
   maxSize;
   minSize;
@@ -5093,7 +5379,7 @@ var RecursiveJsonTransformer = class _RecursiveJsonTransformer {
       chunks.forEach((chunk) => {
         const metadata = { ..._metadatas[i] || {} };
         documents.push(
-          new llamaindex.Document({
+          new Document({
             text: chunk,
             metadata
           })
@@ -5129,6 +5415,8 @@ var LatexTransformer = class extends RecursiveCharacterTransformer {
     super({ separators, isSeparatorRegex: true, options });
   }
 };
+// src/document/transformers/markdown.ts
 var MarkdownTransformer = class extends RecursiveCharacterTransformer {
   constructor(options = {}) {
     const separators = RecursiveCharacterTransformer.getSeparatorsForLanguage("markdown" /* MARKDOWN */);
@@ -5149,7 +5437,7 @@ var MarkdownHeaderTransformer = class {
       return lines.flatMap((line) => {
         const contentLines = line.content.split("\n");
         return contentLines.filter((l) => l.trim() !== "" || this.headersToSplitOn.some(([sep]) => l.trim().startsWith(sep))).map(
-          (l) => new llamaindex.Document({
+          (l) => new Document({
             text: l.trim(),
             metadata: line.metadata
           })
@@ -5174,7 +5462,7 @@ var MarkdownHeaderTransformer = class {
       }
     }
     return aggregatedChunks.map(
-      (chunk) => new llamaindex.Document({
+      (chunk) => new Document({
         text: chunk.content,
         metadata: chunk.metadata
       })
@@ -5276,7 +5564,7 @@ var MarkdownHeaderTransformer = class {
       this.splitText({ text }).forEach((chunk) => {
         const metadata = { ..._metadatas[i], ...chunk.metadata };
         documents.push(
-          new llamaindex.Document({
+          new Document({
             text: chunk.text,
             metadata
           })
@@ -5392,7 +5680,7 @@ var MDocument = class _MDocument {
   // e.g., 'text', 'html', 'markdown', 'json'
   constructor({ docs, type }) {
     this.chunks = docs.map((d) => {
-      return new llamaindex.Document({ text: d.text, metadata: d.metadata });
+      return new Document({ text: d.text, metadata: d.metadata });
     });
     this.type = type;
   }
@@ -5410,26 +5698,24 @@ var MDocument = class _MDocument {
     if (typeof title !== "undefined") {
       transformations.push(new TitleExtractor(typeof title === "boolean" ? {} : title));
       this.chunks = this.chunks.map(
-        (doc) => doc?.metadata?.docId ? new llamaindex.Document({
+        (doc) => doc?.metadata?.docId ? new Document({
           ...doc,
           relationships: {
-            [llamaindex.NodeRelationship.SOURCE]: {
+            ["SOURCE" /* SOURCE */]: {
               nodeId: doc.metadata.docId,
-              nodeType: llamaindex.ObjectType.DOCUMENT,
+              nodeType: "DOCUMENT" /* DOCUMENT */,
               metadata: doc.metadata
             }
           }
         }) : doc
       );
     }
-    const pipeline = new llamaindex.IngestionPipeline({
-      transformations
-    });
-    const nodes = await pipeline.run({
-      documents: this.chunks
-    });
+    let nodes = this.chunks;
+    for (const extractor of transformations) {
+      nodes = await extractor.processNodes(nodes);
+    }
     this.chunks = this.chunks.map((doc, i) => {
-      return new llamaindex.Document({
+      return new Document({
         text: doc.text,
         metadata: {
           ...doc.metadata,