@mastra/rag 0.1.20-alpha.0 → 0.1.20-alpha.1

This diff compares the publicly available contents of two package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the packages exactly as they appear in their public registry.
package/dist/index.js CHANGED
@@ -1,4 +1,4 @@
- import { Document, ObjectType, NodeRelationship, IngestionPipeline, BaseExtractor, PromptTemplate, defaultSummaryPrompt, TextNode, defaultQuestionExtractPrompt, defaultKeywordExtractPrompt, MetadataMode, defaultTitleExtractorPromptTemplate, defaultTitleCombinePromptTemplate } from 'llamaindex';
+ import { randomUUID, createHash } from 'crypto';
  import { z } from 'zod';
  import { parse } from 'node-html-better-parser';
  import { encodingForModel, getEncoding } from 'js-tiktoken';
@@ -12,6 +12,11 @@ var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
  var __getOwnPropNames = Object.getOwnPropertyNames;
  var __getProtoOf = Object.getPrototypeOf;
  var __hasOwnProp = Object.prototype.hasOwnProperty;
+ var __knownSymbol = (name14, symbol15) => (symbol15 = Symbol[name14]) ? symbol15 : Symbol.for("Symbol." + name14);
+ var __typeError = (msg) => {
+ throw TypeError(msg);
+ };
+ var __defNormalProp = (obj, key, value) => key in obj ? __defProp(obj, key, { enumerable: true, configurable: true, writable: true, value }) : obj[key] = value;
  var __commonJS = (cb, mod) => function __require() {
  return mod || (0, cb[__getOwnPropNames(cb)[0]])((mod = { exports: {} }).exports, mod), mod.exports;
  };
@@ -31,6 +36,43 @@ var __toESM = (mod, isNodeMode, target) => (target = mod != null ? __create(__ge
  __defProp(target, "default", { value: mod, enumerable: true }) ,
  mod
  ));
+ var __decoratorStart = (base) => [, , , __create(null)];
+ var __decoratorStrings = ["class", "method", "getter", "setter", "accessor", "field", "value", "get", "set"];
+ var __expectFn = (fn) => fn !== void 0 && typeof fn !== "function" ? __typeError("Function expected") : fn;
+ var __decoratorContext = (kind, name14, done, metadata, fns) => ({ kind: __decoratorStrings[kind], name: name14, metadata, addInitializer: (fn) => done._ ? __typeError("Already initialized") : fns.push(__expectFn(fn || null)) });
+ var __decoratorMetadata = (array, target) => __defNormalProp(target, __knownSymbol("metadata"), array[3]);
+ var __runInitializers = (array, flags, self, value) => {
+ for (var i = 0, fns = array[flags >> 1], n = fns && fns.length; i < n; i++) flags & 1 ? fns[i].call(self) : value = fns[i].call(self, value);
+ return value;
+ };
+ var __decorateElement = (array, flags, name14, decorators, target, extra) => {
+ var fn, it, done, ctx, access, k = flags & 7, s = false, p = false;
+ var j = array.length + 1 , key = __decoratorStrings[k + 5];
+ var initializers = (array[j - 1] = []), extraInitializers = array[j] || (array[j] = []);
+ var desc = ((target = target.prototype), __getOwnPropDesc({ get [name14]() {
+ return __privateGet(this, extra);
+ }, set [name14](x) {
+ return __privateSet(this, extra, x);
+ } }, name14));
+ for (var i = decorators.length - 1; i >= 0; i--) {
+ ctx = __decoratorContext(k, name14, done = {}, array[3], extraInitializers);
+ {
+ ctx.static = s, ctx.private = p, access = ctx.access = { has: (x) => name14 in x };
+ access.get = (x) => x[name14];
+ access.set = (x, y) => x[name14] = y;
+ }
+ it = (0, decorators[i])({ get: desc.get, set: desc.set } , ctx), done._ = 1;
+ if (it === void 0) __expectFn(it) && (desc[key] = it );
+ else if (typeof it !== "object" || it === null) __typeError("Object expected");
+ else __expectFn(fn = it.get) && (desc.get = fn), __expectFn(fn = it.set) && (desc.set = fn), __expectFn(fn = it.init) && initializers.unshift(fn);
+ }
+ return desc && __defProp(target, name14, desc), target;
+ };
+ var __publicField = (obj, key, value) => __defNormalProp(obj, typeof key !== "symbol" ? key + "" : key, value);
+ var __accessCheck = (obj, member, msg) => member.has(obj) || __typeError("Cannot " + msg);
+ var __privateGet = (obj, member, getter) => (__accessCheck(obj, member, "read from private field"), member.get(obj));
+ var __privateAdd = (obj, member, value) => member.has(obj) ? __typeError("Cannot add the same private member more than once") : member instanceof WeakSet ? member.add(obj) : member.set(obj, value);
+ var __privateSet = (obj, member, value, setter) => (__accessCheck(obj, member, "write to private field"), member.set(obj, value), value);

  // ../../node_modules/.pnpm/secure-json-parse@2.7.0/node_modules/secure-json-parse/index.js
  var require_secure_json_parse = __commonJS({
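Illustrative sketch (not part of the diff): the __knownSymbol, __typeError, __defNormalProp, __decorator*, __publicField, and __private* helpers added above are esbuild's runtime shims for class fields, private fields, and Stage-3 decorators, which the new BaseNode class later in this diff relies on (its hash accessor is decorated with lazyInitHash). A hedged sketch of the kind of source these helpers are lowered from; the decorator and class names mirror the bundle, but the exact original source is an assumption:

// Sketch only (TypeScript 5+): roughly the shape that esbuild compiles into
// __decorateElement/__runInitializers/__privateAdd. Not the actual Mastra source.
function lazyInitHash(
  value: ClassAccessorDecoratorTarget<BaseNode, string>,
  _context: ClassAccessorDecoratorContext<BaseNode, string>,
): ClassAccessorDecoratorResult<BaseNode, string> {
  return {
    get() {
      // compute the hash lazily on first read, then cache it in the backing store
      if (value.get.call(this) === "") value.set.call(this, this.generateHash());
      return value.get.call(this);
    },
    set(newValue) {
      value.set.call(this, newValue);
    },
    init(initial) {
      return initial;
    },
  };
}

abstract class BaseNode {
  @lazyInitHash accessor hash = "";
  abstract generateHash(): string;
}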
@@ -135,6 +177,275 @@ var require_secure_json_parse = __commonJS({
  }
  });

+ // src/document/prompts/format.ts
+ function format(str, params) {
+ return str.replace(/{(\w+)}/g, (_, k) => params[k] ?? "");
+ }
+
+ // src/document/prompts/base.ts
+ var BasePromptTemplate = class {
+ templateVars = /* @__PURE__ */ new Set();
+ options = {};
+ constructor(options) {
+ const { templateVars } = options;
+ if (templateVars) {
+ this.templateVars = new Set(templateVars);
+ }
+ if (options.options) {
+ this.options = options.options;
+ }
+ }
+ };
+ var PromptTemplate = class _PromptTemplate extends BasePromptTemplate {
+ #template;
+ constructor(options) {
+ const { template, ...rest } = options;
+ super(rest);
+ this.#template = template;
+ }
+ partialFormat(options) {
+ const prompt = new _PromptTemplate({
+ template: this.template,
+ templateVars: [...this.templateVars],
+ options: this.options
+ });
+ prompt.options = {
+ ...prompt.options,
+ ...options
+ };
+ return prompt;
+ }
+ format(options) {
+ const allOptions = {
+ ...this.options,
+ ...options
+ };
+ return format(this.template, allOptions);
+ }
+ formatMessages(options) {
+ const prompt = this.format(options);
+ return [
+ {
+ role: "user",
+ content: prompt
+ }
+ ];
+ }
+ get template() {
+ return this.#template;
+ }
+ };
+
+ // src/document/prompts/prompt.ts
+ var defaultSummaryPrompt = new PromptTemplate({
+ templateVars: ["context"],
+ template: `Write a summary of the following. Try to use only the information provided. Try to include as many key details as possible.
+
+
+ {context}
+
+
+ SUMMARY:"""
+ `
+ });
+ var defaultKeywordExtractPrompt = new PromptTemplate({
+ templateVars: ["maxKeywords", "context"],
+ template: `
+ Some text is provided below. Given the text, extract up to {maxKeywords} keywords from the text. Avoid stopwords.
+ ---------------------
+ {context}
+ ---------------------
+ Provide keywords in the following comma-separated format: 'KEYWORDS: <keywords>'
+ `
+ }).partialFormat({
+ maxKeywords: "10"
+ });
+ var defaultQuestionExtractPrompt = new PromptTemplate({
+ templateVars: ["numQuestions", "context"],
+ template: `(
+ "Given the contextual informations below, generate {numQuestions} questions this context can provides specific answers to which are unlikely to be found else where. Higher-level summaries of surrounding context may be provided as well. "
+ "Try using these summaries to generate better questions that this context can answer."
+ "---------------------"
+ "{context}"
+ "---------------------"
+ "Provide questions in the following format: 'QUESTIONS: <questions>'"
+ )`
+ }).partialFormat({
+ numQuestions: "5"
+ });
+ var defaultTitleExtractorPromptTemplate = new PromptTemplate({
+ templateVars: ["context"],
+ template: `{context}
+ Give a title that summarizes all of the unique entities, titles or themes found in the context.
+ Title: `
+ });
+ var defaultTitleCombinePromptTemplate = new PromptTemplate({
+ templateVars: ["context"],
+ template: `{context}
+ Based on the above candidate titles and contents, what is the comprehensive title for this document?
+ Title: `
+ });
+ var _hash_dec, _init, _hash;
+ _hash_dec = [lazyInitHash];
+ var BaseNode = class {
+ constructor(init) {
+ __publicField(this, "id_");
+ __publicField(this, "metadata");
+ __publicField(this, "relationships");
+ __privateAdd(this, _hash, __runInitializers(_init, 8, this, "")), __runInitializers(_init, 11, this);
+ const { id_, metadata, relationships } = init || {};
+ this.id_ = id_ ?? randomUUID();
+ this.metadata = metadata ?? {};
+ this.relationships = relationships ?? {};
+ }
+ get sourceNode() {
+ const relationship = this.relationships["SOURCE" /* SOURCE */];
+ if (Array.isArray(relationship)) {
+ throw new Error("Source object must be a single RelatedNodeInfo object");
+ }
+ return relationship;
+ }
+ get prevNode() {
+ const relationship = this.relationships["PREVIOUS" /* PREVIOUS */];
+ if (Array.isArray(relationship)) {
+ throw new Error("Previous object must be a single RelatedNodeInfo object");
+ }
+ return relationship;
+ }
+ get nextNode() {
+ const relationship = this.relationships["NEXT" /* NEXT */];
+ if (Array.isArray(relationship)) {
+ throw new Error("Next object must be a single RelatedNodeInfo object");
+ }
+ return relationship;
+ }
+ get parentNode() {
+ const relationship = this.relationships["PARENT" /* PARENT */];
+ if (Array.isArray(relationship)) {
+ throw new Error("Parent object must be a single RelatedNodeInfo object");
+ }
+ return relationship;
+ }
+ get childNodes() {
+ const relationship = this.relationships["CHILD" /* CHILD */];
+ if (!Array.isArray(relationship)) {
+ throw new Error("Child object must be a an array of RelatedNodeInfo objects");
+ }
+ return relationship;
+ }
+ };
+ _init = __decoratorStart();
+ _hash = new WeakMap();
+ __decorateElement(_init, 4, "hash", _hash_dec, BaseNode, _hash);
+ __decoratorMetadata(_init, BaseNode);
+ var TextNode = class extends BaseNode {
+ text;
+ startCharIdx;
+ endCharIdx;
+ metadataSeparator;
+ constructor(init = {}) {
+ super(init);
+ const { text, startCharIdx, endCharIdx, metadataSeparator } = init;
+ this.text = text ?? "";
+ if (startCharIdx) {
+ this.startCharIdx = startCharIdx;
+ }
+ if (endCharIdx) {
+ this.endCharIdx = endCharIdx;
+ }
+ this.metadataSeparator = metadataSeparator ?? "\n";
+ }
+ /**
+ * Generate a hash of the text node.
+ * The ID is not part of the hash as it can change independent of content.
+ * @returns
+ */
+ generateHash() {
+ const hashFunction = createSHA256();
+ hashFunction.update(`type=${this.type}`);
+ hashFunction.update(`startCharIdx=${this.startCharIdx} endCharIdx=${this.endCharIdx}`);
+ hashFunction.update(this.getContent());
+ return hashFunction.digest();
+ }
+ get type() {
+ return "TEXT" /* TEXT */;
+ }
+ getContent() {
+ const metadataStr = this.getMetadataStr().trim();
+ return `${metadataStr}
+
+ ${this.text}`.trim();
+ }
+ getMetadataStr() {
+ const usableMetadataKeys = new Set(Object.keys(this.metadata).sort());
+ return [...usableMetadataKeys].map((key) => `${key}: ${this.metadata[key]}`).join(this.metadataSeparator);
+ }
+ getNodeInfo() {
+ return { start: this.startCharIdx, end: this.endCharIdx };
+ }
+ getText() {
+ return this.text;
+ }
+ };
+ var Document = class extends TextNode {
+ constructor(init) {
+ super(init);
+ }
+ get type() {
+ return "DOCUMENT" /* DOCUMENT */;
+ }
+ };
+ function lazyInitHash(value, _context) {
+ return {
+ get() {
+ const oldValue = value.get.call(this);
+ if (oldValue === "") {
+ const hash = this.generateHash();
+ value.set.call(this, hash);
+ }
+ return value.get.call(this);
+ },
+ set(newValue) {
+ value.set.call(this, newValue);
+ },
+ init(value2) {
+ return value2;
+ }
+ };
+ }
+ function createSHA256() {
+ const hash = createHash("sha256");
+ return {
+ update(data) {
+ hash.update(data);
+ },
+ digest() {
+ return hash.digest("base64");
+ }
+ };
+ }
+
+ // src/document/extractors/base.ts
+ var BaseExtractor = class {
+ isTextNodeOnly = true;
+ /**
+ *
+ * @param nodes Nodes to extract metadata from.
+ * @returns Metadata extracted from the nodes.
+ */
+ async processNodes(nodes) {
+ let newNodes = nodes;
+ const curMetadataList = await this.extract(newNodes);
+ for (const idx in newNodes) {
+ newNodes[idx].metadata = {
+ ...newNodes[idx].metadata,
+ ...curMetadataList[idx]
+ };
+ }
+ return newNodes;
+ }
+ };
+
  // ../../node_modules/.pnpm/@ai-sdk+provider@1.1.3/node_modules/@ai-sdk/provider/dist/index.mjs
  var marker = "vercel.ai.error";
  var symbol = Symbol.for(marker);
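Illustrative sketch (not part of the diff): the hunk above is the heart of this release. The llamaindex import is gone, replaced by in-package ports of the default prompt templates, BaseNode/TextNode/Document, and BaseExtractor, with only randomUUID and createHash from node's crypto pulled in for IDs and content hashing. A minimal sketch of how the new PromptTemplate behaves, based on the format/partialFormat/formatMessages code shown above; whether the class is re-exported from the package entry is not shown in this diff, so treat its availability as an assumption:

// Illustrative only: PromptTemplate as defined in dist/index.js above.
const keywordPrompt = new PromptTemplate({
  templateVars: ["maxKeywords", "context"],
  template: "Extract up to {maxKeywords} keywords from the text below.\n{context}",
}).partialFormat({ maxKeywords: "10" }); // pre-fills one variable, returns a new template

const prompt = keywordPrompt.format({ context: "Mastra is a TypeScript AI framework." });
// "Extract up to 10 keywords from the text below.\nMastra is a TypeScript AI framework."

const messages = keywordPrompt.formatMessages({ context: "..." });
// [{ role: "user", content: "Extract up to 10 keywords from the text below.\n..." }]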
@@ -2428,8 +2739,10 @@ var openaiTextEmbeddingResponseSchema = z.object({
  });
  var modelMaxImagesPerCall = {
  "dall-e-3": 1,
- "dall-e-2": 10
+ "dall-e-2": 10,
+ "gpt-image-1": 10
  };
+ var hasDefaultResponseFormat = /* @__PURE__ */ new Set(["gpt-image-1"]);
  var OpenAIImageModel = class {
  constructor(modelId, settings, config) {
  this.modelId = modelId;
@@ -2479,7 +2792,7 @@ var OpenAIImageModel = class {
  n,
  size,
  ...(_d = providerOptions.openai) != null ? _d : {},
- response_format: "b64_json"
+ ...!hasDefaultResponseFormat.has(this.modelId) ? { response_format: "b64_json" } : {}
  },
  failedResponseHandler: openaiFailedResponseHandler,
  successfulResponseHandler: createJsonResponseHandler(
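Illustrative sketch (not part of the diff): the two image-model hunks above add gpt-image-1 (capped at 10 images per call, like dall-e-2) and introduce hasDefaultResponseFormat so that response_format: "b64_json" is only sent for models outside that set, presumably because gpt-image-1 already returns base64-encoded images without it. The conditional-spread pattern in isolation, as a standalone sketch:

// Same pattern as the bundled request-body construction above.
const hasDefaultResponseFormat = new Set(["gpt-image-1"]);

function imageRequestBody(modelId: string, prompt: string, n: number) {
  return {
    model: modelId,
    prompt,
    n,
    // only ask for b64_json when the model does not already default to it
    ...(!hasDefaultResponseFormat.has(modelId) ? { response_format: "b64_json" } : {}),
  };
}

imageRequestBody("dall-e-3", "a watercolor fox", 1);    // includes response_format
imageRequestBody("gpt-image-1", "a watercolor fox", 1); // omits response_format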
@@ -2974,8 +3287,15 @@ var OpenAIResponsesLanguageModel = class {
  user: openaiOptions == null ? void 0 : openaiOptions.user,
  instructions: openaiOptions == null ? void 0 : openaiOptions.instructions,
  // model-specific settings:
- ...modelConfig.isReasoningModel && (openaiOptions == null ? void 0 : openaiOptions.reasoningEffort) != null && {
- reasoning: { effort: openaiOptions == null ? void 0 : openaiOptions.reasoningEffort }
+ ...modelConfig.isReasoningModel && ((openaiOptions == null ? void 0 : openaiOptions.reasoningEffort) != null || (openaiOptions == null ? void 0 : openaiOptions.reasoningSummary) != null) && {
+ reasoning: {
+ ...(openaiOptions == null ? void 0 : openaiOptions.reasoningEffort) != null && {
+ effort: openaiOptions.reasoningEffort
+ },
+ ...(openaiOptions == null ? void 0 : openaiOptions.reasoningSummary) != null && {
+ summary: openaiOptions.reasoningSummary
+ }
+ }
  },
  ...modelConfig.requiredAutoTruncation && {
  truncation: "auto"
@@ -3057,7 +3377,7 @@ var OpenAIResponsesLanguageModel = class {
  }
  }
  async doGenerate(options) {
- var _a15, _b, _c, _d, _e;
+ var _a15, _b, _c, _d, _e, _f, _g;
  const { args: body, warnings } = this.getArgs(options);
  const {
  responseHeaders,
@@ -3110,7 +3430,13 @@ var OpenAIResponsesLanguageModel = class {
  type: z.literal("computer_call")
  }),
  z.object({
- type: z.literal("reasoning")
+ type: z.literal("reasoning"),
+ summary: z.array(
+ z.object({
+ type: z.literal("summary_text"),
+ text: z.string()
+ })
+ )
  })
  ])
  ),
@@ -3128,6 +3454,7 @@ var OpenAIResponsesLanguageModel = class {
  toolName: output.name,
  args: output.arguments
  }));
+ const reasoningSummary = (_b = (_a15 = response.output.find((item) => item.type === "reasoning")) == null ? void 0 : _a15.summary) != null ? _b : null;
  return {
  text: outputTextElements.map((content) => content.text).join("\n"),
  sources: outputTextElements.flatMap(
@@ -3142,10 +3469,14 @@ var OpenAIResponsesLanguageModel = class {
  })
  ),
  finishReason: mapOpenAIResponseFinishReason({
- finishReason: (_a15 = response.incomplete_details) == null ? void 0 : _a15.reason,
+ finishReason: (_c = response.incomplete_details) == null ? void 0 : _c.reason,
  hasToolCalls: toolCalls.length > 0
  }),
  toolCalls: toolCalls.length > 0 ? toolCalls : void 0,
+ reasoning: reasoningSummary ? reasoningSummary.map((summary) => ({
+ type: "text",
+ text: summary.text
+ })) : void 0,
  usage: {
  promptTokens: response.usage.input_tokens,
  completionTokens: response.usage.output_tokens
@@ -3169,8 +3500,8 @@ var OpenAIResponsesLanguageModel = class {
  providerMetadata: {
  openai: {
  responseId: response.id,
- cachedPromptTokens: (_c = (_b = response.usage.input_tokens_details) == null ? void 0 : _b.cached_tokens) != null ? _c : null,
- reasoningTokens: (_e = (_d = response.usage.output_tokens_details) == null ? void 0 : _d.reasoning_tokens) != null ? _e : null
+ cachedPromptTokens: (_e = (_d = response.usage.input_tokens_details) == null ? void 0 : _d.cached_tokens) != null ? _e : null,
+ reasoningTokens: (_g = (_f = response.usage.output_tokens_details) == null ? void 0 : _f.reasoning_tokens) != null ? _g : null
  }
  },
  warnings
@@ -3253,6 +3584,11 @@ var OpenAIResponsesLanguageModel = class {
  type: "text-delta",
  textDelta: value.delta
  });
+ } else if (isResponseReasoningSummaryTextDeltaChunk(value)) {
+ controller.enqueue({
+ type: "reasoning",
+ textDelta: value.delta
+ });
  } else if (isResponseOutputItemDoneChunk(value) && value.item.type === "function_call") {
  ongoingToolCalls[value.output_index] = void 0;
  hasToolCalls = true;
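Illustrative sketch (not part of the diff): the streaming hunk above recognizes response.reasoning_summary_text.delta chunks and re-emits them as stream parts of type "reasoning", alongside the existing "text-delta" parts. A hedged sketch of a consumer for that part shape; the { type, textDelta } fields come from the code above, while how you obtain the stream iterable depends on your setup:

// Illustrative consumer of the stream parts enqueued above.
type StreamPart = { type: string; textDelta?: string };

async function printStream(parts: AsyncIterable<StreamPart>) {
  for await (const part of parts) {
    if (part.type === "reasoning") {
      process.stdout.write(`[reasoning] ${part.textDelta ?? ""}`); // new in this release
    } else if (part.type === "text-delta") {
      process.stdout.write(part.textDelta ?? "");
    }
  }
}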
@@ -3384,6 +3720,13 @@ var responseAnnotationAddedSchema = z.object({
  title: z.string()
  })
  });
+ var responseReasoningSummaryTextDeltaSchema = z.object({
+ type: z.literal("response.reasoning_summary_text.delta"),
+ item_id: z.string(),
+ output_index: z.number(),
+ summary_index: z.number(),
+ delta: z.string()
+ });
  var openaiResponsesChunkSchema = z.union([
  textDeltaChunkSchema,
  responseFinishedChunkSchema,
@@ -3392,6 +3735,7 @@ var openaiResponsesChunkSchema = z.union([
  responseFunctionCallArgumentsDeltaSchema,
  responseOutputItemAddedSchema,
  responseAnnotationAddedSchema,
+ responseReasoningSummaryTextDeltaSchema,
  z.object({ type: z.string() }).passthrough()
  // fallback for unknown chunks
  ]);
@@ -3416,6 +3760,9 @@ function isResponseOutputItemAddedChunk(chunk) {
  function isResponseAnnotationAddedChunk(chunk) {
  return chunk.type === "response.output_text.annotation.added";
  }
+ function isResponseReasoningSummaryTextDeltaChunk(chunk) {
+ return chunk.type === "response.reasoning_summary_text.delta";
+ }
  function getResponsesModelConfig(modelId) {
  if (modelId.startsWith("o")) {
  if (modelId.startsWith("o1-mini") || modelId.startsWith("o1-preview")) {
@@ -3445,7 +3792,8 @@ var openaiResponsesProviderOptionsSchema = z.object({
  user: z.string().nullish(),
  reasoningEffort: z.string().nullish(),
  strictSchemas: z.boolean().nullish(),
- instructions: z.string().nullish()
+ instructions: z.string().nullish(),
+ reasoningSummary: z.string().nullish()
  });
  var WebSearchPreviewParameters = z.object({});
  function webSearchPreviewTool({
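Illustrative sketch (not part of the diff): taken together, the responses-model hunks above add a reasoningSummary provider option next to reasoningEffort. It is validated by the schema above, merged into the request as reasoning.summary, parsed back out of "reasoning" output items, and surfaced both as reasoning text parts in doGenerate and as "reasoning" deltas when streaming. A hedged sketch of the option shape; the property names match the schema, but the "auto" value and the surrounding call site are assumptions rather than anything confirmed by this diff:

// Option names are from openaiResponsesProviderOptionsSchema above; values are illustrative.
const providerOptions = {
  openai: {
    reasoningEffort: "medium",  // existed before this release
    reasoningSummary: "auto",   // new: request a reasoning summary from the Responses API
  },
};

// getArgs() then sends: { ..., reasoning: { effort: "medium", summary: "auto" } }
// and doGenerate() maps the returned summary to: reasoning: [{ type: "text", text: "…" }]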
@@ -3666,40 +4014,11 @@ var baseLLM = openai2("gpt-4o");

  // src/document/extractors/title.ts
  var TitleExtractor = class extends BaseExtractor {
- /**
- * MastraLanguageModel instance.
- * @type {MastraLanguageModel}
- */
  llm;
- /**
- * Can work for mixture of text and non-text nodes
- * @type {boolean}
- * @default false
- */
  isTextNodeOnly = false;
- /**
- * Number of nodes to extrct titles from.
- * @type {number}
- * @default 5
- */
  nodes = 5;
- /**
- * The prompt template to use for the title extractor.
- * @type {string}
- */
  nodeTemplate;
- /**
- * The prompt template to merge title with..
- * @type {string}
- */
  combineTemplate;
- /**
- * Constructor for the TitleExtractor class.
- * @param {MastraLanguageModel} llm MastraLanguageModel instance.
- * @param {number} nodes Number of nodes to extract titles from.
- * @param {TitleExtractorPrompt} nodeTemplate The prompt template to use for the title extractor.
- * @param {string} combineTemplate The prompt template to merge title with..
- */
  constructor(options) {
  super();
  this.llm = options?.llm ?? baseLLM;
@@ -3723,7 +4042,7 @@ var TitleExtractor = class extends BaseExtractor {
  const nodesToExtractTitle = [];
  const nodeIndexes = [];
  nodes.forEach((node, idx) => {
- const text = node.getContent(this.metadataMode);
+ const text = node.getContent();
  if (!text || text.trim() === "") {
  results[idx] = { documentTitle: "" };
  } else {
@@ -3810,7 +4129,7 @@ var TitleExtractor = class extends BaseExtractor {
  {
  type: "text",
  text: this.nodeTemplate.format({
- context: node.getContent(MetadataMode.ALL)
+ context: node.getContent()
  })
  }
  ]
@@ -3827,21 +4146,11 @@ var TitleExtractor = class extends BaseExtractor {
  return await Promise.all(titleJobs);
  }
  };
+
+ // src/document/extractors/summary.ts
  var SummaryExtractor = class extends BaseExtractor {
- /**
- * MastraLanguageModel instance.
- * @type {MastraLanguageModel}
- */
  llm;
- /**
- * List of summaries to extract: 'self', 'prev', 'next'
- * @type {string[]}
- */
  summaries;
- /**
- * The prompt template to use for the summary extractor.
- * @type {string}
- */
  promptTemplate;
  selfSummary;
  prevSummary;
@@ -3867,14 +4176,14 @@ var SummaryExtractor = class extends BaseExtractor {
  * @returns {Promise<string>} Summary extracted from the node.
  */
  async generateNodeSummary(node) {
- const text = node.getContent(this.metadataMode);
+ const text = node.getContent();
  if (!text || text.trim() === "") {
  return "";
  }
  if (this.isTextNodeOnly && !(node instanceof TextNode)) {
  return "";
  }
- const context = node.getContent(this.metadataMode);
+ const context = node.getContent();
  const prompt = this.promptTemplate.format({
  context
  });
@@ -3919,28 +4228,12 @@ var SummaryExtractor = class extends BaseExtractor {
  return metadataList;
  }
  };
+
+ // src/document/extractors/questions.ts
  var QuestionsAnsweredExtractor = class extends BaseExtractor {
- /**
- * MastraLanguageModel instance.
- * @type {MastraLanguageModel}
- */
  llm;
- /**
- * Number of questions to generate.
- * @type {number}
- * @default 5
- */
  questions = 5;
- /**
- * The prompt template to use for the question extractor.
- * @type {string}
- */
  promptTemplate;
- /**
- * Wheter to use metadata for embeddings only
- * @type {boolean}
- * @default false
- */
  embeddingOnly = false;
  /**
  * Constructor for the QuestionsAnsweredExtractor class.
@@ -3968,14 +4261,14 @@ var QuestionsAnsweredExtractor = class extends BaseExtractor {
  * @returns {Promise<Array<ExtractQuestion> | Array<{}>>} Questions extracted from the node.
  */
  async extractQuestionsFromNode(node) {
- const text = node.getContent(this.metadataMode);
+ const text = node.getContent();
  if (!text || text.trim() === "") {
  return { questionsThisExcerptCanAnswer: "" };
  }
  if (this.isTextNodeOnly && !(node instanceof TextNode)) {
  return { questionsThisExcerptCanAnswer: "" };
  }
- const contextStr = node.getContent(this.metadataMode);
+ const contextStr = node.getContent();
  const prompt = this.promptTemplate.format({
  context: contextStr,
  numQuestions: this.questions.toString()
@@ -4014,22 +4307,11 @@ var QuestionsAnsweredExtractor = class extends BaseExtractor {
  return results;
  }
  };
+
+ // src/document/extractors/keywords.ts
  var KeywordExtractor = class extends BaseExtractor {
- /**
- * MastraLanguageModel instance.
- * @type {MastraLanguageModel}
- */
  llm;
- /**
- * Number of keywords to extract.
- * @type {number}
- * @default 5
- */
  keywords = 5;
- /**
- * The prompt template to use for the question extractor.
- * @type {string}
- */
  promptTemplate;
  /**
  * Constructor for the KeywordExtractor class.
@@ -4058,7 +4340,7 @@ var KeywordExtractor = class extends BaseExtractor {
  * Adds error handling for malformed/empty LLM output.
  */
  async extractKeywordsFromNodes(node) {
- const text = node.getContent(this.metadataMode);
+ const text = node.getContent();
  if (!text || text.trim() === "") {
  return { excerptKeywords: "" };
  }
@@ -4077,7 +4359,7 @@ var KeywordExtractor = class extends BaseExtractor {
  {
  type: "text",
  text: this.promptTemplate.format({
- context: node.getContent(MetadataMode.ALL),
+ context: node.getContent(),
  maxKeywords: this.keywords.toString()
  })
  }
@@ -4142,6 +4424,8 @@ var Language = /* @__PURE__ */ ((Language2) => {
  Language2["POWERSHELL"] = "powershell";
  return Language2;
  })(Language || {});
+
+ // src/document/transformers/text.ts
  var TextTransformer = class {
  size;
  overlap;
@@ -4720,6 +5004,8 @@ var HTMLSectionTransformer = class {
  return this.createDocuments(texts, metadatas);
  }
  };
+
+ // src/document/transformers/json.ts
  var RecursiveJsonTransformer = class _RecursiveJsonTransformer {
  maxSize;
  minSize;
@@ -5127,6 +5413,8 @@ var LatexTransformer = class extends RecursiveCharacterTransformer {
  super({ separators, isSeparatorRegex: true, options });
  }
  };
+
+ // src/document/transformers/markdown.ts
  var MarkdownTransformer = class extends RecursiveCharacterTransformer {
  constructor(options = {}) {
  const separators = RecursiveCharacterTransformer.getSeparatorsForLanguage("markdown" /* MARKDOWN */);
@@ -5411,21 +5699,19 @@ var MDocument = class _MDocument {
  (doc) => doc?.metadata?.docId ? new Document({
  ...doc,
  relationships: {
- [NodeRelationship.SOURCE]: {
+ ["SOURCE" /* SOURCE */]: {
  nodeId: doc.metadata.docId,
- nodeType: ObjectType.DOCUMENT,
+ nodeType: "DOCUMENT" /* DOCUMENT */,
  metadata: doc.metadata
  }
  }
  }) : doc
  );
  }
- const pipeline = new IngestionPipeline({
- transformations
- });
- const nodes = await pipeline.run({
- documents: this.chunks
- });
+ let nodes = this.chunks;
+ for (const extractor of transformations) {
+ nodes = await extractor.processNodes(nodes);
+ }
  this.chunks = this.chunks.map((doc, i) => {
  return new Document({
  text: doc.text,
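Illustrative sketch (not part of the diff): the final hunk removes the last llamaindex dependency from MDocument. Instead of new IngestionPipeline({ transformations }).run(...), the chunks are now pushed through each extractor sequentially via the processNodes method defined on the new BaseExtractor. The same loop in isolation, with the extractor instances in the usage comment treated as hypothetical stand-ins (their constructor options beyond llm are not shown in this diff):

// Sequential metadata extraction, mirroring the new MDocument code path above.
async function runExtractors<T extends { metadata: Record<string, unknown> }>(
  chunks: T[],
  transformations: Array<{ processNodes(nodes: T[]): Promise<T[]> }>,
): Promise<T[]> {
  let nodes = chunks;
  for (const extractor of transformations) {
    // processNodes() calls extract() and merges the returned metadata into each node
    nodes = await extractor.processNodes(nodes);
  }
  return nodes;
}

// e.g. nodes = await runExtractors(chunks, [new TitleExtractor(), new SummaryExtractor()]);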