@mastra/rag 0.1.19 → 0.1.20-alpha.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.cjs CHANGED
@@ -1,6 +1,6 @@
1
1
  'use strict';
2
2
 
3
- var llamaindex = require('llamaindex');
3
+ var crypto = require('crypto');
4
4
  var zod = require('zod');
5
5
  var nodeHtmlBetterParser = require('node-html-better-parser');
6
6
  var jsTiktoken = require('js-tiktoken');
@@ -14,6 +14,11 @@ var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
14
14
  var __getOwnPropNames = Object.getOwnPropertyNames;
15
15
  var __getProtoOf = Object.getPrototypeOf;
16
16
  var __hasOwnProp = Object.prototype.hasOwnProperty;
17
+ var __knownSymbol = (name14, symbol15) => (symbol15 = Symbol[name14]) ? symbol15 : Symbol.for("Symbol." + name14);
18
+ var __typeError = (msg) => {
19
+ throw TypeError(msg);
20
+ };
21
+ var __defNormalProp = (obj, key, value) => key in obj ? __defProp(obj, key, { enumerable: true, configurable: true, writable: true, value }) : obj[key] = value;
17
22
  var __commonJS = (cb, mod) => function __require() {
18
23
  return mod || (0, cb[__getOwnPropNames(cb)[0]])((mod = { exports: {} }).exports, mod), mod.exports;
19
24
  };
@@ -33,6 +38,43 @@ var __toESM = (mod, isNodeMode, target) => (target = mod != null ? __create(__ge
33
38
  __defProp(target, "default", { value: mod, enumerable: true }) ,
34
39
  mod
35
40
  ));
41
+ var __decoratorStart = (base) => [, , , __create(null)];
42
+ var __decoratorStrings = ["class", "method", "getter", "setter", "accessor", "field", "value", "get", "set"];
43
+ var __expectFn = (fn) => fn !== void 0 && typeof fn !== "function" ? __typeError("Function expected") : fn;
44
+ var __decoratorContext = (kind, name14, done, metadata, fns) => ({ kind: __decoratorStrings[kind], name: name14, metadata, addInitializer: (fn) => done._ ? __typeError("Already initialized") : fns.push(__expectFn(fn || null)) });
45
+ var __decoratorMetadata = (array, target) => __defNormalProp(target, __knownSymbol("metadata"), array[3]);
46
+ var __runInitializers = (array, flags, self, value) => {
47
+ for (var i = 0, fns = array[flags >> 1], n = fns && fns.length; i < n; i++) flags & 1 ? fns[i].call(self) : value = fns[i].call(self, value);
48
+ return value;
49
+ };
50
+ var __decorateElement = (array, flags, name14, decorators, target, extra) => {
51
+ var fn, it, done, ctx, access, k = flags & 7, s = false, p = false;
52
+ var j = array.length + 1 , key = __decoratorStrings[k + 5];
53
+ var initializers = (array[j - 1] = []), extraInitializers = array[j] || (array[j] = []);
54
+ var desc = ((target = target.prototype), __getOwnPropDesc({ get [name14]() {
55
+ return __privateGet(this, extra);
56
+ }, set [name14](x) {
57
+ return __privateSet(this, extra, x);
58
+ } }, name14));
59
+ for (var i = decorators.length - 1; i >= 0; i--) {
60
+ ctx = __decoratorContext(k, name14, done = {}, array[3], extraInitializers);
61
+ {
62
+ ctx.static = s, ctx.private = p, access = ctx.access = { has: (x) => name14 in x };
63
+ access.get = (x) => x[name14];
64
+ access.set = (x, y) => x[name14] = y;
65
+ }
66
+ it = (0, decorators[i])({ get: desc.get, set: desc.set } , ctx), done._ = 1;
67
+ if (it === void 0) __expectFn(it) && (desc[key] = it );
68
+ else if (typeof it !== "object" || it === null) __typeError("Object expected");
69
+ else __expectFn(fn = it.get) && (desc.get = fn), __expectFn(fn = it.set) && (desc.set = fn), __expectFn(fn = it.init) && initializers.unshift(fn);
70
+ }
71
+ return desc && __defProp(target, name14, desc), target;
72
+ };
73
+ var __publicField = (obj, key, value) => __defNormalProp(obj, typeof key !== "symbol" ? key + "" : key, value);
74
+ var __accessCheck = (obj, member, msg) => member.has(obj) || __typeError("Cannot " + msg);
75
+ var __privateGet = (obj, member, getter) => (__accessCheck(obj, member, "read from private field"), member.get(obj));
76
+ var __privateAdd = (obj, member, value) => member.has(obj) ? __typeError("Cannot add the same private member more than once") : member instanceof WeakSet ? member.add(obj) : member.set(obj, value);
77
+ var __privateSet = (obj, member, value, setter) => (__accessCheck(obj, member, "write to private field"), member.set(obj, value), value);
36
78
 
37
79
  // ../../node_modules/.pnpm/secure-json-parse@2.7.0/node_modules/secure-json-parse/index.js
38
80
  var require_secure_json_parse = __commonJS({
@@ -137,6 +179,275 @@ var require_secure_json_parse = __commonJS({
137
179
  }
138
180
  });
139
181
 
182
+ // src/document/prompts/format.ts
183
+ function format(str, params) {
184
+ return str.replace(/{(\w+)}/g, (_, k) => params[k] ?? "");
185
+ }
186
+
187
+ // src/document/prompts/base.ts
188
+ var BasePromptTemplate = class {
189
+ templateVars = /* @__PURE__ */ new Set();
190
+ options = {};
191
+ constructor(options) {
192
+ const { templateVars } = options;
193
+ if (templateVars) {
194
+ this.templateVars = new Set(templateVars);
195
+ }
196
+ if (options.options) {
197
+ this.options = options.options;
198
+ }
199
+ }
200
+ };
201
+ var PromptTemplate = class _PromptTemplate extends BasePromptTemplate {
202
+ #template;
203
+ constructor(options) {
204
+ const { template, ...rest } = options;
205
+ super(rest);
206
+ this.#template = template;
207
+ }
208
+ partialFormat(options) {
209
+ const prompt = new _PromptTemplate({
210
+ template: this.template,
211
+ templateVars: [...this.templateVars],
212
+ options: this.options
213
+ });
214
+ prompt.options = {
215
+ ...prompt.options,
216
+ ...options
217
+ };
218
+ return prompt;
219
+ }
220
+ format(options) {
221
+ const allOptions = {
222
+ ...this.options,
223
+ ...options
224
+ };
225
+ return format(this.template, allOptions);
226
+ }
227
+ formatMessages(options) {
228
+ const prompt = this.format(options);
229
+ return [
230
+ {
231
+ role: "user",
232
+ content: prompt
233
+ }
234
+ ];
235
+ }
236
+ get template() {
237
+ return this.#template;
238
+ }
239
+ };
240
+
241
+ // src/document/prompts/prompt.ts
242
+ var defaultSummaryPrompt = new PromptTemplate({
243
+ templateVars: ["context"],
244
+ template: `Write a summary of the following. Try to use only the information provided. Try to include as many key details as possible.
245
+
246
+
247
+ {context}
248
+
249
+
250
+ SUMMARY:"""
251
+ `
252
+ });
253
+ var defaultKeywordExtractPrompt = new PromptTemplate({
254
+ templateVars: ["maxKeywords", "context"],
255
+ template: `
256
+ Some text is provided below. Given the text, extract up to {maxKeywords} keywords from the text. Avoid stopwords.
257
+ ---------------------
258
+ {context}
259
+ ---------------------
260
+ Provide keywords in the following comma-separated format: 'KEYWORDS: <keywords>'
261
+ `
262
+ }).partialFormat({
263
+ maxKeywords: "10"
264
+ });
265
+ var defaultQuestionExtractPrompt = new PromptTemplate({
266
+ templateVars: ["numQuestions", "context"],
267
+ template: `(
268
+ "Given the contextual informations below, generate {numQuestions} questions this context can provides specific answers to which are unlikely to be found else where. Higher-level summaries of surrounding context may be provided as well. "
269
+ "Try using these summaries to generate better questions that this context can answer."
270
+ "---------------------"
271
+ "{context}"
272
+ "---------------------"
273
+ "Provide questions in the following format: 'QUESTIONS: <questions>'"
274
+ )`
275
+ }).partialFormat({
276
+ numQuestions: "5"
277
+ });
278
+ var defaultTitleExtractorPromptTemplate = new PromptTemplate({
279
+ templateVars: ["context"],
280
+ template: `{context}
281
+ Give a title that summarizes all of the unique entities, titles or themes found in the context.
282
+ Title: `
283
+ });
284
+ var defaultTitleCombinePromptTemplate = new PromptTemplate({
285
+ templateVars: ["context"],
286
+ template: `{context}
287
+ Based on the above candidate titles and contents, what is the comprehensive title for this document?
288
+ Title: `
289
+ });
290
+ var _hash_dec, _init, _hash;
291
+ _hash_dec = [lazyInitHash];
292
+ var BaseNode = class {
293
+ constructor(init) {
294
+ __publicField(this, "id_");
295
+ __publicField(this, "metadata");
296
+ __publicField(this, "relationships");
297
+ __privateAdd(this, _hash, __runInitializers(_init, 8, this, "")), __runInitializers(_init, 11, this);
298
+ const { id_, metadata, relationships } = init || {};
299
+ this.id_ = id_ ?? crypto.randomUUID();
300
+ this.metadata = metadata ?? {};
301
+ this.relationships = relationships ?? {};
302
+ }
303
+ get sourceNode() {
304
+ const relationship = this.relationships["SOURCE" /* SOURCE */];
305
+ if (Array.isArray(relationship)) {
306
+ throw new Error("Source object must be a single RelatedNodeInfo object");
307
+ }
308
+ return relationship;
309
+ }
310
+ get prevNode() {
311
+ const relationship = this.relationships["PREVIOUS" /* PREVIOUS */];
312
+ if (Array.isArray(relationship)) {
313
+ throw new Error("Previous object must be a single RelatedNodeInfo object");
314
+ }
315
+ return relationship;
316
+ }
317
+ get nextNode() {
318
+ const relationship = this.relationships["NEXT" /* NEXT */];
319
+ if (Array.isArray(relationship)) {
320
+ throw new Error("Next object must be a single RelatedNodeInfo object");
321
+ }
322
+ return relationship;
323
+ }
324
+ get parentNode() {
325
+ const relationship = this.relationships["PARENT" /* PARENT */];
326
+ if (Array.isArray(relationship)) {
327
+ throw new Error("Parent object must be a single RelatedNodeInfo object");
328
+ }
329
+ return relationship;
330
+ }
331
+ get childNodes() {
332
+ const relationship = this.relationships["CHILD" /* CHILD */];
333
+ if (!Array.isArray(relationship)) {
334
+ throw new Error("Child object must be a an array of RelatedNodeInfo objects");
335
+ }
336
+ return relationship;
337
+ }
338
+ };
339
+ _init = __decoratorStart();
340
+ _hash = new WeakMap();
341
+ __decorateElement(_init, 4, "hash", _hash_dec, BaseNode, _hash);
342
+ __decoratorMetadata(_init, BaseNode);
343
+ var TextNode = class extends BaseNode {
344
+ text;
345
+ startCharIdx;
346
+ endCharIdx;
347
+ metadataSeparator;
348
+ constructor(init = {}) {
349
+ super(init);
350
+ const { text, startCharIdx, endCharIdx, metadataSeparator } = init;
351
+ this.text = text ?? "";
352
+ if (startCharIdx) {
353
+ this.startCharIdx = startCharIdx;
354
+ }
355
+ if (endCharIdx) {
356
+ this.endCharIdx = endCharIdx;
357
+ }
358
+ this.metadataSeparator = metadataSeparator ?? "\n";
359
+ }
360
+ /**
361
+ * Generate a hash of the text node.
362
+ * The ID is not part of the hash as it can change independent of content.
363
+ * @returns
364
+ */
365
+ generateHash() {
366
+ const hashFunction = createSHA256();
367
+ hashFunction.update(`type=${this.type}`);
368
+ hashFunction.update(`startCharIdx=${this.startCharIdx} endCharIdx=${this.endCharIdx}`);
369
+ hashFunction.update(this.getContent());
370
+ return hashFunction.digest();
371
+ }
372
+ get type() {
373
+ return "TEXT" /* TEXT */;
374
+ }
375
+ getContent() {
376
+ const metadataStr = this.getMetadataStr().trim();
377
+ return `${metadataStr}
378
+
379
+ ${this.text}`.trim();
380
+ }
381
+ getMetadataStr() {
382
+ const usableMetadataKeys = new Set(Object.keys(this.metadata).sort());
383
+ return [...usableMetadataKeys].map((key) => `${key}: ${this.metadata[key]}`).join(this.metadataSeparator);
384
+ }
385
+ getNodeInfo() {
386
+ return { start: this.startCharIdx, end: this.endCharIdx };
387
+ }
388
+ getText() {
389
+ return this.text;
390
+ }
391
+ };
392
+ var Document = class extends TextNode {
393
+ constructor(init) {
394
+ super(init);
395
+ }
396
+ get type() {
397
+ return "DOCUMENT" /* DOCUMENT */;
398
+ }
399
+ };
400
+ function lazyInitHash(value, _context) {
401
+ return {
402
+ get() {
403
+ const oldValue = value.get.call(this);
404
+ if (oldValue === "") {
405
+ const hash = this.generateHash();
406
+ value.set.call(this, hash);
407
+ }
408
+ return value.get.call(this);
409
+ },
410
+ set(newValue) {
411
+ value.set.call(this, newValue);
412
+ },
413
+ init(value2) {
414
+ return value2;
415
+ }
416
+ };
417
+ }
418
+ function createSHA256() {
419
+ const hash = crypto.createHash("sha256");
420
+ return {
421
+ update(data) {
422
+ hash.update(data);
423
+ },
424
+ digest() {
425
+ return hash.digest("base64");
426
+ }
427
+ };
428
+ }
429
+
430
+ // src/document/extractors/base.ts
431
+ var BaseExtractor = class {
432
+ isTextNodeOnly = true;
433
+ /**
434
+ *
435
+ * @param nodes Nodes to extract metadata from.
436
+ * @returns Metadata extracted from the nodes.
437
+ */
438
+ async processNodes(nodes) {
439
+ let newNodes = nodes;
440
+ const curMetadataList = await this.extract(newNodes);
441
+ for (const idx in newNodes) {
442
+ newNodes[idx].metadata = {
443
+ ...newNodes[idx].metadata,
444
+ ...curMetadataList[idx]
445
+ };
446
+ }
447
+ return newNodes;
448
+ }
449
+ };
450
+
140
451
  // ../../node_modules/.pnpm/@ai-sdk+provider@1.1.3/node_modules/@ai-sdk/provider/dist/index.mjs
141
452
  var marker = "vercel.ai.error";
142
453
  var symbol = Symbol.for(marker);
@@ -2430,8 +2741,10 @@ var openaiTextEmbeddingResponseSchema = zod.z.object({
2430
2741
  });
2431
2742
  var modelMaxImagesPerCall = {
2432
2743
  "dall-e-3": 1,
2433
- "dall-e-2": 10
2744
+ "dall-e-2": 10,
2745
+ "gpt-image-1": 10
2434
2746
  };
2747
+ var hasDefaultResponseFormat = /* @__PURE__ */ new Set(["gpt-image-1"]);
2435
2748
  var OpenAIImageModel = class {
2436
2749
  constructor(modelId, settings, config) {
2437
2750
  this.modelId = modelId;
@@ -2481,7 +2794,7 @@ var OpenAIImageModel = class {
2481
2794
  n,
2482
2795
  size,
2483
2796
  ...(_d = providerOptions.openai) != null ? _d : {},
2484
- response_format: "b64_json"
2797
+ ...!hasDefaultResponseFormat.has(this.modelId) ? { response_format: "b64_json" } : {}
2485
2798
  },
2486
2799
  failedResponseHandler: openaiFailedResponseHandler,
2487
2800
  successfulResponseHandler: createJsonResponseHandler(
@@ -2976,8 +3289,15 @@ var OpenAIResponsesLanguageModel = class {
2976
3289
  user: openaiOptions == null ? void 0 : openaiOptions.user,
2977
3290
  instructions: openaiOptions == null ? void 0 : openaiOptions.instructions,
2978
3291
  // model-specific settings:
2979
- ...modelConfig.isReasoningModel && (openaiOptions == null ? void 0 : openaiOptions.reasoningEffort) != null && {
2980
- reasoning: { effort: openaiOptions == null ? void 0 : openaiOptions.reasoningEffort }
3292
+ ...modelConfig.isReasoningModel && ((openaiOptions == null ? void 0 : openaiOptions.reasoningEffort) != null || (openaiOptions == null ? void 0 : openaiOptions.reasoningSummary) != null) && {
3293
+ reasoning: {
3294
+ ...(openaiOptions == null ? void 0 : openaiOptions.reasoningEffort) != null && {
3295
+ effort: openaiOptions.reasoningEffort
3296
+ },
3297
+ ...(openaiOptions == null ? void 0 : openaiOptions.reasoningSummary) != null && {
3298
+ summary: openaiOptions.reasoningSummary
3299
+ }
3300
+ }
2981
3301
  },
2982
3302
  ...modelConfig.requiredAutoTruncation && {
2983
3303
  truncation: "auto"
@@ -3059,7 +3379,7 @@ var OpenAIResponsesLanguageModel = class {
3059
3379
  }
3060
3380
  }
3061
3381
  async doGenerate(options) {
3062
- var _a15, _b, _c, _d, _e;
3382
+ var _a15, _b, _c, _d, _e, _f, _g;
3063
3383
  const { args: body, warnings } = this.getArgs(options);
3064
3384
  const {
3065
3385
  responseHeaders,
@@ -3112,7 +3432,13 @@ var OpenAIResponsesLanguageModel = class {
3112
3432
  type: zod.z.literal("computer_call")
3113
3433
  }),
3114
3434
  zod.z.object({
3115
- type: zod.z.literal("reasoning")
3435
+ type: zod.z.literal("reasoning"),
3436
+ summary: zod.z.array(
3437
+ zod.z.object({
3438
+ type: zod.z.literal("summary_text"),
3439
+ text: zod.z.string()
3440
+ })
3441
+ )
3116
3442
  })
3117
3443
  ])
3118
3444
  ),
@@ -3130,6 +3456,7 @@ var OpenAIResponsesLanguageModel = class {
3130
3456
  toolName: output.name,
3131
3457
  args: output.arguments
3132
3458
  }));
3459
+ const reasoningSummary = (_b = (_a15 = response.output.find((item) => item.type === "reasoning")) == null ? void 0 : _a15.summary) != null ? _b : null;
3133
3460
  return {
3134
3461
  text: outputTextElements.map((content) => content.text).join("\n"),
3135
3462
  sources: outputTextElements.flatMap(
@@ -3144,10 +3471,14 @@ var OpenAIResponsesLanguageModel = class {
3144
3471
  })
3145
3472
  ),
3146
3473
  finishReason: mapOpenAIResponseFinishReason({
3147
- finishReason: (_a15 = response.incomplete_details) == null ? void 0 : _a15.reason,
3474
+ finishReason: (_c = response.incomplete_details) == null ? void 0 : _c.reason,
3148
3475
  hasToolCalls: toolCalls.length > 0
3149
3476
  }),
3150
3477
  toolCalls: toolCalls.length > 0 ? toolCalls : void 0,
3478
+ reasoning: reasoningSummary ? reasoningSummary.map((summary) => ({
3479
+ type: "text",
3480
+ text: summary.text
3481
+ })) : void 0,
3151
3482
  usage: {
3152
3483
  promptTokens: response.usage.input_tokens,
3153
3484
  completionTokens: response.usage.output_tokens
@@ -3171,8 +3502,8 @@ var OpenAIResponsesLanguageModel = class {
3171
3502
  providerMetadata: {
3172
3503
  openai: {
3173
3504
  responseId: response.id,
3174
- cachedPromptTokens: (_c = (_b = response.usage.input_tokens_details) == null ? void 0 : _b.cached_tokens) != null ? _c : null,
3175
- reasoningTokens: (_e = (_d = response.usage.output_tokens_details) == null ? void 0 : _d.reasoning_tokens) != null ? _e : null
3505
+ cachedPromptTokens: (_e = (_d = response.usage.input_tokens_details) == null ? void 0 : _d.cached_tokens) != null ? _e : null,
3506
+ reasoningTokens: (_g = (_f = response.usage.output_tokens_details) == null ? void 0 : _f.reasoning_tokens) != null ? _g : null
3176
3507
  }
3177
3508
  },
3178
3509
  warnings
@@ -3255,6 +3586,11 @@ var OpenAIResponsesLanguageModel = class {
3255
3586
  type: "text-delta",
3256
3587
  textDelta: value.delta
3257
3588
  });
3589
+ } else if (isResponseReasoningSummaryTextDeltaChunk(value)) {
3590
+ controller.enqueue({
3591
+ type: "reasoning",
3592
+ textDelta: value.delta
3593
+ });
3258
3594
  } else if (isResponseOutputItemDoneChunk(value) && value.item.type === "function_call") {
3259
3595
  ongoingToolCalls[value.output_index] = void 0;
3260
3596
  hasToolCalls = true;
@@ -3386,6 +3722,13 @@ var responseAnnotationAddedSchema = zod.z.object({
3386
3722
  title: zod.z.string()
3387
3723
  })
3388
3724
  });
3725
+ var responseReasoningSummaryTextDeltaSchema = zod.z.object({
3726
+ type: zod.z.literal("response.reasoning_summary_text.delta"),
3727
+ item_id: zod.z.string(),
3728
+ output_index: zod.z.number(),
3729
+ summary_index: zod.z.number(),
3730
+ delta: zod.z.string()
3731
+ });
3389
3732
  var openaiResponsesChunkSchema = zod.z.union([
3390
3733
  textDeltaChunkSchema,
3391
3734
  responseFinishedChunkSchema,
@@ -3394,6 +3737,7 @@ var openaiResponsesChunkSchema = zod.z.union([
3394
3737
  responseFunctionCallArgumentsDeltaSchema,
3395
3738
  responseOutputItemAddedSchema,
3396
3739
  responseAnnotationAddedSchema,
3740
+ responseReasoningSummaryTextDeltaSchema,
3397
3741
  zod.z.object({ type: zod.z.string() }).passthrough()
3398
3742
  // fallback for unknown chunks
3399
3743
  ]);
@@ -3418,6 +3762,9 @@ function isResponseOutputItemAddedChunk(chunk) {
3418
3762
  function isResponseAnnotationAddedChunk(chunk) {
3419
3763
  return chunk.type === "response.output_text.annotation.added";
3420
3764
  }
3765
+ function isResponseReasoningSummaryTextDeltaChunk(chunk) {
3766
+ return chunk.type === "response.reasoning_summary_text.delta";
3767
+ }
3421
3768
  function getResponsesModelConfig(modelId) {
3422
3769
  if (modelId.startsWith("o")) {
3423
3770
  if (modelId.startsWith("o1-mini") || modelId.startsWith("o1-preview")) {
@@ -3447,7 +3794,8 @@ var openaiResponsesProviderOptionsSchema = zod.z.object({
3447
3794
  user: zod.z.string().nullish(),
3448
3795
  reasoningEffort: zod.z.string().nullish(),
3449
3796
  strictSchemas: zod.z.boolean().nullish(),
3450
- instructions: zod.z.string().nullish()
3797
+ instructions: zod.z.string().nullish(),
3798
+ reasoningSummary: zod.z.string().nullish()
3451
3799
  });
3452
3800
  var WebSearchPreviewParameters = zod.z.object({});
3453
3801
  function webSearchPreviewTool({
@@ -3667,53 +4015,24 @@ var openai2 = createOpenAI({ apiKey: process.env.OPENAI_API_KEY });
3667
4015
  var baseLLM = openai2("gpt-4o");
3668
4016
 
3669
4017
  // src/document/extractors/title.ts
3670
- var TitleExtractor = class extends llamaindex.BaseExtractor {
3671
- /**
3672
- * MastraLanguageModel instance.
3673
- * @type {MastraLanguageModel}
3674
- */
4018
+ var TitleExtractor = class extends BaseExtractor {
3675
4019
  llm;
3676
- /**
3677
- * Can work for mixture of text and non-text nodes
3678
- * @type {boolean}
3679
- * @default false
3680
- */
3681
4020
  isTextNodeOnly = false;
3682
- /**
3683
- * Number of nodes to extrct titles from.
3684
- * @type {number}
3685
- * @default 5
3686
- */
3687
4021
  nodes = 5;
3688
- /**
3689
- * The prompt template to use for the title extractor.
3690
- * @type {string}
3691
- */
3692
4022
  nodeTemplate;
3693
- /**
3694
- * The prompt template to merge title with..
3695
- * @type {string}
3696
- */
3697
4023
  combineTemplate;
3698
- /**
3699
- * Constructor for the TitleExtractor class.
3700
- * @param {MastraLanguageModel} llm MastraLanguageModel instance.
3701
- * @param {number} nodes Number of nodes to extract titles from.
3702
- * @param {TitleExtractorPrompt} nodeTemplate The prompt template to use for the title extractor.
3703
- * @param {string} combineTemplate The prompt template to merge title with..
3704
- */
3705
4024
  constructor(options) {
3706
4025
  super();
3707
4026
  this.llm = options?.llm ?? baseLLM;
3708
4027
  this.nodes = options?.nodes ?? 5;
3709
- this.nodeTemplate = options?.nodeTemplate ? new llamaindex.PromptTemplate({
4028
+ this.nodeTemplate = options?.nodeTemplate ? new PromptTemplate({
3710
4029
  templateVars: ["context"],
3711
4030
  template: options.nodeTemplate
3712
- }) : llamaindex.defaultTitleExtractorPromptTemplate;
3713
- this.combineTemplate = options?.combineTemplate ? new llamaindex.PromptTemplate({
4031
+ }) : defaultTitleExtractorPromptTemplate;
4032
+ this.combineTemplate = options?.combineTemplate ? new PromptTemplate({
3714
4033
  templateVars: ["context"],
3715
4034
  template: options.combineTemplate
3716
- }) : llamaindex.defaultTitleCombinePromptTemplate;
4035
+ }) : defaultTitleCombinePromptTemplate;
3717
4036
  }
3718
4037
  /**
3719
4038
  * Extract titles from a list of nodes.
@@ -3725,7 +4044,7 @@ var TitleExtractor = class extends llamaindex.BaseExtractor {
3725
4044
  const nodesToExtractTitle = [];
3726
4045
  const nodeIndexes = [];
3727
4046
  nodes.forEach((node, idx) => {
3728
- const text = node.getContent(this.metadataMode);
4047
+ const text = node.getContent();
3729
4048
  if (!text || text.trim() === "") {
3730
4049
  results[idx] = { documentTitle: "" };
3731
4050
  } else {
@@ -3753,7 +4072,7 @@ var TitleExtractor = class extends llamaindex.BaseExtractor {
3753
4072
  }
3754
4073
  filterNodes(nodes) {
3755
4074
  return nodes.filter((node) => {
3756
- if (this.isTextNodeOnly && !(node instanceof llamaindex.TextNode)) {
4075
+ if (this.isTextNodeOnly && !(node instanceof TextNode)) {
3757
4076
  return false;
3758
4077
  }
3759
4078
  return true;
@@ -3812,7 +4131,7 @@ var TitleExtractor = class extends llamaindex.BaseExtractor {
3812
4131
  {
3813
4132
  type: "text",
3814
4133
  text: this.nodeTemplate.format({
3815
- context: node.getContent(llamaindex.MetadataMode.ALL)
4134
+ context: node.getContent()
3816
4135
  })
3817
4136
  }
3818
4137
  ]
@@ -3829,21 +4148,11 @@ var TitleExtractor = class extends llamaindex.BaseExtractor {
3829
4148
  return await Promise.all(titleJobs);
3830
4149
  }
3831
4150
  };
3832
- var SummaryExtractor = class extends llamaindex.BaseExtractor {
3833
- /**
3834
- * MastraLanguageModel instance.
3835
- * @type {MastraLanguageModel}
3836
- */
4151
+
4152
+ // src/document/extractors/summary.ts
4153
+ var SummaryExtractor = class extends BaseExtractor {
3837
4154
  llm;
3838
- /**
3839
- * List of summaries to extract: 'self', 'prev', 'next'
3840
- * @type {string[]}
3841
- */
3842
4155
  summaries;
3843
- /**
3844
- * The prompt template to use for the summary extractor.
3845
- * @type {string}
3846
- */
3847
4156
  promptTemplate;
3848
4157
  selfSummary;
3849
4158
  prevSummary;
@@ -3855,10 +4164,10 @@ var SummaryExtractor = class extends llamaindex.BaseExtractor {
3855
4164
  super();
3856
4165
  this.llm = options?.llm ?? baseLLM;
3857
4166
  this.summaries = summaries;
3858
- this.promptTemplate = options?.promptTemplate ? new llamaindex.PromptTemplate({
4167
+ this.promptTemplate = options?.promptTemplate ? new PromptTemplate({
3859
4168
  templateVars: ["context"],
3860
4169
  template: options.promptTemplate
3861
- }) : llamaindex.defaultSummaryPrompt;
4170
+ }) : defaultSummaryPrompt;
3862
4171
  this.selfSummary = summaries?.includes("self") ?? false;
3863
4172
  this.prevSummary = summaries?.includes("prev") ?? false;
3864
4173
  this.nextSummary = summaries?.includes("next") ?? false;
@@ -3869,14 +4178,14 @@ var SummaryExtractor = class extends llamaindex.BaseExtractor {
3869
4178
  * @returns {Promise<string>} Summary extracted from the node.
3870
4179
  */
3871
4180
  async generateNodeSummary(node) {
3872
- const text = node.getContent(this.metadataMode);
4181
+ const text = node.getContent();
3873
4182
  if (!text || text.trim() === "") {
3874
4183
  return "";
3875
4184
  }
3876
- if (this.isTextNodeOnly && !(node instanceof llamaindex.TextNode)) {
4185
+ if (this.isTextNodeOnly && !(node instanceof TextNode)) {
3877
4186
  return "";
3878
4187
  }
3879
- const context = node.getContent(this.metadataMode);
4188
+ const context = node.getContent();
3880
4189
  const prompt = this.promptTemplate.format({
3881
4190
  context
3882
4191
  });
@@ -3904,7 +4213,7 @@ var SummaryExtractor = class extends llamaindex.BaseExtractor {
3904
4213
  * @returns {Promise<ExtractSummary[]>} Summaries extracted from the nodes.
3905
4214
  */
3906
4215
  async extract(nodes) {
3907
- if (!nodes.every((n) => n instanceof llamaindex.TextNode)) throw new Error("Only `TextNode` is allowed for `Summary` extractor");
4216
+ if (!nodes.every((n) => n instanceof TextNode)) throw new Error("Only `TextNode` is allowed for `Summary` extractor");
3908
4217
  const nodeSummaries = await Promise.all(nodes.map((node) => this.generateNodeSummary(node)));
3909
4218
  const metadataList = nodes.map(() => ({}));
3910
4219
  for (let i = 0; i < nodes.length; i++) {
@@ -3921,28 +4230,12 @@ var SummaryExtractor = class extends llamaindex.BaseExtractor {
3921
4230
  return metadataList;
3922
4231
  }
3923
4232
  };
3924
- var QuestionsAnsweredExtractor = class extends llamaindex.BaseExtractor {
3925
- /**
3926
- * MastraLanguageModel instance.
3927
- * @type {MastraLanguageModel}
3928
- */
4233
+
4234
+ // src/document/extractors/questions.ts
4235
+ var QuestionsAnsweredExtractor = class extends BaseExtractor {
3929
4236
  llm;
3930
- /**
3931
- * Number of questions to generate.
3932
- * @type {number}
3933
- * @default 5
3934
- */
3935
4237
  questions = 5;
3936
- /**
3937
- * The prompt template to use for the question extractor.
3938
- * @type {string}
3939
- */
3940
4238
  promptTemplate;
3941
- /**
3942
- * Wheter to use metadata for embeddings only
3943
- * @type {boolean}
3944
- * @default false
3945
- */
3946
4239
  embeddingOnly = false;
3947
4240
  /**
3948
4241
  * Constructor for the QuestionsAnsweredExtractor class.
@@ -3956,12 +4249,12 @@ var QuestionsAnsweredExtractor = class extends llamaindex.BaseExtractor {
3956
4249
  super();
3957
4250
  this.llm = options?.llm ?? baseLLM;
3958
4251
  this.questions = options?.questions ?? 5;
3959
- this.promptTemplate = options?.promptTemplate ? new llamaindex.PromptTemplate({
4252
+ this.promptTemplate = options?.promptTemplate ? new PromptTemplate({
3960
4253
  templateVars: ["numQuestions", "context"],
3961
4254
  template: options.promptTemplate
3962
4255
  }).partialFormat({
3963
4256
  numQuestions: "5"
3964
- }) : llamaindex.defaultQuestionExtractPrompt;
4257
+ }) : defaultQuestionExtractPrompt;
3965
4258
  this.embeddingOnly = options?.embeddingOnly ?? false;
3966
4259
  }
3967
4260
  /**
@@ -3970,14 +4263,14 @@ var QuestionsAnsweredExtractor = class extends llamaindex.BaseExtractor {
3970
4263
  * @returns {Promise<Array<ExtractQuestion> | Array<{}>>} Questions extracted from the node.
3971
4264
  */
3972
4265
  async extractQuestionsFromNode(node) {
3973
- const text = node.getContent(this.metadataMode);
4266
+ const text = node.getContent();
3974
4267
  if (!text || text.trim() === "") {
3975
4268
  return { questionsThisExcerptCanAnswer: "" };
3976
4269
  }
3977
- if (this.isTextNodeOnly && !(node instanceof llamaindex.TextNode)) {
4270
+ if (this.isTextNodeOnly && !(node instanceof TextNode)) {
3978
4271
  return { questionsThisExcerptCanAnswer: "" };
3979
4272
  }
3980
- const contextStr = node.getContent(this.metadataMode);
4273
+ const contextStr = node.getContent();
3981
4274
  const prompt = this.promptTemplate.format({
3982
4275
  context: contextStr,
3983
4276
  numQuestions: this.questions.toString()
@@ -4016,22 +4309,11 @@ var QuestionsAnsweredExtractor = class extends llamaindex.BaseExtractor {
4016
4309
  return results;
4017
4310
  }
4018
4311
  };
4019
- var KeywordExtractor = class extends llamaindex.BaseExtractor {
4020
- /**
4021
- * MastraLanguageModel instance.
4022
- * @type {MastraLanguageModel}
4023
- */
4312
+
4313
+ // src/document/extractors/keywords.ts
4314
+ var KeywordExtractor = class extends BaseExtractor {
4024
4315
  llm;
4025
- /**
4026
- * Number of keywords to extract.
4027
- * @type {number}
4028
- * @default 5
4029
- */
4030
4316
  keywords = 5;
4031
- /**
4032
- * The prompt template to use for the question extractor.
4033
- * @type {string}
4034
- */
4035
4317
  promptTemplate;
4036
4318
  /**
4037
4319
  * Constructor for the KeywordExtractor class.
@@ -4045,10 +4327,10 @@ var KeywordExtractor = class extends llamaindex.BaseExtractor {
4045
4327
  super();
4046
4328
  this.llm = options?.llm ?? baseLLM;
4047
4329
  this.keywords = options?.keywords ?? 5;
4048
- this.promptTemplate = options?.promptTemplate ? new llamaindex.PromptTemplate({
4330
+ this.promptTemplate = options?.promptTemplate ? new PromptTemplate({
4049
4331
  templateVars: ["context", "maxKeywords"],
4050
4332
  template: options.promptTemplate
4051
- }) : llamaindex.defaultKeywordExtractPrompt;
4333
+ }) : defaultKeywordExtractPrompt;
4052
4334
  }
4053
4335
  /**
4054
4336
  *
@@ -4060,11 +4342,11 @@ var KeywordExtractor = class extends llamaindex.BaseExtractor {
4060
4342
  * Adds error handling for malformed/empty LLM output.
4061
4343
  */
4062
4344
  async extractKeywordsFromNodes(node) {
4063
- const text = node.getContent(this.metadataMode);
4345
+ const text = node.getContent();
4064
4346
  if (!text || text.trim() === "") {
4065
4347
  return { excerptKeywords: "" };
4066
4348
  }
4067
- if (this.isTextNodeOnly && !(node instanceof llamaindex.TextNode)) {
4349
+ if (this.isTextNodeOnly && !(node instanceof TextNode)) {
4068
4350
  return { excerptKeywords: "" };
4069
4351
  }
4070
4352
  let keywords = "";
@@ -4079,7 +4361,7 @@ var KeywordExtractor = class extends llamaindex.BaseExtractor {
4079
4361
  {
4080
4362
  type: "text",
4081
4363
  text: this.promptTemplate.format({
4082
- context: node.getContent(llamaindex.MetadataMode.ALL),
4364
+ context: node.getContent(),
4083
4365
  maxKeywords: this.keywords.toString()
4084
4366
  })
4085
4367
  }
@@ -4144,6 +4426,8 @@ var Language = /* @__PURE__ */ ((Language2) => {
4144
4426
  Language2["POWERSHELL"] = "powershell";
4145
4427
  return Language2;
4146
4428
  })(Language || {});
4429
+
4430
+ // src/document/transformers/text.ts
4147
4431
  var TextTransformer = class {
4148
4432
  size;
4149
4433
  overlap;
@@ -4187,7 +4471,7 @@ var TextTransformer = class {
4187
4471
  previousChunkLen = chunk.length;
4188
4472
  }
4189
4473
  documents.push(
4190
- new llamaindex.Document({
4474
+ new Document({
4191
4475
  text: chunk,
4192
4476
  metadata
4193
4477
  })
@@ -4517,7 +4801,7 @@ var HTMLHeaderTransformer = class {
4517
4801
  });
4518
4802
  });
4519
4803
  return this.returnEachElement ? elements.map(
4520
- (el) => new llamaindex.Document({
4804
+ (el) => new Document({
4521
4805
  text: el.content,
4522
4806
  metadata: { ...el.metadata, xpath: el.xpath }
4523
4807
  })
@@ -4569,7 +4853,7 @@ var HTMLHeaderTransformer = class {
4569
4853
  }
4570
4854
  }
4571
4855
  return aggregatedChunks.map(
4572
- (chunk) => new llamaindex.Document({
4856
+ (chunk) => new Document({
4573
4857
  text: chunk.content,
4574
4858
  metadata: { ...chunk.metadata, xpath: chunk.xpath }
4575
4859
  })
@@ -4591,7 +4875,7 @@ var HTMLHeaderTransformer = class {
4591
4875
  }
4592
4876
  }
4593
4877
  documents.push(
4594
- new llamaindex.Document({
4878
+ new Document({
4595
4879
  text: chunk.text,
4596
4880
  metadata: { ...metadata, ...chunkMetadata }
4597
4881
  })
@@ -4620,7 +4904,7 @@ var HTMLSectionTransformer = class {
4620
4904
  splitText(text) {
4621
4905
  const sections = this.splitHtmlByHeaders(text);
4622
4906
  return sections.map(
4623
- (section) => new llamaindex.Document({
4907
+ (section) => new Document({
4624
4908
  text: section.content,
4625
4909
  metadata: {
4626
4910
  [this.headersToSplitOn[section.tagName.toLowerCase()]]: section.header,
@@ -4703,7 +4987,7 @@ var HTMLSectionTransformer = class {
4703
4987
  }
4704
4988
  }
4705
4989
  documents.push(
4706
- new llamaindex.Document({
4990
+ new Document({
4707
4991
  text: chunk.text,
4708
4992
  metadata: { ...metadata, ...chunkMetadata }
4709
4993
  })
@@ -4722,6 +5006,8 @@ var HTMLSectionTransformer = class {
4722
5006
  return this.createDocuments(texts, metadatas);
4723
5007
  }
4724
5008
  };
5009
+
5010
+ // src/document/transformers/json.ts
4725
5011
  var RecursiveJsonTransformer = class _RecursiveJsonTransformer {
4726
5012
  maxSize;
4727
5013
  minSize;
@@ -5093,7 +5379,7 @@ var RecursiveJsonTransformer = class _RecursiveJsonTransformer {
5093
5379
  chunks.forEach((chunk) => {
5094
5380
  const metadata = { ..._metadatas[i] || {} };
5095
5381
  documents.push(
5096
- new llamaindex.Document({
5382
+ new Document({
5097
5383
  text: chunk,
5098
5384
  metadata
5099
5385
  })
@@ -5129,6 +5415,8 @@ var LatexTransformer = class extends RecursiveCharacterTransformer {
5129
5415
  super({ separators, isSeparatorRegex: true, options });
5130
5416
  }
5131
5417
  };
5418
+
5419
+ // src/document/transformers/markdown.ts
5132
5420
  var MarkdownTransformer = class extends RecursiveCharacterTransformer {
5133
5421
  constructor(options = {}) {
5134
5422
  const separators = RecursiveCharacterTransformer.getSeparatorsForLanguage("markdown" /* MARKDOWN */);
@@ -5149,7 +5437,7 @@ var MarkdownHeaderTransformer = class {
5149
5437
  return lines.flatMap((line) => {
5150
5438
  const contentLines = line.content.split("\n");
5151
5439
  return contentLines.filter((l) => l.trim() !== "" || this.headersToSplitOn.some(([sep]) => l.trim().startsWith(sep))).map(
5152
- (l) => new llamaindex.Document({
5440
+ (l) => new Document({
5153
5441
  text: l.trim(),
5154
5442
  metadata: line.metadata
5155
5443
  })
@@ -5174,7 +5462,7 @@ var MarkdownHeaderTransformer = class {
5174
5462
  }
5175
5463
  }
5176
5464
  return aggregatedChunks.map(
5177
- (chunk) => new llamaindex.Document({
5465
+ (chunk) => new Document({
5178
5466
  text: chunk.content,
5179
5467
  metadata: chunk.metadata
5180
5468
  })
@@ -5276,7 +5564,7 @@ var MarkdownHeaderTransformer = class {
5276
5564
  this.splitText({ text }).forEach((chunk) => {
5277
5565
  const metadata = { ..._metadatas[i], ...chunk.metadata };
5278
5566
  documents.push(
5279
- new llamaindex.Document({
5567
+ new Document({
5280
5568
  text: chunk.text,
5281
5569
  metadata
5282
5570
  })
@@ -5392,7 +5680,7 @@ var MDocument = class _MDocument {
5392
5680
  // e.g., 'text', 'html', 'markdown', 'json'
5393
5681
  constructor({ docs, type }) {
5394
5682
  this.chunks = docs.map((d) => {
5395
- return new llamaindex.Document({ text: d.text, metadata: d.metadata });
5683
+ return new Document({ text: d.text, metadata: d.metadata });
5396
5684
  });
5397
5685
  this.type = type;
5398
5686
  }
@@ -5410,26 +5698,24 @@ var MDocument = class _MDocument {
5410
5698
  if (typeof title !== "undefined") {
5411
5699
  transformations.push(new TitleExtractor(typeof title === "boolean" ? {} : title));
5412
5700
  this.chunks = this.chunks.map(
5413
- (doc) => doc?.metadata?.docId ? new llamaindex.Document({
5701
+ (doc) => doc?.metadata?.docId ? new Document({
5414
5702
  ...doc,
5415
5703
  relationships: {
5416
- [llamaindex.NodeRelationship.SOURCE]: {
5704
+ ["SOURCE" /* SOURCE */]: {
5417
5705
  nodeId: doc.metadata.docId,
5418
- nodeType: llamaindex.ObjectType.DOCUMENT,
5706
+ nodeType: "DOCUMENT" /* DOCUMENT */,
5419
5707
  metadata: doc.metadata
5420
5708
  }
5421
5709
  }
5422
5710
  }) : doc
5423
5711
  );
5424
5712
  }
5425
- const pipeline = new llamaindex.IngestionPipeline({
5426
- transformations
5427
- });
5428
- const nodes = await pipeline.run({
5429
- documents: this.chunks
5430
- });
5713
+ let nodes = this.chunks;
5714
+ for (const extractor of transformations) {
5715
+ nodes = await extractor.processNodes(nodes);
5716
+ }
5431
5717
  this.chunks = this.chunks.map((doc, i) => {
5432
- return new llamaindex.Document({
5718
+ return new Document({
5433
5719
  text: doc.text,
5434
5720
  metadata: {
5435
5721
  ...doc.metadata,