@mastra/rag 1.0.8-alpha.0 → 1.0.9-alpha.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.turbo/turbo-build.log +1 -1
- package/CHANGELOG.md +41 -0
- package/dist/document/extractors/keywords.d.ts.map +1 -1
- package/dist/document/extractors/questions.d.ts.map +1 -1
- package/dist/document/extractors/summary.d.ts.map +1 -1
- package/dist/document/extractors/title.d.ts.map +1 -1
- package/dist/index.cjs +105 -99
- package/dist/index.cjs.map +1 -1
- package/dist/index.js +105 -99
- package/dist/index.js.map +1 -1
- package/package.json +4 -4
- package/src/document/document.test.ts +1 -1
- package/src/document/extractors/keywords.test.ts +2 -2
- package/src/document/extractors/keywords.ts +35 -20
- package/src/document/extractors/questions.test.ts +1 -1
- package/src/document/extractors/questions.ts +21 -18
- package/src/document/extractors/summary.ts +16 -12
- package/src/document/extractors/title.test.ts +1 -1
- package/src/document/extractors/title.ts +53 -41
package/dist/index.js
CHANGED
|
@@ -1,9 +1,9 @@
|
|
|
1
|
+
import { Agent } from '@mastra/core/agent';
|
|
1
2
|
import { randomUUID, createHash } from 'crypto';
|
|
2
3
|
import { z } from 'zod';
|
|
3
4
|
import { parse } from 'node-html-better-parser';
|
|
4
5
|
import { encodingForModel, getEncoding } from 'js-tiktoken';
|
|
5
6
|
import { Big } from 'big.js';
|
|
6
|
-
import { Agent } from '@mastra/core/agent';
|
|
7
7
|
import { createSimilarityPrompt } from '@mastra/core/relevance';
|
|
8
8
|
import ZeroEntropy from 'zeroentropy';
|
|
9
9
|
import { createTool } from '@mastra/core/tools';
|
|
@@ -2285,7 +2285,7 @@ var openaiChatChunkSchema = z.union([
|
|
|
2285
2285
|
openaiErrorDataSchema
|
|
2286
2286
|
]);
|
|
2287
2287
|
function isReasoningModel(modelId) {
|
|
2288
|
-
return modelId.startsWith("o");
|
|
2288
|
+
return modelId.startsWith("o") || modelId.startsWith("gpt-5");
|
|
2289
2289
|
}
|
|
2290
2290
|
function isAudioModel(modelId) {
|
|
2291
2291
|
return modelId.startsWith("gpt-4o-audio-preview");
|
|
@@ -3808,7 +3808,7 @@ function isErrorChunk(chunk) {
|
|
|
3808
3808
|
return chunk.type === "error";
|
|
3809
3809
|
}
|
|
3810
3810
|
function getResponsesModelConfig(modelId) {
|
|
3811
|
-
if (modelId.startsWith("o")) {
|
|
3811
|
+
if (modelId.startsWith("o") || modelId.startsWith("gpt-5")) {
|
|
3812
3812
|
if (modelId.startsWith("o1-mini") || modelId.startsWith("o1-preview")) {
|
|
3813
3813
|
return {
|
|
3814
3814
|
isReasoningModel: true,
|
|
@@ -4134,64 +4134,65 @@ var TitleExtractor = class extends BaseExtractor {
|
|
|
4134
4134
|
for (const [key, nodes] of Object.entries(nodesByDocument)) {
|
|
4135
4135
|
const titleCandidates = await this.getTitlesCandidates(nodes);
|
|
4136
4136
|
const combinedTitles = titleCandidates.join(", ");
|
|
4137
|
-
const completion = await this.llm.doGenerate({
|
|
4138
|
-
inputFormat: "messages",
|
|
4139
|
-
mode: { type: "regular" },
|
|
4140
|
-
prompt: [
|
|
4141
|
-
{
|
|
4142
|
-
role: "user",
|
|
4143
|
-
content: [
|
|
4144
|
-
{
|
|
4145
|
-
type: "text",
|
|
4146
|
-
text: this.combineTemplate.format({
|
|
4147
|
-
context: combinedTitles
|
|
4148
|
-
})
|
|
4149
|
-
}
|
|
4150
|
-
]
|
|
4151
|
-
}
|
|
4152
|
-
]
|
|
4153
|
-
});
|
|
4154
4137
|
let title = "";
|
|
4155
|
-
if (
|
|
4156
|
-
|
|
4138
|
+
if (this.llm.specificationVersion === "v2") {
|
|
4139
|
+
const miniAgent = new Agent({
|
|
4140
|
+
model: this.llm,
|
|
4141
|
+
name: "title-extractor",
|
|
4142
|
+
instructions: "You are a title extractor. You are given a list of nodes and you need to extract the title from the nodes."
|
|
4143
|
+
});
|
|
4144
|
+
const result = await miniAgent.generateVNext(
|
|
4145
|
+
[{ role: "user", content: this.combineTemplate.format({ context: combinedTitles }) }],
|
|
4146
|
+
{ format: "mastra" }
|
|
4147
|
+
);
|
|
4148
|
+
title = result.text;
|
|
4157
4149
|
} else {
|
|
4158
|
-
|
|
4150
|
+
const miniAgent = new Agent({
|
|
4151
|
+
model: this.llm,
|
|
4152
|
+
name: "title-extractor",
|
|
4153
|
+
instructions: "You are a title extractor. You are given a list of nodes and you need to extract the title from the nodes."
|
|
4154
|
+
});
|
|
4155
|
+
const result = await miniAgent.generate([
|
|
4156
|
+
{ role: "user", content: this.combineTemplate.format({ context: combinedTitles }) }
|
|
4157
|
+
]);
|
|
4158
|
+
title = result.text;
|
|
4159
|
+
}
|
|
4160
|
+
if (!title) {
|
|
4161
|
+
console.warn("Title extraction LLM output returned empty");
|
|
4159
4162
|
}
|
|
4160
4163
|
titlesByDocument[key] = title;
|
|
4161
4164
|
}
|
|
4162
4165
|
return titlesByDocument;
|
|
4163
4166
|
}
|
|
4164
4167
|
async getTitlesCandidates(nodes) {
|
|
4168
|
+
const miniAgent = new Agent({
|
|
4169
|
+
model: this.llm,
|
|
4170
|
+
name: "titles-candidates-extractor",
|
|
4171
|
+
instructions: "You are a titles candidates extractor. You are given a list of nodes and you need to extract the titles candidates from the nodes."
|
|
4172
|
+
});
|
|
4165
4173
|
const titleJobs = nodes.map(async (node) => {
|
|
4166
|
-
|
|
4167
|
-
|
|
4168
|
-
|
|
4169
|
-
|
|
4170
|
-
{
|
|
4171
|
-
|
|
4172
|
-
|
|
4173
|
-
{
|
|
4174
|
-
type: "text",
|
|
4175
|
-
text: this.nodeTemplate.format({
|
|
4176
|
-
context: node.getContent()
|
|
4177
|
-
})
|
|
4178
|
-
}
|
|
4179
|
-
]
|
|
4180
|
-
}
|
|
4181
|
-
]
|
|
4182
|
-
});
|
|
4183
|
-
if (typeof completion.text === "string") {
|
|
4184
|
-
return completion.text.trim();
|
|
4174
|
+
let completion;
|
|
4175
|
+
if (this.llm.specificationVersion === "v2") {
|
|
4176
|
+
const result = await miniAgent.generateVNext(
|
|
4177
|
+
[{ role: "user", content: this.nodeTemplate.format({ context: node.getContent() }) }],
|
|
4178
|
+
{ format: "mastra" }
|
|
4179
|
+
);
|
|
4180
|
+
completion = result.text;
|
|
4185
4181
|
} else {
|
|
4186
|
-
|
|
4182
|
+
const result = await miniAgent.generate([
|
|
4183
|
+
{ role: "user", content: this.nodeTemplate.format({ context: node.getContent() }) }
|
|
4184
|
+
]);
|
|
4185
|
+
completion = result.text;
|
|
4186
|
+
}
|
|
4187
|
+
if (!completion) {
|
|
4188
|
+
console.warn("Title candidate extraction LLM output returned empty");
|
|
4187
4189
|
return "";
|
|
4188
4190
|
}
|
|
4191
|
+
return completion.trim();
|
|
4189
4192
|
});
|
|
4190
4193
|
return await Promise.all(titleJobs);
|
|
4191
4194
|
}
|
|
4192
4195
|
};
|
|
4193
|
-
|
|
4194
|
-
// src/document/extractors/summary.ts
|
|
4195
4196
|
var SummaryExtractor = class extends BaseExtractor {
|
|
4196
4197
|
llm;
|
|
4197
4198
|
summaries;
|
|
@@ -4231,21 +4232,22 @@ var SummaryExtractor = class extends BaseExtractor {
|
|
|
4231
4232
|
const prompt = this.promptTemplate.format({
|
|
4232
4233
|
context
|
|
4233
4234
|
});
|
|
4234
|
-
const
|
|
4235
|
-
|
|
4236
|
-
|
|
4237
|
-
|
|
4238
|
-
{
|
|
4239
|
-
role: "user",
|
|
4240
|
-
content: [{ type: "text", text: prompt }]
|
|
4241
|
-
}
|
|
4242
|
-
]
|
|
4235
|
+
const miniAgent = new Agent({
|
|
4236
|
+
model: this.llm,
|
|
4237
|
+
name: "summary-extractor",
|
|
4238
|
+
instructions: "You are a summary extractor. You are given a node and you need to extract the summary from the node."
|
|
4243
4239
|
});
|
|
4244
4240
|
let summary = "";
|
|
4245
|
-
if (
|
|
4246
|
-
|
|
4241
|
+
if (this.llm.specificationVersion === "v2") {
|
|
4242
|
+
const result = await miniAgent.generateVNext([{ role: "user", content: prompt }], { format: "mastra" });
|
|
4243
|
+
summary = result.text;
|
|
4247
4244
|
} else {
|
|
4248
|
-
|
|
4245
|
+
const result = await miniAgent.generate([{ role: "user", content: prompt }]);
|
|
4246
|
+
summary = result.text;
|
|
4247
|
+
}
|
|
4248
|
+
if (!summary) {
|
|
4249
|
+
console.warn("Summary extraction LLM output returned empty");
|
|
4250
|
+
return "";
|
|
4249
4251
|
}
|
|
4250
4252
|
return summary.replace(STRIP_REGEX, "");
|
|
4251
4253
|
}
|
|
@@ -4272,8 +4274,6 @@ var SummaryExtractor = class extends BaseExtractor {
|
|
|
4272
4274
|
return metadataList;
|
|
4273
4275
|
}
|
|
4274
4276
|
};
|
|
4275
|
-
|
|
4276
|
-
// src/document/extractors/questions.ts
|
|
4277
4277
|
var QuestionsAnsweredExtractor = class extends BaseExtractor {
|
|
4278
4278
|
llm;
|
|
4279
4279
|
questions = 5;
|
|
@@ -4317,26 +4317,24 @@ var QuestionsAnsweredExtractor = class extends BaseExtractor {
|
|
|
4317
4317
|
context: contextStr,
|
|
4318
4318
|
numQuestions: this.questions.toString()
|
|
4319
4319
|
});
|
|
4320
|
-
const
|
|
4321
|
-
|
|
4322
|
-
|
|
4323
|
-
|
|
4324
|
-
{
|
|
4325
|
-
role: "user",
|
|
4326
|
-
content: [{ type: "text", text: prompt }]
|
|
4327
|
-
}
|
|
4328
|
-
]
|
|
4320
|
+
const miniAgent = new Agent({
|
|
4321
|
+
model: this.llm,
|
|
4322
|
+
name: "question-extractor",
|
|
4323
|
+
instructions: "You are a question extractor. You are given a node and you need to extract the questions from the node."
|
|
4329
4324
|
});
|
|
4330
|
-
let
|
|
4331
|
-
|
|
4332
|
-
|
|
4333
|
-
|
|
4334
|
-
|
|
4335
|
-
|
|
4336
|
-
|
|
4337
|
-
}
|
|
4338
|
-
|
|
4325
|
+
let questionsText = "";
|
|
4326
|
+
if (this.llm.specificationVersion === "v2") {
|
|
4327
|
+
const result2 = await miniAgent.generateVNext([{ role: "user", content: prompt }], { format: "mastra" });
|
|
4328
|
+
questionsText = result2.text;
|
|
4329
|
+
} else {
|
|
4330
|
+
const result2 = await miniAgent.generate([{ role: "user", content: prompt }]);
|
|
4331
|
+
questionsText = result2.text;
|
|
4332
|
+
}
|
|
4333
|
+
if (!questionsText) {
|
|
4334
|
+
console.warn("Question extraction LLM output returned empty");
|
|
4335
|
+
return { questionsThisExcerptCanAnswer: "" };
|
|
4339
4336
|
}
|
|
4337
|
+
const result = questionsText.replace(STRIP_REGEX, "").trim();
|
|
4340
4338
|
return {
|
|
4341
4339
|
questionsThisExcerptCanAnswer: result
|
|
4342
4340
|
};
|
|
@@ -4351,8 +4349,6 @@ var QuestionsAnsweredExtractor = class extends BaseExtractor {
|
|
|
4351
4349
|
return results;
|
|
4352
4350
|
}
|
|
4353
4351
|
};
|
|
4354
|
-
|
|
4355
|
-
// src/document/extractors/keywords.ts
|
|
4356
4352
|
var KeywordExtractor = class extends BaseExtractor {
|
|
4357
4353
|
llm;
|
|
4358
4354
|
keywords = 5;
|
|
@@ -4393,33 +4389,43 @@ var KeywordExtractor = class extends BaseExtractor {
|
|
|
4393
4389
|
}
|
|
4394
4390
|
let keywords = "";
|
|
4395
4391
|
try {
|
|
4396
|
-
const
|
|
4397
|
-
|
|
4398
|
-
|
|
4399
|
-
|
|
4392
|
+
const miniAgent = new Agent({
|
|
4393
|
+
model: this.llm,
|
|
4394
|
+
name: "keyword-extractor",
|
|
4395
|
+
instructions: "You are a keyword extractor. You are given a node and you need to extract the keywords from the node."
|
|
4396
|
+
});
|
|
4397
|
+
if (this.llm.specificationVersion === "v2") {
|
|
4398
|
+
const result = await miniAgent.generateVNext(
|
|
4399
|
+
[
|
|
4400
|
+
{
|
|
4401
|
+
role: "user",
|
|
4402
|
+
content: this.promptTemplate.format({
|
|
4403
|
+
context: node.getContent(),
|
|
4404
|
+
maxKeywords: this.keywords.toString()
|
|
4405
|
+
})
|
|
4406
|
+
}
|
|
4407
|
+
],
|
|
4408
|
+
{ format: "mastra" }
|
|
4409
|
+
);
|
|
4410
|
+
keywords = result.text;
|
|
4411
|
+
} else {
|
|
4412
|
+
const result = await miniAgent.generate([
|
|
4400
4413
|
{
|
|
4401
4414
|
role: "user",
|
|
4402
|
-
content:
|
|
4403
|
-
{
|
|
4404
|
-
type: "text",
|
|
4405
|
-
text: this.promptTemplate.format({
|
|
4406
|
-
context: node.getContent(),
|
|
4407
|
-
maxKeywords: this.keywords.toString()
|
|
4408
|
-
})
|
|
4409
|
-
}
|
|
4410
|
-
]
|
|
4415
|
+
content: this.promptTemplate.format({ context: node.getContent(), maxKeywords: this.keywords.toString() })
|
|
4411
4416
|
}
|
|
4412
|
-
]
|
|
4413
|
-
|
|
4414
|
-
|
|
4415
|
-
|
|
4416
|
-
|
|
4417
|
-
|
|
4417
|
+
]);
|
|
4418
|
+
keywords = result.text;
|
|
4419
|
+
}
|
|
4420
|
+
if (!keywords) {
|
|
4421
|
+
console.warn("Keyword extraction LLM output returned empty");
|
|
4422
|
+
return { excerptKeywords: "" };
|
|
4418
4423
|
}
|
|
4424
|
+
return { excerptKeywords: keywords.trim() };
|
|
4419
4425
|
} catch (err) {
|
|
4420
4426
|
console.warn("Keyword extraction failed:", err);
|
|
4427
|
+
return { excerptKeywords: "" };
|
|
4421
4428
|
}
|
|
4422
|
-
return { excerptKeywords: keywords };
|
|
4423
4429
|
}
|
|
4424
4430
|
/**
|
|
4425
4431
|
*
|