@mastra/rag 2.0.0-beta.2 → 2.0.0-beta.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +58 -0
- package/dist/document/document.d.ts.map +1 -1
- package/dist/document/extractors/keywords.d.ts +2 -2
- package/dist/document/extractors/keywords.d.ts.map +1 -1
- package/dist/document/extractors/questions.d.ts +2 -2
- package/dist/document/extractors/questions.d.ts.map +1 -1
- package/dist/document/extractors/summary.d.ts.map +1 -1
- package/dist/document/extractors/title.d.ts +2 -2
- package/dist/document/extractors/title.d.ts.map +1 -1
- package/dist/document/extractors/types.d.ts +6 -6
- package/dist/document/extractors/types.d.ts.map +1 -1
- package/dist/document/transformers/html.d.ts +1 -0
- package/dist/document/transformers/html.d.ts.map +1 -1
- package/dist/index.cjs +68 -17
- package/dist/index.cjs.map +1 -1
- package/dist/index.js +70 -19
- package/dist/index.js.map +1 -1
- package/dist/rerank/relevance/mastra-agent/index.d.ts +2 -2
- package/dist/rerank/relevance/mastra-agent/index.d.ts.map +1 -1
- package/dist/utils/vector-search.d.ts.map +1 -1
- package/package.json +3 -4
package/dist/index.js
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { Agent } from '@mastra/core/agent';
|
|
1
|
+
import { Agent, isSupportedLanguageModel } from '@mastra/core/agent';
|
|
2
2
|
import { randomUUID, createHash } from 'crypto';
|
|
3
3
|
import { z } from 'zod';
|
|
4
4
|
import { parse } from 'node-html-better-parser';
|
|
@@ -7,7 +7,7 @@ import { Big } from 'big.js';
|
|
|
7
7
|
import { createSimilarityPrompt } from '@mastra/core/relevance';
|
|
8
8
|
import ZeroEntropy from 'zeroentropy';
|
|
9
9
|
import { createTool } from '@mastra/core/tools';
|
|
10
|
-
import { embedV2, embedV1 } from '@mastra/core/vector';
|
|
10
|
+
import { embedV3, embedV2, embedV1 } from '@mastra/core/vector';
|
|
11
11
|
|
|
12
12
|
var __create = Object.create;
|
|
13
13
|
var __defProp = Object.defineProperty;
|
|
@@ -266,7 +266,7 @@ Provide keywords in the following comma-separated format: 'KEYWORDS: <keywords>'
|
|
|
266
266
|
var defaultQuestionExtractPrompt = new PromptTemplate({
|
|
267
267
|
templateVars: ["numQuestions", "context"],
|
|
268
268
|
template: `(
|
|
269
|
-
"Given the contextual
|
|
269
|
+
"Given the contextual information below, generate {numQuestions} questions this context can provide specific answers to which are unlikely to be found elsewhere. Higher-level summaries of surrounding context may be provided as well. "
|
|
270
270
|
"Try using these summaries to generate better questions that this context can answer."
|
|
271
271
|
"---------------------"
|
|
272
272
|
"{context}"
|
|
@@ -4135,7 +4135,7 @@ var TitleExtractor = class extends BaseExtractor {
|
|
|
4135
4135
|
const titleCandidates = await this.getTitlesCandidates(nodes);
|
|
4136
4136
|
const combinedTitles = titleCandidates.join(", ");
|
|
4137
4137
|
let title = "";
|
|
4138
|
-
if (this.llm
|
|
4138
|
+
if (isSupportedLanguageModel(this.llm)) {
|
|
4139
4139
|
const miniAgent = new Agent({
|
|
4140
4140
|
id: "title-extractor",
|
|
4141
4141
|
model: this.llm,
|
|
@@ -4174,7 +4174,7 @@ var TitleExtractor = class extends BaseExtractor {
|
|
|
4174
4174
|
});
|
|
4175
4175
|
const titleJobs = nodes.map(async (node) => {
|
|
4176
4176
|
let completion;
|
|
4177
|
-
if (this.llm
|
|
4177
|
+
if (isSupportedLanguageModel(this.llm)) {
|
|
4178
4178
|
const result = await miniAgent.generate([
|
|
4179
4179
|
{ role: "user", content: this.nodeTemplate.format({ context: node.getContent() }) }
|
|
4180
4180
|
]);
|
|
@@ -4240,7 +4240,7 @@ var SummaryExtractor = class extends BaseExtractor {
|
|
|
4240
4240
|
instructions: "You are a summary extractor. You are given a node and you need to extract the summary from the node."
|
|
4241
4241
|
});
|
|
4242
4242
|
let summary = "";
|
|
4243
|
-
if (this.llm
|
|
4243
|
+
if (isSupportedLanguageModel(this.llm)) {
|
|
4244
4244
|
const result = await miniAgent.generate([{ role: "user", content: prompt }]);
|
|
4245
4245
|
summary = result.text;
|
|
4246
4246
|
} else {
|
|
@@ -4326,7 +4326,7 @@ var QuestionsAnsweredExtractor = class extends BaseExtractor {
|
|
|
4326
4326
|
instructions: "You are a question extractor. You are given a node and you need to extract the questions from the node."
|
|
4327
4327
|
});
|
|
4328
4328
|
let questionsText = "";
|
|
4329
|
-
if (this.llm
|
|
4329
|
+
if (isSupportedLanguageModel(this.llm)) {
|
|
4330
4330
|
const result2 = await miniAgent.generate([{ role: "user", content: prompt }]);
|
|
4331
4331
|
questionsText = result2.text;
|
|
4332
4332
|
} else {
|
|
@@ -4398,7 +4398,7 @@ var KeywordExtractor = class extends BaseExtractor {
|
|
|
4398
4398
|
name: "keyword-extractor",
|
|
4399
4399
|
instructions: "You are a keyword extractor. You are given a node and you need to extract the keywords from the node."
|
|
4400
4400
|
});
|
|
4401
|
-
if (this.llm
|
|
4401
|
+
if (isSupportedLanguageModel(this.llm)) {
|
|
4402
4402
|
const result = await miniAgent.generate([
|
|
4403
4403
|
{
|
|
4404
4404
|
role: "user",
|
|
@@ -4993,23 +4993,47 @@ var HTMLSectionTransformer = class {
|
|
|
4993
4993
|
}
|
|
4994
4994
|
return "/" + parts.join("/");
|
|
4995
4995
|
}
|
|
4996
|
+
getTextContent(element) {
|
|
4997
|
+
if (!element) return "";
|
|
4998
|
+
if (!element.tagName) {
|
|
4999
|
+
return element.text || "";
|
|
5000
|
+
}
|
|
5001
|
+
let content = element.text || "";
|
|
5002
|
+
if (element.childNodes) {
|
|
5003
|
+
for (const child of element.childNodes) {
|
|
5004
|
+
const childText = this.getTextContent(child);
|
|
5005
|
+
if (childText) {
|
|
5006
|
+
content += " " + childText;
|
|
5007
|
+
}
|
|
5008
|
+
}
|
|
5009
|
+
}
|
|
5010
|
+
return content.trim();
|
|
5011
|
+
}
|
|
4996
5012
|
splitHtmlByHeaders(htmlDoc) {
|
|
4997
5013
|
const sections = [];
|
|
4998
5014
|
const root = parse(htmlDoc);
|
|
4999
5015
|
const headers = Object.keys(this.headersToSplitOn);
|
|
5000
5016
|
const headerElements = root.querySelectorAll(headers.join(","));
|
|
5001
|
-
headerElements.forEach((headerElement
|
|
5017
|
+
headerElements.forEach((headerElement) => {
|
|
5002
5018
|
const header = headerElement.text?.trim() || "";
|
|
5003
5019
|
const tagName = headerElement.tagName;
|
|
5004
5020
|
const xpath = this.getXPath(headerElement);
|
|
5005
5021
|
let content = "";
|
|
5006
|
-
|
|
5007
|
-
|
|
5008
|
-
|
|
5009
|
-
|
|
5010
|
-
|
|
5022
|
+
const parentNode = headerElement.parentNode;
|
|
5023
|
+
if (parentNode && parentNode.childNodes) {
|
|
5024
|
+
let foundHeader = false;
|
|
5025
|
+
for (const node of parentNode.childNodes) {
|
|
5026
|
+
if (node === headerElement) {
|
|
5027
|
+
foundHeader = true;
|
|
5028
|
+
continue;
|
|
5029
|
+
}
|
|
5030
|
+
if (foundHeader && node.tagName && headers.includes(node.tagName.toLowerCase())) {
|
|
5031
|
+
break;
|
|
5032
|
+
}
|
|
5033
|
+
if (foundHeader) {
|
|
5034
|
+
content += this.getTextContent(node) + " ";
|
|
5035
|
+
}
|
|
5011
5036
|
}
|
|
5012
|
-
currentElement = currentElement.nextElementSibling;
|
|
5013
5037
|
}
|
|
5014
5038
|
content = content.trim();
|
|
5015
5039
|
sections.push({
|
|
@@ -6386,13 +6410,33 @@ var MDocument = class _MDocument {
|
|
|
6386
6410
|
async chunkHTML(options) {
|
|
6387
6411
|
if (options?.headers?.length) {
|
|
6388
6412
|
const rt = new HTMLHeaderTransformer(options);
|
|
6389
|
-
|
|
6413
|
+
let textSplit = rt.transformDocuments(this.chunks);
|
|
6414
|
+
if (options?.maxSize) {
|
|
6415
|
+
const textSplitter = new RecursiveCharacterTransformer({
|
|
6416
|
+
maxSize: options.maxSize,
|
|
6417
|
+
overlap: options.overlap,
|
|
6418
|
+
keepSeparator: options.keepSeparator,
|
|
6419
|
+
addStartIndex: options.addStartIndex,
|
|
6420
|
+
stripWhitespace: options.stripWhitespace
|
|
6421
|
+
});
|
|
6422
|
+
textSplit = textSplitter.splitDocuments(textSplit);
|
|
6423
|
+
}
|
|
6390
6424
|
this.chunks = textSplit;
|
|
6391
6425
|
return;
|
|
6392
6426
|
}
|
|
6393
6427
|
if (options?.sections?.length) {
|
|
6394
6428
|
const rt = new HTMLSectionTransformer(options);
|
|
6395
|
-
|
|
6429
|
+
let textSplit = rt.transformDocuments(this.chunks);
|
|
6430
|
+
if (options?.maxSize) {
|
|
6431
|
+
const textSplitter = new RecursiveCharacterTransformer({
|
|
6432
|
+
maxSize: options.maxSize,
|
|
6433
|
+
overlap: options.overlap,
|
|
6434
|
+
keepSeparator: options.keepSeparator,
|
|
6435
|
+
addStartIndex: options.addStartIndex,
|
|
6436
|
+
stripWhitespace: options.stripWhitespace
|
|
6437
|
+
});
|
|
6438
|
+
textSplit = textSplitter.splitDocuments(textSplit);
|
|
6439
|
+
}
|
|
6396
6440
|
this.chunks = textSplit;
|
|
6397
6441
|
return;
|
|
6398
6442
|
}
|
|
@@ -6544,7 +6588,7 @@ Always return just the number, no explanation.`,
|
|
|
6544
6588
|
const prompt = createSimilarityPrompt(query, text);
|
|
6545
6589
|
const model = await this.agent.getModel();
|
|
6546
6590
|
let response;
|
|
6547
|
-
if (model
|
|
6591
|
+
if (isSupportedLanguageModel(model)) {
|
|
6548
6592
|
response = await this.agent.generate(prompt);
|
|
6549
6593
|
} else {
|
|
6550
6594
|
response = await this.agent.generateLegacy(prompt);
|
|
@@ -6930,7 +6974,14 @@ var vectorQuerySearch = async ({
|
|
|
6930
6974
|
providerOptions
|
|
6931
6975
|
}) => {
|
|
6932
6976
|
let embeddingResult;
|
|
6933
|
-
if (model.specificationVersion === "
|
|
6977
|
+
if (model.specificationVersion === "v3") {
|
|
6978
|
+
embeddingResult = await embedV3({
|
|
6979
|
+
model,
|
|
6980
|
+
value: queryText,
|
|
6981
|
+
maxRetries,
|
|
6982
|
+
...providerOptions && { providerOptions }
|
|
6983
|
+
});
|
|
6984
|
+
} else if (model.specificationVersion === "v2") {
|
|
6934
6985
|
embeddingResult = await embedV2({
|
|
6935
6986
|
model,
|
|
6936
6987
|
value: queryText,
|