@mastra/rag 2.1.1 → 2.1.2

This diff compares the contents of two publicly available versions of a package, each released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
@@ -1 +1 @@
1
- {"version":3,"file":"token.d.ts","sourceRoot":"","sources":["../../../src/document/transformers/token.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,aAAa,EAAE,gBAAgB,EAAY,MAAM,aAAa,CAAC;AAE7E,OAAO,KAAK,EAAE,iBAAiB,EAAE,MAAM,UAAU,CAAC;AAElD,OAAO,EAAE,eAAe,EAAE,MAAM,QAAQ,CAAC;AAEzC,UAAU,SAAS;IACjB,OAAO,EAAE,MAAM,CAAC;IAChB,cAAc,EAAE,MAAM,CAAC;IACvB,MAAM,EAAE,CAAC,MAAM,EAAE,MAAM,EAAE,KAAK,MAAM,CAAC;IACrC,MAAM,EAAE,CAAC,IAAI,EAAE,MAAM,KAAK,MAAM,EAAE,CAAC;CACpC;AAED,wBAAgB,iBAAiB,CAAC,EAAE,IAAI,EAAE,SAAS,EAAE,EAAE;IAAE,IAAI,EAAE,MAAM,CAAC;IAAC,SAAS,EAAE,SAAS,CAAA;CAAE,GAAG,MAAM,EAAE,CAkBvG;AAED,qBAAa,gBAAiB,SAAQ,eAAe;IACnD,OAAO,CAAC,SAAS,CAAW;IAC5B,OAAO,CAAC,cAAc,CAAsB;IAC5C,OAAO,CAAC,iBAAiB,CAAsB;gBAEnC,EACV,YAA4B,EAC5B,SAAS,EACT,cAA0B,EAC1B,iBAAyB,EACzB,OAAY,GACb,EAAE;QACD,YAAY,CAAC,EAAE,gBAAgB,CAAC;QAChC,SAAS,CAAC,EAAE,aAAa,CAAC;QAC1B,cAAc,CAAC,EAAE,GAAG,CAAC,MAAM,CAAC,GAAG,KAAK,CAAC;QACrC,iBAAiB,CAAC,EAAE,GAAG,CAAC,MAAM,CAAC,GAAG,KAAK,CAAC;QACxC,OAAO,EAAE,iBAAiB,CAAC;KAC5B;IAaD,SAAS,CAAC,EAAE,IAAI,EAAE,EAAE;QAAE,IAAI,EAAE,MAAM,CAAA;KAAE,GAAG,MAAM,EAAE;IA0B/C,MAAM,CAAC,YAAY,CAAC,EAClB,YAA4B,EAC5B,SAAS,EACT,OAAY,GACb,EAAE;QACD,YAAY,CAAC,EAAE,gBAAgB,CAAC;QAChC,SAAS,CAAC,EAAE,aAAa,CAAC;QAC1B,OAAO,CAAC,EAAE,iBAAiB,CAAC;KAC7B,GAAG,gBAAgB;CAuCrB"}
1
+ {"version":3,"file":"token.d.ts","sourceRoot":"","sources":["../../../src/document/transformers/token.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,aAAa,EAAE,gBAAgB,EAAE,QAAQ,EAAE,MAAM,aAAa,CAAC;AAE7E,OAAO,KAAK,EAAE,iBAAiB,EAAE,MAAM,UAAU,CAAC;AAElD,OAAO,EAAE,eAAe,EAAE,MAAM,QAAQ,CAAC;AAEzC,UAAU,SAAS;IACjB,OAAO,EAAE,MAAM,CAAC;IAChB,cAAc,EAAE,MAAM,CAAC;IACvB,MAAM,EAAE,CAAC,MAAM,EAAE,MAAM,EAAE,KAAK,MAAM,CAAC;IACrC,MAAM,EAAE,CAAC,IAAI,EAAE,MAAM,KAAK,MAAM,EAAE,CAAC;CACpC;AAED,wBAAgB,iBAAiB,CAAC,EAAE,IAAI,EAAE,SAAS,EAAE,EAAE;IAAE,IAAI,EAAE,MAAM,CAAC;IAAC,SAAS,EAAE,SAAS,CAAA;CAAE,GAAG,MAAM,EAAE,CAkBvG;AAED,qBAAa,gBAAiB,SAAQ,eAAe;IACnD,OAAO,CAAC,SAAS,CAAW;IAC5B,OAAO,CAAC,YAAY,CAAmB;IACvC,OAAO,CAAC,eAAe,CAAmB;gBAE9B,EACV,YAA4B,EAC5B,SAAS,EACT,SAAS,EAAE,iBAAiB,EAC5B,cAA0B,EAC1B,iBAAyB,EACzB,OAAY,GACb,EAAE;QACD,YAAY,CAAC,EAAE,gBAAgB,CAAC;QAChC,SAAS,CAAC,EAAE,aAAa,CAAC;QAC1B,SAAS,CAAC,EAAE,QAAQ,CAAC;QACrB,cAAc,CAAC,EAAE,GAAG,CAAC,MAAM,CAAC,GAAG,KAAK,CAAC;QACrC,iBAAiB,CAAC,EAAE,GAAG,CAAC,MAAM,CAAC,GAAG,KAAK,CAAC;QACxC,OAAO,EAAE,iBAAiB,CAAC;KAC5B;IAiBD,SAAS,CAAC,EAAE,IAAI,EAAE,EAAE;QAAE,IAAI,EAAE,MAAM,CAAA;KAAE,GAAG,MAAM,EAAE;IAqB/C,MAAM,CAAC,YAAY,CAAC,EAClB,YAA4B,EAC5B,SAAS,EACT,OAAY,GACb,EAAE;QACD,YAAY,CAAC,EAAE,gBAAgB,CAAC;QAChC,SAAS,CAAC,EAAE,aAAa,CAAC;QAC1B,OAAO,CAAC,EAAE,iBAAiB,CAAC;KAC7B,GAAG,gBAAgB;CAwCrB"}
package/dist/index.cjs CHANGED
@@ -6072,31 +6072,34 @@ var MarkdownHeaderTransformer = class {
6072
6072
  var SemanticMarkdownTransformer = class _SemanticMarkdownTransformer extends TextTransformer {
6073
6073
  tokenizer;
6074
6074
  joinThreshold;
6075
- allowedSpecial;
6076
- disallowedSpecial;
6075
+ allowedArray;
6076
+ disallowedArray;
6077
6077
  constructor({
6078
6078
  joinThreshold = 500,
6079
6079
  encodingName = "cl100k_base",
6080
6080
  modelName,
6081
+ tokenizer: existingTokenizer,
6081
6082
  allowedSpecial = /* @__PURE__ */ new Set(),
6082
6083
  disallowedSpecial = "all",
6083
6084
  ...baseOptions
6084
6085
  } = {}) {
6085
6086
  super(baseOptions);
6086
6087
  this.joinThreshold = joinThreshold;
6087
- this.allowedSpecial = allowedSpecial;
6088
- this.disallowedSpecial = disallowedSpecial;
6089
- try {
6090
- this.tokenizer = modelName ? jsTiktoken.encodingForModel(modelName) : jsTiktoken.getEncoding(encodingName);
6091
- } catch {
6092
- throw new Error("Could not load tiktoken encoding. Please install it with `npm install js-tiktoken`.");
6088
+ this.allowedArray = allowedSpecial === "all" ? "all" : Array.from(allowedSpecial);
6089
+ this.disallowedArray = disallowedSpecial === "all" ? "all" : Array.from(disallowedSpecial);
6090
+ if (existingTokenizer) {
6091
+ this.tokenizer = existingTokenizer;
6092
+ } else {
6093
+ try {
6094
+ this.tokenizer = modelName ? jsTiktoken.encodingForModel(modelName) : jsTiktoken.getEncoding(encodingName);
6095
+ } catch {
6096
+ throw new Error("Could not load tiktoken encoding. Please install it with `npm install js-tiktoken`.");
6097
+ }
6093
6098
  }
6094
6099
  }
6095
6100
  countTokens(text) {
6096
- const allowed = this.allowedSpecial === "all" ? "all" : Array.from(this.allowedSpecial);
6097
- const disallowed = this.disallowedSpecial === "all" ? "all" : Array.from(this.disallowedSpecial);
6098
6101
  const processedText = this.stripWhitespace ? text.trim() : text;
6099
- return this.tokenizer.encode(processedText, allowed, disallowed).length;
6102
+ return this.tokenizer.encode(processedText, this.allowedArray, this.disallowedArray).length;
6100
6103
  }
6101
6104
  splitMarkdownByHeaders(markdown) {
6102
6105
  const sections = [];
@@ -6150,14 +6153,21 @@ var SemanticMarkdownTransformer = class _SemanticMarkdownTransformer extends Tex
6150
6153
  const current = workingSections[j];
6151
6154
  if (current.depth === depth) {
6152
6155
  const prev = workingSections[j - 1];
6153
- if (prev.length + current.length < this.joinThreshold && prev.depth <= current.depth) {
6154
- const title = `${"#".repeat(current.depth)} ${current.title}`;
6155
- const formattedTitle = `
6156
+ const title = `${"#".repeat(current.depth)} ${current.title}`;
6157
+ const formattedTitle = `
6156
6158
 
6157
6159
  ${title}`;
6160
+ const headerLength = this.tokenizer.encode(
6161
+ `${formattedTitle}
6162
+ `,
6163
+ this.allowedArray,
6164
+ this.disallowedArray
6165
+ ).length;
6166
+ const mergedLength = prev.length + current.length + headerLength;
6167
+ if (mergedLength < this.joinThreshold && prev.depth <= current.depth) {
6158
6168
  prev.content += `${formattedTitle}
6159
6169
  ${current.content}`;
6160
- prev.length = this.countTokens(prev.content);
6170
+ prev.length = mergedLength;
6161
6171
  workingSections.splice(j, 1);
6162
6172
  j--;
6163
6173
  }
@@ -6227,6 +6237,7 @@ ${section.content}`;
6227
6237
  ...options,
6228
6238
  encodingName,
6229
6239
  modelName,
6240
+ tokenizer,
6230
6241
  lengthFunction: tikTokenCounter
6231
6242
  });
6232
6243
  }
@@ -6481,30 +6492,33 @@ function splitTextOnTokens({ text, tokenizer }) {
6481
6492
  }
6482
6493
  var TokenTransformer = class _TokenTransformer extends TextTransformer {
6483
6494
  tokenizer;
6484
- allowedSpecial;
6485
- disallowedSpecial;
6495
+ allowedArray;
6496
+ disallowedArray;
6486
6497
  constructor({
6487
6498
  encodingName = "cl100k_base",
6488
6499
  modelName,
6500
+ tokenizer: existingTokenizer,
6489
6501
  allowedSpecial = /* @__PURE__ */ new Set(),
6490
6502
  disallowedSpecial = "all",
6491
6503
  options = {}
6492
6504
  }) {
6493
6505
  super(options);
6494
- try {
6495
- this.tokenizer = modelName ? jsTiktoken.encodingForModel(modelName) : jsTiktoken.getEncoding(encodingName);
6496
- } catch {
6497
- throw new Error("Could not load tiktoken encoding. Please install it with `npm install js-tiktoken`.");
6506
+ if (existingTokenizer) {
6507
+ this.tokenizer = existingTokenizer;
6508
+ } else {
6509
+ try {
6510
+ this.tokenizer = modelName ? jsTiktoken.encodingForModel(modelName) : jsTiktoken.getEncoding(encodingName);
6511
+ } catch {
6512
+ throw new Error("Could not load tiktoken encoding. Please install it with `npm install js-tiktoken`.");
6513
+ }
6498
6514
  }
6499
- this.allowedSpecial = allowedSpecial;
6500
- this.disallowedSpecial = disallowedSpecial;
6515
+ this.allowedArray = allowedSpecial === "all" ? "all" : Array.from(allowedSpecial);
6516
+ this.disallowedArray = disallowedSpecial === "all" ? "all" : Array.from(disallowedSpecial);
6501
6517
  }
6502
6518
  splitText({ text }) {
6503
6519
  const encode = (text2) => {
6504
- const allowed = this.allowedSpecial === "all" ? "all" : Array.from(this.allowedSpecial);
6505
- const disallowed = this.disallowedSpecial === "all" ? "all" : Array.from(this.disallowedSpecial);
6506
6520
  const processedText = this.stripWhitespace ? text2.trim() : text2;
6507
- return Array.from(this.tokenizer.encode(processedText, allowed, disallowed));
6521
+ return Array.from(this.tokenizer.encode(processedText, this.allowedArray, this.disallowedArray));
6508
6522
  };
6509
6523
  const decode = (tokens) => {
6510
6524
  const text2 = this.tokenizer.decode(tokens);
@@ -6541,6 +6555,7 @@ var TokenTransformer = class _TokenTransformer extends TextTransformer {
6541
6555
  return new _TokenTransformer({
6542
6556
  encodingName,
6543
6557
  modelName,
6558
+ tokenizer,
6544
6559
  allowedSpecial: options.allowedSpecial,
6545
6560
  disallowedSpecial: options.disallowedSpecial,
6546
6561
  options: {