@mastra/rag 0.1.8-alpha.3 → 0.1.8-alpha.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,23 +1,23 @@
1
1
 
2
- > @mastra/rag@0.1.8-alpha.3 build /home/runner/work/mastra/mastra/packages/rag
2
+ > @mastra/rag@0.1.8-alpha.5 build /home/runner/work/mastra/mastra/packages/rag
3
3
  > tsup src/index.ts --format esm,cjs --experimental-dts --clean --treeshake
4
4
 
5
5
  CLI Building entry: src/index.ts
6
6
  CLI Using tsconfig: tsconfig.json
7
7
  CLI tsup v8.3.6
8
8
  TSC Build start
9
- TSC ⚡️ Build success in 27174ms
9
+ TSC ⚡️ Build success in 23355ms
10
10
  DTS Build start
11
11
  CLI Target: es2022
12
12
  Analysis will use the bundled TypeScript version 5.7.3
13
13
  Writing package typings: /home/runner/work/mastra/mastra/packages/rag/dist/_tsup-dts-rollup.d.ts
14
14
  Analysis will use the bundled TypeScript version 5.7.3
15
15
  Writing package typings: /home/runner/work/mastra/mastra/packages/rag/dist/_tsup-dts-rollup.d.cts
16
- DTS ⚡️ Build success in 37763ms
16
+ DTS ⚡️ Build success in 23003ms
17
17
  CLI Cleaning output folder
18
18
  ESM Build start
19
19
  CJS Build start
20
- CJS dist/index.cjs 83.86 KB
21
- CJS ⚡️ Build success in 2533ms
22
- ESM dist/index.js 83.18 KB
23
- ESM ⚡️ Build success in 2536ms
20
+ ESM dist/index.js 89.94 KB
21
+ ESM ⚡️ Build success in 684ms
22
+ CJS dist/index.cjs 90.62 KB
23
+ CJS ⚡️ Build success in 685ms
package/CHANGELOG.md CHANGED
@@ -1,5 +1,35 @@
1
1
  # @mastra/rag
2
2
 
3
+ ## 0.1.8-alpha.5
4
+
5
+ ### Patch Changes
6
+
7
+ - 9e81f35: Fix query filter for vector search and rerank
8
+ - 9e0f2c9: Update JSON Chunking to fix recursive issue
9
+ - Updated dependencies [22643eb]
10
+ - Updated dependencies [6feb23f]
11
+ - Updated dependencies [f2d6727]
12
+ - Updated dependencies [301e4ee]
13
+ - Updated dependencies [dfbe4e9]
14
+ - Updated dependencies [9e81f35]
15
+ - Updated dependencies [caefaa2]
16
+ - Updated dependencies [c151ae6]
17
+ - Updated dependencies [52e0418]
18
+ - Updated dependencies [03236ec]
19
+ - Updated dependencies [3764e71]
20
+ - Updated dependencies [df982db]
21
+ - Updated dependencies [0461849]
22
+ - Updated dependencies [2259379]
23
+ - Updated dependencies [358f069]
24
+ - @mastra/core@0.5.0-alpha.5
25
+
26
+ ## 0.1.8-alpha.4
27
+
28
+ ### Patch Changes
29
+
30
+ - Updated dependencies [d79aedf]
31
+ - @mastra/core@0.5.0-alpha.4
32
+
3
33
  ## 0.1.8-alpha.3
4
34
 
5
35
  ### Patch Changes
@@ -12,6 +12,7 @@ import type { TiktokenEncoding } from 'js-tiktoken';
12
12
  import type { TiktokenModel } from 'js-tiktoken';
13
13
  import type { TitleCombinePrompt } from 'llamaindex';
14
14
  import type { TitleExtractorPrompt } from 'llamaindex';
15
+ import type { VectorFilter } from '@mastra/core/vector/filter';
15
16
 
16
17
  /**
17
18
  * Vector store specific prompts that detail supported operators and examples.
@@ -404,7 +405,37 @@ export declare class RecursiveJsonTransformer {
404
405
  */
405
406
  private listToDictPreprocessing;
406
407
  /**
407
- * Split json into maximum size dictionaries while preserving structure
408
+ * Handles primitive values (strings, numbers, etc) by either adding them to the current chunk
409
+ * or creating new chunks if they don't fit
410
+ */
411
+ private handlePrimitiveValue;
412
+ /**
413
+ * Creates a nested dictionary chunk from a value and path
414
+ * e.g., path ['a', 'b'], value 'c' becomes { a: { b: 'c' } }
415
+ */
416
+ private createChunk;
417
+ /**
418
+ * Checks if value is within size limits
419
+ */
420
+ private isWithinSizeLimit;
421
+ /**
422
+ * Splits arrays into chunks based on size limits
423
+ * Handles nested objects by recursing into handleNestedObject
424
+ */
425
+ private handleArray;
426
+ /**
427
+ * Splits objects into chunks based on size limits
428
+ * Handles nested arrays and objects by recursing into handleArray and handleNestedObject
429
+ */
430
+ private handleNestedObject;
431
+ /**
432
+ * Splits long strings into smaller chunks at word boundaries
433
+ * Ensures each chunk is within maxSize limit
434
+ */
435
+ private splitLongString;
436
+ /**
437
+ * Core chunking logic that processes JSON data recursively
438
+ * Handles arrays, objects, and primitive values while maintaining structure
408
439
  */
409
440
  private jsonSplit;
410
441
  /**
@@ -414,6 +445,10 @@ export declare class RecursiveJsonTransformer {
414
445
  jsonData: Record<string, any>;
415
446
  convertLists?: boolean;
416
447
  }): Record<string, any>[];
448
+ /**
449
+ * Converts Unicode characters to their escaped ASCII representation
450
+ * e.g., 'café' becomes 'caf\u00e9'
451
+ */
417
452
  private escapeNonAscii;
418
453
  /**
419
454
  * Splits JSON into a list of JSON formatted strings
@@ -600,7 +635,7 @@ declare interface VectorQuerySearchParams {
600
635
  vectorStore: MastraVector;
601
636
  queryText: string;
602
637
  model: EmbeddingModel<string>;
603
- queryFilter?: any;
638
+ queryFilter?: VectorFilter;
604
639
  topK: number;
605
640
  includeVectors?: boolean;
606
641
  maxRetries?: number;
@@ -12,6 +12,7 @@ import type { TiktokenEncoding } from 'js-tiktoken';
12
12
  import type { TiktokenModel } from 'js-tiktoken';
13
13
  import type { TitleCombinePrompt } from 'llamaindex';
14
14
  import type { TitleExtractorPrompt } from 'llamaindex';
15
+ import type { VectorFilter } from '@mastra/core/vector/filter';
15
16
 
16
17
  /**
17
18
  * Vector store specific prompts that detail supported operators and examples.
@@ -404,7 +405,37 @@ export declare class RecursiveJsonTransformer {
404
405
  */
405
406
  private listToDictPreprocessing;
406
407
  /**
407
- * Split json into maximum size dictionaries while preserving structure
408
+ * Handles primitive values (strings, numbers, etc) by either adding them to the current chunk
409
+ * or creating new chunks if they don't fit
410
+ */
411
+ private handlePrimitiveValue;
412
+ /**
413
+ * Creates a nested dictionary chunk from a value and path
414
+ * e.g., path ['a', 'b'], value 'c' becomes { a: { b: 'c' } }
415
+ */
416
+ private createChunk;
417
+ /**
418
+ * Checks if value is within size limits
419
+ */
420
+ private isWithinSizeLimit;
421
+ /**
422
+ * Splits arrays into chunks based on size limits
423
+ * Handles nested objects by recursing into handleNestedObject
424
+ */
425
+ private handleArray;
426
+ /**
427
+ * Splits objects into chunks based on size limits
428
+ * Handles nested arrays and objects by recursing into handleArray and handleNestedObject
429
+ */
430
+ private handleNestedObject;
431
+ /**
432
+ * Splits long strings into smaller chunks at word boundaries
433
+ * Ensures each chunk is within maxSize limit
434
+ */
435
+ private splitLongString;
436
+ /**
437
+ * Core chunking logic that processes JSON data recursively
438
+ * Handles arrays, objects, and primitive values while maintaining structure
408
439
  */
409
440
  private jsonSplit;
410
441
  /**
@@ -414,6 +445,10 @@ export declare class RecursiveJsonTransformer {
414
445
  jsonData: Record<string, any>;
415
446
  convertLists?: boolean;
416
447
  }): Record<string, any>[];
448
+ /**
449
+ * Converts Unicode characters to their escaped ASCII representation
450
+ * e.g., 'café' becomes 'caf\u00e9'
451
+ */
417
452
  private escapeNonAscii;
418
453
  /**
419
454
  * Splits JSON into a list of JSON formatted strings
@@ -600,7 +635,7 @@ declare interface VectorQuerySearchParams {
600
635
  vectorStore: MastraVector;
601
636
  queryText: string;
602
637
  model: EmbeddingModel<string>;
603
- queryFilter?: any;
638
+ queryFilter?: VectorFilter;
604
639
  topK: number;
605
640
  includeVectors?: boolean;
606
641
  maxRetries?: number;
package/dist/index.cjs CHANGED
@@ -638,7 +638,8 @@ var RecursiveJsonTransformer = class _RecursiveJsonTransformer {
638
638
  return safeObj;
639
639
  }
640
640
  const stringifiable = getStringifiableData(data);
641
- return JSON.stringify(stringifiable).length;
641
+ const jsonString = JSON.stringify(stringifiable);
642
+ return jsonString.length;
642
643
  }
643
644
  /**
644
645
  * Transform JSON data while handling circular references
@@ -694,36 +695,216 @@ var RecursiveJsonTransformer = class _RecursiveJsonTransformer {
694
695
  return data;
695
696
  }
696
697
  /**
697
- * Split json into maximum size dictionaries while preserving structure
698
+ * Handles primitive values (strings, numbers, etc) by either adding them to the current chunk
699
+ * or creating new chunks if they don't fit
700
+ */
701
+ handlePrimitiveValue(value, key, currentChunk, chunks, fullPath) {
702
+ const testValue = { [key]: value };
703
+ if (_RecursiveJsonTransformer.jsonSize(testValue) <= this.maxSize) {
704
+ if (_RecursiveJsonTransformer.jsonSize({ ...currentChunk, ...testValue }) <= this.maxSize) {
705
+ return {
706
+ currentChunk: { ...currentChunk, ...testValue },
707
+ chunks
708
+ };
709
+ } else {
710
+ return {
711
+ currentChunk: testValue,
712
+ chunks: [...chunks, currentChunk]
713
+ };
714
+ }
715
+ } else if (typeof value === "string") {
716
+ const stringChunks = this.splitLongString(value);
717
+ const newChunks = stringChunks.map((chunk) => {
718
+ return this.createChunk(chunk, fullPath);
719
+ }).filter((chunk) => _RecursiveJsonTransformer.jsonSize(chunk) <= this.maxSize);
720
+ return {
721
+ currentChunk,
722
+ chunks: [...chunks, ...newChunks]
723
+ };
724
+ }
725
+ const newChunk = this.createChunk(value, fullPath);
726
+ return {
727
+ currentChunk,
728
+ chunks: _RecursiveJsonTransformer.jsonSize(newChunk) <= this.maxSize ? [...chunks, newChunk] : chunks
729
+ };
730
+ }
731
+ /**
732
+ * Creates a nested dictionary chunk from a value and path
733
+ * e.g., path ['a', 'b'], value 'c' becomes { a: { b: 'c' } }
734
+ */
735
+ createChunk(value, path) {
736
+ const chunk = {};
737
+ _RecursiveJsonTransformer.setNestedDict(chunk, path, value);
738
+ return chunk.root ? chunk.root : chunk;
739
+ }
740
+ /**
741
+ * Checks if value is within size limits
742
+ */
743
+ isWithinSizeLimit(value, currentSize = 0) {
744
+ const size = _RecursiveJsonTransformer.jsonSize(value);
745
+ return currentSize === 0 ? size <= this.maxSize : size + currentSize <= this.maxSize || currentSize < this.minSize;
746
+ }
747
+ /**
748
+ * Splits arrays into chunks based on size limits
749
+ * Handles nested objects by recursing into handleNestedObject
750
+ */
751
+ handleArray(value, key, currentPath, depth, maxDepth) {
752
+ const path = currentPath.length ? [...currentPath, key] : ["root", key];
753
+ const chunk = this.createChunk(value, path);
754
+ if (this.isWithinSizeLimit(chunk)) {
755
+ return [chunk];
756
+ }
757
+ const chunks = [];
758
+ let currentGroup = [];
759
+ const saveCurrentGroup = () => {
760
+ if (currentGroup.length > 0) {
761
+ const groupChunk = this.createChunk(currentGroup, path);
762
+ if (_RecursiveJsonTransformer.jsonSize(groupChunk) >= this.minSize) {
763
+ chunks.push(groupChunk);
764
+ currentGroup = [];
765
+ }
766
+ }
767
+ };
768
+ for (const item of value) {
769
+ const testGroup = [...currentGroup, item];
770
+ const testChunk = this.createChunk(testGroup, path);
771
+ if (this.isWithinSizeLimit(testChunk)) {
772
+ currentGroup = testGroup;
773
+ continue;
774
+ }
775
+ saveCurrentGroup();
776
+ if (typeof item === "object" && item !== null) {
777
+ const singleItemArray = [item];
778
+ const singleItemChunk = this.createChunk(singleItemArray, path);
779
+ if (this.isWithinSizeLimit(singleItemChunk)) {
780
+ currentGroup = singleItemArray;
781
+ } else {
782
+ const itemPath = [...path, String(chunks.length)];
783
+ const nestedChunks = this.handleNestedObject(item, itemPath, depth + 1, maxDepth);
784
+ chunks.push(...nestedChunks);
785
+ }
786
+ } else {
787
+ currentGroup = [item];
788
+ }
789
+ }
790
+ saveCurrentGroup();
791
+ return chunks;
792
+ }
793
+ /**
794
+ * Splits objects into chunks based on size limits
795
+ * Handles nested arrays and objects by recursing into handleArray and handleNestedObject
796
+ */
797
+ handleNestedObject(value, fullPath, depth, maxDepth) {
798
+ const path = fullPath.length ? fullPath : ["root"];
799
+ if (depth > maxDepth) {
800
+ console.warn(`Maximum depth of ${maxDepth} exceeded, flattening remaining structure`);
801
+ return [this.createChunk(value, path)];
802
+ }
803
+ const wholeChunk = this.createChunk(value, path);
804
+ if (this.isWithinSizeLimit(wholeChunk)) {
805
+ return [wholeChunk];
806
+ }
807
+ const chunks = [];
808
+ let currentChunk = {};
809
+ const saveCurrentChunk = () => {
810
+ if (Object.keys(currentChunk).length > 0) {
811
+ const objChunk = this.createChunk(currentChunk, path);
812
+ if (_RecursiveJsonTransformer.jsonSize(objChunk) >= this.minSize) {
813
+ chunks.push(objChunk);
814
+ currentChunk = {};
815
+ }
816
+ }
817
+ };
818
+ for (const [key, val] of Object.entries(value)) {
819
+ if (val === void 0) continue;
820
+ if (Array.isArray(val)) {
821
+ saveCurrentChunk();
822
+ const arrayChunks = this.handleArray(val, key, path, depth, maxDepth);
823
+ chunks.push(...arrayChunks);
824
+ continue;
825
+ }
826
+ const testChunk = this.createChunk({ ...currentChunk, [key]: val }, path);
827
+ if (this.isWithinSizeLimit(testChunk)) {
828
+ currentChunk[key] = val;
829
+ continue;
830
+ }
831
+ saveCurrentChunk();
832
+ if (typeof val === "object" && val !== null) {
833
+ const nestedChunks = this.handleNestedObject(val, [...path, key], depth + 1, maxDepth);
834
+ chunks.push(...nestedChunks);
835
+ } else {
836
+ currentChunk = { [key]: val };
837
+ }
838
+ }
839
+ saveCurrentChunk();
840
+ return chunks;
841
+ }
842
+ /**
843
+ * Splits long strings into smaller chunks at word boundaries
844
+ * Ensures each chunk is within maxSize limit
845
+ */
846
+ splitLongString(value) {
847
+ const chunks = [];
848
+ let remaining = value;
849
+ while (remaining.length > 0) {
850
+ const overhead = 20;
851
+ const chunkSize = Math.floor(this.maxSize - overhead);
852
+ if (remaining.length <= chunkSize) {
853
+ chunks.push(remaining);
854
+ break;
855
+ }
856
+ const lastSpace = remaining.slice(0, chunkSize).lastIndexOf(" ");
857
+ const splitAt = lastSpace > 0 ? lastSpace + 1 : chunkSize;
858
+ chunks.push(remaining.slice(0, splitAt));
859
+ remaining = remaining.slice(splitAt);
860
+ }
861
+ return chunks;
862
+ }
863
+ /**
864
+ * Core chunking logic that processes JSON data recursively
865
+ * Handles arrays, objects, and primitive values while maintaining structure
698
866
  */
699
867
  jsonSplit({
700
868
  data,
701
869
  currentPath = [],
702
- chunks = [{}]
870
+ chunks = [{}],
871
+ depth = 0,
872
+ maxDepth = 100
703
873
  }) {
704
- if (data && typeof data === "object" && !Array.isArray(data)) {
705
- for (const [key, value] of Object.entries(data)) {
706
- const newPath = [...currentPath, key];
707
- const chunkSize = _RecursiveJsonTransformer.jsonSize(chunks[chunks.length - 1] || {});
708
- const size = _RecursiveJsonTransformer.jsonSize({ [key]: value });
709
- const remaining = this.maxSize - chunkSize;
710
- if (size < remaining) {
711
- _RecursiveJsonTransformer.setNestedDict(chunks[chunks.length - 1] || {}, newPath, value);
712
- } else {
713
- if (chunkSize >= this.minSize) {
714
- chunks.push({});
715
- }
716
- this.jsonSplit({
717
- data: typeof value === "object" ? value : { [key]: value },
718
- currentPath: newPath,
719
- chunks
720
- });
721
- }
722
- }
723
- } else {
874
+ if (!data || typeof data !== "object") {
875
+ return chunks;
876
+ }
877
+ if (depth > maxDepth) {
878
+ console.warn(`Maximum depth of ${maxDepth} exceeded, flattening remaining structure`);
724
879
  _RecursiveJsonTransformer.setNestedDict(chunks[chunks.length - 1] || {}, currentPath, data);
880
+ return chunks;
881
+ }
882
+ let currentChunk = {};
883
+ let accumulatedChunks = chunks;
884
+ for (const [key, value] of Object.entries(data)) {
885
+ const fullPath = [...currentPath, key];
886
+ if (Array.isArray(value)) {
887
+ const arrayChunks = this.handleArray(value, key, currentPath, depth, maxDepth);
888
+ accumulatedChunks = [...accumulatedChunks, ...arrayChunks];
889
+ } else if (typeof value === "object" && value !== null) {
890
+ const objectChunks = this.handleNestedObject(value, fullPath, depth, maxDepth);
891
+ accumulatedChunks = [...accumulatedChunks, ...objectChunks];
892
+ } else {
893
+ const { currentChunk: newCurrentChunk, chunks: newChunks } = this.handlePrimitiveValue(
894
+ value,
895
+ key,
896
+ currentChunk,
897
+ accumulatedChunks,
898
+ fullPath
899
+ );
900
+ currentChunk = newCurrentChunk;
901
+ accumulatedChunks = newChunks;
902
+ }
725
903
  }
726
- return chunks;
904
+ if (Object.keys(currentChunk).length > 0) {
905
+ accumulatedChunks = [...accumulatedChunks, currentChunk];
906
+ }
907
+ return accumulatedChunks.filter((chunk) => Object.keys(chunk).length > 0);
727
908
  }
728
909
  /**
729
910
  * Splits JSON into a list of JSON chunks
@@ -739,12 +920,19 @@ var RecursiveJsonTransformer = class _RecursiveJsonTransformer {
739
920
  }
740
921
  return chunks;
741
922
  }
923
+ /**
924
+ * Converts Unicode characters to their escaped ASCII representation
925
+ * e.g., 'café' becomes 'caf\u00e9'
926
+ */
742
927
  escapeNonAscii(obj) {
743
928
  if (typeof obj === "string") {
744
929
  return obj.replace(/[\u0080-\uffff]/g, (char) => {
745
930
  return `\\u${char.charCodeAt(0).toString(16).padStart(4, "0")}`;
746
931
  });
747
932
  }
933
+ if (Array.isArray(obj)) {
934
+ return obj.map((item) => this.escapeNonAscii(item));
935
+ }
748
936
  if (typeof obj === "object" && obj !== null) {
749
937
  return Object.fromEntries(Object.entries(obj).map(([key, value]) => [key, this.escapeNonAscii(value)]));
750
938
  }
@@ -763,7 +951,14 @@ var RecursiveJsonTransformer = class _RecursiveJsonTransformer {
763
951
  const escapedChunks = chunks.map((chunk) => this.escapeNonAscii(chunk));
764
952
  return escapedChunks.map((chunk) => JSON.stringify(chunk));
765
953
  }
766
- return chunks.map((chunk) => JSON.stringify(chunk));
954
+ return chunks.map(
955
+ (chunk) => JSON.stringify(chunk, (key, value) => {
956
+ if (typeof value === "string") {
957
+ return value.replace(/\\u[\da-f]{4}/gi, (match) => String.fromCharCode(parseInt(match.slice(2), 16)));
958
+ }
959
+ return value;
960
+ })
961
+ );
767
962
  }
768
963
  /**
769
964
  * Create documents from a list of json objects
@@ -1336,7 +1531,10 @@ async function rerank(results, query, model, options) {
1336
1531
  const queryAnalysis = queryEmbedding ? analyzeQueryEmbedding(queryEmbedding) : null;
1337
1532
  const scoredResults = await Promise.all(
1338
1533
  results.map(async (result, index) => {
1339
- const semanticScore = await semanticProvider.getRelevanceScore(query, result?.metadata?.text);
1534
+ let semanticScore = 0;
1535
+ if (result?.metadata?.text) {
1536
+ semanticScore = await semanticProvider.getRelevanceScore(query, result?.metadata?.text);
1537
+ }
1340
1538
  const vectorScore = result.score;
1341
1539
  const positionScore = calculatePositionScore(index, resultLength);
1342
1540
  let finalScore = weights.semantic * semanticScore + weights.vector * vectorScore + weights.position * positionScore;
@@ -1592,7 +1790,7 @@ var vectorQuerySearch = async ({
1592
1790
  vectorStore,
1593
1791
  queryText,
1594
1792
  model,
1595
- queryFilter = {},
1793
+ queryFilter,
1596
1794
  topK,
1597
1795
  includeVectors = false,
1598
1796
  maxRetries = 2
@@ -1736,10 +1934,6 @@ var createVectorQueryTool = ({
1736
1934
  }),
1737
1935
  description: toolDescription,
1738
1936
  execute: async ({ context: { queryText, topK, filter }, mastra }) => {
1739
- console.log({
1740
- topK,
1741
- filter
1742
- });
1743
1937
  const vectorStore = mastra?.vectors?.[vectorStoreName];
1744
1938
  if (vectorStore) {
1745
1939
  let queryFilter = {};