@mastra/rag 0.1.8-alpha.3 → 0.1.8-alpha.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.turbo/turbo-build.log +7 -7
- package/CHANGELOG.md +30 -0
- package/dist/_tsup-dts-rollup.d.cts +37 -2
- package/dist/_tsup-dts-rollup.d.ts +37 -2
- package/dist/index.cjs +225 -31
- package/dist/index.js +225 -31
- package/package.json +2 -2
- package/src/document/document.test.ts +418 -1
- package/src/document/transformers/json.ts +294 -28
- package/src/rerank/index.ts +4 -1
- package/src/tools/vector-query.ts +0 -4
- package/src/utils/vector-search.ts +3 -2
package/.turbo/turbo-build.log
CHANGED
|
@@ -1,23 +1,23 @@
|
|
|
1
1
|
|
|
2
|
-
> @mastra/rag@0.1.8-alpha.3 build /home/runner/work/mastra/mastra/packages/rag
|
|
2
|
+
> @mastra/rag@0.1.8-alpha.5 build /home/runner/work/mastra/mastra/packages/rag
|
|
3
3
|
> tsup src/index.ts --format esm,cjs --experimental-dts --clean --treeshake
|
|
4
4
|
|
|
5
5
|
[34mCLI[39m Building entry: src/index.ts
|
|
6
6
|
[34mCLI[39m Using tsconfig: tsconfig.json
|
|
7
7
|
[34mCLI[39m tsup v8.3.6
|
|
8
8
|
[34mTSC[39m Build start
|
|
9
|
-
[32mTSC[39m ⚡️ Build success in
|
|
9
|
+
[32mTSC[39m ⚡️ Build success in 23355ms
|
|
10
10
|
[34mDTS[39m Build start
|
|
11
11
|
[34mCLI[39m Target: es2022
|
|
12
12
|
Analysis will use the bundled TypeScript version 5.7.3
|
|
13
13
|
[36mWriting package typings: /home/runner/work/mastra/mastra/packages/rag/dist/_tsup-dts-rollup.d.ts[39m
|
|
14
14
|
Analysis will use the bundled TypeScript version 5.7.3
|
|
15
15
|
[36mWriting package typings: /home/runner/work/mastra/mastra/packages/rag/dist/_tsup-dts-rollup.d.cts[39m
|
|
16
|
-
[32mDTS[39m ⚡️ Build success in
|
|
16
|
+
[32mDTS[39m ⚡️ Build success in 23003ms
|
|
17
17
|
[34mCLI[39m Cleaning output folder
|
|
18
18
|
[34mESM[39m Build start
|
|
19
19
|
[34mCJS[39m Build start
|
|
20
|
-
[
|
|
21
|
-
[
|
|
22
|
-
[
|
|
23
|
-
[
|
|
20
|
+
[32mESM[39m [1mdist/index.js [22m[32m89.94 KB[39m
|
|
21
|
+
[32mESM[39m ⚡️ Build success in 684ms
|
|
22
|
+
[32mCJS[39m [1mdist/index.cjs [22m[32m90.62 KB[39m
|
|
23
|
+
[32mCJS[39m ⚡️ Build success in 685ms
|
package/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,35 @@
|
|
|
1
1
|
# @mastra/rag
|
|
2
2
|
|
|
3
|
+
## 0.1.8-alpha.5
|
|
4
|
+
|
|
5
|
+
### Patch Changes
|
|
6
|
+
|
|
7
|
+
- 9e81f35: Fix query filter for vector search and rerank
|
|
8
|
+
- 9e0f2c9: Update JSON Chunking to fix recursive issue
|
|
9
|
+
- Updated dependencies [22643eb]
|
|
10
|
+
- Updated dependencies [6feb23f]
|
|
11
|
+
- Updated dependencies [f2d6727]
|
|
12
|
+
- Updated dependencies [301e4ee]
|
|
13
|
+
- Updated dependencies [dfbe4e9]
|
|
14
|
+
- Updated dependencies [9e81f35]
|
|
15
|
+
- Updated dependencies [caefaa2]
|
|
16
|
+
- Updated dependencies [c151ae6]
|
|
17
|
+
- Updated dependencies [52e0418]
|
|
18
|
+
- Updated dependencies [03236ec]
|
|
19
|
+
- Updated dependencies [3764e71]
|
|
20
|
+
- Updated dependencies [df982db]
|
|
21
|
+
- Updated dependencies [0461849]
|
|
22
|
+
- Updated dependencies [2259379]
|
|
23
|
+
- Updated dependencies [358f069]
|
|
24
|
+
- @mastra/core@0.5.0-alpha.5
|
|
25
|
+
|
|
26
|
+
## 0.1.8-alpha.4
|
|
27
|
+
|
|
28
|
+
### Patch Changes
|
|
29
|
+
|
|
30
|
+
- Updated dependencies [d79aedf]
|
|
31
|
+
- @mastra/core@0.5.0-alpha.4
|
|
32
|
+
|
|
3
33
|
## 0.1.8-alpha.3
|
|
4
34
|
|
|
5
35
|
### Patch Changes
|
|
package/dist/_tsup-dts-rollup.d.cts
CHANGED
|
@@ -12,6 +12,7 @@ import type { TiktokenEncoding } from 'js-tiktoken';
|
|
|
12
12
|
import type { TiktokenModel } from 'js-tiktoken';
|
|
13
13
|
import type { TitleCombinePrompt } from 'llamaindex';
|
|
14
14
|
import type { TitleExtractorPrompt } from 'llamaindex';
|
|
15
|
+
import type { VectorFilter } from '@mastra/core/vector/filter';
|
|
15
16
|
|
|
16
17
|
/**
|
|
17
18
|
* Vector store specific prompts that detail supported operators and examples.
|
|
@@ -404,7 +405,37 @@ export declare class RecursiveJsonTransformer {
|
|
|
404
405
|
*/
|
|
405
406
|
private listToDictPreprocessing;
|
|
406
407
|
/**
|
|
407
|
-
*
|
|
408
|
+
* Handles primitive values (strings, numbers, etc) by either adding them to the current chunk
|
|
409
|
+
* or creating new chunks if they don't fit
|
|
410
|
+
*/
|
|
411
|
+
private handlePrimitiveValue;
|
|
412
|
+
/**
|
|
413
|
+
* Creates a nested dictionary chunk from a value and path
|
|
414
|
+
* e.g., path ['a', 'b'], value 'c' becomes { a: { b: 'c' } }
|
|
415
|
+
*/
|
|
416
|
+
private createChunk;
|
|
417
|
+
/**
|
|
418
|
+
* Checks if value is within size limits
|
|
419
|
+
*/
|
|
420
|
+
private isWithinSizeLimit;
|
|
421
|
+
/**
|
|
422
|
+
* Splits arrays into chunks based on size limits
|
|
423
|
+
* Handles nested objects by recursing into handleNestedObject
|
|
424
|
+
*/
|
|
425
|
+
private handleArray;
|
|
426
|
+
/**
|
|
427
|
+
* Splits objects into chunks based on size limits
|
|
428
|
+
* Handles nested arrays and objects by recursing into handleArray and handleNestedObject
|
|
429
|
+
*/
|
|
430
|
+
private handleNestedObject;
|
|
431
|
+
/**
|
|
432
|
+
* Splits long strings into smaller chunks at word boundaries
|
|
433
|
+
* Ensures each chunk is within maxSize limit
|
|
434
|
+
*/
|
|
435
|
+
private splitLongString;
|
|
436
|
+
/**
|
|
437
|
+
* Core chunking logic that processes JSON data recursively
|
|
438
|
+
* Handles arrays, objects, and primitive values while maintaining structure
|
|
408
439
|
*/
|
|
409
440
|
private jsonSplit;
|
|
410
441
|
/**
|
|
@@ -414,6 +445,10 @@ export declare class RecursiveJsonTransformer {
|
|
|
414
445
|
jsonData: Record<string, any>;
|
|
415
446
|
convertLists?: boolean;
|
|
416
447
|
}): Record<string, any>[];
|
|
448
|
+
/**
|
|
449
|
+
* Converts Unicode characters to their escaped ASCII representation
|
|
450
|
+
* e.g., 'café' becomes 'caf\u00e9'
|
|
451
|
+
*/
|
|
417
452
|
private escapeNonAscii;
|
|
418
453
|
/**
|
|
419
454
|
* Splits JSON into a list of JSON formatted strings
|
|
@@ -600,7 +635,7 @@ declare interface VectorQuerySearchParams {
|
|
|
600
635
|
vectorStore: MastraVector;
|
|
601
636
|
queryText: string;
|
|
602
637
|
model: EmbeddingModel<string>;
|
|
603
|
-
queryFilter?:
|
|
638
|
+
queryFilter?: VectorFilter;
|
|
604
639
|
topK: number;
|
|
605
640
|
includeVectors?: boolean;
|
|
606
641
|
maxRetries?: number;
|
|
package/dist/_tsup-dts-rollup.d.ts
CHANGED
|
@@ -12,6 +12,7 @@ import type { TiktokenEncoding } from 'js-tiktoken';
|
|
|
12
12
|
import type { TiktokenModel } from 'js-tiktoken';
|
|
13
13
|
import type { TitleCombinePrompt } from 'llamaindex';
|
|
14
14
|
import type { TitleExtractorPrompt } from 'llamaindex';
|
|
15
|
+
import type { VectorFilter } from '@mastra/core/vector/filter';
|
|
15
16
|
|
|
16
17
|
/**
|
|
17
18
|
* Vector store specific prompts that detail supported operators and examples.
|
|
@@ -404,7 +405,37 @@ export declare class RecursiveJsonTransformer {
|
|
|
404
405
|
*/
|
|
405
406
|
private listToDictPreprocessing;
|
|
406
407
|
/**
|
|
407
|
-
*
|
|
408
|
+
* Handles primitive values (strings, numbers, etc) by either adding them to the current chunk
|
|
409
|
+
* or creating new chunks if they don't fit
|
|
410
|
+
*/
|
|
411
|
+
private handlePrimitiveValue;
|
|
412
|
+
/**
|
|
413
|
+
* Creates a nested dictionary chunk from a value and path
|
|
414
|
+
* e.g., path ['a', 'b'], value 'c' becomes { a: { b: 'c' } }
|
|
415
|
+
*/
|
|
416
|
+
private createChunk;
|
|
417
|
+
/**
|
|
418
|
+
* Checks if value is within size limits
|
|
419
|
+
*/
|
|
420
|
+
private isWithinSizeLimit;
|
|
421
|
+
/**
|
|
422
|
+
* Splits arrays into chunks based on size limits
|
|
423
|
+
* Handles nested objects by recursing into handleNestedObject
|
|
424
|
+
*/
|
|
425
|
+
private handleArray;
|
|
426
|
+
/**
|
|
427
|
+
* Splits objects into chunks based on size limits
|
|
428
|
+
* Handles nested arrays and objects by recursing into handleArray and handleNestedObject
|
|
429
|
+
*/
|
|
430
|
+
private handleNestedObject;
|
|
431
|
+
/**
|
|
432
|
+
* Splits long strings into smaller chunks at word boundaries
|
|
433
|
+
* Ensures each chunk is within maxSize limit
|
|
434
|
+
*/
|
|
435
|
+
private splitLongString;
|
|
436
|
+
/**
|
|
437
|
+
* Core chunking logic that processes JSON data recursively
|
|
438
|
+
* Handles arrays, objects, and primitive values while maintaining structure
|
|
408
439
|
*/
|
|
409
440
|
private jsonSplit;
|
|
410
441
|
/**
|
|
@@ -414,6 +445,10 @@ export declare class RecursiveJsonTransformer {
|
|
|
414
445
|
jsonData: Record<string, any>;
|
|
415
446
|
convertLists?: boolean;
|
|
416
447
|
}): Record<string, any>[];
|
|
448
|
+
/**
|
|
449
|
+
* Converts Unicode characters to their escaped ASCII representation
|
|
450
|
+
* e.g., 'café' becomes 'caf\u00e9'
|
|
451
|
+
*/
|
|
417
452
|
private escapeNonAscii;
|
|
418
453
|
/**
|
|
419
454
|
* Splits JSON into a list of JSON formatted strings
|
|
@@ -600,7 +635,7 @@ declare interface VectorQuerySearchParams {
|
|
|
600
635
|
vectorStore: MastraVector;
|
|
601
636
|
queryText: string;
|
|
602
637
|
model: EmbeddingModel<string>;
|
|
603
|
-
queryFilter?:
|
|
638
|
+
queryFilter?: VectorFilter;
|
|
604
639
|
topK: number;
|
|
605
640
|
includeVectors?: boolean;
|
|
606
641
|
maxRetries?: number;
|
package/dist/index.cjs
CHANGED
|
@@ -638,7 +638,8 @@ var RecursiveJsonTransformer = class _RecursiveJsonTransformer {
|
|
|
638
638
|
return safeObj;
|
|
639
639
|
}
|
|
640
640
|
const stringifiable = getStringifiableData(data);
|
|
641
|
-
|
|
641
|
+
const jsonString = JSON.stringify(stringifiable);
|
|
642
|
+
return jsonString.length;
|
|
642
643
|
}
|
|
643
644
|
/**
|
|
644
645
|
* Transform JSON data while handling circular references
|
|
@@ -694,36 +695,216 @@ var RecursiveJsonTransformer = class _RecursiveJsonTransformer {
|
|
|
694
695
|
return data;
|
|
695
696
|
}
|
|
696
697
|
/**
|
|
697
|
-
*
|
|
698
|
+
* Handles primitive values (strings, numbers, etc) by either adding them to the current chunk
|
|
699
|
+
* or creating new chunks if they don't fit
|
|
700
|
+
*/
|
|
701
|
+
handlePrimitiveValue(value, key, currentChunk, chunks, fullPath) {
|
|
702
|
+
const testValue = { [key]: value };
|
|
703
|
+
if (_RecursiveJsonTransformer.jsonSize(testValue) <= this.maxSize) {
|
|
704
|
+
if (_RecursiveJsonTransformer.jsonSize({ ...currentChunk, ...testValue }) <= this.maxSize) {
|
|
705
|
+
return {
|
|
706
|
+
currentChunk: { ...currentChunk, ...testValue },
|
|
707
|
+
chunks
|
|
708
|
+
};
|
|
709
|
+
} else {
|
|
710
|
+
return {
|
|
711
|
+
currentChunk: testValue,
|
|
712
|
+
chunks: [...chunks, currentChunk]
|
|
713
|
+
};
|
|
714
|
+
}
|
|
715
|
+
} else if (typeof value === "string") {
|
|
716
|
+
const stringChunks = this.splitLongString(value);
|
|
717
|
+
const newChunks = stringChunks.map((chunk) => {
|
|
718
|
+
return this.createChunk(chunk, fullPath);
|
|
719
|
+
}).filter((chunk) => _RecursiveJsonTransformer.jsonSize(chunk) <= this.maxSize);
|
|
720
|
+
return {
|
|
721
|
+
currentChunk,
|
|
722
|
+
chunks: [...chunks, ...newChunks]
|
|
723
|
+
};
|
|
724
|
+
}
|
|
725
|
+
const newChunk = this.createChunk(value, fullPath);
|
|
726
|
+
return {
|
|
727
|
+
currentChunk,
|
|
728
|
+
chunks: _RecursiveJsonTransformer.jsonSize(newChunk) <= this.maxSize ? [...chunks, newChunk] : chunks
|
|
729
|
+
};
|
|
730
|
+
}
|
|
731
|
+
/**
|
|
732
|
+
* Creates a nested dictionary chunk from a value and path
|
|
733
|
+
* e.g., path ['a', 'b'], value 'c' becomes { a: { b: 'c' } }
|
|
734
|
+
*/
|
|
735
|
+
createChunk(value, path) {
|
|
736
|
+
const chunk = {};
|
|
737
|
+
_RecursiveJsonTransformer.setNestedDict(chunk, path, value);
|
|
738
|
+
return chunk.root ? chunk.root : chunk;
|
|
739
|
+
}
|
|
740
|
+
/**
|
|
741
|
+
* Checks if value is within size limits
|
|
742
|
+
*/
|
|
743
|
+
isWithinSizeLimit(value, currentSize = 0) {
|
|
744
|
+
const size = _RecursiveJsonTransformer.jsonSize(value);
|
|
745
|
+
return currentSize === 0 ? size <= this.maxSize : size + currentSize <= this.maxSize || currentSize < this.minSize;
|
|
746
|
+
}
|
|
747
|
+
/**
|
|
748
|
+
* Splits arrays into chunks based on size limits
|
|
749
|
+
* Handles nested objects by recursing into handleNestedObject
|
|
750
|
+
*/
|
|
751
|
+
handleArray(value, key, currentPath, depth, maxDepth) {
|
|
752
|
+
const path = currentPath.length ? [...currentPath, key] : ["root", key];
|
|
753
|
+
const chunk = this.createChunk(value, path);
|
|
754
|
+
if (this.isWithinSizeLimit(chunk)) {
|
|
755
|
+
return [chunk];
|
|
756
|
+
}
|
|
757
|
+
const chunks = [];
|
|
758
|
+
let currentGroup = [];
|
|
759
|
+
const saveCurrentGroup = () => {
|
|
760
|
+
if (currentGroup.length > 0) {
|
|
761
|
+
const groupChunk = this.createChunk(currentGroup, path);
|
|
762
|
+
if (_RecursiveJsonTransformer.jsonSize(groupChunk) >= this.minSize) {
|
|
763
|
+
chunks.push(groupChunk);
|
|
764
|
+
currentGroup = [];
|
|
765
|
+
}
|
|
766
|
+
}
|
|
767
|
+
};
|
|
768
|
+
for (const item of value) {
|
|
769
|
+
const testGroup = [...currentGroup, item];
|
|
770
|
+
const testChunk = this.createChunk(testGroup, path);
|
|
771
|
+
if (this.isWithinSizeLimit(testChunk)) {
|
|
772
|
+
currentGroup = testGroup;
|
|
773
|
+
continue;
|
|
774
|
+
}
|
|
775
|
+
saveCurrentGroup();
|
|
776
|
+
if (typeof item === "object" && item !== null) {
|
|
777
|
+
const singleItemArray = [item];
|
|
778
|
+
const singleItemChunk = this.createChunk(singleItemArray, path);
|
|
779
|
+
if (this.isWithinSizeLimit(singleItemChunk)) {
|
|
780
|
+
currentGroup = singleItemArray;
|
|
781
|
+
} else {
|
|
782
|
+
const itemPath = [...path, String(chunks.length)];
|
|
783
|
+
const nestedChunks = this.handleNestedObject(item, itemPath, depth + 1, maxDepth);
|
|
784
|
+
chunks.push(...nestedChunks);
|
|
785
|
+
}
|
|
786
|
+
} else {
|
|
787
|
+
currentGroup = [item];
|
|
788
|
+
}
|
|
789
|
+
}
|
|
790
|
+
saveCurrentGroup();
|
|
791
|
+
return chunks;
|
|
792
|
+
}
|
|
793
|
+
/**
|
|
794
|
+
* Splits objects into chunks based on size limits
|
|
795
|
+
* Handles nested arrays and objects by recursing into handleArray and handleNestedObject
|
|
796
|
+
*/
|
|
797
|
+
handleNestedObject(value, fullPath, depth, maxDepth) {
|
|
798
|
+
const path = fullPath.length ? fullPath : ["root"];
|
|
799
|
+
if (depth > maxDepth) {
|
|
800
|
+
console.warn(`Maximum depth of ${maxDepth} exceeded, flattening remaining structure`);
|
|
801
|
+
return [this.createChunk(value, path)];
|
|
802
|
+
}
|
|
803
|
+
const wholeChunk = this.createChunk(value, path);
|
|
804
|
+
if (this.isWithinSizeLimit(wholeChunk)) {
|
|
805
|
+
return [wholeChunk];
|
|
806
|
+
}
|
|
807
|
+
const chunks = [];
|
|
808
|
+
let currentChunk = {};
|
|
809
|
+
const saveCurrentChunk = () => {
|
|
810
|
+
if (Object.keys(currentChunk).length > 0) {
|
|
811
|
+
const objChunk = this.createChunk(currentChunk, path);
|
|
812
|
+
if (_RecursiveJsonTransformer.jsonSize(objChunk) >= this.minSize) {
|
|
813
|
+
chunks.push(objChunk);
|
|
814
|
+
currentChunk = {};
|
|
815
|
+
}
|
|
816
|
+
}
|
|
817
|
+
};
|
|
818
|
+
for (const [key, val] of Object.entries(value)) {
|
|
819
|
+
if (val === void 0) continue;
|
|
820
|
+
if (Array.isArray(val)) {
|
|
821
|
+
saveCurrentChunk();
|
|
822
|
+
const arrayChunks = this.handleArray(val, key, path, depth, maxDepth);
|
|
823
|
+
chunks.push(...arrayChunks);
|
|
824
|
+
continue;
|
|
825
|
+
}
|
|
826
|
+
const testChunk = this.createChunk({ ...currentChunk, [key]: val }, path);
|
|
827
|
+
if (this.isWithinSizeLimit(testChunk)) {
|
|
828
|
+
currentChunk[key] = val;
|
|
829
|
+
continue;
|
|
830
|
+
}
|
|
831
|
+
saveCurrentChunk();
|
|
832
|
+
if (typeof val === "object" && val !== null) {
|
|
833
|
+
const nestedChunks = this.handleNestedObject(val, [...path, key], depth + 1, maxDepth);
|
|
834
|
+
chunks.push(...nestedChunks);
|
|
835
|
+
} else {
|
|
836
|
+
currentChunk = { [key]: val };
|
|
837
|
+
}
|
|
838
|
+
}
|
|
839
|
+
saveCurrentChunk();
|
|
840
|
+
return chunks;
|
|
841
|
+
}
|
|
842
|
+
/**
|
|
843
|
+
* Splits long strings into smaller chunks at word boundaries
|
|
844
|
+
* Ensures each chunk is within maxSize limit
|
|
845
|
+
*/
|
|
846
|
+
splitLongString(value) {
|
|
847
|
+
const chunks = [];
|
|
848
|
+
let remaining = value;
|
|
849
|
+
while (remaining.length > 0) {
|
|
850
|
+
const overhead = 20;
|
|
851
|
+
const chunkSize = Math.floor(this.maxSize - overhead);
|
|
852
|
+
if (remaining.length <= chunkSize) {
|
|
853
|
+
chunks.push(remaining);
|
|
854
|
+
break;
|
|
855
|
+
}
|
|
856
|
+
const lastSpace = remaining.slice(0, chunkSize).lastIndexOf(" ");
|
|
857
|
+
const splitAt = lastSpace > 0 ? lastSpace + 1 : chunkSize;
|
|
858
|
+
chunks.push(remaining.slice(0, splitAt));
|
|
859
|
+
remaining = remaining.slice(splitAt);
|
|
860
|
+
}
|
|
861
|
+
return chunks;
|
|
862
|
+
}
|
|
863
|
+
/**
|
|
864
|
+
* Core chunking logic that processes JSON data recursively
|
|
865
|
+
* Handles arrays, objects, and primitive values while maintaining structure
|
|
698
866
|
*/
|
|
699
867
|
jsonSplit({
|
|
700
868
|
data,
|
|
701
869
|
currentPath = [],
|
|
702
|
-
chunks = [{}]
|
|
870
|
+
chunks = [{}],
|
|
871
|
+
depth = 0,
|
|
872
|
+
maxDepth = 100
|
|
703
873
|
}) {
|
|
704
|
-
if (data
|
|
705
|
-
|
|
706
|
-
|
|
707
|
-
|
|
708
|
-
|
|
709
|
-
const remaining = this.maxSize - chunkSize;
|
|
710
|
-
if (size < remaining) {
|
|
711
|
-
_RecursiveJsonTransformer.setNestedDict(chunks[chunks.length - 1] || {}, newPath, value);
|
|
712
|
-
} else {
|
|
713
|
-
if (chunkSize >= this.minSize) {
|
|
714
|
-
chunks.push({});
|
|
715
|
-
}
|
|
716
|
-
this.jsonSplit({
|
|
717
|
-
data: typeof value === "object" ? value : { [key]: value },
|
|
718
|
-
currentPath: newPath,
|
|
719
|
-
chunks
|
|
720
|
-
});
|
|
721
|
-
}
|
|
722
|
-
}
|
|
723
|
-
} else {
|
|
874
|
+
if (!data || typeof data !== "object") {
|
|
875
|
+
return chunks;
|
|
876
|
+
}
|
|
877
|
+
if (depth > maxDepth) {
|
|
878
|
+
console.warn(`Maximum depth of ${maxDepth} exceeded, flattening remaining structure`);
|
|
724
879
|
_RecursiveJsonTransformer.setNestedDict(chunks[chunks.length - 1] || {}, currentPath, data);
|
|
880
|
+
return chunks;
|
|
881
|
+
}
|
|
882
|
+
let currentChunk = {};
|
|
883
|
+
let accumulatedChunks = chunks;
|
|
884
|
+
for (const [key, value] of Object.entries(data)) {
|
|
885
|
+
const fullPath = [...currentPath, key];
|
|
886
|
+
if (Array.isArray(value)) {
|
|
887
|
+
const arrayChunks = this.handleArray(value, key, currentPath, depth, maxDepth);
|
|
888
|
+
accumulatedChunks = [...accumulatedChunks, ...arrayChunks];
|
|
889
|
+
} else if (typeof value === "object" && value !== null) {
|
|
890
|
+
const objectChunks = this.handleNestedObject(value, fullPath, depth, maxDepth);
|
|
891
|
+
accumulatedChunks = [...accumulatedChunks, ...objectChunks];
|
|
892
|
+
} else {
|
|
893
|
+
const { currentChunk: newCurrentChunk, chunks: newChunks } = this.handlePrimitiveValue(
|
|
894
|
+
value,
|
|
895
|
+
key,
|
|
896
|
+
currentChunk,
|
|
897
|
+
accumulatedChunks,
|
|
898
|
+
fullPath
|
|
899
|
+
);
|
|
900
|
+
currentChunk = newCurrentChunk;
|
|
901
|
+
accumulatedChunks = newChunks;
|
|
902
|
+
}
|
|
725
903
|
}
|
|
726
|
-
|
|
904
|
+
if (Object.keys(currentChunk).length > 0) {
|
|
905
|
+
accumulatedChunks = [...accumulatedChunks, currentChunk];
|
|
906
|
+
}
|
|
907
|
+
return accumulatedChunks.filter((chunk) => Object.keys(chunk).length > 0);
|
|
727
908
|
}
|
|
728
909
|
/**
|
|
729
910
|
* Splits JSON into a list of JSON chunks
|
|
@@ -739,12 +920,19 @@ var RecursiveJsonTransformer = class _RecursiveJsonTransformer {
|
|
|
739
920
|
}
|
|
740
921
|
return chunks;
|
|
741
922
|
}
|
|
923
|
+
/**
|
|
924
|
+
* Converts Unicode characters to their escaped ASCII representation
|
|
925
|
+
* e.g., 'café' becomes 'caf\u00e9'
|
|
926
|
+
*/
|
|
742
927
|
escapeNonAscii(obj) {
|
|
743
928
|
if (typeof obj === "string") {
|
|
744
929
|
return obj.replace(/[\u0080-\uffff]/g, (char) => {
|
|
745
930
|
return `\\u${char.charCodeAt(0).toString(16).padStart(4, "0")}`;
|
|
746
931
|
});
|
|
747
932
|
}
|
|
933
|
+
if (Array.isArray(obj)) {
|
|
934
|
+
return obj.map((item) => this.escapeNonAscii(item));
|
|
935
|
+
}
|
|
748
936
|
if (typeof obj === "object" && obj !== null) {
|
|
749
937
|
return Object.fromEntries(Object.entries(obj).map(([key, value]) => [key, this.escapeNonAscii(value)]));
|
|
750
938
|
}
|
|
@@ -763,7 +951,14 @@ var RecursiveJsonTransformer = class _RecursiveJsonTransformer {
|
|
|
763
951
|
const escapedChunks = chunks.map((chunk) => this.escapeNonAscii(chunk));
|
|
764
952
|
return escapedChunks.map((chunk) => JSON.stringify(chunk));
|
|
765
953
|
}
|
|
766
|
-
return chunks.map(
|
|
954
|
+
return chunks.map(
|
|
955
|
+
(chunk) => JSON.stringify(chunk, (key, value) => {
|
|
956
|
+
if (typeof value === "string") {
|
|
957
|
+
return value.replace(/\\u[\da-f]{4}/gi, (match) => String.fromCharCode(parseInt(match.slice(2), 16)));
|
|
958
|
+
}
|
|
959
|
+
return value;
|
|
960
|
+
})
|
|
961
|
+
);
|
|
767
962
|
}
|
|
768
963
|
/**
|
|
769
964
|
* Create documents from a list of json objects
|
|
@@ -1336,7 +1531,10 @@ async function rerank(results, query, model, options) {
|
|
|
1336
1531
|
const queryAnalysis = queryEmbedding ? analyzeQueryEmbedding(queryEmbedding) : null;
|
|
1337
1532
|
const scoredResults = await Promise.all(
|
|
1338
1533
|
results.map(async (result, index) => {
|
|
1339
|
-
|
|
1534
|
+
let semanticScore = 0;
|
|
1535
|
+
if (result?.metadata?.text) {
|
|
1536
|
+
semanticScore = await semanticProvider.getRelevanceScore(query, result?.metadata?.text);
|
|
1537
|
+
}
|
|
1340
1538
|
const vectorScore = result.score;
|
|
1341
1539
|
const positionScore = calculatePositionScore(index, resultLength);
|
|
1342
1540
|
let finalScore = weights.semantic * semanticScore + weights.vector * vectorScore + weights.position * positionScore;
|
|
@@ -1592,7 +1790,7 @@ var vectorQuerySearch = async ({
|
|
|
1592
1790
|
vectorStore,
|
|
1593
1791
|
queryText,
|
|
1594
1792
|
model,
|
|
1595
|
-
queryFilter
|
|
1793
|
+
queryFilter,
|
|
1596
1794
|
topK,
|
|
1597
1795
|
includeVectors = false,
|
|
1598
1796
|
maxRetries = 2
|
|
@@ -1736,10 +1934,6 @@ var createVectorQueryTool = ({
|
|
|
1736
1934
|
}),
|
|
1737
1935
|
description: toolDescription,
|
|
1738
1936
|
execute: async ({ context: { queryText, topK, filter }, mastra }) => {
|
|
1739
|
-
console.log({
|
|
1740
|
-
topK,
|
|
1741
|
-
filter
|
|
1742
|
-
});
|
|
1743
1937
|
const vectorStore = mastra?.vectors?.[vectorStoreName];
|
|
1744
1938
|
if (vectorStore) {
|
|
1745
1939
|
let queryFilter = {};
|