@mastra/rag 1.2.2 → 1.2.3-alpha.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (77)
  1. package/CHANGELOG.md +22 -0
  2. package/dist/index.cjs +25 -9
  3. package/dist/index.cjs.map +1 -1
  4. package/dist/index.js +25 -9
  5. package/dist/index.js.map +1 -1
  6. package/dist/tools/graph-rag.d.ts.map +1 -1
  7. package/dist/tools/types.d.ts +18 -5
  8. package/dist/tools/types.d.ts.map +1 -1
  9. package/dist/tools/vector-query.d.ts.map +1 -1
  10. package/dist/utils/vector-search.d.ts +6 -7
  11. package/dist/utils/vector-search.d.ts.map +1 -1
  12. package/package.json +19 -6
  13. package/.turbo/turbo-build.log +0 -4
  14. package/docker-compose.yaml +0 -22
  15. package/eslint.config.js +0 -6
  16. package/src/document/document.test.ts +0 -2975
  17. package/src/document/document.ts +0 -335
  18. package/src/document/extractors/base.ts +0 -30
  19. package/src/document/extractors/index.ts +0 -5
  20. package/src/document/extractors/keywords.test.ts +0 -125
  21. package/src/document/extractors/keywords.ts +0 -126
  22. package/src/document/extractors/questions.test.ts +0 -120
  23. package/src/document/extractors/questions.ts +0 -111
  24. package/src/document/extractors/summary.test.ts +0 -107
  25. package/src/document/extractors/summary.ts +0 -122
  26. package/src/document/extractors/title.test.ts +0 -121
  27. package/src/document/extractors/title.ts +0 -185
  28. package/src/document/extractors/types.ts +0 -40
  29. package/src/document/index.ts +0 -2
  30. package/src/document/prompts/base.ts +0 -77
  31. package/src/document/prompts/format.ts +0 -9
  32. package/src/document/prompts/index.ts +0 -15
  33. package/src/document/prompts/prompt.ts +0 -60
  34. package/src/document/prompts/types.ts +0 -29
  35. package/src/document/schema/index.ts +0 -3
  36. package/src/document/schema/node.ts +0 -187
  37. package/src/document/schema/types.ts +0 -40
  38. package/src/document/transformers/character.ts +0 -267
  39. package/src/document/transformers/html.ts +0 -346
  40. package/src/document/transformers/json.ts +0 -536
  41. package/src/document/transformers/latex.ts +0 -11
  42. package/src/document/transformers/markdown.ts +0 -239
  43. package/src/document/transformers/semantic-markdown.ts +0 -227
  44. package/src/document/transformers/sentence.ts +0 -314
  45. package/src/document/transformers/text.ts +0 -158
  46. package/src/document/transformers/token.ts +0 -137
  47. package/src/document/transformers/transformer.ts +0 -5
  48. package/src/document/types.ts +0 -145
  49. package/src/document/validation.ts +0 -158
  50. package/src/graph-rag/index.test.ts +0 -235
  51. package/src/graph-rag/index.ts +0 -306
  52. package/src/index.ts +0 -8
  53. package/src/rerank/index.test.ts +0 -150
  54. package/src/rerank/index.ts +0 -198
  55. package/src/rerank/relevance/cohere/index.ts +0 -56
  56. package/src/rerank/relevance/index.ts +0 -3
  57. package/src/rerank/relevance/mastra-agent/index.ts +0 -32
  58. package/src/rerank/relevance/zeroentropy/index.ts +0 -26
  59. package/src/tools/README.md +0 -153
  60. package/src/tools/document-chunker.ts +0 -34
  61. package/src/tools/graph-rag.test.ts +0 -115
  62. package/src/tools/graph-rag.ts +0 -154
  63. package/src/tools/index.ts +0 -3
  64. package/src/tools/types.ts +0 -110
  65. package/src/tools/vector-query-database-config.test.ts +0 -190
  66. package/src/tools/vector-query.test.ts +0 -418
  67. package/src/tools/vector-query.ts +0 -169
  68. package/src/utils/convert-sources.ts +0 -43
  69. package/src/utils/default-settings.ts +0 -38
  70. package/src/utils/index.ts +0 -3
  71. package/src/utils/tool-schemas.ts +0 -38
  72. package/src/utils/vector-prompts.ts +0 -832
  73. package/src/utils/vector-search.ts +0 -117
  74. package/tsconfig.build.json +0 -9
  75. package/tsconfig.json +0 -5
  76. package/tsup.config.ts +0 -17
  77. package/vitest.config.ts +0 -8
@@ -1,536 +0,0 @@
1
- import { Document } from '../schema';
2
- import type { JsonChunkOptions } from '../types';
3
-
4
/**
 * Splits JSON-like data into smaller JSON chunks, aiming to keep the
 * serialized size of every chunk at or below `maxSize`, and can wrap the
 * serialized chunks in `Document` instances.
 *
 * Sizes are measured as the length of `JSON.stringify` output (UTF-16 code
 * units), not bytes.
 */
export class RecursiveJsonTransformer {
  /** Upper bound used when deciding whether a candidate chunk still fits. */
  private maxSize: number;
  /** Threshold below which a pending group is kept open instead of being emitted early. */
  private minSize: number;
  /** Stored from the constructor options; the methods of this class take per-call flags instead. */
  private ensureAscii: boolean;
  /** Stored from the constructor options; the methods of this class take per-call flags instead. */
  private convertLists: boolean;

  /**
   * @param options.maxSize - Maximum serialized chunk size (default `2000`).
   * @param options.minSize - Minimum size for an early flush; defaults to
   *   `max(maxSize - 200, 50)`.
   * @param options.ensureAscii - Stored on the instance (default `false`).
   * @param options.convertLists - Stored on the instance (default `true`).
   */
  constructor({ maxSize = 2000, minSize, ensureAscii = false, convertLists = true }: JsonChunkOptions) {
    this.maxSize = maxSize;
    this.minSize = minSize ?? Math.max(maxSize - 200, 50);
    this.ensureAscii = ensureAscii;
    this.convertLists = convertLists;
  }

  /**
   * Defines `key` as an own, enumerable data property of `target`.
   *
   * Plain assignment (`target[key] = value`) with the key `__proto__` would
   * change the prototype of `target` instead of creating a key, which loses
   * the value and opens the door to prototype pollution.
   */
  private static setOwn(target: Record<string, any>, key: string, value: any): void {
    Object.defineProperty(target, key, { value, writable: true, enumerable: true, configurable: true });
  }

  /**
   * Returns the length of the JSON serialization of `data`.
   *
   * Any object that is encountered a second time during the walk (which
   * includes circular references) is serialized as the string `'[Circular]'`,
   * so the measurement cannot recurse forever.
   */
  private static jsonSize(data: Record<string, any>): number {
    const seen = new WeakSet();

    function getStringifiableData(obj: any): any {
      if (obj === null || typeof obj !== 'object') {
        return obj;
      }

      if (seen.has(obj)) {
        return '[Circular]';
      }

      seen.add(obj);

      if (Array.isArray(obj)) {
        const safeArray = [];
        for (const item of obj) {
          safeArray.push(getStringifiableData(item));
        }
        return safeArray;
      }

      const safeObj: Record<string, any> = {};
      for (const key in obj) {
        if (Object.prototype.hasOwnProperty.call(obj, key)) {
          // Own-property definition so a `__proto__` key is counted like any other key.
          RecursiveJsonTransformer.setOwn(safeObj, key, getStringifiableData(obj[key]));
        }
      }
      return safeObj;
    }

    const stringifiable = getStringifiableData(data);
    const jsonString = JSON.stringify(stringifiable);
    return jsonString.length;
  }

  /**
   * Produces a deep copy of `data` together with its serialized size.
   *
   * Objects encountered a second time (including circular references) are
   * replaced by the string `'[Circular]'` in the copy.
   *
   * @returns An object of the form `{ size, data }`.
   */
  public transform(data: Record<string, any>): Record<string, any> {
    const size = RecursiveJsonTransformer.jsonSize(data);

    const seen = new WeakSet();

    function createSafeCopy(obj: any): any {
      if (obj === null || typeof obj !== 'object') {
        return obj;
      }

      if (seen.has(obj)) {
        return '[Circular]';
      }

      seen.add(obj);

      if (Array.isArray(obj)) {
        return obj.map(item => createSafeCopy(item));
      }

      const copy: Record<string, any> = {};
      for (const key in obj) {
        if (Object.prototype.hasOwnProperty.call(obj, key)) {
          RecursiveJsonTransformer.setOwn(copy, key, createSafeCopy(obj[key]));
        }
      }
      return copy;
    }

    return {
      size,
      data: createSafeCopy(data),
    };
  }

  /**
   * Sets `value` inside `d` at the nested location described by `path`,
   * creating intermediate objects as needed.
   *
   * Only own properties are followed and every key is written as an own
   * property. Path segments such as `__proto__` or `constructor` therefore
   * become ordinary keys of the chunk rather than resolving to
   * `Object.prototype` / `Object` and being mutated globally.
   *
   * An empty `path` is a no-op.
   */
  private static setNestedDict(d: Record<string, any>, path: string[], value: any): void {
    const lastKey = path[path.length - 1];
    if (lastKey === undefined) {
      return;
    }

    let current = d;
    for (const key of path.slice(0, -1)) {
      const existing = Object.prototype.hasOwnProperty.call(current, key) ? current[key] : undefined;
      if (existing !== null && typeof existing === 'object') {
        current = existing;
      } else {
        // Missing or non-object intermediate: replace it with a fresh container.
        const child: Record<string, any> = {};
        RecursiveJsonTransformer.setOwn(current, key, child);
        current = child;
      }
    }
    RecursiveJsonTransformer.setOwn(current, lastKey, value);
  }

  /**
   * Recursively converts every array in `data` into an object keyed by the
   * element index (as a string). Non-object values are returned unchanged.
   */
  private listToDictPreprocessing(data: any): any {
    if (data && typeof data === 'object') {
      if (Array.isArray(data)) {
        return Object.fromEntries(data.map((item, index) => [String(index), this.listToDictPreprocessing(item)]));
      }
      return Object.fromEntries(Object.entries(data).map(([k, v]) => [k, this.listToDictPreprocessing(v)]));
    }
    return data;
  }

  /**
   * Places a primitive top-level value either into the chunk being built or
   * into new chunks.
   *
   * - If `{ [key]: value }` fits on its own, it is merged into `currentChunk`
   *   when the merged chunk fits; otherwise `currentChunk` is emitted and a new
   *   one is started with this entry.
   * - If it does not fit and `value` is a string, the string is split and each
   *   piece is emitted as its own chunk under `fullPath`; pieces whose chunk
   *   still exceeds `maxSize` are discarded.
   * - Any other oversized value is emitted as a single chunk only if that
   *   chunk fits.
   *
   * @returns The updated `currentChunk` and list of emitted `chunks`.
   */
  private handlePrimitiveValue(
    value: any,
    key: string,
    currentChunk: Record<string, any>,
    chunks: Record<string, any>[],
    fullPath: string[],
  ): { currentChunk: Record<string, any>; chunks: Record<string, any>[] } {
    const testValue = { [key]: value };

    if (RecursiveJsonTransformer.jsonSize(testValue) <= this.maxSize) {
      if (RecursiveJsonTransformer.jsonSize({ ...currentChunk, ...testValue }) <= this.maxSize) {
        return {
          currentChunk: { ...currentChunk, ...testValue },
          chunks,
        };
      } else {
        return {
          currentChunk: testValue,
          chunks: [...chunks, currentChunk],
        };
      }
    } else if (typeof value === 'string') {
      const stringChunks = this.splitLongString(value);
      const newChunks = stringChunks
        .map(chunk => {
          return this.createChunk(chunk, fullPath);
        })
        .filter(chunk => RecursiveJsonTransformer.jsonSize(chunk) <= this.maxSize);

      return {
        currentChunk,
        chunks: [...chunks, ...newChunks],
      };
    }

    const newChunk = this.createChunk(value, fullPath);
    return {
      currentChunk,
      chunks: RecursiveJsonTransformer.jsonSize(newChunk) <= this.maxSize ? [...chunks, newChunk] : chunks,
    };
  }

  /**
   * Builds a nested object that holds `value` at `path`,
   * e.g. path `['a', 'b']` and value `'c'` becomes `{ a: { b: 'c' } }`.
   *
   * If the resulting object has a truthy `root` property, that property's
   * value is returned instead, which strips the synthetic `'root'` path
   * segment added by `handleArray` / `handleNestedObject`.
   */
  private createChunk(value: any, path: string[]): Record<string, any> {
    const chunk: Record<string, any> = {};
    RecursiveJsonTransformer.setNestedDict(chunk, path, value);
    return chunk.root ? chunk.root : chunk;
  }

  /**
   * Checks whether `value` fits the size budget.
   *
   * With `currentSize === 0` the value alone must not exceed `maxSize`;
   * otherwise the sum of both sizes must not exceed `maxSize`.
   */
  private isWithinSizeLimit(value: any, currentSize: number = 0): boolean {
    const size = RecursiveJsonTransformer.jsonSize(value);
    return currentSize === 0 ? size <= this.maxSize : size + currentSize <= this.maxSize;
  }

  /**
   * Splits an array into chunks, each holding a run of items under `path`.
   *
   * The array is returned as a single chunk when it fits. Otherwise items are
   * grouped greedily; object items that do not fit even alone are delegated
   * to `handleNestedObject`.
   *
   * A pending group smaller than `minSize` stays open while it can still grow,
   * but it is always emitted before it would be replaced and at the end of the
   * array, so no item is dropped.
   */
  private handleArray(
    value: any[],
    key: string,
    currentPath: string[],
    depth: number,
    maxDepth: number,
  ): Record<string, any>[] {
    const path = currentPath.length ? [...currentPath, key] : ['root', key];

    // Try keeping the array intact.
    const chunk = this.createChunk(value, path);
    if (this.isWithinSizeLimit(chunk)) {
      return [chunk];
    }

    const chunks: Record<string, any>[] = [];
    let currentGroup: any[] = [];

    // Emits the pending group. Without `force`, groups below `minSize` are kept open.
    const saveCurrentGroup = (force = false) => {
      if (currentGroup.length === 0) {
        return;
      }
      const groupChunk = this.createChunk(currentGroup, path);
      if (force || RecursiveJsonTransformer.jsonSize(groupChunk) >= this.minSize) {
        chunks.push(groupChunk);
        currentGroup = [];
      }
    };

    for (const [index, item] of value.entries()) {
      // Try adding the item to the pending group.
      const testGroup = [...currentGroup, item];
      const testChunk = this.createChunk(testGroup, path);

      if (this.isWithinSizeLimit(testChunk)) {
        currentGroup = testGroup;
        continue;
      }

      if (typeof item === 'object' && item !== null) {
        const singleItemArray = [item];
        const singleItemChunk = this.createChunk(singleItemArray, path);

        if (this.isWithinSizeLimit(singleItemChunk)) {
          // The pending group is about to be replaced: it must be emitted first.
          saveCurrentGroup(true);
          currentGroup = singleItemArray;
        } else {
          // The pending group is not replaced here, so it may stay open.
          saveCurrentGroup();
          // Label the nested chunks with the item's real position in the array.
          const itemPath = [...path, String(index)];
          const nestedChunks = this.handleNestedObject(item, itemPath, depth + 1, maxDepth);
          chunks.push(...nestedChunks);
        }
      } else {
        saveCurrentGroup(true);
        currentGroup = [item];
      }
    }

    saveCurrentGroup(true);
    return chunks;
  }

  /**
   * Splits an object into chunks, each holding a subset of its entries under
   * `fullPath` (or `['root']` when `fullPath` is empty).
   *
   * The object is returned as a single chunk when it fits, or when `depth`
   * exceeds `maxDepth` (a warning is logged in that case). Array values are
   * delegated to `handleArray`, oversized object values recurse into this
   * method, and `undefined` values are skipped.
   *
   * A pending chunk smaller than `minSize` stays open while it can still grow,
   * but it is always emitted before it would be replaced and at the end of the
   * object, so no entry is dropped.
   */
  private handleNestedObject(
    value: Record<string, any>,
    fullPath: string[],
    depth: number,
    maxDepth: number,
  ): Record<string, any>[] {
    const path = fullPath.length ? fullPath : ['root'];

    // Handle max depth.
    if (depth > maxDepth) {
      console.warn(`Maximum depth of ${maxDepth} exceeded, flattening remaining structure`);
      return [this.createChunk(value, path)];
    }

    // Try keeping the object intact.
    const wholeChunk = this.createChunk(value, path);
    if (this.isWithinSizeLimit(wholeChunk)) {
      return [wholeChunk];
    }

    const chunks: Record<string, any>[] = [];
    let currentChunk: Record<string, any> = {};

    // Emits the pending chunk. Without `force`, chunks below `minSize` are kept open.
    const saveCurrentChunk = (force = false) => {
      if (Object.keys(currentChunk).length === 0) {
        return;
      }
      const objChunk = this.createChunk(currentChunk, path);
      if (force || RecursiveJsonTransformer.jsonSize(objChunk) >= this.minSize) {
        chunks.push(objChunk);
        currentChunk = {};
      }
    };

    for (const [key, val] of Object.entries(value)) {
      if (val === undefined) continue;

      // Arrays are chunked separately.
      if (Array.isArray(val)) {
        saveCurrentChunk();
        const arrayChunks = this.handleArray(val, key, path, depth, maxDepth);
        chunks.push(...arrayChunks);
        continue;
      }

      // Try adding the entry to the pending chunk.
      const testChunk = this.createChunk({ ...currentChunk, [key]: val }, path);
      if (this.isWithinSizeLimit(testChunk)) {
        RecursiveJsonTransformer.setOwn(currentChunk, key, val);
        continue;
      }

      // The entry did not fit.
      if (typeof val === 'object' && val !== null) {
        // The pending chunk is not replaced here, so it may stay open.
        saveCurrentChunk();
        const nestedChunks = this.handleNestedObject(val, [...path, key], depth + 1, maxDepth);
        chunks.push(...nestedChunks);
      } else {
        // The pending chunk is about to be replaced: it must be emitted first.
        saveCurrentChunk(true);
        currentChunk = { [key]: val };
      }
    }

    saveCurrentChunk(true);
    return chunks;
  }

  /**
   * Splits a long string into pieces of at most `maxSize - 20` characters,
   * preferring to break after the last space inside that window.
   *
   * The piece length is clamped to at least 1 so the loop always makes
   * progress, even when `maxSize` is 20 or less.
   */
  private splitLongString(value: string): string[] {
    // Fixed allowance for the JSON wrapper around each piece.
    const overhead = 20;
    const budget = Math.floor(this.maxSize - overhead);
    const chunkSize = budget >= 1 ? budget : 1;

    const chunks: string[] = [];
    let remaining = value;

    while (remaining.length > 0) {
      if (remaining.length <= chunkSize) {
        chunks.push(remaining);
        break;
      }

      const lastSpace = remaining.slice(0, chunkSize).lastIndexOf(' ');
      const splitAt = lastSpace > 0 ? lastSpace + 1 : chunkSize;

      chunks.push(remaining.slice(0, splitAt));
      remaining = remaining.slice(splitAt);
    }

    return chunks;
  }

  /**
   * Core chunking routine for the top level of `data`.
   *
   * Array values go to `handleArray`, object values to `handleNestedObject`,
   * and everything else to `handlePrimitiveValue`. Empty chunks are removed
   * from the result.
   *
   * Non-object `data` returns `chunks` unchanged. When `depth` exceeds
   * `maxDepth`, a warning is logged and `data` is stored at `currentPath` in
   * the last chunk.
   */
  private jsonSplit({
    data,
    currentPath = [],
    chunks = [{}],
    depth = 0,
    maxDepth = 100,
  }: {
    data: Record<string, any>;
    currentPath?: string[];
    chunks?: Record<string, any>[];
    depth?: number;
    maxDepth?: number;
  }): Record<string, any>[] {
    if (!data || typeof data !== 'object') {
      return chunks;
    }

    if (depth > maxDepth) {
      console.warn(`Maximum depth of ${maxDepth} exceeded, flattening remaining structure`);
      RecursiveJsonTransformer.setNestedDict(chunks[chunks.length - 1] || {}, currentPath, data);
      return chunks;
    }

    let currentChunk: Record<string, any> = {};
    let accumulatedChunks = chunks;

    for (const [key, value] of Object.entries(data)) {
      const fullPath = [...currentPath, key];

      if (Array.isArray(value)) {
        const arrayChunks = this.handleArray(value, key, currentPath, depth, maxDepth);
        accumulatedChunks = [...accumulatedChunks, ...arrayChunks];
      } else if (typeof value === 'object' && value !== null) {
        const objectChunks = this.handleNestedObject(value, fullPath, depth, maxDepth);
        accumulatedChunks = [...accumulatedChunks, ...objectChunks];
      } else {
        const { currentChunk: newCurrentChunk, chunks: newChunks } = this.handlePrimitiveValue(
          value,
          key,
          currentChunk,
          accumulatedChunks,
          fullPath,
        );
        currentChunk = newCurrentChunk;
        accumulatedChunks = newChunks;
      }
    }

    if (Object.keys(currentChunk).length > 0) {
      accumulatedChunks = [...accumulatedChunks, currentChunk];
    }

    return accumulatedChunks.filter(chunk => Object.keys(chunk).length > 0);
  }

  /**
   * Splits JSON data into a list of JSON chunks.
   *
   * @param jsonData - The data to split.
   * @param convertLists - When `true`, arrays are first converted to
   *   index-keyed objects (default `false`).
   */
  splitJson({
    jsonData,
    convertLists = false,
  }: {
    jsonData: Record<string, any>;
    convertLists?: boolean;
  }): Record<string, any>[] {
    const processedData = convertLists ? this.listToDictPreprocessing(jsonData) : jsonData;

    const chunks = this.jsonSplit({ data: processedData });

    if (Object.keys(chunks[chunks.length - 1] || {}).length === 0) {
      chunks.pop();
    }

    return chunks;
  }

  /**
   * Recursively replaces every character in the range U+0080 to U+FFFF inside
   * string values with a literal backslash-u sequence,
   * e.g. 'café' becomes 'caf\u00e9'. Object keys are left untouched.
   */
  private escapeNonAscii(obj: any): any {
    if (typeof obj === 'string') {
      return obj.replace(/[\u0080-\uffff]/g, char => {
        return `\\u${char.charCodeAt(0).toString(16).padStart(4, '0')}`;
      });
    }

    if (Array.isArray(obj)) {
      return obj.map(item => this.escapeNonAscii(item));
    }

    if (typeof obj === 'object' && obj !== null) {
      return Object.fromEntries(Object.entries(obj).map(([key, value]) => [key, this.escapeNonAscii(value)]));
    }

    return obj;
  }

  /**
   * Splits JSON data into a list of JSON-formatted strings.
   *
   * @param jsonData - The data to split.
   * @param convertLists - Forwarded to `splitJson` (default `false`).
   * @param ensureAscii - When `true` (default), string values are passed
   *   through `escapeNonAscii` before serialization. When `false`, literal
   *   backslash-u sequences found in string values are converted to the
   *   characters they denote.
   */
  splitText({
    jsonData,
    convertLists = false,
    ensureAscii = true,
  }: {
    jsonData: Record<string, any>;
    convertLists?: boolean;
    ensureAscii?: boolean;
  }): string[] {
    const chunks = this.splitJson({ jsonData, convertLists });

    if (ensureAscii) {
      const escapedChunks = chunks.map(chunk => this.escapeNonAscii(chunk));
      return escapedChunks.map(chunk => JSON.stringify(chunk));
    }

    return chunks.map(chunk =>
      JSON.stringify(chunk, (key, value) => {
        // Convert escaped Unicode sequences back to actual characters,
        // e.g. '\u00e9' -> 'é'.
        if (typeof value === 'string') {
          return value.replace(/\\u[\da-f]{4}/gi, match => String.fromCharCode(parseInt(match.slice(2), 16)));
        }
        return value;
      }),
    );
  }

  /**
   * Parses each entry of `texts` as JSON, splits it with `splitText`, and
   * creates one `Document` per resulting chunk string.
   *
   * Each document receives a shallow copy of the metadata at the same index
   * as its source text (an empty object when none is available).
   *
   * @throws SyntaxError if an entry of `texts` is not valid JSON
   *   (raised by `JSON.parse`).
   */
  createDocuments({
    texts,
    convertLists = false,
    ensureAscii = true,
    metadatas,
  }: {
    texts: string[];
    convertLists?: boolean;
    ensureAscii?: boolean;
    metadatas?: Record<string, any>[];
  }): Document[] {
    const _metadatas = metadatas || Array(texts.length).fill({});
    const documents: Document[] = [];

    texts.forEach((text, i) => {
      const chunks = this.splitText({ jsonData: JSON.parse(text), convertLists, ensureAscii });
      chunks.forEach(chunk => {
        const metadata = { ...(_metadatas[i] || {}) };
        documents.push(
          new Document({
            text: chunk,
            metadata,
          }),
        );
      });
    });

    return documents;
  }

  /**
   * Re-chunks existing documents: the `text` of each document is treated as
   * JSON and passed, together with the document's `metadata`, to
   * `createDocuments`.
   */
  transformDocuments({
    ensureAscii,
    documents,
    convertLists,
  }: {
    ensureAscii?: boolean;
    convertLists?: boolean;
    documents: Document[];
  }): Document[] {
    const texts: string[] = [];
    const metadatas: Record<string, any>[] = [];

    for (const doc of documents) {
      texts.push(doc.text);
      metadatas.push(doc.metadata);
    }

    return this.createDocuments({
      texts,
      metadatas,

      ensureAscii,
      convertLists,
    });
  }
}
@@ -1,11 +0,0 @@
1
- import { Language } from '../types';
2
- import type { BaseChunkOptions } from '../types';
3
-
4
- import { RecursiveCharacterTransformer } from './character';
5
-
6
/**
 * Character-based transformer preconfigured for LaTeX input.
 *
 * Uses the separators that `RecursiveCharacterTransformer` provides for
 * `Language.LATEX` and marks them as regular expressions. Both settings
 * override any `separators` / `isSeparatorRegex` present in `options`.
 */
export class LatexTransformer extends RecursiveCharacterTransformer {
  constructor(options: BaseChunkOptions = {}) {
    super({
      ...options,
      separators: RecursiveCharacterTransformer.getSeparatorsForLanguage(Language.LATEX),
      isSeparatorRegex: true,
    });
  }
}