@mastra/rag 1.2.3-alpha.0 → 1.2.3-alpha.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67) hide show
  1. package/CHANGELOG.md +18 -0
  2. package/package.json +19 -6
  3. package/.turbo/turbo-build.log +0 -4
  4. package/docker-compose.yaml +0 -22
  5. package/eslint.config.js +0 -6
  6. package/src/document/document.test.ts +0 -2975
  7. package/src/document/document.ts +0 -335
  8. package/src/document/extractors/base.ts +0 -30
  9. package/src/document/extractors/index.ts +0 -5
  10. package/src/document/extractors/keywords.test.ts +0 -125
  11. package/src/document/extractors/keywords.ts +0 -126
  12. package/src/document/extractors/questions.test.ts +0 -120
  13. package/src/document/extractors/questions.ts +0 -111
  14. package/src/document/extractors/summary.test.ts +0 -107
  15. package/src/document/extractors/summary.ts +0 -122
  16. package/src/document/extractors/title.test.ts +0 -121
  17. package/src/document/extractors/title.ts +0 -185
  18. package/src/document/extractors/types.ts +0 -40
  19. package/src/document/index.ts +0 -2
  20. package/src/document/prompts/base.ts +0 -77
  21. package/src/document/prompts/format.ts +0 -9
  22. package/src/document/prompts/index.ts +0 -15
  23. package/src/document/prompts/prompt.ts +0 -60
  24. package/src/document/prompts/types.ts +0 -29
  25. package/src/document/schema/index.ts +0 -3
  26. package/src/document/schema/node.ts +0 -187
  27. package/src/document/schema/types.ts +0 -40
  28. package/src/document/transformers/character.ts +0 -267
  29. package/src/document/transformers/html.ts +0 -346
  30. package/src/document/transformers/json.ts +0 -536
  31. package/src/document/transformers/latex.ts +0 -11
  32. package/src/document/transformers/markdown.ts +0 -239
  33. package/src/document/transformers/semantic-markdown.ts +0 -227
  34. package/src/document/transformers/sentence.ts +0 -314
  35. package/src/document/transformers/text.ts +0 -158
  36. package/src/document/transformers/token.ts +0 -137
  37. package/src/document/transformers/transformer.ts +0 -5
  38. package/src/document/types.ts +0 -145
  39. package/src/document/validation.ts +0 -158
  40. package/src/graph-rag/index.test.ts +0 -235
  41. package/src/graph-rag/index.ts +0 -306
  42. package/src/index.ts +0 -8
  43. package/src/rerank/index.test.ts +0 -150
  44. package/src/rerank/index.ts +0 -198
  45. package/src/rerank/relevance/cohere/index.ts +0 -56
  46. package/src/rerank/relevance/index.ts +0 -3
  47. package/src/rerank/relevance/mastra-agent/index.ts +0 -32
  48. package/src/rerank/relevance/zeroentropy/index.ts +0 -26
  49. package/src/tools/README.md +0 -153
  50. package/src/tools/document-chunker.ts +0 -34
  51. package/src/tools/graph-rag.test.ts +0 -115
  52. package/src/tools/graph-rag.ts +0 -157
  53. package/src/tools/index.ts +0 -3
  54. package/src/tools/types.ts +0 -126
  55. package/src/tools/vector-query-database-config.test.ts +0 -190
  56. package/src/tools/vector-query.test.ts +0 -477
  57. package/src/tools/vector-query.ts +0 -171
  58. package/src/utils/convert-sources.ts +0 -43
  59. package/src/utils/default-settings.ts +0 -38
  60. package/src/utils/index.ts +0 -3
  61. package/src/utils/tool-schemas.ts +0 -38
  62. package/src/utils/vector-prompts.ts +0 -832
  63. package/src/utils/vector-search.ts +0 -130
  64. package/tsconfig.build.json +0 -9
  65. package/tsconfig.json +0 -5
  66. package/tsup.config.ts +0 -17
  67. package/vitest.config.ts +0 -8
@@ -1,536 +0,0 @@
1
- import { Document } from '../schema';
2
- import type { JsonChunkOptions } from '../types';
3
-
4
/**
 * Splits JSON-like data into chunks whose serialized size is bounded by `maxSize`,
 * keeping every fragment nested under the key path it was taken from.
 *
 * All sizes are measured as the length of the `JSON.stringify` output.
 */
export class RecursiveJsonTransformer {
  /** Serialized length the chunking tries to keep every chunk within. */
  private readonly maxSize: number;
  /** Serialized length below which a partially filled chunk is held back so later entries can join it. */
  private readonly minSize: number;
  // NOTE(review): the two flags below are stored but never read inside this class;
  // the per-call `ensureAscii` / `convertLists` arguments are what take effect.
  private readonly ensureAscii: boolean;
  private readonly convertLists: boolean;

  /**
   * @param options - `maxSize` defaults to 2000; `minSize` defaults to
   * `max(maxSize - 200, 50)`; `ensureAscii` defaults to `false`; `convertLists` defaults to `true`.
   */
  constructor({ maxSize = 2000, minSize, ensureAscii = false, convertLists = true }: JsonChunkOptions) {
    this.maxSize = maxSize;
    this.minSize = minSize ?? Math.max(maxSize - 200, 50);
    this.ensureAscii = ensureAscii;
    this.convertLists = convertLists;
  }

  /**
   * Writes `value` as an own, enumerable data property of `target`.
   * Unlike plain assignment this never triggers the `__proto__` setter, so
   * such keys are kept as ordinary data.
   */
  private static defineOwn(target: Record<string, any>, key: string, value: any): void {
    Object.defineProperty(target, key, { value, writable: true, enumerable: true, configurable: true });
  }

  /**
   * Returns a deep copy of `value` in which every reference back to one of its own
   * ancestors is replaced by the string `'[Circular]'`.
   *
   * Only objects on the current descent path count as ancestors, so a value that is
   * merely referenced from two places is copied twice rather than flagged.
   */
  private static toAcyclic(value: any, ancestors: WeakSet<object> = new WeakSet<object>()): any {
    if (value === null || typeof value !== 'object') {
      return value;
    }
    if (ancestors.has(value)) {
      return '[Circular]';
    }

    ancestors.add(value);
    const copy = Array.isArray(value)
      ? value.map(item => RecursiveJsonTransformer.toAcyclic(item, ancestors))
      : Object.fromEntries(
          Object.keys(value).map(key => [key, RecursiveJsonTransformer.toAcyclic(value[key], ancestors)]),
        );
    // Leaving this node: it is no longer an ancestor of whatever is visited next.
    ancestors.delete(value);
    return copy;
  }

  /**
   * Length of the JSON serialization of `data`, with circular references
   * replaced by `'[Circular]'` so that serialization cannot throw on cycles.
   */
  private static jsonSize(data: Record<string, any>): number {
    return JSON.stringify(RecursiveJsonTransformer.toAcyclic(data)).length;
  }

  /**
   * Transform JSON data while handling circular references.
   *
   * @returns an object with `size` (serialized length) and `data` (a cycle-free deep copy).
   */
  public transform(data: Record<string, any>): Record<string, any> {
    return {
      size: RecursiveJsonTransformer.jsonSize(data),
      data: RecursiveJsonTransformer.toAcyclic(data),
    };
  }

  /**
   * Set a value in a nested dictionary based on the given path, creating
   * intermediate objects as needed. An empty path is a no-op.
   *
   * Only own properties are followed and every write defines an own property, so
   * path segments such as `constructor` or `__proto__` cannot reach (or modify)
   * anything inherited from `Object.prototype`.
   */
  private static setNestedDict(d: Record<string, any>, path: string[], value: any): void {
    const lastKey = path[path.length - 1];
    if (lastKey === undefined) {
      return;
    }

    let current = d;
    for (const key of path.slice(0, -1)) {
      const existing = Object.prototype.hasOwnProperty.call(current, key) ? current[key] : undefined;
      const next: Record<string, any> = existing || {};
      if (next !== existing) {
        RecursiveJsonTransformer.defineOwn(current, key, next);
      }
      current = next;
    }
    RecursiveJsonTransformer.defineOwn(current, lastKey, value);
  }

  /**
   * Convert lists in the JSON structure to dictionaries with index-based keys,
   * e.g. `['a', 'b']` becomes `{ '0': 'a', '1': 'b' }`. Applied recursively.
   */
  private listToDictPreprocessing(data: any): any {
    if (!data || typeof data !== 'object') {
      return data;
    }
    if (Array.isArray(data)) {
      return Object.fromEntries(data.map((item, index) => [String(index), this.listToDictPreprocessing(item)]));
    }
    return Object.fromEntries(Object.entries(data).map(([k, v]) => [k, this.listToDictPreprocessing(v)]));
  }

  /**
   * Handles primitive values (strings, numbers, etc) by either adding them to the
   * current chunk or creating new chunks if they don't fit.
   *
   * @returns the (possibly replaced) current chunk and the (possibly extended) chunk list;
   * neither input is mutated.
   */
  private handlePrimitiveValue(
    value: any,
    key: string,
    currentChunk: Record<string, any>,
    chunks: Record<string, any>[],
    fullPath: string[],
  ): { currentChunk: Record<string, any>; chunks: Record<string, any>[] } {
    const testValue = { [key]: value };

    if (RecursiveJsonTransformer.jsonSize(testValue) <= this.maxSize) {
      const merged = { ...currentChunk, ...testValue };
      if (RecursiveJsonTransformer.jsonSize(merged) <= this.maxSize) {
        return { currentChunk: merged, chunks };
      }
      // The entry fits on its own but not next to what is already collected.
      return { currentChunk: testValue, chunks: [...chunks, currentChunk] };
    }

    if (typeof value === 'string') {
      // Reserve room for the real wrapper (`{"<path>":""}`) instead of assuming 20
      // characters; with a long key the fixed guess made every piece overflow and
      // get filtered out below. The historical 20 stays as the lower bound.
      const wrapperSize = RecursiveJsonTransformer.jsonSize(this.createChunk('', fullPath));
      const newChunks = this.splitLongString(value, Math.max(20, wrapperSize))
        .map(piece => this.createChunk(piece, fullPath))
        .filter(chunk => RecursiveJsonTransformer.jsonSize(chunk) <= this.maxSize);

      return { currentChunk, chunks: [...chunks, ...newChunks] };
    }

    const newChunk = this.createChunk(value, fullPath);
    return {
      currentChunk,
      chunks: RecursiveJsonTransformer.jsonSize(newChunk) <= this.maxSize ? [...chunks, newChunk] : chunks,
    };
  }

  /**
   * Creates a nested dictionary chunk from a value and path
   * e.g., path ['a', 'b'], value 'c' becomes { a: { b: 'c' } }.
   * With an empty path the value itself is returned.
   */
  private createChunk(value: any, path: string[]): Record<string, any> {
    if (path.length === 0) {
      return value;
    }
    const chunk: Record<string, any> = {};
    RecursiveJsonTransformer.setNestedDict(chunk, path, value);
    return chunk;
  }

  /**
   * Checks if value is within size limits.
   * With `currentSize === 0` the value alone must not exceed `maxSize`; otherwise
   * the sum of both sizes must not exceed it.
   */
  private isWithinSizeLimit(value: any, currentSize: number = 0): boolean {
    const size = RecursiveJsonTransformer.jsonSize(value);
    return currentSize === 0 ? size <= this.maxSize : size + currentSize <= this.maxSize;
  }

  /**
   * Splits arrays into chunks based on size limits.
   * Object items too large for a chunk of their own are delegated to
   * `handleNestedObject`, addressed by their index in the array.
   */
  private handleArray(
    value: any[],
    key: string,
    currentPath: string[],
    depth: number,
    maxDepth: number,
  ): Record<string, any>[] {
    const path = [...currentPath, key];

    // Try keeping array intact
    const chunk = this.createChunk(value, path);
    if (this.isWithinSizeLimit(chunk)) {
      return [chunk];
    }

    const chunks: Record<string, any>[] = [];
    let currentGroup: any[] = [];

    // Emits the pending group. Without `force`, a group below `minSize` is held
    // back so that later items can still be appended to it.
    const flushGroup = (force: boolean): void => {
      if (currentGroup.length === 0) {
        return;
      }
      const groupChunk = this.createChunk(currentGroup, path);
      if (force || RecursiveJsonTransformer.jsonSize(groupChunk) >= this.minSize) {
        chunks.push(groupChunk);
        currentGroup = [];
      }
    };

    for (const [index, item] of value.entries()) {
      // Try adding item to current group
      const testGroup = [...currentGroup, item];
      if (this.isWithinSizeLimit(this.createChunk(testGroup, path))) {
        currentGroup = testGroup;
        continue;
      }

      const isContainer = typeof item === 'object' && item !== null;
      if (isContainer && !this.isWithinSizeLimit(this.createChunk([item], path))) {
        // Too large even alone: split the item itself. The pending group is not
        // replaced here, so it may stay open if it is still below `minSize`.
        flushGroup(false);
        const itemPath = [...path, String(index)];
        chunks.push(...this.handleNestedObject(item, itemPath, depth + 1, maxDepth));
        continue;
      }

      // The item starts a new group; the pending one has to be emitted first,
      // whatever its size, or its contents would be lost.
      flushGroup(true);
      currentGroup = [item];
    }

    // Nothing follows, so the remainder is emitted even if it is below `minSize`.
    flushGroup(true);
    return chunks;
  }

  /**
   * Splits objects into chunks based on size limits.
   * Handles nested arrays and objects by recursing into handleArray and handleNestedObject.
   * Beyond `maxDepth` the remaining structure is emitted as a single chunk.
   */
  private handleNestedObject(
    value: Record<string, any>,
    fullPath: string[],
    depth: number,
    maxDepth: number,
  ): Record<string, any>[] {
    const path = fullPath;

    // Handle max depth
    if (depth > maxDepth) {
      console.warn(`Maximum depth of ${maxDepth} exceeded, flattening remaining structure`);
      return [this.createChunk(value, path)];
    }

    // Try keeping object intact
    const wholeChunk = this.createChunk(value, path);
    if (this.isWithinSizeLimit(wholeChunk)) {
      return [wholeChunk];
    }

    const chunks: Record<string, any>[] = [];
    let currentChunk: Record<string, any> = {};

    // Same contract as the group flush in handleArray: `force` overrides `minSize`.
    const flushChunk = (force: boolean): void => {
      if (Object.keys(currentChunk).length === 0) {
        return;
      }
      const objChunk = this.createChunk(currentChunk, path);
      if (force || RecursiveJsonTransformer.jsonSize(objChunk) >= this.minSize) {
        chunks.push(objChunk);
        currentChunk = {};
      }
    };

    for (const [key, val] of Object.entries(value)) {
      if (val === undefined) continue;

      // Arrays always get chunks of their own
      if (Array.isArray(val)) {
        flushChunk(false);
        chunks.push(...this.handleArray(val, key, path, depth, maxDepth));
        continue;
      }

      // Try adding to current chunk
      const candidate = { ...currentChunk, [key]: val };
      if (this.isWithinSizeLimit(this.createChunk(candidate, path))) {
        currentChunk = candidate;
        continue;
      }

      if (typeof val === 'object' && val !== null) {
        flushChunk(false);
        chunks.push(...this.handleNestedObject(val, [...path, key], depth + 1, maxDepth));
      } else {
        // The primitive replaces the pending chunk, so that one must be emitted first.
        flushChunk(true);
        currentChunk = { [key]: val };
      }
    }

    flushChunk(true);
    return chunks;
  }

  /**
   * Splits long strings into smaller pieces, preferring to cut right after a space.
   *
   * @param value - the string to split
   * @param overhead - characters reserved for the JSON wrapper around each piece;
   * each piece holds at most `maxSize - overhead` characters, but never fewer than
   * one, so the loop always makes progress even when `maxSize <= overhead`
   */
  private splitLongString(value: string, overhead: number = 20): string[] {
    const chunkSize = Math.max(1, Math.floor(this.maxSize - overhead));
    const pieces: string[] = [];
    let remaining = value;

    while (remaining.length > chunkSize) {
      const lastSpace = remaining.slice(0, chunkSize).lastIndexOf(' ');
      const splitAt = lastSpace > 0 ? lastSpace + 1 : chunkSize;

      pieces.push(remaining.slice(0, splitAt));
      remaining = remaining.slice(splitAt);
    }

    if (remaining.length > 0) {
      pieces.push(remaining);
    }
    return pieces;
  }

  /**
   * Core chunking logic: walks the top-level entries of `data` and dispatches
   * arrays, objects and primitive values to their handlers.
   * Empty chunks are removed from the result.
   */
  private jsonSplit({
    data,
    currentPath = [],
    chunks = [{}],
    depth = 0,
    maxDepth = 100,
  }: {
    data: Record<string, any>;
    currentPath?: string[];
    chunks?: Record<string, any>[];
    depth?: number;
    maxDepth?: number;
  }): Record<string, any>[] {
    if (!data || typeof data !== 'object') {
      return chunks;
    }

    if (depth > maxDepth) {
      console.warn(`Maximum depth of ${maxDepth} exceeded, flattening remaining structure`);
      RecursiveJsonTransformer.setNestedDict(chunks[chunks.length - 1] || {}, currentPath, data);
      return chunks;
    }

    let currentChunk: Record<string, any> = {};
    let accumulatedChunks = chunks;

    for (const [key, value] of Object.entries(data)) {
      const fullPath = [...currentPath, key];

      if (Array.isArray(value)) {
        accumulatedChunks = [...accumulatedChunks, ...this.handleArray(value, key, currentPath, depth, maxDepth)];
      } else if (typeof value === 'object' && value !== null) {
        accumulatedChunks = [...accumulatedChunks, ...this.handleNestedObject(value, fullPath, depth, maxDepth)];
      } else {
        const result = this.handlePrimitiveValue(value, key, currentChunk, accumulatedChunks, fullPath);
        currentChunk = result.currentChunk;
        accumulatedChunks = result.chunks;
      }
    }

    if (Object.keys(currentChunk).length > 0) {
      accumulatedChunks = [...accumulatedChunks, currentChunk];
    }

    return accumulatedChunks.filter(chunk => Object.keys(chunk).length > 0);
  }

  /**
   * Splits JSON into a list of JSON chunks.
   *
   * @param jsonData - the data to split
   * @param convertLists - when true, arrays are first rewritten as index-keyed objects (default `false`)
   */
  splitJson({
    jsonData,
    convertLists = false,
  }: {
    jsonData: Record<string, any>;
    convertLists?: boolean;
  }): Record<string, any>[] {
    const processedData = convertLists ? this.listToDictPreprocessing(jsonData) : jsonData;

    const chunks = this.jsonSplit({ data: processedData });

    if (Object.keys(chunks[chunks.length - 1] || {}).length === 0) {
      chunks.pop();
    }

    return chunks;
  }

  /**
   * Replaces every UTF-16 code unit in the range U+0080–U+FFFF inside string
   * values with the six-character text `\uXXXX`. Arrays and objects are processed
   * recursively; object keys are left as they are.
   */
  private escapeNonAscii(obj: any): any {
    if (typeof obj === 'string') {
      return obj.replace(/[\u0080-\uffff]/g, char => {
        return `\\u${char.charCodeAt(0).toString(16).padStart(4, '0')}`;
      });
    }

    if (Array.isArray(obj)) {
      return obj.map(item => this.escapeNonAscii(item));
    }

    if (typeof obj === 'object' && obj !== null) {
      return Object.fromEntries(Object.entries(obj).map(([key, value]) => [key, this.escapeNonAscii(value)]));
    }

    return obj;
  }

  /**
   * Splits JSON into a list of JSON formatted strings.
   *
   * @param jsonData - the data to split
   * @param convertLists - forwarded to `splitJson` (default `false`)
   * @param ensureAscii - when true (default), string values are passed through
   * `escapeNonAscii` before serialization; when false, literal `\uXXXX` sequences
   * found in string values are turned into the characters they name
   */
  splitText({
    jsonData,
    convertLists = false,
    ensureAscii = true,
  }: {
    jsonData: Record<string, any>;
    convertLists?: boolean;
    ensureAscii?: boolean;
  }): string[] {
    const chunks = this.splitJson({ jsonData, convertLists });

    if (ensureAscii) {
      // NOTE(review): the backslash inserted by escapeNonAscii is escaped again by
      // JSON.stringify, so the emitted text contains `\\uXXXX`. Kept as is because
      // existing consumers may depend on that output.
      return chunks.map(chunk => JSON.stringify(this.escapeNonAscii(chunk)));
    }

    return chunks.map(chunk =>
      JSON.stringify(chunk, (_key, value) => {
        // Convert escaped Unicode sequences back to actual characters
        // e.g., '\u00e9' -> 'é'
        if (typeof value === 'string') {
          return value.replace(/\\u[\da-f]{4}/gi, match => String.fromCharCode(parseInt(match.slice(2), 16)));
        }
        return value;
      }),
    );
  }

  /**
   * Create documents from a list of JSON texts.
   *
   * Each text is parsed with `JSON.parse` (a malformed text makes this method throw)
   * and split with `splitText`; every resulting chunk becomes one `Document` that
   * carries a shallow copy of the metadata found at the same index as its text.
   */
  createDocuments({
    texts,
    convertLists = false,
    ensureAscii = true,
    metadatas,
  }: {
    texts: string[];
    convertLists?: boolean;
    ensureAscii?: boolean;
    metadatas?: Record<string, any>[];
  }): Document[] {
    const _metadatas = metadatas ?? Array(texts.length).fill({});
    const documents: Document[] = [];

    texts.forEach((text, i) => {
      const chunks = this.splitText({ jsonData: JSON.parse(text), convertLists, ensureAscii });
      for (const chunk of chunks) {
        documents.push(
          new Document({
            text: chunk,
            metadata: { ...(_metadatas[i] || {}) },
          }),
        );
      }
    });

    return documents;
  }

  /**
   * Re-chunks existing documents: the `text` of each document is treated as JSON
   * and split via `createDocuments`, with the document's metadata copied onto
   * every chunk produced from it.
   */
  transformDocuments({
    ensureAscii,
    documents,
    convertLists,
  }: {
    ensureAscii?: boolean;
    convertLists?: boolean;
    documents: Document[];
  }): Document[] {
    const texts: string[] = [];
    const metadatas: Record<string, any>[] = [];

    for (const doc of documents) {
      texts.push(doc.text);
      metadatas.push(doc.metadata);
    }

    return this.createDocuments({
      texts,
      metadatas,
      ensureAscii,
      convertLists,
    });
  }
}
@@ -1,11 +0,0 @@
1
- import { Language } from '../types';
2
- import type { BaseChunkOptions } from '../types';
3
-
4
- import { RecursiveCharacterTransformer } from './character';
5
-
6
/**
 * Recursive character splitter set up for LaTeX input: it uses the separators
 * that `RecursiveCharacterTransformer` provides for `Language.LATEX` and passes
 * them on with `isSeparatorRegex: true`. Any other option is forwarded unchanged.
 */
export class LatexTransformer extends RecursiveCharacterTransformer {
  constructor(options: BaseChunkOptions = {}) {
    super({
      ...options,
      separators: RecursiveCharacterTransformer.getSeparatorsForLanguage(Language.LATEX),
      isSeparatorRegex: true,
    });
  }
}