@mastra/longmemeval 0.0.0-add-libsql-changeset-20250910154739

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37) hide show
  1. package/CHANGELOG.md +919 -0
  2. package/DATA_DOWNLOAD_GUIDE.md +117 -0
  3. package/LICENSE.md +15 -0
  4. package/README.md +173 -0
  5. package/USAGE.md +105 -0
  6. package/package.json +67 -0
  7. package/scripts/download.ts +180 -0
  8. package/scripts/find-failed.ts +176 -0
  9. package/scripts/generate-embeddings.ts +56 -0
  10. package/scripts/generate-wm-templates.ts +296 -0
  11. package/scripts/setup.ts +60 -0
  12. package/src/__fixtures__/embeddings.json +2319 -0
  13. package/src/__fixtures__/test-dataset.json +82 -0
  14. package/src/cli.ts +690 -0
  15. package/src/commands/__tests__/prepare.test.ts +230 -0
  16. package/src/commands/__tests__/run.test.ts +403 -0
  17. package/src/commands/prepare.ts +793 -0
  18. package/src/commands/run.ts +553 -0
  19. package/src/config.ts +83 -0
  20. package/src/data/loader.ts +163 -0
  21. package/src/data/types.ts +61 -0
  22. package/src/embeddings/cached-openai-embedding-model.ts +227 -0
  23. package/src/embeddings/cached-openai-provider.ts +40 -0
  24. package/src/embeddings/index.ts +2 -0
  25. package/src/evaluation/__tests__/longmemeval-metric.test.ts +169 -0
  26. package/src/evaluation/longmemeval-metric.ts +173 -0
  27. package/src/retry-model.ts +60 -0
  28. package/src/storage/__tests__/benchmark-store.test.ts +280 -0
  29. package/src/storage/__tests__/benchmark-vector.test.ts +214 -0
  30. package/src/storage/benchmark-store.ts +540 -0
  31. package/src/storage/benchmark-vector.ts +234 -0
  32. package/src/storage/index.ts +2 -0
  33. package/src/test-utils/mock-embeddings.ts +54 -0
  34. package/src/test-utils/mock-model.ts +49 -0
  35. package/tests/data-loader.test.ts +96 -0
  36. package/tsconfig.json +18 -0
  37. package/vitest.config.ts +9 -0
@@ -0,0 +1,540 @@
1
+ import { MastraStorage } from '@mastra/core/storage';
2
+ import { MessageList } from '@mastra/core/agent';
3
+ import type { MastraMessageV2 } from '@mastra/core/agent';
4
+ import type { MastraMessageV1, StorageThreadType } from '@mastra/core/memory';
5
+ import type { Trace } from '@mastra/core/telemetry';
6
+ import type {
7
+ TABLE_NAMES,
8
+ StorageColumn,
9
+ StorageGetMessagesArg,
10
+ StorageResourceType,
11
+ EvalRow,
12
+ WorkflowRun,
13
+ WorkflowRuns,
14
+ PaginationInfo,
15
+ } from '@mastra/core/storage';
16
+ import { writeFile, readFile } from 'fs/promises';
17
+ import { existsSync } from 'fs';
18
+
19
/** Access mode accepted by BenchmarkStore: 'read-write' (the constructor default) or 'read'. */
type DBMode = 'read-write' | 'read';
20
+
21
+ export class BenchmarkStore extends MastraStorage {
22
/**
 * In-memory tables: one Map per storage table name, keyed by the identifier
 * chosen when the record was written.
 */
private data: Record<TABLE_NAMES, Map<string, any>> = {
  mastra_workflow_snapshot: new Map(),
  mastra_evals: new Map(),
  mastra_messages: new Map(),
  mastra_threads: new Map(),
  mastra_traces: new Map(),
  mastra_resources: new Map(),
};

/** Access mode; the methods that check it skip their writes when it is 'read'. */
private mode: DBMode;

/**
 * Creates an empty store.
 *
 * @param mode - Access mode for this instance; defaults to 'read-write'.
 */
constructor(mode: DBMode = 'read-write') {
  super({ name: 'BenchmarkStore' });
  this.mode = mode;
  // Nothing needs asynchronous setup, so the initialization promise is already resolved.
  this.hasInitialized = Promise.resolve(true);
}
38
+
39
/**
 * Capability flags reported by this store.
 *
 * @returns An object with `selectByIncludeResourceScope` and `resourceWorkingMemory` both set to true.
 */
get supports() {
  const capabilities = {
    selectByIncludeResourceScope: true,
    resourceWorkingMemory: true,
  };
  return capabilities;
}
45
+
46
/**
 * No-op: the argument is ignored and nothing is created.
 *
 * @param _ - Table name and schema; unused.
 */
async createTable(_: { tableName: TABLE_NAMES; schema: Record<string, StorageColumn> }): Promise<void> {
  // Intentionally empty.
}
47
/**
 * No-op: the argument is ignored and nothing is altered.
 *
 * @param _ - Table name, schema and `ifNotExists` column list; unused.
 */
async alterTable(_: {
  tableName: TABLE_NAMES;
  schema: Record<string, StorageColumn>;
  ifNotExists: string[];
}): Promise<void> {
  // Intentionally empty.
}
52
+
53
/**
 * Removes every record from one table. Does nothing in 'read' mode.
 *
 * @param tableName - Table whose Map is emptied.
 */
async clearTable({ tableName }: { tableName: TABLE_NAMES }): Promise<void> {
  const writable = this.mode !== 'read';
  if (writable) {
    this.data[tableName].clear();
  }
}
57
+
58
/**
 * Stores a copy of `record` in the given table. Does nothing in 'read' mode.
 *
 * The key is `record.id`, else `record.run_id`, else a generated
 * timestamp/random string. The stored value is a JSON round-trip of the
 * record, so later changes to the caller's object do not affect the table.
 *
 * @param tableName - Destination table.
 * @param record - Record to store.
 */
async insert({ tableName, record }: { tableName: TABLE_NAMES; record: Record<string, any> }): Promise<void> {
  if (this.mode === 'read') {
    return;
  }

  let key = record.id || record.run_id;
  if (!key) {
    key = `${Date.now()}_${Math.random()}`;
  }

  // Deep clone via JSON so the table never shares references with the caller.
  const snapshot = JSON.parse(JSON.stringify(record));
  this.data[tableName].set(key, snapshot);
}
63
+
64
/**
 * Inserts several records into one table, one at a time and in order, by
 * delegating to `insert`. Does nothing in 'read' mode.
 *
 * @param tableName - Destination table.
 * @param records - Records to store.
 */
async batchInsert({ tableName, records }: { tableName: TABLE_NAMES; records: Record<string, any>[] }): Promise<void> {
  if (this.mode !== 'read') {
    for (const row of records) {
      await this.insert({ tableName, record: row });
    }
  }
}
70
+
71
/**
 * Looks up a single record by key.
 *
 * @param tableName - Table to read from.
 * @param keys - Lookup keys; `keys.run_id` is used when truthy, otherwise `keys.id`.
 * @returns The stored record cast to `R`, or null when nothing truthy is stored under that key.
 */
async load<R>({ tableName, keys }: { tableName: TABLE_NAMES; keys: Record<string, string> }): Promise<R | null> {
  const lookupKey = keys.run_id || keys.id;
  const table = this.data[tableName];
  const found = table.get(lookupKey!);
  if (!found) {
    return null;
  }
  return found as R;
}
76
+
77
/**
 * Fetches a thread by id.
 *
 * @param threadId - Key into the threads table.
 * @returns The stored thread, or null when none is stored under that id.
 */
async getThreadById({ threadId }: { threadId: string }): Promise<StorageThreadType | null> {
  return this.data.mastra_threads.get(threadId) || null;
}
81
+
82
/**
 * Lists every stored thread whose `resourceId` equals the given one, in the
 * iteration order of the threads table.
 *
 * @param resourceId - Resource id to match.
 * @returns The matching threads; empty when there are none.
 */
async getThreadsByResourceId({ resourceId }: { resourceId: string }): Promise<StorageThreadType[]> {
  const storedThreads = Array.from(this.data.mastra_threads.values());
  return storedThreads.filter((candidate: any) => candidate.resourceId === resourceId);
}
91
+
92
/**
 * Stores the thread under its id (by reference, without cloning) and returns it.
 *
 * NOTE(review): unlike the other mutating methods in this class, this one does
 * not check the access mode, so it writes even in 'read' mode. Confirm whether
 * that is intentional before adding a guard.
 *
 * @param thread - Thread to store.
 * @returns The same thread object that was passed in.
 */
async saveThread({ thread }: { thread: StorageThreadType }): Promise<StorageThreadType> {
  const threads = this.data.mastra_threads;
  threads.set(thread.id, thread);
  return thread;
}
96
+
97
/**
 * Updates the title and metadata of an existing thread in place.
 *
 * In 'read' mode, or when no thread is stored under `id`, nothing is changed
 * and the lookup result is returned as is (which is undefined for a missing
 * thread, despite the declared return type).
 *
 * @param id - Thread id.
 * @param title - New title.
 * @param metadata - Entries merged over the thread's existing metadata.
 * @returns The stored thread after the update, or the unchanged lookup result.
 */
async updateThread({
  id,
  title,
  metadata,
}: {
  id: string;
  title: string;
  metadata: Record<string, unknown>;
}): Promise<StorageThreadType> {
  const threads = this.data.mastra_threads;
  const thread = threads.get(id);

  if (this.mode === 'read' || !thread) {
    return thread;
  }

  Object.assign(thread, {
    title,
    metadata: { ...thread.metadata, ...metadata },
    updatedAt: new Date(),
  });
  threads.set(id, thread);
  return thread;
}
118
+
119
/**
 * Deletes a thread and every stored message whose `threadId` matches it.
 * Does nothing in 'read' mode.
 *
 * @param threadId - Id of the thread to remove.
 */
async deleteThread({ threadId }: { threadId: string }): Promise<void> {
  if (this.mode === 'read') {
    return;
  }

  const { mastra_threads: threads, mastra_messages: messages } = this.data;
  threads.delete(threadId);

  // Cascade: drop the messages that belonged to the removed thread.
  messages.forEach((message, messageId) => {
    if (message.threadId === threadId) {
      messages.delete(messageId);
    }
  });
}
130
+
131
/**
 * Fetches a resource by id.
 *
 * @param resourceId - Key into the resources table.
 * @returns The stored resource, or null when none is stored under that id.
 */
async getResourceById({ resourceId }: { resourceId: string }): Promise<StorageResourceType | null> {
  return this.data.mastra_resources.get(resourceId) || null;
}
135
+
136
/**
 * Stores a JSON round-trip copy of the resource under its id. In 'read' mode
 * nothing is stored.
 *
 * @param resource - Resource to store.
 * @returns The resource object that was passed in (not the stored copy).
 */
async saveResource({ resource }: { resource: StorageResourceType }): Promise<StorageResourceType> {
  if (this.mode !== 'read') {
    const storedCopy = JSON.parse(JSON.stringify(resource));
    this.data.mastra_resources.set(resource.id, storedCopy);
  }
  return resource;
}
141
+
142
/**
 * Creates or updates a resource.
 *
 * In 'read' mode nothing is written and the current lookup result is returned
 * (undefined when the resource does not exist, despite the declared return
 * type). Otherwise a missing resource is created from the arguments, and an
 * existing one is replaced by a copy with merged metadata, a fresh
 * `updatedAt`, and `workingMemory` overwritten only when it was supplied.
 *
 * @param resourceId - Id of the resource.
 * @param workingMemory - New working memory; `undefined` keeps the existing value.
 * @param metadata - Entries merged over the existing metadata.
 * @returns The resource as stored after the call.
 */
async updateResource({
  resourceId,
  workingMemory,
  metadata,
}: {
  resourceId: string;
  workingMemory?: string;
  metadata?: Record<string, unknown>;
}): Promise<StorageResourceType> {
  const resources = this.data.mastra_resources;
  const existing = resources.get(resourceId);

  if (this.mode === 'read') {
    return existing;
  }

  const next = existing
    ? {
        ...existing,
        workingMemory: workingMemory === undefined ? existing.workingMemory : workingMemory,
        metadata: {
          ...existing.metadata,
          ...metadata,
        },
        updatedAt: new Date(),
      }
    : {
        id: resourceId,
        workingMemory,
        metadata: metadata || {},
        createdAt: new Date(),
        updatedAt: new Date(),
      };

  resources.set(resourceId, next);
  return next;
}
179
+
180
/**
 * Returns the messages of a thread, plus any explicitly included messages and
 * their neighbours, sorted by `createdAt` ascending.
 *
 * Selection works in two parts that are then unioned by message id:
 *  - base messages: those whose `threadId` matches (and whose `resourceId`
 *    matches, when one is given), cut down to the newest `selectBy.last`
 *    when that value is truthy;
 *  - included messages: for each `selectBy.include` entry, the target message
 *    and up to `withPreviousMessages` / `withNextMessages` neighbours from the
 *    entry's own `threadId` (falling back to the main `threadId`).
 *
 * @param args - Query arguments; `format` selects the output shape and defaults to 'v1'.
 * @returns The selected messages converted through `MessageList` to the requested format.
 * @throws Error when `threadId` is empty or whitespace only.
 */
async getMessages(args: StorageGetMessagesArg & { format?: 'v1' }): Promise<MastraMessageV1[]>;
async getMessages(args: StorageGetMessagesArg & { format: 'v2' }): Promise<MastraMessageV2[]>;
async getMessages(
  args: StorageGetMessagesArg & { format?: 'v1' | 'v2' },
): Promise<MastraMessageV1[] | MastraMessageV2[]> {
  const { threadId, resourceId, selectBy, format = 'v1' } = args;
  if (!threadId.trim()) throw new Error('threadId must be a non-empty string');

  const byCreatedAt = (a: any, b: any) => new Date(a.createdAt).getTime() - new Date(b.createdAt).getTime();

  // Materialize the table once; every step below reads from this snapshot.
  const storedMessages: any[] = Array.from(this.data.mastra_messages.values());

  // Handle selectBy.include first; entries may point at other threads (resource scope).
  const includedMessageIds = new Set<string>();
  if (selectBy?.include?.length) {
    // Several include entries usually target the same thread, so the filtered
    // and sorted view of each thread is built once and reused.
    const sortedByThread = new Map<string, any[]>();

    for (const inc of selectBy.include) {
      const queryThreadId = inc.threadId || threadId;

      let threadMessages = sortedByThread.get(queryThreadId);
      if (!threadMessages) {
        threadMessages = storedMessages.filter((msg: any) => msg.threadId === queryThreadId).sort(byCreatedAt);
        sortedByThread.set(queryThreadId, threadMessages);
      }

      const targetIndex = threadMessages.findIndex((msg: any) => msg.id === inc.id);
      if (targetIndex < 0) continue;

      const startIdx = Math.max(0, targetIndex - (inc.withPreviousMessages || 0));
      const endIdx = Math.min(threadMessages.length, targetIndex + (inc.withNextMessages || 0) + 1);
      for (let i = startIdx; i < endIdx; i++) {
        includedMessageIds.add(threadMessages[i].id);
      }
    }
  }

  // Base messages for the main thread. threadId is known to be non-empty here.
  let baseMessages = storedMessages.filter((msg: any) => {
    if (msg.threadId !== threadId) return false;
    if (resourceId && msg.resourceId !== resourceId) return false;
    return true;
  });

  // selectBy.last limits the base messages only, never the included ones.
  if (selectBy?.last) {
    baseMessages.sort(byCreatedAt);
    baseMessages = baseMessages.slice(-selectBy.last);
  }

  // Union of both selections, deduplicated by id.
  const allMessageIds = new Set<string>(includedMessageIds);
  for (const msg of baseMessages) {
    allMessageIds.add(msg.id);
  }

  const messages = storedMessages.filter((msg: any) => allMessageIds.has(msg.id));
  messages.sort(byCreatedAt);

  const list = new MessageList().add(messages, 'memory');
  return format === 'v2' ? list.get.all.v2() : list.get.all.v1();
}
244
+
245
/**
 * Stores each message under its id (by reference) and returns the saved batch
 * converted through `MessageList` to the requested format.
 *
 * In 'read' mode nothing is stored and an empty array is returned.
 *
 * @param args - Messages to store and the output format, which defaults to 'v1'.
 * @returns The saved messages in the requested format, or `[]` in 'read' mode.
 */
async saveMessages(args: { messages: MastraMessageV1[]; format?: undefined | 'v1' }): Promise<MastraMessageV1[]>;
async saveMessages(args: { messages: MastraMessageV2[]; format: 'v2' }): Promise<MastraMessageV2[]>;
async saveMessages(
  args: { messages: MastraMessageV1[]; format?: undefined | 'v1' } | { messages: MastraMessageV2[]; format: 'v2' },
): Promise<MastraMessageV2[] | MastraMessageV1[]> {
  if (this.mode === 'read') {
    return [];
  }

  const { messages, format = 'v1' } = args;
  const table = this.data.mastra_messages;

  for (const incoming of messages) {
    table.set(incoming.id, incoming);
  }

  const list = new MessageList().add(messages, 'memory');
  if (format === 'v2') {
    return list.get.all.v2();
  }
  return list.get.all.v1();
}
261
+
262
/**
 * Applies partial updates to stored messages.
 *
 * Each update is shallow-merged over the stored message with the same id and
 * stamped with a fresh `updatedAt`. Updates whose id is not stored are
 * skipped. In 'read' mode nothing is changed and an empty array is returned.
 *
 * NOTE(review): the parameter type parses as
 * `Partial<MastraMessageV2> & ({ id: string }[])`; it may have been meant as
 * `(Partial<MastraMessageV2> & { id: string })[]`. Left unchanged; confirm
 * against the base class signature.
 *
 * @param args - Object holding the list of updates, each carrying at least an `id`.
 * @returns The merged messages, in the order their updates were supplied.
 */
async updateMessages(args: { messages: Partial<MastraMessageV2> & { id: string }[] }): Promise<MastraMessageV2[]> {
  if (this.mode === 'read') {
    return [];
  }

  const table = this.data.mastra_messages;
  const applied: MastraMessageV2[] = [];

  for (const patch of args.messages) {
    const current = table.get(patch.id);
    if (!current) {
      continue;
    }
    const merged = { ...current, ...patch, updatedAt: new Date() };
    table.set(patch.id, merged);
    applied.push(merged);
  }

  return applied;
}
278
+
279
/**
 * Returns one page of stored traces that satisfy every supplied criterion,
 * ordered by `startTime` descending.
 *
 * @param name - When set, keeps traces whose `name` starts with this prefix.
 * @param scope - When set, keeps traces whose `scope` equals it.
 * @param page - Zero-based page index.
 * @param perPage - Page size.
 * @param attributes - When set, every entry must strictly equal the same key in `trace.attributes`.
 * @param filters - When set, every entry must strictly equal the same top-level key on the trace.
 * @param fromDate - When set, keeps traces whose `createdAt` is on or after it.
 * @param toDate - When set, keeps traces whose `createdAt` is on or before it.
 * @returns The slice `[page * perPage, page * perPage + perPage)` of the sorted matches.
 */
async getTraces({
  name,
  scope,
  page,
  perPage,
  attributes,
  filters,
  fromDate,
  toDate,
}: {
  name?: string;
  scope?: string;
  page: number;
  perPage: number;
  attributes?: Record<string, string>;
  filters?: Record<string, any>;
  fromDate?: Date;
  toDate?: Date;
}): Promise<any[]> {
  // A criterion that was not supplied never rejects a trace.
  const matches = (trace: any): boolean => {
    if (name && !trace.name?.startsWith(name)) return false;
    if (scope && trace.scope !== scope) return false;
    if (attributes && !Object.entries(attributes).every(([key, value]) => trace.attributes?.[key] === value)) {
      return false;
    }
    if (filters && !Object.entries(filters).every(([key, value]) => trace[key] === value)) return false;
    if (fromDate && !(new Date(trace.createdAt) >= fromDate)) return false;
    if (toDate && !(new Date(trace.createdAt) <= toDate)) return false;
    return true;
  };

  const selected = Array.from(this.data.mastra_traces.values()).filter(matches);

  // Newest start time first, then cut out the requested page.
  selected.sort((a: any, b: any) => new Date(b.startTime).getTime() - new Date(a.startTime).getTime());
  const firstIndex = page * perPage;
  return selected.slice(firstIndex, firstIndex + perPage);
}
319
+
320
/**
 * Lists stored evals for one agent, newest `createdAt` first.
 *
 * @param agentName - Value the eval's `agentName` must equal.
 * @param type - 'test' keeps evals with a truthy `testInfo.testPath`; 'live'
 *   keeps the ones without; when omitted, no further filtering is applied.
 * @returns The matching evals.
 */
async getEvalsByAgentName(agentName: string, type?: 'test' | 'live'): Promise<EvalRow[]> {
  const hasTestPath = (row: any): boolean => Boolean(row.testInfo && row.testInfo.testPath);

  let rows = Array.from(this.data.mastra_evals.values()).filter((row: any) => row.agentName === agentName);

  switch (type) {
    case 'test':
      rows = rows.filter(hasTestPath);
      break;
    case 'live':
      rows = rows.filter((row: any) => !hasTestPath(row));
      break;
    default:
      break;
  }

  rows.sort((a: any, b: any) => new Date(b.createdAt).getTime() - new Date(a.createdAt).getTime());

  return rows as EvalRow[];
}
334
+
335
/**
 * Lists stored workflow runs, newest `createdAt` first.
 *
 * @param workflowName - When set, keeps runs whose `workflow_name` equals it.
 * @param fromDate - When set, keeps runs whose `createdAt` is on or after it.
 * @param toDate - When set, keeps runs whose `createdAt` is on or before it.
 * @param limit - Page size; only applied when `offset` is also provided.
 * @param offset - Start index; only applied when `limit` is also provided.
 * @param resourceId - When set, keeps runs whose `resourceId` equals it.
 * @returns The selected runs plus `total`, the number of matches before pagination.
 */
async getWorkflowRuns({
  workflowName,
  fromDate,
  toDate,
  limit,
  offset,
  resourceId,
}: {
  workflowName?: string;
  fromDate?: Date;
  toDate?: Date;
  limit?: number;
  offset?: number;
  resourceId?: string;
} = {}): Promise<WorkflowRuns> {
  const keep = (run: any): boolean => {
    if (workflowName && run.workflow_name !== workflowName) return false;
    if (fromDate && !(new Date(run.createdAt) >= fromDate)) return false;
    if (toDate && !(new Date(run.createdAt) <= toDate)) return false;
    if (resourceId && run.resourceId !== resourceId) return false;
    return true;
  };

  const matching = Array.from(this.data.mastra_workflow_snapshot.values()).filter(keep);
  const total = matching.length;

  matching.sort((a: any, b: any) => new Date(b.createdAt).getTime() - new Date(a.createdAt).getTime());

  // Pagination is applied only when both bounds were given.
  const paginate = limit !== undefined && offset !== undefined;
  const selected = paginate ? matching.slice(offset, offset + limit) : matching;

  // Snapshots stored as strings are parsed; object snapshots are shallow-copied.
  const parsedRuns = selected.map((run: any) => {
    const snapshot = typeof run.snapshot === 'string' ? JSON.parse(run.snapshot) : { ...run.snapshot };
    return {
      ...run,
      snapshot,
      createdAt: new Date(run.createdAt),
      updatedAt: new Date(run.updatedAt),
      runId: run.run_id,
      workflowName: run.workflow_name,
    };
  });

  return { runs: parsedRuns as WorkflowRun[], total };
}
379
+
380
/**
 * Fetches one workflow run by run id.
 *
 * @param runId - Key into the workflow snapshot table.
 * @param workflowName - When set, the run's `workflow_name` must equal it.
 * @returns The run with its snapshot parsed (when stored as a string), dates
 *   converted to `Date`, and `runId` / `workflowName` aliases added; null when
 *   no run is stored under `runId` or the workflow name does not match.
 */
async getWorkflowRunById({
  runId,
  workflowName,
}: {
  runId: string;
  workflowName?: string;
}): Promise<WorkflowRun | null> {
  const stored = this.data.mastra_workflow_snapshot.get(runId);

  if (!stored) {
    return null;
  }
  if (workflowName && stored.workflow_name !== workflowName) {
    return null;
  }

  const snapshot = typeof stored.snapshot === 'string' ? JSON.parse(stored.snapshot) : stored.snapshot;

  return {
    ...stored,
    snapshot,
    createdAt: new Date(stored.createdAt),
    updatedAt: new Date(stored.updatedAt),
    runId: stored.run_id,
    workflowName: stored.workflow_name,
  } as WorkflowRun;
}
405
+
406
/**
 * Returns one page of traces together with pagination metadata.
 *
 * `total` and `hasMore` describe the traces that match the supplied filters.
 * They were previously computed from the whole traces table, which made both
 * values wrong whenever any filter was given.
 *
 * @param name - Optional name prefix filter, forwarded to `getTraces`.
 * @param scope - Optional scope filter, forwarded to `getTraces`.
 * @param attributes - Optional attribute filter, forwarded to `getTraces`.
 * @param page - Zero-based page index.
 * @param perPage - Page size.
 * @param fromDate - Optional lower bound, forwarded to `getTraces`.
 * @param toDate - Optional upper bound, forwarded to `getTraces`.
 * @returns The requested page plus `total`, `page`, `perPage` and `hasMore`.
 */
async getTracesPaginated({
  name,
  scope,
  attributes,
  page,
  perPage,
  fromDate,
  toDate,
}: {
  name?: string;
  scope?: string;
  attributes?: Record<string, string>;
  page: number;
  perPage: number;
  fromDate?: Date;
  toDate?: Date;
}): Promise<PaginationInfo & { traces: Trace[] }> {
  // Ask getTraces for a single, effectively unbounded page so that the full
  // filtered and sorted result is available for counting.
  const matching = await this.getTraces({
    name,
    scope,
    attributes,
    fromDate,
    toDate,
    page: 0,
    perPage: Number.MAX_SAFE_INTEGER,
  });

  const total = matching.length;
  const start = page * perPage;

  return {
    traces: matching.slice(start, start + perPage),
    total,
    page,
    perPage,
    hasMore: total > (page + 1) * perPage,
  };
}
434
+
435
/**
 * Returns one page of the threads that belong to a resource, with pagination
 * metadata computed from the full match list.
 *
 * @param args - `resourceId` to match, zero-based `page`, and `perPage` size.
 * @returns The page of threads plus `total`, `page`, `perPage` and `hasMore`.
 */
async getThreadsByResourceIdPaginated(args: {
  resourceId: string;
  page: number;
  perPage: number;
}): Promise<PaginationInfo & { threads: StorageThreadType[] }> {
  const { resourceId, page, perPage } = args;

  const matching = await this.getThreadsByResourceId({ resourceId });
  const total = matching.length;
  const firstIndex = page * perPage;

  return {
    threads: matching.slice(firstIndex, firstIndex + perPage),
    total,
    page,
    perPage,
    hasMore: total > (page + 1) * perPage,
  };
}
452
+
453
/**
 * Returns one page of a thread's messages with pagination metadata.
 *
 * The full selection is obtained from `getMessages` and then sliced. The
 * optional `resourceId` is now forwarded to `getMessages`, which honours it;
 * it was previously dropped, so the paginated and non-paginated queries could
 * disagree for the same arguments.
 *
 * @param args - Query arguments. `selectBy.pagination` supplies `page`
 *   (default 0) and `perPage` (default 40); `format` defaults to 'v1'.
 * @returns The page of messages plus `total`, `page`, `perPage` and `hasMore`.
 * @throws Error when `threadId` is empty or whitespace only.
 */
async getMessagesPaginated(
  args: StorageGetMessagesArg & { format?: 'v1' | 'v2' },
): Promise<PaginationInfo & { messages: MastraMessageV1[] | MastraMessageV2[] }> {
  const { threadId, resourceId, selectBy, format = 'v1' } = args;
  if (!threadId.trim()) throw new Error('threadId must be a non-empty string');

  const { page = 0, perPage = 40 } = selectBy?.pagination || {};

  // Fetch the complete selection; pagination is applied here, not in getMessages.
  const allMessages = await this.getMessages({
    threadId,
    resourceId,
    selectBy: { ...selectBy, pagination: undefined },
    format: format as any,
  } as any);

  const start = page * perPage;
  const messages = allMessages.slice(start, start + perPage);

  return {
    messages,
    total: allMessages.length,
    page,
    perPage,
    hasMore: allMessages.length > (page + 1) * perPage,
  };
}
480
+
481
/**
 * Writes the whole store to `filePath` as pretty-printed JSON.
 *
 * Each table is serialized as an array of `[key, value]` pairs under its table
 * name. Does nothing in 'read' mode.
 *
 * @param filePath - Destination file.
 */
async persist(filePath: string): Promise<void> {
  if (this.mode === 'read') {
    return;
  }

  // Maps are not JSON-serializable, so each one becomes an array of its entries.
  const serializable: Record<string, any> = Object.fromEntries(
    Object.entries(this.data).map(([tableName, tableData]) => [tableName, Array.from(tableData.entries())]),
  );

  await writeFile(filePath, JSON.stringify(serializable, null, 2));
}
496
+
497
/**
 * Loads store contents from a JSON file.
 *
 * Every top-level key in the file replaces the table of the same name with a
 * Map built from the stored entry array; tables that are absent from the file
 * are left as they are. This method does not check the access mode.
 *
 * When parsing fails, the file path and content length are logged, along with
 * up to 100 characters on either side of the error position if the error
 * message reports one, and the original error is rethrown.
 *
 * @param filePath - File to read.
 * @throws Error when the file does not exist.
 * @throws The JSON parse error when the content is not valid JSON.
 */
async hydrate(filePath: string): Promise<void> {
  const fileIsMissing = !existsSync(filePath);
  if (fileIsMissing) {
    throw new Error(`Storage file not found: ${filePath}`);
  }

  const content = await readFile(filePath, 'utf-8');

  let data;
  try {
    data = JSON.parse(content);
  } catch (error) {
    console.error(`Failed to parse JSON from ${filePath}. File size: ${content.length} bytes`);

    // Only some SyntaxError messages carry a character position.
    const positionMatch =
      error instanceof SyntaxError && error.message.includes('position')
        ? error.message.match(/position (\d+)/)
        : null;

    if (positionMatch) {
      const position = Number.parseInt(positionMatch[1], 10);
      const windowStart = Math.max(0, position - 100);
      const windowEnd = Math.min(content.length, position + 100);
      console.error(`Content around error position ${position}:`);
      console.error(content.substring(windowStart, windowEnd));
    }

    throw error;
  }

  // Rebuild a Map from each serialized array of [key, value] entries.
  Object.entries(data).forEach(([tableName, tableData]) => {
    this.data[tableName as TABLE_NAMES] = new Map(tableData as any);
  });
}
530
+
531
/**
 * Empties every table. Does nothing in 'read' mode.
 */
async clear(): Promise<void> {
  if (this.mode === 'read') {
    return;
  }
  Object.values(this.data).forEach(table => table.clear());
}
540
+ }