@mastra/longmemeval 0.0.0-add-libsql-changeset-20250910154739

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37)
  1. package/CHANGELOG.md +919 -0
  2. package/DATA_DOWNLOAD_GUIDE.md +117 -0
  3. package/LICENSE.md +15 -0
  4. package/README.md +173 -0
  5. package/USAGE.md +105 -0
  6. package/package.json +67 -0
  7. package/scripts/download.ts +180 -0
  8. package/scripts/find-failed.ts +176 -0
  9. package/scripts/generate-embeddings.ts +56 -0
  10. package/scripts/generate-wm-templates.ts +296 -0
  11. package/scripts/setup.ts +60 -0
  12. package/src/__fixtures__/embeddings.json +2319 -0
  13. package/src/__fixtures__/test-dataset.json +82 -0
  14. package/src/cli.ts +690 -0
  15. package/src/commands/__tests__/prepare.test.ts +230 -0
  16. package/src/commands/__tests__/run.test.ts +403 -0
  17. package/src/commands/prepare.ts +793 -0
  18. package/src/commands/run.ts +553 -0
  19. package/src/config.ts +83 -0
  20. package/src/data/loader.ts +163 -0
  21. package/src/data/types.ts +61 -0
  22. package/src/embeddings/cached-openai-embedding-model.ts +227 -0
  23. package/src/embeddings/cached-openai-provider.ts +40 -0
  24. package/src/embeddings/index.ts +2 -0
  25. package/src/evaluation/__tests__/longmemeval-metric.test.ts +169 -0
  26. package/src/evaluation/longmemeval-metric.ts +173 -0
  27. package/src/retry-model.ts +60 -0
  28. package/src/storage/__tests__/benchmark-store.test.ts +280 -0
  29. package/src/storage/__tests__/benchmark-vector.test.ts +214 -0
  30. package/src/storage/benchmark-store.ts +540 -0
  31. package/src/storage/benchmark-vector.ts +234 -0
  32. package/src/storage/index.ts +2 -0
  33. package/src/test-utils/mock-embeddings.ts +54 -0
  34. package/src/test-utils/mock-model.ts +49 -0
  35. package/tests/data-loader.test.ts +96 -0
  36. package/tsconfig.json +18 -0
  37. package/vitest.config.ts +9 -0
@@ -0,0 +1,793 @@
1
+ import { Agent } from '@mastra/core/agent';
2
+ import { Memory } from '@mastra/memory';
3
+ import { MockLanguageModelV1 } from '../test-utils/mock-model';
4
+ import { openai } from '@ai-sdk/openai';
5
+ import { cachedOpenAI } from '../embeddings/cached-openai-provider';
6
+ import { embeddingCacheStats } from '../embeddings';
7
+ import chalk from 'chalk';
8
+ import ora from 'ora';
9
+ import { join } from 'path';
10
+ import { mkdir, writeFile, readFile, unlink } from 'fs/promises';
11
+ import { existsSync } from 'fs';
12
+
13
+ import { DatasetLoader } from '../data/loader';
14
+ import { BenchmarkStore, BenchmarkVectorStore } from '../storage';
15
+ import type { LongMemEvalQuestion, MemoryConfigOptions, MemoryConfigType } from '../data/types';
16
+ import type { CoreMessage } from 'ai';
17
+
18
+ import { getMemoryOptions } from '../config';
19
+ import { makeRetryModel } from '../retry-model';
20
+
21
// Shared gpt-4o model wrapped with retry/rate-limit handling. Its mutable
// state (rate-limit count, pause info) is read by the progress display in run().
const retry4o = makeRetryModel(openai('gpt-4o'));
22
+
23
/**
 * Options controlling how LongMemEval benchmark data is prepared.
 */
export interface PrepareOptions {
  /** Which LongMemEval dataset variant to load. */
  dataset: 'longmemeval_s' | 'longmemeval_m' | 'longmemeval_oracle';
  /** Memory configuration to prepare data for (e.g. semantic-recall, working-memory). */
  memoryConfig: MemoryConfigType;
  /** Root output directory; defaults to './prepared-data'. */
  outputDir?: string;
  /** Process only the first N questions of the dataset. */
  subset?: number;
  /** Number of questions processed concurrently (default 10). */
  concurrency?: number;
  /** Prepare a single question identified by its ID. */
  questionId?: string;
  /** Resume a previous run from a specific message ID (skips the cached-question check). */
  resumeFromMessageId?: string;
  /** Process at most this many additional sessions per question. */
  sessionLimit?: number;
  /** 1-based session index to start from; earlier sessions are marked as already processed. */
  sessionOffset?: number;
}
34
+
35
/**
 * Prepares LongMemEval benchmark data: replays each question's haystack
 * sessions through a Mastra Agent so conversation history (and, depending on
 * the memory config, working memory and vector embeddings) is persisted to
 * per-question files under the output directory.
 */
export class PrepareCommand {
  // Loads LongMemEval datasets from disk.
  private loader: DatasetLoader;
  // Root directory for prepared per-question data (overridable via options.outputDir).
  private baseDir: string;

  constructor() {
    this.loader = new DatasetLoader();
    this.baseDir = './prepared-data';
  }
43
+
44
/**
 * Runs the full preparation pipeline for the configured dataset and memory config.
 *
 * Loads the dataset (and optional tailored working-memory templates), then
 * drives a pool of `questionConcurrency` workers that pull questions off a
 * shared queue. Questions with an existing meta.json are counted as cached and
 * skipped; rate-limited questions are pushed back onto the queue for a later
 * retry; any other failure is recorded in that question's progress.json.
 *
 * @param options - Dataset/memory-config selection plus subset, concurrency,
 *   resume and session-windowing controls.
 * @throws When a requested questionId is not in the dataset, or when a real
 *   model is needed but OPENAI_API_KEY is not set.
 */
async run(options: PrepareOptions): Promise<void> {
  console.log(chalk.blue('\n🔧 Preparing LongMemEval Data\n'));

  // Reset embedding cache statistics for this run
  embeddingCacheStats.reset();

  // Load dataset
  const spinner = ora('Loading dataset...').start();
  const questions = await this.loader.loadDataset(options.dataset);
  spinner.succeed(`Loaded ${questions.length} questions`);

  // Load working memory templates if using tailored working memory
  let wmTemplates: Record<string, any> = {};
  const usesTailoredWorkingMemory =
    options.memoryConfig === 'working-memory-tailored' || options.memoryConfig === 'combined-tailored';
  if (usesTailoredWorkingMemory) {
    const templatePath = join(this.baseDir, 'wm-templates', `${options.dataset}.json`);
    if (existsSync(templatePath)) {
      try {
        wmTemplates = JSON.parse(await readFile(templatePath, 'utf-8'));
        console.log(chalk.green(`✓ Loaded ${Object.keys(wmTemplates).length} working memory templates`));
      } catch (e) {
        // Missing/corrupt template file is non-fatal; fall back to the default template.
        console.log(chalk.yellow('⚠️ Could not load working memory templates, using default'));
      }
    } else {
      console.log(chalk.yellow('⚠️ No working memory templates found, using default'));
      console.log(chalk.gray('Run "pnpm generate-wm-templates" to generate them'));
    }
  }

  // Filter by questionId if specified
  let questionsToProcess = questions;
  if (options.questionId) {
    questionsToProcess = questions.filter(q => q.question_id === options.questionId);
    if (questionsToProcess.length === 0) {
      throw new Error(`Question with ID "${options.questionId}" not found in dataset`);
    }
    console.log(chalk.yellow(`\nFocusing on question: ${options.questionId}\n`));
  } else if (options.subset) {
    // Apply subset if requested
    questionsToProcess = questions.slice(0, options.subset);
  }

  console.log(
    chalk.yellow(`\nProcessing ${questionsToProcess.length} question${questionsToProcess.length !== 1 ? 's' : ''}\n`),
  );

  // Get memory configuration
  const memoryOptions = getMemoryOptions(options.memoryConfig);

  // Use real model for working memory, mock for others: only working-memory
  // configs require an LLM to actually generate/update memory contents.
  const needsRealModel =
    options.memoryConfig === 'working-memory' ||
    options.memoryConfig === 'working-memory-tailored' ||
    options.memoryConfig === 'combined' ||
    options.memoryConfig === 'combined-tailored';

  if (needsRealModel && !process.env.OPENAI_API_KEY) {
    throw new Error('OPENAI_API_KEY is required for working memory preparation');
  }

  const model = needsRealModel
    ? retry4o.model
    : new MockLanguageModelV1({
        doGenerate: async () => ({
          rawCall: { rawPrompt: null, rawSettings: {} },
          finishReason: 'stop',
          usage: { promptTokens: 10, completionTokens: 20 },
        }),
      });

  // Track active questions progress, keyed by worker slot index.
  const activeQuestions = new Map<
    number,
    { questionId: string; status: string; totalSessions?: number; processedSessions?: number; questionType?: string }
  >();

  // Create main progress spinner
  const mainSpinner = ora('Starting data preparation...').start();

  // Shared counters mutated by all workers (single-threaded event loop, so no locking needed).
  let processedCount = 0;
  let cachedCount = 0;
  let completedCount = 0;
  let inProgressCount = 0;
  const startTime = Date.now();

  // Determine question batch size based on config
  const questionConcurrency = options.concurrency || 10; // Allow concurrency for all configs

  console.log(chalk.gray(`Question concurrency: ${questionConcurrency}`));

  // Warn about working memory concurrency
  if ((options.memoryConfig === 'working-memory' || options.memoryConfig === 'combined') && questionConcurrency > 1) {
    console.log(
      chalk.yellow(
        `⚠️ Note: Running working memory questions concurrently. Each question has its own resource scope.`,
      ),
    );
  }

  let lastText = ``;
  // Function to update progress display. Only touches the spinner when the
  // text actually changed, to avoid redundant redraws.
  const updateProgress = () => {
    const elapsed = Math.round((Date.now() - startTime) / 1000);
    const rate = elapsed > 0 ? completedCount / elapsed : 0;
    const remaining = rate > 0 ? Math.round((questionsToProcess.length - completedCount) / rate) : 0;

    // Build progress text with active questions
    let progressText = `Overall: ${completedCount}/${questionsToProcess.length} (${inProgressCount} in progress, ${cachedCount} cached, ~${remaining}s remaining)`;

    // Add embedding cache stats if available
    const totalEmbeddingOps = embeddingCacheStats.cacheHits + embeddingCacheStats.cacheMisses;
    if (totalEmbeddingOps > 0) {
      const hitRate = embeddingCacheStats.cacheHits / totalEmbeddingOps;
      progressText += `\nEmbedding cache: ${embeddingCacheStats.cacheHits} hits, ${embeddingCacheStats.cacheMisses} misses (${(hitRate * 100).toFixed(1)}% hit rate)`;
    }

    // NOTE(review): rateLimitCount/pauseTime/pause are assumed fields of
    // makeRetryModel's shared state — shape inferred from usage here; confirm
    // against retry-model.ts.
    progressText += `\nRate limit count: ${retry4o.state.rateLimitCount}`;
    if (retry4o.state.pauseTime > 0 && retry4o.state.pause)
      progressText += ` (paused, waiting for ${retry4o.state.pauseTime}ms)`;

    if (activeQuestions.size > 0) {
      progressText += '\n\nActive questions:';

      // Sort active questions by completion percentage
      const sortedQuestions = Array.from(activeQuestions.entries())
        .map(([index, info]) => {
          const progress =
            info.processedSessions && info.totalSessions ? info.processedSessions / info.totalSessions : 0;
          return { index, info, progress };
        })
        .sort((a, b) => b.progress - a.progress); // Sort by most complete first

      sortedQuestions.forEach(({ info, progress }) => {
        const percentage = (progress * 100).toFixed(0);
        progressText += `\n ${info.status} (${percentage}%) ${chalk.grey(info.questionType || '')}`;
      });
    }

    if (lastText !== progressText) {
      lastText = progressText;
      mainSpinner.text = progressText;
    }
  };

  // Create a queue of questions to process
  const questionQueue = [...questionsToProcess];
  let questionIndex = 0;

  // Function to process next question from queue. Each worker loops, pulling
  // questions until the queue is drained (including rate-limited re-queues).
  const processNextQuestion = async (slotIndex: number): Promise<void> => {
    while (questionQueue.length > 0) {
      const question = questionQueue.shift();
      if (!question) break;

      // NOTE(review): currentIndex is assigned but never used below.
      const currentIndex = questionIndex++;

      // Check if already prepared
      const questionDir = join(
        options.outputDir || this.baseDir,
        options.dataset,
        options.memoryConfig,
        question.question_id,
      );

      // Check if question has failed previously
      const progressPath = join(questionDir, 'progress.json');
      if (existsSync(progressPath)) {
        try {
          const progress = JSON.parse(await readFile(progressPath, 'utf-8'));
          if (progress.failed) {
            // Retry failed questions
            mainSpinner.clear();
            console.log(
              chalk.yellow(`↻`),
              chalk.blue(`${question.question_id}`),
              chalk.gray(`(${question.question_type})`),
              chalk.yellow(`[retrying previously failed]`),
            );
            mainSpinner.render();

            // Delete the failed progress file to start fresh
            await unlink(progressPath);

            // Continue processing this question normally (don't skip)
          }
        } catch (e) {
          // If we can't read progress, continue with normal processing
        }
      }

      // Skip cache check if we're resuming from a specific message
      if (!options.resumeFromMessageId && existsSync(join(questionDir, 'meta.json'))) {
        cachedCount++;
        completedCount++;

        mainSpinner.clear();
        console.log(
          chalk.green(`✓`),
          chalk.blue(`${question.question_id}`),
          chalk.gray(`(${question.question_type})`),
          chalk.yellow(`[cached]`),
          chalk.gray(`- ${completedCount}/${questionsToProcess.length}`),
        );
        mainSpinner.render();

        // Update progress
        updateProgress();

        // Continue to next question
        continue;
      }

      // Mark as in progress
      inProgressCount++;
      activeQuestions.set(slotIndex, { questionId: question.question_id, status: 'Starting...' });
      updateProgress();

      try {
        await this.processQuestion(
          question,
          options,
          model,
          memoryOptions,
          true,
          slotIndex,
          activeQuestions,
          wmTemplates,
        );

        // Mark as completed
        inProgressCount--;
        processedCount++;
        completedCount++;

        // Remove from active questions
        activeQuestions.delete(slotIndex);

        mainSpinner.clear();
        console.log(
          chalk.green(`✓`),
          chalk.blue(`${question.question_id}`),
          chalk.gray(`(${question.question_type})`),
          chalk.gray(`${question.haystack_sessions.length} sessions`),
          chalk.gray(`- ${completedCount}/${questionsToProcess.length}`),
        );
        mainSpinner.render();
      } catch (error) {
        // Check if this is a rate limit error (heuristic substring match on
        // common provider rate-limit phrasings and the HTTP 429 status).
        const errorMessage = error instanceof Error ? error.message : String(error);
        const isRateLimitError =
          errorMessage.includes('Rate limit') ||
          errorMessage.includes('rate limit') ||
          errorMessage.includes('RPM') ||
          errorMessage.includes('TPM') ||
          errorMessage.includes('429');

        if (isRateLimitError) {
          // Don't mark as failed for rate limits - just skip this run
          inProgressCount--;

          // Remove from active questions
          activeQuestions.delete(slotIndex);

          mainSpinner.clear();
          console.log(
            chalk.yellow(`⏸`),
            chalk.blue(`${question.question_id}`),
            chalk.gray(`(${question.question_type})`),
            chalk.yellow(`Rate limited - will retry later`),
            chalk.gray(`- ${completedCount}/${questionsToProcess.length}`),
          );
          mainSpinner.render();

          // Re-add to the end of the queue to retry later
          questionQueue.push(question);

          // Add a small delay to help with rate limiting
          await new Promise(resolve => setTimeout(resolve, 1000)); // Wait 1 second
        } else {
          // Mark as completed but failed for non-rate-limit errors
          inProgressCount--;
          completedCount++;

          // Remove from active questions
          activeQuestions.delete(slotIndex);

          mainSpinner.clear();
          console.log(
            chalk.red(`✗`),
            chalk.blue(`${question.question_id}`),
            chalk.gray(`(${question.question_type})`),
            chalk.red(`Failed: ${errorMessage}`),
            chalk.gray(`- ${completedCount}/${questionsToProcess.length}`),
          );
          mainSpinner.render();

          // Save error state to progress file so the next run retries this question.
          const questionDir = join(
            options.outputDir || this.baseDir,
            options.dataset,
            options.memoryConfig,
            question.question_id,
          );
          const progressFile = join(questionDir, 'progress.json');

          try {
            await mkdir(questionDir, { recursive: true });

            // Try to load existing progress if available
            let existingProgress = { processedSessionIds: [] };
            if (existsSync(progressFile)) {
              existingProgress = JSON.parse(await readFile(progressFile, 'utf-8'));
            }

            await writeFile(
              progressFile,
              JSON.stringify(
                {
                  processedSessionIds: existingProgress.processedSessionIds || [],
                  completed: true,
                  failed: true,
                  error: errorMessage,
                  failedAt: new Date().toISOString(),
                },
                null,
                2,
              ),
            );
          } catch (saveError) {
            console.error(chalk.red(`Failed to save error state: ${saveError}`));
          }
        }
      }

      updateProgress();
    }
  };

  // Refresh the display on a timer in addition to event-driven updates.
  const progressInterval = setInterval(updateProgress, 500);
  const workers = Array.from({ length: questionConcurrency }, (_, i) => processNextQuestion(i));
  await Promise.all(workers);
  clearInterval(progressInterval);
  updateProgress();

  mainSpinner.succeed(`Prepared ${processedCount} questions (${cachedCount} from cache)`);
  const totalTime = Math.round((Date.now() - startTime) / 1000);
  console.log(chalk.gray(`Total time: ${totalTime}s (${Math.round((processedCount / totalTime) * 60)} q/min)`));

  // Display embedding cache statistics if any embeddings were processed
  const totalEmbeddingOps = embeddingCacheStats.cacheHits + embeddingCacheStats.cacheMisses;
  if (totalEmbeddingOps > 0) {
    const hitRate = embeddingCacheStats.cacheHits / totalEmbeddingOps;
    console.log(
      chalk.gray(
        `Embedding cache: ${embeddingCacheStats.cacheHits} hits, ${embeddingCacheStats.cacheMisses} misses, ${embeddingCacheStats.cacheWrites} writes (${(hitRate * 100).toFixed(1)}% hit rate)`,
      ),
    );
  }

  console.log(chalk.green('\n✅ Data preparation complete!\n'));
  console.log(chalk.gray(`Prepared data saved to: ${this.baseDir}/${options.dataset}/${options.memoryConfig}/`));
}
407
+
408
/**
 * Prepares a single question: replays its haystack sessions through an Agent
 * backed by fresh per-question storage, persisting db.json/vector.json and a
 * meta.json describing the prepared data.
 *
 * Supports resuming: existing db.json/vector.json are hydrated and
 * progress.json's processedSessionIds are skipped. Working-memory configs are
 * forced to sequential, one-session batches so memory builds up chronologically.
 *
 * @param question - The LongMemEval question whose sessions are replayed.
 * @param options - Run options (dataset, memory config, session windowing).
 * @param model - Real or mock language model used by the Agent.
 * @param memoryOptions - Memory configuration; workingMemory may be mutated
 *   here when a tailored template exists for this question.
 * @param isConcurrent - Suppresses per-question logging; forced false for
 *   working-memory configs.
 * @param slotIndex - Worker slot used to report status into activeQuestions.
 * @param activeQuestions - Shared progress map updated for display.
 * @param wmTemplates - Optional per-question working-memory templates.
 */
private async processQuestion(
  question: LongMemEvalQuestion,
  options: PrepareOptions,
  model: any,
  memoryOptions: MemoryConfigOptions,
  isConcurrent: boolean = false,
  slotIndex?: number,
  activeQuestions?: Map<
    number,
    { questionId: string; status: string; totalSessions?: number; processedSessions?: number; questionType?: string }
  >,
  wmTemplates?: Record<string, any>,
): Promise<void> {
  // Create fresh storage instances for this question
  const benchmarkStore = new BenchmarkStore();
  const benchmarkVectorStore = new BenchmarkVectorStore();

  // Initialize stores
  await benchmarkStore.init();

  // Create vector index if using semantic recall
  if (options.memoryConfig === 'semantic-recall' || options.memoryConfig.includes('combined')) {
    await benchmarkVectorStore.createIndex({
      indexName: 'memory_messages',
      dimension: 1536, // text-embedding-3-small dimension
      metric: 'cosine',
    });
  }

  const usesWorkingMemory =
    options.memoryConfig === 'working-memory' ||
    options.memoryConfig === 'working-memory-tailored' ||
    options.memoryConfig === 'combined' ||
    options.memoryConfig === 'combined-tailored';
  const usesTailoredTemplate =
    options.memoryConfig === 'working-memory-tailored' || options.memoryConfig === 'combined-tailored';

  // Working memory must run one session (thread) at a time, in order
  // otherwise the data will not be accurate as working memory is meant
  // to build up over time, using the previous working memory state to create the next.
  if (usesWorkingMemory) isConcurrent = false;

  // Use custom template if available for tailored configs
  if (usesTailoredTemplate && wmTemplates && wmTemplates[question.question_id]) {
    memoryOptions.options.workingMemory = {
      enabled: true,
      template: wmTemplates[question.question_id].template,
      scope: 'resource',
    };
    // if (!isConcurrent) {
    //   console.log(chalk.cyan(' Using tailored working memory template'));
    // }
  }

  // Create memory with appropriate configuration
  const memory = new Memory({
    storage: benchmarkStore,
    vector:
      options.memoryConfig === 'semantic-recall' || options.memoryConfig.includes('combined')
        ? benchmarkVectorStore
        : undefined,
    embedder:
      options.memoryConfig === 'semantic-recall' || options.memoryConfig.includes('combined')
        ? cachedOpenAI.embedding('text-embedding-3-small')
        : undefined,
    options: memoryOptions.options,
  });

  // Create agent with appropriate model
  const agent = new Agent({
    name: 'prep-agent',
    instructions:
      "You are a helpful assistant. Process and store conversation history. Only store working memory information if it's in the template. Other information is not relevant",
    model: model,
    memory: memory,
  });

  // Process all haystack sessions under one resource so resource-scoped
  // working memory accumulates across all of this question's sessions.
  const resourceId = `resource_${question.question_id}`;

  // Sort sessions by date for chronological processing (important for working memory)
  const sessionsWithDates = question.haystack_sessions.map((session, index) => ({
    session,
    sessionId: question.haystack_session_ids[index],
    date: question.haystack_dates[index],
  }));

  // Sort by date (oldest first)
  sessionsWithDates.sort((a, b) => new Date(a.date).getTime() - new Date(b.date).getTime());

  // Debug: Log first and last dates to confirm sorting
  if (sessionsWithDates.length > 0 && !isConcurrent) {
    // const firstDate = new Date(sessionsWithDates[0].date).toISOString().split('T')[0];
    // const lastDate = new Date(sessionsWithDates[sessionsWithDates.length - 1].date).toISOString().split('T')[0];
    // console.log(chalk.gray(` Sessions sorted: ${firstDate} (oldest) → ${lastDate} (newest)`));
  }

  // Create output directory early to save progress
  const questionDir = join(
    options.outputDir || this.baseDir,
    options.dataset,
    options.memoryConfig,
    question.question_id,
  );
  await mkdir(questionDir, { recursive: true });

  // Check if this question has partial progress saved
  const progressFile = join(questionDir, 'progress.json');
  let processedSessionIds: Set<string> = new Set();

  // Always try to load existing db.json if it exists (for resume scenarios)
  const dbPath = join(questionDir, 'db.json');
  const vectorPath = join(questionDir, 'vector.json');

  if (existsSync(dbPath)) {
    // console.log(chalk.gray('Loading existing database...'));
    await benchmarkStore.hydrate(dbPath);
  }

  if (
    existsSync(vectorPath) &&
    (options.memoryConfig === 'semantic-recall' || options.memoryConfig.includes('combined'))
  ) {
    // console.log(chalk.gray('Loading existing vector store...'));
    await benchmarkVectorStore.hydrate(vectorPath);
  }

  if (existsSync(progressFile)) {
    try {
      const progress = JSON.parse(await readFile(progressFile, 'utf-8'));
      processedSessionIds = new Set(progress.processedSessionIds || []);

      if (slotIndex !== undefined && activeQuestions) {
        activeQuestions.set(slotIndex, {
          questionId: question.question_id,
          status: `Resuming from session ${processedSessionIds.size}/${sessionsWithDates.length}`,
        });
      }
    } catch (e) {
      console.log(chalk.red(`Failed to load progress for ${question.question_id}:`));
      console.error(e);
      // A corrupt progress file is fatal only when an explicit resume was requested.
      if (options.resumeFromMessageId) {
        console.log(chalk.red(`Cannot resume without valid progress data. Exiting.`));
        process.exit(1);
      }
      processedSessionIds = new Set();
    }
  }

  // Process sessions in batches to avoid overwhelming the system
  const BATCH_SIZE = usesWorkingMemory ? 1 : 50; // Process x sessions at a time. working memory must run one at a time since each conversation will use resource working memory from the last conversation and build on it.
  let processedSessions = processedSessionIds.size;

  // Apply session offset if specified
  if (options.sessionOffset && !options.resumeFromMessageId) {
    const offsetIndex = options.sessionOffset - 1; // Convert to 0-based index
    if (offsetIndex >= 0 && offsetIndex < sessionsWithDates.length) {
      console.log(
        chalk.yellow(`\n⏭️ Starting from session ${options.sessionOffset} (skipping first ${offsetIndex} sessions)`),
      );

      // Mark all sessions before the offset as processed
      for (let i = 0; i < offsetIndex; i++) {
        processedSessionIds.add(sessionsWithDates[i].sessionId);
      }
      processedSessions = processedSessionIds.size;
    } else {
      console.log(
        chalk.red(`✗ Session offset ${options.sessionOffset} is out of range (1-${sessionsWithDates.length})`),
      );
      process.exit(1);
    }
  }

  // Apply session limit if specified. The slice keeps earlier sessions too;
  // those are skipped later via the processedSessionIds check.
  let sessionsToProcess = sessionsWithDates;
  if (options.sessionLimit) {
    const startIndex = processedSessionIds.size;
    const endIndex = Math.min(startIndex + options.sessionLimit, sessionsWithDates.length);
    sessionsToProcess = sessionsWithDates.slice(0, endIndex);
    console.log(
      chalk.yellow(`\n📊 Processing limited to ${options.sessionLimit} sessions (${startIndex + 1} to ${endIndex})`),
    );
  }

  for (let i = 0; i < sessionsToProcess.length; i += BATCH_SIZE) {
    const sessionBatch = sessionsToProcess.slice(i, i + BATCH_SIZE);

    // Update progress
    if (slotIndex !== undefined && activeQuestions) {
      // Calculate current session index (1-based)
      const currentSessionIndex = processedSessions + 1;
      // Update active questions status
      activeQuestions.set(slotIndex, {
        questionId: question.question_id,
        status: `${chalk.green('->')} preparing ${chalk.blue(question.question_id)}[${chalk.green(currentSessionIndex)}] ${chalk.white(`${processedSessions}/${sessionsToProcess.length} `)}`,
        totalSessions: sessionsToProcess.length,
        processedSessions,
        questionType: question.question_type,
      });
    }

    // Process batch in parallel (batch size is 1 for working memory, so that
    // path is effectively sequential).
    const batchPromises = sessionBatch.map(async ({ session, sessionId }) => {
      // Skip if already processed
      if (processedSessionIds.has(sessionId)) {
        return;
      }

      // Convert session to messages; any non-user/assistant role is coerced to 'user'.
      const messages: CoreMessage[] = [];
      for (const turn of session) {
        if (!turn.content) continue;

        const role = turn.role === 'user' || turn.role === 'assistant' ? turn.role : 'user';
        messages.push({
          role,
          content: turn.content,
        });
      }

      if (messages.length > 0) {
        // Process through agent to save to memory
        try {
          await agent.generate(messages, {
            threadId: sessionId, // Use haystack session ID as thread ID
            resourceId,
            memoryOptions: memoryOptions.options,
            temperature: 0.3,
            frequencyPenalty: 0.3,
          });
        } catch (error) {
          console.error(`Error in agent.generate for ${question.question_id}, session ${sessionId}:`, error);
          throw error;
        }
      }

      // Mark as processed
      processedSessionIds.add(sessionId);

      // Save progress after each session if using working memory
      if (usesWorkingMemory) {
        await writeFile(
          progressFile,
          JSON.stringify({
            processedSessionIds: Array.from(processedSessionIds),
            lastSavedDb: 'db.json',
            lastSavedVector: 'vector.json',
          }),
        );

        // Persist current state
        await benchmarkStore.persist(join(questionDir, 'db.json'));
        if (options.memoryConfig === 'semantic-recall' || options.memoryConfig.includes('combined')) {
          await benchmarkVectorStore.persist(join(questionDir, 'vector.json'));
        }
      }
    });

    await Promise.all(batchPromises);

    // Fix dates for newly processed sessions
    // NOTE(review): this filter also matches sessions processed in earlier
    // runs/batches, so their dates are re-fixed (idempotent but redundant).
    const newlyProcessedSessions = sessionBatch.filter(s => processedSessionIds.has(s.sessionId));
    if (newlyProcessedSessions.length > 0) {
      await this.fixSessionDates(questionDir, newlyProcessedSessions, benchmarkStore);
    }

    // Update processed count based on actual processed sessions
    processedSessions = processedSessionIds.size;

    // Update progress after batch completes
    if (slotIndex !== undefined && activeQuestions) {
      // Calculate current session index (1-based)
      const currentSessionIndex = processedSessions + 1;
      activeQuestions.set(slotIndex, {
        questionId: question.question_id,
        status: `session ${currentSessionIndex} (${processedSessions}/${sessionsToProcess.length} total)`,
      });
    }
  }

  // Update status to saving
  if (slotIndex !== undefined && activeQuestions) {
    activeQuestions.set(slotIndex, {
      questionId: question.question_id,
      status: 'Saving data...',
    });
  }

  // Persist storage
  await benchmarkStore.persist(join(questionDir, 'db.json'));

  // Persist vector store if used
  if (options.memoryConfig === 'semantic-recall' || options.memoryConfig.includes('combined')) {
    await benchmarkVectorStore.persist(join(questionDir, 'vector.json'));
  }

  // Save metadata. meta.json also acts as the "prepared" marker that run()
  // checks to treat a question as cached.
  const metadata = {
    questionId: question.question_id,
    questionType: question.question_type,
    question: question.question,
    answer: question.answer,
    questionDate: question.question_date,
    resourceId,
    threadIds: question.haystack_session_ids,
    preparedAt: new Date().toISOString(),
    memoryConfig: options.memoryConfig,
    sessionCount: sessionsWithDates.length,
    evidenceSessionIds: question.answer_session_ids,
    note: 'Sessions were processed in chronological order (oldest first) for working memory',
  };

  await writeFile(join(questionDir, 'meta.json'), JSON.stringify(metadata, null, 2));

  // Clean up progress file after successful completion
  if (existsSync(progressFile)) {
    await writeFile(
      progressFile,
      JSON.stringify({
        processedSessionIds: Array.from(processedSessionIds),
        completed: true,
        completedAt: new Date().toISOString(),
      }),
    );
  }
}
735
+
736
+ private async fixSessionDates(
737
+ questionDir: string,
738
+ sessionBatch: Array<{ session: any; sessionId: string; date: string }>,
739
+ benchmarkStore: BenchmarkStore,
740
+ ): Promise<void> {
741
+ // Save current state to temp file
742
+ const tempPath = join(questionDir, 'temp_db.json');
743
+ await benchmarkStore.persist(tempPath);
744
+
745
+ // Read and modify the data
746
+ const data = JSON.parse(await readFile(tempPath, 'utf-8'));
747
+
748
+ // Fix dates for each session in the batch
749
+ for (const { sessionId, date } of sessionBatch) {
750
+ const sessionDate = new Date(date);
751
+
752
+ // Get messages for this session
753
+ const sessionMessages: Array<[string, any]> = [];
754
+ if (data.mastra_messages) {
755
+ for (const [key, message] of data.mastra_messages) {
756
+ if (message.threadId === sessionId) {
757
+ sessionMessages.push([key, message]);
758
+ }
759
+ }
760
+ }
761
+
762
+ // Sort messages by their current createdAt to maintain order
763
+ sessionMessages.sort((a, b) => new Date(a[1].createdAt).getTime() - new Date(b[1].createdAt).getTime());
764
+
765
+ // Update each message's date
766
+ sessionMessages.forEach(([_key, message], idx) => {
767
+ // Add 5 minutes for each message in the conversation
768
+ const messageDate = new Date(sessionDate.getTime() + idx * 5 * 60 * 1000);
769
+ message.createdAt = messageDate.toISOString();
770
+ message.updatedAt = messageDate.toISOString();
771
+ });
772
+
773
+ // Update thread dates
774
+ if (data.mastra_threads) {
775
+ for (const [threadId, thread] of data.mastra_threads) {
776
+ if (threadId === sessionId) {
777
+ thread.createdAt = sessionDate.toISOString();
778
+ thread.updatedAt = sessionDate.toISOString();
779
+ }
780
+ }
781
+ }
782
+ }
783
+
784
+ // Write back the modified data
785
+ await writeFile(tempPath, JSON.stringify(data, null, 2));
786
+
787
+ // Reload the modified data into the store
788
+ await benchmarkStore.hydrate(tempPath);
789
+
790
+ // Clean up temp file
791
+ await unlink(tempPath);
792
+ }
793
+ }