@mastra/longmemeval 0.0.0-add-libsql-changeset-20250910154739

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37):
  1. package/CHANGELOG.md +919 -0
  2. package/DATA_DOWNLOAD_GUIDE.md +117 -0
  3. package/LICENSE.md +15 -0
  4. package/README.md +173 -0
  5. package/USAGE.md +105 -0
  6. package/package.json +67 -0
  7. package/scripts/download.ts +180 -0
  8. package/scripts/find-failed.ts +176 -0
  9. package/scripts/generate-embeddings.ts +56 -0
  10. package/scripts/generate-wm-templates.ts +296 -0
  11. package/scripts/setup.ts +60 -0
  12. package/src/__fixtures__/embeddings.json +2319 -0
  13. package/src/__fixtures__/test-dataset.json +82 -0
  14. package/src/cli.ts +690 -0
  15. package/src/commands/__tests__/prepare.test.ts +230 -0
  16. package/src/commands/__tests__/run.test.ts +403 -0
  17. package/src/commands/prepare.ts +793 -0
  18. package/src/commands/run.ts +553 -0
  19. package/src/config.ts +83 -0
  20. package/src/data/loader.ts +163 -0
  21. package/src/data/types.ts +61 -0
  22. package/src/embeddings/cached-openai-embedding-model.ts +227 -0
  23. package/src/embeddings/cached-openai-provider.ts +40 -0
  24. package/src/embeddings/index.ts +2 -0
  25. package/src/evaluation/__tests__/longmemeval-metric.test.ts +169 -0
  26. package/src/evaluation/longmemeval-metric.ts +173 -0
  27. package/src/retry-model.ts +60 -0
  28. package/src/storage/__tests__/benchmark-store.test.ts +280 -0
  29. package/src/storage/__tests__/benchmark-vector.test.ts +214 -0
  30. package/src/storage/benchmark-store.ts +540 -0
  31. package/src/storage/benchmark-vector.ts +234 -0
  32. package/src/storage/index.ts +2 -0
  33. package/src/test-utils/mock-embeddings.ts +54 -0
  34. package/src/test-utils/mock-model.ts +49 -0
  35. package/tests/data-loader.test.ts +96 -0
  36. package/tsconfig.json +18 -0
  37. package/vitest.config.ts +9 -0
@@ -0,0 +1,230 @@
1
+ import { describe, it, expect, beforeEach, afterEach, vi } from 'vitest';
2
+ import { PrepareCommand } from '../prepare';
3
+ import { rm, readFile } from 'fs/promises';
4
+ import { existsSync } from 'fs';
5
+ import { join } from 'path';
6
+ import { tmpdir } from 'os';
7
+ import type { LongMemEvalQuestion } from '../../data/types';
8
+ import { createMockEmbedding } from '../../test-utils/mock-embeddings';
9
+
10
// Replace the OpenAI provider: `openai.embedding(...)` becomes a mock that hands back
// whatever createMockEmbedding() produces.
vi.mock('@ai-sdk/openai', () => {
  const embedding = vi.fn(() => createMockEmbedding());
  return { openai: { embedding } };
});
16
+
17
// Stub DatasetLoader: every constructed loader resolves loadDataset() with the same
// two in-memory questions (one single-session, one multi-session).
vi.mock('../../data/loader', () => {
  // Built per loader instance so each instance owns a fresh copy of the fixture.
  const buildQuestions = () =>
    [
      {
        question_id: 'test-q1',
        question_type: 'single-session-user',
        question: 'What is my favorite color?',
        answer: 'Blue',
        question_date: '2024-01-01',
        haystack_session_ids: ['session-1'],
        haystack_dates: ['2024-01-01'],
        haystack_sessions: [
          [
            { role: 'user', content: 'My favorite color is blue', has_answer: true },
            { role: 'assistant', content: 'I understand your favorite color is blue.' },
          ],
        ],
        answer_session_ids: ['session-1'],
      },
      {
        question_id: 'test-q2',
        question_type: 'multi-session',
        question: 'What did I say about my pet?',
        answer: 'You have a cat named Fluffy',
        question_date: '2024-01-02',
        haystack_session_ids: ['session-2', 'session-3'],
        haystack_dates: ['2024-01-01', '2024-01-02'],
        haystack_sessions: [
          [
            { role: 'user', content: 'I have a pet', has_answer: false },
            { role: 'assistant', content: 'What kind of pet do you have?' },
          ],
          [
            { role: 'user', content: 'It is a cat named Fluffy', has_answer: true },
            { role: 'assistant', content: 'Fluffy is a lovely name for a cat!' },
          ],
        ],
        answer_session_ids: ['session-3'],
      },
    ] as LongMemEvalQuestion[];

  const createLoader = () => ({
    loadDataset: vi.fn().mockResolvedValue(buildQuestions()),
  });

  return { DatasetLoader: vi.fn().mockImplementation(createLoader) };
});
60
+
61
// Swap chalk for pass-through stubs: each colour helper returns its input unchanged.
vi.mock('chalk', () => {
  const passthrough = (str: string) => str;
  return {
    default: {
      blue: passthrough,
      yellow: passthrough,
      green: passthrough,
      gray: passthrough,
    },
  };
});
70
+
71
// Stub ora: the default export returns a spinner whose start/succeed/fail are
// chainable mocks (each returns `this`).
vi.mock('ora', () => {
  const createSpinner = () => ({
    start: vi.fn().mockReturnThis(),
    succeed: vi.fn().mockReturnThis(),
    fail: vi.fn().mockReturnThis(),
  });
  return { default: createSpinner };
});
78
+
79
+ describe('PrepareCommand', () => {
80
+ let command: PrepareCommand;
81
+ let testDir: string;
82
+
83
// Fresh command per test, with its `baseDir` redirected to a timestamped
// directory under the OS temp dir.
beforeEach(() => {
  command = new PrepareCommand();

  const uniqueName = `prepare-test-${Date.now()}`;
  testDir = join(tmpdir(), uniqueName);

  // `baseDir` is not reachable through the public type, so it is set untyped.
  Object.assign(command, { baseDir: testDir });
});
89
+
90
// Remove the per-test directory. `force: true` makes rm ignore a missing path, so no
// separate existsSync() check (and no check-then-delete window) is needed.
afterEach(async () => {
  await rm(testDir, { recursive: true, force: true });
});
96
+
97
+ describe('run', () => {
98
it('should process questions and save prepared data', async () => {
  await command.run({
    dataset: 'longmemeval_s',
    memoryConfig: 'full-history',
  });

  const configRoot = join(testDir, 'longmemeval_s', 'full-history');
  const firstDir = join(configRoot, 'test-q1');
  const secondDir = join(configRoot, 'test-q2');

  // One directory per mocked question
  for (const dir of [firstDir, secondDir]) {
    expect(existsSync(dir)).toBe(true);
  }

  // Each question directory holds db.json and meta.json
  for (const dir of [firstDir, secondDir]) {
    for (const fileName of ['db.json', 'meta.json']) {
      expect(existsSync(join(dir, fileName))).toBe(true);
    }
  }

  const loadMeta = async (dir: string) => JSON.parse(await readFile(join(dir, 'meta.json'), 'utf-8'));

  // Metadata written for the first question
  const firstMeta = await loadMeta(firstDir);
  expect(firstMeta.questionId).toBe('test-q1');
  expect(firstMeta.questionType).toBe('single-session-user');
  expect(firstMeta.question).toBe('What is my favorite color?');
  expect(firstMeta.answer).toBe('Blue');
  expect(firstMeta.resourceId).toBe('resource_test-q1');
  expect(firstMeta.threadIds).toEqual(['session-1']);
  expect(firstMeta.memoryConfig).toBe('full-history');

  // Metadata written for the second question
  const secondMeta = await loadMeta(secondDir);
  expect(secondMeta.questionId).toBe('test-q2');
  expect(secondMeta.question).toBe('What did I say about my pet?');
  expect(secondMeta.answer).toBe('You have a cat named Fluffy');
  expect(secondMeta.threadIds).toEqual(['session-2', 'session-3']);
});
133
+
134
it('should create vector store files for semantic-recall config', async () => {
  await command.run({
    dataset: 'longmemeval_s',
    memoryConfig: 'semantic-recall',
  });

  const questionDir = join(testDir, 'longmemeval_s', 'semantic-recall', 'test-q1');

  // vector.json is expected alongside db.json and meta.json for this config
  for (const fileName of ['db.json', 'vector.json', 'meta.json']) {
    expect(existsSync(join(questionDir, fileName))).toBe(true);
  }
});
147
+
148
it('should process subset of questions when specified', async () => {
  await command.run({
    dataset: 'longmemeval_s',
    memoryConfig: 'last-k',
    subset: 1,
  });

  const configRoot = join(testDir, 'longmemeval_s', 'last-k');
  const firstExists = existsSync(join(configRoot, 'test-q1'));
  const secondExists = existsSync(join(configRoot, 'test-q2'));

  // With subset: 1 only the first question's directory should be present
  expect(firstExists).toBe(true);
  expect(secondExists).toBe(false);
});
162
+
163
it('should use custom output directory when specified', async () => {
  const customDir = join(tmpdir(), `custom-prepare-${Date.now()}`);

  try {
    await command.run({
      dataset: 'longmemeval_s',
      memoryConfig: 'working-memory',
      outputDir: customDir,
      subset: 1,
    });

    const questionDir = join(customDir, 'longmemeval_s', 'working-memory', 'test-q1');
    expect(existsSync(questionDir)).toBe(true);
  } finally {
    // Always clean up, even when run() rejects or the assertion fails. `force: true`
    // keeps a missing directory from masking the original error.
    await rm(customDir, { recursive: true, force: true });
  }
});
179
+
180
it('should handle combined memory config with vector store', async () => {
  await command.run({
    dataset: 'longmemeval_s',
    memoryConfig: 'combined',
    subset: 1,
  });

  const questionDir = join(testDir, 'longmemeval_s', 'combined', 'test-q1');

  // The combined config is expected to write vector.json in addition to db.json and meta.json
  for (const fileName of ['db.json', 'vector.json', 'meta.json']) {
    expect(existsSync(join(questionDir, fileName))).toBe(true);
  }
});
194
+ });
195
+
196
+ describe('getMemoryOptions', () => {
197
it('should return correct options for each memory config', () => {
  // getMemoryOptions is reached through an `any` cast, as it is not callable via the public type.
  const optionsFor = (configName: string) => (command as any).getMemoryOptions(configName);

  const fullHistory = optionsFor('full-history');
  expect(fullHistory.type).toBe('full-history');
  expect(fullHistory.options.lastMessages).toBe(999999);
  expect(fullHistory.options.semanticRecall).toBe(false);

  const lastK = optionsFor('last-k');
  expect(lastK.type).toBe('last-k');
  expect(lastK.options.lastMessages).toBe(50);

  const semanticRecall = optionsFor('semantic-recall');
  const expectedRecall = { topK: 10, messageRange: 2, scope: 'resource' };
  expect(semanticRecall.type).toBe('semantic-recall');
  expect(semanticRecall.options.semanticRecall).toEqual(expectedRecall);

  const workingMemory = optionsFor('working-memory');
  expect(workingMemory.type).toBe('working-memory');
  expect(workingMemory.options.workingMemory.enabled).toBe(true);
  expect(workingMemory.options.workingMemory.template).toContain('User Context');

  const combined = optionsFor('combined');
  expect(combined.type).toBe('combined');
  expect(combined.options.semanticRecall).toBeTruthy();
  expect(combined.options.workingMemory.enabled).toBe(true);
});
225
+
226
it('should throw error for unknown memory config', () => {
  // Wrapped in a thunk so expect() can observe the throw.
  const requestInvalidConfig = () => (command as any).getMemoryOptions('invalid');

  expect(requestInvalidConfig).toThrow('Unknown memory config: invalid');
});
229
+ });
230
+ });
@@ -0,0 +1,403 @@
1
+ import { describe, it, expect, beforeEach, afterEach, vi } from 'vitest';
2
+ import { RunCommand } from '../run';
3
+ import { rm, mkdir, writeFile } from 'fs/promises';
4
+ import { existsSync } from 'fs';
5
+ import { join } from 'path';
6
+ import { tmpdir } from 'os';
7
+ import { createMockEmbedding } from '../../test-utils/mock-embeddings';
8
+
9
// Build the OpenAI provider mock inside vi.hoisted so it is already initialised when the
// hoisted vi.mock('@ai-sdk/openai') factory reads it.
const { openaiModel } = vi.hoisted(() => {
  // Calling the provider with a model name yields a stub model whose doGenerate()
  // always resolves with the text 'Blue'. The model name itself is ignored.
  const createLanguageModel = vi.fn((_modelName: string) => ({
    doGenerate: vi.fn().mockResolvedValue({
      rawCall: { rawPrompt: null, rawSettings: {} },
      finishReason: 'stop',
      usage: { promptTokens: 10, completionTokens: 20 },
      text: 'Blue',
    }),
  }));

  // Attach `embedding` with Object.assign so the result is typed as the mock plus an
  // `embedding` member, rather than assigning a property the Mock type does not declare.
  // createMockEmbedding is only dereferenced when embedding() is actually called.
  const openaiModel = Object.assign(createLanguageModel, {
    embedding: vi.fn(() => createMockEmbedding()),
  });

  return { openaiModel };
});
25
+
26
// Serve the hoisted mock provider as the `openai` export of '@ai-sdk/openai'.
vi.mock('@ai-sdk/openai', () => {
  return { openai: openaiModel };
});
29
+
30
// Stub LongMemEvalMetric so measure() resolves to a score of 1 on every call,
// i.e. every answer is judged correct.
vi.mock('../../evaluation/longmemeval-metric', () => {
  const createMetric = () => ({
    measure: vi.fn().mockResolvedValue({ score: 1 }),
  });

  return { LongMemEvalMetric: vi.fn().mockImplementation(createMetric) };
});
36
+
37
// Swap chalk for pass-through stubs: each style helper returns its input unchanged.
vi.mock('chalk', () => {
  const passthrough = (str: string) => str;
  return {
    default: {
      blue: passthrough,
      yellow: passthrough,
      green: passthrough,
      gray: passthrough,
      red: passthrough,
      bold: passthrough,
    },
  };
});
48
+
49
// Stub ora: the default export returns a spinner with chainable start/succeed/fail
// mocks and an empty, assignable `text` field.
vi.mock('ora', () => {
  const createSpinner = () => ({
    start: vi.fn().mockReturnThis(),
    succeed: vi.fn().mockReturnThis(),
    fail: vi.fn().mockReturnThis(),
    text: '',
  });
  return { default: createSpinner };
});
57
+
58
+ describe('RunCommand', () => {
59
+ let command: RunCommand;
60
+ let testDir: string;
61
+ let preparedDataDir: string;
62
+ let outputDir: string;
63
+
64
// Fresh command per test, pointed at a timestamped temp tree:
//   <testDir>/prepared-data  (fixture input)  and  <testDir>/results  (run output).
beforeEach(async () => {
  command = new RunCommand();

  const uniqueName = `run-test-${Date.now()}`;
  testDir = join(tmpdir(), uniqueName);
  preparedDataDir = join(testDir, 'prepared-data');
  outputDir = join(testDir, 'results');

  // These fields are not reachable through the public type, so they are set untyped.
  Object.assign(command, { preparedDataDir, outputDir });

  // Populate the prepared-data fixture the tests read from.
  await createPreparedData(preparedDataDir);
});
77
+
78
// Remove the per-test directory. `force: true` makes rm ignore a missing path, so no
// separate existsSync() check (and no check-then-delete window) is needed.
afterEach(async () => {
  await rm(testDir, { recursive: true, force: true });
});
84
+
85
/**
 * Writes the prepared-data fixture used by these tests: two questions (`test-q1`,
 * `test-q2`) under `<baseDir>/longmemeval_s/full-history`, each as a directory
 * containing `meta.json` and `db.json`.
 *
 * @param baseDir - Root of the prepared-data tree; missing directories are created.
 */
async function createPreparedData(baseDir: string) {
  const dataDir = join(baseDir, 'longmemeval_s', 'full-history');
  await mkdir(dataDir, { recursive: true });

  await writePreparedQuestion(dataDir, {
    questionId: 'test-q1',
    questionType: 'single-session-user',
    resourceId: 'resource_test-q1',
    threadIds: ['session-1'],
    question: 'What is my favorite color?',
    answer: 'Blue',
    messages: [
      { id: 'msg-1', threadId: 'session-1', role: 'user', content: 'My favorite color is blue' },
      {
        id: 'msg-2',
        threadId: 'session-1',
        role: 'assistant',
        content: 'I understand your favorite color is blue.',
      },
    ],
    threads: [{ id: 'session-1', title: 'Session 1' }],
  });

  await writePreparedQuestion(dataDir, {
    questionId: 'test-q2',
    questionType: 'multi-session',
    resourceId: 'resource_test-q2',
    threadIds: ['session-2', 'session-3'],
    question: 'What did I say about my pet?',
    answer: 'You have a cat named Fluffy',
    messages: [
      { id: 'msg-1', threadId: 'session-2', role: 'user', content: 'I have a pet' },
      { id: 'msg-2', threadId: 'session-3', role: 'user', content: 'It is a cat named Fluffy' },
    ],
    threads: [
      { id: 'session-2', title: 'Session 2' },
      { id: 'session-3', title: 'Session 3' },
    ],
  });
}

/** Minimal description of one stored message; the remaining fields are filled in by the builder. */
type FixtureMessage = { id: string; threadId: string; role: string; content: string };

/** Minimal description of one stored thread. */
type FixtureThread = { id: string; title: string };

/** Everything needed to write one prepared question directory. */
type FixtureQuestion = {
  questionId: string;
  questionType: string;
  resourceId: string;
  threadIds: string[];
  question: string;
  answer: string;
  messages: FixtureMessage[];
  threads: FixtureThread[];
};

/** Builds one `[id, record]` entry for the `mastra_messages` table. */
function buildMessageEntry(resourceId: string, message: FixtureMessage) {
  return [
    message.id,
    {
      id: message.id,
      threadId: message.threadId,
      resourceId,
      role: message.role,
      content: message.content,
      createdAt: new Date().toISOString(),
      type: 'text',
    },
  ];
}

/** Builds one `[id, record]` entry for the `mastra_threads` table. */
function buildThreadEntry(resourceId: string, thread: FixtureThread) {
  return [
    thread.id,
    {
      id: thread.id,
      resourceId,
      title: thread.title,
      metadata: {},
      createdAt: new Date().toISOString(),
      updatedAt: new Date().toISOString(),
    },
  ];
}

/** Creates `<dataDir>/<questionId>` and writes its `meta.json` followed by its `db.json`. */
async function writePreparedQuestion(dataDir: string, fixture: FixtureQuestion) {
  const questionDir = join(dataDir, fixture.questionId);
  await mkdir(questionDir, { recursive: true });

  await writeFile(
    join(questionDir, 'meta.json'),
    JSON.stringify({
      questionId: fixture.questionId,
      questionType: fixture.questionType,
      resourceId: fixture.resourceId,
      threadIds: fixture.threadIds,
      memoryConfig: 'full-history',
      question: fixture.question,
      answer: fixture.answer,
    }),
  );

  await writeFile(
    join(questionDir, 'db.json'),
    JSON.stringify({
      mastra_messages: fixture.messages.map(message => buildMessageEntry(fixture.resourceId, message)),
      mastra_threads: fixture.threads.map(thread => buildThreadEntry(fixture.resourceId, thread)),
      // The remaining tables are written empty.
      mastra_resources: [],
      mastra_workflow_snapshot: [],
      mastra_evals: [],
      mastra_traces: [],
    }),
  );
}
233
+
234
+ describe('run', () => {
235
it('should evaluate questions from prepared data', async () => {
  const metrics = await command.run({
    dataset: 'longmemeval_s',
    memoryConfig: 'full-history',
    model: 'gpt-4o',
    preparedDataDir,
    outputDir,
  });

  // Both fixture questions are evaluated, and the mocked metric scores each as correct.
  expect(metrics.total_questions).toBe(2);
  expect(metrics.correct_answers).toBe(2);
  expect(metrics.overall_accuracy).toBe(1.0);

  // readdir is loaded with a dynamic import() instead of CommonJS require(), which is
  // not guaranteed to exist in an ES-module test file; the value is awaited directly.
  const { readdir } = await import('fs/promises');
  const runDirs = existsSync(outputDir) ? await readdir(outputDir) : [];
  expect(runDirs.length).toBeGreaterThan(0);

  // The first entry under outputDir is taken as the run directory.
  const runDir = join(outputDir, runDirs[0]);
  expect(existsSync(join(runDir, 'results.jsonl'))).toBe(true);
  expect(existsSync(join(runDir, 'metrics.json'))).toBe(true);
});
258
+
259
it('should process subset of questions when specified', async () => {
  // With subset: 1 only a single question should be counted.
  const { total_questions: evaluatedCount } = await command.run({
    dataset: 'longmemeval_s',
    memoryConfig: 'full-history',
    model: 'gpt-4o',
    preparedDataDir,
    outputDir,
    subset: 1,
  });

  expect(evaluatedCount).toBe(1);
});
271
+
272
it('should handle semantic-recall memory config with vector store', async () => {
  // Lay out <preparedDataDir>/longmemeval_s/semantic-recall/test-q1
  const semanticDir = join(preparedDataDir, 'longmemeval_s', 'semantic-recall');
  await mkdir(semanticDir, { recursive: true });

  const questionDir = join(semanticDir, 'test-q1');
  await mkdir(questionDir, { recursive: true });

  const meta = {
    questionId: 'test-q1',
    questionType: 'single-session-user',
    resourceId: 'resource_test-q1',
    threadIds: ['session-1'],
    memoryConfig: 'semantic-recall',
    question: 'What is my favorite color?',
    answer: 'Blue',
  };
  await writeFile(join(questionDir, 'meta.json'), JSON.stringify(meta));

  const db = {
    mastra_messages: [
      [
        'msg-1',
        {
          id: 'msg-1',
          threadId: 'session-1',
          resourceId: 'resource_test-q1',
          role: 'user',
          content: 'My favorite color is blue',
          createdAt: new Date().toISOString(),
          type: 'text',
        },
      ],
    ],
    mastra_threads: [
      [
        'session-1',
        {
          id: 'session-1',
          resourceId: 'resource_test-q1',
          title: 'Session 1',
          metadata: {},
          createdAt: new Date().toISOString(),
          updatedAt: new Date().toISOString(),
        },
      ],
    ],
    mastra_resources: [],
    mastra_workflow_snapshot: [],
    mastra_evals: [],
    mastra_traces: [],
  };
  await writeFile(join(questionDir, 'db.json'), JSON.stringify(db));

  // Vector store file: a single `messages` index holding one random 1536-dimensional vector.
  const vectorStore = {
    messages: {
      config: {
        dimension: 1536,
        metric: 'cosine',
      },
      documents: [
        {
          id: 'msg-1',
          vector: Array.from({ length: 1536 }, () => Math.random()),
          metadata: {
            threadId: 'session-1',
            resourceId: 'resource_test-q1',
            content: 'My favorite color is blue',
          },
        },
      ],
    },
  };
  await writeFile(join(questionDir, 'vector.json'), JSON.stringify(vectorStore));

  const metrics = await command.run({
    dataset: 'longmemeval_s',
    memoryConfig: 'semantic-recall',
    model: 'gpt-4o',
    preparedDataDir,
    outputDir,
    subset: 1,
  });

  expect(metrics.total_questions).toBe(1);
});
365
+
366
it('should throw error if prepared data does not exist', async () => {
  // The fixture only contains 'full-history' data, so 'working-memory' has nothing to load.
  const pendingRun = command.run({
    dataset: 'longmemeval_s',
    memoryConfig: 'working-memory',
    model: 'gpt-4o',
    preparedDataDir,
    outputDir,
  });

  await expect(pendingRun).rejects.toThrow(/Prepared data not found/);
});
377
+ });
378
+
379
+ describe('getMemoryOptions', () => {
380
it('should return correct options for each memory config', () => {
  // getMemoryOptions is reached through an `any` cast, as it is not callable via the public type.
  const optionsFor = (configName: string) => (command as any).getMemoryOptions(configName);

  const fullHistory = optionsFor('full-history');
  expect(fullHistory.type).toBe('full-history');
  expect(fullHistory.options.lastMessages).toBe(999999);

  const semanticRecall = optionsFor('semantic-recall');
  expect(semanticRecall.type).toBe('semantic-recall');
  expect(semanticRecall.options.semanticRecall.scope).toBe('resource');

  const workingMemory = optionsFor('working-memory');
  expect(workingMemory.type).toBe('working-memory');
  expect(workingMemory.options.workingMemory.enabled).toBe(true);

  const combined = optionsFor('combined');
  expect(combined.type).toBe('combined');
  expect(combined.options.semanticRecall).toBeTruthy();
  expect(combined.options.workingMemory.enabled).toBe(true);
});
398
+
399
it('should throw error for unknown memory config', () => {
  // Wrapped in a thunk so expect() can observe the throw.
  const requestInvalidConfig = () => (command as any).getMemoryOptions('invalid');

  expect(requestInvalidConfig).toThrow('Unknown memory config: invalid');
});
402
+ });
403
+ });