@mastra/longmemeval 0.0.0-add-libsql-changeset-20250910154739

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37) hide show
  1. package/CHANGELOG.md +919 -0
  2. package/DATA_DOWNLOAD_GUIDE.md +117 -0
  3. package/LICENSE.md +15 -0
  4. package/README.md +173 -0
  5. package/USAGE.md +105 -0
  6. package/package.json +67 -0
  7. package/scripts/download.ts +180 -0
  8. package/scripts/find-failed.ts +176 -0
  9. package/scripts/generate-embeddings.ts +56 -0
  10. package/scripts/generate-wm-templates.ts +296 -0
  11. package/scripts/setup.ts +60 -0
  12. package/src/__fixtures__/embeddings.json +2319 -0
  13. package/src/__fixtures__/test-dataset.json +82 -0
  14. package/src/cli.ts +690 -0
  15. package/src/commands/__tests__/prepare.test.ts +230 -0
  16. package/src/commands/__tests__/run.test.ts +403 -0
  17. package/src/commands/prepare.ts +793 -0
  18. package/src/commands/run.ts +553 -0
  19. package/src/config.ts +83 -0
  20. package/src/data/loader.ts +163 -0
  21. package/src/data/types.ts +61 -0
  22. package/src/embeddings/cached-openai-embedding-model.ts +227 -0
  23. package/src/embeddings/cached-openai-provider.ts +40 -0
  24. package/src/embeddings/index.ts +2 -0
  25. package/src/evaluation/__tests__/longmemeval-metric.test.ts +169 -0
  26. package/src/evaluation/longmemeval-metric.ts +173 -0
  27. package/src/retry-model.ts +60 -0
  28. package/src/storage/__tests__/benchmark-store.test.ts +280 -0
  29. package/src/storage/__tests__/benchmark-vector.test.ts +214 -0
  30. package/src/storage/benchmark-store.ts +540 -0
  31. package/src/storage/benchmark-vector.ts +234 -0
  32. package/src/storage/index.ts +2 -0
  33. package/src/test-utils/mock-embeddings.ts +54 -0
  34. package/src/test-utils/mock-model.ts +49 -0
  35. package/tests/data-loader.test.ts +96 -0
  36. package/tsconfig.json +18 -0
  37. package/vitest.config.ts +9 -0
@@ -0,0 +1,234 @@
1
+ import { MastraVector } from '@mastra/core/vector';
2
+ import type {
3
+ QueryVectorParams,
4
+ QueryResult,
5
+ UpsertVectorParams,
6
+ CreateIndexParams,
7
+ IndexStats,
8
+ UpdateVectorParams,
9
+ DeleteVectorParams,
10
+ DescribeIndexParams,
11
+ DeleteIndexParams,
12
+ } from '@mastra/core/vector';
13
+ import { VectorDB, type Document } from 'imvectordb';
14
+ import { writeFile, readFile } from 'fs/promises';
15
+ import { existsSync } from 'fs';
16
+
17
/** Access mode of the store: in `'read'` mode the mutating methods return without doing anything. */
type DBMode = 'read' | 'read-write';

/** Settings recorded for an index when it is created; written out as-is by `persist`. */
type IndexConfig = { dimension: number; metric?: 'cosine' | 'euclidean' | 'dotproduct' };

/**
 * In-memory vector store used by the benchmark.
 *
 * Each index is backed by its own `VectorDB` instance. Every document is
 * additionally mirrored in `documentsStore` so that the complete state can be
 * serialized to a JSON file (`persist`) and loaded back (`hydrate`).
 *
 * When constructed with mode `'read'`, `createIndex`, `upsert`, `deleteIndex`,
 * `updateVector`, `deleteVector`, `persist` and `clear` are silent no-ops.
 * `hydrate` is not guarded by the mode and always replaces the current state.
 */
export class BenchmarkVectorStore extends MastraVector {
  /** One VectorDB per index name. */
  private indexes: Map<string, VectorDB> = new Map();
  /** Dimension/metric recorded at index creation; the metric is stored only, it is not passed to VectorDB here. */
  private indexConfigs: Map<string, IndexConfig> = new Map();
  /** Mirror of every stored document, keyed by index name and then document id; this is what `persist` writes. */
  private documentsStore: Map<string, Map<string, Document>> = new Map();
  private mode: DBMode;

  /**
   * @param mode `'read-write'` (default) allows mutation; `'read'` turns mutating calls into no-ops.
   */
  constructor(mode: DBMode = 'read-write') {
    super();
    this.mode = mode;
  }

  /**
   * Create an empty index. If the index already exists, the requested
   * dimension/metric are handed to the inherited `validateExistingIndex`
   * and nothing new is created.
   */
  async createIndex({ indexName, dimension, metric = 'cosine' }: CreateIndexParams): Promise<void> {
    if (this.mode === 'read') return;
    if (this.indexes.has(indexName)) {
      await this.validateExistingIndex(indexName, dimension, metric);
      return;
    }

    this.indexes.set(indexName, new VectorDB());
    this.indexConfigs.set(indexName, { dimension, metric });
    this.documentsStore.set(indexName, new Map());
  }

  /**
   * Return up to `topK` (default 10) matches for `queryVector`.
   * Only `indexName`, `queryVector` and `topK` are read from the params.
   *
   * @throws Error when the index does not exist.
   */
  async query({ indexName, queryVector, topK = 10 }: QueryVectorParams): Promise<QueryResult[]> {
    const db = this.indexes.get(indexName);
    if (!db) {
      throw new Error(`Index ${indexName} not found`);
    }

    const results = await db.query(queryVector, topK);
    // Map VectorDB's { document, similarity } result shape onto QueryResult.
    return results.map((r: any) => ({
      id: r.document.id,
      score: r.similarity,
      metadata: r.document.metadata,
    }));
  }

  /**
   * Add vectors to an index and return their ids. Ids are generated with
   * `crypto.randomUUID()` when `ids` is not supplied. `metadata[i]` is
   * attached to `vectors[i]`, defaulting to an empty object.
   *
   * @returns the ids of the stored vectors, or an empty array in `'read'` mode.
   * @throws Error when the index does not exist, or when `ids` is supplied
   *   with a different length than `vectors`.
   */
  async upsert({ indexName, vectors, metadata, ids }: UpsertVectorParams): Promise<string[]> {
    if (this.mode === 'read') return [];
    const db = this.indexes.get(indexName);
    const docsStore = this.documentsStore.get(indexName);

    if (!db || !docsStore) {
      throw new Error(`Index ${indexName} not found`);
    }

    // Reject mismatched ids up front: otherwise documents would be stored
    // under an `undefined` id, or ids would be returned that were never stored.
    if (ids && ids.length !== vectors.length) {
      throw new Error(
        `Cannot upsert into index ${indexName}: received ${ids.length} ids for ${vectors.length} vectors`,
      );
    }

    const vectorIds = ids ?? vectors.map(() => crypto.randomUUID());

    vectors.forEach((embedding, i) => {
      const doc: Document = {
        id: vectorIds[i],
        embedding,
        metadata: metadata?.[i] ?? {},
      };

      // Store in VectorDB
      db.add(doc);
      // Also store in our documents map for persistence
      docsStore.set(doc.id, doc);
    });

    return vectorIds;
  }

  /** List the names of all indexes currently held in memory. */
  async listIndexes(): Promise<string[]> {
    return Array.from(this.indexes.keys());
  }

  /**
   * Report the recorded dimension and metric of an index together with the
   * document count reported by its VectorDB.
   *
   * @throws Error when the index or its config does not exist.
   */
  async describeIndex({ indexName }: DescribeIndexParams): Promise<IndexStats> {
    const db = this.indexes.get(indexName);
    const config = this.indexConfigs.get(indexName);

    if (!db || !config) {
      throw new Error(`Index ${indexName} not found`);
    }

    return {
      dimension: config.dimension,
      count: db.size(),
      metric: config.metric,
    };
  }

  /** Terminate the index's VectorDB (if present) and forget the index. Unknown names are ignored. */
  async deleteIndex({ indexName }: DeleteIndexParams): Promise<void> {
    if (this.mode === 'read') return;
    const db = this.indexes.get(indexName);
    if (db) {
      await db.terminate();
    }
    this.indexes.delete(indexName);
    this.indexConfigs.delete(indexName);
    this.documentsStore.delete(indexName);
  }

  /**
   * Replace the embedding and/or metadata of an existing vector. Fields that
   * are not part of `update` keep their current value.
   *
   * @throws Error when the index or the vector id does not exist.
   */
  async updateVector({ indexName, id, update }: UpdateVectorParams): Promise<void> {
    if (this.mode === 'read') return;
    const db = this.indexes.get(indexName);
    const docsStore = this.documentsStore.get(indexName);

    if (!db || !docsStore) {
      throw new Error(`Index ${indexName} not found`);
    }

    const existing = db.get(id);
    if (!existing) {
      throw new Error(`Vector with id ${id} not found in index ${indexName}`);
    }

    // The update is done as remove-then-add of the whole document.
    db.del(existing);
    docsStore.delete(id);

    const updatedDoc: Document = {
      id,
      embedding: update.vector ?? existing.embedding,
      metadata: update.metadata ?? existing.metadata,
    };

    db.add(updatedDoc);
    docsStore.set(id, updatedDoc);
  }

  /**
   * Remove a vector from an index. An unknown id is ignored.
   *
   * @throws Error when the index does not exist.
   */
  async deleteVector({ indexName, id }: DeleteVectorParams): Promise<void> {
    if (this.mode === 'read') return;
    const db = this.indexes.get(indexName);
    const docsStore = this.documentsStore.get(indexName);

    if (!db || !docsStore) {
      throw new Error(`Index ${indexName} not found`);
    }

    const doc = db.get(id);
    if (doc) {
      db.del(doc);
      docsStore.delete(id);
    }
  }

  /**
   * Persist the current vector store state to a JSON file.
   *
   * The file holds one entry per index: `{ config, documents }`.
   */
  async persist(filePath: string): Promise<void> {
    if (this.mode === 'read') return;
    const data: Record<string, { config: IndexConfig | undefined; documents: Document[] }> = {};

    for (const [indexName, docsStore] of this.documentsStore) {
      data[indexName] = {
        config: this.indexConfigs.get(indexName),
        documents: Array.from(docsStore.values()),
      };
    }

    await writeFile(filePath, JSON.stringify(data, null, 2));
  }

  /**
   * Hydrate vector store state from a JSON file written by `persist`.
   *
   * The file is read and its overall shape checked before any existing state
   * is touched, so a missing or malformed file leaves the store unchanged.
   * Individual documents are not validated.
   *
   * @throws Error when the file does not exist or does not have the
   *   `{ [indexName]: { config, documents: [...] } }` shape.
   */
  async hydrate(filePath: string): Promise<void> {
    if (!existsSync(filePath)) {
      throw new Error(`Vector store file not found: ${filePath}`);
    }

    const content = await readFile(filePath, 'utf-8');
    const data: unknown = JSON.parse(content);
    if (data === null || typeof data !== 'object' || Array.isArray(data)) {
      throw new Error(`Invalid vector store file ${filePath}: expected an object keyed by index name`);
    }

    // Phase 1: validate everything without side effects.
    const restored: Array<{ indexName: string; config: IndexConfig; documents: Document[] }> = [];
    for (const [indexName, indexData] of Object.entries(data)) {
      const { config, documents } = (indexData ?? {}) as { config: IndexConfig; documents: unknown };
      if (!Array.isArray(documents)) {
        throw new Error(`Invalid vector store file ${filePath}: index ${indexName} has no documents array`);
      }
      restored.push({ indexName, config, documents: documents as Document[] });
    }

    // Phase 2: drop the old state and rebuild from the validated entries.
    await this.terminateAndClearAll();

    for (const { indexName, config, documents } of restored) {
      const db = new VectorDB();
      const docsStore = new Map<string, Document>();

      for (const doc of documents) {
        db.add(doc);
        docsStore.set(doc.id, doc);
      }

      this.indexes.set(indexName, db);
      this.indexConfigs.set(indexName, config);
      this.documentsStore.set(indexName, docsStore);
    }
  }

  /**
   * Clear all data and start fresh
   */
  async clear(): Promise<void> {
    if (this.mode === 'read') return;
    await this.terminateAndClearAll();
  }

  /** Terminate every VectorDB instance and empty all three bookkeeping maps. */
  private async terminateAndClearAll(): Promise<void> {
    for (const db of this.indexes.values()) {
      await db.terminate();
    }
    this.indexes.clear();
    this.indexConfigs.clear();
    this.documentsStore.clear();
  }
}
@@ -0,0 +1,2 @@
1
+ export { BenchmarkStore } from './benchmark-store';
2
+ export { BenchmarkVectorStore } from './benchmark-vector';
@@ -0,0 +1,54 @@
1
+ import { readFileSync } from 'fs';
2
+ import { join } from 'path';
3
+
4
// Load fixture embeddings - the path is resolved against the current working
// directory, so this expects to be run from the package root.
const embeddingsPath = join(process.cwd(), 'src', '__fixtures__', 'embeddings.json');
let fixtureEmbeddings: Record<string, number[]> = {};

try {
  const parsed: unknown = JSON.parse(readFileSync(embeddingsPath, 'utf-8'));
  // Anything other than a plain JSON object would break later lookups, so
  // treat it like an unreadable file and fall through to the warning below.
  if (parsed === null || typeof parsed !== 'object' || Array.isArray(parsed)) {
    throw new TypeError('Fixture embeddings file does not contain a JSON object');
  }
  fixtureEmbeddings = parsed as Record<string, number[]>;
} catch {
  console.warn('Warning: Could not load fixture embeddings, using random embeddings instead');
}

/** Length of the vectors generated when no fixture embedding exists for a text. */
const GENERATED_EMBEDDING_LENGTH = 1536;

/**
 * Look up a fixture by text using an own-property check, so that texts such as
 * "constructor" or "toString" do not resolve to members of Object.prototype.
 */
function lookupFixtureEmbedding(text: string): number[] | undefined {
  return Object.prototype.hasOwnProperty.call(fixtureEmbeddings, text) ? fixtureEmbeddings[text] : undefined;
}

/**
 * Build a deterministic pseudo-random embedding for a text. The seed is the sum
 * of the text's UTF-16 code units, so the same text always yields the same
 * vector (and texts with the same code-unit sum yield identical vectors).
 */
function generateSeededEmbedding(text: string): number[] {
  let seed = 0;
  for (let i = 0; i < text.length; i++) {
    seed += text.charCodeAt(i);
  }
  return Array.from({ length: GENERATED_EMBEDDING_LENGTH }, (_, i) => Math.sin(seed + i) * Math.cos(seed * i * 0.01));
}

/**
 * Mock embedding function that returns fixture embeddings or generates
 * deterministic ones.
 *
 * @returns an object whose `doEmbed({ values })` resolves to `{ embeddings }`,
 *   one vector per input text, in input order.
 */
export function createMockEmbedding() {
  return {
    doEmbed: async ({ values }: { values: string[] }) => {
      const embeddings = values.map(text => lookupFixtureEmbedding(text) ?? generateSeededEmbedding(text));

      return { embeddings };
    },
  };
}

/**
 * Get a specific fixture embedding by text
 *
 * @returns the fixture vector, or `undefined` when the text is not a fixture key.
 */
export function getFixtureEmbedding(text: string): number[] | undefined {
  return lookupFixtureEmbedding(text);
}

/**
 * Get all available fixture texts
 */
export function getFixtureTexts(): string[] {
  return Object.keys(fixtureEmbeddings);
}
@@ -0,0 +1,49 @@
1
+ import type { LanguageModelV1, LanguageModelV1CallWarning, LanguageModelV1StreamPart } from '@ai-sdk/provider';
2
+ import { createIdGenerator } from '@ai-sdk/provider-utils';
3
+
4
+ const generateId = createIdGenerator();
5
+
6
/** Signature of the callback that produces the mock's `doGenerate` result. */
type MockDoGenerate = (options: any) => Promise<{
  rawCall: { rawPrompt: any; rawSettings: any };
  finishReason: 'stop' | 'length' | 'content-filter' | 'tool-calls' | 'error' | 'other' | 'unknown';
  usage: { promptTokens: number; completionTokens: number };
  text?: string;
  toolCalls?: any[];
  warnings?: LanguageModelV1CallWarning[];
}>;

/**
 * Test double for `LanguageModelV1`: `doGenerate` forwards to a callback
 * supplied at construction time, and `doStream` always rejects.
 */
export class MockLanguageModelV1 implements LanguageModelV1 {
  readonly specificationVersion = 'v1';
  readonly defaultObjectGenerationMode = 'json' as const;
  readonly provider = 'mock-provider';
  readonly modelId = 'mock-model';

  /** Callback invoked for every `doGenerate` call. */
  private readonly generateFunc: MockDoGenerate;

  /**
   * @param config.doGenerate callback whose resolved value becomes the result of `doGenerate`.
   */
  constructor(config: { doGenerate: MockDoGenerate }) {
    this.generateFunc = config.doGenerate;
  }

  /** Pass the call options straight to the injected callback and return its result. */
  async doGenerate(
    options: Parameters<LanguageModelV1['doGenerate']>[0],
  ): Promise<Awaited<ReturnType<LanguageModelV1['doGenerate']>>> {
    return this.generateFunc(options);
  }

  /**
   * Streaming is not implemented by this mock.
   *
   * @throws Error always (surfaced as a rejected promise, since the method is async).
   */
  async doStream(options: Parameters<LanguageModelV1['doStream']>[0]): Promise<{
    stream: ReadableStream<LanguageModelV1StreamPart>;
    rawCall: { rawPrompt: any; rawSettings: any };
    warnings?: LanguageModelV1CallWarning[];
  }> {
    throw new Error('Streaming not supported in mock model');
  }
}
@@ -0,0 +1,96 @@
1
+ import { describe, it, expect, beforeAll } from 'vitest';
2
+ import { DatasetLoader } from '../src/data/loader';
3
+ import type { LongMemEvalQuestion } from '../src/data/types';
4
+ import { join } from 'path';
5
+
6
describe('DatasetLoader', () => {
  // `defaultLoader` is built with no arguments; `fixtureLoader` is rooted at
  // the fixtures directory under the current working directory.
  let defaultLoader: DatasetLoader;
  let fixtureLoader: DatasetLoader;

  beforeAll(() => {
    defaultLoader = new DatasetLoader();
    fixtureLoader = new DatasetLoader(join(process.cwd(), 'src', '__fixtures__'));
  });

  describe('validateDataset', () => {
    // validateDataset is not publicly accessible, so it is reached through an `any` cast.
    const runValidation = (dataset: unknown) => () => {
      (defaultLoader as any).validateDataset(dataset);
    };

    it('should validate correct dataset structure', () => {
      const wellFormedQuestion: LongMemEvalQuestion = {
        question_id: 'test_001',
        question_type: 'single-session-user',
        question: 'What is my favorite color?',
        answer: 'Blue',
        question_date: '2024-01-01',
        haystack_session_ids: ['session_1'],
        haystack_dates: ['2024-01-01'],
        haystack_sessions: [
          [
            { role: 'user', content: 'My favorite color is blue.' },
            { role: 'assistant', content: 'I understand your favorite color is blue.' },
          ],
        ],
        answer_session_ids: ['session_1'],
      };
      const validData: LongMemEvalQuestion[] = [wellFormedQuestion];

      expect(runValidation(validData)).not.toThrow();
    });

    it('should throw on invalid dataset structure', () => {
      // Only the id is present; every other required field is missing.
      const invalidData = [{ question_id: 'test_001' }];

      expect(runValidation(invalidData)).toThrow('Missing required field');
    });
  });

  describe('loadDataset', () => {
    it('should load and validate test dataset fixture', async () => {
      const questions = await fixtureLoader.loadDataset('test-dataset' as any);
      expect(questions).toHaveLength(3);

      const [first, second] = questions;

      // Identity and type of the leading questions
      expect(first.question_id).toBe('test-001');
      expect(first.question_type).toBe('single-session-user');
      expect(second.question_type).toBe('multi-session');

      // Number of haystack sessions per question
      expect(first.haystack_sessions).toHaveLength(1);
      expect(second.haystack_sessions).toHaveLength(2);

      // Expected answers
      expect(first.answer).toBe('Blue');
      expect(second.answer).toBe('A golden retriever named Max');
    });

    it('should throw helpful error when dataset file not found', async () => {
      const pendingLoad = defaultLoader.loadDataset('non-existent-dataset' as any);

      await expect(pendingLoad).rejects.toThrow('Dataset file not found');
    });
  });

  describe('getDatasetStats', () => {
    it('should calculate stats for test dataset', async () => {
      const stats = await fixtureLoader.getDatasetStats('test-dataset' as any);
      const { questionsByType } = stats;

      expect(stats.totalQuestions).toBe(3);
      expect(questionsByType['single-session-user']).toBe(2);
      expect(questionsByType['multi-session']).toBe(1);
      expect(stats.avgSessionsPerQuestion).toBeCloseTo(1.33, 2);
      expect(stats.avgTurnsPerSession).toBe(2);
      expect(stats.totalTokensEstimate).toBeGreaterThan(0);
      // The fixture contains no abstention questions.
      expect(stats.abstentionQuestions).toBe(0);
    });
  });
});
package/tsconfig.json ADDED
@@ -0,0 +1,18 @@
1
+ {
2
+ "compilerOptions": {
3
+ "target": "ES2022",
4
+ "module": "ESNext",
5
+ "lib": ["es2022"],
6
+ "rootDir": "./src",
7
+ "outDir": "./dist",
8
+ "esModuleInterop": true,
9
+ "forceConsistentCasingInFileNames": true,
10
+ "strict": true,
11
+ "skipLibCheck": true,
12
+ "resolveJsonModule": true,
13
+ "moduleResolution": "bundler",
14
+ "allowSyntheticDefaultImports": true
15
+ },
16
+ "include": ["src/**/*"],
17
+ "exclude": ["node_modules", "dist", "tests"]
18
+ }
@@ -0,0 +1,9 @@
1
+ import { defineConfig } from 'vitest/config';
2
+
3
/**
 * Vitest configuration for this package: test globals enabled, the `node`
 * environment, and a `testTimeout` of 30000.
 */
const vitestConfig = defineConfig({
  test: {
    globals: true,
    environment: 'node',
    testTimeout: 30000,
  },
});

export default vitestConfig;