@mastra/longmemeval 0.0.0-add-libsql-changeset-20250910154739
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +919 -0
- package/DATA_DOWNLOAD_GUIDE.md +117 -0
- package/LICENSE.md +15 -0
- package/README.md +173 -0
- package/USAGE.md +105 -0
- package/package.json +67 -0
- package/scripts/download.ts +180 -0
- package/scripts/find-failed.ts +176 -0
- package/scripts/generate-embeddings.ts +56 -0
- package/scripts/generate-wm-templates.ts +296 -0
- package/scripts/setup.ts +60 -0
- package/src/__fixtures__/embeddings.json +2319 -0
- package/src/__fixtures__/test-dataset.json +82 -0
- package/src/cli.ts +690 -0
- package/src/commands/__tests__/prepare.test.ts +230 -0
- package/src/commands/__tests__/run.test.ts +403 -0
- package/src/commands/prepare.ts +793 -0
- package/src/commands/run.ts +553 -0
- package/src/config.ts +83 -0
- package/src/data/loader.ts +163 -0
- package/src/data/types.ts +61 -0
- package/src/embeddings/cached-openai-embedding-model.ts +227 -0
- package/src/embeddings/cached-openai-provider.ts +40 -0
- package/src/embeddings/index.ts +2 -0
- package/src/evaluation/__tests__/longmemeval-metric.test.ts +169 -0
- package/src/evaluation/longmemeval-metric.ts +173 -0
- package/src/retry-model.ts +60 -0
- package/src/storage/__tests__/benchmark-store.test.ts +280 -0
- package/src/storage/__tests__/benchmark-vector.test.ts +214 -0
- package/src/storage/benchmark-store.ts +540 -0
- package/src/storage/benchmark-vector.ts +234 -0
- package/src/storage/index.ts +2 -0
- package/src/test-utils/mock-embeddings.ts +54 -0
- package/src/test-utils/mock-model.ts +49 -0
- package/tests/data-loader.test.ts +96 -0
- package/tsconfig.json +18 -0
- package/vitest.config.ts +9 -0
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
[
|
|
2
|
+
{
|
|
3
|
+
"question_id": "test-001",
|
|
4
|
+
"question": "What is my favorite color?",
|
|
5
|
+
"answer": "Blue",
|
|
6
|
+
"question_type": "single-session-user",
|
|
7
|
+
"question_date": "2024-01-15",
|
|
8
|
+
"haystack_session_ids": ["session-001"],
|
|
9
|
+
"haystack_dates": ["2024-01-10"],
|
|
10
|
+
"haystack_sessions": [
|
|
11
|
+
[
|
|
12
|
+
{
|
|
13
|
+
"role": "user",
|
|
14
|
+
"content": "Hi! My name is Alice and my favorite color is blue.",
|
|
15
|
+
"has_answer": true
|
|
16
|
+
},
|
|
17
|
+
{
|
|
18
|
+
"role": "assistant",
|
|
19
|
+
"content": "Hello Alice! It's nice to meet you. I'll remember that your favorite color is blue."
|
|
20
|
+
}
|
|
21
|
+
]
|
|
22
|
+
],
|
|
23
|
+
"answer_session_ids": ["session-001"]
|
|
24
|
+
},
|
|
25
|
+
{
|
|
26
|
+
"question_id": "test-002",
|
|
27
|
+
"question": "What pet do I have?",
|
|
28
|
+
"answer": "A golden retriever named Max",
|
|
29
|
+
"question_type": "multi-session",
|
|
30
|
+
"question_date": "2024-01-20",
|
|
31
|
+
"haystack_session_ids": ["session-002", "session-003"],
|
|
32
|
+
"haystack_dates": ["2024-01-12", "2024-01-18"],
|
|
33
|
+
"haystack_sessions": [
|
|
34
|
+
[
|
|
35
|
+
{
|
|
36
|
+
"role": "user",
|
|
37
|
+
"content": "I'm thinking about getting a pet.",
|
|
38
|
+
"has_answer": false
|
|
39
|
+
},
|
|
40
|
+
{
|
|
41
|
+
"role": "assistant",
|
|
42
|
+
"content": "That's exciting! What kind of pet are you considering?"
|
|
43
|
+
}
|
|
44
|
+
],
|
|
45
|
+
[
|
|
46
|
+
{
|
|
47
|
+
"role": "user",
|
|
48
|
+
"content": "I got a golden retriever! His name is Max.",
|
|
49
|
+
"has_answer": true
|
|
50
|
+
},
|
|
51
|
+
{
|
|
52
|
+
"role": "assistant",
|
|
53
|
+
"content": "Congratulations on your new golden retriever! Max is a great name."
|
|
54
|
+
}
|
|
55
|
+
]
|
|
56
|
+
],
|
|
57
|
+
"answer_session_ids": ["session-003"]
|
|
58
|
+
},
|
|
59
|
+
{
|
|
60
|
+
"question_id": "test-003",
|
|
61
|
+
"question": "What is my favorite food?",
|
|
62
|
+
"answer": "The user has not mentioned their favorite food in the conversation history.",
|
|
63
|
+
"question_type": "single-session-user",
|
|
64
|
+
"question_date": "2024-01-25",
|
|
65
|
+
"haystack_session_ids": ["session-004"],
|
|
66
|
+
"haystack_dates": ["2024-01-22"],
|
|
67
|
+
"haystack_sessions": [
|
|
68
|
+
[
|
|
69
|
+
{
|
|
70
|
+
"role": "user",
|
|
71
|
+
"content": "I love cooking but I haven't decided on a favorite dish yet.",
|
|
72
|
+
"has_answer": false
|
|
73
|
+
},
|
|
74
|
+
{
|
|
75
|
+
"role": "assistant",
|
|
76
|
+
"content": "Cooking is a wonderful hobby! What types of cuisine do you enjoy preparing?"
|
|
77
|
+
}
|
|
78
|
+
]
|
|
79
|
+
],
|
|
80
|
+
"answer_session_ids": []
|
|
81
|
+
}
|
|
82
|
+
]
|