mcvay-mind 1.0.0 → 1.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/SKILL.md +9 -0
- package/bench/README.md +49 -0
- package/bench/artifacts/baseline.json +106 -0
- package/bench/artifacts/best-config-20260220T011934Z.json +13 -0
- package/bench/artifacts/best-config-20260220T014624Z.json +13 -0
- package/bench/artifacts/best-config-latest.json +13 -0
- package/bench/artifacts/gate-latest.json +22 -0
- package/bench/artifacts/latest.json +150 -0
- package/bench/artifacts/runs/20260220T011934Z/metrics.csv +7 -0
- package/bench/artifacts/runs/20260220T011934Z/results.json +418 -0
- package/bench/artifacts/runs/20260220T011934Z/summary.md +83 -0
- package/bench/artifacts/runs/20260220T014624Z/metrics.csv +7 -0
- package/bench/artifacts/runs/20260220T014624Z/results.json +490 -0
- package/bench/artifacts/runs/20260220T014624Z/summary.md +83 -0
- package/bench/dataset.js +288 -0
- package/bench/index.js +567 -0
- package/bench/metrics.js +163 -0
- package/bench/runners.js +146 -0
- package/index.js +295 -70
- package/lib/active-recall.js +24 -8
- package/lib/domain-indexer.js +131 -0
- package/lib/embeddings.js +233 -0
- package/lib/entity-linker.js +19 -4
- package/lib/expand-cache.js +112 -0
- package/lib/graph-skill-traversal.js +84 -0
- package/lib/graph-temporal.js +297 -0
- package/lib/metrics.js +163 -0
- package/lib/moc-generator.js +111 -0
- package/lib/response-guidance/index.js +577 -0
- package/lib/search-projections.js +62 -0
- package/lib/search.js +472 -180
- package/lib/skills-manifest.js +146 -0
- package/lib/sqlite-index.js +378 -0
- package/lib/store.js +406 -8
- package/lib/unified-graph.js +428 -0
- package/lib/vector-index.js +483 -0
- package/package.json +19 -6
- package/schema/base.yaml +49 -0
- package/schema/moc.yaml +50 -0
package/SKILL.md
CHANGED
|
@@ -66,9 +66,11 @@ node ~/.openclaw/skills/mcvay-mind/index.js query --type preference --days 7
|
|
|
66
66
|
|
|
67
67
|
# Search (unified)
|
|
68
68
|
node ~/.openclaw/skills/mcvay-mind/index.js search "query terms"
|
|
69
|
+
node ~/.openclaw/skills/mcvay-mind/index.js search "latest timeout preference" --mode hybrid-v2 --graph-boost 0.25
|
|
69
70
|
|
|
70
71
|
# Active recall (context surfacing)
|
|
71
72
|
node ~/.openclaw/skills/mcvay-mind/index.js recall "topic"
|
|
73
|
+
node ~/.openclaw/skills/mcvay-mind/index.js recall "codex coding" --graph-boost 0.3 --include-stale
|
|
72
74
|
|
|
73
75
|
# Entity linking
|
|
74
76
|
node ~/.openclaw/skills/mcvay-mind/index.js link
|
|
@@ -142,6 +144,12 @@ created: 2026-02-16T12:00:00.000Z
|
|
|
142
144
|
updated: 2026-02-16T12:00:00.000Z
|
|
143
145
|
tags: [tag1, tag2]
|
|
144
146
|
links: [decision/choice-1, preference/user-pref]
|
|
147
|
+
entities: [codex, timeout]
|
|
148
|
+
valid_from: 2026-02-16T12:00:00.000Z
|
|
149
|
+
valid_to: 2026-03-01T00:00:00.000Z
|
|
150
|
+
supersedes: [preference/old-timeout]
|
|
151
|
+
conflicts_with: [lesson/contrary-observation]
|
|
152
|
+
salience: 0.75
|
|
145
153
|
confidence: 90
|
|
146
154
|
source: agent
|
|
147
155
|
---
|
|
@@ -188,6 +196,7 @@ Keywords that should trigger recall:
|
|
|
188
196
|
├── lib/
|
|
189
197
|
│ ├── store.js # Memory CRUD operations
|
|
190
198
|
│ ├── search.js # Full-text search
|
|
199
|
+
│ ├── graph-temporal.js # Typed graph traversal + temporal ranking
|
|
191
200
|
│ └── entity-linker.js # Wiki-link extraction & knowledge graph
|
|
192
201
|
└── schema/ # YAML schemas
|
|
193
202
|
├── base.yaml
|
package/bench/README.md
ADDED
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
# Benchmark Harness
|
|
2
|
+
|
|
3
|
+
Offline benchmark + auto-tuning for McVay Mind retrieval.
|
|
4
|
+
|
|
5
|
+
## Commands
|
|
6
|
+
|
|
7
|
+
From workspace root:
|
|
8
|
+
|
|
9
|
+
- `npm run bench:all` - tune + benchmark + regression gate
|
|
10
|
+
- `npm run bench:run` - benchmark with latest/default config
|
|
11
|
+
- `npm run bench:tune` - run hyperparameter tuning only
|
|
12
|
+
- `npm run bench:gate` - evaluate latest run vs baseline
|
|
13
|
+
|
|
14
|
+
From `skills/mcvay-mind` (equivalent):
|
|
15
|
+
|
|
16
|
+
- `npm run bench:all` - tune + benchmark + regression gate
|
|
17
|
+
- `npm run bench:run` - benchmark with latest/default config
|
|
18
|
+
- `npm run bench:tune` - run hyperparameter tuning only
|
|
19
|
+
- `npm run bench:gate` - evaluate latest run vs baseline
|
|
20
|
+
|
|
21
|
+
## Metrics
|
|
22
|
+
|
|
23
|
+
- Recall@k
|
|
24
|
+
- MRR
|
|
25
|
+
- nDCG@k
|
|
26
|
+
- Latency (mean, p50, p95)
|
|
27
|
+
- Bootstrap confidence intervals
|
|
28
|
+
- Paired bootstrap p-value for recall lift
|
|
29
|
+
|
|
30
|
+
## Offline
|
|
31
|
+
|
|
32
|
+
For strict offline runs, set:
|
|
33
|
+
|
|
34
|
+
```bash
|
|
35
|
+
MCVAY_EMBED_ENABLED=false npm run bench:all
|
|
36
|
+
```
|
|
37
|
+
|
|
38
|
+
## Artifacts
|
|
39
|
+
|
|
40
|
+
Written to `bench/artifacts/`:
|
|
41
|
+
|
|
42
|
+
- `runs/<timestamp>/summary.md`
|
|
43
|
+
- `runs/<timestamp>/metrics.csv`
|
|
44
|
+
- `runs/<timestamp>/results.json`
|
|
45
|
+
- `best-config-<timestamp>.json`
|
|
46
|
+
- `best-config-latest.json`
|
|
47
|
+
- `baseline.json`
|
|
48
|
+
- `latest.json`
|
|
49
|
+
- `gate-latest.json`
|
|
package/bench/artifacts/baseline.json
ADDED
|
@@ -0,0 +1,106 @@
|
|
|
1
|
+
{
|
|
2
|
+
"created": "2026-02-20T01:21:09.340Z",
|
|
3
|
+
"runId": "20260220T011934Z",
|
|
4
|
+
"hybridSummary": {
|
|
5
|
+
"sampleSize": 4,
|
|
6
|
+
"metrics": {
|
|
7
|
+
"recallAtK": {
|
|
8
|
+
"mean": 0.25,
|
|
9
|
+
"lower": 0,
|
|
10
|
+
"upper": 0.75
|
|
11
|
+
},
|
|
12
|
+
"mrr": {
|
|
13
|
+
"mean": 0.125,
|
|
14
|
+
"lower": 0,
|
|
15
|
+
"upper": 0.375
|
|
16
|
+
},
|
|
17
|
+
"ndcgAtK": {
|
|
18
|
+
"mean": 0.15773243839286438,
|
|
19
|
+
"lower": 0,
|
|
20
|
+
"upper": 0.47319731517859315
|
|
21
|
+
}
|
|
22
|
+
},
|
|
23
|
+
"latency": {
|
|
24
|
+
"meanMs": 3445.17079825,
|
|
25
|
+
"p50Ms": 3344.280292,
|
|
26
|
+
"p95Ms": 3533.913265
|
|
27
|
+
},
|
|
28
|
+
"perTask": [
|
|
29
|
+
{
|
|
30
|
+
"rr": 0,
|
|
31
|
+
"recall": 0,
|
|
32
|
+
"ndcg": 0,
|
|
33
|
+
"latencyMs": 3344.280292
|
|
34
|
+
},
|
|
35
|
+
{
|
|
36
|
+
"rr": 0,
|
|
37
|
+
"recall": 0,
|
|
38
|
+
"ndcg": 0,
|
|
39
|
+
"latencyMs": 4020.542328
|
|
40
|
+
},
|
|
41
|
+
{
|
|
42
|
+
"rr": 0.5,
|
|
43
|
+
"recall": 1,
|
|
44
|
+
"ndcg": 0.6309297535714575,
|
|
45
|
+
"latencyMs": 3533.913265
|
|
46
|
+
},
|
|
47
|
+
{
|
|
48
|
+
"rr": 0,
|
|
49
|
+
"recall": 0,
|
|
50
|
+
"ndcg": 0,
|
|
51
|
+
"latencyMs": 2881.947308
|
|
52
|
+
}
|
|
53
|
+
]
|
|
54
|
+
},
|
|
55
|
+
"candidateSummary": {
|
|
56
|
+
"sampleSize": 4,
|
|
57
|
+
"metrics": {
|
|
58
|
+
"recallAtK": {
|
|
59
|
+
"mean": 0,
|
|
60
|
+
"lower": 0,
|
|
61
|
+
"upper": 0
|
|
62
|
+
},
|
|
63
|
+
"mrr": {
|
|
64
|
+
"mean": 0,
|
|
65
|
+
"lower": 0,
|
|
66
|
+
"upper": 0
|
|
67
|
+
},
|
|
68
|
+
"ndcgAtK": {
|
|
69
|
+
"mean": 0,
|
|
70
|
+
"lower": 0,
|
|
71
|
+
"upper": 0
|
|
72
|
+
}
|
|
73
|
+
},
|
|
74
|
+
"latency": {
|
|
75
|
+
"meanMs": 0.76737475,
|
|
76
|
+
"p50Ms": 0.746852,
|
|
77
|
+
"p95Ms": 0.758073
|
|
78
|
+
},
|
|
79
|
+
"perTask": [
|
|
80
|
+
{
|
|
81
|
+
"rr": 0,
|
|
82
|
+
"recall": 0,
|
|
83
|
+
"ndcg": 0,
|
|
84
|
+
"latencyMs": 0.746852
|
|
85
|
+
},
|
|
86
|
+
{
|
|
87
|
+
"rr": 0,
|
|
88
|
+
"recall": 0,
|
|
89
|
+
"ndcg": 0,
|
|
90
|
+
"latencyMs": 0.858205
|
|
91
|
+
},
|
|
92
|
+
{
|
|
93
|
+
"rr": 0,
|
|
94
|
+
"recall": 0,
|
|
95
|
+
"ndcg": 0,
|
|
96
|
+
"latencyMs": 0.758073
|
|
97
|
+
},
|
|
98
|
+
{
|
|
99
|
+
"rr": 0,
|
|
100
|
+
"recall": 0,
|
|
101
|
+
"ndcg": 0,
|
|
102
|
+
"latencyMs": 0.706369
|
|
103
|
+
}
|
|
104
|
+
]
|
|
105
|
+
}
|
|
106
|
+
}
|
|
package/bench/artifacts/gate-latest.json
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
{
|
|
2
|
+
"timestamp": "2026-02-20T02:56:46.872Z",
|
|
3
|
+
"baselineRunId": "20260220T011934Z",
|
|
4
|
+
"latestRunId": "20260220T014624Z",
|
|
5
|
+
"gate": {
|
|
6
|
+
"pass": false,
|
|
7
|
+
"recallDrop": -833333333.3333334,
|
|
8
|
+
"latencyRegression": 3761.516071671198,
|
|
9
|
+
"failRecall": false,
|
|
10
|
+
"failLatency": true
|
|
11
|
+
},
|
|
12
|
+
"acceptance": {
|
|
13
|
+
"pass": false,
|
|
14
|
+
"latencyRegression": -0.07326701690082414,
|
|
15
|
+
"pValueThreshold": 0.05,
|
|
16
|
+
"maxLatencyRegression": 0.2,
|
|
17
|
+
"failLift": true,
|
|
18
|
+
"failPValue": true,
|
|
19
|
+
"failLatency": false,
|
|
20
|
+
"failReproducibility": true
|
|
21
|
+
}
|
|
22
|
+
}
|
|
package/bench/artifacts/latest.json
ADDED
|
@@ -0,0 +1,150 @@
|
|
|
1
|
+
{
|
|
2
|
+
"created": "2026-02-20T02:56:46.869Z",
|
|
3
|
+
"runId": "20260220T014624Z",
|
|
4
|
+
"baseline": {
|
|
5
|
+
"sampleSize": 6,
|
|
6
|
+
"metrics": {
|
|
7
|
+
"recallAtK": {
|
|
8
|
+
"mean": 0.8333333333333334,
|
|
9
|
+
"lower": 0.5,
|
|
10
|
+
"upper": 1
|
|
11
|
+
},
|
|
12
|
+
"mrr": {
|
|
13
|
+
"mean": 0.6111111111111112,
|
|
14
|
+
"lower": 0.27777777777777773,
|
|
15
|
+
"upper": 0.888888888888889
|
|
16
|
+
},
|
|
17
|
+
"ndcgAtK": {
|
|
18
|
+
"mean": 0.6666666666666666,
|
|
19
|
+
"lower": 0.3333333333333333,
|
|
20
|
+
"upper": 0.9166666666666666
|
|
21
|
+
}
|
|
22
|
+
},
|
|
23
|
+
"latency": {
|
|
24
|
+
"meanMs": 2599.9500551666665,
|
|
25
|
+
"p50Ms": 2385.250471,
|
|
26
|
+
"p95Ms": 3077.760151
|
|
27
|
+
},
|
|
28
|
+
"perTask": [
|
|
29
|
+
{
|
|
30
|
+
"rr": 1,
|
|
31
|
+
"recall": 1,
|
|
32
|
+
"ndcg": 1,
|
|
33
|
+
"latencyMs": 3070.747261
|
|
34
|
+
},
|
|
35
|
+
{
|
|
36
|
+
"rr": 0,
|
|
37
|
+
"recall": 0,
|
|
38
|
+
"ndcg": 0,
|
|
39
|
+
"latencyMs": 3077.760151
|
|
40
|
+
},
|
|
41
|
+
{
|
|
42
|
+
"rr": 1,
|
|
43
|
+
"recall": 1,
|
|
44
|
+
"ndcg": 1,
|
|
45
|
+
"latencyMs": 3776.156462
|
|
46
|
+
},
|
|
47
|
+
{
|
|
48
|
+
"rr": 1,
|
|
49
|
+
"recall": 1,
|
|
50
|
+
"ndcg": 1,
|
|
51
|
+
"latencyMs": 2385.250471
|
|
52
|
+
},
|
|
53
|
+
{
|
|
54
|
+
"rr": 0.3333333333333333,
|
|
55
|
+
"recall": 1,
|
|
56
|
+
"ndcg": 0.5,
|
|
57
|
+
"latencyMs": 1707.033999
|
|
58
|
+
},
|
|
59
|
+
{
|
|
60
|
+
"rr": 0.3333333333333333,
|
|
61
|
+
"recall": 1,
|
|
62
|
+
"ndcg": 0.5,
|
|
63
|
+
"latencyMs": 1582.751987
|
|
64
|
+
}
|
|
65
|
+
]
|
|
66
|
+
},
|
|
67
|
+
"candidate": {
|
|
68
|
+
"sampleSize": 6,
|
|
69
|
+
"metrics": {
|
|
70
|
+
"recallAtK": {
|
|
71
|
+
"mean": 0.8333333333333334,
|
|
72
|
+
"lower": 0.5,
|
|
73
|
+
"upper": 1
|
|
74
|
+
},
|
|
75
|
+
"mrr": {
|
|
76
|
+
"mean": 0.6111111111111112,
|
|
77
|
+
"lower": 0.27777777777777773,
|
|
78
|
+
"upper": 0.888888888888889
|
|
79
|
+
},
|
|
80
|
+
"ndcgAtK": {
|
|
81
|
+
"mean": 0.6666666666666666,
|
|
82
|
+
"lower": 0.3333333333333333,
|
|
83
|
+
"upper": 0.9166666666666666
|
|
84
|
+
}
|
|
85
|
+
},
|
|
86
|
+
"latency": {
|
|
87
|
+
"meanMs": 2237.8700451666664,
|
|
88
|
+
"p50Ms": 2144.505668,
|
|
89
|
+
"p95Ms": 2852.261846
|
|
90
|
+
},
|
|
91
|
+
"perTask": [
|
|
92
|
+
{
|
|
93
|
+
"rr": 1,
|
|
94
|
+
"recall": 1,
|
|
95
|
+
"ndcg": 1,
|
|
96
|
+
"latencyMs": 2216.417736
|
|
97
|
+
},
|
|
98
|
+
{
|
|
99
|
+
"rr": 0,
|
|
100
|
+
"recall": 0,
|
|
101
|
+
"ndcg": 0,
|
|
102
|
+
"latencyMs": 2852.261846
|
|
103
|
+
},
|
|
104
|
+
{
|
|
105
|
+
"rr": 1,
|
|
106
|
+
"recall": 1,
|
|
107
|
+
"ndcg": 1,
|
|
108
|
+
"latencyMs": 3482.541869
|
|
109
|
+
},
|
|
110
|
+
{
|
|
111
|
+
"rr": 1,
|
|
112
|
+
"recall": 1,
|
|
113
|
+
"ndcg": 1,
|
|
114
|
+
"latencyMs": 2144.505668
|
|
115
|
+
},
|
|
116
|
+
{
|
|
117
|
+
"rr": 0.3333333333333333,
|
|
118
|
+
"recall": 1,
|
|
119
|
+
"ndcg": 0.5,
|
|
120
|
+
"latencyMs": 1443.53379
|
|
121
|
+
},
|
|
122
|
+
{
|
|
123
|
+
"rr": 0.3333333333333333,
|
|
124
|
+
"recall": 1,
|
|
125
|
+
"ndcg": 0.5,
|
|
126
|
+
"latencyMs": 1287.959362
|
|
127
|
+
}
|
|
128
|
+
]
|
|
129
|
+
},
|
|
130
|
+
"significance": {
|
|
131
|
+
"baseline": "mcvay-hybrid",
|
|
132
|
+
"candidate": "mcvay-hybrid-v2",
|
|
133
|
+
"lift": 0,
|
|
134
|
+
"pValue": 1
|
|
135
|
+
},
|
|
136
|
+
"reproducibility": {
|
|
137
|
+
"variancePct": 25.00000000000001,
|
|
138
|
+
"stable": false
|
|
139
|
+
},
|
|
140
|
+
"acceptance": {
|
|
141
|
+
"pass": false,
|
|
142
|
+
"latencyRegression": -0.07326701690082414,
|
|
143
|
+
"pValueThreshold": 0.05,
|
|
144
|
+
"maxLatencyRegression": 0.2,
|
|
145
|
+
"failLift": true,
|
|
146
|
+
"failPValue": true,
|
|
147
|
+
"failLatency": false,
|
|
148
|
+
"failReproducibility": true
|
|
149
|
+
}
|
|
150
|
+
}
|
|
package/bench/artifacts/runs/20260220T011934Z/metrics.csv
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
variant,recall_at_k,mrr,ndcg_at_k,latency_mean_ms,latency_p95_ms,sample_size
|
|
2
|
+
mcvay-keyword,0.250000,0.125000,0.157732,3234.290,3370.587,4
|
|
3
|
+
mcvay-semantic,0.000000,0.000000,0.000000,3083.167,3131.873,4
|
|
4
|
+
mcvay-hybrid,0.250000,0.125000,0.157732,3445.171,3533.913,4
|
|
5
|
+
mcvay-hybrid-v2-default,0.000000,0.000000,0.000000,0.874,0.920,4
|
|
6
|
+
mcvay-hybrid-v2,0.000000,0.000000,0.000000,0.767,0.758,4
|
|
7
|
+
mem0-local-fallback-lexical,1.000000,0.875000,0.907732,1.672,1.847,4
|