mcvay-mind 1.0.0 → 1.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39)
  1. package/SKILL.md +9 -0
  2. package/bench/README.md +49 -0
  3. package/bench/artifacts/baseline.json +106 -0
  4. package/bench/artifacts/best-config-20260220T011934Z.json +13 -0
  5. package/bench/artifacts/best-config-20260220T014624Z.json +13 -0
  6. package/bench/artifacts/best-config-latest.json +13 -0
  7. package/bench/artifacts/gate-latest.json +22 -0
  8. package/bench/artifacts/latest.json +150 -0
  9. package/bench/artifacts/runs/20260220T011934Z/metrics.csv +7 -0
  10. package/bench/artifacts/runs/20260220T011934Z/results.json +418 -0
  11. package/bench/artifacts/runs/20260220T011934Z/summary.md +83 -0
  12. package/bench/artifacts/runs/20260220T014624Z/metrics.csv +7 -0
  13. package/bench/artifacts/runs/20260220T014624Z/results.json +490 -0
  14. package/bench/artifacts/runs/20260220T014624Z/summary.md +83 -0
  15. package/bench/dataset.js +288 -0
  16. package/bench/index.js +567 -0
  17. package/bench/metrics.js +163 -0
  18. package/bench/runners.js +146 -0
  19. package/index.js +295 -70
  20. package/lib/active-recall.js +24 -8
  21. package/lib/domain-indexer.js +131 -0
  22. package/lib/embeddings.js +233 -0
  23. package/lib/entity-linker.js +19 -4
  24. package/lib/expand-cache.js +112 -0
  25. package/lib/graph-skill-traversal.js +84 -0
  26. package/lib/graph-temporal.js +297 -0
  27. package/lib/metrics.js +163 -0
  28. package/lib/moc-generator.js +111 -0
  29. package/lib/response-guidance/index.js +577 -0
  30. package/lib/search-projections.js +62 -0
  31. package/lib/search.js +472 -180
  32. package/lib/skills-manifest.js +146 -0
  33. package/lib/sqlite-index.js +378 -0
  34. package/lib/store.js +406 -8
  35. package/lib/unified-graph.js +428 -0
  36. package/lib/vector-index.js +483 -0
  37. package/package.json +19 -6
  38. package/schema/base.yaml +49 -0
  39. package/schema/moc.yaml +50 -0
package/SKILL.md CHANGED
@@ -66,9 +66,11 @@ node ~/.openclaw/skills/mcvay-mind/index.js query --type preference --days 7
66
66
 
67
67
  # Search (unified)
68
68
  node ~/.openclaw/skills/mcvay-mind/index.js search "query terms"
69
+ node ~/.openclaw/skills/mcvay-mind/index.js search "latest timeout preference" --mode hybrid-v2 --graph-boost 0.25
69
70
 
70
71
  # Active recall (context surfacing)
71
72
  node ~/.openclaw/skills/mcvay-mind/index.js recall "topic"
73
+ node ~/.openclaw/skills/mcvay-mind/index.js recall "codex coding" --graph-boost 0.3 --include-stale
72
74
 
73
75
  # Entity linking
74
76
  node ~/.openclaw/skills/mcvay-mind/index.js link
@@ -142,6 +144,12 @@ created: 2026-02-16T12:00:00.000Z
142
144
  updated: 2026-02-16T12:00:00.000Z
143
145
  tags: [tag1, tag2]
144
146
  links: [decision/choice-1, preference/user-pref]
147
+ entities: [codex, timeout]
148
+ valid_from: 2026-02-16T12:00:00.000Z
149
+ valid_to: 2026-03-01T00:00:00.000Z
150
+ supersedes: [preference/old-timeout]
151
+ conflicts_with: [lesson/contrary-observation]
152
+ salience: 0.75
145
153
  confidence: 90
146
154
  source: agent
147
155
  ---
@@ -188,6 +196,7 @@ Keywords that should trigger recall:
188
196
  ├── lib/
189
197
  │ ├── store.js # Memory CRUD operations
190
198
  │ ├── search.js # Full-text search
199
+ │ ├── graph-temporal.js # Typed graph traversal + temporal ranking
191
200
  │ └── entity-linker.js # Wiki-link extraction & knowledge graph
192
201
  └── schema/ # YAML schemas
193
202
  ├── base.yaml
@@ -0,0 +1,49 @@
1
+ # Benchmark Harness
2
+
3
+ Offline benchmark + auto-tuning for McVay Mind retrieval.
4
+
5
+ ## Commands
6
+
7
+ From workspace root:
8
+
9
+ - `npm run bench:all` - tune + benchmark + regression gate
10
+ - `npm run bench:run` - benchmark with latest/default config
11
+ - `npm run bench:tune` - run hyperparameter tuning only
12
+ - `npm run bench:gate` - evaluate latest run vs baseline
13
+
14
+ From `skills/mcvay-mind` (equivalent):
15
+
16
+ - `npm run bench:all` - tune + benchmark + regression gate
17
+ - `npm run bench:run` - benchmark with latest/default config
18
+ - `npm run bench:tune` - run hyperparameter tuning only
19
+ - `npm run bench:gate` - evaluate latest run vs baseline
20
+
21
+ ## Metrics
22
+
23
+ - Recall@k
24
+ - MRR
25
+ - nDCG@k
26
+ - Latency (mean, p50, p95)
27
+ - Bootstrap confidence intervals
28
+ - Paired bootstrap p-value for recall lift
29
+
30
+ ## Offline
31
+
32
+ For strict offline runs, set:
33
+
34
+ ```bash
35
+ MCVAY_EMBED_ENABLED=false npm run bench:all
36
+ ```
37
+
38
+ ## Artifacts
39
+
40
+ Written to `bench/artifacts/`:
41
+
42
+ - `runs/<timestamp>/summary.md`
43
+ - `runs/<timestamp>/metrics.csv`
44
+ - `runs/<timestamp>/results.json`
45
+ - `best-config-<timestamp>.json`
46
+ - `best-config-latest.json`
47
+ - `baseline.json`
48
+ - `latest.json`
49
+ - `gate-latest.json`
@@ -0,0 +1,106 @@
1
+ {
2
+ "created": "2026-02-20T01:21:09.340Z",
3
+ "runId": "20260220T011934Z",
4
+ "hybridSummary": {
5
+ "sampleSize": 4,
6
+ "metrics": {
7
+ "recallAtK": {
8
+ "mean": 0.25,
9
+ "lower": 0,
10
+ "upper": 0.75
11
+ },
12
+ "mrr": {
13
+ "mean": 0.125,
14
+ "lower": 0,
15
+ "upper": 0.375
16
+ },
17
+ "ndcgAtK": {
18
+ "mean": 0.15773243839286438,
19
+ "lower": 0,
20
+ "upper": 0.47319731517859315
21
+ }
22
+ },
23
+ "latency": {
24
+ "meanMs": 3445.17079825,
25
+ "p50Ms": 3344.280292,
26
+ "p95Ms": 3533.913265
27
+ },
28
+ "perTask": [
29
+ {
30
+ "rr": 0,
31
+ "recall": 0,
32
+ "ndcg": 0,
33
+ "latencyMs": 3344.280292
34
+ },
35
+ {
36
+ "rr": 0,
37
+ "recall": 0,
38
+ "ndcg": 0,
39
+ "latencyMs": 4020.542328
40
+ },
41
+ {
42
+ "rr": 0.5,
43
+ "recall": 1,
44
+ "ndcg": 0.6309297535714575,
45
+ "latencyMs": 3533.913265
46
+ },
47
+ {
48
+ "rr": 0,
49
+ "recall": 0,
50
+ "ndcg": 0,
51
+ "latencyMs": 2881.947308
52
+ }
53
+ ]
54
+ },
55
+ "candidateSummary": {
56
+ "sampleSize": 4,
57
+ "metrics": {
58
+ "recallAtK": {
59
+ "mean": 0,
60
+ "lower": 0,
61
+ "upper": 0
62
+ },
63
+ "mrr": {
64
+ "mean": 0,
65
+ "lower": 0,
66
+ "upper": 0
67
+ },
68
+ "ndcgAtK": {
69
+ "mean": 0,
70
+ "lower": 0,
71
+ "upper": 0
72
+ }
73
+ },
74
+ "latency": {
75
+ "meanMs": 0.76737475,
76
+ "p50Ms": 0.746852,
77
+ "p95Ms": 0.758073
78
+ },
79
+ "perTask": [
80
+ {
81
+ "rr": 0,
82
+ "recall": 0,
83
+ "ndcg": 0,
84
+ "latencyMs": 0.746852
85
+ },
86
+ {
87
+ "rr": 0,
88
+ "recall": 0,
89
+ "ndcg": 0,
90
+ "latencyMs": 0.858205
91
+ },
92
+ {
93
+ "rr": 0,
94
+ "recall": 0,
95
+ "ndcg": 0,
96
+ "latencyMs": 0.758073
97
+ },
98
+ {
99
+ "rr": 0,
100
+ "recall": 0,
101
+ "ndcg": 0,
102
+ "latencyMs": 0.706369
103
+ }
104
+ ]
105
+ }
106
+ }
@@ -0,0 +1,13 @@
1
+ {
2
+ "config": {
3
+ "minSimilarity": 0.25,
4
+ "rrfK": 60,
5
+ "bm25TopN": 60,
6
+ "annTopN": 80,
7
+ "graphBoost": 0
8
+ },
9
+ "recallAtK": 0.3333333333333333,
10
+ "mrr": 0.25,
11
+ "ndcgAtK": 0.271821625595243,
12
+ "latencyP95Ms": 106.768316
13
+ }
@@ -0,0 +1,13 @@
1
+ {
2
+ "config": {
3
+ "minSimilarity": 0.25,
4
+ "rrfK": 40,
5
+ "bm25TopN": 40,
6
+ "annTopN": 60,
7
+ "graphBoost": 0
8
+ },
9
+ "recallAtK": 0.5714285714285714,
10
+ "mrr": 0.369047619047619,
11
+ "ndcgAtK": 0.42091351862245524,
12
+ "latencyP95Ms": 3378.864337
13
+ }
@@ -0,0 +1,13 @@
1
+ {
2
+ "config": {
3
+ "minSimilarity": 0.25,
4
+ "rrfK": 40,
5
+ "bm25TopN": 40,
6
+ "annTopN": 60,
7
+ "graphBoost": 0
8
+ },
9
+ "recallAtK": 0.5714285714285714,
10
+ "mrr": 0.369047619047619,
11
+ "ndcgAtK": 0.42091351862245524,
12
+ "latencyP95Ms": 3378.864337
13
+ }
@@ -0,0 +1,22 @@
1
+ {
2
+ "timestamp": "2026-02-20T02:56:46.872Z",
3
+ "baselineRunId": "20260220T011934Z",
4
+ "latestRunId": "20260220T014624Z",
5
+ "gate": {
6
+ "pass": false,
7
+ "recallDrop": -833333333.3333334,
8
+ "latencyRegression": 3761.516071671198,
9
+ "failRecall": false,
10
+ "failLatency": true
11
+ },
12
+ "acceptance": {
13
+ "pass": false,
14
+ "latencyRegression": -0.07326701690082414,
15
+ "pValueThreshold": 0.05,
16
+ "maxLatencyRegression": 0.2,
17
+ "failLift": true,
18
+ "failPValue": true,
19
+ "failLatency": false,
20
+ "failReproducibility": true
21
+ }
22
+ }
@@ -0,0 +1,150 @@
1
+ {
2
+ "created": "2026-02-20T02:56:46.869Z",
3
+ "runId": "20260220T014624Z",
4
+ "baseline": {
5
+ "sampleSize": 6,
6
+ "metrics": {
7
+ "recallAtK": {
8
+ "mean": 0.8333333333333334,
9
+ "lower": 0.5,
10
+ "upper": 1
11
+ },
12
+ "mrr": {
13
+ "mean": 0.6111111111111112,
14
+ "lower": 0.27777777777777773,
15
+ "upper": 0.888888888888889
16
+ },
17
+ "ndcgAtK": {
18
+ "mean": 0.6666666666666666,
19
+ "lower": 0.3333333333333333,
20
+ "upper": 0.9166666666666666
21
+ }
22
+ },
23
+ "latency": {
24
+ "meanMs": 2599.9500551666665,
25
+ "p50Ms": 2385.250471,
26
+ "p95Ms": 3077.760151
27
+ },
28
+ "perTask": [
29
+ {
30
+ "rr": 1,
31
+ "recall": 1,
32
+ "ndcg": 1,
33
+ "latencyMs": 3070.747261
34
+ },
35
+ {
36
+ "rr": 0,
37
+ "recall": 0,
38
+ "ndcg": 0,
39
+ "latencyMs": 3077.760151
40
+ },
41
+ {
42
+ "rr": 1,
43
+ "recall": 1,
44
+ "ndcg": 1,
45
+ "latencyMs": 3776.156462
46
+ },
47
+ {
48
+ "rr": 1,
49
+ "recall": 1,
50
+ "ndcg": 1,
51
+ "latencyMs": 2385.250471
52
+ },
53
+ {
54
+ "rr": 0.3333333333333333,
55
+ "recall": 1,
56
+ "ndcg": 0.5,
57
+ "latencyMs": 1707.033999
58
+ },
59
+ {
60
+ "rr": 0.3333333333333333,
61
+ "recall": 1,
62
+ "ndcg": 0.5,
63
+ "latencyMs": 1582.751987
64
+ }
65
+ ]
66
+ },
67
+ "candidate": {
68
+ "sampleSize": 6,
69
+ "metrics": {
70
+ "recallAtK": {
71
+ "mean": 0.8333333333333334,
72
+ "lower": 0.5,
73
+ "upper": 1
74
+ },
75
+ "mrr": {
76
+ "mean": 0.6111111111111112,
77
+ "lower": 0.27777777777777773,
78
+ "upper": 0.888888888888889
79
+ },
80
+ "ndcgAtK": {
81
+ "mean": 0.6666666666666666,
82
+ "lower": 0.3333333333333333,
83
+ "upper": 0.9166666666666666
84
+ }
85
+ },
86
+ "latency": {
87
+ "meanMs": 2237.8700451666664,
88
+ "p50Ms": 2144.505668,
89
+ "p95Ms": 2852.261846
90
+ },
91
+ "perTask": [
92
+ {
93
+ "rr": 1,
94
+ "recall": 1,
95
+ "ndcg": 1,
96
+ "latencyMs": 2216.417736
97
+ },
98
+ {
99
+ "rr": 0,
100
+ "recall": 0,
101
+ "ndcg": 0,
102
+ "latencyMs": 2852.261846
103
+ },
104
+ {
105
+ "rr": 1,
106
+ "recall": 1,
107
+ "ndcg": 1,
108
+ "latencyMs": 3482.541869
109
+ },
110
+ {
111
+ "rr": 1,
112
+ "recall": 1,
113
+ "ndcg": 1,
114
+ "latencyMs": 2144.505668
115
+ },
116
+ {
117
+ "rr": 0.3333333333333333,
118
+ "recall": 1,
119
+ "ndcg": 0.5,
120
+ "latencyMs": 1443.53379
121
+ },
122
+ {
123
+ "rr": 0.3333333333333333,
124
+ "recall": 1,
125
+ "ndcg": 0.5,
126
+ "latencyMs": 1287.959362
127
+ }
128
+ ]
129
+ },
130
+ "significance": {
131
+ "baseline": "mcvay-hybrid",
132
+ "candidate": "mcvay-hybrid-v2",
133
+ "lift": 0,
134
+ "pValue": 1
135
+ },
136
+ "reproducibility": {
137
+ "variancePct": 25.00000000000001,
138
+ "stable": false
139
+ },
140
+ "acceptance": {
141
+ "pass": false,
142
+ "latencyRegression": -0.07326701690082414,
143
+ "pValueThreshold": 0.05,
144
+ "maxLatencyRegression": 0.2,
145
+ "failLift": true,
146
+ "failPValue": true,
147
+ "failLatency": false,
148
+ "failReproducibility": true
149
+ }
150
+ }
@@ -0,0 +1,7 @@
1
+ variant,recall_at_k,mrr,ndcg_at_k,latency_mean_ms,latency_p95_ms,sample_size
2
+ mcvay-keyword,0.250000,0.125000,0.157732,3234.290,3370.587,4
3
+ mcvay-semantic,0.000000,0.000000,0.000000,3083.167,3131.873,4
4
+ mcvay-hybrid,0.250000,0.125000,0.157732,3445.171,3533.913,4
5
+ mcvay-hybrid-v2-default,0.000000,0.000000,0.000000,0.874,0.920,4
6
+ mcvay-hybrid-v2,0.000000,0.000000,0.000000,0.767,0.758,4
7
+ mem0-local-fallback-lexical,1.000000,0.875000,0.907732,1.672,1.847,4