@psiclawops/hypermem 0.9.7 → 0.9.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +16 -0
- package/INSTALL.md +29 -9
- package/README.md +5 -1
- package/assets/default-config.json +20 -5
- package/assets/runtime-validation-fixture.json +123 -0
- package/bin/hypermem-cleanup.mjs +334 -0
- package/bin/hypermem-doctor.mjs +71 -0
- package/bin/hypermem-validate-runtime.mjs +282 -0
- package/dist/compositor.d.ts +43 -5
- package/dist/compositor.d.ts.map +1 -1
- package/dist/compositor.js +802 -30
- package/dist/entity-bridge-backfill.d.ts +66 -0
- package/dist/entity-bridge-backfill.d.ts.map +1 -0
- package/dist/entity-bridge-backfill.js +145 -0
- package/dist/entity-bridge-store.d.ts +164 -0
- package/dist/entity-bridge-store.d.ts.map +1 -0
- package/dist/entity-bridge-store.js +488 -0
- package/dist/entity-extractor.d.ts +124 -0
- package/dist/entity-extractor.d.ts.map +1 -0
- package/dist/entity-extractor.js +382 -0
- package/dist/entity-ppr.d.ts +55 -0
- package/dist/entity-ppr.d.ts.map +1 -0
- package/dist/entity-ppr.js +180 -0
- package/dist/hybrid-retrieval.d.ts +27 -0
- package/dist/hybrid-retrieval.d.ts.map +1 -1
- package/dist/hybrid-retrieval.js +26 -1
- package/dist/index.d.ts +19 -0
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +63 -13
- package/dist/message-store.d.ts +36 -0
- package/dist/message-store.d.ts.map +1 -1
- package/dist/message-store.js +155 -1
- package/dist/open-domain.d.ts +13 -4
- package/dist/open-domain.d.ts.map +1 -1
- package/dist/open-domain.js +222 -20
- package/dist/profiles.js +13 -13
- package/dist/question-shape.d.ts +73 -0
- package/dist/question-shape.d.ts.map +1 -0
- package/dist/question-shape.js +230 -0
- package/dist/schema.d.ts +1 -1
- package/dist/schema.d.ts.map +1 -1
- package/dist/schema.js +92 -1
- package/dist/topic-detector.d.ts.map +1 -1
- package/dist/topic-detector.js +22 -9
- package/dist/types.d.ts +176 -2
- package/dist/types.d.ts.map +1 -1
- package/dist/vector-store.d.ts +6 -0
- package/dist/vector-store.d.ts.map +1 -1
- package/dist/vector-store.js +3 -0
- package/docs/DIAGNOSTICS.md +32 -0
- package/docs/INTEGRATION_VALIDATION.md +9 -4
- package/docs/TUNING.md +21 -21
- package/memory-plugin/dist/index.js +3 -1
- package/memory-plugin/package.json +8 -7
- package/package.json +10 -4
- package/plugin/dist/index.d.ts.map +1 -1
- package/plugin/dist/index.js +114 -11
- package/plugin/dist/index.js.map +1 -1
- package/plugin/package.json +9 -8
- package/scripts/install-runtime.mjs +4 -1
package/CHANGELOG.md
CHANGED
|
@@ -2,6 +2,22 @@
|
|
|
2
2
|
|
|
3
3
|
All notable changes to hypermem are documented here.
|
|
4
4
|
|
|
5
|
+
## 0.9.9 - 2026-05-09
|
|
6
|
+
|
|
7
|
+
- **Replay duplication cleanup is operator-visible.** `hypermem-cleanup` now ships as a package CLI, dry-runs by default, reports timestamp-stamped Gateway replay duplicate debt without printing message content, and applies repairs only with `--apply` after creating a SQLite backup, rewiring local message references, rebuilding FTS, and passing integrity checks.
|
|
8
|
+
- **Doctor warns on stamped replay duplicate debt.** `hypermem-doctor` now scans the HyperMem data dir for timestamp-stamped user replay duplicate rows and emits a hard-to-miss `stamped-replay-duplicate-debt` warning with the cleanup dry-run command.
|
|
9
|
+
- **Operational memory defaults tightened.** Shipped defaults now favor stable pressure over maximum recall: lower first-turn warm history load, bounded facts/keystones, 250 hot history messages, and cross-session context off by default while LoCoMo tuning continues.
|
|
10
|
+
- **Boundary user turns are persisted reliably.** The context-engine plugin now recovers the current plain user message at the afterTurn boundary when OpenClaw pre-prompt counts already include it, preventing assistant-only replay gaps in HyperMem's SQLite history.
|
|
11
|
+
- **Plugin audit gate is clean.** Context-engine and memory-plugin packages pin the transitive `fast-xml-builder` audit fix through package overrides and refreshed lockfiles.
|
|
12
|
+
|
|
13
|
+
## 0.9.8 - 2026-05-03
|
|
14
|
+
|
|
15
|
+
- **Deterministic runtime validation added.** `hypermem-validate-runtime` now ships with the package and installer payload, seeds an isolated validation fixture, and verifies message writes, FTS, structured facts, vector indexing/search when enabled, warm, and compose without an answer LLM. Install docs now treat runtime validation as part of completion.
|
|
16
|
+
- **Long-horizon recall widened for EasyLoCoMo.** Steady semantic recall now uses a larger memory envelope and candidate pool, closer to the published Mem0 token-efficient LoCoMo posture instead of starving single-hop and temporal evidence.
|
|
17
|
+
- **Temporal recall is query-shaped.** Temporal retrieval now over-fetches indexed facts, scores them against query terms, and injects relevant temporal evidence rather than blindly taking the latest rows. This targets wrong-date failures on `when did X` questions.
|
|
18
|
+
- **Old episodic evidence is preserved for benchmark-style questions.** Long-horizon QA disables production recency decay and relaxes FTS-only episode floors so valid historical evidence is not filtered out solely because it is old.
|
|
19
|
+
- **Long-horizon fusion weights now favor exact evidence.** Benchmark-style questions use a lower RRF k, modest FTS weight boost, and wider reranker candidate window so names, dates, and quoted objects can outrank semantically-near but wrong old chatter without changing normal production turns.
|
|
20
|
+
|
|
5
21
|
## 0.9.7 - 2026-05-02
|
|
6
22
|
|
|
7
23
|
- **OpenClaw Plugin SDK imports modernized.** HyperCompositor and HyperMem memory now import plugin entry helpers through the canonical public Plugin SDK surface and refresh OpenClaw/Plugin SDK build provenance to the validated runtime.
|
package/INSTALL.md
CHANGED
|
@@ -79,8 +79,9 @@ Release validation details live in [docs/INTEGRATION_VALIDATION.md](./docs/INTEG
|
|
|
79
79
|
`hypermem-install` creates the current recommended starter config automatically when `~/.openclaw/hypermem/config.json` is missing. The shipped starter config is installation-safe FTS5 mode:
|
|
80
80
|
|
|
81
81
|
- `embedding.provider: "none"`
|
|
82
|
-
- `warmHistoryBudgetFraction: 0.
|
|
83
|
-
-
|
|
82
|
+
- `warmHistoryBudgetFraction: 0.27`
|
|
83
|
+
- bounded fact, keystone, and history caps
|
|
84
|
+
- cross-session context disabled by default
|
|
84
85
|
|
|
85
86
|
This is intentional. A clean first install should load, compose, and verify without requiring Ollama or an external API key. The result is **YELLOW** install readiness: HyperMem is active with keyword recall, but semantic vector recall is disabled. Upgrade to Ollama, OpenRouter, or Gemini after the baseline install is active.
|
|
86
87
|
|
|
@@ -152,15 +153,34 @@ OpenClaw loads the plugin runtime from `~/.openclaw/plugins/hypermem/`.
|
|
|
152
153
|
|
|
153
154
|
### Verification checkpoints
|
|
154
155
|
|
|
155
|
-
Run the installed-system doctor first:
|
|
156
|
+
Run the installed-system doctor and deterministic runtime validator first:
|
|
156
157
|
|
|
157
158
|
```bash
|
|
158
159
|
hypermem-doctor --fix-plan
|
|
160
|
+
hypermem-cleanup --data-dir ~/.openclaw/hypermem
|
|
161
|
+
hypermem-validate-runtime --allow-no-embedding
|
|
159
162
|
```
|
|
160
163
|
|
|
161
164
|
`hypermem-doctor` is read-only. It inspects OpenClaw config, HyperMem config, plugin wiring, recommended OpenClaw runtime settings, data directory shape, runtime plugin load state, and active model context-window risk. It prints exact `openclaw config set ...` commands when something needs review. It does not edit config or restart the gateway.
|
|
162
165
|
|
|
163
|
-
Expected result after a complete install: no required failures
|
|
166
|
+
`hypermem-validate-runtime` writes a tiny isolated validation agent, then checks message persistence, FTS, structured facts, vector indexing/search when embeddings are enabled, warm, and compose. It does not call an answer LLM. Expected result after a complete install: doctor has no required failures and runtime validation reports `consistencyScore: 1`. Recommendation warnings should be reviewed before production use, especially context-window warnings for GPT/OpenAI-compatible/local gateways.
|
|
167
|
+
|
|
168
|
+
### Replay duplicate cleanup after upgrade
|
|
169
|
+
|
|
170
|
+
If agents report repeated user messages after a restart, run the read-only cleanup scan. This is now part of the obvious post-upgrade diagnostic path, not tribal knowledge:
|
|
171
|
+
|
|
172
|
+
```bash
|
|
173
|
+
hypermem-doctor --fix-plan
|
|
174
|
+
hypermem-cleanup --data-dir ~/.openclaw/hypermem
|
|
175
|
+
```
|
|
176
|
+
|
|
177
|
+
Only apply during a maintenance window after Gateway/OpenClaw writers are stopped:
|
|
178
|
+
|
|
179
|
+
```bash
|
|
180
|
+
hypermem-cleanup --data-dir ~/.openclaw/hypermem --apply
|
|
181
|
+
```
|
|
182
|
+
|
|
183
|
+
Apply mode writes a SQLite backup, rewrites local references, rebuilds FTS, and rolls back unless integrity and foreign-key checks pass.
|
|
164
184
|
|
|
165
185
|
Walk the install state machine explicitly if you need a manual check:
|
|
166
186
|
|
|
@@ -849,12 +869,12 @@ Key starter defaults:
|
|
|
849
869
|
"budgetFraction": 0.6,
|
|
850
870
|
"contextWindowReserve": 0.25,
|
|
851
871
|
"targetBudgetFraction": 0.50,
|
|
852
|
-
"warmHistoryBudgetFraction": 0.
|
|
853
|
-
"maxFacts":
|
|
872
|
+
"warmHistoryBudgetFraction": 0.27,
|
|
873
|
+
"maxFacts": 25,
|
|
854
874
|
"maxHistoryMessages": 250,
|
|
855
875
|
"maxCrossSessionContext": 0,
|
|
856
|
-
"keystoneHistoryFraction": 0.
|
|
857
|
-
"keystoneMaxMessages":
|
|
876
|
+
"keystoneHistoryFraction": 0.15,
|
|
877
|
+
"keystoneMaxMessages": 12,
|
|
858
878
|
"hyperformProfile": "standard"
|
|
859
879
|
}
|
|
860
880
|
}
|
|
@@ -929,7 +949,7 @@ Use it during install and after model changes. `--strict` exits non-zero if a mo
|
|
|
929
949
|
```json
|
|
930
950
|
{
|
|
931
951
|
"compositor": {
|
|
932
|
-
"budgetFraction": 0.
|
|
952
|
+
"budgetFraction": 0.6,
|
|
933
953
|
"contextWindowReserve": 0.25,
|
|
934
954
|
"warmHistoryBudgetFraction": 0.27,
|
|
935
955
|
"contextWindowOverrides": {
|
package/README.md
CHANGED
|
@@ -455,11 +455,15 @@ openclaw config set plugins.allow '["existing-plugin","hypercompositor","hyperme
|
|
|
455
455
|
|
|
456
456
|
openclaw gateway restart
|
|
457
457
|
hypermem-doctor --fix-plan
|
|
458
|
+
hypermem-cleanup --data-dir ~/.openclaw/hypermem
|
|
458
459
|
hypermem-status --health
|
|
460
|
+
hypermem-validate-runtime --allow-no-embedding
|
|
459
461
|
hypermem-model-audit --strict
|
|
460
462
|
```
|
|
461
463
|
|
|
462
|
-
`hypermem-
|
|
464
|
+
`hypermem-cleanup` is the operator-safe repair tool for timestamp-stamped Gateway replay duplicate rows. It is dry-run by default, prints no message content, creates backups in apply mode, rewrites local message references, rebuilds FTS, and commits only after SQLite integrity checks pass. `hypermem-doctor` warns with `stamped-replay-duplicate-debt` when cleanup should be considered.
|
|
465
|
+
|
|
466
|
+
`hypermem-doctor` is the configuration confidence check: it validates plugin wiring, plugin registry refresh readiness, runtime load state, recommended OpenClaw settings such as `contextPruning.mode=off`, GPT-5 personality overlay off, startup/bootstrap injection sizing, compaction safety settings including `maxActiveTranscriptBytes` remaining unset for HyperMem-managed compaction, HyperMem data files, and model context-window overrides for GPT/OpenAI-compatible/local gateways. It is read-only and prints a reviewable fix plan. `hypermem-validate-runtime` is the deterministic component check: it seeds a tiny isolated fixture and verifies write, FTS, facts, vector indexing/search when enabled, warm, and compose without using an answer LLM.
|
|
463
467
|
|
|
464
468
|
Full install, upgrade, source-clone, embedding provider, reranker, fleet config, and rollback guidance lives in **[INSTALL.md](./INSTALL.md)**.
|
|
465
469
|
|
|
@@ -23,13 +23,28 @@
|
|
|
23
23
|
"budgetFraction": 0.6,
|
|
24
24
|
"contextWindowReserve": 0.25,
|
|
25
25
|
"targetBudgetFraction": 0.5,
|
|
26
|
-
"warmHistoryBudgetFraction": 0.
|
|
27
|
-
"maxFacts":
|
|
26
|
+
"warmHistoryBudgetFraction": 0.27,
|
|
27
|
+
"maxFacts": 25,
|
|
28
28
|
"maxHistoryMessages": 250,
|
|
29
29
|
"maxCrossSessionContext": 0,
|
|
30
|
-
"keystoneHistoryFraction": 0.
|
|
31
|
-
"keystoneMaxMessages":
|
|
32
|
-
"hyperformProfile": "standard"
|
|
30
|
+
"keystoneHistoryFraction": 0.15,
|
|
31
|
+
"keystoneMaxMessages": 12,
|
|
32
|
+
"hyperformProfile": "standard",
|
|
33
|
+
"entityBridge": {
|
|
34
|
+
"enabled": false,
|
|
35
|
+
"structuredHandoff": false,
|
|
36
|
+
"pprEnabled": false,
|
|
37
|
+
"liveIndexingEnabled": false,
|
|
38
|
+
"maxTokens": 1200,
|
|
39
|
+
"maxGraphEdges": 5000,
|
|
40
|
+
"maxGraphNodes": 2000,
|
|
41
|
+
"maxCandidateMessagesBeforeRanking": 500,
|
|
42
|
+
"maxSeedEntities": 4,
|
|
43
|
+
"maxSeedFacets": 4,
|
|
44
|
+
"pprMaxIterations": 20,
|
|
45
|
+
"pprTeleportProbability": 0.15,
|
|
46
|
+
"pprConvergenceTolerance": 1e-06
|
|
47
|
+
}
|
|
33
48
|
},
|
|
34
49
|
"indexer": {
|
|
35
50
|
"enabled": true,
|
|
@@ -0,0 +1,123 @@
|
|
|
1
|
+
{
|
|
2
|
+
"schemaVersion": 1,
|
|
3
|
+
"name": "hypermem-runtime-validation",
|
|
4
|
+
"description": "Small deterministic fixture for installed HyperMem runtime validation. It validates message storage, FTS, library facts, vector indexing/search, warm, compose, current-value recall, temporal details, and negative-control recall without an answer LLM.",
|
|
5
|
+
"agentPrefix": "hypermem-runtime-validation",
|
|
6
|
+
"messages": [
|
|
7
|
+
{
|
|
8
|
+
"sessionId": "session-1",
|
|
9
|
+
"date": "2026-01-10T09:00:00Z",
|
|
10
|
+
"role": "user",
|
|
11
|
+
"content": "[runtime-fixture alpha] Riley stores the launch checklist in a teal binder in the north lab."
|
|
12
|
+
},
|
|
13
|
+
{
|
|
14
|
+
"sessionId": "session-1",
|
|
15
|
+
"date": "2026-01-10T09:01:00Z",
|
|
16
|
+
"role": "assistant",
|
|
17
|
+
"content": "Recorded: teal binder, north lab, launch checklist."
|
|
18
|
+
},
|
|
19
|
+
{
|
|
20
|
+
"sessionId": "session-1",
|
|
21
|
+
"date": "2026-01-10T09:02:00Z",
|
|
22
|
+
"role": "user",
|
|
23
|
+
"content": "[runtime-fixture beta] The backup generator codename is Copper Fox and it runs on Tuesdays."
|
|
24
|
+
},
|
|
25
|
+
{
|
|
26
|
+
"sessionId": "session-2",
|
|
27
|
+
"date": "2026-02-14T15:30:00Z",
|
|
28
|
+
"role": "user",
|
|
29
|
+
"content": "[runtime-fixture gamma] The old incident channel was #ops-red, but it was replaced by #ops-green on February 14."
|
|
30
|
+
},
|
|
31
|
+
{
|
|
32
|
+
"sessionId": "session-2",
|
|
33
|
+
"date": "2026-02-14T15:31:00Z",
|
|
34
|
+
"role": "assistant",
|
|
35
|
+
"content": "Current incident channel should be #ops-green; #ops-red is old."
|
|
36
|
+
},
|
|
37
|
+
{
|
|
38
|
+
"sessionId": "session-2",
|
|
39
|
+
"date": "2026-02-14T15:32:00Z",
|
|
40
|
+
"role": "user",
|
|
41
|
+
"content": "[runtime-fixture delta] Mira's pager handoff window is 18:00 to 20:00 Arizona time."
|
|
42
|
+
},
|
|
43
|
+
{
|
|
44
|
+
"sessionId": "session-3",
|
|
45
|
+
"date": "2026-03-22T11:45:00Z",
|
|
46
|
+
"role": "user",
|
|
47
|
+
"content": "[runtime-fixture epsilon] The release mascot is a brass otter carrying a small lantern."
|
|
48
|
+
},
|
|
49
|
+
{
|
|
50
|
+
"sessionId": "session-3",
|
|
51
|
+
"date": "2026-03-22T11:46:00Z",
|
|
52
|
+
"role": "assistant",
|
|
53
|
+
"content": "Noted release mascot: brass otter with lantern."
|
|
54
|
+
},
|
|
55
|
+
{
|
|
56
|
+
"sessionId": "session-3",
|
|
57
|
+
"date": "2026-03-22T11:47:00Z",
|
|
58
|
+
"role": "user",
|
|
59
|
+
"content": "[runtime-fixture zeta] There is no record of a purple submarine in this validation fixture."
|
|
60
|
+
}
|
|
61
|
+
],
|
|
62
|
+
"facts": [
|
|
63
|
+
{
|
|
64
|
+
"domain": "runtime-validation",
|
|
65
|
+
"content": "Runtime validation structured fact: the backup generator codename is Copper Fox and it runs on Tuesdays."
|
|
66
|
+
},
|
|
67
|
+
{
|
|
68
|
+
"domain": "runtime-validation",
|
|
69
|
+
"content": "Runtime validation structured fact: the current incident channel is #ops-green; #ops-red is obsolete."
|
|
70
|
+
},
|
|
71
|
+
{
|
|
72
|
+
"domain": "runtime-validation",
|
|
73
|
+
"content": "Runtime validation structured fact: Mira's pager handoff window is 18:00 to 20:00 Arizona time."
|
|
74
|
+
}
|
|
75
|
+
],
|
|
76
|
+
"probes": [
|
|
77
|
+
{
|
|
78
|
+
"id": "message-fts-exact-anchor",
|
|
79
|
+
"component": "message-fts",
|
|
80
|
+
"query": "teal binder north lab launch checklist",
|
|
81
|
+
"required": ["teal binder", "north lab", "launch checklist"]
|
|
82
|
+
},
|
|
83
|
+
{
|
|
84
|
+
"id": "message-fts-late-session",
|
|
85
|
+
"component": "message-fts",
|
|
86
|
+
"query": "brass otter lantern",
|
|
87
|
+
"required": ["brass otter", "lantern"]
|
|
88
|
+
},
|
|
89
|
+
{
|
|
90
|
+
"id": "semantic-structured-fact",
|
|
91
|
+
"component": "semantic",
|
|
92
|
+
"query": "standby power system codename weekly schedule",
|
|
93
|
+
"required": ["Copper Fox", "Tuesdays"]
|
|
94
|
+
},
|
|
95
|
+
{
|
|
96
|
+
"id": "semantic-current-value",
|
|
97
|
+
"component": "semantic",
|
|
98
|
+
"query": "current incident channel replacement",
|
|
99
|
+
"required": ["#ops-green"]
|
|
100
|
+
},
|
|
101
|
+
{
|
|
102
|
+
"id": "compose-temporal-detail",
|
|
103
|
+
"component": "compose",
|
|
104
|
+
"sessionId": "session-2",
|
|
105
|
+
"query": "When is Mira's pager handoff window?",
|
|
106
|
+
"required": ["18:00", "20:00", "Arizona"]
|
|
107
|
+
},
|
|
108
|
+
{
|
|
109
|
+
"id": "compose-late-session-detail",
|
|
110
|
+
"component": "compose",
|
|
111
|
+
"sessionId": "session-3",
|
|
112
|
+
"query": "What is the release mascot?",
|
|
113
|
+
"required": ["brass otter", "lantern"]
|
|
114
|
+
},
|
|
115
|
+
{
|
|
116
|
+
"id": "compose-negative-control",
|
|
117
|
+
"component": "compose",
|
|
118
|
+
"sessionId": "session-3",
|
|
119
|
+
"query": "What does the fixture say about a purple submarine?",
|
|
120
|
+
"required": ["no record", "purple submarine"]
|
|
121
|
+
}
|
|
122
|
+
]
|
|
123
|
+
}
|
|
@@ -0,0 +1,334 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
/**
|
|
3
|
+
* hypermem-cleanup — operator-safe repair utilities for persisted HyperMem DBs.
|
|
4
|
+
*
|
|
5
|
+
* Default command repairs timestamp-stamped replay duplicates caused by Gateway
|
|
6
|
+
* transcript restore re-recording the same user turn. It is read-only unless
|
|
7
|
+
* --apply is passed, and apply mode writes a SQLite backup first by default.
|
|
8
|
+
*/
|
|
9
|
+
|
|
10
|
+
import { createHash } from 'node:crypto';
import { existsSync, mkdirSync, readdirSync } from 'node:fs';
import { homedir } from 'node:os';
import { basename, dirname, join, resolve } from 'node:path';
import { exit } from 'node:process';
import { DatabaseSync } from 'node:sqlite';
|
|
15
|
+
|
|
16
|
+
// Matches the Gateway replay stamp prefix, e.g. "[Mon 2026-05-09 14:30 UTC]",
// at the start of a user message. Duplicate detection only considers stamped rows.
const STAMPED_PREFIX_RE = /^\[[A-Z][a-z]{2} \d{4}-\d{2}-\d{2} \d{2}:\d{2} [A-Z]{2,4}\]/;
// Default HyperMem data directory. homedir() resolves on every platform,
// unlike process.env.HOME which is unset on Windows.
const DEFAULT_DATA_DIR = join(homedir(), '.openclaw', 'hypermem');
|
|
18
|
+
|
|
19
|
+
/**
 * Minimal flag parser for this CLI.
 * `--flag` with no following value becomes `true`; `--flag value` captures the
 * value; bare tokens accumulate under `_`. No validation happens here.
 * @param {string[]} argv - arguments after `node script`
 * @returns {Record<string, string | boolean | string[]>}
 */
function parseArgs(argv) {
  const parsed = { _: [] };
  let index = 0;
  while (index < argv.length) {
    const token = argv[index];
    index += 1;
    if (!token.startsWith('--')) {
      parsed._.push(token);
      continue;
    }
    const name = token.slice(2);
    const value = argv[index];
    // A missing or flag-shaped follower means this option is boolean.
    if (value == null || value.startsWith('--')) {
      parsed[name] = true;
    } else {
      parsed[name] = value;
      index += 1;
    }
  }
  return parsed;
}
|
|
37
|
+
|
|
38
|
+
// Prints the CLI help text to stdout (invoked for --help). Read-only; the
// option list here must stay in sync with the flags consumed in main().
function usage() {
  console.log(`Usage: hypermem-cleanup [options]

Repairs known persisted-data issues. Default mode is a dry-run scan for
Gateway timestamp-stamped user replay duplicates.

Options:
  --data-dir <path>    HyperMem data dir (default: ~/.openclaw/hypermem)
  --db <path>          Scan one messages.db instead of all agents
  --agent <id>         Restrict --data-dir scan to one agent
  --apply              Delete duplicate rows and repair references
  --backup-dir <path>  Backup directory for apply mode (default: beside DB)
  --no-backup          Disable apply-mode backup (not recommended)
  --json               Print machine-readable JSON
  --examples <n>       Include up to n hashed examples per DB (default: 5)
  --help               Show this help

Safety:
  - dry-run is default
  - content is not printed; examples use SHA-256 prefixes and counts
  - apply mode keeps the earliest row per exact duplicate group
  - apply mode rewrites local message references, rebuilds FTS, and runs
    integrity checks before committing
`);
}
|
|
63
|
+
|
|
64
|
+
/** Filesystem-safe UTC timestamp: colons and dots become dashes. */
function isoStamp() {
  const now = new Date().toISOString();
  return now.replaceAll(':', '-').replaceAll('.', '-');
}
|
|
67
|
+
|
|
68
|
+
/** Quotes a value as a SQL string literal, doubling any embedded single quotes. */
function sqlLiteral(value) {
  const text = String(value);
  const escaped = text.split("'").join("''");
  return `'${escaped}'`;
}
|
|
71
|
+
|
|
72
|
+
function tableExists(db, table) {
|
|
73
|
+
const row = db.prepare("SELECT 1 AS ok FROM sqlite_master WHERE type IN ('table','view') AND name = ?").get(table);
|
|
74
|
+
return Boolean(row);
|
|
75
|
+
}
|
|
76
|
+
|
|
77
|
+
/** True when `table` exists and declares a column named `column`. */
function columnExists(db, table, column) {
  if (!tableExists(db, table)) return false;
  const info = db.prepare(`PRAGMA table_info(${table})`).all();
  for (const row of info) {
    if (row.name === column) return true;
  }
  return false;
}
|
|
81
|
+
|
|
82
|
+
/** Column names of `table` in declaration order, or [] when the table is missing. */
function firstTableColumns(db, table) {
  if (!tableExists(db, table)) return [];
  const rows = db.prepare(`PRAGMA table_info(${table})`).all();
  return rows.map(({ name }) => name);
}
|
|
86
|
+
|
|
87
|
+
/**
 * First 12 hex characters of the SHA-256 of `text`; used so reports can
 * reference message content without ever printing it.
 */
function sha12(text) {
  const digest = createHash('sha256').update(text).digest('hex');
  return digest.substring(0, 12);
}
|
|
90
|
+
|
|
91
|
+
/**
 * Resolves the list of messages.db files to operate on.
 *
 * Precedence: an explicit --db path targets exactly one database; otherwise
 * every agent subdirectory under `<dataDir>/agents` that contains a
 * messages.db becomes a target (optionally restricted to one --agent).
 *
 * @param {{ dataDir?: string, dbPath?: string, agent?: string }} options
 * @returns {Array<{ agent: string, dbPath: string }>} at least one target
 * @throws {Error} when the explicit DB, the agents dir, or every candidate is missing
 */
function resolveDbTargets({ dataDir, dbPath, agent }) {
  if (dbPath) {
    const resolved = resolve(dbPath);
    if (!existsSync(resolved)) throw new Error(`messages.db not found: ${resolved}`);
    // Derive the agent id from the parent directory name. basename() handles
    // platform separators; the previous split('/') broke on Windows paths.
    return [{ agent: agent || basename(dirname(resolved)) || 'unknown', dbPath: resolved }];
  }
  const agentsDir = join(resolve(dataDir || DEFAULT_DATA_DIR), 'agents');
  if (!existsSync(agentsDir)) throw new Error(`HyperMem agents dir not found: ${agentsDir}`);
  // Sorted scan keeps report ordering deterministic across runs.
  const agents = agent ? [agent] : readdirSync(agentsDir).sort();
  const targets = [];
  for (const id of agents) {
    const candidate = join(agentsDir, id, 'messages.db');
    if (existsSync(candidate)) targets.push({ agent: id, dbPath: candidate });
  }
  if (targets.length === 0) throw new Error(`No messages.db targets found under ${agentsDir}`);
  return targets;
}
|
|
108
|
+
|
|
109
|
+
/**
 * Loads every non-null user message for duplicate analysis, joining the
 * conversations table for the session key when that table exists (older
 * schemas without it still scan, just with a NULL session_key).
 * @throws {Error} when the messages table lacks the expected HyperMem columns
 */
function readRows(db) {
  const columns = firstTableColumns(db, 'messages');
  const required = ['text_content', 'conversation_id', 'role'];
  if (!required.every((name) => columns.includes(name))) {
    throw new Error('messages table does not have the expected HyperMem columns');
  }
  let sql;
  if (tableExists(db, 'conversations')) {
    sql = `SELECT m.id, m.conversation_id, c.session_key, m.agent_id, m.role, m.text_content,
              COALESCE(m.tool_calls, '') AS tool_calls,
              COALESCE(m.tool_results, '') AS tool_results,
              COALESCE(m.message_index, 0) AS message_index,
              m.created_at
         FROM messages m
         LEFT JOIN conversations c ON c.id = m.conversation_id
        WHERE m.role = 'user' AND m.text_content IS NOT NULL`;
  } else {
    sql = `SELECT id, conversation_id, NULL AS session_key, agent_id, role, text_content,
              COALESCE(tool_calls, '') AS tool_calls,
              COALESCE(tool_results, '') AS tool_results,
              COALESCE(message_index, 0) AS message_index,
              created_at
         FROM messages
        WHERE role = 'user' AND text_content IS NOT NULL`;
  }
  return db.prepare(sql).all();
}
|
|
133
|
+
|
|
134
|
+
/**
 * Groups timestamp-stamped user rows that are exact duplicates within one
 * conversation (same text and tool payloads). Each returned group keeps the
 * earliest row (lowest message_index, then lowest id) and lists the rest as
 * deletable duplicates. Groups are ordered by the kept row's created_at.
 */
function findStampedUserDuplicateGroups(db) {
  const byKey = new Map();
  for (const row of readRows(db)) {
    const text = String(row.text_content || '');
    if (!STAMPED_PREFIX_RE.test(text.trimStart())) continue;
    // NUL-joined key: exact content + tool payload match within one conversation.
    const key = [row.conversation_id, row.role, text, row.tool_calls || '', row.tool_results || ''].join('\u0000');
    if (!byKey.has(key)) byKey.set(key, []);
    byKey.get(key).push(row);
  }

  const result = [];
  for (const rows of byKey.values()) {
    if (rows.length < 2) continue;
    rows.sort((a, b) => {
      const delta = Number(a.message_index || 0) - Number(b.message_index || 0);
      return delta !== 0 ? delta : Number(a.id) - Number(b.id);
    });
    const [keep, ...duplicates] = rows;
    result.push({ keep, duplicates, all: rows });
  }
  result.sort((a, b) =>
    String(a.keep.created_at || '').localeCompare(String(b.keep.created_at || '')) ||
    Number(a.keep.id) - Number(b.keep.id));
  return result;
}
|
|
160
|
+
|
|
161
|
+
/**
 * Builds the privacy-safe report for one DB: duplicate-group/row counts,
 * affected-conversation totals, the overall first/last timestamps of the
 * duplicate debt, and up to `exampleLimit` per-group examples. Message
 * content is never included — examples carry only a SHA-256 prefix of the
 * kept row's text.
 *
 * @param {{ agent: string, dbPath: string }} target
 * @param {Array<{ keep: object, duplicates: object[], all: object[] }>} groups
 * @param {number} exampleLimit - max hashed examples to include
 */
function summarizeTarget(target, groups, exampleLimit) {
  const duplicateRows = groups.reduce((sum, g) => sum + g.duplicates.length, 0);
  const conversations = new Set(groups.map((g) => g.keep.conversation_id));
  const first = groups[0]?.keep?.created_at || null;
  // Take the latest timestamp across ALL groups. The previous implementation
  // only inspected the final group, which under-reported `lastAt` whenever an
  // earlier group contained a more recent duplicate row.
  let last = null;
  for (const g of groups) {
    for (const r of g.all) {
      const at = r.created_at;
      if (at != null && (last === null || String(at) > String(last))) last = at;
    }
  }
  const examples = groups.slice(0, exampleLimit).map((g) => {
    // Sort each group's timestamps once and reuse for both ends.
    const stamps = g.all.map((r) => r.created_at).sort();
    return {
      conversationId: g.keep.conversation_id,
      sessionKey: g.keep.session_key || null,
      keepId: g.keep.id,
      duplicateIds: g.duplicates.map((r) => r.id),
      count: g.all.length,
      firstAt: stamps[0] || null,
      lastAt: stamps.at(-1) || null,
      textSha256: sha12(String(g.keep.text_content || '')),
    };
  });
  return {
    agent: target.agent,
    dbPath: target.dbPath,
    duplicateGroups: groups.length,
    duplicateRows,
    affectedConversations: conversations.size,
    firstAt: first,
    lastAt: last,
    examples,
  };
}
|
|
189
|
+
|
|
190
|
+
/**
 * Writes a consistent point-in-time copy of the DB via `VACUUM INTO` before
 * any destructive change. The backup lands beside the DB unless `backupDir`
 * overrides the location, and is named `<dbfile>.backup-<timestamp>`.
 *
 * @param {DatabaseSync} db - open handle on the database being backed up
 * @param {string} dbPath - filesystem path of that database
 * @param {string} [backupDir] - optional destination directory (created if missing)
 * @returns {string} the backup file path
 */
function makeBackup(db, dbPath, backupDir) {
  const dir = backupDir ? resolve(backupDir) : dirname(dbPath);
  mkdirSync(dir, { recursive: true });
  // basename() instead of split('/') so Windows paths also yield the file name.
  const backupPath = join(dir, `${basename(dbPath)}.backup-${isoStamp()}`);
  db.exec(`VACUUM INTO ${sqlLiteral(backupPath)}`);
  return backupPath;
}
|
|
197
|
+
|
|
198
|
+
function runIntegrityChecks(db) {
|
|
199
|
+
const integrity = db.prepare('PRAGMA integrity_check').all().map((r) => Object.values(r)[0]);
|
|
200
|
+
const foreignKeys = db.prepare('PRAGMA foreign_key_check').all();
|
|
201
|
+
return { integrity, foreignKeys };
|
|
202
|
+
}
|
|
203
|
+
|
|
204
|
+
/**
 * Throws when integrity results are anything other than a single 'ok' row or
 * when any foreign-key violations exist (only the first 20 are reported).
 */
function assertChecksClean(checks) {
  const { integrity, foreignKeys } = checks;
  if (integrity.length !== 1 || integrity[0] !== 'ok') {
    throw new Error(`integrity_check failed: ${JSON.stringify(integrity)}`);
  }
  if (foreignKeys.length > 0) {
    throw new Error(`foreign_key_check failed: ${JSON.stringify(foreignKeys.slice(0, 20))}`);
  }
}
|
|
209
|
+
|
|
210
|
+
/**
 * Repoints every local reference from a duplicate row (`fromId`) to the kept
 * row (`toId`) so the duplicate can be deleted without leaving dangling
 * references. Derived entity-bridge index rows are dropped instead of
 * repointed; the bridge backfill can rebuild them later if exact mentions are
 * wanted.
 */
function rewriteReferences(db, fromId, toId) {
  if (tableExists(db, 'summary_messages')) {
    // Copy the summary link onto the kept row (ignoring a pre-existing link),
    // then remove the duplicate-side link.
    db.prepare('INSERT OR IGNORE INTO summary_messages (summary_id, message_id) SELECT summary_id, ? FROM summary_messages WHERE message_id = ?').run(toId, fromId);
    db.prepare('DELETE FROM summary_messages WHERE message_id = ?').run(fromId);
  }

  // Straightforward column repoints, applied only when the schema has them.
  const repoints = [
    ['messages', 'parent_id', 'UPDATE messages SET parent_id = ? WHERE parent_id = ?'],
    ['contexts', 'head_message_id', 'UPDATE contexts SET head_message_id = ? WHERE head_message_id = ?'],
    ['composition_snapshots', 'head_message_id', 'UPDATE composition_snapshots SET head_message_id = ? WHERE head_message_id = ?'],
    ['tool_artifacts', 'message_id', 'UPDATE tool_artifacts SET message_id = ? WHERE message_id = ?'],
  ];
  for (const [table, column, sql] of repoints) {
    if (tableExists(db, table) && columnExists(db, table, column)) {
      db.prepare(sql).run(toId, fromId);
    }
  }

  // Bridge rows are derived indexes. Drop duplicate-side rows; operators can run
  // the bridge backfill later if they want to rebuild derived mentions exactly.
  const derivedTables = ['message_entity_mentions', 'message_facet_mentions', 'entity_bridge_message_index'];
  for (const table of derivedTables) {
    if (tableExists(db, table) && columnExists(db, table, 'message_id')) {
      db.prepare(`DELETE FROM ${table} WHERE message_id = ?`).run(fromId);
    }
  }
}
|
|
236
|
+
|
|
237
|
+
// Deletes duplicate rows inside a single IMMEDIATE transaction: references
// are rewired first, then the FTS index and per-conversation counters are
// rebuilt, and the transaction commits only after integrity and foreign-key
// checks pass. Any failure rolls the whole repair back.
function applyGroups(db, groups) {
  const affectedConversations = new Set();
  let deleted = 0;
  // Enforce FK constraints for the duration of the repair.
  db.exec('PRAGMA foreign_keys = ON');
  // BEGIN IMMEDIATE takes the write lock up front so concurrent writers fail fast.
  db.exec('BEGIN IMMEDIATE');
  try {
    for (const group of groups) {
      const keepId = Number(group.keep.id);
      affectedConversations.add(Number(group.keep.conversation_id));
      for (const dup of group.duplicates) {
        const dupId = Number(dup.id);
        // Repoint every local reference to the kept row before deleting.
        rewriteReferences(db, dupId, keepId);
        const result = db.prepare('DELETE FROM messages WHERE id = ?').run(dupId);
        deleted += Number(result.changes || 0);
      }
    }
    // Rebuild the FTS index so it matches the repaired messages table.
    if (tableExists(db, 'messages_fts')) {
      db.prepare("INSERT INTO messages_fts(messages_fts) VALUES('rebuild')").run();
    }
    // Refresh cached per-conversation counters/timestamps when the schema has them.
    if (tableExists(db, 'conversations') && columnExists(db, 'conversations', 'message_count')) {
      for (const conversationId of affectedConversations) {
        db.prepare(`UPDATE conversations
          SET message_count = (SELECT COUNT(*) FROM messages WHERE conversation_id = ?),
              updated_at = COALESCE((SELECT MAX(created_at) FROM messages WHERE conversation_id = ?), updated_at)
          WHERE id = ?`).run(conversationId, conversationId, conversationId);
      }
    }
    // Refuse to commit unless SQLite reports a clean database.
    assertChecksClean(runIntegrityChecks(db));
    db.exec('COMMIT');
    return { deleted, affectedConversations: affectedConversations.size };
  } catch (err) {
    // Best-effort rollback; the original error is what the caller needs to see.
    try { db.exec('ROLLBACK'); } catch {}
    throw err;
  }
}
|
|
272
|
+
|
|
273
|
+
// CLI entry: parses flags, resolves target DBs, scans each for stamped
// replay duplicates, optionally repairs them (--apply, with backup unless
// --no-backup), and prints either a JSON payload or a human summary.
async function main() {
  const argv = parseArgs(process.argv.slice(2));
  if (argv.help) {
    usage();
    return;
  }
  const apply = Boolean(argv.apply);
  const json = Boolean(argv.json);
  // `--examples` with no value parses to `true`; fall back to the default of 5.
  const exampleLimit = argv.examples === true ? 5 : Math.max(0, Number(argv.examples ?? 5));
  const targets = resolveDbTargets({ dataDir: argv['data-dir'], dbPath: argv.db, agent: argv.agent });
  const results = [];

  for (const target of targets) {
    const db = new DatabaseSync(target.dbPath);
    try {
      const groups = findStampedUserDuplicateGroups(db);
      const summary = summarizeTarget(target, groups, exampleLimit);
      summary.mode = 'stamped-user-replay-duplicates';
      summary.dryRun = !apply;
      if (apply && groups.length > 0) {
        // Backup first (default on); only then mutate and re-scan to confirm.
        if (!argv['no-backup']) summary.backupPath = makeBackup(db, target.dbPath, argv['backup-dir']);
        const applied = applyGroups(db, groups);
        summary.deletedRows = applied.deleted;
        // Post-apply verification: a successful repair leaves zero groups.
        summary.postApplyDuplicateGroups = findStampedUserDuplicateGroups(db).length;
      }
      results.push(summary);
    } finally {
      db.close();
    }
  }

  // Aggregate per-target counts for the report footer.
  const total = results.reduce((acc, r) => {
    acc.duplicateGroups += r.duplicateGroups;
    acc.duplicateRows += r.duplicateRows;
    acc.deletedRows += r.deletedRows || 0;
    return acc;
  }, { duplicateGroups: 0, duplicateRows: 0, deletedRows: 0 });

  const payload = { ok: true, dryRun: !apply, total, results };
  if (json) {
    console.log(JSON.stringify(payload, null, 2));
    return;
  }

  console.log(`HyperMem cleanup ${apply ? 'apply' : 'dry-run'}: stamped user replay duplicates`);
  console.log(`Targets: ${results.length}`);
  console.log(`Duplicate groups: ${total.duplicateGroups}`);
  console.log(`Duplicate rows: ${total.duplicateRows}`);
  if (apply) console.log(`Deleted rows: ${total.deletedRows}`);
  // Human output lists at most 20 affected targets; counts only, never content.
  for (const r of results.filter((item) => item.duplicateRows > 0).slice(0, 20)) {
    console.log(`- ${r.agent}: groups=${r.duplicateGroups} rows=${r.duplicateRows} first=${r.firstAt || 'n/a'} last=${r.lastAt || 'n/a'}`);
    if (r.backupPath) console.log(`  backup=${r.backupPath}`);
  }
  if (!apply && total.duplicateRows > 0) {
    console.log('\nNo changes made. Re-run with --apply to repair after stopping Gateway/OpenClaw writers.');
  }
}
|
|
330
|
+
|
|
331
|
+
// Top-level invocation: any uncaught failure prints a single-line error
// (message only, no stack/content) and exits non-zero for scripting.
main().catch((err) => {
  console.error(`hypermem-cleanup failed: ${err.message}`);
  exit(1);
});
|