@psiclawops/hypermem 0.9.6 → 0.9.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63) hide show
  1. package/CHANGELOG.md +23 -0
  2. package/INSTALL.md +29 -9
  3. package/README.md +5 -1
  4. package/assets/default-config.json +20 -5
  5. package/assets/runtime-validation-fixture.json +123 -0
  6. package/bin/hypermem-cleanup.mjs +334 -0
  7. package/bin/hypermem-doctor.mjs +71 -0
  8. package/bin/hypermem-validate-runtime.mjs +282 -0
  9. package/dist/compositor.d.ts +43 -5
  10. package/dist/compositor.d.ts.map +1 -1
  11. package/dist/compositor.js +802 -30
  12. package/dist/entity-bridge-backfill.d.ts +66 -0
  13. package/dist/entity-bridge-backfill.d.ts.map +1 -0
  14. package/dist/entity-bridge-backfill.js +145 -0
  15. package/dist/entity-bridge-store.d.ts +164 -0
  16. package/dist/entity-bridge-store.d.ts.map +1 -0
  17. package/dist/entity-bridge-store.js +488 -0
  18. package/dist/entity-extractor.d.ts +124 -0
  19. package/dist/entity-extractor.d.ts.map +1 -0
  20. package/dist/entity-extractor.js +382 -0
  21. package/dist/entity-ppr.d.ts +55 -0
  22. package/dist/entity-ppr.d.ts.map +1 -0
  23. package/dist/entity-ppr.js +180 -0
  24. package/dist/hybrid-retrieval.d.ts +27 -0
  25. package/dist/hybrid-retrieval.d.ts.map +1 -1
  26. package/dist/hybrid-retrieval.js +26 -1
  27. package/dist/index.d.ts +19 -0
  28. package/dist/index.d.ts.map +1 -1
  29. package/dist/index.js +63 -13
  30. package/dist/message-store.d.ts +36 -0
  31. package/dist/message-store.d.ts.map +1 -1
  32. package/dist/message-store.js +155 -1
  33. package/dist/open-domain.d.ts +13 -4
  34. package/dist/open-domain.d.ts.map +1 -1
  35. package/dist/open-domain.js +222 -20
  36. package/dist/profiles.js +13 -13
  37. package/dist/question-shape.d.ts +73 -0
  38. package/dist/question-shape.d.ts.map +1 -0
  39. package/dist/question-shape.js +230 -0
  40. package/dist/schema.d.ts +1 -1
  41. package/dist/schema.d.ts.map +1 -1
  42. package/dist/schema.js +92 -1
  43. package/dist/topic-detector.d.ts.map +1 -1
  44. package/dist/topic-detector.js +22 -9
  45. package/dist/types.d.ts +176 -2
  46. package/dist/types.d.ts.map +1 -1
  47. package/dist/vector-store.d.ts +6 -0
  48. package/dist/vector-store.d.ts.map +1 -1
  49. package/dist/vector-store.js +3 -0
  50. package/docs/DIAGNOSTICS.md +47 -0
  51. package/docs/INTEGRATION_VALIDATION.md +24 -4
  52. package/docs/TUNING.md +21 -21
  53. package/memory-plugin/dist/index.d.ts +3 -3
  54. package/memory-plugin/dist/index.js +4 -2
  55. package/memory-plugin/openclaw.plugin.json +5 -0
  56. package/memory-plugin/package.json +10 -6
  57. package/package.json +22 -5
  58. package/plugin/dist/index.d.ts +3 -3
  59. package/plugin/dist/index.d.ts.map +1 -1
  60. package/plugin/dist/index.js +115 -13
  61. package/plugin/dist/index.js.map +1 -1
  62. package/plugin/package.json +10 -6
  63. package/scripts/install-runtime.mjs +4 -1
package/CHANGELOG.md CHANGED
@@ -2,6 +2,29 @@
2
2
 
3
3
  All notable changes to hypermem are documented here.
4
4
 
5
+ ## 0.9.9 - 2026-05-09
6
+
7
+ - **Replay duplication cleanup is operator-visible.** `hypermem-cleanup` now ships as a package CLI, dry-runs by default, reports timestamp-stamped Gateway replay duplicate debt without printing message content, and applies repairs only with `--apply` after creating a SQLite backup, rewiring local message references, rebuilding FTS, and passing integrity checks.
8
+ - **Doctor warns on stamped replay duplicate debt.** `hypermem-doctor` now scans the HyperMem data dir for timestamp-stamped user replay duplicate rows and emits a hard-to-miss `stamped-replay-duplicate-debt` warning with the cleanup dry-run command.
9
+ - **Operational memory defaults tightened.** Shipped defaults now favor stable pressure over maximum recall: lower first-turn warm history load, bounded facts/keystones, 250 hot history messages, and cross-session context off by default while LoCoMo tuning continues.
10
+ - **Boundary user turns are persisted reliably.** The context-engine plugin now recovers the current plain user message at the afterTurn boundary when OpenClaw pre-prompt counts already include it, preventing assistant-only replay gaps in HyperMem's SQLite history.
11
+ - **Plugin audit gate is clean.** Context-engine and memory-plugin packages pin the transitive `fast-xml-builder` audit fix through package overrides and refreshed lockfiles.
12
+
13
+ ## 0.9.8 - 2026-05-03
14
+
15
+ - **Deterministic runtime validation added.** `hypermem-validate-runtime` now ships with the package and installer payload, seeds an isolated validation fixture, and verifies message writes, FTS, structured facts, vector indexing/search when enabled, warm, and compose without an answer LLM. Install docs now treat runtime validation as part of completion.
16
+ - **Long-horizon recall widened for EasyLoCoMo.** Steady semantic recall now uses a larger memory envelope and candidate pool, closer to the published Mem0 token-efficient LoCoMo posture instead of starving single-hop and temporal evidence.
17
+ - **Temporal recall is query-shaped.** Temporal retrieval now over-fetches indexed facts, scores them against query terms, and injects relevant temporal evidence rather than blindly taking the latest rows. This targets wrong-date failures on `when did X` questions.
18
+ - **Old episodic evidence is preserved for benchmark-style questions.** Long-horizon QA disables production recency decay and relaxes FTS-only episode floors so valid historical evidence is not filtered out solely because it is old.
19
+ - **Long-horizon fusion weights now favor exact evidence.** Benchmark-style questions use a lower RRF k, modest FTS weight boost, and wider reranker candidate window so names, dates, and quoted objects can outrank semantically-near but wrong old chatter without changing normal production turns.
20
+
21
+ ## 0.9.7 - 2026-05-02
22
+
23
+ - **OpenClaw Plugin SDK imports modernized.** HyperCompositor and HyperMem memory now import plugin entry helpers through the canonical public Plugin SDK surface and refresh OpenClaw/Plugin SDK build provenance to the validated runtime.
24
+ - **SDK drift is now actively challenged.** Release gates enforce public SDK imports and exact build metadata for reproducibility, while a latest-SDK canary and Dependabot tracking keep the pin from quietly aging into another compatibility break.
25
+ - **Memory plugin tool contract declared.** `hypermem` now declares ownership of the optional `history_query` tool in its OpenClaw plugin manifest, satisfying the 2026.5.2 plugin checker contract gate.
26
+ - **Plugin checker gates are now standard release infrastructure.** HyperMem CI runs Plugin Inspector static/runtime checks plus isolated dependency-install cold import proof, production/dev dependency audit, and issue-debt validation; publish readiness has a packed-artifact OpenClaw `plugins doctor`/runtime-inspect gate. The previous context-engine root SDK barrel P2 is fixed by deriving context-engine types from the public `OpenClawPluginApi` core type surface.
27
+
5
28
  ## 0.9.6 - 2026-05-01
6
29
 
7
30
  - **OpenClaw 2026.4.29 plugin startup compatibility.** HyperCompositor and HyperMem memory manifests now declare `activation.onStartup: false`, making slot-triggered loading explicit and removing reliance on deprecated implicit startup sidecar fallback behavior.
package/INSTALL.md CHANGED
@@ -79,8 +79,9 @@ Release validation details live in [docs/INTEGRATION_VALIDATION.md](./docs/INTEG
79
79
  `hypermem-install` creates the current recommended starter config automatically when `~/.openclaw/hypermem/config.json` is missing. The shipped starter config is installation-safe FTS5 mode:
80
80
 
81
81
  - `embedding.provider: "none"`
82
- - `warmHistoryBudgetFraction: 0.45`
83
- - standard fact, keystone, and history caps
82
+ - `warmHistoryBudgetFraction: 0.27`
83
+ - bounded fact, keystone, and history caps
84
+ - cross-session context disabled by default
84
85
 
85
86
  This is intentional. A clean first install should load, compose, and verify without requiring Ollama or an external API key. The result is **YELLOW** install readiness: HyperMem is active with keyword recall, but semantic vector recall is disabled. Upgrade to Ollama, OpenRouter, or Gemini after the baseline install is active.
86
87
 
@@ -152,15 +153,34 @@ OpenClaw loads the plugin runtime from `~/.openclaw/plugins/hypermem/`.
152
153
 
153
154
  ### Verification checkpoints
154
155
 
155
- Run the installed-system doctor first:
156
+ Run the installed-system doctor and deterministic runtime validator first:
156
157
 
157
158
  ```bash
158
159
  hypermem-doctor --fix-plan
160
+ hypermem-cleanup --data-dir ~/.openclaw/hypermem
161
+ hypermem-validate-runtime --allow-no-embedding
159
162
  ```
160
163
 
161
164
  `hypermem-doctor` is read-only. It inspects OpenClaw config, HyperMem config, plugin wiring, recommended OpenClaw runtime settings, data directory shape, runtime plugin load state, and active model context-window risk. It prints exact `openclaw config set ...` commands when something needs review. It does not edit config or restart the gateway.
162
165
 
163
- Expected result after a complete install: no required failures. Recommendation warnings should be reviewed before production use, especially context-window warnings for GPT/OpenAI-compatible/local gateways.
166
+ `hypermem-validate-runtime` seeds a tiny isolated validation agent, then checks message persistence, FTS, structured facts, vector indexing/search when embeddings are enabled, warm, and compose. It does not call an answer LLM. Expected result after a complete install: doctor has no required failures and runtime validation reports `consistencyScore: 1`. Recommendation warnings should be reviewed before production use, especially context-window warnings for GPT/OpenAI-compatible/local gateways.
167
+
168
+ ### Replay duplicate cleanup after upgrade
169
+
170
+ If agents report repeated user messages after a restart, run the read-only cleanup scan. This is now part of the obvious post-upgrade diagnostic path, not tribal knowledge:
171
+
172
+ ```bash
173
+ hypermem-doctor --fix-plan
174
+ hypermem-cleanup --data-dir ~/.openclaw/hypermem
175
+ ```
176
+
177
+ Only apply during a maintenance window after Gateway/OpenClaw writers are stopped:
178
+
179
+ ```bash
180
+ hypermem-cleanup --data-dir ~/.openclaw/hypermem --apply
181
+ ```
182
+
183
+ Apply mode writes a SQLite backup, rewrites local references, rebuilds FTS, and rolls back unless integrity and foreign-key checks pass.
164
184
 
165
185
  Walk the install state machine explicitly if you need a manual check:
166
186
 
@@ -849,12 +869,12 @@ Key starter defaults:
849
869
  "budgetFraction": 0.6,
850
870
  "contextWindowReserve": 0.25,
851
871
  "targetBudgetFraction": 0.50,
852
- "warmHistoryBudgetFraction": 0.45,
853
- "maxFacts": 28,
872
+ "warmHistoryBudgetFraction": 0.27,
873
+ "maxFacts": 25,
854
874
  "maxHistoryMessages": 250,
855
875
  "maxCrossSessionContext": 0,
856
- "keystoneHistoryFraction": 0.20,
857
- "keystoneMaxMessages": 15,
876
+ "keystoneHistoryFraction": 0.15,
877
+ "keystoneMaxMessages": 12,
858
878
  "hyperformProfile": "standard"
859
879
  }
860
880
  }
@@ -929,7 +949,7 @@ Use it during install and after model changes. `--strict` exits non-zero if a mo
929
949
  ```json
930
950
  {
931
951
  "compositor": {
932
- "budgetFraction": 0.55,
952
+ "budgetFraction": 0.6,
933
953
  "contextWindowReserve": 0.25,
934
954
  "warmHistoryBudgetFraction": 0.27,
935
955
  "contextWindowOverrides": {
package/README.md CHANGED
@@ -455,11 +455,15 @@ openclaw config set plugins.allow '["existing-plugin","hypercompositor","hyperme
455
455
 
456
456
  openclaw gateway restart
457
457
  hypermem-doctor --fix-plan
458
+ hypermem-cleanup --data-dir ~/.openclaw/hypermem
458
459
  hypermem-status --health
460
+ hypermem-validate-runtime --allow-no-embedding
459
461
  hypermem-model-audit --strict
460
462
  ```
461
463
 
462
- `hypermem-doctor` is the confidence check: it validates plugin wiring, plugin registry refresh readiness, runtime load state, recommended OpenClaw settings such as `contextPruning.mode=off`, GPT-5 personality overlay off, startup/bootstrap injection sizing, compaction safety settings including `maxActiveTranscriptBytes` remaining unset for HyperMem-managed compaction, HyperMem data files, and model context-window overrides for GPT/OpenAI-compatible/local gateways. It is read-only and prints a reviewable fix plan.
464
+ `hypermem-cleanup` is the operator-safe repair tool for timestamp-stamped Gateway replay duplicate rows. It is dry-run by default, prints no message content, creates backups in apply mode, rewrites local message references, rebuilds FTS, and commits only after SQLite integrity checks pass. `hypermem-doctor` warns with `stamped-replay-duplicate-debt` when cleanup should be considered.
465
+
466
+ `hypermem-doctor` is the configuration confidence check: it validates plugin wiring, plugin registry refresh readiness, runtime load state, recommended OpenClaw settings such as `contextPruning.mode=off`, GPT-5 personality overlay off, startup/bootstrap injection sizing, compaction safety settings including `maxActiveTranscriptBytes` remaining unset for HyperMem-managed compaction, HyperMem data files, and model context-window overrides for GPT/OpenAI-compatible/local gateways. It is read-only and prints a reviewable fix plan. `hypermem-validate-runtime` is the deterministic component check: it seeds a tiny isolated fixture and verifies write, FTS, facts, vector indexing/search when enabled, warm, and compose without using an answer LLM.
463
467
 
464
468
  Full install, upgrade, source-clone, embedding provider, reranker, fleet config, and rollback guidance lives in **[INSTALL.md](./INSTALL.md)**.
465
469
 
@@ -23,13 +23,28 @@
23
23
  "budgetFraction": 0.6,
24
24
  "contextWindowReserve": 0.25,
25
25
  "targetBudgetFraction": 0.5,
26
- "warmHistoryBudgetFraction": 0.45,
27
- "maxFacts": 28,
26
+ "warmHistoryBudgetFraction": 0.27,
27
+ "maxFacts": 25,
28
28
  "maxHistoryMessages": 250,
29
29
  "maxCrossSessionContext": 0,
30
- "keystoneHistoryFraction": 0.2,
31
- "keystoneMaxMessages": 15,
32
- "hyperformProfile": "standard"
30
+ "keystoneHistoryFraction": 0.15,
31
+ "keystoneMaxMessages": 12,
32
+ "hyperformProfile": "standard",
33
+ "entityBridge": {
34
+ "enabled": false,
35
+ "structuredHandoff": false,
36
+ "pprEnabled": false,
37
+ "liveIndexingEnabled": false,
38
+ "maxTokens": 1200,
39
+ "maxGraphEdges": 5000,
40
+ "maxGraphNodes": 2000,
41
+ "maxCandidateMessagesBeforeRanking": 500,
42
+ "maxSeedEntities": 4,
43
+ "maxSeedFacets": 4,
44
+ "pprMaxIterations": 20,
45
+ "pprTeleportProbability": 0.15,
46
+ "pprConvergenceTolerance": 1e-06
47
+ }
33
48
  },
34
49
  "indexer": {
35
50
  "enabled": true,
@@ -0,0 +1,123 @@
1
+ {
2
+ "schemaVersion": 1,
3
+ "name": "hypermem-runtime-validation",
4
+ "description": "Small deterministic fixture for installed HyperMem runtime validation. It validates message storage, FTS, library facts, vector indexing/search, warm, compose, current-value recall, temporal details, and negative-control recall without an answer LLM.",
5
+ "agentPrefix": "hypermem-runtime-validation",
6
+ "messages": [
7
+ {
8
+ "sessionId": "session-1",
9
+ "date": "2026-01-10T09:00:00Z",
10
+ "role": "user",
11
+ "content": "[runtime-fixture alpha] Riley stores the launch checklist in a teal binder in the north lab."
12
+ },
13
+ {
14
+ "sessionId": "session-1",
15
+ "date": "2026-01-10T09:01:00Z",
16
+ "role": "assistant",
17
+ "content": "Recorded: teal binder, north lab, launch checklist."
18
+ },
19
+ {
20
+ "sessionId": "session-1",
21
+ "date": "2026-01-10T09:02:00Z",
22
+ "role": "user",
23
+ "content": "[runtime-fixture beta] The backup generator codename is Copper Fox and it runs on Tuesdays."
24
+ },
25
+ {
26
+ "sessionId": "session-2",
27
+ "date": "2026-02-14T15:30:00Z",
28
+ "role": "user",
29
+ "content": "[runtime-fixture gamma] The old incident channel was #ops-red, but it was replaced by #ops-green on February 14."
30
+ },
31
+ {
32
+ "sessionId": "session-2",
33
+ "date": "2026-02-14T15:31:00Z",
34
+ "role": "assistant",
35
+ "content": "Current incident channel should be #ops-green; #ops-red is old."
36
+ },
37
+ {
38
+ "sessionId": "session-2",
39
+ "date": "2026-02-14T15:32:00Z",
40
+ "role": "user",
41
+ "content": "[runtime-fixture delta] Mira's pager handoff window is 18:00 to 20:00 Arizona time."
42
+ },
43
+ {
44
+ "sessionId": "session-3",
45
+ "date": "2026-03-22T11:45:00Z",
46
+ "role": "user",
47
+ "content": "[runtime-fixture epsilon] The release mascot is a brass otter carrying a small lantern."
48
+ },
49
+ {
50
+ "sessionId": "session-3",
51
+ "date": "2026-03-22T11:46:00Z",
52
+ "role": "assistant",
53
+ "content": "Noted release mascot: brass otter with lantern."
54
+ },
55
+ {
56
+ "sessionId": "session-3",
57
+ "date": "2026-03-22T11:47:00Z",
58
+ "role": "user",
59
+ "content": "[runtime-fixture zeta] There is no record of a purple submarine in this validation fixture."
60
+ }
61
+ ],
62
+ "facts": [
63
+ {
64
+ "domain": "runtime-validation",
65
+ "content": "Runtime validation structured fact: the backup generator codename is Copper Fox and it runs on Tuesdays."
66
+ },
67
+ {
68
+ "domain": "runtime-validation",
69
+ "content": "Runtime validation structured fact: the current incident channel is #ops-green; #ops-red is obsolete."
70
+ },
71
+ {
72
+ "domain": "runtime-validation",
73
+ "content": "Runtime validation structured fact: Mira's pager handoff window is 18:00 to 20:00 Arizona time."
74
+ }
75
+ ],
76
+ "probes": [
77
+ {
78
+ "id": "message-fts-exact-anchor",
79
+ "component": "message-fts",
80
+ "query": "teal binder north lab launch checklist",
81
+ "required": ["teal binder", "north lab", "launch checklist"]
82
+ },
83
+ {
84
+ "id": "message-fts-late-session",
85
+ "component": "message-fts",
86
+ "query": "brass otter lantern",
87
+ "required": ["brass otter", "lantern"]
88
+ },
89
+ {
90
+ "id": "semantic-structured-fact",
91
+ "component": "semantic",
92
+ "query": "standby power system codename weekly schedule",
93
+ "required": ["Copper Fox", "Tuesdays"]
94
+ },
95
+ {
96
+ "id": "semantic-current-value",
97
+ "component": "semantic",
98
+ "query": "current incident channel replacement",
99
+ "required": ["#ops-green"]
100
+ },
101
+ {
102
+ "id": "compose-temporal-detail",
103
+ "component": "compose",
104
+ "sessionId": "session-2",
105
+ "query": "When is Mira's pager handoff window?",
106
+ "required": ["18:00", "20:00", "Arizona"]
107
+ },
108
+ {
109
+ "id": "compose-late-session-detail",
110
+ "component": "compose",
111
+ "sessionId": "session-3",
112
+ "query": "What is the release mascot?",
113
+ "required": ["brass otter", "lantern"]
114
+ },
115
+ {
116
+ "id": "compose-negative-control",
117
+ "component": "compose",
118
+ "sessionId": "session-3",
119
+ "query": "What does the fixture say about a purple submarine?",
120
+ "required": ["no record", "purple submarine"]
121
+ }
122
+ ]
123
+ }
@@ -0,0 +1,334 @@
1
+ #!/usr/bin/env node
2
+ /**
3
+ * hypermem-cleanup — operator-safe repair utilities for persisted HyperMem DBs.
4
+ *
5
+ * Default command repairs timestamp-stamped replay duplicates caused by Gateway
6
+ * transcript restore re-recording the same user turn. It is read-only unless
7
+ * --apply is passed, and apply mode writes a SQLite backup first by default.
8
+ */
9
+
10
+ import { existsSync, mkdirSync, readdirSync } from 'node:fs';
11
+ import { dirname, join, resolve } from 'node:path';
12
+ import { createHash } from 'node:crypto';
13
+ import { exit } from 'node:process';
14
+ import { DatabaseSync } from 'node:sqlite';
15
+
16
// Matches the Gateway replay timestamp stamp at the start of a user message,
// e.g. "[Mon 2026-05-04 09:15 MST]": three-letter day abbreviation, ISO date,
// HH:MM, and a 2-4 letter uppercase timezone abbreviation.
const STAMPED_PREFIX_RE = /^\[[A-Z][a-z]{2} \d{4}-\d{2}-\d{2} \d{2}:\d{2} [A-Z]{2,4}\]/;
// Default HyperMem data directory; HOME may be unset in some service
// environments, in which case this degrades to a relative path.
const DEFAULT_DATA_DIR = join(process.env.HOME || '', '.openclaw', 'hypermem');
18
+
19
/**
 * Minimal CLI argument parser.
 *
 * Positional arguments collect under `_`. A `--key value` pair stores the
 * string value; a `--flag` followed by nothing (or by another `--option`)
 * stores `true`. Repeated keys keep the last occurrence.
 * @param {string[]} argv - Raw arguments, typically process.argv.slice(2).
 * @returns {Record<string, string|boolean|string[]>} Parsed options plus `_`.
 */
function parseArgs(argv) {
  const parsed = { _: [] };
  let cursor = 0;
  while (cursor < argv.length) {
    const token = argv[cursor];
    cursor += 1;
    if (!token.startsWith('--')) {
      parsed._.push(token);
      continue;
    }
    const name = token.slice(2);
    const value = argv[cursor];
    if (value == null || value.startsWith('--')) {
      // Bare flag: no consumable value follows.
      parsed[name] = true;
    } else {
      parsed[name] = value;
      cursor += 1;
    }
  }
  return parsed;
}
37
+
38
/** Print CLI usage/help text for hypermem-cleanup to stdout. */
function usage() {
  console.log(`Usage: hypermem-cleanup [options]

Repairs known persisted-data issues. Default mode is a dry-run scan for
Gateway timestamp-stamped user replay duplicates.

Options:
  --data-dir <path>    HyperMem data dir (default: ~/.openclaw/hypermem)
  --db <path>          Scan one messages.db instead of all agents
  --agent <id>         Restrict --data-dir scan to one agent
  --apply              Delete duplicate rows and repair references
  --backup-dir <path>  Backup directory for apply mode (default: beside DB)
  --no-backup          Disable apply-mode backup (not recommended)
  --json               Print machine-readable JSON
  --examples <n>       Include up to n hashed examples per DB (default: 5)
  --help               Show this help

Safety:
  - dry-run is default
  - content is not printed; examples use SHA-256 prefixes and counts
  - apply mode keeps the earliest row per exact duplicate group
  - apply mode rewrites local message references, rebuilds FTS, and runs
    integrity checks before committing
`);
}
63
+
64
/**
 * Filesystem-safe UTC timestamp for backup filenames: ISO-8601 with every
 * ':' and '.' replaced by '-'.
 * @returns {string} e.g. "2026-05-09T12-34-56-789Z"
 */
function isoStamp() {
  const nowIso = new Date().toISOString();
  return nowIso.replaceAll(':', '-').replaceAll('.', '-');
}
67
+
68
/**
 * Quote a value as a single-quoted SQL string literal, doubling any embedded
 * single quotes per SQL escaping rules.
 * @param {unknown} value - Coerced to string before quoting.
 * @returns {string} Quoted literal safe to splice into a statement.
 */
function sqlLiteral(value) {
  const text = String(value);
  return "'" + text.split("'").join("''") + "'";
}
71
+
72
/**
 * Whether a table or view with the given name exists in the open database.
 * @param {object} db - Open SQLite handle (DatabaseSync-compatible).
 * @param {string} table - Table or view name to look up.
 * @returns {boolean}
 */
function tableExists(db, table) {
  const lookup = db.prepare("SELECT 1 AS ok FROM sqlite_master WHERE type IN ('table','view') AND name = ?");
  return Boolean(lookup.get(table));
}
76
+
77
/**
 * Whether the given table exists and contains the named column.
 * @param {object} db - Open SQLite handle.
 * @param {string} table - Table name (trusted internal constant, not user input).
 * @param {string} column - Column name to look for.
 * @returns {boolean}
 */
function columnExists(db, table, column) {
  if (!tableExists(db, table)) return false;
  const columns = db.prepare(`PRAGMA table_info(${table})`).all();
  return columns.some((info) => info.name === column);
}
81
+
82
/**
 * List the column names of a table, or [] when the table does not exist.
 * @param {object} db - Open SQLite handle.
 * @param {string} table - Table name (trusted internal constant).
 * @returns {string[]} Column names in declaration order.
 */
function firstTableColumns(db, table) {
  if (!tableExists(db, table)) return [];
  const rows = db.prepare(`PRAGMA table_info(${table})`).all();
  return rows.map((info) => info.name);
}
86
+
87
/**
 * Privacy-preserving fingerprint: first 12 hex chars of SHA-256(text).
 * Used so reports can correlate duplicate content without printing it.
 * @param {string} text - Message text to hash.
 * @returns {string} 12-character lowercase hex prefix.
 */
function sha12(text) {
  const digest = createHash('sha256').update(text).digest('hex');
  return digest.slice(0, 12);
}
90
+
91
/**
 * Resolve the list of messages.db files to operate on.
 *
 * With --db, returns exactly that file (labelled by --agent or its parent
 * directory name). Otherwise scans <dataDir>/agents/<id>/messages.db for all
 * agents, or just the one named by --agent.
 * @param {{dataDir?: string, dbPath?: string, agent?: string}} options
 * @returns {Array<{agent: string, dbPath: string}>} Non-empty target list.
 * @throws {Error} When the explicit DB, the agents dir, or any target is missing.
 */
function resolveDbTargets({ dataDir, dbPath, agent }) {
  if (dbPath) {
    const resolved = resolve(dbPath);
    if (!existsSync(resolved)) throw new Error(`messages.db not found: ${resolved}`);
    // Derive an agent label from the parent directory name. Split on both
    // separator styles so the label is also correct for Windows paths
    // (the previous '/'-only split returned the whole drive-qualified path).
    const parentName = dirname(resolved).split(/[\\/]/).pop();
    return [{ agent: agent || parentName || 'unknown', dbPath: resolved }];
  }
  const agentsDir = join(resolve(dataDir || DEFAULT_DATA_DIR), 'agents');
  if (!existsSync(agentsDir)) throw new Error(`HyperMem agents dir not found: ${agentsDir}`);
  const agents = agent ? [agent] : readdirSync(agentsDir).sort();
  const targets = [];
  for (const id of agents) {
    const candidate = join(agentsDir, id, 'messages.db');
    if (existsSync(candidate)) targets.push({ agent: id, dbPath: candidate });
  }
  if (targets.length === 0) throw new Error(`No messages.db targets found under ${agentsDir}`);
  return targets;
}
108
+
109
/**
 * Load every user-role message row eligible for duplicate scanning.
 *
 * Validates that the messages table has the expected HyperMem shape before
 * querying. When a conversations table exists, joins it to expose session_key;
 * otherwise (older databases) session_key is returned as NULL. Tool
 * call/result payloads and message_index are COALESCEd to stable defaults so
 * they can participate in exact-duplicate grouping keys.
 * @param {object} db - Open messages.db handle.
 * @returns {object[]} Rows with id, conversation_id, session_key, agent_id,
 *   role, text_content, tool_calls, tool_results, message_index, created_at.
 * @throws {Error} If the messages table lacks the required columns.
 */
function readRows(db) {
  const cols = firstTableColumns(db, 'messages');
  if (!cols.includes('text_content') || !cols.includes('conversation_id') || !cols.includes('role')) {
    throw new Error('messages table does not have the expected HyperMem columns');
  }
  // conversations is optional: schema generations before it shipped still work.
  const hasConversations = tableExists(db, 'conversations');
  const sql = hasConversations
    ? `SELECT m.id, m.conversation_id, c.session_key, m.agent_id, m.role, m.text_content,
              COALESCE(m.tool_calls, '') AS tool_calls,
              COALESCE(m.tool_results, '') AS tool_results,
              COALESCE(m.message_index, 0) AS message_index,
              m.created_at
         FROM messages m
         LEFT JOIN conversations c ON c.id = m.conversation_id
        WHERE m.role = 'user' AND m.text_content IS NOT NULL`
    : `SELECT id, conversation_id, NULL AS session_key, agent_id, role, text_content,
              COALESCE(tool_calls, '') AS tool_calls,
              COALESCE(tool_results, '') AS tool_results,
              COALESCE(message_index, 0) AS message_index,
              created_at
         FROM messages
        WHERE role = 'user' AND text_content IS NOT NULL`;
  return db.prepare(sql).all();
}
133
+
134
/**
 * Group timestamp-stamped user rows into exact-duplicate clusters.
 *
 * Rows are bucketed by (conversation, role, text, tool payloads); buckets with
 * more than one row become duplicate groups. Within a group the earliest row
 * (by message_index, then id) is the keeper and the rest are duplicates.
 * @param {object} db - Open messages.db handle.
 * @returns {Array<{keep: object, duplicates: object[], all: object[]}>}
 *   Groups ordered by keeper created_at, then keeper id.
 */
function findStampedUserDuplicateGroups(db) {
  const buckets = new Map();
  for (const row of readRows(db)) {
    const text = String(row.text_content || '');
    // Only rows carrying the Gateway replay timestamp stamp are candidates.
    if (!STAMPED_PREFIX_RE.test(text.trimStart())) continue;
    const bucketKey = [row.conversation_id, row.role, text, row.tool_calls || '', row.tool_results || ''].join('\u0000');
    if (buckets.has(bucketKey)) {
      buckets.get(bucketKey).push(row);
    } else {
      buckets.set(bucketKey, [row]);
    }
  }

  // Earliest-first ordering within a bucket: message_index, then row id.
  const byPosition = (a, b) => {
    const indexDelta = Number(a.message_index || 0) - Number(b.message_index || 0);
    return indexDelta !== 0 ? indexDelta : Number(a.id) - Number(b.id);
  };

  const duplicateGroups = [];
  for (const bucketRows of buckets.values()) {
    if (bucketRows.length < 2) continue;
    bucketRows.sort(byPosition);
    const [keep, ...duplicates] = bucketRows;
    duplicateGroups.push({ keep, duplicates, all: bucketRows });
  }

  // Stable report order: keeper creation time, then keeper id.
  duplicateGroups.sort((a, b) => {
    const timeDelta = String(a.keep.created_at || '').localeCompare(String(b.keep.created_at || ''));
    return timeDelta !== 0 ? timeDelta : Number(a.keep.id) - Number(b.keep.id);
  });
  return duplicateGroups;
}
160
+
161
/**
 * Build a privacy-safe per-database summary of duplicate groups.
 *
 * Message content is never included; examples expose only ids, timestamps,
 * counts, and a 12-hex-char SHA-256 prefix of the kept text.
 * @param {{agent: string, dbPath: string}} target - Scanned database target.
 * @param {Array<{keep: object, duplicates: object[], all: object[]}>} groups
 *   Duplicate groups from findStampedUserDuplicateGroups (sorted by keeper).
 * @param {number} exampleLimit - Maximum hashed examples to include.
 * @returns {object} Summary suitable for console and JSON reporting.
 */
function summarizeTarget(target, groups, exampleLimit) {
  const duplicateRows = groups.reduce((sum, g) => sum + g.duplicates.length, 0);
  const conversations = new Set(groups.map((g) => g.keep.conversation_id));
  const first = groups[0]?.keep?.created_at || null;
  // Latest duplicate activity across ALL groups. (Previously only the final
  // group's rows were inspected, which could under-report lastAt: groups are
  // ordered by the keeper's created_at, not by their newest row.) Null
  // timestamps are filtered so they cannot win the lexicographic sort.
  const allTimestamps = groups
    .flatMap((g) => g.all.map((r) => r.created_at))
    .filter((t) => t != null)
    .sort();
  const last = allTimestamps.at(-1) ?? null;
  const examples = groups.slice(0, exampleLimit).map((g) => {
    // Sort once per group instead of once per field.
    const groupTimestamps = g.all.map((r) => r.created_at).sort();
    return {
      conversationId: g.keep.conversation_id,
      sessionKey: g.keep.session_key || null,
      keepId: g.keep.id,
      duplicateIds: g.duplicates.map((r) => r.id),
      count: g.all.length,
      firstAt: groupTimestamps[0] || null,
      lastAt: groupTimestamps.at(-1) || null,
      textSha256: sha12(String(g.keep.text_content || '')),
    };
  });
  return {
    agent: target.agent,
    dbPath: target.dbPath,
    duplicateGroups: groups.length,
    duplicateRows,
    affectedConversations: conversations.size,
    firstAt: first,
    lastAt: last,
    examples,
  };
}
189
+
190
/**
 * Create a point-in-time backup of the database before destructive repair.
 *
 * Uses SQLite's `VACUUM INTO`, which writes a consistent snapshot to a new
 * file. The backup lands beside the DB unless backupDir is given.
 * @param {object} db - Open handle to the database being backed up.
 * @param {string} dbPath - Filesystem path of the live database file.
 * @param {string} [backupDir] - Optional target directory (created if needed).
 * @returns {string} Path of the backup file that was written.
 */
function makeBackup(db, dbPath, backupDir) {
  const dir = backupDir ? resolve(backupDir) : dirname(dbPath);
  mkdirSync(dir, { recursive: true });
  // Split on both separator styles so the backup filename is correct on
  // Windows too (the previous '/'-only split kept the whole path).
  const baseName = dbPath.split(/[\\/]/).pop();
  const backupPath = join(dir, `${baseName}.backup-${isoStamp()}`);
  db.exec(`VACUUM INTO ${sqlLiteral(backupPath)}`);
  return backupPath;
}
197
+
198
/**
 * Run SQLite health pragmas and return their raw findings.
 * @param {object} db - Open SQLite handle.
 * @returns {{integrity: unknown[], foreignKeys: object[]}} integrity_check
 *   messages (first column of each row) and foreign_key_check violation rows.
 */
function runIntegrityChecks(db) {
  const integrityRows = db.prepare('PRAGMA integrity_check').all();
  const integrity = integrityRows.map((row) => Object.values(row)[0]);
  const foreignKeys = db.prepare('PRAGMA foreign_key_check').all();
  return { integrity, foreignKeys };
}
203
+
204
/**
 * Throw unless integrity results are exactly clean.
 *
 * "Clean" means integrity_check returned the single message 'ok' and
 * foreign_key_check returned no violation rows.
 * @param {{integrity: unknown[], foreignKeys: object[]}} checks - From runIntegrityChecks.
 * @throws {Error} Describing the first failing check (FK details capped at 20 rows).
 */
function assertChecksClean(checks) {
  const { integrity, foreignKeys } = checks;
  const integrityOk = integrity.length === 1 && integrity[0] === 'ok';
  if (!integrityOk) {
    throw new Error(`integrity_check failed: ${JSON.stringify(integrity)}`);
  }
  if (foreignKeys.length > 0) {
    throw new Error(`foreign_key_check failed: ${JSON.stringify(foreignKeys.slice(0, 20))}`);
  }
}
209
+
210
/**
 * Repoint or drop rows that reference a duplicate message before deleting it.
 *
 * Durable references (summary links, parent links, context/snapshot heads,
 * tool artifacts) are rewritten from the duplicate id to the kept id. Derived
 * entity-bridge index rows are simply dropped for the duplicate. Every target
 * table/column is probed first so older schemas are handled gracefully.
 * @param {object} db - Open messages.db handle (caller manages the transaction).
 * @param {number} fromId - Duplicate message id about to be removed.
 * @param {number} toId - Kept message id that survives.
 */
function rewriteReferences(db, fromId, toId) {
  if (tableExists(db, 'summary_messages')) {
    // Re-link summaries to the kept row first, then drop the duplicate link.
    db.prepare('INSERT OR IGNORE INTO summary_messages (summary_id, message_id) SELECT summary_id, ? FROM summary_messages WHERE message_id = ?').run(toId, fromId);
    db.prepare('DELETE FROM summary_messages WHERE message_id = ?').run(fromId);
  }

  // [table, column, repoint statement] triples, applied in a fixed order.
  const repointTargets = [
    ['messages', 'parent_id', 'UPDATE messages SET parent_id = ? WHERE parent_id = ?'],
    ['contexts', 'head_message_id', 'UPDATE contexts SET head_message_id = ? WHERE head_message_id = ?'],
    ['composition_snapshots', 'head_message_id', 'UPDATE composition_snapshots SET head_message_id = ? WHERE head_message_id = ?'],
    ['tool_artifacts', 'message_id', 'UPDATE tool_artifacts SET message_id = ? WHERE message_id = ?'],
  ];
  for (const [table, column, sql] of repointTargets) {
    if (tableExists(db, table) && columnExists(db, table, column)) {
      db.prepare(sql).run(toId, fromId);
    }
  }

  // Bridge rows are derived indexes. Drop duplicate-side rows; operators can run
  // the bridge backfill later if they want to rebuild derived mentions exactly.
  for (const table of ['message_entity_mentions', 'message_facet_mentions', 'entity_bridge_message_index']) {
    if (tableExists(db, table) && columnExists(db, table, 'message_id')) {
      db.prepare(`DELETE FROM ${table} WHERE message_id = ?`).run(fromId);
    }
  }
}
236
+
237
/**
 * Delete duplicate message rows and repair dependent state in one transaction.
 *
 * For every group, each duplicate's references are rewired to the kept row and
 * the duplicate is deleted. Afterwards the FTS index is rebuilt and
 * per-conversation counters are recomputed. Integrity and foreign-key checks
 * must pass before COMMIT; any failure rolls the whole repair back.
 * @param {object} db - Open messages.db handle.
 * @param {Array<{keep: object, duplicates: object[]}>} groups - Duplicate groups.
 * @returns {{deleted: number, affectedConversations: number}}
 * @throws {Error} If any step or integrity check fails (after rollback).
 */
function applyGroups(db, groups) {
  const affectedConversations = new Set();
  let deleted = 0;
  db.exec('PRAGMA foreign_keys = ON');
  // BEGIN IMMEDIATE acquires the write lock up front so a concurrent writer
  // fails fast instead of surfacing mid-repair.
  db.exec('BEGIN IMMEDIATE');
  try {
    for (const group of groups) {
      const keepId = Number(group.keep.id);
      affectedConversations.add(Number(group.keep.conversation_id));
      for (const dup of group.duplicates) {
        const dupId = Number(dup.id);
        // Repoint/drop references before deleting so FK checks stay clean.
        rewriteReferences(db, dupId, keepId);
        const result = db.prepare('DELETE FROM messages WHERE id = ?').run(dupId);
        deleted += Number(result.changes || 0);
      }
    }
    // Rebuild the full-text index so search no longer reflects deleted rows.
    if (tableExists(db, 'messages_fts')) {
      db.prepare("INSERT INTO messages_fts(messages_fts) VALUES('rebuild')").run();
    }
    // Keep denormalized conversation counters consistent with the new row set.
    if (tableExists(db, 'conversations') && columnExists(db, 'conversations', 'message_count')) {
      for (const conversationId of affectedConversations) {
        db.prepare(`UPDATE conversations
             SET message_count = (SELECT COUNT(*) FROM messages WHERE conversation_id = ?),
                 updated_at = COALESCE((SELECT MAX(created_at) FROM messages WHERE conversation_id = ?), updated_at)
           WHERE id = ?`).run(conversationId, conversationId, conversationId);
      }
    }
    // Commit only a provably-consistent database.
    assertChecksClean(runIntegrityChecks(db));
    db.exec('COMMIT');
    return { deleted, affectedConversations: affectedConversations.size };
  } catch (err) {
    // Best-effort rollback; the original error is the one worth surfacing.
    try { db.exec('ROLLBACK'); } catch {}
    throw err;
  }
}
272
+
273
/**
 * CLI entry point: scan every target messages.db for stamped user replay
 * duplicates and report them (dry-run default) or repair them (--apply).
 *
 * Apply mode backs up each affected database first (unless --no-backup),
 * repairs it transactionally, then re-scans to prove convergence. Output is
 * human-readable by default or JSON with --json; message content is never
 * printed.
 * @returns {Promise<void>}
 * @throws {Error} On unresolvable targets or failed repair (caller exits 1).
 */
async function main() {
  const argv = parseArgs(process.argv.slice(2));
  if (argv.help) {
    usage();
    return;
  }
  const apply = Boolean(argv.apply);
  const json = Boolean(argv.json);
  // --examples may be absent (default 5), a bare flag (true), or a value.
  // Guard against non-numeric values: previously `--examples abc` produced
  // NaN and silently suppressed every example.
  const requestedExamples = Number(argv.examples ?? 5);
  const exampleLimit = argv.examples === true || !Number.isFinite(requestedExamples)
    ? 5
    : Math.max(0, requestedExamples);
  const targets = resolveDbTargets({ dataDir: argv['data-dir'], dbPath: argv.db, agent: argv.agent });
  const results = [];

  for (const target of targets) {
    const db = new DatabaseSync(target.dbPath);
    try {
      const groups = findStampedUserDuplicateGroups(db);
      const summary = summarizeTarget(target, groups, exampleLimit);
      summary.mode = 'stamped-user-replay-duplicates';
      summary.dryRun = !apply;
      if (apply && groups.length > 0) {
        // Back up before mutating unless the operator explicitly opted out.
        if (!argv['no-backup']) summary.backupPath = makeBackup(db, target.dbPath, argv['backup-dir']);
        const applied = applyGroups(db, groups);
        summary.deletedRows = applied.deleted;
        // Re-scan so the report proves the repair actually converged to zero.
        summary.postApplyDuplicateGroups = findStampedUserDuplicateGroups(db).length;
      }
      results.push(summary);
    } finally {
      db.close();
    }
  }

  const total = results.reduce((acc, r) => {
    acc.duplicateGroups += r.duplicateGroups;
    acc.duplicateRows += r.duplicateRows;
    acc.deletedRows += r.deletedRows || 0;
    return acc;
  }, { duplicateGroups: 0, duplicateRows: 0, deletedRows: 0 });

  const payload = { ok: true, dryRun: !apply, total, results };
  if (json) {
    console.log(JSON.stringify(payload, null, 2));
    return;
  }

  console.log(`HyperMem cleanup ${apply ? 'apply' : 'dry-run'}: stamped user replay duplicates`);
  console.log(`Targets: ${results.length}`);
  console.log(`Duplicate groups: ${total.duplicateGroups}`);
  console.log(`Duplicate rows: ${total.duplicateRows}`);
  if (apply) console.log(`Deleted rows: ${total.deletedRows}`);
  // Per-DB lines only for affected targets, capped to keep output readable.
  for (const r of results.filter((item) => item.duplicateRows > 0).slice(0, 20)) {
    console.log(`- ${r.agent}: groups=${r.duplicateGroups} rows=${r.duplicateRows} first=${r.firstAt || 'n/a'} last=${r.lastAt || 'n/a'}`);
    if (r.backupPath) console.log(`  backup=${r.backupPath}`);
  }
  if (!apply && total.duplicateRows > 0) {
    console.log('\nNo changes made. Re-run with --apply to repair after stopping Gateway/OpenClaw writers.');
  }
}
330
+
331
// Top-level runner: any failure is reported on stderr and converted into a
// non-zero exit code so shell scripts and CI wrappers can detect it.
main().catch((err) => {
  console.error(`hypermem-cleanup failed: ${err.message}`);
  exit(1);
});