clawmem 0.8.0 → 0.8.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/AGENTS.md +3 -3
- package/CLAUDE.md +3 -3
- package/README.md +33 -1
- package/SKILL.md +4 -4
- package/package.json +1 -1
- package/src/clawmem.ts +72 -45
- package/src/consolidation.ts +162 -37
- package/src/hooks/context-surfacing.ts +160 -16
- package/src/hooks.ts +9 -1
- package/src/maintenance.ts +57 -2
- package/src/mcp.ts +51 -43
- package/src/store.ts +61 -6
package/AGENTS.md
CHANGED
|
@@ -94,9 +94,9 @@ curl http://host:8090/v1/models
|
|
|
94
94
|
| `CLAWMEM_NO_LOCAL_MODELS` | `false` | Blocks `node-llama-cpp` from auto-downloading GGUF models. Set `true` for remote-only setups. |
|
|
95
95
|
| `CLAWMEM_VAULTS` | (none) | JSON map of vault name → SQLite path for multi-vault mode. E.g. `{"work":"~/.cache/clawmem/work.sqlite"}` |
|
|
96
96
|
| `CLAWMEM_ENABLE_AMEM` | enabled | A-MEM note construction + link generation during indexing. |
|
|
97
|
-
| `CLAWMEM_ENABLE_CONSOLIDATION` | disabled | Background worker backfills unenriched docs.
|
|
97
|
+
| `CLAWMEM_ENABLE_CONSOLIDATION` | disabled | Background worker backfills unenriched docs and runs Phase 2/3 consolidation + deductive synthesis. **v0.8.2:** every tick is wrapped in a DB-backed `worker_leases` row (`light-consolidation` key), so multiple host processes against the same vault cannot race on Phase 2 merge writes. Hosted by either `clawmem watch` (canonical, long-lived) or `clawmem mcp` (per-session fallback). |
|
|
98
98
|
| `CLAWMEM_CONSOLIDATION_INTERVAL` | 300000 | Worker interval in ms (min 15000). |
|
|
99
|
-
| `CLAWMEM_HEAVY_LANE` | disabled | **v0.8.0.** Enable the quiet-window heavy maintenance worker — a second, longer-interval consolidation lane with DB-backed `worker_leases` exclusivity, stale-first batching, and `maintenance_runs` journaling. Runs alongside the light lane; off by default. |
|
|
99
|
+
| `CLAWMEM_HEAVY_LANE` | disabled | **v0.8.0.** Enable the quiet-window heavy maintenance worker — a second, longer-interval consolidation lane with DB-backed `worker_leases` exclusivity, stale-first batching, and `maintenance_runs` journaling. Runs alongside the light lane; off by default. **v0.8.2:** canonical host is `clawmem watch` (e.g. systemd `clawmem-watcher.service`); `clawmem mcp` retains the same gate as a fallback host but emits a stderr warning advising operators to move heavy-lane hosting to the watcher because per-session stdio MCPs may never be alive during the configured quiet window. |
|
|
100
100
|
| `CLAWMEM_HEAVY_LANE_INTERVAL` | 1800000 | **v0.8.0.** Heavy-lane tick interval in ms (min 30000, default 30 min). |
|
|
101
101
|
| `CLAWMEM_HEAVY_LANE_WINDOW_START` | (none) | **v0.8.0.** Start hour (0-23) of the quiet window. Unset → no window. |
|
|
102
102
|
| `CLAWMEM_HEAVY_LANE_WINDOW_END` | (none) | **v0.8.0.** End hour (0-23, exclusive) of the quiet window. Supports midnight wrap (22→6). |
|
|
@@ -259,7 +259,7 @@ ClawMem hooks handle ~90% of retrieval automatically. Agent-initiated MCP calls
|
|
|
259
259
|
|
|
260
260
|
| Hook | Trigger | Budget | Content |
|
|
261
261
|
|------|---------|--------|---------|
|
|
262
|
-
| `context-surfacing` | UserPromptSubmit | profile-driven (default 800) | retrieval gate → profile-driven hybrid search (vector if `useVector`, timeout from profile) → FTS supplement → file-aware supplemental search (E13) → snooze filter → noise filter → spreading activation (E11: co-activated doc boost) → memory type diversification (E10) → tiered injection (HOT/WARM/COLD snippets) → `<vault-context><instruction>…</instruction><facts>…</facts><relationships>…</relationships></vault-context>` (v0.7.1: instruction always prepended when context is returned; relationships block lists memory-graph edges where BOTH endpoints are in the surfaced set, truncated first when over budget) + optional `<vault-routing>` hint. Budget, max results, vector timeout, and min score all driven by `CLAWMEM_PROFILE`. |
|
|
262
|
+
| `context-surfacing` | UserPromptSubmit | profile-driven (default 800) | retrieval gate → **multi-turn query construction** (v0.8.1: current prompt + up to 2 recent same-session priors from `context_usage.query_text`, 10-min max age, capped at 2000 chars with current-first preservation — used only for discovery: vector/FTS/expansion, NOT for rerank/scoring/snippet extraction) → profile-driven hybrid search (vector if `useVector`, timeout from profile) → FTS supplement → file-aware supplemental search (E13, raw current prompt) → snooze filter → noise filter → spreading activation (E11: co-activated doc boost) → memory type diversification (E10) → tiered injection (HOT/WARM/COLD snippets) → `<vault-context><instruction>…</instruction><facts>…</facts><relationships>…</relationships></vault-context>` (v0.7.1: instruction always prepended when context is returned; relationships block lists memory-graph edges where BOTH endpoints are in the surfaced set, truncated first when over budget) + optional `<vault-routing>` hint. Budget, max results, vector timeout, and min score all driven by `CLAWMEM_PROFILE`. Raw prompt persisted to `context_usage.query_text` for future multi-turn lookback — except on gated skip paths (slash commands, heartbeats, too-short prompts) where the text is withheld for privacy. |
|
|
263
263
|
| `postcompact-inject` | SessionStart (compact) | 1200 tokens | re-injects authoritative context after compaction: precompact state (600) + recent decisions (400) + antipatterns (150) + vault context (200) → `<vault-postcompact>` |
|
|
264
264
|
| `curator-nudge` | SessionStart | 200 tokens | surfaces curator report actions, nudges when report is stale (>7 days) |
|
|
265
265
|
| `precompact-extract` | PreCompact | — | extracts decisions, file paths, open questions → writes `precompact-state.md` to auto-memory. Query-aware decision ranking. Reindexes auto-memory collection. |
|
package/CLAUDE.md
CHANGED
|
@@ -94,9 +94,9 @@ curl http://host:8090/v1/models
|
|
|
94
94
|
| `CLAWMEM_NO_LOCAL_MODELS` | `false` | Blocks `node-llama-cpp` from auto-downloading GGUF models. Set `true` for remote-only setups. |
|
|
95
95
|
| `CLAWMEM_VAULTS` | (none) | JSON map of vault name → SQLite path for multi-vault mode. E.g. `{"work":"~/.cache/clawmem/work.sqlite"}` |
|
|
96
96
|
| `CLAWMEM_ENABLE_AMEM` | enabled | A-MEM note construction + link generation during indexing. |
|
|
97
|
-
| `CLAWMEM_ENABLE_CONSOLIDATION` | disabled | Background worker backfills unenriched docs.
|
|
97
|
+
| `CLAWMEM_ENABLE_CONSOLIDATION` | disabled | Background worker backfills unenriched docs and runs Phase 2/3 consolidation + deductive synthesis. **v0.8.2:** every tick is wrapped in a DB-backed `worker_leases` row (`light-consolidation` key), so multiple host processes against the same vault cannot race on Phase 2 merge writes. Hosted by either `clawmem watch` (canonical, long-lived) or `clawmem mcp` (per-session fallback). |
|
|
98
98
|
| `CLAWMEM_CONSOLIDATION_INTERVAL` | 300000 | Worker interval in ms (min 15000). |
|
|
99
|
-
| `CLAWMEM_HEAVY_LANE` | disabled | **v0.8.0.** Enable the quiet-window heavy maintenance worker — a second, longer-interval consolidation lane with DB-backed `worker_leases` exclusivity, stale-first batching, and `maintenance_runs` journaling. Runs alongside the light lane; off by default. |
|
|
99
|
+
| `CLAWMEM_HEAVY_LANE` | disabled | **v0.8.0.** Enable the quiet-window heavy maintenance worker — a second, longer-interval consolidation lane with DB-backed `worker_leases` exclusivity, stale-first batching, and `maintenance_runs` journaling. Runs alongside the light lane; off by default. **v0.8.2:** canonical host is `clawmem watch` (e.g. systemd `clawmem-watcher.service`); `clawmem mcp` retains the same gate as a fallback host but emits a stderr warning advising operators to move heavy-lane hosting to the watcher because per-session stdio MCPs may never be alive during the configured quiet window. |
|
|
100
100
|
| `CLAWMEM_HEAVY_LANE_INTERVAL` | 1800000 | **v0.8.0.** Heavy-lane tick interval in ms (min 30000, default 30 min). |
|
|
101
101
|
| `CLAWMEM_HEAVY_LANE_WINDOW_START` | (none) | **v0.8.0.** Start hour (0-23) of the quiet window. Unset → no window. |
|
|
102
102
|
| `CLAWMEM_HEAVY_LANE_WINDOW_END` | (none) | **v0.8.0.** End hour (0-23, exclusive) of the quiet window. Supports midnight wrap (22→6). |
|
|
@@ -259,7 +259,7 @@ ClawMem hooks handle ~90% of retrieval automatically. Agent-initiated MCP calls
|
|
|
259
259
|
|
|
260
260
|
| Hook | Trigger | Budget | Content |
|
|
261
261
|
|------|---------|--------|---------|
|
|
262
|
-
| `context-surfacing` | UserPromptSubmit | profile-driven (default 800) | retrieval gate → profile-driven hybrid search (vector if `useVector`, timeout from profile) → FTS supplement → file-aware supplemental search (E13) → snooze filter → noise filter → spreading activation (E11: co-activated doc boost) → memory type diversification (E10) → tiered injection (HOT/WARM/COLD snippets) → `<vault-context><instruction>…</instruction><facts>…</facts><relationships>…</relationships></vault-context>` (v0.7.1: instruction always prepended when context is returned; relationships block lists memory-graph edges where BOTH endpoints are in the surfaced set, truncated first when over budget) + optional `<vault-routing>` hint. Budget, max results, vector timeout, and min score all driven by `CLAWMEM_PROFILE`. |
|
|
262
|
+
| `context-surfacing` | UserPromptSubmit | profile-driven (default 800) | retrieval gate → **multi-turn query construction** (v0.8.1: current prompt + up to 2 recent same-session priors from `context_usage.query_text`, 10-min max age, capped at 2000 chars with current-first preservation — used only for discovery: vector/FTS/expansion, NOT for rerank/scoring/snippet extraction) → profile-driven hybrid search (vector if `useVector`, timeout from profile) → FTS supplement → file-aware supplemental search (E13, raw current prompt) → snooze filter → noise filter → spreading activation (E11: co-activated doc boost) → memory type diversification (E10) → tiered injection (HOT/WARM/COLD snippets) → `<vault-context><instruction>…</instruction><facts>…</facts><relationships>…</relationships></vault-context>` (v0.7.1: instruction always prepended when context is returned; relationships block lists memory-graph edges where BOTH endpoints are in the surfaced set, truncated first when over budget) + optional `<vault-routing>` hint. Budget, max results, vector timeout, and min score all driven by `CLAWMEM_PROFILE`. Raw prompt persisted to `context_usage.query_text` for future multi-turn lookback — except on gated skip paths (slash commands, heartbeats, too-short prompts) where the text is withheld for privacy. |
|
|
263
263
|
| `postcompact-inject` | SessionStart (compact) | 1200 tokens | re-injects authoritative context after compaction: precompact state (600) + recent decisions (400) + antipatterns (150) + vault context (200) → `<vault-postcompact>` |
|
|
264
264
|
| `curator-nudge` | SessionStart | 200 tokens | surfaces curator report actions, nudges when report is stale (>7 days) |
|
|
265
265
|
| `precompact-extract` | PreCompact | — | extracts decisions, file paths, open questions → writes `precompact-state.md` to auto-memory. Query-aware decision ranking. Reindexes auto-memory collection. |
|
package/README.md
CHANGED
|
@@ -95,6 +95,35 @@ A second, longer-interval consolidation worker that keeps Phase 2 + Phase 3 runn
|
|
|
95
95
|
|
|
96
96
|
Adds +56 tests (13 worker-lease + 35 maintenance unit + 8 maintenance integration) on top of the v0.7.2 baseline.
|
|
97
97
|
|
|
98
|
+
### v0.8.1 Multi-Turn Prior-Query Lookback
|
|
99
|
+
|
|
100
|
+
`context-surfacing` now builds its retrieval query from the current prompt plus up to two recent same-session prior prompts, so a short follow-up turn ("do the same for X", "explain the rationale") can still inherit the vocabulary of earlier turns. The raw prompt is persisted in a new nullable `context_usage.query_text` column so future hook ticks can reconstitute the multi-turn query from the DB. See [multi-turn lookback](docs/concepts/architecture.md#multi-turn-prior-query-lookback-v081) for the full walkthrough.
|
|
101
|
+
|
|
102
|
+
- **Additive schema migration** — new nullable `query_text TEXT` column on `context_usage`, guarded by `PRAGMA table_info`. Pre-v0.8.1 stores get the column added on first open; ad-hoc stores that skip the migration path degrade transparently via a feature-detect WeakMap so `insertUsageFn` never writes a column that doesn't exist.
|
|
103
|
+
- **Discovery path only** — the multi-turn query feeds vector search, BM25, and query expansion. Cross-encoder reranking continues to use the RAW current prompt so relevance scoring is not diluted by older turns, and composite scoring / snippet extraction / dedupe / routing-hint detection all remain on the raw prompt as well.
|
|
104
|
+
- **Privacy-conscious persistence split** — gated skip paths (slash commands, `MIN_PROMPT_LENGTH`, `shouldSkipRetrieval`, heartbeat dedupe) do NOT persist their raw text because those turns are not meaningful user questions and carry a higher sensitivity profile. Post-retrieval empty paths (empty result set, threshold blocked, budget blocked) DO persist so a follow-up turn can still inherit the intent even when the current turn surfaced nothing.
|
|
105
|
+
- **Current-first truncation** — the combined query is clamped to 2000 chars with the current prompt preserved verbatim at the head. Older priors are dropped first when the budget runs out. If the current prompt alone already exceeds the cap, priors are omitted entirely and the current prompt is truncated.
|
|
106
|
+
- **SQL-level self-match guard** — duplicate submits of the same prompt are filtered out of the lookback SELECT via `AND query_text != ?` so a retry burst cannot eat into the 2-prior budget and leave the lookback window underfilled.
|
|
107
|
+
- **10-minute max age, session-scoped** — priors older than 10 minutes or from a different `session_id` are invisible to the lookback. All fallback paths (missing column, DB error, no matching rows) return the current prompt unchanged — the hook never throws on lookback failures.
|
|
108
|
+
|
|
109
|
+
Adds +27 tests (22 unit + 5 integration) on top of the v0.8.0 baseline.
|
|
110
|
+
|
|
111
|
+
### v0.8.2 Dual-Host Worker Architecture
|
|
112
|
+
|
|
113
|
+
Both maintenance lanes can now be hosted by the long-lived `clawmem watch` watcher service in addition to the existing per-session `clawmem mcp` host. This makes the systemd-managed watcher the canonical 24/7 home for the v0.8.0 heavy maintenance lane — its quiet-window logic finally sees a live worker at the configured hours regardless of whether any Claude Code session is open. The light consolidation lane (Phase 1 backfill + Phase 2 merge + Phase 3 deductive synthesis + Phase 4 recall stats) now also acquires its own DB-backed `worker_leases` row before each tick, symmetric with the heavy lane's existing exclusivity, so multiple host processes against the same vault cannot race on Phase 2 merges or Phase 3 deductive writes.
|
|
114
|
+
|
|
115
|
+
- **Light-lane worker lease** — `runConsolidationTick` wraps every tick (Phase 1 → 4) in `withWorkerLease` against a new `light-consolidation` worker name with a 10-minute TTL. Two host processes (e.g. one watcher service + one per-session stdio MCP) cannot both consolidate the same near-duplicate observations or both INSERT a duplicate row into `consolidated_observations`. Phase 1 enrichment is also serialized — overkill for cost but cleaner for symmetry. The in-process `isRunning` reentrancy guard remains the cheap first defense before the SQLite lease round-trip.
|
|
116
|
+
- **`cmdWatch` hosts both workers** — `clawmem watch` honors the same `CLAWMEM_ENABLE_CONSOLIDATION` and `CLAWMEM_HEAVY_LANE` env-var gates as `cmdMcp`. Off by default. Mirror the existing systemd unit (or your wrapper `.env`) to opt in. The recommended deployment for v0.8.2+ is to set both env vars on `clawmem-watcher.service` and leave `cmdMcp` unset, so the heavy lane has a continuously available host independent of Claude Code session lifecycle.
|
|
117
|
+
- **`cmdMcp` is now a fallback host with a heavy-lane warning** — `cmdMcp` retains the same env-var gates so non-watcher deployments (e.g. macOS users running everything via Claude Code launchd) keep working unchanged. When `CLAWMEM_HEAVY_LANE=true` is set on a stdio MCP host, `cmdMcp` emits a one-line warning to stderr advising operators to move heavy-lane hosting to `clawmem watch` instead.
|
|
118
|
+
- **Async drain on shutdown** — both worker stop helpers (`stopConsolidationWorker` and the closure returned by `startHeavyMaintenanceWorker`) are now `async`, clearing their `setInterval` AND polling their in-flight running flag until any mid-tick worker drains. This guarantees the worker's `withWorkerLease` finally block runs against a still-open store, so the lease is released cleanly instead of leaking until TTL expiry. Bounded waits (15s light, 30s heavy) prevent a stuck tick from wedging shutdown indefinitely; the next process reclaims any stranded lease atomically.
|
|
119
|
+
- **Signal handlers registered before worker startup** — both `cmdWatch` and `cmdMcp` now register their `SIGINT`/`SIGTERM` handlers BEFORE any worker initialization. A signal arriving in the brief window between worker startup and handler registration would otherwise terminate the host via the default signal action (exit 143) and skip the async drain entirely.
|
|
120
|
+
- **Subprocess smoke test** — new `tests/integration/cmdwatch-workers.integration.test.ts` spawns `bun src/clawmem.ts watch` against a temp vault with short worker intervals, exercises both the env-var gates and a real heavy-lane tick (slow path, ~35s), and asserts the lease is released cleanly on `SIGTERM`.
|
|
121
|
+
- **Bug fix: removed dead skill-vault watcher block from `clawmem.ts cmdWatch()`** — a try/catch-wrapped block had been silently destructuring `getSkillContentRoot` from `./config.ts`, but that helper is forge-internal and was never exported in public ClawMem. The runtime catch swallowed the failure, so it had no observable effect, but TypeScript flagged a static `TS2339` error on the destructure. v0.8.2 removes the dead code path. No behavior change for public users.
|
|
122
|
+
|
|
123
|
+
Adds +15 tests (9 light-lane lease unit + 5 cmdWatch fast subprocess + 1 cmdWatch slow subprocess) on top of the v0.8.1 baseline.
|
|
124
|
+
|
|
125
|
+
For operational guidance — enabling the workers via systemd drop-in, tuning intervals to your usage pattern, monitoring queries, and rollback steps — see [docs/guides/systemd-services.md](docs/guides/systemd-services.md#background-maintenance-workers-v082).
|
|
126
|
+
|
|
98
127
|
## Architecture
|
|
99
128
|
|
|
100
129
|
<p align="center">
|
|
@@ -160,7 +189,7 @@ After installing, here's the full journey from zero to working memory:
|
|
|
160
189
|
| **1. Bootstrap** | Create a vault, index your first collection, embed, install hooks and MCP | `clawmem bootstrap ~/notes --name notes` | One command does it all. Or run each step manually (see below). |
|
|
161
190
|
| **2. Choose models** | Pick embedding + reranker models based on your hardware | 12GB+ VRAM → SOTA stack (zembed-1 + zerank-2). Less → QMD native combo. No GPU → cloud embedding or CPU fallback. | [GPU Services](#gpu-services) |
|
|
162
191
|
| **3. Download models** | Get the GGUF files for your chosen stack | `wget` from HuggingFace, or let `node-llama-cpp` auto-download the QMD native models on first use | [Embedding](#embedding), [LLM Server](#llm-server), [Reranker Server](#reranker-server) |
|
|
163
|
-
| **4. Start services** | Run GPU servers (if using dedicated GPU) and background services | `llama-server` for each model. systemd units for watcher + embed timer. | [systemd services](docs/guides/systemd-services.md) |
|
|
192
|
+
| **4. Start services** | Run GPU servers (if using dedicated GPU) and background services. Optionally enable the v0.8.2 background maintenance workers in the watcher unit so consolidation + deductive synthesis run automatically. | `llama-server` for each model. systemd units for watcher + embed timer. Drop-in for the watcher to enable workers + tune intervals + set the quiet window. | [systemd services](docs/guides/systemd-services.md), [background workers](docs/guides/systemd-services.md#background-maintenance-workers-v082) |
|
|
164
193
|
| **5. Decide what to index** | Add collections for your projects, notes, research, and domain docs | `clawmem collection add ~/project --name project` | The more relevant markdown you index, the better retrieval works. See [building a rich context field](docs/introduction.md#building-a-rich-context-field). |
|
|
165
194
|
| **6. Connect your agent** | Hook into Claude Code, OpenClaw, Hermes, or any MCP client | `clawmem setup hooks && clawmem setup mcp` for Claude Code. `clawmem setup openclaw` for OpenClaw. Copy `src/hermes/` to Hermes plugins for Hermes. | [Integration](#integration) |
|
|
166
195
|
| **7. Verify** | Confirm everything is working | `clawmem doctor` (full health check) or `clawmem status` (quick index stats) | [Verify Installation](#verify-installation) |
|
|
@@ -210,6 +239,8 @@ clawmem embed # Re-embed if upgrading embedding models (not needed f
|
|
|
210
239
|
|
|
211
240
|
Routine patch updates (e.g. 0.2.0 → 0.2.1) do not require reindexing.
|
|
212
241
|
|
|
242
|
+
For version-specific upgrade notes (opt-in features, optional cleanup steps, verification commands), see [docs/guides/upgrading.md](docs/guides/upgrading.md).
|
|
243
|
+
|
|
213
244
|
### Integration
|
|
214
245
|
|
|
215
246
|
#### Claude Code
|
|
@@ -1187,6 +1218,7 @@ Built on the shoulders of:
|
|
|
1187
1218
|
- [QMD](https://github.com/tobi/qmd) — search backend (BM25 + vectors + RRF + reranking)
|
|
1188
1219
|
- [SAME](https://github.com/sgx-labs/statelessagent) — agent memory concepts (recency decay, confidence scoring, session tracking)
|
|
1189
1220
|
- [supermemory](https://github.com/supermemoryai/clawdbot-supermemory) — hook patterns and context surfacing ideas
|
|
1221
|
+
- [Thoth](https://github.com/siddsachar/Thoth) — anti-contamination deductive synthesis, contradiction-aware + name-aware merge gates, post-import conversation fact extraction, quiet-window heavy maintenance lane with worker leases, context instruction framing, relationship snippets, multi-turn prior-query lookback
|
|
1190
1222
|
|
|
1191
1223
|
## Roadmap
|
|
1192
1224
|
|
package/SKILL.md
CHANGED
|
@@ -85,14 +85,14 @@ curl http://host:8090/v1/models
|
|
|
85
85
|
| `CLAWMEM_RERANK_URL` | `http://localhost:8090` | Reranker server. Falls to `node-llama-cpp` if unset + `NO_LOCAL_MODELS=false`. |
|
|
86
86
|
| `CLAWMEM_NO_LOCAL_MODELS` | `false` | Blocks `node-llama-cpp` auto-downloads. Set `true` for remote-only. |
|
|
87
87
|
| `CLAWMEM_ENABLE_AMEM` | enabled | A-MEM note construction + link generation during indexing. |
|
|
88
|
-
| `CLAWMEM_ENABLE_CONSOLIDATION` | disabled |
|
|
88
|
+
| `CLAWMEM_ENABLE_CONSOLIDATION` | disabled | Light-lane consolidation worker (Phase 1 backfill + Phase 2 merge + Phase 3 deductive synthesis + Phase 4 recall stats). **v0.8.2:** every tick wraps in a `worker_leases` row (`light-consolidation` key) so multiple host processes against the same vault cannot race on Phase 2 merges. Hosted by `clawmem watch` (canonical) or `clawmem mcp` (per-session fallback). |
|
|
89
89
|
| `CLAWMEM_CONSOLIDATION_INTERVAL` | 300000 | Worker interval in ms (min 15000). |
|
|
90
90
|
| `CLAWMEM_MERGE_SCORE_NORMAL` | `0.93` | **v0.7.1.** Phase 2 merge-safety score threshold when candidate and existing anchors align. |
|
|
91
91
|
| `CLAWMEM_MERGE_SCORE_STRICT` | `0.98` | **v0.7.1.** Strictest merge-safety threshold (fallback when anchors are ambiguous). |
|
|
92
92
|
| `CLAWMEM_MERGE_GUARD_DRY_RUN` | `false` | **v0.7.1.** When `true`, Phase 2 merge-safety rejections are logged but not enforced — use for calibration. |
|
|
93
93
|
| `CLAWMEM_CONTRADICTION_POLICY` | `link` | **v0.7.1.** How the merge-time contradiction gate handles a blocked merge. `link` (default) keeps both rows + inserts `contradicts` edge. `supersede` marks the old row `status='inactive'`. |
|
|
94
94
|
| `CLAWMEM_CONTRADICTION_MIN_CONFIDENCE` | `0.5` | **v0.7.1.** Minimum combined heuristic+LLM confidence required before the contradiction gate blocks a merge. |
|
|
95
|
-
| `CLAWMEM_HEAVY_LANE` | disabled | **v0.8.0.** Enable the quiet-window heavy maintenance worker — a second, longer-interval consolidation lane with DB-backed `worker_leases` exclusivity, stale-first batching, and `maintenance_runs` journaling. Runs alongside the light lane. |
|
|
95
|
+
| `CLAWMEM_HEAVY_LANE` | disabled | **v0.8.0.** Enable the quiet-window heavy maintenance worker — a second, longer-interval consolidation lane with DB-backed `worker_leases` exclusivity, stale-first batching, and `maintenance_runs` journaling. Runs alongside the light lane. **v0.8.2:** canonical host is `clawmem watch` (e.g. systemd `clawmem-watcher.service`); `clawmem mcp` retains the same gate as a fallback host but emits a stderr warning advising operators to move heavy-lane hosting to the watcher because per-session stdio MCPs may never be alive during the configured quiet window. |
|
|
96
96
|
| `CLAWMEM_HEAVY_LANE_INTERVAL` | 1800000 | **v0.8.0.** Heavy-lane tick interval in ms (min 30000, default 30 min). |
|
|
97
97
|
| `CLAWMEM_HEAVY_LANE_WINDOW_START` / `_END` | (none) | **v0.8.0.** Start/end hours (0-23) of the quiet window. Supports midnight wrap (22→6). Null on either bound = always in window. |
|
|
98
98
|
| `CLAWMEM_HEAVY_LANE_MAX_USAGES` | 30 | **v0.8.0.** Max `context_usage` rows in the last 10 min before the heavy lane skips with `reason='query_rate_high'`. |
|
|
@@ -190,7 +190,7 @@ Hooks handle ~90% of retrieval. Zero agent effort.
|
|
|
190
190
|
|
|
191
191
|
| Hook | Trigger | Budget | Content |
|
|
192
192
|
|------|---------|--------|---------|
|
|
193
|
-
| `context-surfacing` | UserPromptSubmit | profile-driven (default 800) | retrieval gate -> profile-driven hybrid search (vector if `useVector`, timeout from profile) -> FTS supplement -> file-aware search (E13) -> snooze filter -> noise filter -> spreading activation (E11) -> memory type diversification (E10) -> tiered injection (HOT/WARM/COLD) -> `<vault-context><instruction>...</instruction><facts>...</facts><relationships>...</relationships></vault-context>` (v0.7.1: instruction always prepended; relationships list memory-graph edges where BOTH endpoints are in the surfaced set; relationships truncated first when over budget) + optional `<vault-routing>` hint. Budget, max results, vector timeout, min score all driven by `CLAWMEM_PROFILE`. |
|
|
193
|
+
| `context-surfacing` | UserPromptSubmit | profile-driven (default 800) | retrieval gate -> **multi-turn query** (v0.8.1: current + up to 2 recent same-session priors from `context_usage.query_text`, 10-min max age, 2000-char cap with current-first, used only for discovery — not rerank/scoring/snippet) -> profile-driven hybrid search (vector if `useVector`, timeout from profile) -> FTS supplement -> file-aware search (E13, raw current) -> snooze filter -> noise filter -> spreading activation (E11) -> memory type diversification (E10) -> tiered injection (HOT/WARM/COLD) -> `<vault-context><instruction>...</instruction><facts>...</facts><relationships>...</relationships></vault-context>` (v0.7.1: instruction always prepended; relationships list memory-graph edges where BOTH endpoints are in the surfaced set; relationships truncated first when over budget) + optional `<vault-routing>` hint. Budget, max results, vector timeout, min score all driven by `CLAWMEM_PROFILE`. Raw prompt persisted to `context_usage.query_text` for future lookback — gated skip paths (slash commands, heartbeats, too-short prompts) withhold the text for privacy. |
|
|
194
194
|
| `postcompact-inject` | SessionStart (compact) | 1200 tokens | re-injects authoritative context after compaction: precompact state (600) + decisions (400) + antipatterns (150) + vault context (200) -> `<vault-postcompact>` |
|
|
195
195
|
| `curator-nudge` | SessionStart | 200 tokens | surfaces curator report actions, nudges when report is stale (>7 days) |
|
|
196
196
|
| `precompact-extract` | PreCompact | — | extracts decisions, file paths, open questions -> writes `precompact-state.md`. Query-aware ranking. Reindexes auto-memory. |
|
|
@@ -767,7 +767,7 @@ clawmem consolidate [--dry-run] # Find and archive duplicate low-confidence docu
|
|
|
767
767
|
- SAME (composite scoring), MAGMA (intent + graph), A-MEM (self-evolving notes) layer on top of QMD substrate.
|
|
768
768
|
- Three `llama-server` instances on local or remote GPU. Wrapper defaults to `localhost:8088/8089/8090`.
|
|
769
769
|
- `CLAWMEM_NO_LOCAL_MODELS=false` (default) allows in-process fallback. Set `true` for remote-only to fail fast.
|
|
770
|
-
- Consolidation worker (`CLAWMEM_ENABLE_CONSOLIDATION=true`) backfills unenriched docs
|
|
770
|
+
- Consolidation worker (`CLAWMEM_ENABLE_CONSOLIDATION=true`) backfills unenriched docs and runs Phase 2 merge / Phase 3 deductive synthesis. **v0.8.2:** hosted by either `clawmem watch` (long-lived, canonical) or `clawmem mcp` (per-session fallback); every tick acquires a `light-consolidation` `worker_leases` row before doing work, so dual-hosting against the same vault is safe.
|
|
771
771
|
- Beads integration: `syncBeadsIssues()` queries `bd` CLI (Dolt backend, v0.58.0+), creates markdown docs, maps dependency edges into `memory_relations`. Watcher auto-triggers on `.beads/` changes; `beads_sync` MCP for manual sync.
|
|
772
772
|
- HTTP REST API: `clawmem serve [--port 7438]` — optional REST server on localhost. Search, retrieval, lifecycle, and graph traversal. `POST /retrieve` mirrors `memory_retrieve` with auto-routing (keyword/semantic/causal/timeline/hybrid). `POST /search` provides direct mode selection. Bearer token auth via `CLAWMEM_API_TOKEN` env var (disabled if unset).
|
|
773
773
|
- OpenClaw ContextEngine plugin: `clawmem setup openclaw` — registers as native OpenClaw context engine. Dual-mode: shares vault with Claude Code hooks. Uses `before_prompt_build` for retrieval, `afterTurn()` for extraction, `compact()` for pre-compaction + runtime delegation (v0.3.0+, required for OpenClaw v2026.3.28+).
|
package/package.json
CHANGED
package/src/clawmem.ts
CHANGED
|
@@ -45,6 +45,14 @@ import { enrichResults, reciprocalRankFusion, toRanked, type RankedResult } from
|
|
|
45
45
|
import { splitDocument } from "./splitter.ts";
|
|
46
46
|
import { getProfile, updateProfile, isProfileStale } from "./profile.ts";
|
|
47
47
|
import { regenerateAllDirectoryContexts } from "./directory-context.ts";
|
|
48
|
+
import {
|
|
49
|
+
startConsolidationWorker,
|
|
50
|
+
stopConsolidationWorker,
|
|
51
|
+
} from "./consolidation.ts";
|
|
52
|
+
import {
|
|
53
|
+
parseHeavyLaneConfigFromEnv,
|
|
54
|
+
startHeavyMaintenanceWorker,
|
|
55
|
+
} from "./maintenance.ts";
|
|
48
56
|
import { readHookInput, writeHookOutput, makeEmptyOutput, type HookOutput } from "./hooks.ts";
|
|
49
57
|
import { contextSurfacing } from "./hooks/context-surfacing.ts";
|
|
50
58
|
import { sessionBootstrap } from "./hooks/session-bootstrap.ts";
|
|
@@ -1363,13 +1371,74 @@ async function cmdWatch() {
|
|
|
1363
1371
|
const dirs = collections.map(col => col.path);
|
|
1364
1372
|
const s = getStore();
|
|
1365
1373
|
|
|
1374
|
+
// v0.8.2 Codex Turn 1 fix: register signal handlers BEFORE any async
|
|
1375
|
+
// startup work or worker startup. Resources are declared as null and
|
|
1376
|
+
// assigned once their respective creators run; the shutdown closure
|
|
1377
|
+
// captures the variable references so updates after registration are
|
|
1378
|
+
// visible. Without this ordering, a SIGTERM arriving during the brief
|
|
1379
|
+
// window between the worker startup banner and the handler registration
|
|
1380
|
+
// would terminate the watcher via the default signal action (exit 143)
|
|
1381
|
+
// instead of running the async drain → release → close sequence.
|
|
1382
|
+
let stopHeavyLane: (() => Promise<void>) | null = null;
|
|
1383
|
+
let watcherHandle: { close: () => void } | null = null;
|
|
1384
|
+
let checkpointTimerHandle: Timer | null = null;
|
|
1385
|
+
|
|
1386
|
+
// Graceful shutdown — stop workers, close watchers, then exit. SIGTERM
|
|
1387
|
+
// handling is critical for systemd `systemctl --user stop` to shut down
|
|
1388
|
+
// cleanly instead of being killed by the unit timeout. Both worker stops
|
|
1389
|
+
// are awaited so any mid-tick worker drains and releases its lease via
|
|
1390
|
+
// its own withWorkerLease finally block before we close the store.
|
|
1391
|
+
const shutdown = async (signal: string) => {
|
|
1392
|
+
console.log(`\n${c.dim}[watch] Received ${signal}, shutting down...${c.reset}`);
|
|
1393
|
+
if (stopHeavyLane) {
|
|
1394
|
+
await stopHeavyLane();
|
|
1395
|
+
stopHeavyLane = null;
|
|
1396
|
+
}
|
|
1397
|
+
await stopConsolidationWorker();
|
|
1398
|
+
if (checkpointTimerHandle) {
|
|
1399
|
+
clearInterval(checkpointTimerHandle);
|
|
1400
|
+
checkpointTimerHandle = null;
|
|
1401
|
+
}
|
|
1402
|
+
if (watcherHandle) {
|
|
1403
|
+
watcherHandle.close();
|
|
1404
|
+
watcherHandle = null;
|
|
1405
|
+
}
|
|
1406
|
+
closeStore();
|
|
1407
|
+
process.exit(0);
|
|
1408
|
+
};
|
|
1409
|
+
process.on("SIGINT", () => { void shutdown("SIGINT"); });
|
|
1410
|
+
process.on("SIGTERM", () => { void shutdown("SIGTERM"); });
|
|
1411
|
+
|
|
1366
1412
|
console.log(`${c.bold}Watching ${dirs.length} collection(s) for changes...${c.reset}`);
|
|
1367
1413
|
for (const col of collections) {
|
|
1368
1414
|
console.log(` ${c.dim}${col.name}: ${col.path}${c.reset}`);
|
|
1369
1415
|
}
|
|
1370
1416
|
console.log(`${c.dim}Press Ctrl+C to stop.${c.reset}`);
|
|
1371
1417
|
|
|
1372
|
-
|
|
1418
|
+
// v0.8.2: Light + heavy maintenance lane workers (opt-in via env vars).
|
|
1419
|
+
// Hosting them in `cmdWatch` makes the long-lived watcher service the
|
|
1420
|
+
// canonical host for both lanes — `clawmem-watcher.service` runs 24/7
|
|
1421
|
+
// under systemd, so the heavy lane's quiet-window logic actually sees a
|
|
1422
|
+
// live worker at the configured hours regardless of whether any Claude
|
|
1423
|
+
// Code session is open. `cmdMcp` (stdio MCP) keeps the same env-var
|
|
1424
|
+
// gates as a fallback host, but warns when CLAWMEM_HEAVY_LANE=true
|
|
1425
|
+
// since per-session MCPs are short-lived. Both hosts share the same
|
|
1426
|
+
// DB-backed `worker_leases` exclusivity (heavy lane v0.8.0, light lane
|
|
1427
|
+
// v0.8.2), so running both at once is safe.
|
|
1428
|
+
if (Bun.env.CLAWMEM_ENABLE_CONSOLIDATION === "true") {
|
|
1429
|
+
const llm = getDefaultLlamaCpp();
|
|
1430
|
+
const intervalMs = parseInt(Bun.env.CLAWMEM_CONSOLIDATION_INTERVAL || "300000", 10);
|
|
1431
|
+
console.log(`${c.dim}[watch] Starting consolidation worker (light lane, interval=${intervalMs}ms)${c.reset}`);
|
|
1432
|
+
startConsolidationWorker(s, llm, intervalMs);
|
|
1433
|
+
}
|
|
1434
|
+
if (Bun.env.CLAWMEM_HEAVY_LANE === "true") {
|
|
1435
|
+
const llm = getDefaultLlamaCpp();
|
|
1436
|
+
const cfg = parseHeavyLaneConfigFromEnv();
|
|
1437
|
+
console.log(`${c.dim}[watch] Starting heavy maintenance lane worker${c.reset}`);
|
|
1438
|
+
stopHeavyLane = startHeavyMaintenanceWorker(s, llm, cfg);
|
|
1439
|
+
}
|
|
1440
|
+
|
|
1441
|
+
watcherHandle = startWatcher(dirs, {
|
|
1373
1442
|
debounceMs: 2000,
|
|
1374
1443
|
onChanged: async (fullPath, event) => {
|
|
1375
1444
|
// Find which collection this belongs to
|
|
@@ -1424,45 +1493,12 @@ async function cmdWatch() {
|
|
|
1424
1493
|
},
|
|
1425
1494
|
});
|
|
1426
1495
|
|
|
1427
|
-
// Skill vault watcher: watch _clawmem-skills/ content root if configured
|
|
1428
|
-
let skillWatcher: { close: () => void } | null = null;
|
|
1429
|
-
try {
|
|
1430
|
-
const { getVaultPath, getSkillContentRoot } = await import("./config.ts");
|
|
1431
|
-
const { resolveStore } = await import("./store.ts");
|
|
1432
|
-
const skillVaultPath = getVaultPath("skill");
|
|
1433
|
-
const skillRoot = getSkillContentRoot();
|
|
1434
|
-
|
|
1435
|
-
if (skillVaultPath && existsSync(skillRoot)) {
|
|
1436
|
-
const skillStore = resolveStore("skill");
|
|
1437
|
-
console.log(`${c.bold}Watching skill vault content root...${c.reset}`);
|
|
1438
|
-
console.log(` ${c.dim}skill: ${skillRoot} → ${skillVaultPath}${c.reset}`);
|
|
1439
|
-
|
|
1440
|
-
skillWatcher = startWatcher([skillRoot], {
|
|
1441
|
-
debounceMs: 2000,
|
|
1442
|
-
onChanged: async (fullPath, event) => {
|
|
1443
|
-
const relativePath = fullPath.slice(skillRoot.length + 1);
|
|
1444
|
-
console.log(`${c.dim}[${event}]${c.reset} skill/${relativePath}`);
|
|
1445
|
-
|
|
1446
|
-
const stats = await indexCollection(skillStore, "skill-observations", skillRoot, "**/*.md");
|
|
1447
|
-
if (stats.added > 0 || stats.updated > 0 || stats.removed > 0) {
|
|
1448
|
-
console.log(` skill: +${stats.added} ~${stats.updated} -${stats.removed}`);
|
|
1449
|
-
}
|
|
1450
|
-
},
|
|
1451
|
-
onError: (err) => {
|
|
1452
|
-
console.error(`${c.red}Skill watch error: ${err.message}${c.reset}`);
|
|
1453
|
-
},
|
|
1454
|
-
});
|
|
1455
|
-
}
|
|
1456
|
-
} catch {
|
|
1457
|
-
// Skill vault not configured — skip
|
|
1458
|
-
}
|
|
1459
|
-
|
|
1460
1496
|
// Periodic WAL checkpoint: the watcher holds a long-lived DB connection which
|
|
1461
1497
|
// prevents SQLite auto-checkpoint from shrinking the WAL file. Without this,
|
|
1462
1498
|
// the WAL grows unbounded (observed 77MB+), slowing every concurrent DB access
|
|
1463
1499
|
// (hooks, MCP) and eventually causing UserPromptSubmit hook timeouts.
|
|
1464
1500
|
const WAL_CHECKPOINT_INTERVAL = 5 * 60 * 1000; // 5 minutes
|
|
1465
|
-
|
|
1501
|
+
checkpointTimerHandle = setInterval(() => {
|
|
1466
1502
|
try {
|
|
1467
1503
|
s.db.exec("PRAGMA wal_checkpoint(PASSIVE)");
|
|
1468
1504
|
} catch {
|
|
@@ -1470,16 +1506,7 @@ async function cmdWatch() {
|
|
|
1470
1506
|
}
|
|
1471
1507
|
}, WAL_CHECKPOINT_INTERVAL);
|
|
1472
1508
|
|
|
1473
|
-
//
|
|
1474
|
-
process.on("SIGINT", () => {
|
|
1475
|
-
clearInterval(checkpointTimer);
|
|
1476
|
-
watcher.close();
|
|
1477
|
-
skillWatcher?.close();
|
|
1478
|
-
closeStore();
|
|
1479
|
-
process.exit(0);
|
|
1480
|
-
});
|
|
1481
|
-
|
|
1482
|
-
// Block forever
|
|
1509
|
+
// Block forever — shutdown is driven by signal handlers registered above.
|
|
1483
1510
|
await new Promise(() => {});
|
|
1484
1511
|
}
|
|
1485
1512
|
|
package/src/consolidation.ts
CHANGED
|
@@ -17,6 +17,7 @@ import type { LlamaCpp } from "./llm.ts";
|
|
|
17
17
|
import { extractJsonFromLLM } from "./amem.ts";
|
|
18
18
|
import { hashContent } from "./indexer.ts";
|
|
19
19
|
import { passesMergeSafety } from "./text-similarity.ts";
|
|
20
|
+
import { withWorkerLease } from "./worker-lease.ts";
|
|
20
21
|
import {
|
|
21
22
|
checkContradiction,
|
|
22
23
|
isActionableContradiction,
|
|
@@ -166,22 +167,68 @@ let consolidationTimer: Timer | null = null;
|
|
|
166
167
|
let isRunning = false;
|
|
167
168
|
let tickCount = 0;
|
|
168
169
|
|
|
170
|
+
/**
|
|
171
|
+
* DB-backed worker lease name for the light consolidation lane (v0.8.2).
|
|
172
|
+
* Distinct from the heavy-maintenance lane's lease so both lanes can hold
|
|
173
|
+
* independent exclusivity against the same SQLite vault without colliding.
|
|
174
|
+
*/
|
|
175
|
+
export const DEFAULT_LIGHT_LANE_WORKER_NAME = "light-consolidation";
|
|
176
|
+
|
|
177
|
+
/**
|
|
178
|
+
* Default worker-lease TTL for the light lane (10 min). A tick normally
|
|
179
|
+
* finishes in seconds, but Phase 2 consolidation + Phase 3 deductive
|
|
180
|
+
* synthesis can stack many LLM calls under worst-case conditions. A 10-min
|
|
181
|
+
* ceiling covers that case without leaving a stranded lease forever if the
|
|
182
|
+
* process is SIGKILL'd mid-tick — the next worker reclaims it atomically
|
|
183
|
+
* via the single-statement upsert in `acquireWorkerLease` once the TTL
|
|
184
|
+
* has elapsed.
|
|
185
|
+
*/
|
|
186
|
+
export const DEFAULT_LIGHT_LANE_LEASE_TTL_MS = 10 * 60 * 1000;
|
|
187
|
+
|
|
188
|
+
/**
|
|
189
|
+
* Options for a single consolidation tick (v0.8.2). All fields optional;
|
|
190
|
+
* omitting the bag reproduces pre-v0.8.2 behavior except for the newly
|
|
191
|
+
* added DB-backed lease wrap, which is always on.
|
|
192
|
+
*
|
|
193
|
+
* - `workerName` override the lease name (default "light-consolidation").
|
|
194
|
+
* Tests should pass a unique name to avoid cross-test
|
|
195
|
+
* contention with other suites running in the same bun
|
|
196
|
+
* process.
|
|
197
|
+
* - `leaseTtlMs` override the lease TTL. Tests use short TTLs (e.g.
|
|
198
|
+
* 100 ms with a past `now`) to exercise expiry reclaim
|
|
199
|
+
* without real delay.
|
|
200
|
+
*/
|
|
201
|
+
export interface ConsolidationTickOptions {
|
|
202
|
+
workerName?: string;
|
|
203
|
+
leaseTtlMs?: number;
|
|
204
|
+
}
|
|
205
|
+
|
|
169
206
|
// =============================================================================
|
|
170
207
|
// Worker Functions
|
|
171
208
|
// =============================================================================
|
|
172
209
|
|
|
173
210
|
/**
|
|
174
|
-
* Starts the consolidation worker that enriches documents missing A-MEM
|
|
175
|
-
* and periodically consolidates observations.
|
|
211
|
+
* Starts the consolidation worker that enriches documents missing A-MEM
|
|
212
|
+
* metadata and periodically consolidates observations.
|
|
176
213
|
*
|
|
177
|
-
*
|
|
178
|
-
*
|
|
179
|
-
*
|
|
214
|
+
* v0.8.2 — every tick is wrapped in a DB-backed worker lease (see
|
|
215
|
+
* `runConsolidationTick`), so multiple host processes running this worker
|
|
216
|
+
* against the same vault cannot run Phase 2 merge / Phase 3 deductive
|
|
217
|
+
* synthesis concurrently. The tick still uses an in-process `isRunning`
|
|
218
|
+
* reentrancy guard that fires before the lease round-trip, so the common
|
|
219
|
+
* case (single process, overlapping timer fires) is handled without
|
|
220
|
+
* touching SQLite.
|
|
221
|
+
*
|
|
222
|
+
* @param store - Store instance with A-MEM methods
|
|
223
|
+
* @param llm - LLM instance for memory note construction
|
|
224
|
+
* @param intervalMs - Tick interval in milliseconds (default 300000 = 5 min)
|
|
225
|
+
* @param opts - Optional lease overrides (worker name, TTL)
|
|
180
226
|
*/
|
|
181
227
|
export function startConsolidationWorker(
|
|
182
228
|
store: Store,
|
|
183
229
|
llm: LlamaCpp,
|
|
184
|
-
intervalMs: number = 300000
|
|
230
|
+
intervalMs: number = 300000,
|
|
231
|
+
opts: ConsolidationTickOptions = {},
|
|
185
232
|
): void {
|
|
186
233
|
// Clamp interval to minimum 15 seconds
|
|
187
234
|
const interval = Math.max(15000, intervalMs);
|
|
@@ -190,7 +237,7 @@ export function startConsolidationWorker(
|
|
|
190
237
|
|
|
191
238
|
// Set up periodic tick
|
|
192
239
|
consolidationTimer = setInterval(async () => {
|
|
193
|
-
await
|
|
240
|
+
await runConsolidationTick(store, llm, opts);
|
|
194
241
|
}, interval);
|
|
195
242
|
|
|
196
243
|
// Use unref() to avoid blocking process exit
|
|
@@ -200,55 +247,133 @@ export function startConsolidationWorker(
|
|
|
200
247
|
}
|
|
201
248
|
|
|
202
249
|
/**
|
|
203
|
-
* Stops the consolidation worker.
|
|
250
|
+
* Stops the consolidation worker. Async since v0.8.2 — clears the interval
|
|
251
|
+
* AND awaits any in-flight tick before resolving, so callers (signal
|
|
252
|
+
* handlers, test fixtures) can safely close the store afterward without
|
|
253
|
+
* yanking the DB out from under a mid-tick worker. The wait is bounded by
|
|
254
|
+
* `STOP_DRAIN_TIMEOUT_MS` (15s) so a pathologically stuck tick cannot
|
|
255
|
+
* wedge shutdown indefinitely; if the timeout fires, the function logs
|
|
256
|
+
* and returns anyway (the next process will reclaim the stale lease via
|
|
257
|
+
* the v0.8.0 `worker_leases` TTL upsert).
|
|
204
258
|
*/
|
|
205
|
-
export function stopConsolidationWorker(): void {
|
|
259
|
+
export async function stopConsolidationWorker(): Promise<void> {
|
|
206
260
|
if (consolidationTimer) {
|
|
207
261
|
clearInterval(consolidationTimer);
|
|
208
262
|
consolidationTimer = null;
|
|
263
|
+
console.log("[consolidation] Worker stop signaled — draining in-flight tick");
|
|
264
|
+
}
|
|
265
|
+
const deadline = Date.now() + STOP_DRAIN_TIMEOUT_MS;
|
|
266
|
+
while (isRunning && Date.now() < deadline) {
|
|
267
|
+
await new Promise<void>((resolve) => setTimeout(resolve, 50));
|
|
268
|
+
}
|
|
269
|
+
if (isRunning) {
|
|
270
|
+
console.log(
|
|
271
|
+
`[consolidation] Worker stop drain timed out after ${STOP_DRAIN_TIMEOUT_MS}ms — tick still running`,
|
|
272
|
+
);
|
|
273
|
+
} else {
|
|
209
274
|
console.log("[consolidation] Worker stopped");
|
|
210
275
|
}
|
|
211
276
|
}
|
|
212
277
|
|
|
213
278
|
/**
|
|
214
|
-
*
|
|
279
|
+
* v0.8.2 — bounded wait for in-flight light-lane tick during shutdown.
|
|
280
|
+
* 15 seconds is more than enough for Phase 1 + Phase 4 to drain (the
|
|
281
|
+
* cheap phases) and lets Phase 2/3 mid-flight LLM calls finish naturally
|
|
282
|
+
* in most environments. Stuck-tick scenarios (e.g. unreachable LLM with
|
|
283
|
+
* no socket timeout) fall back to the v0.8.0 worker_leases TTL reclaim.
|
|
284
|
+
*/
|
|
285
|
+
const STOP_DRAIN_TIMEOUT_MS = 15_000;
|
|
286
|
+
|
|
287
|
+
/**
|
|
288
|
+
* Run one consolidation tick: Phase 1 (A-MEM backfill) → Phase 2 (observation
|
|
289
|
+
* consolidation, every 6th tick) → Phase 3 (deductive synthesis, every 3rd
|
|
290
|
+
* tick) → Phase 4 (recall stats recomputation, every tick).
|
|
291
|
+
*
|
|
292
|
+
* v0.8.2 — wrapped in a DB-backed worker lease so at most one host process
|
|
293
|
+
* ticks at a time against the same vault, symmetric with the v0.8.0 heavy
|
|
294
|
+
* maintenance lane's `worker_leases` exclusivity pattern. Phase 2 is the
|
|
295
|
+
* race-sensitive phase Codex flagged in the v0.8.2 pre-rollout review:
|
|
296
|
+
* without the lease, two concurrent workers could both INSERT a new
|
|
297
|
+
* consolidated observation for the same cluster, or both merge into the
|
|
298
|
+
* same existing row and lose source_ids from the read-modify-write update
|
|
299
|
+
* in `mergeIntoExistingConsolidation`.
|
|
300
|
+
*
|
|
301
|
+
* An in-process reentrancy guard (`isRunning`) fires before the lease
|
|
302
|
+
* round-trip, so overlapping setInterval timer fires from the same process
|
|
303
|
+
* do not incur a SQLite round-trip per skip.
|
|
304
|
+
*
|
|
305
|
+
* Returns `{ acquired }` so integration tests (and the setInterval wrapper)
|
|
306
|
+
* can distinguish ticks that did real work from ticks skipped by the lease
|
|
307
|
+
* or reentrancy gate.
|
|
308
|
+
*
|
|
309
|
+
* Exported in v0.8.2 so tests can drive individual ticks directly without
|
|
310
|
+
* spinning up the setInterval loop.
|
|
215
311
|
*/
|
|
216
|
-
async function
|
|
217
|
-
|
|
312
|
+
export async function runConsolidationTick(
|
|
313
|
+
store: Store,
|
|
314
|
+
llm: LlamaCpp,
|
|
315
|
+
opts: ConsolidationTickOptions = {},
|
|
316
|
+
): Promise<{ acquired: boolean }> {
|
|
317
|
+
// In-process reentrancy guard: catches overlapping setInterval fires in
|
|
318
|
+
// the same process before we hit SQLite. Cheap; the lease is the
|
|
319
|
+
// cross-process authority.
|
|
218
320
|
if (isRunning) {
|
|
219
|
-
console.log("[consolidation] Skipping tick (already running)");
|
|
220
|
-
return;
|
|
321
|
+
console.log("[consolidation] Skipping tick (already running in-process)");
|
|
322
|
+
return { acquired: false };
|
|
221
323
|
}
|
|
222
324
|
|
|
223
|
-
|
|
224
|
-
|
|
325
|
+
const workerName = opts.workerName ?? DEFAULT_LIGHT_LANE_WORKER_NAME;
|
|
326
|
+
const leaseTtlMs = opts.leaseTtlMs ?? DEFAULT_LIGHT_LANE_LEASE_TTL_MS;
|
|
225
327
|
|
|
328
|
+
isRunning = true;
|
|
226
329
|
try {
|
|
227
|
-
|
|
228
|
-
|
|
330
|
+
const lease = await withWorkerLease(
|
|
331
|
+
store,
|
|
332
|
+
workerName,
|
|
333
|
+
leaseTtlMs,
|
|
334
|
+
async () => {
|
|
335
|
+
tickCount++;
|
|
336
|
+
try {
|
|
337
|
+
// Phase 1: A-MEM backfill (every tick)
|
|
338
|
+
await backfillAmem(store, llm);
|
|
339
|
+
|
|
340
|
+
// Phase 2: Observation consolidation (every 6th tick — ~30 min
|
|
341
|
+
// at default interval). Race-sensitive — see doc comment above.
|
|
342
|
+
if (tickCount % 6 === 0) {
|
|
343
|
+
await consolidateObservations(store, llm);
|
|
344
|
+
}
|
|
229
345
|
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
346
|
+
// Phase 3: Deductive synthesis (every 3rd tick — ~15 min).
|
|
347
|
+
// Writes are mostly idempotent on the hash-stable path but the
|
|
348
|
+
// anti-contamination validator still burns LLM calls, so
|
|
349
|
+
// running two workers in parallel is pure cost.
|
|
350
|
+
if (tickCount % 3 === 0) {
|
|
351
|
+
await generateDeductiveObservations(store, llm);
|
|
352
|
+
}
|
|
234
353
|
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
354
|
+
// Phase 4: Recall stats recomputation (every tick — lightweight
|
|
355
|
+
// SQL aggregation). Non-critical — recall stats are
|
|
356
|
+
// informational, not retrieval-blocking.
|
|
357
|
+
try {
|
|
358
|
+
const updated = store.recomputeRecallStats();
|
|
359
|
+
if (updated > 0) {
|
|
360
|
+
console.log(`[consolidation] Phase 4: recomputed recall_stats for ${updated} docs`);
|
|
361
|
+
}
|
|
362
|
+
} catch (err) {
|
|
363
|
+
console.error("[consolidation] Phase 4 recall stats failed:", err);
|
|
364
|
+
}
|
|
365
|
+
} catch (err) {
|
|
366
|
+
console.error("[consolidation] Tick failed:", err);
|
|
367
|
+
}
|
|
368
|
+
},
|
|
369
|
+
);
|
|
239
370
|
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
console.log(`[consolidation] Phase 4: recomputed recall_stats for ${updated} docs`);
|
|
245
|
-
}
|
|
246
|
-
} catch (err) {
|
|
247
|
-
// Non-critical — recall stats are informational, not retrieval-blocking
|
|
248
|
-
console.error("[consolidation] Phase 4 recall stats failed:", err);
|
|
371
|
+
if (!lease.acquired) {
|
|
372
|
+
console.log(
|
|
373
|
+
`[consolidation] Skipping tick (lease '${workerName}' held by another worker)`,
|
|
374
|
+
);
|
|
249
375
|
}
|
|
250
|
-
|
|
251
|
-
console.error("[consolidation] Tick failed:", err);
|
|
376
|
+
return { acquired: lease.acquired };
|
|
252
377
|
} finally {
|
|
253
378
|
isRunning = false;
|
|
254
379
|
}
|
|
@@ -69,6 +69,19 @@ const INSTRUCTION_TOKEN_COST = estimateTokens(INSTRUCTION_XML);
|
|
|
69
69
|
const RELATIONSHIPS_XML_OVERHEAD_TOKENS = estimateTokens("<relationships>\n\n</relationships>");
|
|
70
70
|
const MAX_RELATION_SNIPPETS = 10;
|
|
71
71
|
|
|
72
|
+
// Ext 6b: Multi-turn prior-query lookback
|
|
73
|
+
// The retrieval query is built from the current prompt plus up to
|
|
74
|
+
// MULTI_TURN_LOOKBACK recent same-session prior prompts within
|
|
75
|
+
// MULTI_TURN_MAX_AGE_MINUTES. The combined query is clamped to
|
|
76
|
+
// MULTI_TURN_MAX_CHARS with newest content preserved first — so the
|
|
77
|
+
// current prompt is always the first N chars even when older priors
|
|
78
|
+
// would otherwise push it out. All other hook signals (scoring,
|
|
79
|
+
// composite recency intent, recall attribution, routing hints)
|
|
80
|
+
// continue to use the raw current prompt.
|
|
81
|
+
const MULTI_TURN_LOOKBACK = 2;
|
|
82
|
+
const MULTI_TURN_MAX_AGE_MINUTES = 10;
|
|
83
|
+
const MULTI_TURN_MAX_CHARS = 2000;
|
|
84
|
+
|
|
72
85
|
// File path patterns to extract from prompts (E13 replacement: file-aware UserPromptSubmit)
|
|
73
86
|
const FILE_PATH_RE = /(?:^|\s)((?:\/[\w.@-]+)+(?:\.\w+)?|[\w.@-]+\.(?:ts|js|py|md|sh|yaml|yml|json|toml|rs|go|tsx|jsx|css|html))\b/g;
|
|
74
87
|
|
|
@@ -133,12 +146,25 @@ export async function contextSurfacing(
|
|
|
133
146
|
const isRecency = hasRecencyIntent(prompt);
|
|
134
147
|
const minScore = isRecency ? MIN_COMPOSITE_SCORE_RECENCY : profile.minScore;
|
|
135
148
|
|
|
149
|
+
// Ext 6b: Build the retrieval query from the current prompt plus up to
|
|
150
|
+
// MULTI_TURN_LOOKBACK recent same-session prior prompts. Used only for
|
|
151
|
+
// the discovery path (vector, FTS, query expansion, reranking) so that
|
|
152
|
+
// a short "do that" / "same for X" turn can inherit the vocabulary of
|
|
153
|
+
// earlier turns. All other prompt-dependent signals (recency intent,
|
|
154
|
+
// composite scoring, recall attribution, snippet highlighting, routing
|
|
155
|
+
// hints, dedupe, heartbeat check) continue to use the raw current
|
|
156
|
+
// prompt. If the session has no priors in the window, the helper
|
|
157
|
+
// returns the current prompt unchanged.
|
|
158
|
+
const retrievalQuery = input.sessionId
|
|
159
|
+
? buildMultiTurnSurfacingQuery(store, input.sessionId, prompt)
|
|
160
|
+
: prompt;
|
|
161
|
+
|
|
136
162
|
// Search: try vector first (if profile allows), fall back to BM25
|
|
137
163
|
// When vector succeeds, also supplement with FTS for keyword-exact recall
|
|
138
164
|
let results: SearchResult[] = [];
|
|
139
165
|
if (profile.useVector) {
|
|
140
166
|
try {
|
|
141
|
-
const vectorPromise = store.searchVec(
|
|
167
|
+
const vectorPromise = store.searchVec(retrievalQuery, DEFAULT_EMBED_MODEL, maxResults);
|
|
142
168
|
const timeoutPromise = new Promise<SearchResult[]>((_, reject) =>
|
|
143
169
|
setTimeout(() => reject(new Error("vector timeout")), profile.vectorTimeout)
|
|
144
170
|
);
|
|
@@ -149,11 +175,11 @@ export async function contextSurfacing(
|
|
|
149
175
|
}
|
|
150
176
|
|
|
151
177
|
if (results.length === 0) {
|
|
152
|
-
results = store.searchFTS(
|
|
178
|
+
results = store.searchFTS(retrievalQuery, maxResults);
|
|
153
179
|
} else {
|
|
154
180
|
// Supplement vector results with FTS for keyword-exact matches (<10ms)
|
|
155
181
|
const seen = new Set(results.map(r => r.filepath));
|
|
156
|
-
const ftsSupplemental = store.searchFTS(
|
|
182
|
+
const ftsSupplemental = store.searchFTS(retrievalQuery, 5);
|
|
157
183
|
for (const r of ftsSupplemental) {
|
|
158
184
|
if (!seen.has(r.filepath)) {
|
|
159
185
|
seen.add(r.filepath);
|
|
@@ -166,7 +192,7 @@ export async function contextSurfacing(
|
|
|
166
192
|
if (getVaultPath("skill")) {
|
|
167
193
|
try {
|
|
168
194
|
const skillStore = resolveStore("skill");
|
|
169
|
-
const skillResults = skillStore.searchFTS(
|
|
195
|
+
const skillResults = skillStore.searchFTS(retrievalQuery, 5);
|
|
170
196
|
// Tag skill vault results for identification in output
|
|
171
197
|
for (const r of skillResults) {
|
|
172
198
|
(r as any)._fromVault = "skill";
|
|
@@ -178,7 +204,9 @@ export async function contextSurfacing(
|
|
|
178
204
|
}
|
|
179
205
|
|
|
180
206
|
// File-aware supplemental search (E13 replacement): extract file paths/names from prompt
|
|
181
|
-
// and run targeted FTS queries to surface file-specific vault context
|
|
207
|
+
// and run targeted FTS queries to surface file-specific vault context.
|
|
208
|
+
// File-path extraction stays on the raw current prompt so priors cannot
|
|
209
|
+
// pollute the file-specific discovery channel with stale filenames.
|
|
182
210
|
const fileMatches = [...prompt.matchAll(FILE_PATH_RE)].map(m => m[1]!.trim()).filter(Boolean);
|
|
183
211
|
if (fileMatches.length > 0) {
|
|
184
212
|
const seen = new Set(results.map(r => r.filepath));
|
|
@@ -195,17 +223,23 @@ export async function contextSurfacing(
|
|
|
195
223
|
}
|
|
196
224
|
}
|
|
197
225
|
|
|
198
|
-
if (results.length === 0) { logEmptyTurn(store, input); return makeEmptyOutput("context-surfacing"); }
|
|
226
|
+
if (results.length === 0) { logEmptyTurn(store, input, prompt); return makeEmptyOutput("context-surfacing"); }
|
|
199
227
|
|
|
200
228
|
// Budget-aware deep escalation (deep profile only):
|
|
201
229
|
// If the fast path finished quickly and found results, spend remaining time budget
|
|
202
230
|
// on query expansion (discovers new candidates) and cross-encoder reranking (reorders).
|
|
231
|
+
// Ext 6b: expansion + FTS variants use the multi-turn retrieval query so
|
|
232
|
+
// short current prompts still inherit prior-turn vocabulary. Reranking
|
|
233
|
+
// continues to use the RAW current prompt so relevance scoring is not
|
|
234
|
+
// diluted by older turns — the cross-encoder is asked "how well does
|
|
235
|
+
// this doc match the user's current question", not "how well does it
|
|
236
|
+
// match the last 10 minutes of questions".
|
|
203
237
|
if (profile.deepEscalation && results.length >= 2) {
|
|
204
238
|
const elapsed = Date.now() - startTime;
|
|
205
239
|
if (elapsed < profile.escalationBudgetMs) {
|
|
206
240
|
try {
|
|
207
241
|
// Phase 1: Query expansion — discover candidates BM25+vector missed
|
|
208
|
-
const expanded = await store.expandQuery(
|
|
242
|
+
const expanded = await store.expandQuery(retrievalQuery, DEFAULT_QUERY_MODEL);
|
|
209
243
|
if (expanded.length > 0) {
|
|
210
244
|
const seen = new Set(results.map(r => r.filepath));
|
|
211
245
|
for (const eq of expanded.slice(0, 3)) {
|
|
@@ -253,7 +287,7 @@ export async function contextSurfacing(
|
|
|
253
287
|
!FILTERED_PATHS.some(p => r.displayPath.includes(p))
|
|
254
288
|
);
|
|
255
289
|
|
|
256
|
-
if (results.length === 0) { logEmptyTurn(store, input); return makeEmptyOutput("context-surfacing"); }
|
|
290
|
+
if (results.length === 0) { logEmptyTurn(store, input, prompt); return makeEmptyOutput("context-surfacing"); }
|
|
257
291
|
|
|
258
292
|
// Filter out snoozed documents
|
|
259
293
|
const now = new Date();
|
|
@@ -269,7 +303,7 @@ export async function contextSurfacing(
|
|
|
269
303
|
return true;
|
|
270
304
|
});
|
|
271
305
|
|
|
272
|
-
if (results.length === 0) { logEmptyTurn(store, input); return makeEmptyOutput("context-surfacing"); }
|
|
306
|
+
if (results.length === 0) { logEmptyTurn(store, input, prompt); return makeEmptyOutput("context-surfacing"); }
|
|
273
307
|
|
|
274
308
|
// Deduplicate by filepath (keep best score per path)
|
|
275
309
|
const deduped = new Map<string, SearchResult>();
|
|
@@ -311,7 +345,7 @@ export async function contextSurfacing(
|
|
|
311
345
|
: 0;
|
|
312
346
|
|
|
313
347
|
// Activation floor: if even the best result is too weak, bail entirely
|
|
314
|
-
if (bestScore < profile.activationFloor) { logEmptyTurn(store, input); return makeEmptyOutput("context-surfacing"); }
|
|
348
|
+
if (bestScore < profile.activationFloor) { logEmptyTurn(store, input, prompt); return makeEmptyOutput("context-surfacing"); }
|
|
315
349
|
|
|
316
350
|
const adaptiveMin = Math.max(bestScore * profile.minScoreRatio, profile.absoluteFloor);
|
|
317
351
|
scored = allScored.filter(r => r.compositeScore >= adaptiveMin);
|
|
@@ -320,7 +354,7 @@ export async function contextSurfacing(
|
|
|
320
354
|
scored = allScored.filter(r => r.compositeScore >= minScore);
|
|
321
355
|
}
|
|
322
356
|
|
|
323
|
-
if (scored.length === 0) { logEmptyTurn(store, input); return makeEmptyOutput("context-surfacing"); }
|
|
357
|
+
if (scored.length === 0) { logEmptyTurn(store, input, prompt); return makeEmptyOutput("context-surfacing"); }
|
|
324
358
|
|
|
325
359
|
// Spreading activation (E11): boost results co-activated with top HOT results
|
|
326
360
|
if (scored.length > 3) {
|
|
@@ -369,7 +403,7 @@ export async function contextSurfacing(
|
|
|
369
403
|
const { context, paths, tokens } = buildContext(scored, prompt, factsBudget);
|
|
370
404
|
|
|
371
405
|
if (!context) {
|
|
372
|
-
logEmptyTurn(store, input);
|
|
406
|
+
logEmptyTurn(store, input, prompt);
|
|
373
407
|
return makeEmptyOutput("context-surfacing");
|
|
374
408
|
}
|
|
375
409
|
|
|
@@ -377,8 +411,10 @@ export async function contextSurfacing(
|
|
|
377
411
|
if (input.sessionId) {
|
|
378
412
|
const turnIndex = (input as any)._turnIndex ?? 0;
|
|
379
413
|
|
|
380
|
-
// Log the injection — returns usage_id for recall event linkage
|
|
381
|
-
|
|
414
|
+
// Log the injection — returns usage_id for recall event linkage.
|
|
415
|
+
// Ext 6b: persist the raw prompt as query_text so future turns in
|
|
416
|
+
// the same session can reconstitute a multi-turn retrieval query.
|
|
417
|
+
const usageId = logInjection(store, input.sessionId, "context-surfacing", paths, tokens, turnIndex, prompt);
|
|
382
418
|
|
|
383
419
|
// Record recall events ONLY for docs that made it into the injected context
|
|
384
420
|
// (post-budget). Docs trimmed by token budget were never seen by the model.
|
|
@@ -469,12 +505,21 @@ export async function contextSurfacing(
|
|
|
469
505
|
* Log an empty context_usage row for a skipped turn.
|
|
470
506
|
* Keeps turn_index aligned with transcript turns so per-turn recall
|
|
471
507
|
* attribution doesn't drift when some prompts are gated.
|
|
508
|
+
*
|
|
509
|
+
* Ext 6b: `queryText` is optional. Callers that gated BEFORE the
|
|
510
|
+
* retrieval stage (slash commands, heartbeat dedupe, too-short prompts,
|
|
511
|
+
* `shouldSkipRetrieval`) pass nothing — those turns are not meaningful
|
|
512
|
+
* user questions and their raw text is not worth persisting for future
|
|
513
|
+
* multi-turn lookback. Callers that gated AFTER retrieval (empty result
|
|
514
|
+
* set, threshold filter, budget) pass the prompt so a follow-up turn
|
|
515
|
+
* can still reuse the intent even though the current turn surfaced
|
|
516
|
+
* nothing.
|
|
472
517
|
*/
|
|
473
|
-
function logEmptyTurn(store: Store, input: HookInput): void {
|
|
518
|
+
function logEmptyTurn(store: Store, input: HookInput, queryText?: string): void {
|
|
474
519
|
if (!input.sessionId) return;
|
|
475
520
|
try {
|
|
476
521
|
const turnIndex = (input as any)._turnIndex ?? 0;
|
|
477
|
-
logInjection(store, input.sessionId, "context-surfacing", [], 0, turnIndex);
|
|
522
|
+
logInjection(store, input.sessionId, "context-surfacing", [], 0, turnIndex, queryText);
|
|
478
523
|
} catch { /* non-fatal */ }
|
|
479
524
|
}
|
|
480
525
|
|
|
@@ -700,6 +745,105 @@ export function buildVaultContextInner(
|
|
|
700
745
|
return lines.join("\n");
|
|
701
746
|
}
|
|
702
747
|
|
|
748
|
+
// =============================================================================
|
|
749
|
+
// Ext 6b: Multi-turn prior-query lookback
|
|
750
|
+
// =============================================================================
|
|
751
|
+
|
|
752
|
+
/**
|
|
753
|
+
* Build the retrieval query from the current prompt plus up to `lookback`
|
|
754
|
+
* recent prior prompts from the same session within `maxAgeMinutes`.
|
|
755
|
+
*
|
|
756
|
+
* Returns the current prompt unchanged when:
|
|
757
|
+
* - no `sessionId` (nothing to scope by)
|
|
758
|
+
* - the `query_text` column is missing (pre-migration store)
|
|
759
|
+
* - no prior rows within the window / all NULL
|
|
760
|
+
* - any DB error (fail-open — never throws)
|
|
761
|
+
*
|
|
762
|
+
* The combined query format is
|
|
763
|
+
* `<current>\n\n<newest prior>\n\n<older prior>...`
|
|
764
|
+
* truncated to `MULTI_TURN_MAX_CHARS` with **current content preserved
|
|
765
|
+
* first** — so even when older priors would push the current prompt
|
|
766
|
+
* past the char limit, the truncation drops the tail (older priors),
|
|
767
|
+
* not the head. This guarantees the retrieval query always contains the
|
|
768
|
+
* user's current question verbatim.
|
|
769
|
+
*
|
|
770
|
+
* Exported for direct unit testing.
|
|
771
|
+
*/
|
|
772
|
+
export function buildMultiTurnSurfacingQuery(
|
|
773
|
+
store: Store,
|
|
774
|
+
sessionId: string,
|
|
775
|
+
currentQuery: string,
|
|
776
|
+
lookback: number = MULTI_TURN_LOOKBACK,
|
|
777
|
+
maxAgeMinutes: number = MULTI_TURN_MAX_AGE_MINUTES,
|
|
778
|
+
maxChars: number = MULTI_TURN_MAX_CHARS,
|
|
779
|
+
): string {
|
|
780
|
+
if (!sessionId || currentQuery.length === 0) return currentQuery;
|
|
781
|
+
|
|
782
|
+
let priors: string[] = [];
|
|
783
|
+
try {
|
|
784
|
+
// ISO 8601 cutoff computed in JS (same lesson as the v0.8.0
|
|
785
|
+
// countRecentContextUsages fix — datetime('now', ...) returns a
|
|
786
|
+
// space-separated string that sorts incorrectly against the
|
|
787
|
+
// T-separated ISO 8601 timestamps stored in context_usage).
|
|
788
|
+
const cutoff = new Date(Date.now() - maxAgeMinutes * 60 * 1000).toISOString();
|
|
789
|
+
// Self-match guard lives in SQL so a duplicate submit/retry cannot eat
|
|
790
|
+
// into the lookback budget. Turn 18 review found that filtering in
|
|
791
|
+
// application code with `LIMIT lookback + 1` under-fills when multiple
|
|
792
|
+
// prior rows carry the same text as the current prompt — the SELECT
|
|
793
|
+
// returned only `lookback + 1` rows and application-level skipping
|
|
794
|
+
// then dropped legitimate distinct priors along with the dupes.
|
|
795
|
+
// Pushing the inequality into WHERE means every returned row is a
|
|
796
|
+
// valid non-self prior and the LIMIT == lookback fits exactly.
|
|
797
|
+
const rows = store.db.prepare(
|
|
798
|
+
`SELECT query_text FROM context_usage
|
|
799
|
+
WHERE session_id = ?
|
|
800
|
+
AND hook_name = 'context-surfacing'
|
|
801
|
+
AND timestamp > ?
|
|
802
|
+
AND query_text IS NOT NULL
|
|
803
|
+
AND query_text != ''
|
|
804
|
+
AND query_text != ?
|
|
805
|
+
ORDER BY id DESC
|
|
806
|
+
LIMIT ?`,
|
|
807
|
+
).all(sessionId, cutoff, currentQuery, lookback) as { query_text: string }[];
|
|
808
|
+
|
|
809
|
+
for (const row of rows) {
|
|
810
|
+
if (!row.query_text) continue;
|
|
811
|
+
priors.push(row.query_text);
|
|
812
|
+
}
|
|
813
|
+
} catch {
|
|
814
|
+
// query_text column may be missing on a pre-migration store, or
|
|
815
|
+
// the DB might be in a corrupted state — fall back to current-only.
|
|
816
|
+
return currentQuery;
|
|
817
|
+
}
|
|
818
|
+
|
|
819
|
+
if (priors.length === 0) return currentQuery;
|
|
820
|
+
|
|
821
|
+
// Assemble newest-first: current first, then newest prior, then older.
|
|
822
|
+
// The SQL already ordered rows DESC by id, so `priors[0]` is the newest.
|
|
823
|
+
const segments = [currentQuery, ...priors];
|
|
824
|
+
const combined = segments.join("\n\n");
|
|
825
|
+
|
|
826
|
+
if (combined.length <= maxChars) return combined;
|
|
827
|
+
|
|
828
|
+
// Over budget. Current query ALWAYS wins — include the full current
|
|
829
|
+
// prompt first, then add priors newest-first until the budget runs out.
|
|
830
|
+
// If the current prompt alone is already over budget, return it
|
|
831
|
+
// truncated (same as pre-v0.8.1 behavior — MAX_QUERY_LENGTH is
|
|
832
|
+
// enforced earlier in the handler so this branch is rare).
|
|
833
|
+
if (currentQuery.length >= maxChars) return currentQuery.slice(0, maxChars);
|
|
834
|
+
|
|
835
|
+
const parts: string[] = [currentQuery];
|
|
836
|
+
let used = currentQuery.length;
|
|
837
|
+
const separator = "\n\n";
|
|
838
|
+
for (const prior of priors) {
|
|
839
|
+
const cost = separator.length + prior.length;
|
|
840
|
+
if (used + cost > maxChars) break;
|
|
841
|
+
parts.push(prior);
|
|
842
|
+
used += cost;
|
|
843
|
+
}
|
|
844
|
+
return parts.join(separator);
|
|
845
|
+
}
|
|
846
|
+
|
|
703
847
|
/**
|
|
704
848
|
* Check if the agent should be nudged to use lifecycle tools.
|
|
705
849
|
* Returns true if N+ context-surfacing invocations have occurred since the
|
package/src/hooks.ts
CHANGED
|
@@ -379,6 +379,12 @@ export function smartTruncate(text: string, maxChars: number = 300): string {
|
|
|
379
379
|
|
|
380
380
|
/**
|
|
381
381
|
* Log a context injection to the usage tracking table.
|
|
382
|
+
*
|
|
383
|
+
* `queryText` (v0.8.1 Ext 6b) is the raw prompt for this turn. Persisted
|
|
384
|
+
* only when the caller passes it — logEmptyTurn-style skip paths omit it
|
|
385
|
+
* so gated turns (slash commands, heartbeats, noise) cannot leak raw
|
|
386
|
+
* prompt text into `context_usage.query_text`. Pre-migration stores
|
|
387
|
+
* transparently drop the column via `insertUsageFn`'s feature-detect.
|
|
382
388
|
*/
|
|
383
389
|
export function logInjection(
|
|
384
390
|
store: Store,
|
|
@@ -386,7 +392,8 @@ export function logInjection(
|
|
|
386
392
|
hookName: string,
|
|
387
393
|
injectedPaths: string[],
|
|
388
394
|
estimatedTokens: number,
|
|
389
|
-
turnIndex?: number
|
|
395
|
+
turnIndex?: number,
|
|
396
|
+
queryText?: string
|
|
390
397
|
): number {
|
|
391
398
|
try {
|
|
392
399
|
const usageId = store.insertUsage({
|
|
@@ -397,6 +404,7 @@ export function logInjection(
|
|
|
397
404
|
estimatedTokens,
|
|
398
405
|
wasReferenced: 0,
|
|
399
406
|
turnIndex,
|
|
407
|
+
queryText,
|
|
400
408
|
});
|
|
401
409
|
|
|
402
410
|
// Record co-activation for all injected paths (E3)
|
package/src/maintenance.ts
CHANGED
|
@@ -77,6 +77,37 @@ const DEFAULT_CONFIG: Required<Omit<HeavyMaintenanceConfig, "workerName" | "cloc
|
|
|
77
77
|
|
|
78
78
|
const DEFAULT_WORKER_NAME = "heavy-maintenance";
|
|
79
79
|
|
|
80
|
+
/**
|
|
81
|
+
* Parse a `HeavyMaintenanceConfig` from `Bun.env` (v0.8.2). Shared by every
|
|
82
|
+
* host that can start the heavy lane (`cmdMcp` in mcp.ts, `cmdWatch` in
|
|
83
|
+
* clawmem.ts) so the env var convention stays in one place. Each field is
|
|
84
|
+
* left undefined when its env var is unset, so `DEFAULT_CONFIG` continues
|
|
85
|
+
* to drive any field the operator did not explicitly override.
|
|
86
|
+
*/
|
|
87
|
+
export function parseHeavyLaneConfigFromEnv(): HeavyMaintenanceConfig {
|
|
88
|
+
return {
|
|
89
|
+
intervalMs: Bun.env.CLAWMEM_HEAVY_LANE_INTERVAL
|
|
90
|
+
? parseInt(Bun.env.CLAWMEM_HEAVY_LANE_INTERVAL, 10)
|
|
91
|
+
: undefined,
|
|
92
|
+
windowStartHour: Bun.env.CLAWMEM_HEAVY_LANE_WINDOW_START
|
|
93
|
+
? parseInt(Bun.env.CLAWMEM_HEAVY_LANE_WINDOW_START, 10)
|
|
94
|
+
: null,
|
|
95
|
+
windowEndHour: Bun.env.CLAWMEM_HEAVY_LANE_WINDOW_END
|
|
96
|
+
? parseInt(Bun.env.CLAWMEM_HEAVY_LANE_WINDOW_END, 10)
|
|
97
|
+
: null,
|
|
98
|
+
maxContextUsagesPer10m: Bun.env.CLAWMEM_HEAVY_LANE_MAX_USAGES
|
|
99
|
+
? parseInt(Bun.env.CLAWMEM_HEAVY_LANE_MAX_USAGES, 10)
|
|
100
|
+
: undefined,
|
|
101
|
+
staleObservationLimit: Bun.env.CLAWMEM_HEAVY_LANE_OBS_LIMIT
|
|
102
|
+
? parseInt(Bun.env.CLAWMEM_HEAVY_LANE_OBS_LIMIT, 10)
|
|
103
|
+
: undefined,
|
|
104
|
+
staleDeductiveLimit: Bun.env.CLAWMEM_HEAVY_LANE_DED_LIMIT
|
|
105
|
+
? parseInt(Bun.env.CLAWMEM_HEAVY_LANE_DED_LIMIT, 10)
|
|
106
|
+
: undefined,
|
|
107
|
+
useSurprisalSelector: Bun.env.CLAWMEM_HEAVY_LANE_SURPRISAL === "true",
|
|
108
|
+
};
|
|
109
|
+
}
|
|
110
|
+
|
|
80
111
|
// =============================================================================
|
|
81
112
|
// Journal helpers
|
|
82
113
|
// =============================================================================
|
|
@@ -503,7 +534,7 @@ export function startHeavyMaintenanceWorker(
|
|
|
503
534
|
store: Store,
|
|
504
535
|
llm: LlamaCpp,
|
|
505
536
|
cfg: HeavyMaintenanceConfig = {},
|
|
506
|
-
): () => void {
|
|
537
|
+
): () => Promise<void> {
|
|
507
538
|
const merged = { ...DEFAULT_CONFIG, ...cfg };
|
|
508
539
|
// Clamp interval to minimum 30 seconds so buggy configs can't pin the CPU.
|
|
509
540
|
const interval = Math.max(30_000, merged.intervalMs);
|
|
@@ -530,11 +561,35 @@ export function startHeavyMaintenanceWorker(
|
|
|
530
561
|
}, interval);
|
|
531
562
|
heavyTimer.unref();
|
|
532
563
|
|
|
533
|
-
|
|
564
|
+
// v0.8.2 — async stop handle. Clears the timer AND awaits any in-flight
|
|
565
|
+
// tick before resolving, so callers can safely close the store afterward
|
|
566
|
+
// without yanking the DB from under a mid-tick worker. Bounded wait —
|
|
567
|
+
// a pathologically stuck tick cannot wedge shutdown indefinitely; the
|
|
568
|
+
// worker_leases TTL upsert reclaims any stranded lease on the next
|
|
569
|
+
// process startup.
|
|
570
|
+
return async () => {
|
|
534
571
|
if (heavyTimer) {
|
|
535
572
|
clearInterval(heavyTimer);
|
|
536
573
|
heavyTimer = null;
|
|
574
|
+
console.log("[heavy-lane] Worker stop signaled — draining in-flight tick");
|
|
575
|
+
}
|
|
576
|
+
const deadline = Date.now() + HEAVY_STOP_DRAIN_TIMEOUT_MS;
|
|
577
|
+
while (heavyRunning && Date.now() < deadline) {
|
|
578
|
+
await new Promise<void>((resolve) => setTimeout(resolve, 50));
|
|
579
|
+
}
|
|
580
|
+
if (heavyRunning) {
|
|
581
|
+
console.log(
|
|
582
|
+
`[heavy-lane] Worker stop drain timed out after ${HEAVY_STOP_DRAIN_TIMEOUT_MS}ms — tick still running`,
|
|
583
|
+
);
|
|
584
|
+
} else {
|
|
537
585
|
console.log("[heavy-lane] Worker stopped");
|
|
538
586
|
}
|
|
539
587
|
};
|
|
540
588
|
}
|
|
589
|
+
|
|
590
|
+
/**
|
|
591
|
+
* v0.8.2 — bounded wait for in-flight heavy-lane tick during shutdown.
|
|
592
|
+
* 30 seconds covers a Phase 2 + Phase 3 stack with reasonable LLM latencies
|
|
593
|
+
* before falling back to the worker_leases TTL reclaim path.
|
|
594
|
+
*/
|
|
595
|
+
const HEAVY_STOP_DRAIN_TIMEOUT_MS = 30_000;
|
package/src/mcp.ts
CHANGED
|
@@ -39,7 +39,10 @@ import { classifyIntent, decomposeQuery, extractTemporalConstraint, type IntentT
|
|
|
39
39
|
import { adaptiveTraversal, mergeTraversalResults, mpfpTraversal } from "./graph-traversal.ts";
|
|
40
40
|
import { getDefaultLlamaCpp } from "./llm.ts";
|
|
41
41
|
import { startConsolidationWorker, stopConsolidationWorker } from "./consolidation.ts";
|
|
42
|
-
import {
|
|
42
|
+
import {
|
|
43
|
+
parseHeavyLaneConfigFromEnv,
|
|
44
|
+
startHeavyMaintenanceWorker,
|
|
45
|
+
} from "./maintenance.ts";
|
|
43
46
|
import { listVaults, loadVaultConfig } from "./config.ts";
|
|
44
47
|
import { getEntityGraphNeighbors, searchEntities } from "./entity.ts";
|
|
45
48
|
|
|
@@ -2595,8 +2598,37 @@ This is the recommended entry point for ALL memory queries.`,
|
|
|
2595
2598
|
await server.connect(transport);
|
|
2596
2599
|
|
|
2597
2600
|
// ---------------------------------------------------------------------------
|
|
2598
|
-
//
|
|
2599
|
-
// ---------------------------------------------------------------------------
|
|
2601
|
+
// Shutdown wiring + Workers
|
|
2602
|
+
// ---------------------------------------------------------------------------
|
|
2603
|
+
|
|
2604
|
+
// v0.8.2 Codex Turn 2 fix: register signal handlers BEFORE any worker
|
|
2605
|
+
// startup, mirroring the same null-handle capture pattern that cmdWatch
|
|
2606
|
+
// uses. The handler is the only thing that suppresses Node's default
|
|
2607
|
+
// signal action (terminate), so a SIGTERM arriving in the brief window
|
|
2608
|
+
// between worker startup and `process.on(...)` registration would
|
|
2609
|
+
// exit-143 the process and skip the async drain entirely, leaking any
|
|
2610
|
+
// lease the worker had just acquired. Capturing `stopHeavyLane` as a
|
|
2611
|
+
// mutable closure variable lets the registration happen before the
|
|
2612
|
+
// worker is actually created — the handler reads whatever value is
|
|
2613
|
+
// bound at the moment a signal arrives.
|
|
2614
|
+
let stopHeavyLane: (() => Promise<void>) | null = null;
|
|
2615
|
+
|
|
2616
|
+
// Signal handlers for graceful shutdown. async stop sequence: both
|
|
2617
|
+
// worker stops await any in-flight tick before resolving so the store
|
|
2618
|
+
// is not closed underneath a mid-tick worker. Bounded waits inside the
|
|
2619
|
+
// stop functions guarantee the handler cannot wedge indefinitely.
|
|
2620
|
+
const shutdownMcp = async (signal: string) => {
|
|
2621
|
+
console.error(`\n[mcp] Received ${signal}, shutting down...`);
|
|
2622
|
+
if (stopHeavyLane) {
|
|
2623
|
+
await stopHeavyLane();
|
|
2624
|
+
stopHeavyLane = null;
|
|
2625
|
+
}
|
|
2626
|
+
await stopConsolidationWorker();
|
|
2627
|
+
closeAllStores();
|
|
2628
|
+
process.exit(0);
|
|
2629
|
+
};
|
|
2630
|
+
process.on("SIGINT", () => { void shutdownMcp("SIGINT"); });
|
|
2631
|
+
process.on("SIGTERM", () => { void shutdownMcp("SIGTERM"); });
|
|
2600
2632
|
|
|
2601
2633
|
// Start consolidation worker if enabled
|
|
2602
2634
|
if (Bun.env.CLAWMEM_ENABLE_CONSOLIDATION === "true") {
|
|
@@ -2609,49 +2641,25 @@ This is the recommended entry point for ALL memory queries.`,
|
|
|
2609
2641
|
// longer interval than the light lane, only inside a configurable quiet
|
|
2610
2642
|
// window, and gated by context_usage query-rate so interactive sessions
|
|
2611
2643
|
// are never starved. Off by default.
|
|
2612
|
-
|
|
2644
|
+
//
|
|
2645
|
+
// v0.8.2: warn when this lane is enabled on a stdio MCP host. Per-session
|
|
2646
|
+
// MCPs spawned by Claude Code die with the session, which means the
|
|
2647
|
+
// configured quiet window may never see a live worker if no Claude Code
|
|
2648
|
+
// session is open at that time. The watcher service (`clawmem watch`) is
|
|
2649
|
+
// the canonical long-lived host for the heavy lane as of v0.8.2 — see
|
|
2650
|
+
// docs/concepts/architecture.md and docs/guides/upgrading.md for the
|
|
2651
|
+
// dual-host rationale.
|
|
2613
2652
|
if (Bun.env.CLAWMEM_HEAVY_LANE === "true") {
|
|
2653
|
+
console.error(
|
|
2654
|
+
"[mcp] WARNING: CLAWMEM_HEAVY_LANE=true on a stdio MCP host. " +
|
|
2655
|
+
"Per-session MCPs are short-lived; the configured quiet window may " +
|
|
2656
|
+
"never see a live worker. As of v0.8.2 the canonical heavy-lane host " +
|
|
2657
|
+
"is `clawmem watch` (e.g. systemd user unit clawmem-watcher.service). " +
|
|
2658
|
+
"Set the same env var on the watcher service for reliable operation.",
|
|
2659
|
+
);
|
|
2614
2660
|
const llm = getDefaultLlamaCpp();
|
|
2615
|
-
|
|
2616
|
-
intervalMs: Bun.env.CLAWMEM_HEAVY_LANE_INTERVAL
|
|
2617
|
-
? parseInt(Bun.env.CLAWMEM_HEAVY_LANE_INTERVAL, 10)
|
|
2618
|
-
: undefined,
|
|
2619
|
-
windowStartHour: Bun.env.CLAWMEM_HEAVY_LANE_WINDOW_START
|
|
2620
|
-
? parseInt(Bun.env.CLAWMEM_HEAVY_LANE_WINDOW_START, 10)
|
|
2621
|
-
: null,
|
|
2622
|
-
windowEndHour: Bun.env.CLAWMEM_HEAVY_LANE_WINDOW_END
|
|
2623
|
-
? parseInt(Bun.env.CLAWMEM_HEAVY_LANE_WINDOW_END, 10)
|
|
2624
|
-
: null,
|
|
2625
|
-
maxContextUsagesPer10m: Bun.env.CLAWMEM_HEAVY_LANE_MAX_USAGES
|
|
2626
|
-
? parseInt(Bun.env.CLAWMEM_HEAVY_LANE_MAX_USAGES, 10)
|
|
2627
|
-
: undefined,
|
|
2628
|
-
staleObservationLimit: Bun.env.CLAWMEM_HEAVY_LANE_OBS_LIMIT
|
|
2629
|
-
? parseInt(Bun.env.CLAWMEM_HEAVY_LANE_OBS_LIMIT, 10)
|
|
2630
|
-
: undefined,
|
|
2631
|
-
staleDeductiveLimit: Bun.env.CLAWMEM_HEAVY_LANE_DED_LIMIT
|
|
2632
|
-
? parseInt(Bun.env.CLAWMEM_HEAVY_LANE_DED_LIMIT, 10)
|
|
2633
|
-
: undefined,
|
|
2634
|
-
useSurprisalSelector: Bun.env.CLAWMEM_HEAVY_LANE_SURPRISAL === "true",
|
|
2635
|
-
};
|
|
2636
|
-
stopHeavyLane = startHeavyMaintenanceWorker(store, llm, cfg);
|
|
2661
|
+
stopHeavyLane = startHeavyMaintenanceWorker(store, llm, parseHeavyLaneConfigFromEnv());
|
|
2637
2662
|
}
|
|
2638
|
-
|
|
2639
|
-
// Signal handlers for graceful shutdown
|
|
2640
|
-
process.on("SIGINT", () => {
|
|
2641
|
-
console.error("\n[mcp] Received SIGINT, shutting down...");
|
|
2642
|
-
stopConsolidationWorker();
|
|
2643
|
-
if (stopHeavyLane) stopHeavyLane();
|
|
2644
|
-
closeAllStores();
|
|
2645
|
-
process.exit(0);
|
|
2646
|
-
});
|
|
2647
|
-
|
|
2648
|
-
process.on("SIGTERM", () => {
|
|
2649
|
-
console.error("\n[mcp] Received SIGTERM, shutting down...");
|
|
2650
|
-
stopConsolidationWorker();
|
|
2651
|
-
if (stopHeavyLane) stopHeavyLane();
|
|
2652
|
-
closeAllStores();
|
|
2653
|
-
process.exit(0);
|
|
2654
|
-
});
|
|
2655
2663
|
}
|
|
2656
2664
|
|
|
2657
2665
|
if (import.meta.main) {
|
package/src/store.ts
CHANGED
|
@@ -496,17 +496,36 @@ function initializeDatabase(db: Database): void {
|
|
|
496
496
|
injected_paths TEXT NOT NULL DEFAULT '[]',
|
|
497
497
|
estimated_tokens INTEGER NOT NULL DEFAULT 0,
|
|
498
498
|
was_referenced INTEGER NOT NULL DEFAULT 0,
|
|
499
|
-
turn_index INTEGER NOT NULL DEFAULT 0
|
|
499
|
+
turn_index INTEGER NOT NULL DEFAULT 0,
|
|
500
|
+
query_text TEXT
|
|
500
501
|
)
|
|
501
502
|
`);
|
|
502
503
|
db.exec(`CREATE INDEX IF NOT EXISTS idx_context_usage_session ON context_usage(session_id)`);
|
|
503
504
|
|
|
504
505
|
// Migration: add turn_index to existing context_usage
|
|
505
|
-
|
|
506
|
+
let cuCols = db.prepare("PRAGMA table_info(context_usage)").all() as { name: string }[];
|
|
506
507
|
if (!cuCols.some(c => c.name === "turn_index")) {
|
|
507
508
|
try { db.exec(`ALTER TABLE context_usage ADD COLUMN turn_index INTEGER NOT NULL DEFAULT 0`); } catch { /* exists */ }
|
|
509
|
+
cuCols = db.prepare("PRAGMA table_info(context_usage)").all() as { name: string }[];
|
|
508
510
|
}
|
|
509
511
|
|
|
512
|
+
// v0.8.1 Ext 6b: add nullable query_text column to existing context_usage
|
|
513
|
+
// so multi-turn lookback can persist the raw prompt alongside turn_index.
|
|
514
|
+
// The column is nullable and defaults to NULL — pre-migration rows are
|
|
515
|
+
// treated as "no prior query" by buildMultiTurnSurfacingQuery, preserving
|
|
516
|
+
// the current-prompt-only fallback for any session that predates v0.8.1.
|
|
517
|
+
if (!cuCols.some(c => c.name === "query_text")) {
|
|
518
|
+
try { db.exec(`ALTER TABLE context_usage ADD COLUMN query_text TEXT`); } catch { /* exists */ }
|
|
519
|
+
}
|
|
520
|
+
// Cache the column presence for insertUsageFn so it can build the INSERT
|
|
521
|
+
// statement without running PRAGMA table_info on every write path.
|
|
522
|
+
contextUsageHasQueryTextCache.set(
|
|
523
|
+
db,
|
|
524
|
+
db.prepare("PRAGMA table_info(context_usage)")
|
|
525
|
+
.all()
|
|
526
|
+
.some((c) => (c as { name: string }).name === "query_text"),
|
|
527
|
+
);
|
|
528
|
+
|
|
510
529
|
// Hook prompt dedupe: suppress duplicate/heartbeat prompts to reduce GPU churn.
|
|
511
530
|
db.exec(`
|
|
512
531
|
CREATE TABLE IF NOT EXISTS hook_dedupe (
|
|
@@ -895,6 +914,12 @@ function initializeDatabase(db: Database): void {
|
|
|
895
914
|
// Per-database dimension cache (WeakMap keyed by db object — no collisions for in-memory DBs)
|
|
896
915
|
const vecTableDimsCache = new WeakMap<Database, number>();
|
|
897
916
|
|
|
917
|
+
// v0.8.1 Ext 6b: per-database cache for the query_text column presence on
|
|
918
|
+
// context_usage. Set once at migration time so insertUsageFn can pick the
|
|
919
|
+
// correct INSERT shape without running PRAGMA on every write. Falls back
|
|
920
|
+
// to `false` (safe — equivalent to pre-migration behavior) when absent.
|
|
921
|
+
const contextUsageHasQueryTextCache = new WeakMap<Database, boolean>();
|
|
922
|
+
|
|
898
923
|
function ensureVecTableInternal(db: Database, dimensions: number): void {
|
|
899
924
|
if (vecTableDimsCache.get(db) === dimensions) return;
|
|
900
925
|
|
|
@@ -1722,6 +1747,13 @@ export type UsageRecord = {
|
|
|
1722
1747
|
estimatedTokens: number;
|
|
1723
1748
|
wasReferenced: number;
|
|
1724
1749
|
turnIndex?: number;
|
|
1750
|
+
/**
|
|
1751
|
+
* v0.8.1 Ext 6b: raw user prompt for this turn. Written when the caller
|
|
1752
|
+
* wants the row to be usable for multi-turn lookback retrieval. Persisted
|
|
1753
|
+
* via `insertUsageFn` only when the `query_text` column is present on
|
|
1754
|
+
* `context_usage` (pre-migration stores degrade to "no prior query").
|
|
1755
|
+
*/
|
|
1756
|
+
queryText?: string;
|
|
1725
1757
|
};
|
|
1726
1758
|
|
|
1727
1759
|
export type UsageRow = {
|
|
@@ -3939,10 +3971,33 @@ function getRecentSessionsFn(db: Database, limit: number): SessionRecord[] {
|
|
|
3939
3971
|
// =============================================================================
|
|
3940
3972
|
|
|
3941
3973
|
function insertUsageFn(db: Database, usage: UsageRecord): number {
|
|
3942
|
-
|
|
3943
|
-
|
|
3944
|
-
|
|
3945
|
-
|
|
3974
|
+
// v0.8.1 Ext 6b: write query_text when the column is present AND the
|
|
3975
|
+
// caller provided one. The column presence is cached at migration time
|
|
3976
|
+
// in contextUsageHasQueryTextCache — missing entries default to false
|
|
3977
|
+
// so ad-hoc DBs constructed outside createStore() degrade gracefully
|
|
3978
|
+
// to the pre-v0.8.1 INSERT shape.
|
|
3979
|
+
const hasQueryText = contextUsageHasQueryTextCache.get(db) ?? false;
|
|
3980
|
+
if (hasQueryText) {
|
|
3981
|
+
db.prepare(`
|
|
3982
|
+
INSERT INTO context_usage
|
|
3983
|
+
(session_id, timestamp, hook_name, injected_paths, estimated_tokens, was_referenced, turn_index, query_text)
|
|
3984
|
+
VALUES (?, ?, ?, ?, ?, ?, ?, ?)
|
|
3985
|
+
`).run(
|
|
3986
|
+
usage.sessionId,
|
|
3987
|
+
usage.timestamp,
|
|
3988
|
+
usage.hookName,
|
|
3989
|
+
JSON.stringify(usage.injectedPaths),
|
|
3990
|
+
usage.estimatedTokens,
|
|
3991
|
+
usage.wasReferenced,
|
|
3992
|
+
usage.turnIndex ?? 0,
|
|
3993
|
+
usage.queryText ?? null,
|
|
3994
|
+
);
|
|
3995
|
+
} else {
|
|
3996
|
+
db.prepare(`
|
|
3997
|
+
INSERT INTO context_usage (session_id, timestamp, hook_name, injected_paths, estimated_tokens, was_referenced, turn_index)
|
|
3998
|
+
VALUES (?, ?, ?, ?, ?, ?, ?)
|
|
3999
|
+
`).run(usage.sessionId, usage.timestamp, usage.hookName, JSON.stringify(usage.injectedPaths), usage.estimatedTokens, usage.wasReferenced, usage.turnIndex ?? 0);
|
|
4000
|
+
}
|
|
3946
4001
|
// Return the rowid of the just-inserted row for recall event linkage
|
|
3947
4002
|
const row = db.prepare("SELECT last_insert_rowid() as id").get() as { id: number };
|
|
3948
4003
|
return row.id;
|