nlm-memory 0.4.2 → 0.5.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +72 -34
- package/dist/cli/nlm.js +223 -33
- package/dist/cli/nlm.js.map +1 -1
- package/dist/core/adapters/cursor.d.ts +45 -0
- package/dist/core/adapters/cursor.js +397 -0
- package/dist/core/adapters/cursor.js.map +1 -0
- package/dist/core/adapters/from-source.js +10 -0
- package/dist/core/adapters/from-source.js.map +1 -1
- package/dist/core/adapters/windsurf.d.ts +44 -0
- package/dist/core/adapters/windsurf.js +299 -0
- package/dist/core/adapters/windsurf.js.map +1 -0
- package/dist/core/hook/claude-settings.d.ts +12 -5
- package/dist/core/hook/claude-settings.js +21 -6
- package/dist/core/hook/claude-settings.js.map +1 -1
- package/dist/core/sources/source-registry.d.ts +1 -1
- package/dist/core/sources/source-registry.js +18 -0
- package/dist/core/sources/source-registry.js.map +1 -1
- package/dist/core/storage/sqlite-session-store.d.ts +2 -0
- package/dist/core/storage/sqlite-session-store.js +38 -2
- package/dist/core/storage/sqlite-session-store.js.map +1 -1
- package/dist/hook/hook-auth.d.ts +13 -0
- package/dist/hook/hook-auth.js +19 -0
- package/dist/hook/hook-auth.js.map +1 -0
- package/dist/hook/prompt-recall-hook.js +7 -1
- package/dist/hook/prompt-recall-hook.js.map +1 -1
- package/dist/hook/session-start-hook.js +4 -1
- package/dist/hook/session-start-hook.js.map +1 -1
- package/dist/hook/stop-hook.js +4 -1
- package/dist/hook/stop-hook.js.map +1 -1
- package/dist/http/app.d.ts +2 -0
- package/dist/http/app.js +76 -1
- package/dist/http/app.js.map +1 -1
- package/dist/install/claude-code.js +1 -1
- package/dist/install/claude-code.js.map +1 -1
- package/dist/install/cursor.d.ts +25 -0
- package/dist/install/cursor.js +43 -0
- package/dist/install/cursor.js.map +1 -0
- package/dist/install/nlm-dir-perms.d.ts +19 -0
- package/dist/install/nlm-dir-perms.js +43 -0
- package/dist/install/nlm-dir-perms.js.map +1 -0
- package/dist/install/ollama.d.ts +18 -1
- package/dist/install/ollama.js +62 -7
- package/dist/install/ollama.js.map +1 -1
- package/dist/install/setup.d.ts +4 -0
- package/dist/install/setup.js +141 -18
- package/dist/install/setup.js.map +1 -1
- package/dist/install/windsurf.d.ts +25 -0
- package/dist/install/windsurf.js +43 -0
- package/dist/install/windsurf.js.map +1 -0
- package/dist/mcp/server.js +20 -1
- package/dist/mcp/server.js.map +1 -1
- package/dist/shared/types.d.ts +4 -0
- package/dist/ui/assets/{index-BA6IpU8g.css → index-Beo8psd-.css} +1 -1
- package/dist/ui/assets/index-CSPTTeeM.js +69 -0
- package/dist/ui/index.html +2 -2
- package/package.json +26 -1
- package/plugin/scripts/prompt-recall-hook.mjs +55 -4
- package/plugin/scripts/stop-hook.mjs +57 -6
- package/.agents/plugins/marketplace.json +0 -20
- package/.github/workflows/ci.yml +0 -30
- package/dist/ui/assets/index-B_qIVV0k.js +0 -69
- package/docs/methodology/re-derivation-rate.md +0 -112
- package/docs/methodology/useful-hit-rate.md +0 -79
- package/docs/plans/2026-05-20-fts5-lexical-recall.md +0 -1088
- package/docs/plans/2026-05-20-recall-daemon-wedge-fix.md +0 -662
- package/docs/plans/2026-05-20-recall-hook-design.md +0 -131
- package/docs/plans/2026-05-20-recall-hook-implementation.md +0 -1222
- package/docs/plans/desktop-product.md +0 -69
- package/docs/plans/factstore-design.md +0 -236
- package/logs/CHANGELOG/CHANGELOG-2026.md +0 -1389
- package/logs/CHANGELOG/CHANGELOG.md +0 -337
- package/migrations/000_initial_schema.sql +0 -174
- package/migrations/001_entity_type_rename.sql +0 -17
- package/migrations/002_adapter_state_extend.sql +0 -12
- package/migrations/003_session_embeddings.sql +0 -11
- package/migrations/004_facts.sql +0 -46
- package/migrations/005_sources.sql +0 -31
- package/migrations/006_providers.sql +0 -33
- package/migrations/007_source_tokens.sql +0 -17
- package/migrations/008_fts_rebuild.sql +0 -9
- package/migrations/009_session_embedding_chunks.sql +0 -46
- package/migrations/010_sources_opencode.sql +0 -30
- package/migrations/011_sources_hermes_agent.sql +0 -30
- package/migrations/012_sources_aider.sql +0 -30
- package/migrations/013_adapter_state_failure_count.sql +0 -12
- package/plugin-hermes-agent/README.md +0 -49
- package/plugin-hermes-agent/__init__.py +0 -75
- package/plugin-hermes-agent/plugin.yaml +0 -15
- package/scripts/backfill-citations.mjs +0 -0
- package/scripts/build-codex-plugin.mjs +0 -61
- package/scripts/deepseek-probe.mjs +0 -67
- package/scripts/extract-triples.mjs +0 -207
- package/scripts/longmemeval/embedding-cache.ts +0 -77
- package/scripts/longmemeval/fetch-dataset.sh +0 -25
- package/scripts/longmemeval/run-harness.ts +0 -315
- package/scripts/longmemeval/scorer.ts +0 -99
- package/scripts/longmemeval/tsconfig.json +0 -9
- package/scripts/longmemeval/types.ts +0 -35
- package/scripts/nlm-daily-digest.py +0 -239
- package/scripts/nlm-daily-digest.sh +0 -28
- package/src/cli/classify-parity.ts +0 -257
- package/src/cli/launchctl-helpers.ts +0 -49
- package/src/cli/nlm.ts +0 -885
- package/src/core/actions/actions-log.ts +0 -118
- package/src/core/actions/overlay.ts +0 -117
- package/src/core/adapters/aider.ts +0 -205
- package/src/core/adapters/claude-code.ts +0 -293
- package/src/core/adapters/common.ts +0 -54
- package/src/core/adapters/from-source.ts +0 -57
- package/src/core/adapters/hermes-agent.ts +0 -240
- package/src/core/adapters/hermes.ts +0 -277
- package/src/core/adapters/jsonl-generic.ts +0 -208
- package/src/core/adapters/opencode.ts +0 -281
- package/src/core/adapters/pi.ts +0 -264
- package/src/core/classifier/prompt.ts +0 -200
- package/src/core/dataset/build-dataset.ts +0 -463
- package/src/core/embedding/chunk-body.ts +0 -76
- package/src/core/embedding/embed-backfill.ts +0 -210
- package/src/core/embedding/embed-normalize.ts +0 -135
- package/src/core/facts/backfill-facts.ts +0 -254
- package/src/core/facts/extract-facts.ts +0 -50
- package/src/core/hook/citation-detect.ts +0 -124
- package/src/core/hook/cite-memo.ts +0 -68
- package/src/core/hook/claude-settings.ts +0 -166
- package/src/core/hook/gate.ts +0 -25
- package/src/core/hook/hook-log.ts +0 -41
- package/src/core/hook/memo-sweep.ts +0 -164
- package/src/core/hook/memo.ts +0 -67
- package/src/core/hook/pointer-block.ts +0 -26
- package/src/core/hook/select.ts +0 -32
- package/src/core/hook/transcript.ts +0 -121
- package/src/core/ingest/ingest-session.ts +0 -111
- package/src/core/providers/provider-models.ts +0 -100
- package/src/core/providers/provider-registry.ts +0 -196
- package/src/core/recall/citation-log.ts +0 -108
- package/src/core/recall/filter.ts +0 -27
- package/src/core/recall/index.ts +0 -6
- package/src/core/recall/match-fields.ts +0 -40
- package/src/core/recall/query-log.ts +0 -149
- package/src/core/recall/query-shape.ts +0 -66
- package/src/core/recall/recall-service.ts +0 -320
- package/src/core/recall/recent-log.ts +0 -59
- package/src/core/recall/tokenize.ts +0 -18
- package/src/core/recall/useful-scan.ts +0 -336
- package/src/core/recall-facts/fact-query-log.ts +0 -150
- package/src/core/recall-facts/fact-recall-service.ts +0 -327
- package/src/core/scheduler/scan-once.ts +0 -142
- package/src/core/scheduler/scheduler.ts +0 -225
- package/src/core/sources/source-registry.ts +0 -260
- package/src/core/storage/db-restore.ts +0 -133
- package/src/core/storage/live-status.ts +0 -45
- package/src/core/storage/migrate.ts +0 -72
- package/src/core/storage/sqlite-fact-store.ts +0 -304
- package/src/core/storage/sqlite-session-store.ts +0 -765
- package/src/hook/prompt-recall-hook.ts +0 -174
- package/src/hook/session-end-hook.ts +0 -81
- package/src/hook/session-start-hook.ts +0 -165
- package/src/hook/stop-hook.ts +0 -236
- package/src/http/app.ts +0 -1137
- package/src/install/claude-code.ts +0 -128
- package/src/install/codex.ts +0 -367
- package/src/install/hermes-agent.ts +0 -76
- package/src/install/hermes.ts +0 -78
- package/src/install/ollama.ts +0 -211
- package/src/install/setup.ts +0 -368
- package/src/llm/classifier-box.ts +0 -64
- package/src/llm/deepseek-client.ts +0 -150
- package/src/llm/env-autoload.ts +0 -55
- package/src/llm/ollama-client.ts +0 -189
- package/src/mcp/server.ts +0 -534
- package/src/ports/fact-store.ts +0 -102
- package/src/ports/llm-client.ts +0 -52
- package/src/ports/logger.ts +0 -16
- package/src/ports/session-store.ts +0 -45
- package/src/ports/transcript-adapter.ts +0 -55
- package/src/shared/types.ts +0 -145
- package/src/ui/App.tsx +0 -58
- package/src/ui/components/PromoteOpenButton.tsx +0 -65
- package/src/ui/components/SessionDrawer.tsx +0 -136
- package/src/ui/components/SideNav.tsx +0 -162
- package/src/ui/components/Skeleton.tsx +0 -107
- package/src/ui/index.html +0 -13
- package/src/ui/lib/actions.ts +0 -30
- package/src/ui/lib/api.ts +0 -92
- package/src/ui/lib/dataset.ts +0 -141
- package/src/ui/lib/registries.ts +0 -155
- package/src/ui/lib/view-settings.ts +0 -41
- package/src/ui/main.tsx +0 -15
- package/src/ui/pages/Live.tsx +0 -229
- package/src/ui/pages/Pulse.tsx +0 -415
- package/src/ui/pages/Recall.tsx +0 -190
- package/src/ui/pages/River.tsx +0 -308
- package/src/ui/pages/Search.tsx +0 -93
- package/src/ui/pages/Stub.tsx +0 -9
- package/src/ui/pages/Thread.tsx +0 -262
- package/src/ui/pages/settings/Classifier.tsx +0 -227
- package/src/ui/pages/settings/Data.tsx +0 -190
- package/src/ui/pages/settings/Index.tsx +0 -65
- package/src/ui/pages/settings/Labels.tsx +0 -224
- package/src/ui/pages/settings/Providers.tsx +0 -305
- package/src/ui/pages/settings/SettingsSubnav.tsx +0 -28
- package/src/ui/pages/settings/Sources.tsx +0 -326
- package/src/ui/pages/settings/Views.tsx +0 -96
- package/src/ui/styles.css +0 -1766
- package/src/ui/tsconfig.json +0 -21
- package/src/ui/vite.config.ts +0 -19
- package/tests/fixtures/claude_code/short_session.jsonl +0 -2
- package/tests/fixtures/claude_code/standard_iso.jsonl +0 -4
- package/tests/fixtures/claude_code/tool_heavy.jsonl +0 -8
- package/tests/fixtures/claude_code/with_subagent.jsonl +0 -7
- package/tests/fixtures/facts.ts +0 -17
- package/tests/fixtures/golden-corpus.ts +0 -85
- package/tests/fixtures/hermes/paired_request_dump.json +0 -24
- package/tests/fixtures/hermes/paired_session.json +0 -23
- package/tests/fixtures/hermes/request_dump.json +0 -28
- package/tests/fixtures/hermes/session_iso.json +0 -38
- package/tests/fixtures/hermes/session_unix.json +0 -38
- package/tests/fixtures/hermes/system_only.json +0 -18
- package/tests/fixtures/pi/error-connection-abort.jsonl +0 -8
- package/tests/fixtures/pi/short-successful.jsonl +0 -5
- package/tests/fixtures/pi/with-custom-message.jsonl +0 -6
- package/tests/fixtures/sessions.ts +0 -22
- package/tests/integration/backfill-facts.test.ts +0 -362
- package/tests/integration/citation-explicit.test.ts +0 -111
- package/tests/integration/cite-event.test.ts +0 -169
- package/tests/integration/cite-memo.test.ts +0 -87
- package/tests/integration/db-restore.test.ts +0 -153
- package/tests/integration/embed-backfill.test.ts +0 -176
- package/tests/integration/fact-supersedence.test.ts +0 -313
- package/tests/integration/fts-index.test.ts +0 -60
- package/tests/integration/getbyids-sqlite.test.ts +0 -60
- package/tests/integration/hermes-agent-hooks.test.ts +0 -248
- package/tests/integration/hook-claude-settings.test.ts +0 -205
- package/tests/integration/hook-log.test.ts +0 -54
- package/tests/integration/hook-memo.test.ts +0 -68
- package/tests/integration/hook-pre-compact.test.ts +0 -105
- package/tests/integration/hook-subagent-start.test.ts +0 -102
- package/tests/integration/http.test.ts +0 -401
- package/tests/integration/keyword-search-fts.test.ts +0 -66
- package/tests/integration/mcp-recall-logging.test.ts +0 -88
- package/tests/integration/mcp.test.ts +0 -248
- package/tests/integration/memo-sweep.test.ts +0 -91
- package/tests/integration/prompt-recall-hook.test.ts +0 -88
- package/tests/integration/provider-registry.test.ts +0 -107
- package/tests/integration/recall-golden.test.ts +0 -59
- package/tests/integration/recall-sqlite.test.ts +0 -169
- package/tests/integration/scheduler.test.ts +0 -391
- package/tests/integration/session-end-hook.test.ts +0 -48
- package/tests/integration/session-start-hook.test.ts +0 -126
- package/tests/integration/source-registry.test.ts +0 -120
- package/tests/integration/sqlite-fact-store.test.ts +0 -346
- package/tests/integration/stop-hook.test.ts +0 -560
- package/tests/integration/wal-checkpoint.test.ts +0 -49
- package/tests/unit/cli/launchctl-helpers.test.ts +0 -60
- package/tests/unit/core/adapters/aider.test.ts +0 -230
- package/tests/unit/core/adapters/claude-code.test.ts +0 -118
- package/tests/unit/core/adapters/hermes-agent.test.ts +0 -329
- package/tests/unit/core/adapters/hermes.test.ts +0 -81
- package/tests/unit/core/adapters/jsonl-generic.test.ts +0 -142
- package/tests/unit/core/adapters/opencode.test.ts +0 -354
- package/tests/unit/core/adapters/pi.test.ts +0 -110
- package/tests/unit/core/classifier/prompt.test.ts +0 -126
- package/tests/unit/core/embedding/chunk-body.test.ts +0 -100
- package/tests/unit/core/facts/extract-facts.test.ts +0 -117
- package/tests/unit/core/filter.test.ts +0 -40
- package/tests/unit/core/hook/citation-detect-cite-session.test.ts +0 -96
- package/tests/unit/core/hook/citation-detect.test.ts +0 -124
- package/tests/unit/core/hook/gate.test.ts +0 -29
- package/tests/unit/core/hook/pointer-block.test.ts +0 -22
- package/tests/unit/core/hook/select.test.ts +0 -66
- package/tests/unit/core/match-fields.test.ts +0 -39
- package/tests/unit/core/mcp-cite-session.test.ts +0 -51
- package/tests/unit/core/providers/provider-models.test.ts +0 -101
- package/tests/unit/core/query-shape.test.ts +0 -92
- package/tests/unit/core/recall-facts/fact-recall-service.test.ts +0 -258
- package/tests/unit/core/recall-service.test.ts +0 -200
- package/tests/unit/core/storage/live-status.test.ts +0 -54
- package/tests/unit/core/tokenize.test.ts +0 -32
- package/tests/unit/core/useful-scan.test.ts +0 -537
- package/tests/unit/llm/embed.test.ts +0 -93
- package/tests/unit/llm/ollama-client.test.ts +0 -124
- package/tests/unit/scripts/longmemeval-scorer.test.ts +0 -114
- package/tsconfig.json +0 -31
- package/tsconfig.test.json +0 -11
- package/vitest.config.ts +0 -22
|
@@ -1,337 +0,0 @@
|
|
|
1
|
-
# nlm-memory-ts CHANGELOG
|
|
2
|
-
|
|
3
|
-
Session-level log per session protocol. Cap: 10 entries — archive older to `CHANGELOG-YYYY.md` when exceeded.
|
|
4
|
-
|
|
5
|
-
## 2026-05-29 — Security hardening: bind address, timing-safe auth, backup/restore gate
|
|
6
|
-
|
|
7
|
-
**Changes:**
|
|
8
|
-
- `src/cli/nlm.ts`: `serve()` now passes `hostname: "127.0.0.1"` — daemon no longer listens on all interfaces. Verified: `lsof` shows `127.0.0.1:3940` only.
|
|
9
|
-
- `src/http/app.ts`: `/mcp` bearer token comparison replaced with `timingSafeEqual` (was `!==`).
|
|
10
|
-
- `src/http/app.ts`: `/api/data/backup` and `/api/data/restore` now require `Authorization: Bearer <NLM_MCP_TOKEN>` when that env var is set. When unset (local-only use), the `127.0.0.1` bind is the guard.
|
|
11
|
-
- Import: `timingSafeEqual` from `node:crypto` added.
|
|
12
|
-
|
|
13
|
-
**Findings that prompted this (peer-reviewed audit 2026-05-29):**
|
|
14
|
-
- MEDIUM: `0.0.0.0` binding exposed all HTTP endpoints to LAN — root finding that amplified everything else.
|
|
15
|
-
- MEDIUM: backup/restore unauthenticated — full DB exfiltration from LAN with one HTTP call (peer reviewer caught this; primary auditor missed it).
|
|
16
|
-
- LOW: non-timing-safe bearer comparison on `/mcp`.
|
|
17
|
-
|
|
18
|
-
**State:** 543 tests passing (no regressions). Daemon reloaded; bind address confirmed.
|
|
19
|
-
|
|
20
|
-
**Next:** Cursor adapter (NocoDB task #182), Windsurf adapter (#183). Deferred: training pipeline (#185, needs >50 useful hits).
|
|
21
|
-
|
|
22
|
-
## 2026-05-28 — Scheduler failure backoff (a5c29b0)
|
|
23
|
-
|
|
24
|
-
**Bug fixed: infinite classify retry storm**
|
|
25
|
-
|
|
26
|
-
Session `517ca931.jsonl` (230KB, Whtnxt Agent project) was failing DeepSeek classification every 30-min tick because no `adapter_state` row was ever written on failure — so `scanOnce` saw a "new" file every tick. The daemon log showed hundreds of `[scheduler] classifier error for cc_517ca931 — skipping` with no error detail.
|
|
27
|
-
|
|
28
|
-
Migration 013 adds `failure_count INTEGER DEFAULT 0` to `adapter_state`. `recordFailed()` increments `failure_count` and writes the current `file_size`. `scanOnce` skips files whose size hasn't changed, whether clean or failed — so any stuck file stops retrying until new content arrives. When the file grows, `failure_count` resets to 0.
|
|
29
|
-
|
|
30
|
-
The scheduler now logs the actual error message and a `failure N/M` counter (e.g. `error: LLM unreachable: deepseek for cc_517ca931 (failure 1/3)`). The real error was `LLM unreachable: deepseek` — now diagnosable. 2 new integration tests; 543 total passing.
|
|
31
|
-
|
|
32
|
-
**State:** daemon reloaded, migration applied, first retry logged correctly. Session `517ca931` will hit ceiling at failure 3/3 and stop.
|
|
33
|
-
|
|
34
|
-
**Outstanding:** npm publish v0.4.0 (needs `npm login` in terminal). Scheduler (Phase D) is already fully implemented in `nlm start` — confirm via NocoDB task #184 whether anything remains scoped there.
|
|
35
|
-
|
|
36
|
-
## 2026-05-28 — Aider adapter + useful-scan cron + v0.4.0 release (ece591a, a222f44, 708f49d)
|
|
37
|
-
|
|
38
|
-
Three items shipped in this session.
|
|
39
|
-
|
|
40
|
-
**Aider TranscriptAdapter (ece591a)**
|
|
41
|
-
|
|
42
|
-
`AiderAdapter` reads `.aider.chat.history.md` (or `$AIDER_CHAT_HISTORY_FILE`). A single file may contain multiple sessions, each opened by a `# aider chat started at YYYY-MM-DD HH:MM:SS` header. User turns are H4 headings (`#### ...`); assistant responses are plain text; blockquote lines (`> ...`) become `[tool_action: ...]` summaries. Session IDs derive from the header timestamp as `ai_YYYYMMDD_HHMMSS`. `endedAt` uses the next session's `startedAt` when available (no per-turn timestamps in markdown). Migration `012_sources_aider.sql` adds `'aider'` to the `sources.kind` CHECK constraint. `seedDefaults()` now seeds 6 presets. 21 new unit tests; 541 total pass.
|
|
43
|
-
|
|
44
|
-
**useful-scan cron wiring (a222f44)**
|
|
45
|
-
|
|
46
|
-
`nlm-daily-digest.sh` now calls `nlm useful-scan --days 1` before the Python stats fetch so `useful_hit_rate` is populated in the Telegram digest instead of showing "pending". Scan output goes to `logs/daily-digest/useful-scan.log`; `|| true` keeps the digest alive on scan failure. Dry-run confirmed: 161 recalls scanned, 0% useful (accurate — no `cite_session` calls yet).
|
|
47
|
-
|
|
48
|
-
**v0.4.0 (708f49d)**
|
|
49
|
-
|
|
50
|
-
Version bump and `git tag v0.4.0`. npm publish requires `npm login` — tag is set, publish when authenticated.
|
|
51
|
-
|
|
52
|
-
**State:** build clean, 541 tests green. Outstanding: npm publish (needs `npm login`); C2 Aider and useful-scan cron are now complete.
|
|
53
|
-
|
|
54
|
-
**Next:** README is rewritten. No P1s outstanding. Consider: npm publish, prose-citation soft labels (deferred), or new feature work.
|
|
55
|
-
|
|
56
|
-
## 2026-05-28 — HermesAgentAdapter: TranscriptAdapter for NousResearch Hermes Agent (7b9074b)
|
|
57
|
-
|
|
58
|
-
`src/core/adapters/hermes-agent.ts` — reads `~/.hermes/state.db` (WAL mode, schema v11). Extracts user/assistant/tool turns; tool calls in assistant messages summarized as `[tool_use: <name>]`; tool-role messages summarized as `[tool_result: <name>: <preview>]`; system messages skipped. Label from session `title` field, fallback to first user turn. DB path overridable via `NLM_HERMES_AGENT_DB_PATH` or `HERMES_HOME`.
|
|
59
|
-
|
|
60
|
-
**Migration 011** (`migrations/011_sources_hermes_agent.sql`) adds `'hermes-agent'` to the `sources.kind` CHECK constraint (same rename-recreate-copy pattern as migration 010).
|
|
61
|
-
|
|
62
|
-
**Source registry:** `SourceKind` extended; `seedDefaults()` now seeds 5 presets (hermes-agent inserted between hermes and opencode, auto-enabled if state.db exists).
|
|
63
|
-
|
|
64
|
-
**`from-source.ts`:** `'hermes-agent'` case added, delegates to `HermesAgentAdapter`.
|
|
65
|
-
|
|
66
|
-
**Tests: 520 pass** (was 501, +19 new in `tests/unit/core/adapters/hermes-agent.test.ts`). `source-registry.test.ts` updated for 5 presets. Build clean, typecheck clean.
|
|
67
|
-
|
|
68
|
-
**State:** NousResearch Hermes Agent is now fully integrated — plugin hooks (pre/post-turn, session lifecycle) from the previous session + transcript indexing from this session. End-to-end: sessions indexed in SQLite → recalled by the daemon → injected into Hermes Agent via the Python plugin.
|
|
69
|
-
|
|
70
|
-
**Next:** C2 Aider adapter; B3 extract-triples improvements.
|
|
71
|
-
|
|
72
|
-
## 2026-05-28 — NousResearch Hermes adapter + README rewrite
|
|
73
|
-
|
|
74
|
-
**README rewrite**
|
|
75
|
-
|
|
76
|
-
Lead with the three moats (cross-runtime reach, editable timeline, 97.2% R@5). Dropped "self-improving accuracy" framing. Added OpenCode to the shipped runtime list. Added `nlm connect hermes-agent` to the install table.
|
|
77
|
-
|
|
78
|
-
**NousResearch Hermes Agent plugin (#165)**
|
|
79
|
-
|
|
80
|
-
Python plugin for NousResearch Hermes Agent's `plugin.yaml` lifecycle hook system. Covers all 6 events the plugin system exposes.
|
|
81
|
-
|
|
82
|
-
New files:
|
|
83
|
-
- `plugin-hermes-agent/plugin.yaml` — manifest (`kind: memory`, 6 hooks declared)
|
|
84
|
-
- `plugin-hermes-agent/__init__.py` — Python shim; each hook POSTs to the local nlm daemon (stdlib only, no PyPI deps)
|
|
85
|
-
- `plugin-hermes-agent/README.md` — install guide
|
|
86
|
-
|
|
87
|
-
New HTTP endpoints in the nlm daemon:
|
|
88
|
-
- `POST /api/hook/hermes-agent/pre-turn` — keyword recall for `pre_llm_call`; updates memo; returns `{"context": str|null}`
|
|
89
|
-
- `POST /api/hook/hermes-agent/post-turn` — prose citation detect for `post_llm_call`; logs to citation-log.jsonl
|
|
90
|
-
- `POST /api/hook/hermes-agent/session-lifecycle` — clears surfaced-ID memo on end/finalize/reset
|
|
91
|
-
|
|
92
|
-
New install module: `src/install/hermes-agent.ts` — `connectHermesAgent` / `disconnectHermesAgent` (copies plugin dir to `~/.hermes/plugins/nlm-memory/`, enables via `hermes plugins enable` if available).
|
|
93
|
-
|
|
94
|
-
CLI: `nlm connect hermes-agent` / `nlm disconnect hermes-agent` added to `src/cli/nlm.ts`.
|
|
95
|
-
|
|
96
|
-
**Tests: 501 pass** (was 488, +13 new in `tests/integration/hermes-agent-hooks.test.ts`). Build clean, typecheck clean.
|
|
97
|
-
|
|
98
|
-
**State:** all three hermes-agent endpoints tested end-to-end without a TTY. Python plugin is a thin HTTP shim — no Python test harness needed.
|
|
99
|
-
|
|
100
|
-
**Next:** transcript adapter for NousResearch Hermes sessions (session files stored in `~/.hermes-agent/sessions/` or equivalent); C2 Aider adapter; B3 extract-triples improvements.
|
|
101
|
-
|
|
102
|
-
## 2026-05-28 — C1: OpenCode adapter (SQLite-based, `opencode/1.0`)
|
|
103
|
-
|
|
104
|
-
OpenCode stores all sessions in a single SQLite DB (`~/Library/Application Support/opencode/opencode.db` on macOS, `$XDG_DATA_HOME/opencode/opencode.db` on Linux) rather than per-session JSONL files. The adapter reads it via `better-sqlite3` in readonly mode, reusing the same `TranscriptAdapter` port as Claude Code, Hermes, and pi.
|
|
105
|
-
|
|
106
|
-
**What ships**
|
|
107
|
-
|
|
108
|
-
- `src/core/adapters/opencode.ts` (new) — `OpenCodeAdapter` class. `detect()` checks for the DB file. `discover()` queries `session WHERE time_archived IS NULL` with optional `time_updated >= since` filter. `parseSession(sessionId)` joins the `session`, `message`, and `part` tables: extracts `text` parts (non-ignored) and `tool` parts (summarized as `[tool: <name>]`), skips structural parts (step-start/finish, reasoning, compaction, snapshot, patch, agent, retry). Label comes from `session.title` unless it's `"New session"`, in which case it falls back to the first user turn. `gitBranch` read from `.git/HEAD` in `session.directory`. `sourcePath` is `${dbPath}::${sessionId}`.
|
|
109
|
-
- `migrations/010_sources_opencode.sql` (new) — SQLite table-recreate migration to add `"opencode"` to the `sources.kind` CHECK constraint (SQLite does not support `ALTER COLUMN`). Copies existing rows, drops old table, renames new.
|
|
110
|
-
- `src/core/adapters/from-source.ts` — `"opencode"` case added to `adapterFromSource` switch.
|
|
111
|
-
- `src/core/sources/source-registry.ts` — `SourceKind` union extended; `seedDefaults()` now seeds 4 presets (added OpenCode row, auto-enabled if DB exists).
|
|
112
|
-
- `tests/unit/core/adapters/opencode.test.ts` (new) — 15 tests: detect enabled/disabled, discover (all sessions, archived exclusion, since filter, absent DB), parseSession (null for unknown, null for no usable turns, turn count + roles, ignored-part skipping, tool-part summarization, title label, fallback label, sourcePath format, projectDir, absent DB, ISO timestamps), and metadata assertions.
|
|
113
|
-
- `tests/integration/source-registry.test.ts` — two assertions updated: "seeds three presets" → "seeds four presets"; kind list updated to include `"opencode"`.
|
|
114
|
-
|
|
115
|
-
**Architecture note**
|
|
116
|
-
|
|
117
|
-
The `discover()` / `parseSession()` contract treats session IDs (not file paths) as the identifying string — the interface's `path: string` param is opaque, so this is valid. Users with OpenCode already installed get the source auto-enabled on first `nlm migrate` + daemon restart with no manual configuration.
|
|
118
|
-
|
|
119
|
-
**Tests: 488 pass** (was 470 before this session). All 57 test files green, build clean.
|
|
120
|
-
|
|
121
|
-
**Next:** README rewrite (D) — drop "self-improving accuracy" promise; lead with the three moats (editable timeline, cross-runtime MCP reach, 97.2% R@5). Then NousResearch Hermes adapter (#165, P1).
|
|
122
|
-
|
|
123
|
-
## 2026-05-28 — Code review: HOOK_SCRIPT_MARKERS bug caught and patched (44fec62)
|
|
124
|
-
|
|
125
|
-
`code-review:code-review` skill run against commits `10c16ac..285fe9e`. One confirmed bug found and fixed: `HOOK_SCRIPT_MARKERS` in `claude-settings.ts` did not include the three Phase 2 hook filenames (`session-start-hook.js`, `pre-compact-hook.js`, `subagent-start-hook.js`). Consequence: `nlm hook uninstall` silently left all three hooks behind; each reinstall appended a duplicate instead of replacing. Live settings had two `SessionStart` NLM entries. Fix: added three filenames to `HOOK_SCRIPT_MARKERS`, updated stale file-level comment, rebuilt, reinstalled. Settings deduplicated (1 entry per event × 6 hooks). 436/436 tests pass. No other confirmed bugs from the review — four lower-confidence items scored below 80 and were not acted on.
|
|
126
|
-
|
|
127
|
-
**State:** `nlm v0.3.0` installed globally. 6 hooks clean in `~/.claude/settings.json`. Shadow mode live.
|
|
128
|
-
|
|
129
|
-
**Next:** `nlm useful-scan` CLI (B1 full); C1 OpenCode adapter #180 (P1); B3 extract-triples redesign; tests for `session-start-hook.ts`.
|
|
130
|
-
|
|
131
|
-
## 2026-05-28 — Deploy v0.3.0: 6 hooks live; cite_session double-count fixed; useful_hit_rate stub; session-start source added
|
|
132
|
-
|
|
133
|
-
Four commits on main (`976e549` → `d013caf`). All 436 tests green throughout.
|
|
134
|
-
|
|
135
|
-
1. **B2 double-count fix** (`976e549`): `citation-detect.ts` was re-detecting `cite_session` tool_uses in the Stop hook and writing a second citation log entry. MCP handler already calls `appendCitation()` directly. Fix: skip `cite_session` in Stop hook detector; updated 5 tests in `citation-detect-cite-session.test.ts`.
|
|
136
|
-
2. **B1 stub** (`976e549`): added `useful_hit_rate: null` to `StatsResult` + both `recallStats()` return paths. Daily digest shows "pending" cleanly instead of a field-access error. Unblocks schema for future `nlm useful-scan` CLI.
|
|
137
|
-
3. **Phase 2 hook wiring** (`becb591`): `ALL_HOOKS` now includes SessionStart, PreCompact, SubagentStart. Version string corrected 0.2.0-dev → 0.3.0.
|
|
138
|
-
4. **session-start source** (`d013caf`): `src/hook/session-start-hook.ts` written against current interfaces (stale dist imported `loadSurfacedForBudget` that no longer exists). `ClaudeHookEvent` union extended with `SessionStart` + `SubagentStart`.
|
|
139
|
-
|
|
140
|
-
**State:** `nlm v0.3.0` installed globally, all 6 hooks active in shadow mode. Live measurement window open. Three weekly metrics per D5 start accumulating: cite_session call rate, useful_hit_rate (shows pending until nlm useful-scan lands), null-recall rate.
|
|
141
|
-
|
|
142
|
-
**Next:** `nlm useful-scan` CLI (B1 full implementation); B3 extract-triples redesign; C1 OpenCode adapter #180.
|
|
143
|
-
|
|
144
|
-
## 2026-05-28 — D4 thesis pivot: citation moat downgraded permanently; adapter breadth + editable timeline elevated; Phase 0/2/3 engineering landed
|
|
145
|
-
|
|
146
|
-
Full-day arc on 2026-05-27 producing three clusters of work: a 3-agent audit exposing recall-layer defects, five engineering branches integrated (Phases 0/2/3 of the 90-day plan), and a D4 strategic-pivot decision ending in a permanent thesis revision. The cite_session MCP tool lands on this branch (`phase-1c-cite-tool`) as the last Phase 0 piece.
|
|
147
|
-
|
|
148
|
-
**Morning 3-agent audit findings (functional + comparative + operability lenses, parallel dispatch)**
|
|
149
|
-
|
|
150
|
-
All three agents converged on the same structural defect: `src/hook/citation-detect.ts:54-64` matches session IDs only in tool *inputs*, but `recall_sessions` inputs carry `{query, mode, limit}` — never a session ID. Only `get_session({id})` triggers a match. The dominant agent usage pattern (read the surfaced digest → answer from it without a follow-up `get_session`) generates zero citations. Additional defects found: `useful:true` field never written by any code path (metric permanently reads zero); conversation memo cap of 10 silently kills injection past turn 4 because the memo is never pruned mid-conversation; hook locked to `mode=keyword` so Build F's force-include logic never activates at hook time.
|
|
151
|
-
|
|
152
|
-
**Phases 0/2/3 engineering shipped (5 branches integrated)**
|
|
153
|
-
|
|
154
|
-
- Phase 0.1 — Stop hook citation detector fixed: scans ALL assistant turns, not just the last (CHANGELOG entry "Stop-hook multi-turn citation detection" above)
|
|
155
|
-
- Phase 0.2 — `useful:true` write path added; `useful_hit_rate` goes from structurally 0% to a real metric
|
|
156
|
-
- Phase 0.3 — Conversation memo pruning: memo capped at 15 and pruned oldest-first when full; late-turn injection no longer silently dies at turn 4
|
|
157
|
-
- Phase 0.4 — Hook mode aligned to `hybrid` so Build F's force-include logic activates at hook time
|
|
158
|
-
- Phase 2 — SessionStart injection + opinionated 3-hook subset (SessionEnd, PreCompact, SubagentStart)
|
|
159
|
-
- Phase 3 — Training-data collection scaffolding (citation-log schema extended)
|
|
160
|
-
- Phase 1c (this branch) — `cite_session` MCP tool: explicit citation primitive that lets MCP clients signal "I used session X," bypassing the Stop-hook inference chain entirely
|
|
161
|
-
|
|
162
|
-
**Phase 1 simulation methodology failures (do not repeat)**
|
|
163
|
-
|
|
164
|
-
Two simulations run during the day produced misleading numbers and were invalidated before they influenced any decision:
|
|
165
|
-
|
|
166
|
-
1. A 70-prompt simulation of hook injection reported 15.7% `useful_hit_rate` but used a content-full injection format instead of production's pointer-only format. The content-full format inflates compliance because the session body is present in context; in production the body is NOT injected, only the ID + label. Methodology bug caught by a follow-up validator. The simulation's number was correct for the simulation's format; it is not an estimate of production rates.
|
|
167
|
-
2. A 6-session "Arm C" simulation, generated by the evaluating agent itself with deliberate compliance, reported 100% session compliance. This is not a population estimate — it is a demonstration that the measurement pipeline works end-to-end. The agent knew it was being measured and authored the sessions to pass. This is not a useful number for forecasting real-system citation rates.
|
|
168
|
-
|
|
169
|
-
Rule extracted: simulation is valid for pipeline-validity checks (does the detector fire, does the rate field get written, does the log grow?). Simulation cannot estimate rates because rates are population statistics that require real-system samples.
|
|
170
|
-
|
|
171
|
-
**Pivot to `cite_session` MCP tool primitive**
|
|
172
|
-
|
|
173
|
-
The per-prompt ID injection approach (injecting surfaced session IDs into every UserPromptSubmit) was prototyped, tested against real Claude Code behavior, and rejected as not shippable. Real Claude showed a 0% prose citation rate on pointer-only injection: the model does not naturally surface NLM session IDs in its prose responses, and the pointer-only format (no body) doesn't give the model enough content to form a meaningful citation. Reverting that branch (`revert(hook): remove cite-by-ID injection`) was correct before `cite_session` landed as the alternative: an explicit MCP tool the agent can call to record a citation when it genuinely used a session.
|
|
174
|
-
|
|
175
|
-
**D4 thesis pivot (permanent)**
|
|
176
|
-
|
|
177
|
-
Senior ML engineer review concluded the citation-trained-reranker moat hypothesis fails on fundamentals:
|
|
178
|
-
|
|
179
|
-
1. Per-operator reranker trained on ~500-1000 citation events/year with prose-channel noise cannot beat the existing hybrid+RRF baseline at 97.2% R@5. The training set is too small and the signal-to-noise ratio too low for a meaningful quality lift.
|
|
180
|
-
2. Aggregated cross-operator training violates local-first. The privacy property is the product; pooling operator data to make reranking better destroys it.
|
|
181
|
-
3. Per-operator moat math: at ~70 daily recalls with ~15% compliance, the per-operator corpus is ~3,800 labeled rows/year. Cross-encoder reranking needs 10K+ to statistically separate from the baseline. The gap does not close in reasonable product timelines.
|
|
182
|
-
|
|
183
|
-
**Citation feedback loop's new role (quality-monitoring, not reranker training):** detect trash recalls, populate a "frequently useful" dashboard the operator can act on, seed future supervised work if/when aggregated data becomes available under explicit consent. NOT a moat. NOT a reranker pipeline. Permanently downgraded.
|
|
184
|
-
|
|
185
|
-
**Three elevated moats (D4 thesis):**
|
|
186
|
-
|
|
187
|
-
1. **Editable timeline / supersedence** — schema-level, retrofit-impossible for append-only-with-decay competitors (agentmemory, mem0, honcho). The supersedence link is the only non-destructive timeline primitive in any of the four systems.
|
|
188
|
-
2. **Cross-runtime reach via MCP** — Claude Code + Codex shipped. OpenCode (#164) and NousResearch Hermes (#165) adapters UNSHELVED — both elevated to P1. Cross-runtime distribution is now load-bearing, not nice-to-have.
|
|
189
|
-
3. **Passive corpus quality** — 97.2% R@5 on LongMemEval-S (better than agentmemory's published 95.2%). Build F's force-include + hybrid+RRF is the foundation. This number is credible, reproducible, and does not require the citation moat to be true.
|
|
190
|
-
|
|
191
|
-
**90-day direction (revised)**
|
|
192
|
-
|
|
193
|
-
Adapter breadth is the primary workstream: #164 OpenCode (~2 weeks), #165 NousResearch Hermes (after #164), then pi.dev/Cursor/Aider via `nlm-wrap` MCP wrapper. Editable-timeline UX visibility (making supersedence queryable from the River UI and the MCP `get_session` surface) is the secondary workstream — the moat is structural but invisible until the UI exposes it. Phase 1 passive measurement window continues; 97.2% R@5 baseline is the floor to defend.
|
|
194
|
-
|
|
195
|
-
**Next priorities**
|
|
196
|
-
|
|
197
|
-
1. #164 OpenCode adapter (P1, ~2 weeks) — cross-runtime is load-bearing
|
|
198
|
-
2. #165 NousResearch Hermes adapter (P1, after #164)
|
|
199
|
-
3. Editable-timeline UX: supersedence visible in River + `get_session` response
|
|
200
|
-
4. Retain 97.2% R@5 baseline — no retrieval algorithm changes without clearing this bar
|
|
201
|
-
|
|
202
|
-
## 2026-05-27 — Codex CLI adapter: marketplace plugin + MCP config wiring + interactive-mode hook dispatch
|
|
203
|
-
|
|
204
|
-
Cross-runtime adapter work, first target landed. NLM is now installable on Codex CLI via `nlm connect codex`, which registers a local plugin marketplace, installs the `nlm-memory` plugin, writes a sentinel-bracketed `[mcp_servers.nlm-memory]` block to `~/.codex/config.toml`, and (optionally with `--with-hooks`) drops a legacy `~/.codex/hooks.json` fallback. Designed to mirror agentmemory's distribution pattern but the integration surface for current Codex (0.134.0) is materially different from both Codex Desktop and the wiki's 2026-05-23 prediction.
|
|
205
|
-
|
|
206
|
-
**What ships**
|
|
207
|
-
|
|
208
|
-
- `plugin/.codex-plugin/plugin.json` — Codex plugin manifest declaring `mcpServers: "./.mcp.json"` and `hooks: "./hooks/hooks.json"` pointers
|
|
209
|
-
- `plugin/hooks/hooks.json` — `UserPromptSubmit` + `Stop` event registrations, scripts referenced via `${CLAUDE_PLUGIN_ROOT}`
|
|
210
|
-
- `plugin/.mcp.json` — MCP server registration (spawns `nlm mcp` over stdio); duplicated by the direct config.toml writer for redundancy
|
|
211
|
-
- `plugin/scripts/{prompt-recall-hook,stop-hook}.mjs` — esbuild single-file bundles of the existing TS hook entries, build pinned in `scripts/build-codex-plugin.mjs`
|
|
212
|
-
- `.agents/plugins/marketplace.json` — marketplace manifest declaring the plugin and its source path (`./plugin`)
|
|
213
|
-
- `src/install/codex.ts` — `connectCodex` / `disconnectCodex` / `writeMcpServerToConfig` / `removeMcpServerFromConfig` / `writeLegacyHooks` / `removeLegacyHooks`. Marketplace + plugin add are delegated to the `codex` binary (it owns trust + snapshot state); MCP config and hooks.json are written directly with sentinel markers so disconnect can strip exact regions without touching user-authored content.
|
|
214
|
-
- `src/cli/nlm.ts` — `nlm connect codex` and `nlm disconnect codex` commands. Flags: `--source <owner/repo>` (default `pbmagnet4/nlm-memory-ts`), `--local` shortcut for dev, `--with-hooks` to also write the legacy fallback, `--dry-run`.
|
|
215
|
-
|
|
216
|
-
**The four wrong-then-right turns worth keeping in memory**
|
|
217
|
-
|
|
218
|
-
1. *Codex hooks are not Claude-Code-shape settings.json entries.* The 2026-05-23 wiki claim of "identical schema, ~95% script reuse" was wrong on the install mechanism. Codex uses a marketplace + plugin architecture. Hook *contract* (events, stdin payload, stdout convention) is identical to Claude Code; install path is entirely different. Script logic reuses verbatim.
|
|
219
|
-
2. *Marketplace requires a `.agents/plugins/marketplace.json` at the repo root.* First connect attempt failed with `marketplace root does not contain a supported manifest` until that file landed. Reverse-engineered from `~/.codex/.tmp/plugins/.agents/plugins/marketplace.json` shipped by `openai-curated`.
|
|
220
|
-
3. *The marketplace policy field is enum-constrained.* `authentication: "NONE"` rejected as `unknown variant`; only `"ON_INSTALL"` and `"ON_USE"` accepted. NLM has no auth to do, so `"ON_USE"` was picked as a no-op-on-use default. Marketplace went green after the swap.
|
|
221
|
-
4. *`--dangerously-bypass-hook-trust` is misleadingly named.* The flag warns "hooks may run without review for this invocation" but in practice does not bypass trust at all. Hooks dispatched only after persisting trust via an interactive Codex session. Once trust landed in `[hooks.state]`, hooks fired in subsequent `codex exec` (non-interactive) calls too. The bypass flag's real role is unclear.
|
|
222
|
-
|
|
223
|
-
**Verified end-to-end** (`019e69fa-4ea1-7b10-8c66-70bda64ba086` is the codex session used for final validation)
|
|
224
|
-
|
|
225
|
-
- ✅ `codex plugin marketplace add ./` (local source) succeeds
|
|
226
|
-
- ✅ `codex plugin add nlm-memory@nlm-memory-ts` produces `installed, enabled` in `codex plugin list`
|
|
227
|
-
- ✅ Cached plugin at `~/.codex/plugins/cache/nlm-memory-ts/nlm-memory/0.3.0/` contains all expected files including dotfile dirs (`.codex-plugin/`, `.mcp.json`)
|
|
228
|
-
- ✅ `[mcp_servers.nlm-memory]` block written to `~/.codex/config.toml` between sentinels; idempotent under repeated connects; cleanly stripped on disconnect
|
|
229
|
-
- ✅ `UserPromptSubmit` hook dispatches from plugin path: codex stdout shows `hook: UserPromptSubmit` / `hook: UserPromptSubmit Completed`, hook-log gains an entry with codex session UUID (`019e...`), recall ran, gate evaluated, would-inject populated, shadow mode logged correctly
|
|
230
|
-
- ✅ Plugin-only default (`nlm connect codex` without `--with-hooks`) fires UserPromptSubmit exactly once per turn. The earlier double-fire with `--with-hooks` enabled (plugin path + legacy `~/.codex/hooks.json` both fired) is exactly why `--with-hooks` stays opt-in
|
|
231
|
-
- ✅ `codex_features list` confirms `hooks: stable, true` (so the runtime supports them) but `plugin_hooks: removed, false` (the older feature flag is dead; current path is the `hooks` engine with plugin-bundled config pointers)
|
|
232
|
-
|
|
233
|
-
**Not yet verified**
|
|
234
|
-
|
|
235
|
-
- ⏳ `Stop` hook dispatch — needs a one-time interactive trust approval before it fires (Codex only prompts for trust on hooks that have a chance to run; `codex exec` -p with bypass-trust did not surface a Stop prompt). Will land on Edward's next interactive `codex` turn.
|
|
236
|
-
- ⏳ Remote marketplace install (`codex plugin marketplace add pbmagnet4/nlm-memory-ts`). The local install is the harder code path (the marketplace.json had to be authored from scratch); remote install reuses the same files via git fetch. Verifying in this session's tail after the GitHub push.
|
|
237
|
-
|
|
238
|
-
**Trust mechanics, for the future**
|
|
239
|
-
|
|
240
|
-
Codex persists hook trust per `(source, event, ...)` tuple under `[hooks.state]` in `config.toml`. Once a user approves a hook the first time, subsequent invocations (including `codex exec`) fire without prompting. The hash is content-addressed — a release that changes a script binary requires re-trust. This means `nlm connect codex` from a fresh install always requires one interactive `codex` turn to bootstrap trust before hooks fire; we cannot do that step on the user's behalf.
|
|
241
|
-
|
|
242
|
-
**Build pipeline**
|
|
243
|
-
|
|
244
|
-
`npm run build` now chains `build:server` (tsc) + `build:ui` (vite) + `build:codex-plugin` (esbuild). The codex-plugin build is single-file per entry (no dependency tree shipped), platform=node, format=esm, target=node20. Each .mjs is under 10KB.
|
|
245
|
-
|
|
246
|
-
**Tests**
|
|
247
|
-
|
|
248
|
-
414 unit + integration pass unchanged. No new test files added in this commit — the install path is exercised by the verified end-to-end smoke flow (`nlm connect codex --local` → `codex exec` → hook-log delta inspection). Test surface for install/codex.ts and the build script should land in a follow-up.
|
|
249
|
-
|
|
250
|
-
**Wiki correction owed**
|
|
251
|
-
|
|
252
|
-
`Whtnxt Agent Vault/Ventures/nlm-memory/learnings.md` line 218 lists Codex CLI as "`~/.codex/` JSON-config hooks (identical schema to Claude Code) … ~95% script reuse from Claude Code". The script reuse claim is correct (the .ts files port verbatim); the install-mechanism claim is wrong (marketplace + plugin, not settings.json). Wiki update is the next priority after this commit lands.
|
|
253
|
-
|
|
254
|
-
**Next priorities** (revised from the morning's stack)
|
|
255
|
-
|
|
256
|
-
1. Wiki update correcting the 2026-05-23 cross-runtime hook landscape table and adding a Codex plugin Tool Lesson. ← **Up next.**
|
|
257
|
-
2. Stop hook validation on Edward's first interactive codex turn (passive — happens whenever).
|
|
258
|
-
3. NousResearch Hermes Agent (#165) — has the cleanest `plugin.yaml` hook surface and was identified in the wiki as the next runtime worth a real adapter. I can validate it end-to-end without a TTY, unlike Codex.
|
|
259
|
-
4. Mode B pre-mortem and alt-embedding A/B remain shelved.
|
|
260
|
-
|
|
261
|
-
## 2026-05-27 — Stop-hook multi-turn citation detection: useful_hit_rate goes from structurally 0% to a real metric
|
|
262
|
-
|
|
263
|
-
Bug-fix to the Stop hook's citation detector. The previous implementation scanned only the LAST assistant turn of the transcript, but `tool_use` blocks live in earlier turns — the typical pattern is `tool_use → tool_result → prose summary`, and Stop fires after the summary. The detector saw prose, found no tool_use, logged 0 citations. Production evidence: 348 Stop firings with surfaced IDs, **zero** citations recorded, despite 23 real `mcp__nlm-memory__*` tool_uses in the matching transcripts over the last 7 days.
|
|
264
|
-
|
|
265
|
-
**Diagnosis path.** Cross-referenced `~/.nlm/hook-log.jsonl` (stop entries, all `citedIds:[]`) against `~/.claude/projects/<workspace>/<conv>.jsonl` (real assistant turns). Drilled into `1fc5a8f1-00fa-4ff5-85e7-a239072082b2`: recall hook surfaced `cc_7ff73609-…`, the assistant called `get_session({id:"cc_7ff73609-…"})` in turn N-1, then wrote a prose summary in turn N; the Stop hook scanned only turn N and logged `citedIds:[]`. Confirmed by code path at `transcript.ts:48` — the loop returns on the first assistant line found walking from the end.
|
|
266
|
-
|
|
267
|
-
**Changes**
|
|
268
|
-
- `src/core/hook/transcript.ts` — added `readAllAssistantTurns(transcriptPath): ReadonlyArray<AssistantTurn>` that returns every assistant turn in order. Kept `readLastAssistantTurn` as a thin wrapper (single test caller; back-compat for non-Stop callers).
|
|
269
|
-
- `src/core/hook/cite-memo.ts` (new) — per-conversation cited-set memo mirroring `memo.ts`. Same state dir (`~/.nlm/hook-state/`, overridable via `NLM_HOOK_STATE_DIR`), filename suffix `.cited.json` so memo-sweep's existing dir-walk cleans both surfaced and cited memos by mtime. `loadCited` / `recordCited` / `clearCited`.
|
|
270
|
-
- `src/hook/stop-hook.ts` — `runStopHook` now reads all assistant turns, unions text + tool_uses across them, runs `detectCitations` over the union, dedupes against `loadCited(conversationId)`, posts the fresh ones, and persists via `recordCited`. The `responsePreview` stays as the LAST turn's prose (that's the text Edward saw when Stop fired). Daemon remains blind-append; dedup is hook-local.
|
|
271
|
-
- `src/hook/session-end-hook.ts` — `runSessionEnd` now also calls `clearCited` so both memos are cleaned on session close.
|
|
272
|
-
- `scripts/backfill-citations.mjs` (new) — one-shot historical replay. Walks `~/.nlm/hook-log.jsonl` to collect surfaced-ID sets per conversation, finds matching transcripts under `~/.claude/projects/`, runs the same detector, dedupes against existing `~/.nlm/citation-log.jsonl` entries, appends fresh citations with a `backfill:true` marker. Idempotent. Dry-run by default; `--commit` writes.
|
|
273
|
-
|
|
274
|
-
**Validation**
|
|
275
|
-
- Tests: 414 unit + integration tests pass (was 396, +18 new). New cases cover: tool_use detected when it's in an earlier turn and the last turn is prose-only (the real-world pattern); dedup across repeated Stop firings on a growing transcript; local memo update even when `postCitation` fails (no double-count on next fire); 10 `cite-memo` cases (load/record/clear/corrupt-file/non-array/path-safety); 3 `readAllAssistantTurns` cases; 2 new session-end cases.
|
|
276
|
-
- Typecheck clean on changes (pre-existing `SessionEnd` error in `hook-claude-settings.test.ts` is unrelated and predates this work).
|
|
277
|
-
- Backfill dry-run against the live `~/.nlm/hook-log.jsonl`: 42 conversations had surfaced IDs, 37 had a matching transcript, **4 conversations contain at least one tool_use citation the old detector missed**. This is lower than the upper bound suggested by the raw tool-use count (23) because many tool_uses were `recall_sessions`/`recall_facts` (no surfaced-ID-in-input — those are pull, not push-follow-up). The 4 captured citations are the ones where the model actually drilled into a surfaced session via `get_session(id=...)`.
|
|
278
|
-
|
|
279
|
-
**Impact.** `useful_hit_rate` (cited / surfaced) goes from a structural 0% to a real signal. This is the training-data substrate for the future learned reranker (each row in the citation log is a `(query, returned_id, was_cited)` triple once joined against `~/.nlm/query_log.jsonl` by `conversation_id`). The 348 stop firings that previously generated zero training rows would have generated ~10-15 if the detector had been working — small but real, and growing with every conversation going forward.
|
|
280
|
-
|
|
281
|
-
**Methodology note worth keeping.** The bug was diagnosable in <10 minutes by cross-referencing two existing log streams (hook-log.jsonl × Claude Code transcripts) before touching code. Tomorrow's-self version of this rule: when a telemetry metric reads structurally zero, scan the raw inputs the metric is supposed to consume before assuming the metric is correct. Filing in `Operations/what-works/code-quality.md` candidate set.
|
|
282
|
-
|
|
283
|
-
**Next priorities (unchanged from earlier today's update):**
|
|
284
|
-
|
|
285
|
-
1. ~~Stop hook citation rate.~~ Shipped.
|
|
286
|
-
2. Pre-mortem Mode B before any code. Ceiling +1.5% hybrid temporal — current recommendation is to shelve unless a separate driver emerges.
|
|
287
|
-
3. Cross-runtime hook adapters (Hermes / pi / Codex). Unchanged.
|
|
288
|
-
4. Alt-embedding A/B — still deferred.
|
|
289
|
-
|
|
290
|
-
**Source:** Whtnxt Agent orchestrator session 2026-05-27 (continuation from Build F ship). Diagnosis grounded in `~/.nlm/hook-log.jsonl` (342 stop entries, 0 citations) and `~/.claude/projects/-Users-echalupa-Documents-Coding-Projects-Whtnxt-Agent/*.jsonl` (23 NLM tool_uses across 7 days).
|
|
291
|
-
|
|
292
|
-
## 2026-05-27 — Build F shipped: force-include keyword rank-1 on temporal+entity shape; hybrid temporal +3.0 / aggregate +0.8 / hybrid beats keyword for the first time
|
|
293
|
-
|
|
294
|
-
Single session arc, ~6 hours: Build E′ (asymmetric RRF multiplicative boost) shipped → harness-tested → falsified by head-baseline → reverted → diagnosed via per-question `results.json` → Probes 1 & 2 designed and run → Build F (post-merge force-include) shipped → confirmed by clean A/B head-baseline → shipped. Three full harness runs (1 cold ~50 min + 2 hot ~25s) plus two probe scripts. Zero false ships.
|
|
295
|
-
|
|
296
|
-
**Build E′ (falsified path, recorded for audit trail).** Built `src/core/recall/query-shape.ts` with `detectQueryShape(query)` returning `{hasTemporal, hasNamedEntity}` (temporal regex covers "N days/weeks/months ago", "last <day>", "when did", "before/after I", "yesterday/today/tomorrow"; named-entity accepts ALL-CAPS acronyms and mixed-case tokens, excludes days of week and month names to avoid Mode B false-fires). Modified `mergeHybrid` to accept a `boostKeyword` param and multiply the keyword leg's `1/(RRF_K + rank_kw)` by 1.75 on shape match. Added 27 unit tests for `detectQueryShape`. Harness run `2026-05-26-16-39-52` (n=500, ~48 min, partial cache): hybrid temporal 91.0 → 92.5 / aggregate 95.8 → 96.4. Head-baseline rerun with boost disabled on the same cache (`2026-05-26-16-57-47`, 26.3s): **byte-identical numbers**. The lift was 100% cache enrichment from the 7,500→5,500 chunk-size change populating new embeddings; the boost contributed zero. Post-mortem probe: detector fires on 23/133 temporal queries, but on those 23 the multiplicative boost changed zero top-5 results — the boost magnitude (1.75×) was too small to overcome the "session appears in both lists at lower rank" advantage in RRF. Reverted; recorded in [[track-record]].
|
|
297
|
-
|
|
298
|
-
**Build F (shipped).** Replaced the failed multiplicative boost with a post-merge **force-include**: when shape is `temporal && namedEntity`, ensure `kwHits[0].session.id` is in the merged top-`limit` set; if not, insert at position `limit - 1`, displacing the lowest-confidence merged hit. Sidesteps RRF arithmetic entirely. ~10 lines in `forceIncludeKeywordTop()` helper at `src/core/recall/recall-service.ts`; detector unchanged from E′.
|
|
299
|
-
|
|
300
|
-
**Pre-build probes justified the build.** Probe 1 joined each hybrid temporal miss's keyword `returnedIds` against the dataset's `answer_session_ids` to compute keyword's rank for the gold session — on the 7 KW-FOUND misses, 5 had keyword rank=1 and 2 were within rank 5 (force-include trivially recovers all 7 if the detector fires). Probe 2 measured detector fire rate by `question_type`: 17.3% on temporal-reasoning, 0% on the two paraphrase types (single-session-preference, single-session-assistant), 1.4-2.6% on the other non-temporal types — bounded blast radius of ~5 queries across 367 non-temporal questions.
|
|
301
|
-
|
|
302
|
-
**Clean A/B (same hot cache, identical code except the force-include branch).** Build F (`2026-05-26-22-47-07`, cold rebuild ~85 min) vs head-baseline force-include off (`2026-05-26-22-56-53`, 22.1s on now-hot cache):
|
|
303
|
-
|
|
304
|
-
| Metric | Off | On | Δ |
|
|
305
|
-
|---|---|---|---|
|
|
306
|
-
| hybrid aggregate | 96.4 | **97.2** | **+0.8** |
|
|
307
|
-
| hybrid temporal | 92.5 | **95.5** | **+3.0** |
|
|
308
|
-
| all other types | byte-identical | byte-identical | 0 |
|
|
309
|
-
| keyword aggregate | 96.6 | 96.6 | 0 |
|
|
310
|
-
| semantic aggregate | 91.6 | 91.6 | 0 |
|
|
311
|
-
|
|
312
|
-
Zero regression on any question type. Detector unchanged from E′ — the difference is force-include sidestepping the RRF math rather than trying to outmuscle it.
|
|
313
|
-
|
|
314
|
-
**Hybrid finally beats keyword on aggregate** (97.2 > 96.6) — first time on this benchmark. Resolves the structural tension from 2026-05-25 where keyword led aggregate R@5. The 2026-05-23 MCP default flip to hybrid is now backed by k=5 numbers, not just the k=20 ablation.
|
|
315
|
-
|
|
316
|
-
**Gate check vs the 2026-05-26 brief:** target was `hybrid temporal R@5 ≥ +4 (target ~95+)`. Landed at +3.0 / 95.5 — one question shy of +4 but inside the 95+ landing target. The miss is "Who did I meet with during the lunch last Tuesday?" — detector skips because day-of-week is excluded from the named-entity set (necessary to avoid Mode B false-fires). Adding day-of-week as NE would catch this one question but cost the Mode B exclusions. Not worth the trade at scale.
|
|
317
|
-
|
|
318
|
-
**Tests:** 186 unit tests pass (added 27 for `detectQueryShape`); typecheck clean on changes (pre-existing `SessionEnd` error in `hook-claude-settings.test.ts` unrelated). Daemon unchanged (Build F is recall-path code, not ingest/embed).
|
|
319
|
-
|
|
320
|
-
**Operational gotcha filed.** Mid-session, `~/.cache/longmemeval/{embeddings.sqlite,longmemeval_s_cleaned.json}` vanished between two harness runs — macOS Sonoma+ auto-cleanup of `~/.cache/` during an idle window. Cost ~90 min of cold rebuild + 277 MB redownload. Mitigation: move the cache outside `~/.cache/` via `LONGMEMEVAL_CACHE_DIR=$HOME/.local/share/longmemeval` before the next harness run. Full diagnosis in `Operations/Tool Lessons/longmemeval-harness.md` (vault) — also captures the harness performance envelope and the pre-build probing methodology.
|
|
321
|
-
|
|
322
|
-
**Methodology lesson worth keeping.** Two-to-five-line probe scripts catch dead hypotheses cheaper than a full harness run. Pattern: (a) probe detector fire rate on the target distribution, (b) probe detector fire rate on the non-target distribution (blast radius), (c) probe the failure mode's mechanism (rank position, candidate-set membership). Run before harness; the result is right whether or not the build ships. Filed in `Ventures/nlm-memory/track-record.md` and `Operations/Tool Lessons/longmemeval-harness.md`. Candidate addition to `Operations/what-works/code-quality.md` if the pattern recurs outside NLM.
|
|
323
|
-
|
|
324
|
-
**Next priorities (updated):**
|
|
325
|
-
|
|
326
|
-
1. **Stop hook citation rate.** Now the highest-leverage moat work — hybrid is structurally sound at 97.2 aggregate; further R@5 work hits diminishing returns until a different lever gets pre-mortem'd.
|
|
327
|
-
2. **Pre-mortem Mode B before any code.** Only 2 of 10 hybrid temporal misses are both-leg misses. Ceiling on a successful Mode B fix is +2/133 = +1.5% hybrid temporal. Probe: can a query-time date parser actually resolve those 2 questions' answer windows? If under 50%, the build doesn't justify itself.
|
|
328
|
-
3. **Cross-runtime hook adapters** (Hermes / pi / Codex). Unchanged from prior handoff.
|
|
329
|
-
4. **Alt-embedding A/B** — still deferred. Hybrid 97.2 is a higher floor than the alt-embedding work was originally framed against. Reopen only when migration 010 is justified by a separate driver.
|
|
330
|
-
|
|
331
|
-
**Source:** Whtnxt Agent orchestrator session 2026-05-26 → 2026-05-27 (continuation); harness reports `reports/longmemeval/2026-05-26-16-39-52/` (E′ on partial cache), `…16-57-47/` (head-baseline boost off, byte-identical to E′), `…22-47-07/` (Build F on cold rebuild), `…22-56-53/` (head-baseline force-include off, same hot cache as 22-47-07). Probe scripts ephemeral at `/tmp/nlm-eprime/`.
|
|
332
|
-
|
|
333
|
-
_Older entries archived in CHANGELOG-2026.md_
|
|
334
|
-
|
|
335
|
-
|
|
336
|
-
|
|
337
|
-
_Older entries archived in CHANGELOG-2026.md_
|
|
@@ -1,174 +0,0 @@
|
|
|
1
|
-
-- NLE Memory canonical store — SQLite schema
|
|
2
|
-
-- Default zero-config backend. Postgres mirror lives in schema/postgres.sql (TBD).
|
|
3
|
-
--
|
|
4
|
-
-- Design principles:
|
|
5
|
-
-- • Sessions are immutable once written — supersedence is via edges, never via UPDATE
|
|
6
|
-
-- • Entity registry is mutable (canonical merges, retitles, retirements)
|
|
7
|
-
-- • All timestamps are ISO 8601 strings in TEXT columns (SQLite lacks a native datetime type;
|
|
8
|
-
-- ISO strings sort correctly and are dialect-portable to Postgres)
|
|
9
|
-
-- • Foreign keys enforced; ON DELETE CASCADE only on edges, never on sessions
|
|
10
|
-
|
|
11
|
-
PRAGMA foreign_keys = ON;
|
|
12
|
-
PRAGMA journal_mode = WAL;
|
|
13
|
-
|
|
14
|
-
-- ── Sessions ───────────────────────────────────────────────────────────────
|
|
15
|
-
CREATE TABLE IF NOT EXISTS sessions (
|
|
16
|
-
id TEXT PRIMARY KEY, -- e.g. sess_2026-05-07T14-32-CAMEL
|
|
17
|
-
runtime TEXT NOT NULL, -- e.g. claude-code/1.0, hermes/0.5
|
|
18
|
-
runtime_session_id TEXT, -- the runtime's own session identifier
|
|
19
|
-
started_at TEXT NOT NULL, -- ISO 8601
|
|
20
|
-
ended_at TEXT, -- ISO 8601; NULL while session is active
|
|
21
|
-
duration_min INTEGER, -- computed; null while active
|
|
22
|
-
label TEXT NOT NULL, -- human-readable session title
|
|
23
|
-
summary TEXT NOT NULL, -- ~80-token classifier output
|
|
24
|
-
body TEXT, -- full markdown body with inline markers
|
|
25
|
-
status TEXT NOT NULL CHECK(status IN ('active','closed','superseded')),
|
|
26
|
-
transcript_kind TEXT, -- e.g. claude-code-jsonl
|
|
27
|
-
transcript_path TEXT, -- runtime-resolvable opaque pointer
|
|
28
|
-
transcript_offset INTEGER, -- byte offset start
|
|
29
|
-
transcript_length INTEGER, -- byte length
|
|
30
|
-
created_at TEXT NOT NULL DEFAULT (datetime('now')),
|
|
31
|
-
updated_at TEXT NOT NULL DEFAULT (datetime('now'))
|
|
32
|
-
);
|
|
33
|
-
|
|
34
|
-
CREATE INDEX IF NOT EXISTS idx_sessions_started_at ON sessions(started_at DESC);
|
|
35
|
-
CREATE INDEX IF NOT EXISTS idx_sessions_status ON sessions(status);
|
|
36
|
-
CREATE INDEX IF NOT EXISTS idx_sessions_runtime ON sessions(runtime);
|
|
37
|
-
|
|
38
|
-
-- ── Entities ───────────────────────────────────────────────────────────────
|
|
39
|
-
CREATE TABLE IF NOT EXISTS entities (
|
|
40
|
-
canonical TEXT PRIMARY KEY, -- preferred spelling — also the primary key
|
|
41
|
-
type TEXT NOT NULL, -- 'candidate' until labeled. Built-in labels: project | tool | contact | service | concept. Custom labels are user-defined via the UI / `nle-daemon action label` and have no CHECK constraint.
|
|
42
|
-
status TEXT NOT NULL CHECK(status IN ('active','dormant','retired','rejected','candidate')),
|
|
43
|
-
source TEXT, -- e.g. 'property:.claude/properties/polysignal.yaml', 'auto-detected', 'user-registered'
|
|
44
|
-
notes TEXT, -- freeform user notes
|
|
45
|
-
first_seen_session TEXT REFERENCES sessions(id),
|
|
46
|
-
last_seen_session TEXT REFERENCES sessions(id),
|
|
47
|
-
session_count INTEGER NOT NULL DEFAULT 0,
|
|
48
|
-
created_at TEXT NOT NULL DEFAULT (datetime('now')),
|
|
49
|
-
updated_at TEXT NOT NULL DEFAULT (datetime('now'))
|
|
50
|
-
);
|
|
51
|
-
|
|
52
|
-
CREATE INDEX IF NOT EXISTS idx_entities_type ON entities(type);
|
|
53
|
-
CREATE INDEX IF NOT EXISTS idx_entities_status ON entities(status);
|
|
54
|
-
|
|
55
|
-
-- ── Entity variants (case-insensitive normalization) ───────────────────────
|
|
56
|
-
CREATE TABLE IF NOT EXISTS entity_variants (
|
|
57
|
-
variant TEXT PRIMARY KEY, -- raw form as it appeared
|
|
58
|
-
canonical TEXT NOT NULL REFERENCES entities(canonical) ON DELETE CASCADE,
|
|
59
|
-
source_session_id TEXT REFERENCES sessions(id)
|
|
60
|
-
);
|
|
61
|
-
|
|
62
|
-
CREATE INDEX IF NOT EXISTS idx_entity_variants_canonical ON entity_variants(canonical);
|
|
63
|
-
|
|
64
|
-
-- ── Session ↔ Entity (many-to-many) ───────────────────────────────────────
|
|
65
|
-
CREATE TABLE IF NOT EXISTS session_entities (
|
|
66
|
-
session_id TEXT NOT NULL REFERENCES sessions(id) ON DELETE CASCADE,
|
|
67
|
-
entity_canonical TEXT NOT NULL REFERENCES entities(canonical),
|
|
68
|
-
PRIMARY KEY (session_id, entity_canonical)
|
|
69
|
-
);
|
|
70
|
-
|
|
71
|
-
CREATE INDEX IF NOT EXISTS idx_session_entities_entity ON session_entities(entity_canonical);
|
|
72
|
-
|
|
73
|
-
-- ── Markers (decisions / open questions) ──────────────────────────────────
|
|
74
|
-
-- Extracted from inline body markers. Body is canonical — these rows are a queryable cache.
|
|
75
|
-
CREATE TABLE IF NOT EXISTS markers (
|
|
76
|
-
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
77
|
-
session_id TEXT NOT NULL REFERENCES sessions(id) ON DELETE CASCADE,
|
|
78
|
-
kind TEXT NOT NULL CHECK(kind IN ('decision','open')),
|
|
79
|
-
text TEXT NOT NULL,
|
|
80
|
-
position INTEGER NOT NULL DEFAULT 0 -- ordering within session
|
|
81
|
-
);
|
|
82
|
-
|
|
83
|
-
CREATE INDEX IF NOT EXISTS idx_markers_session ON markers(session_id);
|
|
84
|
-
CREATE INDEX IF NOT EXISTS idx_markers_kind ON markers(kind);
|
|
85
|
-
|
|
86
|
-
-- ── Supersedence + continues edges ────────────────────────────────────────
|
|
87
|
-
-- Edge table for non-destructive editing. A session can supersede or continue another.
|
|
88
|
-
-- Bidirectional lookups handled by indexes on both columns.
|
|
89
|
-
CREATE TABLE IF NOT EXISTS session_edges (
|
|
90
|
-
from_session TEXT NOT NULL REFERENCES sessions(id) ON DELETE CASCADE,
|
|
91
|
-
to_session TEXT NOT NULL REFERENCES sessions(id) ON DELETE CASCADE,
|
|
92
|
-
kind TEXT NOT NULL CHECK(kind IN ('supersedes','continues','branched_from','merged_from')),
|
|
93
|
-
PRIMARY KEY (from_session, to_session, kind)
|
|
94
|
-
);
|
|
95
|
-
|
|
96
|
-
CREATE INDEX IF NOT EXISTS idx_session_edges_from ON session_edges(from_session);
|
|
97
|
-
CREATE INDEX IF NOT EXISTS idx_session_edges_to ON session_edges(to_session);
|
|
98
|
-
CREATE INDEX IF NOT EXISTS idx_session_edges_kind ON session_edges(kind);
|
|
99
|
-
|
|
100
|
-
-- ── Full-text search over sessions ────────────────────────────────────────
|
|
101
|
-
-- FTS5 virtual table for label/summary/body search. Maintained via triggers.
|
|
102
|
-
-- External-content FTS5 index over three text columns of `sessions`.
-- content='sessions' makes FTS5 read column values from that table by rowid
-- instead of keeping its own copy, so the index must be kept in step with
-- every write to `sessions`.
CREATE VIRTUAL TABLE IF NOT EXISTS sessions_fts
USING fts5(
    label,
    summary,
    body,
    content='sessions',
    content_rowid='rowid'
);
|
|
107
|
-
|
|
108
|
-
-- Index a newly inserted session row.
CREATE TRIGGER IF NOT EXISTS sessions_ai
AFTER INSERT ON sessions
BEGIN
    INSERT INTO sessions_fts (rowid, label, summary, body)
    VALUES (new.rowid, new.label, new.summary, new.body);
END;

-- Re-index an updated session row. The FTS5 'delete' command is issued first
-- with the OLD column values so the stale index entries are removed, then the
-- NEW values are inserted. The order of the two statements matters.
CREATE TRIGGER IF NOT EXISTS sessions_au
AFTER UPDATE ON sessions
BEGIN
    INSERT INTO sessions_fts (sessions_fts, rowid, label, summary, body)
    VALUES ('delete', old.rowid, old.label, old.summary, old.body);

    INSERT INTO sessions_fts (rowid, label, summary, body)
    VALUES (new.rowid, new.label, new.summary, new.body);
END;

-- Drop index entries for a deleted session row, again via the 'delete'
-- command with the OLD column values.
CREATE TRIGGER IF NOT EXISTS sessions_ad
AFTER DELETE ON sessions
BEGIN
    INSERT INTO sessions_fts (sessions_fts, rowid, label, summary, body)
    VALUES ('delete', old.rowid, old.label, old.summary, old.body);
END;
|
|
124
|
-
|
|
125
|
-
-- ── Vector embeddings (sqlite-vec) ────────────────────────────────────────
|
|
126
|
-
-- Loaded as an extension at runtime: SELECT load_extension('vec0');
|
|
127
|
-
-- Schema declared here for reference; real CREATE happens at daemon startup
|
|
128
|
-
-- after the extension is loaded.
|
|
129
|
-
--
|
|
130
|
-
-- CREATE VIRTUAL TABLE session_embeddings USING vec0(
|
|
131
|
-
-- session_id TEXT PRIMARY KEY,
|
|
132
|
-
-- embedding float[768] -- nomic-embed-text dim; configurable
|
|
133
|
-
-- );
|
|
134
|
-
|
|
135
|
-
-- ── Schema migrations tracker ─────────────────────────────────────────────
|
|
136
|
-
-- Applied by SQLiteStore.migrate() on daemon start. Tracks which versioned
|
|
137
|
-
-- migration files in daemon/migrations/ have been run on this database.
|
|
138
|
-
-- One row per applied migration, keyed by its integer version.
-- applied_at defaults to the time of insertion (datetime('now')).
CREATE TABLE IF NOT EXISTS schema_migrations (
    version    INTEGER PRIMARY KEY,
    name       TEXT    NOT NULL,
    applied_at TEXT    NOT NULL DEFAULT (datetime('now'))
);
|
|
143
|
-
|
|
144
|
-
-- ── Adapter state (per-runtime offsets for resumability) ──────────────────
|
|
145
|
-
-- One row per (adapter, source file) pair, holding the last recorded offset
-- and the time it was recorded.
CREATE TABLE IF NOT EXISTS adapter_state (
    adapter_name      TEXT    NOT NULL,  -- e.g. 'claude-code'
    source_path       TEXT    NOT NULL,  -- e.g. ~/.claude/projects/foo/abc123.jsonl
    last_offset       INTEGER NOT NULL DEFAULT 0,
    last_processed_at TEXT    NOT NULL DEFAULT (datetime('now')),
    PRIMARY KEY (adapter_name, source_path)
);
|
|
152
|
-
|
|
153
|
-
-- ── Actions (event-sourced action log) ────────────────────────────────────
|
|
154
|
-
-- Universal primitive for every interactive change: alert dismiss/snooze, entity
|
|
155
|
-
-- retire, link supersedes/continues, mark intentional, undo, etc. Append-only.
|
|
156
|
-
-- Computed tables (session_edges, entities.status) become projections of this log.
|
|
157
|
-
-- Same schema across web UI, MCP tool calls, CLI, future mobile/api.
|
|
158
|
-
-- Append-only action log: one row per interactive change. A row counts as
-- "active" while reverted_by IS NULL; an undo is recorded by pointing
-- reverted_by at the id of the action that undid it.
CREATE TABLE IF NOT EXISTS actions (
    id           TEXT PRIMARY KEY,              -- act_<iso-ts>_<short-uuid>
    timestamp    TEXT NOT NULL,                 -- ISO 8601 — when the action was taken
    kind         TEXT NOT NULL,                 -- dismiss | snooze | retire_entity | label_entity | merge_entity | link_supersedes | link_continues | resolve_open | mark_intentional | undo | sync_localstorage
    subject_type TEXT NOT NULL,                 -- alert | entity | session | decision | open_question | action
    subject_id   TEXT NOT NULL,                 -- e.g. 'stale_Squarespace', 'sess_002', 'NocoDB'
    payload      TEXT,                          -- JSON: action-specific data (snoozed_until, target_session_id, new_type, ...)
    actor        TEXT NOT NULL DEFAULT 'user',  -- user | agent:claude-code | agent:hermes | system
    runtime      TEXT,                          -- web-ui | mcp:claude-code | cli | mobile-ios | api
    reverted_by  TEXT REFERENCES actions(id),   -- the action that undid this one (null = active)
    created_at   TEXT NOT NULL DEFAULT (datetime('now'))
);

-- All actions recorded against one subject.
CREATE INDEX IF NOT EXISTS idx_actions_subject
    ON actions (subject_type, subject_id);

-- All actions of one kind.
CREATE INDEX IF NOT EXISTS idx_actions_kind
    ON actions (kind);

-- Newest-first scans of the log.
CREATE INDEX IF NOT EXISTS idx_actions_timestamp
    ON actions (timestamp DESC);

-- Active (not reverted) actions for one subject. The partial-index predicate
-- already restricts entries to reverted_by IS NULL, so reverted_by is not
-- repeated as a key column: it would hold the same NULL in every entry and
-- only make the index larger. Queries must still include
-- "reverted_by IS NULL" in their WHERE clause for the planner to pick this index.
CREATE INDEX IF NOT EXISTS idx_actions_active
    ON actions (subject_type, subject_id)
    WHERE reverted_by IS NULL;
|
|
@@ -1,17 +0,0 @@
|
|
|
1
|
-
-- Migration 001: rename entity types
|
|
2
|
-
--
|
|
3
|
-
-- Renames entity.type values: property→project, person→contact, external→service
|
|
4
|
-
-- (The CHECK constraint on entities.type was already dropped in the initial schema.)
|
|
5
|
-
-- Safe to re-run: CASE expression only matches old values; INSERT OR IGNORE is a no-op
|
|
6
|
-
-- if this migration has already been applied.
|
|
7
|
-
|
|
8
|
-
-- Map the three legacy entity type values to their new names. The WHERE
-- clause limits the update to rows still carrying a legacy value, so rows
-- that already use a new name are not touched.
UPDATE entities
SET type = CASE type
        WHEN 'property' THEN 'project'
        WHEN 'person'   THEN 'contact'
        WHEN 'external' THEN 'service'
        ELSE type
    END
WHERE type = 'property'
   OR type = 'person'
   OR type = 'external';

-- Record the migration; OR IGNORE keeps an existing version-1 row as is.
INSERT OR IGNORE INTO schema_migrations (version, name)
VALUES (1, 'entity_type_rename');
|
|
@@ -1,12 +0,0 @@
|
|
|
1
|
-
-- Migration 002: extend adapter_state with columns needed for supersede-on-resume.
|
|
2
|
-
--
|
|
3
|
-
-- file_size — bytes-on-disk at last classification (detects file growth)
|
|
4
|
-
-- session_id — NLE session id produced by the last classification (target for supersede)
|
|
5
|
-
--
|
|
6
|
-
-- last_offset stays for future chunking use (Phase 3+). For supersede-on-resume in
|
|
7
|
-
-- Phase 2, file_size carries the equivalent signal at whole-file granularity.
|
|
8
|
-
|
|
9
|
-
-- Both columns are added without NOT NULL or a default, so rows that already
-- exist read NULL for them.
-- NOTE(review): unlike the CREATE ... IF NOT EXISTS statements elsewhere,
-- ADD COLUMN raises a duplicate-column error when run a second time;
-- presumably the migration runner consults schema_migrations first — confirm.
ALTER TABLE adapter_state
    ADD COLUMN file_size INTEGER;   -- bytes-on-disk at last classification

ALTER TABLE adapter_state
    ADD COLUMN session_id TEXT;     -- session id produced by the last classification

-- Record the migration; OR IGNORE keeps an existing version-2 row as is.
INSERT OR IGNORE INTO schema_migrations (version, name)
VALUES (2, '002_adapter_state_extend');
|
|
@@ -1,11 +0,0 @@
|
|
|
1
|
-
-- Migration 003: session_embeddings virtual table via sqlite-vec.
|
|
2
|
-
--
|
|
3
|
-
-- Requires sqlite-vec loaded at connection time (handled by SQLiteStore.connect()).
|
|
4
|
-
-- 768 dims matches nomic-embed-text, the default embedding model.
|
|
5
|
-
|
|
6
|
-
-- vec0 virtual table: one 768-dimension float vector per session_id.
-- The vec0 module must already be loaded on the connection for this to run.
CREATE VIRTUAL TABLE IF NOT EXISTS session_embeddings
USING vec0(
    session_id TEXT PRIMARY KEY,
    embedding float[768]
);

-- Record the migration; OR IGNORE keeps an existing version-3 row as is.
INSERT OR IGNORE INTO schema_migrations (version, name)
VALUES (3, '003_session_embeddings');
|
package/migrations/004_facts.sql
DELETED
|
@@ -1,46 +0,0 @@
|
|
|
1
|
-
-- Migration 004: facts + fact_embeddings.
|
|
2
|
-
--
|
|
3
|
-
-- Facts are the agent-recall projection of session content: normalized
|
|
4
|
-
-- (subject, predicate, value) triples derived from session classifier output,
|
|
5
|
-
-- supersedence-aware via the tombstone pointer `superseded_by`. See
|
|
6
|
-
-- docs/plans/factstore-design.md.
|
|
7
|
-
--
|
|
8
|
-
-- Phase B.1 creates the tables. Writes start in Phase B.2 (classifier prompt
|
|
9
|
-
-- extension). The fact_embeddings vec0 table is created now so Phase B.3 can
|
|
10
|
-
-- light up semantic fact recall without a second migration.
|
|
11
|
-
|
|
12
|
-
-- One (subject, predicate, value) triple per row, tied to the session it was
-- derived from. A row is "current" while superseded_by IS NULL.
--   * source_session_id references sessions(id) with ON DELETE CASCADE.
--   * superseded_by references facts(id) with ON DELETE SET NULL, so a row
--     whose superseding fact is deleted becomes current again.
CREATE TABLE IF NOT EXISTS facts (
    id                TEXT PRIMARY KEY,
    kind              TEXT NOT NULL,
    subject           TEXT NOT NULL,
    predicate         TEXT NOT NULL,
    value             TEXT NOT NULL,
    source_session_id TEXT NOT NULL,
    source_quote      TEXT,
    created_at        TEXT NOT NULL DEFAULT (datetime('now')),
    superseded_by     TEXT,
    confidence        REAL NOT NULL,
    CHECK (kind IN ('decision', 'open', 'attribute')),
    CHECK (confidence >= 0.0 AND confidence <= 1.0),
    FOREIGN KEY (source_session_id) REFERENCES sessions (id) ON DELETE CASCADE,
    FOREIGN KEY (superseded_by) REFERENCES facts (id) ON DELETE SET NULL
);

-- Current rows only, keyed by (subject, predicate): the lookup used for the
-- supersedence collision check on ingest.
CREATE INDEX IF NOT EXISTS idx_facts_subject_predicate_current
    ON facts (subject, predicate)
    WHERE superseded_by IS NULL;

-- Current rows only, keyed by subject alone, for subject-only browsing.
CREATE INDEX IF NOT EXISTS idx_facts_subject_current
    ON facts (subject)
    WHERE superseded_by IS NULL;

-- Reverse lookup from a session to the facts derived from it.
CREATE INDEX IF NOT EXISTS idx_facts_session
    ON facts (source_session_id);
|
|
39
|
-
|
|
40
|
-
-- Semantic recall index (Phase B.3). 768 dims matches nomic-embed-text.
|
|
41
|
-
-- vec0 virtual table: one 768-dimension float vector per fact_id.
-- The vec0 module must already be loaded on the connection for this to run.
CREATE VIRTUAL TABLE IF NOT EXISTS fact_embeddings
USING vec0(
    fact_id TEXT PRIMARY KEY,
    embedding float[768]
);

-- Record the migration; OR IGNORE keeps an existing version-4 row as is.
INSERT OR IGNORE INTO schema_migrations (version, name)
VALUES (4, '004_facts');
|