npm - llm-wiki-kit - Versions diffs - 0.2.14 → 0.2.16 - Mend

llm-wiki-kit 0.2.14 → 0.2.16

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (24) hide show

package/README.md +17 -9
package/docs/concepts.md +15 -10
package/docs/integrations/claude-code.md +4 -2
package/docs/integrations/codex.md +6 -4
package/docs/manual.md +55 -7
package/docs/operations.md +2 -2
package/docs/security.md +4 -0
package/docs/troubleshooting.md +2 -2
package/package.json +1 -1
package/src/capture-policy.js +22 -4
package/src/cli.js +52 -2
package/src/constants.js +2 -0
package/src/evidence.js +128 -0
package/src/hook.js +18 -4
package/src/maintenance.js +142 -22
package/src/project-state.js +9 -2
package/src/project.js +5 -2
package/src/templates.js +31 -10
package/src/wiki-eval.js +110 -0
package/src/wiki-export.js +214 -0
package/src/wiki-lint.js +66 -1
package/src/wiki-model.js +2 -0
package/src/wiki-search.js +79 -13
package/src/wiki-visibility.js +34 -4

package/README.md CHANGED Viewed

@@ -85,7 +85,9 @@ llm-wiki/
 ├── outputs/
 │   ├── questions/
 │   ├── reports/
+│   ├── exports/
 │   └── maintenance/
+├── evals/
 └── procedures/
 ```
@@ -101,15 +103,16 @@ The installed hooks:
 - automatically choose Korean or English hook guidance from the current user prompt, then fall back to Claude Code `language`, local `CLAUDE.md`/`AGENTS.md`, and English.
 - remove Codex-facing legacy `oh-my-codex:wiki`/`omx_wiki` surfaces at session start so `llm-wiki/` remains the active wiki implementation
 - record small redacted raw event envelopes and per-turn state
-- capture meaningful work and structured decision points, including tool evidence, changed files, and verification notes
-- before compaction, classify the current turn and save a redacted checkpoint only for meaningful work, structured decisions, or explicit durable requests; explicit durable candidates also get a maintenance queue item when no durable wiki update is detected
+- capture meaningful work and structured decision points, including tool evidence, changed files, verification notes, and reusable durable-candidate signals
+- attach safe `evidence_refs` candidates to generated durable candidates when changed files or verification commands are available
+- before compaction, classify the current turn and save a redacted checkpoint only for meaningful work, structured decisions, explicit durable requests, or suggested durable candidates; durable candidates get a maintenance queue item when no durable wiki update is detected
 - after compaction, store the redacted compact summary only; if pre-compact preservation failed, prepare a recovery packet for the next legal model-visible context hook
 - allow tool calls to proceed without secret/PII-based hook blocking
-- update chunked `llm-wiki/outputs/questions/YYYY-MM-DD/live-qa-001.md` style archives only for meaningful work or structured decision turns
+- update chunked `llm-wiki/outputs/questions/YYYY-MM-DD/live-qa-001.md` style archives only for meaningful work, structured decision turns, or hook-suggested durable candidates
 - avoid automatic `wiki/queries/` and `wiki/decisions/` promotion in the default answer-first mode
-- queue durable cleanup candidates only for explicit documentation requests that were not reflected in durable wiki files, or when stale turn state is recovered
+- queue durable cleanup candidates for explicit documentation requests, hook-suggested durable candidates, or recovered stale turn state when the turn was not reflected in durable wiki files
 - recover stale per-turn state into that queue on the next session start or prompt submit when the previous stop hook did not complete
-- nudge the active LLM to fold approved reusable facts into existing wiki pages instead of leaving everything as one-off Q&A
+- nudge the active LLM to fold approved or hook-suggested reusable facts into existing wiki pages instead of leaving everything as one-off Q&A
 - automatically refresh managed rules/templates for older projects when the current runtime starts a session
 If you need to think about saving every answer manually, the setup has failed.
@@ -123,7 +126,7 @@ Most users should not need these during daily Claude Code/Codex work. They exist
 - Install/update: `llm-wiki install`, `llm-wiki update`, `llm-wiki post-update`, `llm-wiki projects`
 - Diagnostics: `llm-wiki doctor`, `llm-wiki status`, `llm-wiki version`
 - Manual: `llm-wiki manual`
-- Agent maintenance helpers: `llm-wiki context`, `llm-wiki lint`, `llm-wiki consolidate`, `llm-wiki maintenance`
+- Agent maintenance helpers: `llm-wiki context`, `llm-wiki lint`, `llm-wiki consolidate`, `llm-wiki maintenance`, `llm-wiki eval`, `llm-wiki export`
 - Live Q&A archive helper: `llm-wiki archive-questions --workspace <project> [--date YYYY-MM-DD] [--dry-run]`
 - Cleanup: `llm-wiki uninstall`
@@ -139,13 +142,17 @@ Installed npm runtimes also perform a cached update notice check from hooks whil
 `llm-wiki post-update --workspace <project>` reapplies the current runtime's hook entries and safe managed template updates without running `npm install -g`. Use `post-update --all --workspace <search-root>` to reapply templates across discovered project roots.
-`llm-wiki context "<query>"` prints the full debug view of the layered context sources used by hooks. Hook injection may render those sources as functional compact context for Codex and Claude, but this CLI stays verbose so maintainers can inspect retrieval, snippets, memory, index, expansion behavior, and context budget metadata. Daily use should rely on hook injection. By default, episodic `wiki/queries/`, `wiki/context/`, and `session-log` pages are excluded from search unless they were promoted with `memory_type: semantic` or `procedural` and `importance >= 4`; use `--include-episodic` only when debugging old automatic records. Archived or superseded pages are hidden unless `--include-archived` is requested, while stale pages remain searchable with lower score.
+`llm-wiki context "<query>"` prints the full debug view of the layered context sources used by hooks. Hook injection may render those sources as functional compact context for Codex and Claude, but this CLI stays verbose so maintainers can inspect retrieval, snippets, memory, index, expansion behavior, context budget metadata, `rankReason`, `matchedFields`, `scoreBreakdown`, `visibilityReason`, and `evidenceRefs`. The text formatter adds a short `why selected` line for each hit; hook compact context deliberately omits that extra detail. Daily use should rely on hook injection. By default, episodic `wiki/queries/`, `wiki/context/`, and `session-log` pages are excluded from search unless they were promoted with `memory_type: semantic` or `procedural` and `importance >= 4`; use `--include-episodic` only when debugging old automatic records. Archived or superseded pages are hidden unless `--include-archived` is requested, while stale pages remain searchable with lower score.
-`llm-wiki lint` checks wiki health and detects outdated managed rules from older kit versions. It also warns when `memory.md` is near budget, wiki page count nears the search cap, hidden episodic/context pages accumulate, or stale/archived pages lack supersession/link discoverability. Agents may use it before/after meaningful wiki maintenance.
+`llm-wiki lint` checks wiki health and detects outdated managed rules from older kit versions. It validates optional `evidence_refs` entries with the prefixes `file:`, `cmd:`, `raw:`, and `url:`; unsafe paths, unsupported prefixes, credential-bearing URLs, command secrets, and secret-like values are reported as errors, while missing local evidence targets are warnings. It also warns when `memory.md` is near budget, wiki page count nears the search cap, hidden episodic/context pages accumulate, or stale/archived pages lack supersession/link discoverability. Agents may use it before/after meaningful wiki maintenance.
 `llm-wiki consolidate` refreshes only generated marker blocks in `wiki/memory.md` and `wiki/index.md`. Generated maps keep durable non-archived pages, hide default episodic records, skip stale/archived/superseded pages, and report those counts in dry-run output. It is an agent maintenance helper, not a command users should run after every turn.
-`llm-wiki maintenance` prints the pending queue and review due status from `llm-wiki/outputs/maintenance/queue.md`. Hooks create only selective candidates; the active agent should merge reusable items into existing durable wiki pages and mark queue items `done` or `skipped` without delaying unrelated user answers. Periodic maintenance is a soft agent-side reminder, not a user command loop.
+`llm-wiki maintenance` prints the queue and review due status from `llm-wiki/outputs/maintenance/queue.md`. Queue states are `pending -> approved -> done` or `skipped`. Use `llm-wiki maintenance --workspace <project> --approve <id> --target <wiki/...md>` when durable promotion is approved, `--done <id> --target <wiki/...md>` after the active agent has merged the fact into a durable page, and `--skip <id> [--note "..."]` for duplicate or non-durable candidates. Approved items are shown before pending items in hook reminders. Periodic maintenance is a soft agent-side reminder, not a user command loop.
+`llm-wiki eval --workspace <project> [--fixture <path>] [--limit 5] [--json]` runs retrieval fixtures from `llm-wiki/evals/retrieval.json` by default. If the fixture is absent, it exits successfully with `no fixture found`. Fixtures list `query`, `expected`, and `unexpected` paths; output reports recall, missed expected hits, unexpected hits, and top hits using the same durable visibility policy as export.
+`llm-wiki export --workspace <project> [--format all|llms|llms-full|json] [--output <dir>] [--dry-run] [--json]` writes durable wiki manifests under `llm-wiki/outputs/exports/` by default. `llms.txt` is an agent onboarding and handoff manifest, not a passive SEO artifact. `llms-full.txt` is a redacted durable context bundle for compaction recovery or handoff. `llm-wiki.json` is a structured manifest for future adapters and eval tooling. Export uses the same durable visibility policy as search/eval and redacts credentials before writing.
 `llm-wiki archive-questions` splits older legacy `llm-wiki/outputs/questions/YYYY-MM-DD-live-qa.md` files into the chunked `llm-wiki/outputs/questions/YYYY-MM-DD/` layout. It preserves the original under `outputs/questions/archive/originals/` with a SHA-256 sidecar and replaces the legacy file with a short pointer stub. Use `--dry-run` first when reviewing a large archive.
@@ -192,6 +199,7 @@ llm-wiki hook claude Stop
 - PreCompact may read a small bounded transcript tail to create a redacted checkpoint, but it does not store the full transcript or raw `transcript_path`.
 - Tool calls are not blocked only because inputs look sensitive.
 - Authentication values such as tokens, passwords, and private keys are redacted before durable summaries are written.
+- Generated exports are redacted and must not contain npm tokens, WinRM credentials, private keys, raw `.env` contents, or full raw transcripts.
 - Hook payloads are stored only as redacted event envelopes.
 - Phone numbers, emails, dates, and business identifiers are preserved by default so the wiki remains useful for local work.

package/docs/concepts.md CHANGED Viewed

@@ -18,12 +18,14 @@ The important behavior is a loop:
 2. `memory.md`, `index.md`, and relevant wiki context are injected automatically with an answer-first instruction.
 3. The user works normally; no extra command loop is required.
 4. Hooks gather redacted prompt/tool/result summaries.
-5. At stop/session end, hooks append redacted chunked live Q&A only for turns with work evidence or structured decision/debugging conclusions.
-6. Simple answers, status checks, and keyword-only responses stay out of live Q&A and durable wiki by default.
-7. Durable wiki promotion is selective: explicit record/document requests should be handled by the active agent in existing wiki pages; the hook queues review only when such a request was not reflected in durable files.
-8. At the next start/prompt after an abrupt shutdown, hooks can recover stale turn state into `outputs/maintenance/queue.md`.
-9. When reusable knowledge appears, the active Claude Code/Codex agent folds approved facts into existing durable wiki pages instead of leaving everything as one-off Q&A.
-10. Future sessions start from the improved wiki instead of relying on long chat history.
+5. At stop/session end, hooks append redacted chunked live Q&A only for turns with work evidence, structured decision/debugging conclusions, or hook-suggested durable candidates.
+6. When possible, generated candidates carry safe `evidence_refs` such as changed files, verification commands, raw source IDs, or external URLs.
+7. Simple answers, status checks, and keyword-only responses stay out of live Q&A and durable wiki by default.
+8. Durable wiki promotion is selective: explicit record/document requests and hook-suggested durable candidates should be handled by the active agent in existing wiki pages; the hook queues review only when the turn was not reflected in durable files.
+9. At the next start/prompt after an abrupt shutdown, hooks can recover stale turn state into `outputs/maintenance/queue.md`.
+10. When reusable knowledge appears, the active Claude Code/Codex agent folds approved or hook-suggested facts into existing durable wiki pages instead of leaving everything as one-off Q&A.
+11. Export and eval reuse the same durable visibility policy so handoff manifests, retrieval fixtures, and context selection describe the same wiki surface.
+12. Future sessions start from the improved wiki instead of relying on long chat history.
 The kit is a template/runtime repository. It must not centralize project wiki contents.
@@ -40,8 +42,11 @@ The maintenance loop is intentionally layered:
 - `memory.md`: short hot index for current durable facts.
 - `index.md`: broad navigation map.
-- MiniSearch + wikilinks: retrieval over durable `wiki/**/*.md`, with episodic `wiki/queries/`, `wiki/context/`, and `session-log` pages hidden by default unless promoted or `--include-episodic` is requested; archived/superseded pages stay preserved but hidden unless `--include-archived` is requested.
-- `outputs/maintenance/queue.md`: selective reminders for explicit durable requests that need review, plus stale turn recovery.
-- `lint`: finds broken links, stale pages, duplicates, metadata gaps, secret-like content, outdated managed rules, memory/page-count budget pressure, hidden episodic growth, and stale/archived discoverability gaps.
-- `maintenance`: reports `reviewDue` only when periodic thresholds are met; hook reminders are soft and limited to session start/instructions loaded or maintenance-related prompts.
+- MiniSearch + wikilinks: retrieval over durable `wiki/**/*.md`, with episodic `wiki/queries/`, `wiki/context/`, and `session-log` pages hidden by default unless promoted or `--include-episodic` is requested; archived/superseded pages stay preserved but hidden unless `--include-archived` is requested. Verbose context explains `why selected`; hook context stays compact.
+- `evidence_refs`: optional frontmatter that ties durable claims to `file:`, `cmd:`, `raw:`, or `url:` evidence without embedding secrets or raw transcripts.
+- `outputs/maintenance/queue.md`: selective reminders for explicit durable requests, hook-suggested durable candidates, and stale turn recovery that need review. Queue state is `pending`, `approved`, `done`, or `skipped`.
+- `lint`: finds broken links, stale pages, duplicates, metadata gaps, invalid evidence refs, secret-like content, outdated managed rules, memory/page-count budget pressure, hidden episodic growth, and stale/archived discoverability gaps.
+- `maintenance`: reports `reviewDue` only when periodic thresholds are met; hook reminders are soft and limited to session start/instructions-loaded events and to compact prompt-time reminders for maintenance prompts, approved items, durable candidates, stale/recovered items, or review-threshold pressure.
 - `consolidate`: agent helper that refreshes generated blocks in `memory.md` and `index.md` while preserving handwritten notes, keeping default query/context/session pages out of the durable generated maps, and skipping stale/archived/superseded pages.
+- `eval`: checks retrieval fixtures in `llm-wiki/evals/retrieval.json` and reports expected recall, missed expected paths, unexpected hits, and top hits.
+- `export`: writes redacted `llms.txt`, `llms-full.txt`, and `llm-wiki.json` manifests for agent onboarding, handoff, retrieval eval, and external consumption. `llms.txt` is not treated as a passive SEO artifact.

package/docs/integrations/claude-code.md CHANGED Viewed

@@ -42,11 +42,13 @@ when no project `CLAUDE.md` exists. Existing `CLAUDE.md` files are not overwritt
 The hook records redacted turn summaries but does not deny tool calls only because an input looks sensitive. Hook payloads are stored as small redacted event envelopes rather than full transcripts, and context output is redacted field by field before it is returned to Claude Code.
-At `SessionStart`/`InstructionsLoaded`, the hook first attempts a safe managed-template refresh, recovers stale turn state into `outputs/maintenance/queue.md`, performs a cached npm update notice check for npm installs, then injects functional compact context. The context still uses `llm-wiki/wiki/memory.md`, `llm-wiki/wiki/index.md`, relevant wiki/search state, operating rules, maintenance signals, passive runtime update status, and managed-template cleanup notes; the hook formats those signals so they are usable if shown in the Claude Code UI. At `UserPromptSubmit`, it recovers stale turn state, searches wiki pages with MiniSearch or substring fallback, expands one-hop wikilinks, redacts context fields, performs the same cached update notice check, and injects the smallest useful functional compact context set. Update notice cache is scoped by npm command, and maintenance reminders are shown only when the prompt is wiki/maintenance related or matches a queue topic.
+At `SessionStart`/`InstructionsLoaded`, the hook first attempts a safe managed-template refresh, recovers stale turn state into `outputs/maintenance/queue.md`, performs a cached npm update notice check for npm installs, then injects functional compact context. The context still uses `llm-wiki/wiki/memory.md`, `llm-wiki/wiki/index.md`, relevant wiki/search state, operating rules, maintenance signals, passive runtime update status, and managed-template cleanup notes; the hook formats those signals so they are usable if shown in the Claude Code UI. At `UserPromptSubmit`, it recovers stale turn state, searches wiki pages with MiniSearch or substring fallback, expands one-hop wikilinks, redacts context fields, performs the same cached update notice check, and injects the smallest useful functional compact context set. Verbose `llm-wiki context` can explain `why selected`, `rankReason`, `matchedFields`, and `evidenceRefs`, but hook context keeps those details compact. Update notice cache is scoped by npm command, and maintenance reminders are shown for wiki/maintenance prompts, queue topic matches, approved items, durable candidates, stale/recovered items, or review-threshold pressure.
 Hook-visible language is selected from the current user prompt first. Korean prompts get Korean guidance, English prompts get English guidance. If no prompt language is clear, the hook checks Claude Code `settings.json` `language` when it exists, then local `CLAUDE.md`/`AGENTS.md` language signals, then English. The kit does not require Claude Code to expose a language setting.
-`PostToolUse` and `PostToolBatch` record redacted tool summaries in the same turn buffer. `PreCompact` classifies the current turn before compaction: simple turns record only a context note, work-evidence or structured-decision turns write a chunked live Q&A checkpoint, and explicit durable candidates write a maintenance queue item only when no durable wiki update is detected. The checkpoint can include only a bounded redacted transcript tail, never the full raw transcript or raw `transcript_path`. Compaction is not blocked; if checkpoint storage fails, the hook records a compact recovery packet for the next legal context-injection event. `PostCompact` stores the redacted compact summary as a context note and prepares any pending recovery packet without returning model-visible context directly. In the default `answer-first` mode, `SubagentStop` does not create live Q&A, query, decision, or maintenance files. `Stop` and `SessionEnd` append chunked live Q&A only for work-evidence or structured-decision turns and do not auto-create `wiki/queries/` or `wiki/decisions/`. If the user explicitly asked to record or document durable knowledge and no durable wiki update is detected, `Stop`/`SessionEnd` queue a pending maintenance item for agent review. `Stop` and `SessionEnd` then clear the per-session turn buffer; `SubagentStop` does not.
+`PostToolUse` and `PostToolBatch` record redacted tool summaries in the same turn buffer. `PreCompact` classifies the current turn before compaction: simple turns record only a context note; work-evidence, structured-decision, explicit durable, or hook-suggested durable turns write a chunked live Q&A checkpoint; and durable candidates write a maintenance queue item only when no durable wiki update is detected. Queue items may carry safe `evidence_refs` candidates from changed files and verification commands. The checkpoint can include only a bounded redacted transcript tail, never the full raw transcript or raw `transcript_path`. Compaction is not blocked; if checkpoint storage fails, the hook records a compact recovery packet for the next legal context-injection event. `PostCompact` stores the redacted compact summary as a context note and prepares any pending recovery packet without returning model-visible context directly. In the default `answer-first` mode, `SubagentStop` does not create live Q&A, query, decision, or maintenance files. `Stop` and `SessionEnd` append chunked live Q&A only for work-evidence, structured-decision, or durable-candidate turns and do not auto-create `wiki/queries/` or `wiki/decisions/`. If the user explicitly asked to record durable knowledge, or the turn contains reusable architecture/debugging/policy/procedure/decision signals, and no durable wiki update is detected, `Stop`/`SessionEnd` queue a pending maintenance item for agent review. Approved and durable-candidate maintenance items are surfaced as compact soft reminders. `Stop` and `SessionEnd` then clear the per-session turn buffer; `SubagentStop` does not.
+For handoff or retrieval verification, use `llm-wiki export --workspace <project> --format all` and `llm-wiki eval --workspace <project>`. The generated `llms.txt`/`llms-full.txt`/`llm-wiki.json` files are redacted durable manifests, not raw transcripts.
 Set `LLM_WIKI_KIT_AUTO_PROJECT_UPDATE=0` only while diagnosing automatic managed-template refresh behavior.
 Set `LLM_WIKI_KIT_UPDATE_NOTICE=0` only while suppressing the cached passive runtime update status.

package/docs/integrations/codex.md CHANGED Viewed

@@ -30,18 +30,20 @@ Handled events:
 Expected behavior:
 - `SessionStart` first attempts a safe managed-template refresh, removes Codex-facing legacy `oh-my-codex:wiki`/`omx_wiki` surfaces when they reappear, recovers stale turn state into `outputs/maintenance/queue.md`, performs a cached npm update notice check for npm installs, then injects functional compact context. The context still uses `llm-wiki/wiki/memory.md`, `llm-wiki/wiki/index.md`, relevant wiki/search state, operating rules, maintenance signals, passive runtime update status, and managed-template cleanup notes; the hook formats those signals so they are usable if shown in the Codex UI.
-- `UserPromptSubmit` recovers stale turn state, searches project wiki pages with MiniSearch or substring fallback, expands one-hop wikilinks, redacts context fields, performs the same cached update notice check, and injects the smallest useful functional compact context set. Update notice cache is scoped by npm command, and maintenance reminders are shown only when the prompt is wiki/maintenance related or matches a queue topic.
+- `UserPromptSubmit` recovers stale turn state, searches project wiki pages with MiniSearch or substring fallback, expands one-hop wikilinks, redacts context fields, performs the same cached update notice check, and injects the smallest useful functional compact context set. Verbose `llm-wiki context` can explain `why selected`, `rankReason`, `matchedFields`, and `evidenceRefs`, but hook context keeps those details compact. Update notice cache is scoped by npm command, and maintenance reminders are shown for wiki/maintenance prompts, queue topic matches, approved items, durable candidates, stale/recovered items, or review-threshold pressure.
 - Hook-visible language is selected from the current user prompt first. Korean prompts get Korean guidance, English prompts get English guidance. If no prompt language is clear, Codex falls back to local `CLAUDE.md`/`AGENTS.md` language signals, then English.
 - `PreToolUse` records redacted tool summaries without blocking tool calls.
 - `PostToolUse` records redacted tool summaries in a turn buffer.
-- `PreCompact` classifies the current turn before compaction. Simple turns record only a context note; work-evidence or structured-decision turns write a chunked live Q&A checkpoint; explicit durable candidates write a maintenance queue item only when no durable wiki update is detected. The checkpoint can include only a bounded redacted transcript tail, never the full raw transcript or raw `transcript_path`. Compaction is not blocked; if checkpoint storage fails, the hook records a compact recovery packet for the next legal context-injection event.
+- `PreCompact` classifies the current turn before compaction. Simple turns record only a context note; work-evidence, structured-decision, explicit durable, or hook-suggested durable turns write a chunked live Q&A checkpoint; durable candidates write a maintenance queue item only when no durable wiki update is detected. The checkpoint can include only a bounded redacted transcript tail, never the full raw transcript or raw `transcript_path`. Compaction is not blocked; if checkpoint storage fails, the hook records a compact recovery packet for the next legal context-injection event.
 - `PostCompact` stores the redacted compact summary as a context note and prepares any pending compact recovery packet. It does not return `hookSpecificOutput.additionalContext`, because Codex `PostCompact` only supports common output fields.
-- In the default `answer-first` mode, `SubagentStop` does not create live Q&A, query, decision, or maintenance files. `Stop` appends chunked live Q&A only for work-evidence or structured-decision turns and does not auto-create `wiki/queries/` or `wiki/decisions/`.
-- If the user explicitly asked to record or document durable knowledge and no durable wiki update is detected, `Stop` queues a pending maintenance item for agent review.
+- In the default `answer-first` mode, `SubagentStop` does not create live Q&A, query, decision, or maintenance files. `Stop` appends chunked live Q&A only for work-evidence, structured-decision, or durable-candidate turns and does not auto-create `wiki/queries/` or `wiki/decisions/`.
+- If the user explicitly asked to record durable knowledge, or the turn contains reusable architecture/debugging/policy/procedure/decision signals, and no durable wiki update is detected, `Stop` queues a pending maintenance item for agent review. Queue items may carry safe `evidence_refs` candidates from changed files and verification commands. Approved and durable-candidate maintenance items are surfaced as compact soft reminders.
 - `Stop` clears the per-session turn buffer after recording. `SubagentStop` leaves the parent turn buffer available for the final stop event.
 Hook payloads are stored as small redacted event envelopes rather than full transcripts. Context output is also redacted field by field before it is returned to Codex. Functional compact context is a presentation policy, not a feature reduction: Codex still receives the wiki memory, search, maintenance, and passive update signals needed for the hook workflow.
+For handoff or retrieval verification, use `llm-wiki export --workspace <project> --format all` and `llm-wiki eval --workspace <project>`. The generated `llms.txt`/`llms-full.txt`/`llm-wiki.json` files are redacted durable manifests, not raw transcripts.
 Set `LLM_WIKI_KIT_AUTO_PROJECT_UPDATE=0` only while diagnosing automatic managed-template refresh behavior.
 Set `LLM_WIKI_KIT_UPDATE_NOTICE=0` only while suppressing the cached passive runtime update status.
 Set `LLM_WIKI_KIT_CAPTURE_MODE=legacy-eager` only as deprecated compatibility mode for the old eager query/decision capture behavior.

package/docs/manual.md CHANGED Viewed

@@ -55,7 +55,9 @@ llm-wiki/
 ├── outputs/
 │   ├── questions/
 │   ├── reports/
+│   ├── exports/
 │   └── maintenance/
+├── evals/
 └── procedures/
 ```
@@ -69,9 +71,10 @@ Use Codex or Claude Code normally. Installed hooks:
 - select Korean or English hook guidance from the current user prompt and local instruction files;
 - use `wiki/memory.md`, `wiki/index.md`, relevant wiki search, maintenance signals, update notices, and compact recovery packets;
 - record redacted prompt/tool/result summaries in per-turn state;
-- archive only meaningful work turns or structured decision/debugging turns into chunked `outputs/questions/YYYY-MM-DD/live-qa-001.md` files;
+- preserve safe evidence pointers as `evidence_refs` when changed files or verification commands are available;
+- archive only meaningful work turns, structured decision/debugging turns, or hook-suggested durable candidates into chunked `outputs/questions/YYYY-MM-DD/live-qa-001.md` files;
 - avoid automatic `wiki/queries/` and `wiki/decisions/` promotion in the default answer-first mode;
-- queue durable cleanup candidates only for explicit documentation requests that were not reflected in durable wiki files, or when stale turn state is recovered;
+- queue durable cleanup candidates for explicit documentation requests, hook-suggested durable candidates, or recovered stale turn state when the turn was not reflected in durable wiki files;
 - refresh clearly managed rules/templates for older projects at session start;
 - remove legacy Codex-facing `oh-my-codex:wiki`/`omx_wiki` surfaces when they reappear.
@@ -85,7 +88,7 @@ Default:
 LLM_WIKI_KIT_CAPTURE_MODE=answer-first
 ```
-`answer-first` keeps simple Q&A, status checks, and keyword-only replies out of durable wiki and live Q&A by default. It archives work turns with tool evidence, changed-file evidence, verification, or structured `Decision:`/`Root cause:` style conclusions. Explicit durable requests create maintenance queue candidates only when no durable wiki update is detected.
+`answer-first` keeps simple Q&A, status checks, and keyword-only replies out of durable wiki and live Q&A by default. It archives work turns with tool evidence, changed-file evidence, verification, structured `Decision:`/`Root cause:` style conclusions, or reusable durable-candidate signals. Explicit durable requests and hook-suggested durable candidates create maintenance queue candidates only when no durable wiki update is detected.
 Deprecated compatibility mode:
@@ -149,9 +152,11 @@ Most users should not need these during daily coding. They are for install, upda
 - `llm-wiki bootstrap --workspace <project>`: create project-local wiki structure.
 - `llm-wiki migrate --workspace <project>`: copy legacy wiki material into the current layout.
 - `llm-wiki context "<query>" --workspace <project>`: verbose debug view of hook context sources.
+- `llm-wiki eval --workspace <project> [--fixture <path>] [--limit 5] [--json]`: run retrieval fixtures.
+- `llm-wiki export --workspace <project> [--format all|llms|llms-full|json] [--output <dir>] [--dry-run] [--json]`: write durable wiki manifests.
 - `llm-wiki lint --workspace <project>`: wiki health check.
 - `llm-wiki consolidate --workspace <project> [--dry-run]`: refresh generated blocks in `memory.md` and `index.md`.
-- `llm-wiki maintenance --workspace <project> [--json]`: show pending durable cleanup candidates and review health.
+- `llm-wiki maintenance --workspace <project> [--approve <id> --target <wiki/...md> | --done <id> --target <wiki/...md> | --skip <id> [--note "..."]] [--json]`: show or update durable cleanup review state.
 - `llm-wiki archive-questions --workspace <project> [--date YYYY-MM-DD] [--dry-run]`: split old flat live Q&A files into chunks.
 - `llm-wiki uninstall`: remove kit-managed hook entries, leaving project wiki contents intact.
@@ -187,16 +192,56 @@ llm-wiki context "auth architecture" --workspace /path/to/project --include-epis
 llm-wiki context "auth architecture" --workspace /path/to/project --include-archived
 ```
-Default search prioritizes durable semantic/procedural wiki pages. Episodic `wiki/queries/`, `wiki/context/`, and `session-log` pages are hidden unless promoted with durable metadata or explicitly requested. Archived and superseded pages are hidden unless `--include-archived` is used. Stale pages remain searchable with lower score.
+Default search prioritizes durable semantic/procedural wiki pages. Episodic `wiki/queries/`, `wiki/context/`, and `session-log` pages are hidden unless promoted with durable metadata or explicitly requested. Archived and superseded pages are hidden unless `--include-archived` is used. Stale pages remain searchable with lower score. JSON hits include `rankReason`, `visibilityReason`, `evidenceRefs`, `matchedFields`, and `scoreBreakdown`; the text formatter prints `why selected` for maintainers. Hook compact context stays shorter and does not include those debug lines.
+## Evidence, Eval, And Export
+Curated wiki pages may include optional frontmatter:
+```yaml
+evidence_refs:
+  - "file:src/wiki-search.js"
+  - "cmd:node --test"
+  - "raw:source-id"
+  - "url:https://example.com/reference"
+```
+`llm-wiki lint` validates the prefix, safety, and rough reachability of those references. `file:` must be repo-relative, `cmd:` must be a short single-line redacted-safe command, `raw:` should resolve to a raw/source candidate, and `url:` must be `http` or `https` without credentials.
+`llm-wiki eval` reads `llm-wiki/evals/retrieval.json` by default:
+```json
+{
+  "queries": [
+    {
+      "query": "semantic retrieval",
+      "expected": ["wiki/architecture/retrieval.md"],
+      "unexpected": ["wiki/queries/old-auto.md"]
+    }
+  ]
+}
+```
+When the fixture file is missing, the command exits successfully with `no fixture found`. When a fixture is present, it reports expected recall, missed expected paths, unexpected hits, and top hits. Eval and export share the same durable visibility policy, so archived/superseded/default episodic pages are treated consistently.
+`llm-wiki export` writes `llms.txt`, `llms-full.txt`, and `llm-wiki.json` under `llm-wiki/outputs/exports/` by default. `llms.txt` is a curated onboarding and handoff manifest for agents and humans, not a passive SEO file. `llms-full.txt` is a bounded redacted context bundle for handoff or compaction recovery. `llm-wiki.json` is the structured manifest for future adapters and eval tooling. `--dry-run` reports planned files without writing them.
 ## Maintenance
-`llm-wiki maintenance` reports pending queue state and review health. It does not merge pages automatically. The active agent should merge reusable items into existing durable pages and mark queue items `done` or `skipped`.
+`llm-wiki maintenance` reports queue state and review health. It does not merge pages automatically. The active agent should merge reusable items into existing durable pages and move queue items through the `pending`, `approved`, `done`, or `skipped` states.
+```bash
+llm-wiki maintenance --workspace <project> --approve <id> --target wiki/concepts/topic.md
+llm-wiki maintenance --workspace <project> --done <id> --target wiki/concepts/topic.md
+llm-wiki maintenance --workspace <project> --skip <id> --note "duplicate"
+```
+`approved` means durable promotion is accepted but not yet merged. `done` means the durable page has been updated. `skipped` means the item was duplicate or not reusable enough. Approved reminders are shown before pending reminders.
 Hook reminders are soft:
 - session start and instructions loaded may show a one-item summary;
-- prompt submit shows a reminder only when the prompt is wiki/maintenance-related or matches a queue topic.
+- prompt submit shows one compact reminder for wiki/maintenance prompts, queue topic matches, approved items, durable candidates, stale/recovered items, or review-threshold pressure.
 ## PreCompact
@@ -214,6 +259,7 @@ LLM_WIKI_KIT_PRECOMPACT_ENFORCEMENT=limited
 - Hook payloads are stored as small redacted event envelopes.
 - Tool calls are not blocked only because input looks sensitive.
 - Tokens, passwords, bearer credentials, private keys, and raw `.env` contents are redacted before durable storage.
+- Generated exports are redacted and must not store npm tokens, WinRM credentials, private keys, raw `.env`, or full raw transcripts.
 - Phone numbers, emails, dates, and business identifiers are preserved by default because they can be useful local work context.
 - `llm-wiki lint` reports secret-like wiki content as an error.
@@ -232,6 +278,8 @@ llm-wiki version
 llm-wiki status --workspace /path/to/project
 llm-wiki doctor --workspace /path/to/project
 llm-wiki update --check --workspace /path/to/project
+llm-wiki eval --workspace /path/to/project --json
+llm-wiki export --workspace /path/to/project --format all --dry-run --json
 ```
 Native Windows support claims require a real Windows smoke: install the published package, run `install`, `status`, and `doctor` against a Windows project, inspect `%USERPROFILE%\.codex\hooks.json` and `%USERPROFILE%\.claude\settings.json`, and run hook smoke tests through `llm-wiki.cmd`.

package/docs/operations.md CHANGED Viewed

@@ -146,7 +146,7 @@ After a plain `npm install -g llm-wiki-kit@latest`, existing hooks keep working
 Daily use should be Claude Code/Codex first. The user should not need to run a chain of `llm-wiki` commands while working. Hooks inject context automatically, but the current user answer takes priority over wiki cleanup. The active agent updates durable wiki pages when reusable project knowledge appears and the turn's importance or user consent justifies persistence. Hook context policy is function-first: memory, search, maintenance, and update signals remain available, while user-visible context is formatted as functional compact context instead of a raw dump.
-In the default `LLM_WIKI_KIT_CAPTURE_MODE=answer-first` mode, `Stop` and `SessionEnd` append live Q&A only for meaningful work evidence or structured decision turns. Simple answers, status checks, and keyword-only responses are not archived. Live Q&A uses chunked files under `llm-wiki/outputs/questions/YYYY-MM-DD/` and rolls over by line/byte budget. Hooks do not auto-create `wiki/queries/` or `wiki/decisions/`. If the user explicitly asked for recording/documentation and no durable wiki update is detected, a pending cleanup candidate is written to `llm-wiki/outputs/maintenance/queue.md`. `PreCompact` performs the same answer-first classification before context compaction: simple turns get only a context note, archive-worthy turns get a live Q&A checkpoint, and explicit durable candidates get a checkpoint plus queue item only when needed. If checkpoint storage fails, compaction still proceeds and the hook prepares an important-only compact recovery packet for the next legal context-injection event. `SessionStart` and `UserPromptSubmit` also recover stale per-turn state into the same queue when the previous stop hook did not complete. `SessionStart` injects a one-item queue summary; `UserPromptSubmit` injects a soft reminder only when the prompt is wiki/maintenance related or matches a queue topic. This is a recovery and reminder layer, not a full transcript capture path.
+In the default `LLM_WIKI_KIT_CAPTURE_MODE=answer-first` mode, `Stop` and `SessionEnd` append live Q&A only for meaningful work evidence, structured decision turns, or reusable durable-candidate signals. Simple answers, status checks, and keyword-only responses are not archived. Live Q&A uses chunked files under `llm-wiki/outputs/questions/YYYY-MM-DD/` and rolls over by line/byte budget. Hooks do not auto-create `wiki/queries/` or `wiki/decisions/`. If the user explicitly asked for recording/documentation, or the turn contains reusable architecture/debugging/policy/procedure/decision signals, and no durable wiki update is detected, a pending cleanup candidate is written to `llm-wiki/outputs/maintenance/queue.md`. `PreCompact` performs the same answer-first classification before context compaction: simple turns get only a context note, archive-worthy turns get a live Q&A checkpoint, and durable candidates get a checkpoint plus queue item only when needed. If checkpoint storage fails, compaction still proceeds and the hook prepares an important-only compact recovery packet for the next legal context-injection event. `SessionStart` and `UserPromptSubmit` also recover stale per-turn state into the same queue when the previous stop hook did not complete. `SessionStart` injects a one-item queue summary; `UserPromptSubmit` injects a compact soft reminder when the prompt is wiki/maintenance related, matches a queue topic, has approved or durable-candidate items, or the queue crosses the review threshold. This is a recovery and reminder layer, not a full transcript capture path.
 Use `llm-wiki archive-questions --workspace <project> --dry-run` to review splitting legacy `outputs/questions/YYYY-MM-DD-live-qa.md` files into the chunked layout. Running it without `--dry-run` preserves the original under `outputs/questions/archive/originals/` with a checksum sidecar and replaces the legacy file with a pointer stub.
@@ -211,7 +211,7 @@ Agents may run `consolidate` after meaningful wiki growth. Users should not need
 `llm-wiki maintenance --workspace <project>` prints queue counts, review due status, and the first pending items. It does not merge wiki pages by itself; the active agent should review pending items, update the closest existing durable wiki document, then mark the queue item `done` or `skipped`. Periodic maintenance is an agent-side task, not something users need to run after every turn.
-`llm-wiki maintenance --workspace <project> --json` includes `reviewDue`, `reviewReasons`, `pendingCount`, `stalePendingCount`, `health`, and `recommendedCommands`. Review is due when the last review is older than 14 days, pending queue size reaches 5, stale or result-missing pending items exist, lint has warnings/errors, `memory.md` is near budget, or wiki page count reaches 80% of the search cap. Hook reminders are soft: `SessionStart`/`InstructionsLoaded` may show a short due note, while `UserPromptSubmit` shows it only for wiki/maintenance/cleanup-related prompts. The reminder never blocks the current answer.
+`llm-wiki maintenance --workspace <project> --json` includes `reviewDue`, `reviewReasons`, `pendingCount`, `stalePendingCount`, `health`, and `recommendedCommands`. Review is due when the last review is older than 14 days, pending queue size reaches 5, stale or result-missing pending items exist, lint has warnings/errors, `memory.md` is near budget, or wiki page count reaches 80% of the search cap. Hook reminders are soft: `SessionStart`/`InstructionsLoaded` may show a short due note, while `UserPromptSubmit` shows one compact item for wiki/maintenance prompts, approved items, durable candidates, stale/recovered items, or review-threshold pressure. The reminder never blocks the current answer.
 Recommended agent checklist:

package/docs/security.md CHANGED Viewed

@@ -13,4 +13,8 @@ Before writing durable summaries, the runtime redacts authentication values such
 Manual and hook context output also runs through redaction before returning excerpts or search hits. `llm-wiki lint` reports remaining secret-like wiki content as an error so it can be removed or rewritten before it becomes reusable project memory.
+`evidence_refs` are pointers, not a place to paste secrets or transcripts. `llm-wiki lint` rejects secret-like evidence values, unsafe `file:` paths, credential-bearing `url:` values, unsupported prefixes, and unsafe commands. Missing local `file:` or `raw:` targets are warnings so agents can fix references without losing the surrounding durable note.
+`llm-wiki export` redacts generated `llms.txt`, `llms-full.txt`, and `llm-wiki.json` output. Exports must not contain npm tokens, WinRM credentials, private keys, raw `.env`, or full raw transcripts. `llms.txt` is an agent onboarding/handoff manifest and follows the same durable visibility policy as retrieval eval, so archived/superseded/default episodic pages are excluded by default.
 Hook payloads are stored as small event envelopes, not full raw transcripts. Full transcript capture is intentionally not implemented as a default. `PreCompact` may read a small bounded transcript tail for a redacted checkpoint, but it does not store the raw transcript path or full transcript. If a project needs raw transcript capture, add a project-local policy and a redaction path first.

package/docs/troubleshooting.md CHANGED Viewed

@@ -229,7 +229,7 @@ Check:
 ## Maintenance Queue Is Empty Or Stale
-In the default answer-first mode, `llm-wiki/outputs/maintenance/queue.md` is created only when a user explicitly asked for durable recording/documentation but no durable wiki update was detected, or when `SessionStart`/`UserPromptSubmit` recovers stale per-turn state from a session that did not stop cleanly. It is not expected to grow after every normal `Stop`.
+In the default answer-first mode, `llm-wiki/outputs/maintenance/queue.md` is created when a user explicitly asked for durable recording/documentation, a turn was classified as a hook-suggested durable candidate, or stale per-turn state was recovered, and no durable wiki update was detected. It is not expected to grow after every normal `Stop`.
 Check the queue and health warnings:
@@ -238,7 +238,7 @@ llm-wiki maintenance --workspace /path/to/project
 llm-wiki lint --workspace /path/to/project
 ```
-If the queue is always empty during ordinary Q&A, that is normal. If you expected an explicit documentation request to queue, confirm hooks run and that the turn had a captured `UserPromptSubmit`. If pending items stay around, the active agent should merge reusable content into existing durable wiki pages and mark each item `done` or `skipped` without delaying unrelated answers.
+If the queue is always empty during ordinary Q&A, that is normal. If you expected an explicit documentation request or durable candidate to queue, confirm hooks run and that the turn had a captured `UserPromptSubmit`. If pending items stay around, the active agent should merge reusable content into existing durable wiki pages and mark each item `done` or `skipped` without delaying unrelated answers.
 ## Authentication Values Were Redacted

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "llm-wiki-kit",
-  "version": "0.2.14",
+  "version": "0.2.16",
   "description": "Hook-first living Markdown wiki runtime for Codex and Claude Code with Korean/English prompt-aware guidance.",
   "type": "module",
   "files": [

package/src/capture-policy.js CHANGED Viewed

@@ -116,10 +116,28 @@ export function classifyTurn(entry, eventName = '') {
   const hasVerification = Boolean(verification);
   const hasWorkEvidence = hasWork || hasFiles || hasVerification;
   const durableConclusion = hasDurableConclusion(text);
+  const durableSignal = durableConclusion || hasDurableKeyword(text);
+  const durableUpdated = hasDetectedDurableWikiChange(entry);
   if (hasWorkEvidence) {
+    if (durableUpdated) {
+      return {
+        kind: 'durable-updated',
+        archive: true,
+        suggestDurable: false,
+        queueIfMissingDurable: false,
+      };
+    }
+    if (durableSignal) {
+      return {
+        kind: 'suggest-durable',
+        archive: true,
+        suggestDurable: true,
+        queueIfMissingDurable: true,
+      };
+    }
     return {
-      kind: hasDetectedDurableWikiChange(entry) ? 'durable-updated' : 'work',
+      kind: 'work',
       archive: true,
       suggestDurable: false,
       queueIfMissingDurable: false,
@@ -128,10 +146,10 @@ export function classifyTurn(entry, eventName = '') {
   if (durableConclusion) {
     return {
-      kind: 'decision',
+      kind: 'suggest-durable',
       archive: true,
-      suggestDurable: false,
-      queueIfMissingDurable: false,
+      suggestDurable: true,
+      queueIfMissingDurable: true,
     };
   }

package/src/cli.js CHANGED Viewed

@@ -3,7 +3,7 @@ import { resolve } from 'path';
 import { formatConsolidateResult, runConsolidate } from './consolidate.js';
 import { handleHook } from './hook.js';
 import { install, status, uninstall } from './install.js';
-import { formatMaintenanceResult, maintenanceSummary } from './maintenance.js';
+import { formatMaintenanceResult, maintenanceSummary, updateMaintenanceItem } from './maintenance.js';
 import { bootstrapProject } from './project.js';
 import { inspectProjectState } from './project-state.js';
 import { commandForProject, knownProjectRoots, recordProject } from './projects.js';
@@ -11,6 +11,8 @@ import { formatDoctor, runDoctor } from './doctor.js';
 import { migrate } from './migrate.js';
 import { postUpdate, update } from './update.js';
 import { buildContextPack, formatContextPack } from './wiki-search.js';
+import { formatEvalResult, runEval } from './wiki-eval.js';
+import { formatExportResult, runExport } from './wiki-export.js';
 import { formatLintResult, runLint } from './wiki-lint.js';
 import { archiveQuestions, formatArchiveQuestionsResult } from './live-qa.js';
@@ -33,6 +35,30 @@ function parseOptions(args) {
     } else if (arg === '--to') {
       options.to = optionValue(arg, i);
       i += 1;
+    } else if (arg === '--fixture') {
+      options.fixture = optionValue(arg, i);
+      i += 1;
+    } else if (arg === '--format') {
+      options.format = optionValue(arg, i);
+      i += 1;
+    } else if (arg === '--output') {
+      options.output = resolve(optionValue(arg, i));
+      i += 1;
+    } else if (arg === '--target') {
+      options.target = optionValue(arg, i);
+      i += 1;
+    } else if (arg === '--note') {
+      options.note = optionValue(arg, i);
+      i += 1;
+    } else if (arg === '--approve') {
+      options.approve = optionValue(arg, i);
+      i += 1;
+    } else if (arg === '--done') {
+      options.done = optionValue(arg, i);
+      i += 1;
+    } else if (arg === '--skip') {
+      options.skip = optionValue(arg, i);
+      i += 1;
     } else if (arg === '--date') {
       const value = optionValue(arg, i);
       if (!/^\d{4}-\d{2}-\d{2}$/.test(value)) {
@@ -127,9 +153,11 @@ Usage:
   llm-wiki bootstrap --workspace <project>
   llm-wiki migrate --workspace <project>
   llm-wiki context "<query>" --workspace <project> [--limit 5] [--no-expand] [--include-episodic] [--include-archived]
+  llm-wiki eval --workspace <project> [--fixture <path>] [--limit 5] [--json]
+  llm-wiki export --workspace <project> [--format all|llms|llms-full|json] [--output <dir>] [--dry-run] [--json]
   llm-wiki lint --workspace <project>
   llm-wiki consolidate --workspace <project> [--dry-run]
-  llm-wiki maintenance --workspace <project> [--json]
+  llm-wiki maintenance --workspace <project> [--approve <id> --target <wiki/...md> | --done <id> --target <wiki/...md> | --skip <id> [--note "..."]] [--json]
   llm-wiki archive-questions --workspace <project> [--date YYYY-MM-DD] [--dry-run] [--json]
 `);
     return;
@@ -229,6 +257,20 @@ Usage:
     return;
   }
+  if (command === 'eval') {
+    const projectRoot = resolve(options.workspace || process.cwd());
+    const result = await runEval(projectRoot, options);
+    printJsonOrText(result, options, formatEvalResult);
+    if (!result.ok) process.exitCode = 1;
+    return;
+  }
+  if (command === 'export') {
+    const projectRoot = resolve(options.workspace || process.cwd());
+    printJsonOrText(await runExport(projectRoot, options), options, formatExportResult);
+    return;
+  }
   if (command === 'lint') {
     const projectRoot = resolve(options.workspace || process.cwd());
     const result = await runLint(projectRoot, options);
@@ -245,6 +287,14 @@ Usage:
   if (command === 'maintenance') {
     const projectRoot = resolve(options.workspace || process.cwd());
+    const actions = [options.approve ? 'approve' : '', options.done ? 'done' : '', options.skip ? 'skip' : ''].filter(Boolean);
+    if (actions.length > 1) throw new Error('maintenance accepts only one of --approve, --done, or --skip');
+    if (actions.length === 1) {
+      const action = actions[0];
+      const id = options.approve || options.done || options.skip;
+      printJsonOrText(await updateMaintenanceItem(projectRoot, id, action, options), options);
+      return;
+    }
     printJsonOrText(await maintenanceSummary(projectRoot, { ...options, includeLint: true }), options, formatMaintenanceResult);
     return;
   }

package/src/constants.js CHANGED Viewed

@@ -52,7 +52,9 @@ export const LLM_WIKI_DIRS = [
   'wiki/queries',
   'outputs/questions',
   'outputs/reports',
+  'outputs/exports',
   'outputs/maintenance',
+  'evals',
   'procedures',
 ];