akm-cli 0.8.0 → 0.8.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54) hide show
  1. package/CHANGELOG.md +118 -0
  2. package/dist/assets/profiles/default.json +15 -0
  3. package/dist/assets/profiles/graph-refresh.json +13 -0
  4. package/dist/assets/profiles/memory-focus.json +12 -0
  5. package/dist/assets/profiles/quick.json +15 -0
  6. package/dist/assets/profiles/thorough.json +15 -0
  7. package/dist/assets/stash-skeleton/README.md +76 -0
  8. package/dist/assets/tasks/graph-refresh-weekly.yml +10 -0
  9. package/dist/cli.js +8 -3
  10. package/dist/commands/consolidate.js +36 -15
  11. package/dist/commands/extract-prompt.js +14 -1
  12. package/dist/commands/health.js +89 -8
  13. package/dist/commands/improve-cli.js +2 -2
  14. package/dist/commands/improve-profiles.js +13 -59
  15. package/dist/commands/improve-result-file.js +9 -4
  16. package/dist/commands/improve.js +86 -65
  17. package/dist/commands/info.js +23 -28
  18. package/dist/commands/init.js +6 -1
  19. package/dist/commands/{proposal-drain-policies.js → proposal/drain-policies.js} +2 -2
  20. package/dist/commands/{proposal-drain.js → proposal/drain.js} +10 -10
  21. package/dist/commands/show.js +47 -0
  22. package/dist/commands/stash-skeleton.js +78 -0
  23. package/dist/{setup/ripgrep-install.js → core/ripgrep/install.js} +2 -2
  24. package/dist/{setup/ripgrep-resolve.js → core/ripgrep/resolve.js} +2 -2
  25. package/dist/core/stash-meta.js +110 -0
  26. package/dist/indexer/indexer.js +2 -2
  27. package/dist/llm/graph-extract.js +1 -1
  28. package/dist/output/cli-hints.js +2 -2
  29. package/dist/setup/detect.js +27 -0
  30. package/dist/setup/harness-config-import.js +170 -0
  31. package/dist/setup/registry-stash-loader.js +99 -0
  32. package/dist/setup/setup.js +229 -72
  33. package/dist/tasks/backends/launchd.js +1 -1
  34. package/dist/tasks/backends/schtasks.js +1 -1
  35. package/dist/wiki/wiki-templates.js +3 -3
  36. package/dist/wiki/wiki.js +1 -1
  37. package/dist/workflows/authoring.js +1 -1
  38. package/package.json +1 -1
  39. /package/dist/{tasks → assets}/backends/launchd-template.xml +0 -0
  40. /package/dist/{tasks → assets}/backends/schtasks-template.xml +0 -0
  41. /package/dist/{commands → assets}/help/help-accept.md +0 -0
  42. /package/dist/{commands → assets}/help/help-improve.md +0 -0
  43. /package/dist/{commands → assets}/help/help-proposals.md +0 -0
  44. /package/dist/{commands → assets}/help/help-propose.md +0 -0
  45. /package/dist/{commands → assets}/help/help-reject.md +0 -0
  46. /package/dist/{output → assets/hints}/cli-hints-full.md +0 -0
  47. /package/dist/{output → assets/hints}/cli-hints-short.md +0 -0
  48. /package/dist/{llm → assets}/prompts/extract-session.md +0 -0
  49. /package/dist/{llm → assets}/prompts/graph-extract-user-prompt.md +0 -0
  50. /package/dist/{wiki → assets/wiki}/index-template.md +0 -0
  51. /package/dist/{wiki → assets/wiki}/ingest-workflow-template.md +0 -0
  52. /package/dist/{wiki → assets/wiki}/log-template.md +0 -0
  53. /package/dist/{wiki → assets/wiki}/schema-template.md +0 -0
  54. /package/dist/{workflows → assets/workflows}/workflow-template.md +0 -0
package/CHANGELOG.md CHANGED
@@ -4,6 +4,124 @@ All notable changes to this project will be documented in this file.
4
4
 
5
5
  The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/).
6
6
 
7
+ ## [0.8.2] - 2026-06-05
8
+
9
+ ### Added
10
+
11
+ - **LM Studio auto-detection in setup wizard** — `akm setup` now probes
12
+ `localhost:1234/v1/models` at startup and, when the server is running, pre-fills
13
+ the LLM backend with the active model list, mirroring the existing Ollama detection
14
+ flow (#522).
15
+ - **Agent harness config import** — `akm setup` detects installed AI coding harnesses
16
+ (currently Claude Code and OpenCode) and pre-populates LLM provider, model, and
17
+ base-URL fields from the harness configuration. The importer registry
18
+ (`HARNESS_CONFIG_IMPORTERS`) makes adding future harnesses a single append (#523).
19
+ API key *values* are never read or stored — only the environment variable name is
20
+ imported.
21
+ - **Registry-driven stash selection** — the "Add Sources" step now fetches available
22
+ stashes from the official AKM registry at startup. `DEFAULT_SELECTED_STASH_IDS`
23
+ in `src/setup/registry-stash-loader.ts` is the single edit point for changing
24
+ which stashes are pre-checked. Falls back to a hardcoded list on network error (#520).
25
+ - **`improve.autoAccept.{promoted,validationFailed}` health metrics** — auto-accepted
26
+ proposals that pass the confidence threshold but fail validation (truncated
27
+ description, invalid frontmatter) are now counted as `gateAutoAcceptFailedCount`
28
+ in the improve result envelope and surfaced as `improve.autoAccept.validationFailed`
29
+ in `akm health` reports.
30
+ - **`auto-accept-validation` health advisory** — heuristic advisory that warns when
31
+ `validationFailed > 0` so malformed proposals are visible before they pile up in
32
+ the queue.
33
+
34
+ ### Fixed
35
+
36
+ - **`akm-improve` tasks recorded as failed on budget exhaustion** — the budget
37
+ exhaustion timer called `process.exit(1)`, causing every budget-limited run to be
38
+ recorded as a task failure. Changed to `process.exit(0)`; budget exhaustion is a
39
+ normal exit condition.
40
+ - **`improve_runs.started_at` always equal to `completed_at`** — `writeImproveResultFile`
41
+ was called at end-of-run, so `new Date()` captured the completion time and both
42
+ columns held the same value (649/661 real runs affected, regressed ~May 26).
43
+ `started_at` now uses the timestamp captured at process launch, passed in from the
44
+ CLI entry point. A regex-based fallback decodes the timestamp embedded in the run ID
45
+ for any call site that does not supply an explicit value (#524).
46
+ - **`akm-health-report` task fails on transient DNS errors** — the Discord webhook
47
+ script caught `HTTPError` but not the parent `URLError`, so DNS blips caused the
48
+ task runner to record the health report as failed. `URLError` is now caught and
49
+ logged as a warning with a clean exit.
50
+
51
+ ### Added
52
+
53
+ - **Stash `.meta/` convention** — a stash may carry an optional, human-authored
54
+ `.meta/` directory at its root for orientation: purpose, key assets, conventions,
55
+ and maintainer info. Surface it on demand with `akm show meta` (the working
56
+ stash's `.meta/index.md`), `akm show meta:<name>` (e.g. `.meta/about.md`), or
57
+ scope it to a specific stash with `akm show <origin>//meta[:<name>]`. Because
58
+ `.meta/` is a dot-directory, the indexer already skips it, so these docs never
59
+ pollute search results — they are read directly on demand. Owners extend the
60
+ convention by dropping new files (`.meta/about.md`, `.meta/conventions.md`,
61
+ `.meta/license`) with no code changes. `akm init` scaffolds a `.meta/index.md`
62
+ template into newly created stashes.
63
+ - **Default stash skeleton** — `akm init` (and `akm setup`) now copies
64
+ `src/assets/stash-skeleton/` into every newly created stash. Currently ships
65
+ a `README.md` covering what the stash contains and how agents use `akm` to
66
+ access assets. Existing files are never overwritten. Add files to
67
+ `src/assets/stash-skeleton/` to extend what ships with a fresh install.
68
+
69
+ ### Improved
70
+
71
+ - **Setup wizard pre-populates from existing config** — on re-run, `akm setup`
72
+ initialises every prompt default from the current saved configuration so users
73
+ only need to change what has actually changed (#519).
74
+ - **Config backup before every setup write** — `backupExistingConfig()` is now called
75
+ before each `saveConfig` in the setup wizard, ensuring the previous config is always
76
+ recoverable if a wizard run is interrupted (#521).
77
+
78
+ ## [0.8.1] - 2026-06-05
79
+
80
+ ### Added
81
+
82
+ - **`graph-refresh` improve profile** — new built-in profile that runs a full-corpus
83
+ graph extraction pass across all stash files (all other improve processes disabled).
84
+ Use `akm improve --profile graph-refresh` for a weekly relationship rebuild.
85
+ Pairs with the new `graph-refresh-weekly` task template (`akm tasks add --template graph-refresh-weekly`).
86
+ - **`session-extraction` health advisory** — new heuristic advisory backed by real
87
+ `akmExtract` outcomes: warns when the session-extraction process ran but produced
88
+ zero proposals across ≥ 5 sessions, or recorded warnings. Replaces the vestigial
89
+ `session-log-failures` warn signal.
90
+ - **`improve.sessionExtraction` health metrics** — `sessionsScanned`, `sessionsExtracted`,
91
+ `sessionsSkipped`, `proposalsCreated`, `warnings`, `durationMs` now tracked and
92
+ visible in `akm health` reports.
93
+
94
+ ### Fixed
95
+
96
+ - **`akm info` indexStats** — `readIndexStats` errors are now surfaced and the resolved
97
+ DB path is passed correctly; `entryCount`, `hasEmbeddings`, and related fields are
98
+ no longer silently empty (#510).
99
+ - **Indexer timing fields** — `embedMs` and `ftsMs` in timing output had their
100
+ operands swapped, producing negative durations. Fixed (#516).
101
+ - **Incremental consolidation gate** — the `volumeTriggered` path bypassed the
102
+ incremental gate introduced in 0.8.0, causing consolidation to run on chunks it
103
+ had already processed in the same run. Fixed.
104
+ - **Improve budget exhaustion** — `improve.lock` was not released after budget
105
+ exhaustion, blocking subsequent runs until the lock TTL expired.
106
+ - **Consolidation chunk retry** — failed chunks are now retried once with a 2 s
107
+ backoff before being recorded as lost, keeping transient LLM errors from
108
+ propagating to `chunksFailed`.
109
+ - **`yieldRate` health metric** — `skippedAborted` refs were incorrectly counted in
110
+ `freshAttempts`, inflating the denominator and underreporting yield rate.
111
+ - **`session-log-failures` advisory** — demoted from `warn` to always `pass`
112
+ (informational only); the advisory was a raw regex counter with no LLM signal,
113
+ producing false positives on normal session content.
114
+
115
+ ### Refactored
116
+
117
+ - All runtime assets consolidated under `src/assets/` with `dist/assets/` mirroring
118
+ the layout exactly. Built-in improve profiles moved from in-source object literals
119
+ to embedded JSON files (`src/assets/profiles/*.json`). The `copy-assets.ts` build
120
+ step now uses a precise `src/assets/**/*` glob instead of a broad catch-all.
121
+ - Vestigial Phase 0 (`getExecutionLogCandidates` / `ERROR_PATTERNS`) removed from
122
+ the improve pipeline. This regex scan collected a metric count but never fed an
123
+ LLM; `akmExtract` (Phase 0.4) is the real session extraction pipeline.
124
+
7
125
  ## [0.8.0] - 2026-05-28
8
126
 
9
127
  ### Performance
@@ -0,0 +1,15 @@
1
+ {
2
+ "description": "Standard improve pass — all sub-processes except triage, markdown asset types.",
3
+ "processes": {
4
+ "reflect": {
5
+ "enabled": true,
6
+ "allowedTypes": ["agent", "command", "knowledge", "lesson", "memory", "skill", "wiki", "workflow"]
7
+ },
8
+ "distill": { "enabled": true, "allowedTypes": ["memory"] },
9
+ "consolidate": { "enabled": true, "allowedTypes": ["memory"] },
10
+ "memoryInference": { "enabled": true },
11
+ "graphExtraction": { "enabled": true },
12
+ "triage": { "enabled": false, "applyMode": "queue", "policy": "personal-stash" }
13
+ },
14
+ "sync": { "enabled": true, "push": true }
15
+ }
@@ -0,0 +1,13 @@
1
+ {
2
+ "description": "Full-corpus graph extraction pass — rebuilds graph relationships across all stash files. Disables all other sub-processes. Use via `akm improve --profile graph-refresh` or the embedded `graph-refresh-weekly` task.",
3
+ "processes": {
4
+ "reflect": { "enabled": false },
5
+ "distill": { "enabled": false },
6
+ "consolidate": { "enabled": false },
7
+ "memoryInference": { "enabled": false },
8
+ "graphExtraction": { "enabled": true, "fullScan": true },
9
+ "extract": { "enabled": false },
10
+ "triage": { "enabled": false }
11
+ },
12
+ "sync": { "enabled": true, "push": true }
13
+ }
@@ -0,0 +1,12 @@
1
+ {
2
+ "description": "Memory and lesson improvement only — no distill, consolidate, or graphExtraction.",
3
+ "processes": {
4
+ "reflect": { "enabled": true, "allowedTypes": ["memory", "lesson"] },
5
+ "distill": { "enabled": false },
6
+ "consolidate": { "enabled": false },
7
+ "memoryInference": { "enabled": true },
8
+ "graphExtraction": { "enabled": false },
9
+ "triage": { "enabled": false }
10
+ },
11
+ "sync": { "enabled": false }
12
+ }
@@ -0,0 +1,15 @@
1
+ {
2
+ "description": "Reflect-only pass — no distill, consolidate, memoryInference, or graphExtraction.",
3
+ "processes": {
4
+ "reflect": {
5
+ "enabled": true,
6
+ "allowedTypes": ["agent", "command", "knowledge", "lesson", "memory", "skill", "wiki", "workflow"]
7
+ },
8
+ "distill": { "enabled": false },
9
+ "consolidate": { "enabled": false },
10
+ "memoryInference": { "enabled": false },
11
+ "graphExtraction": { "enabled": false },
12
+ "triage": { "enabled": false }
13
+ },
14
+ "sync": { "enabled": false }
15
+ }
@@ -0,0 +1,15 @@
1
+ {
2
+ "description": "All sub-processes enabled, including triage in queue mode (the default profile leaves triage disabled).",
3
+ "processes": {
4
+ "reflect": {
5
+ "enabled": true,
6
+ "allowedTypes": ["agent", "command", "knowledge", "lesson", "memory", "skill", "wiki", "workflow"]
7
+ },
8
+ "distill": { "enabled": true, "allowedTypes": ["memory"] },
9
+ "consolidate": { "enabled": true, "allowedTypes": ["memory"] },
10
+ "memoryInference": { "enabled": true },
11
+ "graphExtraction": { "enabled": true },
12
+ "triage": { "enabled": true, "applyMode": "queue" }
13
+ },
14
+ "sync": { "enabled": true, "push": true }
15
+ }
@@ -0,0 +1,76 @@
1
+ # AKM Stash
2
+
3
+ This is an **AKM stash** — a structured knowledge repository that stores reusable
4
+ assets for you and your AI agents. AKM (Agent Knowledge Management) indexes, ranks,
5
+ and surfaces these assets at the right moment during coding sessions, improving
6
+ consistency and reducing repeated context-setting.
7
+
8
+ ## What this stash contains
9
+
10
+ | Directory | Asset type | Purpose |
11
+ |-----------|-----------|---------|
12
+ | `skills/` | Skills | Step-by-step instructions agents follow for specific tasks |
13
+ | `knowledge/` | Knowledge | Reference documents, guides, architecture notes |
14
+ | `memories/` | Memories | Persistent facts and preferences learned over time |
15
+ | `commands/` | Commands | Parameterised prompt templates for common workflows |
16
+ | `agents/` | Agents | Agent definitions with system prompts and tool policies |
17
+ | `workflows/` | Workflows | Multi-step orchestration sequences |
18
+ | `tasks/` | Tasks | Scheduled or on-demand automation tasks |
19
+ | `lessons/` | Lessons | Durable lessons extracted from past sessions |
20
+
21
+ Add your own assets to any of these directories. AKM will index them automatically
22
+ on the next `akm index` run (or when the background improve pipeline picks them up).
23
+
24
+ ## For agents: how to access this stash
25
+
26
+ All assets in this stash are searchable via the `akm` CLI. Use these commands to
27
+ find and read assets during a session:
28
+
29
+ ```sh
30
+ # Find assets relevant to your current task (recommended first step)
31
+ akm curate "<task description including project name>"
32
+
33
+ # Full-text + semantic search
34
+ akm search "<query>"
35
+ akm search "<query>" --type skill
36
+ akm search "<query>" --type knowledge
37
+
38
+ # Show a specific asset by ref
39
+ akm show skill:<name>
40
+ akm show knowledge:<name>
41
+ akm show memory:<name>
42
+ akm show command:<name>
43
+
44
+ # List available assets by type
45
+ akm list --type skill
46
+ akm list --type knowledge
47
+ ```
48
+
49
+ ### Recording feedback and new knowledge
50
+
51
+ ```sh
52
+ # Mark an asset as helpful (improves future rankings)
53
+ akm feedback <ref> --positive
54
+
55
+ # Capture a durable lesson or memory from the current session
56
+ akm remember "<fact or lesson>"
57
+ ```
58
+
59
+ ### Improving and maintaining the stash
60
+
61
+ ```sh
62
+ # Run the self-improvement pipeline (extract, reflect, consolidate)
63
+ akm improve
64
+
65
+ # Check stash health and pipeline metrics
66
+ akm health
67
+
68
+ # Review pending improvement proposals
69
+ akm proposal list
70
+ akm proposal show <id>
71
+ akm proposal accept <id>
72
+ ```
73
+
74
+ ---
75
+
76
+ *Created by `akm init`. See `akm --help` for full command reference.*
@@ -0,0 +1,10 @@
1
+ id: graph-refresh-weekly
2
+ description: >
3
+ Weekly full-corpus graph extraction pass. Rebuilds entity and relation
4
+ indexes across all stash files using the graph-refresh improve profile.
5
+ Complements the per-run improve pipeline which only extracts graph data
6
+ for files touched by actionable refs.
7
+ command: akm improve --profile graph-refresh
8
+ schedule: "0 3 * * 0"
9
+ enabled: false
10
+ timeout: 3600000
package/dist/cli.js CHANGED
@@ -92,8 +92,8 @@ function resolveEventSource() {
92
92
  }
93
93
  import { resolveImproveProfile } from "./commands/improve-profiles";
94
94
  import { akmProposalAccept, akmProposalDiff, akmProposalList, akmProposalReject, akmProposalRevert, akmProposalShow, } from "./commands/proposal";
95
- import { drainProposals } from "./commands/proposal-drain";
96
- import { resolveDrainPolicy } from "./commands/proposal-drain-policies";
95
+ import { drainProposals } from "./commands/proposal/drain";
96
+ import { resolveDrainPolicy } from "./commands/proposal/drain-policies";
97
97
  import { akmPropose } from "./commands/propose";
98
98
  import { akmSearch, parseBeliefFilterMode, parseScopeFilterFlags, parseSearchSource } from "./commands/search";
99
99
  import { checkForUpdate, performUpgrade } from "./commands/self-update";
@@ -107,6 +107,7 @@ import { DEFAULT_CONFIG, loadConfig, loadUserConfig, resolveConfiguredSources, s
107
107
  import { ConfigError, NotFoundError, UsageError } from "./core/errors";
108
108
  import { appendEvent } from "./core/events";
109
109
  import { getCacheDir, getConfigPath, getDbPath, getDefaultStashDir } from "./core/paths";
110
+ import { parseMetaRef } from "./core/stash-meta";
110
111
  import { plainize } from "./core/tty";
111
112
  import { clearLogFile, info, isQuiet, isVerbose, setLogFile, setQuiet, setVerbose, warn } from "./core/warn";
112
113
  import { closeDatabase, openExistingDatabase } from "./indexer/db";
@@ -872,7 +873,11 @@ const showCommand = defineCommand({
872
873
  output("proposal-show", result);
873
874
  return;
874
875
  }
875
- parseAssetRef(args.ref);
876
+ // `[origin//]meta[:name]` targets the stash `.meta/` convention, which is
877
+ // not a typed asset ref — skip ref validation and let akmShowUnified
878
+ // direct-read it. (`parseAssetRef` would reject the non-type `meta`.)
879
+ if (!parseMetaRef(args.ref))
880
+ parseAssetRef(args.ref);
876
881
  // The knowledge-view positional syntax (`akm show knowledge:foo section "Auth"`)
877
882
  // is rewritten to `--akmView` / `--akmHeading` / `--akmStart` / `--akmEnd`
878
883
  // by `normalizeShowArgv` before citty parses argv. We read those values
@@ -170,7 +170,7 @@ export function isHotCapturedMemory(filePath) {
170
170
  return false;
171
171
  }
172
172
  }
173
- export function consolidateGuardStatus(filePath) {
173
+ function consolidateGuardStatus(filePath) {
174
174
  if (!fs.existsSync(filePath))
175
175
  return "missing";
176
176
  let content;
@@ -395,7 +395,7 @@ export function buildChunkPrompt(sourceName, memories, chunkIndex, totalChunks,
395
395
  * trimmed). Empty set on any read/parse error — fail-safe to "annotate
396
396
  * nothing" so the LLM still proposes, just slightly more wastefully.
397
397
  */
398
- export function loadPendingConsolidateProposalHashes(stashDir) {
398
+ function loadPendingConsolidateProposalHashes(stashDir) {
399
399
  const hashes = new Set();
400
400
  try {
401
401
  const pending = listProposals(stashDir, { status: "pending" }).filter((p) => p.source === "consolidate");
@@ -965,7 +965,7 @@ export async function akmConsolidate(opts = {}) {
965
965
  }
966
966
  warn(`[consolidate] chunk ${chunkIdx + 1}/${chunks.length} (${chunk.length} memories) …`);
967
967
  const userPrompt = buildChunkPrompt(sourceName, chunk, chunkIdx, chunks.length, bodyTruncation, pendingProposalBodyHashes);
968
- const raw = await tryLlmFeature("memory_consolidation", config, async () => {
968
+ let raw = await tryLlmFeature("memory_consolidation", config, async () => {
969
969
  if (!llmConfig)
970
970
  return { ok: false, error: "No LLM configured for consolidation" };
971
971
  try {
@@ -985,16 +985,37 @@ export async function akmConsolidate(opts = {}) {
985
985
  }
986
986
  }, { ok: false, error: `chunk ${chunkIdx + 1} failed` });
987
987
  if (!raw.ok) {
988
- warn(raw.error ?? `chunk ${chunkIdx + 1} failed`);
989
- warnings.push(raw.error ?? `chunk ${chunkIdx + 1} failed`);
990
- totalChunksProcessed++;
991
- totalChunksFailed++;
992
- // Account for the chunk's memories under the failed-chunk bucket.
993
- // judgedNoAction does NOT run on this path (it's after the success
994
- // guards) so without this the accounting invariant breaks on every
995
- // chunk-level transport/parse failure.
996
- failedChunkMemories += chunk.length;
997
- continue;
988
+ // Single retry with 2s backoff before recording chunk as lost.
989
+ // Recovers transient LLM backend timeouts (e.g. LM Studio) without significantly
990
+ // extending run time. Only marks failed if both attempts fail.
991
+ await new Promise((r) => setTimeout(r, 2_000));
992
+ const retry = await tryLlmFeature("memory_consolidation", config, async () => {
993
+ if (!llmConfig)
994
+ return { ok: false, error: "No LLM configured for consolidation" };
995
+ try {
996
+ const content = await chatCompletion(llmConfig, [
997
+ { role: "system", content: CONSOLIDATE_SYSTEM_PROMPT },
998
+ { role: "user", content: userPrompt },
999
+ ], { responseSchema: CONSOLIDATE_PLAN_JSON_SCHEMA, enableThinking: false });
1000
+ return { ok: true, content };
1001
+ }
1002
+ catch (e) {
1003
+ return { ok: false, error: String(e) };
1004
+ }
1005
+ }, { ok: false, error: `chunk ${chunkIdx + 1} retry failed` });
1006
+ if (!retry.ok) {
1007
+ warn(retry.error ?? `chunk ${chunkIdx + 1} failed after retry`);
1008
+ warnings.push(retry.error ?? `chunk ${chunkIdx + 1} failed after retry`);
1009
+ totalChunksProcessed++;
1010
+ totalChunksFailed++;
1011
+ // Account for the chunk's memories under the failed-chunk bucket.
1012
+ // judgedNoAction does NOT run on this path (it's after the success
1013
+ // guards) so without this the accounting invariant breaks on every
1014
+ // chunk-level transport/parse failure.
1015
+ failedChunkMemories += chunk.length;
1016
+ continue;
1017
+ }
1018
+ raw = retry;
998
1019
  }
999
1020
  if (process.env.AKM_DEBUG_LLM) {
1000
1021
  const preview = (raw.content ?? "").slice(0, 500);
@@ -1903,7 +1924,7 @@ export function normalizeUpdatedField(fm) {
1903
1924
  * Two slugs that normalise to the same string are considered the same asset
1904
1925
  * for dedup purposes even if they don't share an exact ref.
1905
1926
  */
1906
- export function normalizeSlugForDedup(ref) {
1927
+ function normalizeSlugForDedup(ref) {
1907
1928
  const slug = ref.replace(/^[^:]+:/, "");
1908
1929
  const monthRe = /(?:jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec)/i;
1909
1930
  const tokens = slug
@@ -1941,7 +1962,7 @@ export function normalizeSlugForDedup(ref) {
1941
1962
  * improve invocation — a different concern from the cross-run content-hash
1942
1963
  * dedup, and cheap (no embeddings, no DB query).
1943
1964
  */
1944
- export async function checkPreEmitDedup(opts) {
1965
+ async function checkPreEmitDedup(opts) {
1945
1966
  const normCandidate = normalizeSlugForDedup(opts.candidateRef);
1946
1967
  // Pending consolidate proposals (slug match) — within the same improve run.
1947
1968
  const pendingConsolidate = listProposals(opts.stashDir, { status: "pending" }).filter((p) => p.source === "consolidate");
@@ -1,7 +1,20 @@
1
1
  // This Source Code Form is subject to the terms of the Mozilla Public
2
2
  // License, v. 2.0. If a copy of the MPL was not distributed with this
3
3
  // file, You can obtain one at https://mozilla.org/MPL/2.0/.
4
- import promptTemplate from "../llm/prompts/extract-session.md" with { type: "text" };
4
+ /**
5
+ * Prompt + schema for `akm extract <session>`.
6
+ *
7
+ * Mirrors the REFLECT_JSON_SCHEMA pattern: a strict JSON Schema describing
8
+ * the LLM output, plus a {@link buildExtractPrompt} helper that interpolates
9
+ * session data into the markdown template loaded from
10
+ * `src/assets/prompts/extract-session.md`.
11
+ *
12
+ * The schema is intentionally strict — providers with `supportsJsonSchema:
13
+ * true` enforce shape upstream, so the parser only has to handle the
14
+ * happy path. `additionalProperties: false` means any hallucinated keys
15
+ * the model emits get dropped before we parse.
16
+ */
17
+ import promptTemplate from "../assets/prompts/extract-session.md" with { type: "text" };
5
18
  /**
6
19
  * JSON Schema for the structured extract output. Passed to `chatCompletion`
7
20
  * when the configured LLM connection has `supportsJsonSchema: true`.
@@ -71,9 +71,9 @@ function createUnknownImproveMetrics() {
71
71
  graphExtraction: 0,
72
72
  error: 0,
73
73
  },
74
+ autoAccept: { promoted: 0, validationFailed: 0 },
74
75
  reflectsWithErrorContext: 0,
75
76
  coverageGapCount: 0,
76
- executionLogCandidateCount: 0,
77
77
  evalCasesWritten: 0,
78
78
  deadUrlCount: 0,
79
79
  memorySummary: { eligible: 0, derived: 0 },
@@ -130,6 +130,15 @@ function createUnknownImproveMetrics() {
130
130
  failures: 0,
131
131
  durationMs: 0,
132
132
  },
133
+ sessionExtraction: {
134
+ ran: false,
135
+ sessionsScanned: 0,
136
+ sessionsExtracted: 0,
137
+ sessionsSkipped: 0,
138
+ proposalsCreated: 0,
139
+ warnings: 0,
140
+ durationMs: 0,
141
+ },
133
142
  wallTime: {
134
143
  count: 0,
135
144
  medianMs: 0,
@@ -285,11 +294,11 @@ function projectRunMetrics(result) {
285
294
  }
286
295
  }
287
296
  }
297
+ metrics.autoAccept.promoted += toFiniteNumber(result.gateAutoAcceptedCount);
298
+ metrics.autoAccept.validationFailed += toFiniteNumber(result.gateAutoAcceptFailedCount);
288
299
  metrics.reflectsWithErrorContext += toFiniteNumber(result.reflectsWithErrorContext);
289
300
  if (Array.isArray(result.coverageGaps))
290
301
  metrics.coverageGapCount += result.coverageGaps.length;
291
- if (Array.isArray(result.executionLogCandidates))
292
- metrics.executionLogCandidateCount += result.executionLogCandidates.length;
293
302
  metrics.evalCasesWritten += toFiniteNumber(result.evalCasesWritten);
294
303
  if (Array.isArray(result.deadUrls))
295
304
  metrics.deadUrlCount += result.deadUrls.length;
@@ -385,6 +394,18 @@ function projectRunMetrics(result) {
385
394
  }
386
395
  }
387
396
  metrics.graphExtraction.durationMs += toFiniteNumber(result.graphExtractionDurationMs);
397
+ if (Array.isArray(result.extract)) {
398
+ for (const e of result.extract) {
399
+ metrics.sessionExtraction.sessionsScanned += toFiniteNumber(e.sessionsProcessed);
400
+ metrics.sessionExtraction.sessionsSkipped += toFiniteNumber(e.sessionsSkipped);
401
+ if (Array.isArray(e.sessions)) {
402
+ metrics.sessionExtraction.sessionsExtracted += e.sessions.filter((s) => Array.isArray(s.proposalIds) && s.proposalIds.length > 0).length;
403
+ }
404
+ metrics.sessionExtraction.proposalsCreated += Array.isArray(e.proposals) ? e.proposals.length : 0;
405
+ metrics.sessionExtraction.warnings += Array.isArray(e.warnings) ? e.warnings.length : 0;
406
+ metrics.sessionExtraction.durationMs += toFiniteNumber(e.durationMs);
407
+ }
408
+ }
388
409
  return metrics;
389
410
  }
390
411
  /**
@@ -412,7 +433,9 @@ function finalizeImproveMetrics(metrics) {
412
433
  // `considered`/`written` for totals but are excluded from the rate so
413
434
  // they cannot drag it down. See ImproveHealthMetrics.memoryInference
414
435
  // jsdoc for the rationale.
415
- metrics.memoryInference.freshAttempts = Math.max(0, metrics.memoryInference.yieldEligibleConsidered - metrics.memoryInference.cacheHits);
436
+ metrics.memoryInference.freshAttempts = Math.max(0, metrics.memoryInference.yieldEligibleConsidered -
437
+ metrics.memoryInference.cacheHits -
438
+ metrics.memoryInference.skippedAborted);
416
439
  metrics.memoryInference.yieldRate =
417
440
  metrics.memoryInference.freshAttempts > 0
418
441
  ? roundRate(metrics.memoryInference.yieldEligibleWritten / metrics.memoryInference.freshAttempts)
@@ -423,6 +446,10 @@ function finalizeImproveMetrics(metrics) {
423
446
  metrics.graphExtraction.durationMs > 0;
424
447
  const cacheTotal = metrics.graphExtraction.cacheHits + metrics.graphExtraction.cacheMisses;
425
448
  metrics.graphExtraction.cacheHitRate = cacheTotal > 0 ? roundRate(metrics.graphExtraction.cacheHits / cacheTotal) : 0;
449
+ metrics.sessionExtraction.ran =
450
+ metrics.sessionExtraction.sessionsScanned > 0 ||
451
+ metrics.sessionExtraction.proposalsCreated > 0 ||
452
+ metrics.sessionExtraction.durationMs > 0;
426
453
  }
427
454
  /**
428
455
  * Merge per-row metrics from `src` into accumulator `dst`. All numeric fields
@@ -457,9 +484,10 @@ function mergeImproveMetrics(dst, src) {
457
484
  dst.actions.memoryInference += src.actions.memoryInference;
458
485
  dst.actions.graphExtraction += src.actions.graphExtraction;
459
486
  dst.actions.error += src.actions.error;
487
+ dst.autoAccept.promoted += src.autoAccept.promoted;
488
+ dst.autoAccept.validationFailed += src.autoAccept.validationFailed;
460
489
  dst.reflectsWithErrorContext += src.reflectsWithErrorContext;
461
490
  dst.coverageGapCount += src.coverageGapCount;
462
- dst.executionLogCandidateCount += src.executionLogCandidateCount;
463
491
  dst.evalCasesWritten += src.evalCasesWritten;
464
492
  dst.deadUrlCount += src.deadUrlCount;
465
493
  dst.memorySummary.eligible += src.memorySummary.eligible;
@@ -504,6 +532,12 @@ function mergeImproveMetrics(dst, src) {
504
532
  dst.graphExtraction.truncations += src.graphExtraction.truncations;
505
533
  dst.graphExtraction.failures += src.graphExtraction.failures;
506
534
  dst.graphExtraction.durationMs += src.graphExtraction.durationMs;
535
+ dst.sessionExtraction.sessionsScanned += src.sessionExtraction.sessionsScanned;
536
+ dst.sessionExtraction.sessionsExtracted += src.sessionExtraction.sessionsExtracted;
537
+ dst.sessionExtraction.sessionsSkipped += src.sessionExtraction.sessionsSkipped;
538
+ dst.sessionExtraction.proposalsCreated += src.sessionExtraction.proposalsCreated;
539
+ dst.sessionExtraction.warnings += src.sessionExtraction.warnings;
540
+ dst.sessionExtraction.durationMs += src.sessionExtraction.durationMs;
507
541
  }
508
542
  function loadImproveRunRows(db, since, until) {
509
543
  const sql = until
@@ -892,6 +926,10 @@ const INTERESTING_DELTA_PATHS = [
892
926
  "improve.memoryInference.skippedNoFacts",
893
927
  "improve.graphExtraction.cacheHitRate",
894
928
  "improve.graphExtraction.failures",
929
+ "improve.sessionExtraction.sessionsScanned",
930
+ "improve.sessionExtraction.proposalsCreated",
931
+ "improve.autoAccept.promoted",
932
+ "improve.autoAccept.validationFailed",
895
933
  "improve.wallTime.medianMs",
896
934
  "improve.wallTime.p95Ms",
897
935
  ];
@@ -1112,16 +1150,59 @@ export function akmHealth(options = {}) {
1112
1150
  catch {
1113
1151
  sessionLogEntries = [];
1114
1152
  }
1153
+ // session-log-failures: demoted to informational — the ERROR_PATTERNS regex
1154
+ // scans pre-LLM session text and produces false positives on diagnostic
1155
+ // conversation. It does not gate the real extraction pipeline (akmExtract).
1156
+ // Never triggers warn; kept for backward-compat visibility only.
1115
1157
  advisories.push({
1116
1158
  name: "session-log-failures",
1117
1159
  kind: "heuristic",
1118
- status: sessionLogEntries.length === 0 ? "pass" : "warn",
1119
- confidence: sessionLogEntries.length === 0 ? "low" : "medium",
1160
+ status: "pass",
1161
+ confidence: "low",
1120
1162
  message: sessionLogEntries.length === 0
1121
1163
  ? "No repeated external session-log failure patterns were detected."
1122
- : `${sessionLogEntries.length} repeated external session-log failure pattern(s) detected.`,
1164
+ : `${sessionLogEntries.length} raw session-log keyword match(es) detected (pre-LLM, informational only).`,
1123
1165
  evidence: { candidates: sessionLogEntries.slice(0, 5) },
1124
1166
  });
1167
+ const sx = improveSummary.sessionExtraction;
1168
+ const sxWarnReasons = [];
1169
+ if (sx.warnings > 0)
1170
+ sxWarnReasons.push(`${sx.warnings} harness error(s)`);
1171
+ if (sx.ran && sx.sessionsScanned >= 5 && sx.proposalsCreated === 0)
1172
+ sxWarnReasons.push("no proposals generated across scanned sessions");
1173
+ advisories.push({
1174
+ name: "session-extraction",
1175
+ kind: "heuristic",
1176
+ status: sxWarnReasons.length > 0 ? "warn" : "pass",
1177
+ confidence: sx.ran ? "medium" : "low",
1178
+ message: sx.ran
1179
+ ? sxWarnReasons.length > 0
1180
+ ? `Session extraction degraded: ${sxWarnReasons.join("; ")}.`
1181
+ : `Session extraction healthy: ${sx.sessionsScanned} scanned, ${sx.sessionsExtracted} extracted, ${sx.proposalsCreated} proposal(s) created.`
1182
+ : "Session extraction not active (feature disabled or no harness available).",
1183
+ evidence: {
1184
+ ran: sx.ran,
1185
+ sessionsScanned: sx.sessionsScanned,
1186
+ sessionsExtracted: sx.sessionsExtracted,
1187
+ sessionsSkipped: sx.sessionsSkipped,
1188
+ proposalsCreated: sx.proposalsCreated,
1189
+ warnings: sx.warnings,
1190
+ durationMs: sx.durationMs,
1191
+ },
1192
+ });
1193
+ const aa = improveSummary.autoAccept;
1194
+ advisories.push({
1195
+ name: "auto-accept-validation",
1196
+ kind: "heuristic",
1197
+ status: aa.validationFailed > 0 ? "warn" : "pass",
1198
+ confidence: aa.promoted + aa.validationFailed > 0 ? "high" : "low",
1199
+ message: aa.validationFailed > 0
1200
+ ? `${aa.validationFailed} proposal(s) passed confidence threshold but failed auto-accept validation (truncated description, invalid frontmatter, etc.) — they remain in the queue for manual review.`
1201
+ : aa.promoted > 0
1202
+ ? `Auto-accept healthy: ${aa.promoted} proposal(s) promoted, 0 validation failures.`
1203
+ : "Auto-accept gate did not run (disabled or no proposals above threshold).",
1204
+ evidence: { promoted: aa.promoted, validationFailed: aa.validationFailed },
1205
+ });
1125
1206
  const metrics = {
1126
1207
  taskFailRate: roundRate(taskFailRate),
1127
1208
  agentFailureRate: roundRate(agentFailureRate),
@@ -56,7 +56,7 @@ export const improveCommand = defineCommand({
56
56
  },
57
57
  profile: {
58
58
  type: "string",
59
- description: "Named improve profile from profiles.improve or built-in profiles (default, quick, thorough, memory-focus). Controls which sub-processes run and which asset types are processed.",
59
+ description: "Named improve profile from profiles.improve or built-in profiles (default, quick, thorough, memory-focus, graph-refresh). Controls which sub-processes run and which asset types are processed.",
60
60
  },
61
61
  sync: {
62
62
  type: "boolean",
@@ -217,7 +217,7 @@ export const improveCommand = defineCommand({
217
217
  runRecorded = true; // Suppress any late signal-handler write — the success path owns the row now.
218
218
  if (primaryStashDir) {
219
219
  try {
220
- writeImproveResultFile(primaryStashDir, runId, improveResult);
220
+ writeImproveResultFile(primaryStashDir, runId, improveResult, startedAtIso);
221
221
  }
222
222
  catch (err) {
223
223
  // Stderr warning on the failure path is preferable to crashing