akm-cli 0.8.0-rc2 → 0.8.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (313) hide show
  1. package/{.github/CHANGELOG.md → CHANGELOG.md} +238 -3
  2. package/README.md +22 -6
  3. package/SECURITY.md +93 -0
  4. package/dist/assets/help/help-accept.md +12 -0
  5. package/dist/assets/help/help-improve.md +81 -0
  6. package/dist/{commands → assets}/help/help-proposals.md +7 -4
  7. package/dist/assets/help/help-reject.md +11 -0
  8. package/dist/{output → assets/hints}/cli-hints-full.md +60 -32
  9. package/dist/{output → assets/hints}/cli-hints-short.md +10 -7
  10. package/dist/assets/profiles/default.json +15 -0
  11. package/dist/assets/profiles/graph-refresh.json +13 -0
  12. package/dist/assets/profiles/memory-focus.json +12 -0
  13. package/dist/assets/profiles/quick.json +15 -0
  14. package/dist/assets/profiles/thorough.json +15 -0
  15. package/dist/assets/prompts/extract-session.md +80 -0
  16. package/dist/assets/prompts/graph-extract-user-prompt.md +35 -0
  17. package/dist/assets/tasks/graph-refresh-weekly.yml +10 -0
  18. package/dist/cli/config-migrate.js +144 -0
  19. package/dist/cli/config-validate.js +39 -0
  20. package/dist/cli/confirm.js +73 -0
  21. package/dist/cli/parse-args.js +93 -3
  22. package/dist/cli/shared.js +129 -0
  23. package/dist/cli.js +2141 -1268
  24. package/dist/commands/add-cli.js +279 -0
  25. package/dist/commands/agent-dispatch.js +20 -12
  26. package/dist/commands/agent-support.js +11 -5
  27. package/dist/commands/completions.js +3 -0
  28. package/dist/commands/config-cli.js +129 -517
  29. package/dist/commands/consolidate.js +1557 -147
  30. package/dist/commands/curate.js +44 -3
  31. package/dist/commands/db-cli.js +23 -0
  32. package/dist/commands/distill-promotion-policy.js +5 -3
  33. package/dist/commands/distill.js +906 -100
  34. package/dist/commands/env.js +213 -0
  35. package/dist/commands/eval-cases.js +3 -0
  36. package/dist/commands/events.js +3 -0
  37. package/dist/commands/extract-cli.js +127 -0
  38. package/dist/commands/extract-prompt.js +217 -0
  39. package/dist/commands/extract.js +477 -0
  40. package/dist/commands/feedback-cli.js +331 -0
  41. package/dist/commands/graph.js +260 -5
  42. package/dist/commands/health.js +1042 -55
  43. package/dist/commands/history.js +51 -16
  44. package/dist/commands/improve-auto-accept.js +97 -0
  45. package/dist/commands/improve-cli.js +236 -0
  46. package/dist/commands/improve-profiles.js +138 -0
  47. package/dist/commands/improve-result-file.js +167 -0
  48. package/dist/commands/improve.js +1736 -346
  49. package/dist/commands/info.js +26 -28
  50. package/dist/commands/init.js +49 -1
  51. package/dist/commands/installed-stashes.js +6 -23
  52. package/dist/commands/knowledge.js +3 -0
  53. package/dist/commands/lint/agent-linter.js +3 -0
  54. package/dist/commands/lint/base-linter.js +199 -5
  55. package/dist/commands/lint/command-linter.js +3 -0
  56. package/dist/commands/lint/default-linter.js +3 -0
  57. package/dist/commands/lint/env-key-rules.js +154 -0
  58. package/dist/commands/lint/index.js +92 -3
  59. package/dist/commands/lint/knowledge-linter.js +3 -0
  60. package/dist/commands/lint/markdown-insertion.js +343 -0
  61. package/dist/commands/lint/memory-linter.js +3 -0
  62. package/dist/commands/lint/registry.js +3 -0
  63. package/dist/commands/lint/skill-linter.js +3 -0
  64. package/dist/commands/lint/task-linter.js +15 -12
  65. package/dist/commands/lint/types.js +3 -0
  66. package/dist/commands/lint/workflow-linter.js +3 -0
  67. package/dist/commands/lint.js +3 -0
  68. package/dist/commands/migration-help.js +5 -2
  69. package/dist/commands/proposal-drain-policies.js +128 -0
  70. package/dist/commands/proposal-drain.js +477 -0
  71. package/dist/commands/proposal.js +60 -6
  72. package/dist/commands/propose.js +24 -19
  73. package/dist/commands/reflect.js +1004 -94
  74. package/dist/commands/registry-cli.js +150 -0
  75. package/dist/commands/registry-search.js +3 -0
  76. package/dist/commands/remember-cli.js +257 -0
  77. package/dist/commands/remember.js +15 -6
  78. package/dist/commands/schema-repair.js +88 -15
  79. package/dist/commands/search.js +99 -14
  80. package/dist/commands/secret.js +173 -0
  81. package/dist/commands/self-update.js +3 -0
  82. package/dist/commands/show.js +32 -13
  83. package/dist/commands/source-add.js +7 -35
  84. package/dist/commands/source-clone.js +3 -0
  85. package/dist/commands/source-manage.js +3 -0
  86. package/dist/commands/tasks.js +161 -95
  87. package/dist/commands/url-checker.js +3 -0
  88. package/dist/core/action-contributors.js +3 -0
  89. package/dist/core/asset-ref.js +13 -2
  90. package/dist/core/asset-registry.js +9 -2
  91. package/dist/core/asset-serialize.js +88 -0
  92. package/dist/core/asset-spec.js +61 -5
  93. package/dist/core/common.js +93 -5
  94. package/dist/core/concurrent.js +3 -0
  95. package/dist/core/config-io.js +347 -0
  96. package/dist/core/config-migration.js +622 -0
  97. package/dist/core/config-schema.js +558 -0
  98. package/dist/core/config-sources.js +108 -0
  99. package/dist/core/config-types.js +4 -0
  100. package/dist/core/config-walker.js +337 -0
  101. package/dist/core/config.js +366 -1077
  102. package/dist/core/errors.js +42 -20
  103. package/dist/core/events.js +31 -25
  104. package/dist/core/file-lock.js +104 -0
  105. package/dist/core/frontmatter.js +75 -10
  106. package/dist/core/lesson-lint.js +3 -0
  107. package/dist/core/markdown.js +3 -0
  108. package/dist/core/memory-belief.js +62 -0
  109. package/dist/core/memory-contradiction-detect.js +274 -0
  110. package/dist/core/memory-improve.js +142 -14
  111. package/dist/core/parse.js +3 -0
  112. package/dist/core/paths.js +218 -50
  113. package/dist/core/proposal-quality-validators.js +380 -0
  114. package/dist/core/proposal-validators.js +11 -3
  115. package/dist/core/proposals.js +464 -5
  116. package/dist/core/state-db.js +349 -56
  117. package/dist/core/text-truncation.js +107 -0
  118. package/dist/core/time.js +3 -0
  119. package/dist/core/tty.js +59 -0
  120. package/dist/core/warn.js +7 -2
  121. package/dist/core/write-source.js +12 -0
  122. package/dist/indexer/db-backup.js +391 -0
  123. package/dist/indexer/db-search.js +136 -28
  124. package/dist/indexer/db.js +661 -166
  125. package/dist/indexer/ensure-index.js +3 -0
  126. package/dist/indexer/file-context.js +3 -0
  127. package/dist/indexer/graph-boost.js +162 -40
  128. package/dist/indexer/graph-db.js +241 -51
  129. package/dist/indexer/graph-dedup.js +3 -7
  130. package/dist/indexer/graph-extraction.js +242 -149
  131. package/dist/indexer/index-context.js +3 -9
  132. package/dist/indexer/indexer.js +86 -16
  133. package/dist/indexer/llm-cache.js +24 -19
  134. package/dist/indexer/manifest.js +3 -0
  135. package/dist/indexer/matchers.js +184 -11
  136. package/dist/indexer/memory-inference.js +94 -50
  137. package/dist/indexer/metadata-contributors.js +3 -0
  138. package/dist/indexer/metadata.js +110 -50
  139. package/dist/indexer/path-resolver.js +3 -0
  140. package/dist/indexer/project-context.js +192 -0
  141. package/dist/indexer/ranking-contributors.js +134 -7
  142. package/dist/indexer/ranking.js +8 -1
  143. package/dist/indexer/search-fields.js +5 -9
  144. package/dist/indexer/search-hit-enrichers.js +91 -2
  145. package/dist/indexer/search-source.js +20 -1
  146. package/dist/indexer/semantic-status.js +4 -1
  147. package/dist/indexer/staleness-detect.js +447 -0
  148. package/dist/indexer/usage-events.js +12 -9
  149. package/dist/indexer/walker.js +3 -0
  150. package/dist/integrations/agent/builders.js +135 -0
  151. package/dist/integrations/agent/config.js +121 -401
  152. package/dist/integrations/agent/detect.js +3 -0
  153. package/dist/integrations/agent/index.js +6 -14
  154. package/dist/integrations/agent/model-aliases.js +55 -0
  155. package/dist/integrations/agent/profiles.js +3 -0
  156. package/dist/integrations/agent/prompts.js +137 -8
  157. package/dist/integrations/agent/runner.js +208 -0
  158. package/dist/integrations/agent/sdk-runner.js +8 -2
  159. package/dist/integrations/agent/spawn.js +54 -14
  160. package/dist/integrations/github.js +3 -0
  161. package/dist/integrations/lockfile.js +22 -51
  162. package/dist/integrations/session-logs/index.js +4 -0
  163. package/dist/integrations/session-logs/inline-refs.js +35 -0
  164. package/dist/integrations/session-logs/pre-filter.js +152 -0
  165. package/dist/integrations/session-logs/providers/claude-code.js +226 -0
  166. package/dist/integrations/session-logs/providers/opencode.js +231 -25
  167. package/dist/integrations/session-logs/types.js +3 -0
  168. package/dist/llm/call-ai.js +14 -26
  169. package/dist/llm/client.js +16 -2
  170. package/dist/llm/embedder.js +20 -29
  171. package/dist/llm/embedders/cache.js +3 -7
  172. package/dist/llm/embedders/local.js +42 -1
  173. package/dist/llm/embedders/remote.js +20 -8
  174. package/dist/llm/embedders/types.js +3 -7
  175. package/dist/llm/feature-gate.js +92 -56
  176. package/dist/llm/graph-extract.js +402 -31
  177. package/dist/llm/index-passes.js +44 -29
  178. package/dist/llm/memory-infer.js +30 -2
  179. package/dist/llm/metadata-enhance.js +3 -7
  180. package/dist/output/cli-hints.js +7 -4
  181. package/dist/output/context.js +60 -8
  182. package/dist/output/renderers.js +170 -194
  183. package/dist/output/shapes/curate.js +56 -0
  184. package/dist/output/shapes/distill.js +10 -0
  185. package/dist/output/shapes/env-list.js +19 -0
  186. package/dist/output/shapes/events.js +11 -0
  187. package/dist/output/shapes/helpers.js +424 -0
  188. package/dist/output/shapes/history.js +7 -0
  189. package/dist/output/shapes/passthrough.js +105 -0
  190. package/dist/output/shapes/proposal-accept.js +7 -0
  191. package/dist/output/shapes/proposal-diff.js +7 -0
  192. package/dist/output/shapes/proposal-list.js +7 -0
  193. package/dist/output/shapes/proposal-producer.js +11 -0
  194. package/dist/output/shapes/proposal-reject.js +7 -0
  195. package/dist/output/shapes/proposal-show.js +7 -0
  196. package/dist/output/shapes/registry-search.js +6 -0
  197. package/dist/output/shapes/registry.js +30 -0
  198. package/dist/output/shapes/search.js +6 -0
  199. package/dist/output/shapes/secret-list.js +19 -0
  200. package/dist/output/shapes/show.js +6 -0
  201. package/dist/output/shapes/vault-list.js +19 -0
  202. package/dist/output/shapes.js +51 -549
  203. package/dist/output/text/add.js +6 -0
  204. package/dist/output/text/clone.js +6 -0
  205. package/dist/output/text/config.js +6 -0
  206. package/dist/output/text/curate.js +6 -0
  207. package/dist/output/text/distill.js +7 -0
  208. package/dist/output/text/enable-disable.js +7 -0
  209. package/dist/output/text/events.js +10 -0
  210. package/dist/output/text/feedback.js +6 -0
  211. package/dist/output/text/helpers.js +1059 -0
  212. package/dist/output/text/history.js +7 -0
  213. package/dist/output/text/import.js +6 -0
  214. package/dist/output/text/index.js +6 -0
  215. package/dist/output/text/info.js +6 -0
  216. package/dist/output/text/init.js +6 -0
  217. package/dist/output/text/list.js +6 -0
  218. package/dist/output/text/proposal-producer.js +8 -0
  219. package/dist/output/text/proposal.js +12 -0
  220. package/dist/output/text/registry-commands.js +11 -0
  221. package/dist/output/text/registry.js +30 -0
  222. package/dist/output/text/remember.js +6 -0
  223. package/dist/output/text/remove.js +6 -0
  224. package/dist/output/text/save.js +6 -0
  225. package/dist/output/text/search.js +6 -0
  226. package/dist/output/text/show.js +6 -0
  227. package/dist/output/text/update.js +6 -0
  228. package/dist/output/text/upgrade.js +6 -0
  229. package/dist/output/text/vault.js +16 -0
  230. package/dist/output/text/wiki.js +15 -0
  231. package/dist/output/text/workflow.js +14 -0
  232. package/dist/output/text.js +44 -1329
  233. package/dist/registry/build-index.js +3 -0
  234. package/dist/registry/create-provider-registry.js +3 -0
  235. package/dist/registry/factory.js +4 -1
  236. package/dist/registry/origin-resolve.js +3 -0
  237. package/dist/registry/providers/index.js +3 -0
  238. package/dist/registry/providers/skills-sh.js +11 -2
  239. package/dist/registry/providers/static-index.js +10 -1
  240. package/dist/registry/providers/types.js +3 -24
  241. package/dist/registry/resolve.js +11 -16
  242. package/dist/registry/types.js +3 -0
  243. package/dist/scripts/migrate-storage.js +17767 -0
  244. package/dist/scripts/migrations/import-fs-improve-runs-to-db.js +9031 -0
  245. package/dist/scripts/migrations/v16-to-v17.js +141 -0
  246. package/dist/setup/detect.js +3 -0
  247. package/dist/setup/ripgrep-install.js +3 -0
  248. package/dist/setup/ripgrep-resolve.js +3 -0
  249. package/dist/setup/setup.js +306 -67
  250. package/dist/setup/steps.js +3 -15
  251. package/dist/sources/include.js +3 -0
  252. package/dist/sources/provider-factory.js +3 -11
  253. package/dist/sources/provider.js +3 -20
  254. package/dist/sources/providers/filesystem.js +19 -23
  255. package/dist/sources/providers/git.js +171 -21
  256. package/dist/sources/providers/index.js +3 -0
  257. package/dist/sources/providers/install-types.js +3 -13
  258. package/dist/sources/providers/npm.js +3 -4
  259. package/dist/sources/providers/provider-utils.js +3 -0
  260. package/dist/sources/providers/sync-from-ref.js +3 -11
  261. package/dist/sources/providers/tar-utils.js +3 -0
  262. package/dist/sources/providers/website.js +18 -22
  263. package/dist/sources/resolve.js +3 -0
  264. package/dist/sources/types.js +3 -0
  265. package/dist/sources/website-ingest.js +3 -0
  266. package/dist/tasks/backends/cron.js +3 -0
  267. package/dist/tasks/backends/exec-utils.js +3 -0
  268. package/dist/tasks/backends/index.js +3 -11
  269. package/dist/tasks/backends/launchd.js +4 -1
  270. package/dist/tasks/backends/schtasks.js +4 -1
  271. package/dist/tasks/parser.js +51 -38
  272. package/dist/tasks/resolveAkmBin.js +3 -0
  273. package/dist/tasks/runner.js +35 -9
  274. package/dist/tasks/schedule.js +20 -1
  275. package/dist/tasks/schema.js +5 -3
  276. package/dist/tasks/validator.js +6 -3
  277. package/dist/version.js +3 -0
  278. package/dist/wiki/wiki-templates.js +6 -3
  279. package/dist/wiki/wiki.js +4 -1
  280. package/dist/workflows/authoring.js +4 -1
  281. package/dist/workflows/cli.js +3 -0
  282. package/dist/workflows/db.js +140 -10
  283. package/dist/workflows/document-cache.js +3 -10
  284. package/dist/workflows/parser.js +3 -0
  285. package/dist/workflows/renderer.js +3 -0
  286. package/dist/workflows/runs.js +18 -1
  287. package/dist/workflows/schema.js +3 -0
  288. package/dist/workflows/scope-key.js +3 -0
  289. package/dist/workflows/validator.js +5 -9
  290. package/docs/README.md +7 -2
  291. package/docs/data-and-telemetry.md +225 -0
  292. package/docs/migration/release-notes/0.7.5.md +2 -2
  293. package/docs/migration/release-notes/0.8.0.md +57 -5
  294. package/docs/migration/v0.7-to-v0.8.md +1378 -0
  295. package/package.json +28 -11
  296. package/.github/LICENSE +0 -374
  297. package/dist/commands/help/help-accept.md +0 -9
  298. package/dist/commands/help/help-improve.md +0 -53
  299. package/dist/commands/help/help-reject.md +0 -8
  300. package/dist/commands/install-audit.js +0 -385
  301. package/dist/commands/vault.js +0 -310
  302. package/dist/indexer/match-contributors.js +0 -141
  303. package/dist/integrations/agent/pipeline.js +0 -39
  304. package/dist/integrations/agent/runners.js +0 -31
  305. package/dist/llm/prompts/graph-extract-user-prompt.md +0 -12
  306. /package/dist/{tasks → assets}/backends/launchd-template.xml +0 -0
  307. /package/dist/{tasks → assets}/backends/schtasks-template.xml +0 -0
  308. /package/dist/{commands → assets}/help/help-propose.md +0 -0
  309. /package/dist/{wiki → assets/wiki}/index-template.md +0 -0
  310. /package/dist/{wiki → assets/wiki}/ingest-workflow-template.md +0 -0
  311. /package/dist/{wiki → assets/wiki}/log-template.md +0 -0
  312. /package/dist/{wiki → assets/wiki}/schema-template.md +0 -0
  313. /package/dist/{workflows → assets/workflows}/workflow-template.md +0 -0
@@ -1,3 +1,6 @@
1
+ // This Source Code Form is subject to the terms of the Mozilla Public
2
+ // License, v. 2.0. If a copy of the MPL was not distributed with this
3
+ // file, You can obtain one at https://mozilla.org/MPL/2.0/.
1
4
  /**
2
5
  * `akm reflect [ref]` — proposal-producing agent command (#226).
3
6
  *
@@ -19,22 +22,29 @@
19
22
  * a committed asset, and the `accept` flow is the bridge.
20
23
  */
21
24
  import fs from "node:fs";
25
+ import os from "node:os";
22
26
  import path from "node:path";
23
27
  import { parseAssetRef } from "../core/asset-ref";
28
+ import { assembleAssetFromString, serializeFrontmatter } from "../core/asset-serialize";
24
29
  import { resolveStashDir } from "../core/common";
30
+ import { loadConfig } from "../core/config";
25
31
  import { ConfigError, UsageError } from "../core/errors";
26
32
  import { appendEvent, readEvents } from "../core/events";
27
33
  import { parseFrontmatter } from "../core/frontmatter";
28
34
  import { lintLessonContent } from "../core/lesson-lint";
29
35
  import { stripMarkdownFences } from "../core/markdown";
30
- import { createProposal } from "../core/proposals";
36
+ import { checkReflectSize } from "../core/proposal-quality-validators";
37
+ import { createProposal, isProposalSkipped, listProposals, } from "../core/proposals";
31
38
  import { lookup } from "../indexer/indexer";
32
39
  import { runAgent, } from "../integrations/agent";
33
40
  import { resolveProcessAgentProfile } from "../integrations/agent/config";
34
- import { runProposalAgentPipeline } from "../integrations/agent/pipeline";
35
- import { buildReflectPrompt, parseAgentProposalPayload } from "../integrations/agent/prompts";
41
+ import { buildReflectPrompt, extractDraftConfidence, parseAgentProposalPayload, } from "../integrations/agent/prompts";
42
+ import { resolveImproveProcessRunnerFromProfile } from "../integrations/agent/runner";
43
+ import { runOpencodeSdk } from "../integrations/agent/sdk-runner";
44
+ import { chatCompletion } from "../llm/client";
45
+ import { isLlmFeatureEnabled } from "../llm/feature-gate";
36
46
  import { baseFailureFields, enoentHintMessage, isEnoentFailure, loadAgentConfigFromDisk, resolveAgentProfile, } from "./agent-support";
37
- import { deriveLessonRef } from "./distill";
47
+ import { deriveLessonRef, runLessonQualityJudge } from "./distill";
38
48
  const MAX_FEEDBACK_LINES = 10;
39
49
  const MAX_GLOBAL_FEEDBACK_LINES = 20;
40
50
  /**
@@ -61,6 +71,102 @@ function readRecentFeedback(ref) {
61
71
  return [];
62
72
  }
63
73
  }
74
+ const MAX_REJECTED_PROPOSALS = 3;
75
+ /**
76
+ * Asset types that reflect is allowed to operate on.
77
+ *
78
+ * Reflect's canonical output shape is `frontmatter + markdown body`. Running it
79
+ * against types whose on-disk form is NOT markdown (executable scripts, vault
80
+ * env files, YAML tasks) blindly prepends `---\n…\n---\n` to the asset and
81
+ * breaks the runtime contract — for example a `.ts` script with a YAML preamble
82
+ * is a TypeScript syntax error.
83
+ *
84
+ * Whitelisting (rather than blacklisting) keeps the door closed by default as
85
+ * new asset types are registered. To allow a custom registered type, extend
86
+ * this set explicitly.
87
+ *
88
+ * Observed regression: proposal `8737ab63` (May 2026) prepended frontmatter to
89
+ * a `.ts` script file via reflect. This whitelist prevents that.
90
+ */
91
+ export const REFLECT_ALLOWED_TYPES = new Set([
92
+ "knowledge",
93
+ "memory",
94
+ "lesson",
95
+ "wiki",
96
+ "skill",
97
+ "agent",
98
+ "command",
99
+ "workflow",
100
+ ]);
101
+ /**
102
+ * Identity / structural frontmatter fields the LLM is NEVER allowed to change.
103
+ *
104
+ * Renaming `name` on a skill silently breaks ref resolution because the ref is
105
+ * derived from the on-disk path. Similar reasoning for `ref`, `id`, `slug`,
106
+ * and `type`. The post-processor below restores any of these fields if the
107
+ * LLM tried to rewrite them.
108
+ *
109
+ * Observed regression: proposal `26941510` (May 2026) renamed
110
+ * `skill:openpalm-stack-diagnostics`'s `name` field to `"diagnostic-checklist"`.
111
+ */
112
+ const PROTECTED_FRONTMATTER_FIELDS = new Set(["name", "ref", "id", "slug", "type"]);
113
+ /**
114
+ * Read up to 3 of the most recently updated rejected proposals (archive included) for a given ref from the
115
+ * proposal store. Best-effort — returns `[]` when the proposals dir is absent
116
+ * or the ref is undefined. Used to inject Reflexion-style verbal-RL context
117
+ * into the reflect prompt so the agent avoids re-proposing already-refused
118
+ * content (arXiv:2303.11366).
119
+ */
120
+ function readRejectedProposals(stash, ref) {
121
+ if (!ref)
122
+ return [];
123
+ try {
124
+ return listProposals(stash, { ref, status: "rejected", includeArchive: true })
125
+ .sort((a, b) => new Date(b.updatedAt ?? 0).getTime() - new Date(a.updatedAt ?? 0).getTime())
126
+ .slice(0, MAX_REJECTED_PROPOSALS)
127
+ .map((p) => ({
128
+ ref: p.ref,
129
+ reason: p.review?.reason ?? "no reason given",
130
+ contentPreview: p.payload.content.slice(0, 500),
131
+ }));
132
+ }
133
+ catch {
134
+ return [];
135
+ }
136
+ }
137
+ /**
138
+ * Synthesize a tmp draft-file path for the agent/sdk file-write contract.
139
+ *
140
+ * Mirrors `src/commands/propose.ts:163-178` — when the runner is agent-CLI or
141
+ * the OpenCode SDK, we instruct the agent to write the proposal body directly
142
+ * to this file instead of inlining it in JSON on stdout. This bypasses two
143
+ * known failure modes for long assets: (a) ARG_MAX truncation on prompt
144
+ * round-trips through fenced JSON, and (b) embedded-JSON parser brittleness
145
+ * on multi-KB bodies (e.g. the `knowledge:systems/KOKORO_USAGE_GUIDE` 8.4KB
146
+ * payload that produced 4/5 `parse_error` in May 2026 reflect validation).
147
+ *
148
+ * The path lives under {@link os.tmpdir} and embeds the (sanitized) ref +
149
+ * timestamp + random suffix so concurrent reflect calls cannot collide.
150
+ *
151
+ * Always returns a path; callers are expected to skip it for the LLM HTTP runner — the chat-completion transport
152
+ * has no filesystem access (see warning at `src/llm/call-ai.ts:64-71`).
153
+ */
154
+ function synthesizeReflectDraftPath(ref) {
155
+ const safeRef = (ref ?? "no-ref").replace(/[^a-z0-9_-]/gi, "_");
156
+ const rand = Math.random().toString(36).slice(2, 8);
157
+ return path.join(os.tmpdir(), `akm-reflect-${safeRef}-${Date.now()}-${rand}.md`);
158
+ }
159
+ /**
160
+ * Heuristic check that the agent honoured the file-write contract.
161
+ * The contract instructs the agent to emit a single `DRAFT_WRITTEN` line on
162
+ * stdout when it has finished writing the draft file. Some agents print
163
+ * additional log lines; we match anywhere in the captured stdout.
164
+ */
165
+ function stdoutSignalsDraftWritten(stdout) {
166
+ if (!stdout)
167
+ return false;
168
+ return /\bDRAFT_WRITTEN\b/.test(stdout);
169
+ }
64
170
  /**
65
171
  * Build schema/lint hints for the prompt. For lesson refs, run the lesson
66
172
  * lint over the current content and surface any findings — they are a
@@ -132,20 +238,334 @@ async function readRelatedLessons(stash, ref, parsedRef) {
132
238
  catch {
133
239
  // Best effort only.
134
240
  }
241
+ // R-4 / #373: Filter out lessons with `derived_from_reflect: true` unless
242
+ // independent feedback exists for the skill. This prevents the echo-chamber
243
+ // risk where reflect-output lessons feed back into the next reflect pass as
244
+ // "independent" evidence, amplifying their own prior outputs over time.
245
+ //
246
+ // ExpeL arXiv:2308.10144: rules need differential evidence from independent
247
+ // sources (success vs failure traces). A lesson that only ever appeared from
248
+ // reflect-internal signals has no such differential signal.
249
+ //
250
+ // "Independent feedback" = any usage_events "feedback" events for the skill
251
+ // ref itself, indicating a human or external system rated the skill.
252
+ let hasIndependentFeedback = false;
253
+ try {
254
+ const feedbackEventsForSkill = readEvents({ type: "feedback", ref }).events;
255
+ hasIndependentFeedback = feedbackEventsForSkill.length > 0;
256
+ }
257
+ catch {
258
+ // Best effort — if we can't check, allow all lessons through.
259
+ hasIndependentFeedback = true;
260
+ }
261
+ if (!hasIndependentFeedback) {
262
+ // No independent feedback: exclude all reflect-derived lessons to prevent
263
+ // echo-chamber amplification.
264
+ for (const [lessonRef, lesson] of related.entries()) {
265
+ try {
266
+ const lessonFm = parseFrontmatter(lesson.content);
267
+ if (lessonFm.data.derived_from_reflect === true) {
268
+ related.delete(lessonRef);
269
+ }
270
+ }
271
+ catch {
272
+ // If we can't parse the frontmatter, keep the lesson (safe default).
273
+ }
274
+ }
275
+ }
135
276
  return [...related.values()];
136
277
  }
137
- function fallbackPayloadFromRawContent(stdout, ref) {
278
+ /**
279
+ * Returns true only when `stdout` is a recognised AKM proposal-skip signal.
280
+ *
281
+ * Two accepted forms:
282
+ * 1. Structured JSON: `{ skipped: true }` or `{ reason: "<known-skip-reason>" }`
283
+ * 2. Legacy text: any line matching `/proposal skipped/i`
284
+ *
285
+ * The previous regex `/cooldown/i` was intentionally narrowed to avoid
286
+ * false-positives on real agent error messages that incidentally contain the
287
+ * word "cooldown" (e.g. "rate limit cooldown exceeded"). Only the tightly
288
+ * scoped forms above are treated as legitimate skip signals.
289
+ */
290
+ function isStructuredCooldownSignal(stdout) {
291
+ try {
292
+ const parsed = JSON.parse(stdout.trim());
293
+ if (parsed?.skipped === true)
294
+ return true;
295
+ if (typeof parsed?.reason === "string" &&
296
+ ["duplicate_pending", "content_hash_match", "cooldown", "below_threshold"].includes(parsed.reason))
297
+ return true;
298
+ }
299
+ catch {
300
+ // Non-JSON stdout is never a structured cooldown signal.
301
+ }
302
+ // Legacy text signal emitted by older proposal output lines.
303
+ return /proposal skipped/i.test(stdout);
304
+ }
305
+ /**
306
+ * Fallback payload parser for reflect agent stdout (R-6 / #375).
307
+ *
308
+ * When the agent does not emit valid JSON (old-style agents, SDK mode without
309
+ * structured output support), this function attempts to recover a proposal
310
+ * payload from the raw markdown output. The parser is deliberately strict —
311
+ * it requires the content to have a complete proposal structure (frontmatter
312
+ * with required fields or a full heading + body).
313
+ *
314
+ * Strictness rationale: The previous implementation accepted any markdown
315
+ * starting with `#` or `---`, which admitted malformed / hallucinated content
316
+ * as valid proposals. Anthropic agent best practices recommend structured
317
+ * output when the SDK supports it; this tighter fallback is the safety net.
318
+ *
319
+ * When `sdkMode === true`, structured output (tool-call schema) should be used
320
+ * instead of this fallback. That wiring is tracked separately (full SDK
321
+ * structured-output integration); for now this tighter parser applies to all
322
+ * modes and is the primary R-6 deliverable.
323
+ */
324
+ function fallbackPayloadFromRawContent(stdout, ref, sdkMode = false) {
138
325
  if (!ref)
139
326
  return undefined;
140
327
  const trimmed = stripMarkdownFences(stdout).trim();
141
328
  if (!trimmed)
142
329
  return undefined;
143
- if (!looksLikeAssetContent(trimmed))
330
+ const targetType = ref.split(":")[0];
331
+ if (!looksLikeAssetContent(trimmed, sdkMode, targetType))
144
332
  return undefined;
145
333
  return { ref, content: trimmed };
146
334
  }
147
- function looksLikeAssetContent(value) {
148
- return value.startsWith("#") || value.startsWith("---");
335
+ /**
336
+ * Determine whether raw agent output looks like a valid asset payload (R-6 / #375).
337
+ *
338
+ * Tightened from the previous `startsWith("#") || startsWith("---")`:
339
+ *
340
+ * - YAML frontmatter (`---`): must contain a `description:` field (the only
341
+ * required frontmatter key in v1 spec). This eliminates empty `---\n---\n`
342
+ * blocks and pure delimiter sequences as valid payloads.
343
+ * - Heading start (`#`): must have at least 2 non-blank lines in total (the heading plus one body line),
344
+ * to ensure there is actual body content and not just a title stub.
345
+ * - In SDK mode (`sdkMode === true`): additionally requires `when_to_use:` for
346
+ * lesson types (full structured output will replace this in a future PR).
347
+ */
348
+ function looksLikeAssetContent(value, sdkMode = false, targetType) {
349
+ if (value.startsWith("---")) {
350
+ // YAML frontmatter must contain at least a description field.
351
+ const fmEnd = value.indexOf("\n---", 4);
352
+ if (fmEnd === -1)
353
+ return false;
354
+ const fmBlock = value.slice(0, fmEnd + 4);
355
+ const hasDescription = /^description\s*:/m.test(fmBlock);
356
+ if (!hasDescription)
357
+ return false;
358
+ // In SDK mode, lesson assets additionally require a when_to_use field.
359
+ // Use the target ref type rather than frontmatter type: (which is non-standard).
360
+ if (sdkMode && targetType === "lesson") {
361
+ return /^when_to_use\s*:/m.test(fmBlock);
362
+ }
363
+ return true;
364
+ }
365
+ if (value.startsWith("#")) {
366
+ // At least 2 non-blank lines in total (the heading + at least one body line).
367
+ // This rejects pure title stubs (`# Title\n`) but accepts minimal valid content.
368
+ const lines = value.split("\n").filter((l) => l.trim().length > 0);
369
+ return lines.length >= 2;
370
+ }
371
+ return false;
372
+ }
373
+ /**
374
+ * Split a markdown blob into `{ fmText, body }`.
375
+ *
376
+ * Returns `{ fmText: null, body: raw }` when the blob does not start with a frontmatter block.
377
+ */
378
+ function splitFrontmatter(raw) {
379
+ const m = raw.match(/^---\r?\n([\s\S]*?)\r?\n---\r?\n?([\s\S]*)$/);
380
+ if (!m)
381
+ return { fmText: null, body: raw };
382
+ return { fmText: m[1], body: m[2] };
383
+ }
384
/**
 * Reflect post-processor — applies the safety rails to an LLM proposal before
 * it is turned into a proposal payload:
 *
 *  1. Frontmatter is rebuilt as the source frontmatter overlaid with the
 *     LLM-supplied fields (from a frontmatter block inside `payload.content`
 *     and/or the explicit `payload.frontmatter` object). Fields listed in
 *     `PROTECTED_FRONTMATTER_FIELDS` always come from the source; an LLM value
 *     that differs is dropped and a warning is recorded.
 *  2. The proposed body is checked with `checkReflectSize` against the source
 *     body; a failing check produces a `reject` result.
 *  3. Any leading `---` frontmatter block the LLM put inside `content` is
 *     stripped from the body, and leading whitespace is trimmed.
 *
 * Source-less case (`sourceContent` undefined/empty): the source frontmatter
 * is treated as `{}` and the source body as `""`; the LLM frontmatter block is
 * still stripped and re-emitted through the same merge.
 *
 * @param {{ content: string, frontmatter?: object }} payload - Parsed proposal
 *   from the agent/LLM.
 * @param {string | undefined} sourceContent - Current on-disk asset text.
 * @param {string} targetRef - Ref used only in the rejection message.
 * @returns {{ content: string, warnings: string[], frontmatter?: object,
 *   reject?: { reason: string, error: string } }} On `reject`, `content` is the
 *   untouched `payload.content`; otherwise `content` is the reassembled asset
 *   and `frontmatter` is present when the merged frontmatter is non-empty.
 */
function sanitizeReflectPayload(payload, sourceContent, targetRef) {
    const warnings = [];
    // Only a plain record can be merged key-by-key. Arrays and scalars also
    // satisfy loose checks (`typeof [] === "object"`) and would otherwise be
    // spread into numeric keys inside the frontmatter.
    const isPlainRecord = (value) => value !== null && typeof value === "object" && !Array.isArray(value);
    const { fmText: sourceFmText, body: sourceBody } = sourceContent
        ? splitFrontmatter(sourceContent)
        : { fmText: null, body: "" };
    let sourceFm = {};
    if (sourceFmText !== null) {
        const parsedSource = parseFrontmatter(sourceContent ?? "").data;
        sourceFm = isPlainRecord(parsedSource) ? parsedSource : {};
    }
    const { fmText: llmFmText, body: rawLlmBody } = splitFrontmatter(payload.content);
    // Collect LLM-proposed frontmatter: first from a block embedded in
    // `content`, then from the explicit `frontmatter` field (which wins on
    // key collisions).
    let llmFm = {};
    if (llmFmText !== null) {
        warnings.push("LLM emitted frontmatter in content; stripped and merged through identity guard.");
        try {
            const parsedLlm = parseFrontmatter(payload.content).data;
            // Copy so the deletes below never mutate the parser's own object.
            llmFm = isPlainRecord(parsedLlm) ? { ...parsedLlm } : {};
        }
        catch {
            // Unparseable LLM frontmatter is simply ignored; the block has
            // already been stripped from the body.
            llmFm = {};
        }
    }
    if (isPlainRecord(payload.frontmatter)) {
        llmFm = { ...llmFm, ...payload.frontmatter };
    }
    // Protected identity fields must come from the source asset, never from
    // the LLM. Differing values are dropped with a warning.
    for (const field of PROTECTED_FRONTMATTER_FIELDS) {
        if (field in llmFm && llmFm[field] !== sourceFm[field]) {
            warnings.push(`LLM attempted to change protected frontmatter field "${field}"; restored from source.`);
            delete llmFm[field];
        }
    }
    // Effective frontmatter: source overlaid with the sanitized LLM fields,
    // then protected keys re-asserted from the source.
    const mergedFm = { ...sourceFm, ...llmFm };
    for (const field of PROTECTED_FRONTMATTER_FIELDS) {
        if (field in sourceFm) {
            mergedFm[field] = sourceFm[field];
        }
    }
    const cleanedBody = rawLlmBody.replace(/^\s+/, "");
    // Size guard. NOTE(review): the "50%" / "250%" labels below are hard-coded
    // in the message; the actual thresholds are decided inside
    // `checkReflectSize` — confirm they stay in sync.
    const sizeOutcome = checkReflectSize(sourceBody, cleanedBody);
    if (!sizeOutcome.ok) {
        const shrunk = sizeOutcome.code === "EXCESSIVE_SHRINKAGE";
        const pct = (sizeOutcome.ratio * 100).toFixed(0);
        const limit = shrunk ? "minimum 50%" : "maximum 250%";
        const cause = shrunk
            ? "Concrete content was likely deleted."
            : "Speculative material was likely added.";
        return {
            content: payload.content,
            warnings,
            reject: {
                // Content-policy guard hit: the LLM responded, but the output
                // was blocked. Reported under its own reason so guard hits can
                // be told apart from LLM faults.
                reason: "content_policy_reject",
                error: `Reflect rejected: ${sizeOutcome.code} — proposed body is ${pct}% of source (${limit}) for ref ${targetRef}. ${cause}`,
            },
        };
    }
    // Reassemble: merged frontmatter + cleaned body. With no frontmatter at
    // all, emit the body alone so no stray `---` fence is added.
    const hasFrontmatter = Object.keys(mergedFm).length > 0;
    const reassembled = hasFrontmatter
        ? assembleAssetFromString(serializeFrontmatter(mergedFm), cleanedBody)
        : cleanedBody;
    return {
        content: reassembled,
        ...(hasFrontmatter ? { frontmatter: mergedFm } : {}),
        warnings,
    };
}
488
/**
 * JSON Schema for structured reflect output. Passed to `chatCompletion` when
 * the connection has `supportsJsonSchema: true` so the model returns a strict
 * JSON object matching {@link AgentProposalPayload}.
 *
 * Only `ref` and `content` are required; `frontmatter` and `confidence` are
 * optional, and no other top-level keys are allowed.
 */
export const REFLECT_JSON_SCHEMA = {
    type: "object",
    required: ["ref", "content"],
    additionalProperties: false,
    properties: {
        ref: { type: "string", description: "Asset ref in type:name format (e.g. lesson:my-lesson)." },
        content: { type: "string", description: "Full markdown content for the asset." },
        // Free-form key/value bag: any keys are accepted at the schema level.
        frontmatter: {
            type: "object",
            description: "Optional frontmatter key-value pairs to merge into the asset.",
            additionalProperties: true,
        },
        // Phase 6A (Advantage D6a): self-reported confidence in [0, 1]. When the
        // LLM is well-calibrated, scores at or above the configured threshold
        // (default 0.8) drive auto-accept in `akm improve`. Out-of-range or
        // non-finite values are clamped/dropped by the parser — the schema keeps
        // the field optional so older agents that don't emit a score still work.
        confidence: {
            type: "number",
            minimum: 0,
            maximum: 1,
            description: "Optional self-reported quality confidence in [0, 1]. Proposals with confidence >= the active threshold (default 0.8) may be auto-accepted by `akm improve`.",
        },
    },
};
518
/**
 * Follow-up user message appended after the prior draft on refinement passes
 * (Self-Refine loop): asks the model to critique and improve its own proposal.
 */
const REFLECT_CRITIQUE_PROMPT = "Your previous proposal is shown above. Please review it critically and provide an improved version that is more specific, actionable, and avoids any issues with the previous attempt. Return only the improved JSON proposal.";
/**
 * Perform one reflect iteration by calling the LLM directly.
 *
 * The conversation always starts with the prompt as a user message. On a
 * refinement pass (`opts.priorDraft` defined and `opts.iteration > 0`) the
 * prior draft is replayed as an assistant message followed by the critique
 * request.
 *
 * The result mirrors an agent run: on success `ok: true`, `exitCode: 0` and
 * `stdout` holds whatever the chat function returned (not parsed here). Any
 * thrown error is captured — never rethrown — as `ok: false`, `exitCode: 1`,
 * `reason: "non_zero_exit"`, with the message in both `stderr` and `error`.
 *
 * NOTE(review): `opts.timeoutMs` is supplied by the caller but not read in
 * this function — confirm whether `chatCompletion` enforces a timeout itself.
 *
 * @param {object} opts
 * @param {string} [opts.prompt] - User prompt; defaults to `""`.
 * @param {object} opts.connection - Forwarded unchanged to the chat function.
 * @param {string} [opts.priorDraft] - Previous iteration's raw output.
 * @param {number} [opts.iteration] - Zero-based iteration index.
 * @param {Function} [opts.chat] - Test seam `(connection, messages)`; when
 *   set, `chatCompletion` is not called and schema/token options are unused.
 * @param {object} [opts.responseSchema] - Forwarded to `chatCompletion`.
 * @param {number} [opts.maxTokens] - Forwarded to `chatCompletion`.
 * @returns {Promise<object>} Agent-run-shaped result described above.
 */
export async function runReflectViaLlm(opts) {
    const startedAt = Date.now();
    const isRefinementPass = opts.priorDraft !== undefined && opts.iteration > 0;
    const messages = [
        { role: "user", content: opts.prompt ?? "" },
        ...(isRefinementPass
            ? [
                { role: "assistant", content: opts.priorDraft },
                { role: "user", content: REFLECT_CRITIQUE_PROMPT },
            ]
            : []),
    ];
    try {
        let stdout;
        if (!opts.chat) {
            // Production path: only forward the optional settings that were
            // actually provided.
            const completionOptions = {};
            if (opts.responseSchema !== undefined) {
                completionOptions.responseSchema = opts.responseSchema;
            }
            if (opts.maxTokens !== undefined) {
                completionOptions.maxTokens = opts.maxTokens;
            }
            stdout = await chatCompletion(opts.connection, messages, completionOptions);
        }
        else {
            // Injected chat function: two-argument call, no extra options.
            stdout = await opts.chat(opts.connection, messages);
        }
        return {
            ok: true,
            stdout,
            stderr: "",
            durationMs: Date.now() - startedAt,
            exitCode: 0,
        };
    }
    catch (err) {
        const message = err instanceof Error ? err.message : String(err);
        return {
            ok: false,
            stdout: "",
            stderr: message,
            durationMs: Date.now() - startedAt,
            exitCode: 1,
            reason: "non_zero_exit",
            error: message,
        };
    }
}
150
570
  function failureEnvelope(result, ref, fallbackReason = "non_zero_exit") {
151
571
  return {
@@ -165,19 +585,67 @@ export async function akmReflect(options = {}) {
165
585
  ...(options.profile ? { profile: options.profile } : {}),
166
586
  },
167
587
  });
588
+ // Fix #3 (observability 0.8.0): every failure path below MUST emit
589
+ // `reflect_completed` so observers can close the invoke/complete loop. The
590
+ // three success-side `reflect_completed` emit sites carry rich metadata
591
+ // (qualityRejected, sanitized, proposalId, etc.); the failure-side emits
592
+ // carry `{ok: false, reason}` plus the ref when known. Stable failure
593
+ // reasons line up with `AgentFailureReason`: "parse_error", "non_zero_exit",
594
+ // "cooldown", "timeout", "spawn_failed", "llm_*", plus the synthetic
595
+ // "ref_mismatch" / "enoent" / "draft_missing" subtypes for cases the agent
596
+ // surface conflates as "parse_error". Sub-reasons land in `subreason`.
597
+ const emitReflectFailed = (reason, subreason, ref, extra) => {
598
+ appendEvent({
599
+ eventType: "reflect_completed",
600
+ ...(ref ? { ref } : {}),
601
+ metadata: {
602
+ source: "reflect",
603
+ ok: false,
604
+ reason,
605
+ subreason,
606
+ ...(extra ?? {}),
607
+ },
608
+ });
609
+ };
168
610
  // 2. Resolve target asset content (if a ref is supplied).
169
611
  let assetContent;
170
612
  let parsedRef;
171
613
  if (options.ref) {
172
614
  parsedRef = parseAssetRef(options.ref);
173
- try {
174
- const entry = await lookup(parsedRef);
175
- if (entry?.filePath && fs.existsSync(entry.filePath)) {
176
- assetContent = fs.readFileSync(entry.filePath, "utf8");
177
- }
615
+ // 2a. Type guard — reflect only operates on asset types whose canonical
616
+ // shape is `frontmatter + markdown body`. Refuse non-markdown types
617
+ // (script / vault / task) up-front so reflect never prepends YAML to a
618
+ // `.ts` file or rewrites a `.env` blob as prose. See REFLECT_ALLOWED_TYPES.
619
+ if (!REFLECT_ALLOWED_TYPES.has(parsedRef.type)) {
620
+ // Deterministic type-guard rejection — the LLM is never invoked. Emit
621
+ // with reason `unsupported_type` so the improve loop can route this to
622
+ // the `reflect-skipped` action bucket instead of `reflect-failed`. See
623
+ // `/tmp/akm-health-investigations/metrics-taxonomy-review.md` §1a
624
+ // ("Reflect refused asset type" — ~9% of reflect-failed events).
625
+ emitReflectFailed("unsupported_type", "unsupported_type", options.ref, { type: parsedRef.type });
626
+ return {
627
+ schemaVersion: 1,
628
+ ok: false,
629
+ reason: "unsupported_type",
630
+ error: `Reflect refused: asset type "${parsedRef.type}" is not supported by reflect (only markdown-canonical types are allowed: ${[...REFLECT_ALLOWED_TYPES].sort().join(", ")}). Use \`akm propose\` or edit the file directly.`,
631
+ ref: options.ref,
632
+ exitCode: null,
633
+ };
178
634
  }
179
- catch {
180
- // Index miss is non-fatal the agent can still propose a fresh asset.
635
+ if (options.assetContent !== undefined) {
636
+ // Test seam — caller pre-loaded the source content.
637
+ assetContent = options.assetContent;
638
+ }
639
+ else {
640
+ try {
641
+ const entry = await lookup(parsedRef);
642
+ if (entry?.filePath && fs.existsSync(entry.filePath)) {
643
+ assetContent = fs.readFileSync(entry.filePath, "utf8");
644
+ }
645
+ }
646
+ catch {
647
+ // Index miss is non-fatal — the agent can still propose a fresh asset.
648
+ }
181
649
  }
182
650
  }
183
651
  // 3. Resolve agent profile. ConfigError surfaces as a thrown error so the
@@ -188,24 +656,42 @@ export async function akmReflect(options = {}) {
188
656
  // agent config (agent.processes["reflect"]) is picked up automatically.
189
657
  let profile;
190
658
  let resolvedTimeoutMs = options.timeoutMs;
659
+ let runnerSpec;
191
660
  try {
192
661
  if (options.agentProfile) {
193
662
  // Test seam: injected profile bypasses all config.
194
663
  profile = options.agentProfile;
195
664
  }
196
- else if (options.profile) {
197
- // Explicit --profile flag wins over process config.
198
- profile = resolveAgentProfile(options);
665
+ else if (options.runner) {
666
+ // Caller-provided RunnerSpec (used in tests and --dry-run-resolve).
667
+ runnerSpec = options.runner;
199
668
  }
200
669
  else {
201
- // Use per-process config resolution (falls back to agent.default).
202
- const agent = options.agentConfig ?? loadAgentConfigFromDisk();
203
- const processName = options.agentProcess ?? "reflect";
204
- const resolved = resolveProcessAgentProfile(processName, agent);
205
- profile = resolved.profile;
206
- // Only apply process-resolved timeoutMs when caller didn't supply one.
207
- if (resolvedTimeoutMs === undefined) {
208
- resolvedTimeoutMs = resolved.timeoutMs;
670
+ const cfg = options.config ?? loadConfig();
671
+ const reflectProcess = cfg.profiles?.improve?.default?.processes?.reflect;
672
+ // Resolve the runner from the improve profile's reflect entry when present.
673
+ runnerSpec = resolveImproveProcessRunnerFromProfile(reflectProcess, cfg) ?? undefined;
674
+ if (runnerSpec) {
675
+ if (resolvedTimeoutMs === undefined && runnerSpec.timeoutMs !== undefined) {
676
+ resolvedTimeoutMs = runnerSpec.timeoutMs;
677
+ }
678
+ }
679
+ else {
680
+ if (options.profile) {
681
+ // Explicit --profile flag wins over process config.
682
+ profile = resolveAgentProfile(options);
683
+ }
684
+ else {
685
+ // Use per-process config resolution (falls back to defaults.agent).
686
+ const agent = options.agentConfig ?? loadAgentConfigFromDisk();
687
+ const processName = options.agentProcess ?? "reflect";
688
+ const resolved = resolveProcessAgentProfile(processName, agent);
689
+ profile = resolved.profile;
690
+ // Only apply process-resolved timeoutMs when caller didn't supply one.
691
+ if (resolvedTimeoutMs === undefined) {
692
+ resolvedTimeoutMs = resolved.timeoutMs;
693
+ }
694
+ }
209
695
  }
210
696
  }
211
697
  }
@@ -214,106 +700,530 @@ export async function akmReflect(options = {}) {
214
700
  throw err;
215
701
  throw err;
216
702
  }
217
- // 4. Build the prompt.
218
- // Keep reflect on the same captured JSON path the bench harness already
219
- // uses successfully. The draft-file interactive path proved brittle with
220
- // local opencode models and caused proposal generation failures.
703
+ // Ensure profile is set for agent/sdk runners that don't use runnerSpec
704
+ if (!runnerSpec && !profile) {
705
+ const agent = options.agentConfig ?? loadAgentConfigFromDisk();
706
+ profile = resolveAgentProfile({ ...options, agentConfig: agent });
707
+ }
708
+ // Derive a display name for logging — either from the resolved profile or the runnerSpec.
709
+ const resolvedProfileName = profile?.name ??
710
+ (runnerSpec?.kind === "llm"
711
+ ? `llm:${runnerSpec.connection.model}`
712
+ : runnerSpec?.kind !== undefined
713
+ ? `${runnerSpec.kind}:${runnerSpec.profile?.name ?? "unknown"}`
714
+ : "unknown");
715
+ // 4. Build the shared prompt inputs — feedback, hints, lessons, rejected
716
+ // proposals. These are stable across refinement iterations; only the
717
+ // `priorDraft` field changes per-iteration (R-1 / #372).
221
718
  const feedback = readRecentFeedback(options.ref);
222
719
  const schemaHints = buildSchemaHints(parsedRef?.type ?? "", assetContent);
223
720
  const relatedLessons = options.ref && parsedRef ? await readRelatedLessons(stash, options.ref, parsedRef) : [];
224
- const prompt = buildReflectPrompt({
225
- ...(options.ref ? { ref: options.ref } : {}),
226
- ...(parsedRef?.type ? { type: parsedRef.type } : {}),
227
- ...(parsedRef?.name ? { name: parsedRef.name } : {}),
228
- ...(assetContent !== undefined ? { assetContent } : {}),
229
- ...(feedback.length > 0 ? { feedback } : {}),
230
- ...(schemaHints.length > 0 ? { schemaHints } : {}),
231
- ...(relatedLessons.length > 0 ? { relatedLessons } : {}),
232
- ...(options.task ? { task: options.task } : {}),
233
- ...(options.avoidPatterns && options.avoidPatterns.length > 0 ? { avoidPatterns: options.avoidPatterns } : {}),
234
- });
235
- // 5. Spawn the agent.
236
- // Use runProposalAgentPipeline for the shared spawn step, but fall back to
237
- // raw runAgent when a custom spawn function is injected (test seam).
238
- let result;
239
- if (options.runAgentOptions?.spawn) {
240
- // Test seam: use raw runAgent with injected spawn so tests remain deterministic.
241
- const runOptions = {
242
- stdio: "captured",
243
- parseOutput: "text",
244
- ...(resolvedTimeoutMs !== undefined ? { timeoutMs: resolvedTimeoutMs } : {}),
245
- ...(options.runAgentOptions ?? {}),
246
- };
247
- result = await runAgent(profile, prompt, runOptions);
248
- }
249
- else {
250
- // Production path: route through runProposalAgentPipeline (shared logic).
251
- const pipelineResult = await runProposalAgentPipeline({
252
- profile,
253
- prompt,
254
- // reflect always uses captured stdout (no draft file path).
255
- draftFilePath: undefined,
256
- timeoutMs: resolvedTimeoutMs,
257
- });
258
- result = {
259
- ok: pipelineResult.ok,
260
- exitCode: pipelineResult.exitCode,
261
- stdout: pipelineResult.stdout,
262
- stderr: pipelineResult.stderr,
263
- durationMs: pipelineResult.durationMs,
264
- error: pipelineResult.error,
265
- reason: pipelineResult.reason,
266
- };
267
- }
268
- if (!result.ok) {
269
- // B3: ENOENT / not-found gives an actionable hint.
270
- if (isEnoentFailure(result)) {
271
- return { ...failureEnvelope(result, options.ref), error: enoentHintMessage(profile.bin) };
721
+ // Reflexion-style verbal-RL: inject rejected proposals so the agent avoids
722
+ // reproducing proposals that have already been reviewed and refused.
723
+ const rejectedProposals = readRejectedProposals(stash, options.ref);
724
+ // 5. Spawn the agent with optional Self-Refine loop (R-1 / #372).
725
+ //
726
+ // maxRefineIters controls how many agent invocations are made:
727
+ // - 1 (default): single-shot, same as pre-R-1 behaviour
728
+ // - 2–3: on each subsequent pass, the prior draft is injected back into
729
+ // the prompt as Self-Refine critique context (arXiv:2303.17651)
730
+ //
731
+ // The loop exits early when the agent returns the same content as before
732
+ // (no-op refinement) to avoid wasting tokens on identical iterations.
733
+ const MAX_REFINE_ITERS = 3;
734
+ const maxRefineIters = Math.min(Math.max(1, options.maxRefineIters ?? 1), MAX_REFINE_ITERS);
735
+ const agentEnv = options.eventSource === "improve" ? { AKM_EVENT_SOURCE: "improve" } : {};
736
+ // Determine whether this dispatch can honour the file-write contract.
737
+ // Agent CLI + OpenCode SDK runners both have filesystem access; the direct
738
+ // LLM HTTP runner does NOT (see `src/llm/call-ai.ts:64-71`). The v1
739
+ // `profile.sdkMode` fallback also runs the SDK so it counts as file-writable.
740
+ // Test seams (`options.runAgentOptions.spawn`) emulate agent CLI behaviour so
741
+ // they participate as well — tests opt out by simply not writing the file.
742
+ const runnerSupportsFileWrite = runnerSpec ? runnerSpec.kind !== "llm" : true;
743
+ // Initialized to a sentinel; always overwritten in the first loop iteration
744
+ // (maxRefineIters is clamped to >= 1 above). TypeScript cannot prove a
745
+ // for-loop always runs at least once, so we use a type assertion here.
746
+ let result = {};
747
+ let priorDraft;
748
+ // Track every draft file path we synthesize so cleanup can remove them on
749
+ // every return path (success and failure). Mirrors propose's unlink pattern
750
+ // in `src/commands/propose.ts:215-226` but generalised to N refinement
751
+ // iterations. Always called via {@link cleanupDrafts} below.
752
+ const draftPathsToCleanup = [];
753
+ // Last iteration's draft path — read back if the agent wrote it.
754
+ let lastDraftPath;
755
+ // Best-effort unlink: tolerate already-deleted files (we may have unlinked
756
+ // an intermediate iteration's draft) and unwritable paths. Never throws —
757
+ // the proposal result is the source of truth for the caller.
758
+ const cleanupDrafts = () => {
759
+ for (const p of draftPathsToCleanup) {
760
+ try {
761
+ if (fs.existsSync(p))
762
+ fs.unlinkSync(p);
763
+ }
764
+ catch {
765
+ // Swallow — cleanup is best-effort.
766
+ }
272
767
  }
273
- return failureEnvelope(result, options.ref);
274
- }
275
- // 6. Resolve the proposal content from stdout JSON.
768
+ };
769
+ // `payload` is populated inside the try (either by reading the draft file
770
+ // or parsing stdout JSON). Hoisted here so the post-try sections (R-3 ref
771
+ // guard, quality gate, sanitizer, createProposal) can use it after the
772
+ // drafts have been cleaned up.
276
773
  let payload;
277
774
  try {
278
- payload = parseAgentProposalPayload(result.stdout ?? "");
279
- }
280
- catch (err) {
281
- const fallback = fallbackPayloadFromRawContent(result.stdout ?? "", options.ref);
282
- if (fallback) {
283
- payload = fallback;
775
+ for (let iter = 0; iter < maxRefineIters; iter++) {
776
+ // Synthesize a fresh tmp path per iteration so refinement passes never
777
+ // clobber an earlier draft (and so reading back is unambiguous).
778
+ const iterDraftPath = runnerSupportsFileWrite ? synthesizeReflectDraftPath(options.ref) : undefined;
779
+ if (iterDraftPath) {
780
+ draftPathsToCleanup.push(iterDraftPath);
781
+ lastDraftPath = iterDraftPath;
782
+ }
783
+ const { prompt, maxOutputChars } = buildReflectPrompt({
784
+ ...(options.ref ? { ref: options.ref } : {}),
785
+ ...(parsedRef?.type ? { type: parsedRef.type } : {}),
786
+ ...(parsedRef?.name ? { name: parsedRef.name } : {}),
787
+ ...(assetContent !== undefined ? { assetContent } : {}),
788
+ ...(feedback.length > 0 ? { feedback } : {}),
789
+ ...(schemaHints.length > 0 ? { schemaHints } : {}),
790
+ ...(relatedLessons.length > 0 ? { relatedLessons } : {}),
791
+ ...(options.task ? { task: options.task } : {}),
792
+ ...(options.avoidPatterns && options.avoidPatterns.length > 0 ? { avoidPatterns: options.avoidPatterns } : {}),
793
+ ...(rejectedProposals.length > 0 ? { rejectedProposals } : {}),
794
+ // R-1: inject prior draft as self-critique target on iterations > 0
795
+ ...(priorDraft !== undefined ? { priorDraft } : {}),
796
+ // Issue A (#reflect-pipeline file-write contract): when the runner can
797
+ // touch the filesystem, instruct the agent to write the proposal body
798
+ // to a tmp file instead of inlining it in JSON. Avoids parse failures
799
+ // on long bodies (e.g. knowledge:systems/KOKORO_USAGE_GUIDE 8.4KB).
800
+ ...(iterDraftPath ? { draftFilePath: iterDraftPath } : {}),
801
+ });
802
+ // Convert char ceiling → token cap for the LLM path: divide by 3 chars/token
803
+ // (conservative — most models are 3.5–4) and add 500-char overhead for the
804
+ // JSON wrapper and frontmatter block that surround the body in the response.
805
+ const maxTokensForLlm = maxOutputChars !== undefined ? Math.ceil((maxOutputChars + 500) / 3) : undefined;
806
+ let iterResult;
807
+ if (options.runAgentOptions?.spawn) {
808
+ // Test seam: use raw runAgent with injected spawn so tests remain deterministic.
809
+ const resolvedProfile = profile;
810
+ if (!resolvedProfile) {
811
+ throw new Error("internal: reflect test-seam path requires a resolved agent profile");
812
+ }
813
+ const runOptions = {
814
+ stdio: "captured",
815
+ parseOutput: "text",
816
+ ...(resolvedTimeoutMs !== undefined ? { timeoutMs: resolvedTimeoutMs } : {}),
817
+ ...(Object.keys(agentEnv).length > 0 ? { env: agentEnv } : {}),
818
+ ...(options.runAgentOptions ?? {}),
819
+ };
820
+ iterResult = await runAgent(resolvedProfile, prompt, runOptions);
821
+ }
822
+ else if (runnerSpec) {
823
+ // v2: dispatch through unified RunnerSpec
824
+ const runOptions = {
825
+ stdio: "captured",
826
+ parseOutput: "text",
827
+ ...(Object.keys(agentEnv).length > 0 ? { env: agentEnv } : {}),
828
+ };
829
+ switch (runnerSpec.kind) {
830
+ case "llm":
831
+ // LLM HTTP path — `draftFilePath` is accepted for type symmetry
832
+ // (see `RunReflectViaLlmOptions.draftFilePath` docstring) but is
833
+ // intentionally a no-op. The prompt builder above also did not
834
+ // include the file-write contract for this kind, so the LLM is
835
+ // still asked for JSON via stdout.
836
+ iterResult = await runReflectViaLlm({
837
+ prompt,
838
+ connection: runnerSpec.connection,
839
+ timeoutMs: runnerSpec.timeoutMs ?? (typeof resolvedTimeoutMs === "number" ? resolvedTimeoutMs : undefined),
840
+ priorDraft,
841
+ iteration: iter,
842
+ responseSchema: REFLECT_JSON_SCHEMA,
843
+ chat: options.chat,
844
+ ...(maxTokensForLlm !== undefined ? { maxTokens: maxTokensForLlm } : {}),
845
+ });
846
+ break;
847
+ case "sdk":
848
+ iterResult = await runOpencodeSdk(runnerSpec.profile, prompt ?? "", runOptions);
849
+ break;
850
+ case "agent":
851
+ iterResult = await runAgent(runnerSpec.profile, prompt, {
852
+ ...runOptions,
853
+ ...(runnerSpec.timeoutMs !== undefined ? { timeoutMs: runnerSpec.timeoutMs } : {}),
854
+ });
855
+ break;
856
+ }
857
+ }
858
+ else {
859
+ // Production path (v1): dispatch directly to the appropriate runner.
860
+ // The fallback at the end of step 3 guarantees `profile` is set whenever
861
+ // `runnerSpec` is undefined, but TS can't prove that across the loop +
862
+ // await boundary — narrow into a const.
863
+ const resolvedProfile = profile;
864
+ if (!resolvedProfile) {
865
+ throw new Error("internal: reflect v1 dispatch reached without a resolved agent profile or runnerSpec");
866
+ }
867
+ const runOptions = {
868
+ stdio: "captured",
869
+ parseOutput: "text",
870
+ ...(resolvedTimeoutMs !== undefined ? { timeoutMs: resolvedTimeoutMs } : {}),
871
+ ...(Object.keys(agentEnv).length > 0 ? { env: agentEnv } : {}),
872
+ };
873
+ iterResult = resolvedProfile.sdkMode
874
+ ? await runOpencodeSdk(resolvedProfile, prompt ?? "", runOptions)
875
+ : await runAgent(resolvedProfile, prompt, runOptions);
876
+ }
877
+ result = iterResult;
878
+ if (!iterResult.ok)
879
+ break; // surface failure after loop
880
+ // On success, extract the draft content for the next iteration.
881
+ // If the agent returns the same content as the prior draft, stop early
882
+ // (no-op refinement) to avoid wasting tokens on identical iterations.
883
+ if (iter < maxRefineIters - 1) {
884
+ const nextDraft = iterResult.stdout ?? "";
885
+ if (priorDraft !== undefined && nextDraft === priorDraft)
886
+ break;
887
+ priorDraft = nextDraft;
888
+ }
284
889
  }
285
- else {
890
+ const finalResult = result;
891
+ if (!finalResult.ok) {
892
+ // B3: ENOENT / not-found gives an actionable hint.
893
+ if (isEnoentFailure(finalResult)) {
894
+ emitReflectFailed("spawn_failed", "enoent", options.ref, {
895
+ ...(finalResult.exitCode !== undefined ? { exitCode: finalResult.exitCode } : {}),
896
+ });
897
+ return {
898
+ ...failureEnvelope(finalResult, options.ref),
899
+ error: enoentHintMessage(profile?.bin ?? resolvedProfileName),
900
+ };
901
+ }
902
+ const envelope = failureEnvelope(finalResult, options.ref);
903
+ emitReflectFailed(envelope.reason, "agent_crash", options.ref, {
904
+ ...(envelope.exitCode !== null ? { exitCode: envelope.exitCode } : {}),
905
+ });
906
+ return envelope;
907
+ }
908
+ // Re-alias to `result` for the downstream code that references it.
909
+ result = finalResult;
910
+ // 6. Resolve the proposal content.
911
+ //
912
+ // Path A (file-write contract — preferred for agent/sdk runners on long
913
+ // assets): the agent wrote the body to `lastDraftPath` and printed
914
+ // `DRAFT_WRITTEN` on stdout. Load the body from disk and synthesize a
915
+ // payload. The `EXCESSIVE_EXPANSION`/schema-shape gates downstream still
916
+ // apply — they validate content, not transport.
917
+ //
918
+ // Path B (legacy JSON stdout): the agent inlined the proposal body in
919
+ // JSON on stdout. Falls through to `parseAgentProposalPayload`. Also the
920
+ // path used by the LLM HTTP runner, which cannot honour file-write.
921
+ const draftFileExists = lastDraftPath !== undefined && fs.existsSync(lastDraftPath) && fs.statSync(lastDraftPath).size > 0;
922
+ const draftSignaled = stdoutSignalsDraftWritten(result.stdout);
923
+ if (draftSignaled && lastDraftPath && !draftFileExists) {
924
+ // Agent claimed to write the draft but the file is missing or empty.
925
+ // Surface as a parse_error rather than silently falling through — the
926
+ // alternative would be parsing the `DRAFT_WRITTEN` sentinel as JSON,
927
+ // which is guaranteed to fail with a confusing message.
928
+ emitReflectFailed("parse_error", "draft_missing", options.ref, {
929
+ ...(result.exitCode !== null ? { exitCode: result.exitCode } : {}),
930
+ });
286
931
  return {
287
932
  schemaVersion: 1,
288
933
  ok: false,
289
934
  reason: "parse_error",
290
- error: err instanceof Error ? err.message : String(err),
935
+ error: `Agent emitted DRAFT_WRITTEN but draft file is missing or empty (${lastDraftPath}). The file-write contract failed; either the agent's file tools are broken or the path was unwritable.`,
291
936
  ...(options.ref ? { ref: options.ref } : {}),
292
937
  exitCode: result.exitCode,
293
938
  stdout: result.stdout,
294
939
  ...(result.stderr ? { stderr: result.stderr } : {}),
295
940
  };
296
941
  }
942
+ if (draftFileExists && lastDraftPath) {
943
+ // Happy path: agent wrote the body to disk. Use the ref the caller
944
+ // supplied (or a placeholder when omitted — the R-3 ref-mismatch guard
945
+ // below has no effect when there is no expected ref).
946
+ const fileContent = fs.readFileSync(lastDraftPath, "utf8");
947
+ // Phase 6A: file-write contract carries self-rated confidence on the
948
+ // `DRAFT_WRITTEN confidence=<n>` sentinel line. Extract it so the
949
+ // file-write path is on equal footing with the JSON-stdout path for
950
+ // auto-accept gating in `akm improve`.
951
+ const draftConfidence = extractDraftConfidence(result.stdout);
952
+ payload = {
953
+ ref: options.ref ?? "",
954
+ content: fileContent,
955
+ ...(draftConfidence !== undefined ? { confidence: draftConfidence } : {}),
956
+ };
957
+ // The agent followed the file-write contract — `payload.ref` mirrors the
958
+ // caller's expected ref, so the R-3 guard below cannot fire. The agent
959
+ // had no opportunity to retarget the proposal. If the ref was omitted
960
+ // entirely, downstream `createProposal` will reject the empty ref.
961
+ }
962
+ else {
963
+ try {
964
+ payload = parseAgentProposalPayload(result.stdout ?? "");
965
+ }
966
+ catch (err) {
967
+ const fallback = fallbackPayloadFromRawContent(result.stdout ?? "", options.ref, profile?.sdkMode ?? false);
968
+ if (fallback) {
969
+ payload = fallback;
970
+ }
971
+ else {
972
+ // Reclassify cooldown/skip messages that arrive as stdout text instead of
973
+ // valid proposal JSON. These are legitimate skip signals, not parse failures,
974
+ // and should not pollute reflectFailedActions or recentErrors injection.
975
+ const stdoutText = result.stdout ?? "";
976
+ const isCooldownSignal = isStructuredCooldownSignal(stdoutText);
977
+ const reason = isCooldownSignal ? "cooldown" : "parse_error";
978
+ emitReflectFailed(reason, isCooldownSignal ? "stdout_cooldown_signal" : "parse_error", options.ref, {
979
+ ...(result.exitCode !== null ? { exitCode: result.exitCode } : {}),
980
+ });
981
+ return {
982
+ schemaVersion: 1,
983
+ ok: false,
984
+ reason,
985
+ error: err instanceof Error ? err.message : String(err),
986
+ ...(options.ref ? { ref: options.ref } : {}),
987
+ exitCode: result.exitCode,
988
+ stdout: result.stdout,
989
+ ...(result.stderr ? { stderr: result.stderr } : {}),
990
+ };
991
+ }
992
+ }
993
+ }
297
994
  }
298
- // 7. Create the proposal. The proposal queue is the ONLY thing reflect
995
+ finally {
996
+ // Always remove tmp draft files — success, failure, or exception. Returns
997
+ // inside the try above trigger this block before the function exits. Code
998
+ // after this point uses the already-loaded `payload` and never touches the
999
+ // draft paths.
1000
+ cleanupDrafts();
1001
+ }
1002
+ // 6b. Validate payload.ref === options.ref (R-3 / #366).
1003
+ // A hallucinating agent can silently retarget proposals to a different ref.
1004
+ // This guard normalises both refs through parseAssetRef so origin-prefix
1005
+ // differences do not cause false positives, then rejects mismatches.
1006
+ // References: CRITIC (arXiv:2305.11738), CoVe (arXiv:2309.11495).
1007
+ if (options.ref) {
1008
+ try {
1009
+ const expectedParsed = parseAssetRef(options.ref);
1010
+ const actualParsed = parseAssetRef(payload.ref);
1011
+ // Compare type + name (drop origin — agent may omit origin prefix).
1012
+ if (expectedParsed.type !== actualParsed.type || expectedParsed.name !== actualParsed.name) {
1013
+ emitReflectFailed("parse_error", "ref_mismatch", options.ref, {
1014
+ expectedRef: options.ref,
1015
+ actualRef: payload.ref,
1016
+ ...(result.exitCode !== null ? { exitCode: result.exitCode } : {}),
1017
+ });
1018
+ return {
1019
+ schemaVersion: 1,
1020
+ ok: false,
1021
+ reason: "parse_error",
1022
+ error: `Agent retargeted proposal: expected ref "${options.ref}" but got "${payload.ref}". Proposal rejected to prevent silent ref hallucination.`,
1023
+ ref: options.ref,
1024
+ exitCode: result.exitCode,
1025
+ stdout: result.stdout,
1026
+ ...(result.stderr ? { stderr: result.stderr } : {}),
1027
+ };
1028
+ }
1029
+ }
1030
+ catch {
1031
+ // parseAssetRef failure means the agent returned a malformed ref — already
1032
+ // caught downstream by createProposal; allow it to surface naturally.
1033
+ }
1034
+ }
1035
+ // 7. R-5 / #374: Apply the proposal quality gate when enabled.
1036
+ // Mirrors the lesson quality gate on distill proposals. The gate uses
1037
+ // `runLessonQualityJudge` from distill.ts and is gated behind either
1038
+ // `profiles.improve.default.processes.reflect.qualityGate.enabled` or
1039
+ // `profiles.improve.default.processes.distill.qualityGate.enabled` (the
1040
+ // `lesson_quality_gate` flag name is the legacy alias still accepted by
1041
+ // `isLlmFeatureEnabled`). Fail-open: any judge error passes through.
1042
+ // G-Eval (arXiv:2303.16634) — quality judgment before admission.
1043
+ const runtimeConfig = options.config ??
1044
+ (() => {
1045
+ try {
1046
+ return loadConfig();
1047
+ }
1048
+ catch {
1049
+ return undefined;
1050
+ }
1051
+ })();
1052
+ const chatFn = options.chat ?? chatCompletion;
1053
+ const qualityGateEnabled = isLlmFeatureEnabled(runtimeConfig, "proposal_quality_gate") ||
1054
+ isLlmFeatureEnabled(runtimeConfig, "lesson_quality_gate");
1055
+ if (qualityGateEnabled && runtimeConfig) {
1056
+ const assetContent = (() => {
1057
+ if (!options.ref)
1058
+ return null;
1059
+ try {
1060
+ const refParsed = parseAssetRef(options.ref);
1061
+ const candidates = [
1062
+ path.join(stash, `${refParsed.type}s`, `${refParsed.name}.md`),
1063
+ path.join(stash, `${refParsed.type}s`, refParsed.name, "index.md"),
1064
+ ];
1065
+ for (const p of candidates) {
1066
+ if (fs.existsSync(p))
1067
+ return fs.readFileSync(p, "utf8");
1068
+ }
1069
+ return null;
1070
+ }
1071
+ catch {
1072
+ return null;
1073
+ }
1074
+ })();
1075
+ const judgeResult = await runLessonQualityJudge(runtimeConfig, payload.content, assetContent ?? "", chatFn);
1076
+ if (!judgeResult.pass) {
1077
+ // Quality gate rejected the proposal — surface as parse_error so the
1078
+ // improve orchestrator can log it and move on without crashing.
1079
+ appendEvent({
1080
+ eventType: "reflect_completed",
1081
+ ref: payload.ref,
1082
+ metadata: {
1083
+ source: "reflect",
1084
+ qualityRejected: true,
1085
+ qualityScore: judgeResult.score,
1086
+ qualityReason: judgeResult.reason,
1087
+ },
1088
+ });
1089
+ return {
1090
+ schemaVersion: 1,
1091
+ ok: false,
1092
+ reason: "parse_error",
1093
+ error: `Reflect proposal quality gate rejected: score=${judgeResult.score}, reason="${judgeResult.reason}"`,
1094
+ ...(options.ref ? { ref: options.ref } : {}),
1095
+ exitCode: result.exitCode,
1096
+ };
1097
+ }
1098
+ }
1099
+ // 7b. Reflect content-preservation rails:
1100
+ // - Restore source frontmatter so reflect can never strip indexable
1101
+ // fields (`description`, `when_to_use`, `tags`, ...).
1102
+ // - Reset protected identity fields (`name`, `ref`, `id`, `slug`,
1103
+ // `type`) the LLM tried to change.
1104
+ // - Reject proposals that shrink/expand the body past safe ratios.
1105
+ //
1106
+ // See REFLECT_ALLOWED_TYPES / sanitizeReflectPayload for the underlying
1107
+ // hypotheses + observed regressions (`8737ab63`, `26941510`, and the
1108
+ // catastrophic-shrinkage cases from the May 2026 review).
1109
+ const sanitizeOutcome = sanitizeReflectPayload({ content: payload.content, ...(payload.frontmatter ? { frontmatter: payload.frontmatter } : {}) }, assetContent, payload.ref);
1110
+ if (sanitizeOutcome.reject) {
1111
+ appendEvent({
1112
+ eventType: "reflect_completed",
1113
+ ref: payload.ref,
1114
+ metadata: {
1115
+ source: "reflect",
1116
+ sanitized: true,
1117
+ rejected: true,
1118
+ rejectReason: sanitizeOutcome.reject.error,
1119
+ ...(sanitizeOutcome.warnings.length > 0 ? { sanitizerWarnings: sanitizeOutcome.warnings } : {}),
1120
+ },
1121
+ });
1122
+ return {
1123
+ schemaVersion: 1,
1124
+ ok: false,
1125
+ reason: sanitizeOutcome.reject.reason,
1126
+ error: sanitizeOutcome.reject.error,
1127
+ ...(options.ref ? { ref: options.ref } : {}),
1128
+ exitCode: result.exitCode,
1129
+ };
1130
+ }
1131
+ payload = {
1132
+ ...payload,
1133
+ content: sanitizeOutcome.content,
1134
+ ...(sanitizeOutcome.frontmatter ? { frontmatter: sanitizeOutcome.frontmatter } : {}),
1135
+ };
1136
+ // 8. Create the proposal. The proposal queue is the ONLY thing reflect
299
1137
  // writes — promotion to a real asset is gated by `akm proposal accept`.
1138
+ //
1139
+ // R-4 / #373: Stamp `derived_from_reflect: true` in the frontmatter of any
1140
+ // lesson proposal generated by reflect. This provenance marker lets
1141
+ // `readRelatedLessons` exclude echo-chamber lessons (lessons that originate
1142
+ // from prior reflect runs on the same skill) unless independent feedback
1143
+ // evidence exists. ExpeL arXiv:2308.10144 — reject rules without success/
1144
+ // failure differential from independent evidence.
1145
+ const isLessonProposal = (() => {
1146
+ try {
1147
+ return parseAssetRef(payload.ref).type === "lesson";
1148
+ }
1149
+ catch {
1150
+ return false;
1151
+ }
1152
+ })();
1153
+ const basePayloadFrontmatter = payload.frontmatter ?? {};
1154
+ const payloadFrontmatterWithProvenance = isLessonProposal
1155
+ ? { ...basePayloadFrontmatter, derived_from_reflect: true }
1156
+ : basePayloadFrontmatter;
1157
+ // Draft mode: skip DB persistence — the SC sampling loop in improve.ts persists
1158
+ // only the majority-vote winner (R-2 / #389). Return a synthetic proposal so
1159
+ // pickMajorityVote can compare content via Jaccard similarity.
1160
+ if (options.draftMode) {
1161
+ const draftProposal = {
1162
+ id: `sc-draft-${Date.now()}`,
1163
+ ref: payload.ref,
1164
+ source: "reflect",
1165
+ status: "pending",
1166
+ createdAt: new Date().toISOString(),
1167
+ updatedAt: new Date().toISOString(),
1168
+ payload: {
1169
+ content: payload.content,
1170
+ ...(Object.keys(payloadFrontmatterWithProvenance).length > 0
1171
+ ? { frontmatter: payloadFrontmatterWithProvenance }
1172
+ : {}),
1173
+ },
1174
+ // Phase 6A: preserve confidence on the synthetic draft so the SC majority
1175
+ // winner carries the score through to the persisted proposal.
1176
+ ...(typeof payload.confidence === "number" ? { confidence: payload.confidence } : {}),
1177
+ };
1178
+ return {
1179
+ schemaVersion: 1,
1180
+ ok: true,
1181
+ proposal: draftProposal,
1182
+ ref: draftProposal.ref,
1183
+ agentProfile: resolvedProfileName,
1184
+ durationMs: result.durationMs,
1185
+ };
1186
+ }
300
1187
  const createInput = {
301
1188
  ref: payload.ref,
302
1189
  source: "reflect",
303
1190
  sourceRun: `reflect-${Date.now()}`,
304
1191
  payload: {
305
1192
  content: payload.content,
306
- ...(payload.frontmatter ? { frontmatter: payload.frontmatter } : {}),
1193
+ ...(Object.keys(payloadFrontmatterWithProvenance).length > 0
1194
+ ? { frontmatter: payloadFrontmatterWithProvenance }
1195
+ : {}),
307
1196
  },
1197
+ // Phase 6A: forward LLM-reported confidence into the proposal record.
1198
+ // `parseAgentProposalPayload` already clamps to [0, 1] and drops non-
1199
+ // finite values; `createProposal` runs its own sanitizer as a safety net.
1200
+ ...(typeof payload.confidence === "number" ? { confidence: payload.confidence } : {}),
308
1201
  };
309
- const proposal = createProposal(stash, createInput, options.ctx);
1202
+ const proposalResult = createProposal(stash, createInput, options.ctx);
1203
+ if (isProposalSkipped(proposalResult)) {
1204
+ // Dedup/cooldown guard fired — surface as a "cooldown" reason (not "parse_error")
1205
+ // so the improve orchestrator can distinguish legitimate skips from real failures
1206
+ // and exclude them from recentErrors/avoidPatterns injection.
1207
+ emitReflectFailed("cooldown", "proposal_skipped", options.ref, {
1208
+ proposalSkipReason: proposalResult.reason,
1209
+ });
1210
+ return {
1211
+ schemaVersion: 1,
1212
+ ok: false,
1213
+ reason: "cooldown",
1214
+ error: `Proposal skipped (${proposalResult.reason}): ${proposalResult.message}`,
1215
+ ...(options.ref ? { ref: options.ref } : {}),
1216
+ exitCode: null,
1217
+ };
1218
+ }
1219
+ const proposal = proposalResult;
310
1220
  appendEvent({
311
1221
  eventType: "reflect_completed",
312
1222
  ref: proposal.ref,
313
1223
  metadata: {
314
1224
  proposalId: proposal.id,
315
1225
  source: "reflect",
316
- agentProfile: profile.name,
1226
+ agentProfile: resolvedProfileName,
317
1227
  },
318
1228
  });
319
1229
  return {
@@ -321,7 +1231,7 @@ export async function akmReflect(options = {}) {
321
1231
  ok: true,
322
1232
  proposal,
323
1233
  ref: proposal.ref,
324
- agentProfile: profile.name,
1234
+ agentProfile: resolvedProfileName,
325
1235
  durationMs: result.durationMs,
326
1236
  };
327
1237
  }