akm-cli 0.7.5 → 0.8.0-rc.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/{.github/CHANGELOG.md → CHANGELOG.md} +113 -2
- package/README.md +20 -4
- package/SECURITY.md +93 -0
- package/dist/cli/config-migrate.js +144 -0
- package/dist/cli/config-validate.js +39 -0
- package/dist/cli/confirm.js +73 -0
- package/dist/cli/parse-args.js +133 -0
- package/dist/cli.js +1995 -551
- package/dist/commands/agent-dispatch.js +110 -0
- package/dist/commands/agent-support.js +68 -0
- package/dist/commands/completions.js +3 -0
- package/dist/commands/config-cli.js +130 -534
- package/dist/commands/consolidate.js +1531 -0
- package/dist/commands/curate.js +44 -3
- package/dist/commands/db-cli.js +23 -0
- package/dist/commands/distill-promotion-policy.js +660 -0
- package/dist/commands/distill.js +990 -75
- package/dist/commands/eval-cases.js +43 -0
- package/dist/commands/events.js +5 -23
- package/dist/commands/graph.js +477 -0
- package/dist/commands/health.js +400 -0
- package/dist/commands/help/help-accept.md +9 -0
- package/dist/commands/help/help-improve.md +77 -0
- package/dist/commands/help/help-proposals.md +15 -0
- package/dist/commands/help/help-propose.md +17 -0
- package/dist/commands/help/help-reject.md +8 -0
- package/dist/commands/history.js +54 -46
- package/dist/commands/improve-profiles.js +146 -0
- package/dist/commands/improve-result-file.js +103 -0
- package/dist/commands/improve.js +2175 -0
- package/dist/commands/info.js +5 -2
- package/dist/commands/init.js +50 -2
- package/dist/commands/installed-stashes.js +102 -139
- package/dist/commands/knowledge.js +136 -0
- package/dist/commands/lint/agent-linter.js +49 -0
- package/dist/commands/lint/base-linter.js +479 -0
- package/dist/commands/lint/command-linter.js +49 -0
- package/dist/commands/lint/default-linter.js +16 -0
- package/dist/commands/lint/index.js +183 -0
- package/dist/commands/lint/knowledge-linter.js +16 -0
- package/dist/commands/lint/markdown-insertion.js +343 -0
- package/dist/commands/lint/memory-linter.js +61 -0
- package/dist/commands/lint/registry.js +36 -0
- package/dist/commands/lint/skill-linter.js +45 -0
- package/dist/commands/lint/task-linter.js +50 -0
- package/dist/commands/lint/types.js +4 -0
- package/dist/commands/lint/vault-key-rules.js +139 -0
- package/dist/commands/lint/workflow-linter.js +56 -0
- package/dist/commands/lint.js +4 -0
- package/dist/commands/migration-help.js +5 -2
- package/dist/commands/proposal.js +66 -12
- package/dist/commands/propose.js +86 -31
- package/dist/commands/reflect.js +1119 -73
- package/dist/commands/registry-search.js +5 -2
- package/dist/commands/remember.js +69 -6
- package/dist/commands/schema-repair.js +203 -0
- package/dist/commands/search.js +115 -14
- package/dist/commands/self-update.js +3 -0
- package/dist/commands/show.js +144 -25
- package/dist/commands/source-add.js +17 -45
- package/dist/commands/source-clone.js +3 -0
- package/dist/commands/source-manage.js +14 -19
- package/dist/commands/tasks.js +438 -0
- package/dist/commands/url-checker.js +42 -0
- package/dist/commands/vault.js +130 -77
- package/dist/core/action-contributors.js +28 -0
- package/dist/core/asset-ref.js +7 -0
- package/dist/core/asset-registry.js +7 -16
- package/dist/core/asset-serialize.js +88 -0
- package/dist/core/asset-spec.js +22 -0
- package/dist/core/common.js +157 -0
- package/dist/core/concurrent.js +25 -0
- package/dist/core/config-io.js +347 -0
- package/dist/core/config-migration.js +625 -0
- package/dist/core/config-schema.js +501 -0
- package/dist/core/config-sources.js +108 -0
- package/dist/core/config-types.js +4 -0
- package/dist/core/config-walker.js +337 -0
- package/dist/core/config.js +327 -987
- package/dist/core/errors.js +40 -19
- package/dist/core/events.js +91 -138
- package/dist/core/file-lock.js +104 -0
- package/dist/core/frontmatter.js +3 -6
- package/dist/core/lesson-lint.js +3 -0
- package/dist/core/markdown.js +20 -0
- package/dist/core/memory-belief.js +62 -0
- package/dist/core/memory-contradiction-detect.js +274 -0
- package/dist/core/memory-improve.js +806 -0
- package/dist/core/parse.js +158 -0
- package/dist/core/paths.js +326 -14
- package/dist/core/proposal-quality-validators.js +364 -0
- package/dist/core/proposal-validators.js +69 -0
- package/dist/core/proposals.js +498 -42
- package/dist/core/state-db.js +927 -0
- package/dist/core/text-truncation.js +107 -0
- package/dist/core/time.js +54 -0
- package/dist/core/warn.js +62 -1
- package/dist/core/write-source.js +3 -0
- package/dist/indexer/db-backup.js +391 -0
- package/dist/indexer/db-search.js +152 -253
- package/dist/indexer/db.js +933 -103
- package/dist/indexer/ensure-index.js +64 -0
- package/dist/indexer/file-context.js +3 -0
- package/dist/indexer/graph-boost.js +376 -101
- package/dist/indexer/graph-db.js +391 -0
- package/dist/indexer/graph-dedup.js +95 -0
- package/dist/indexer/graph-extraction.js +550 -124
- package/dist/indexer/index-context.js +4 -0
- package/dist/indexer/indexer.js +506 -291
- package/dist/indexer/llm-cache.js +47 -0
- package/dist/indexer/manifest.js +3 -0
- package/dist/indexer/matchers.js +148 -160
- package/dist/indexer/memory-inference.js +99 -74
- package/dist/indexer/metadata-contributors.js +29 -0
- package/dist/indexer/metadata.js +255 -196
- package/dist/indexer/path-resolver.js +92 -0
- package/dist/indexer/project-context.js +192 -0
- package/dist/indexer/ranking-contributors.js +331 -0
- package/dist/indexer/ranking.js +81 -0
- package/dist/indexer/search-fields.js +5 -9
- package/dist/indexer/search-hit-enrichers.js +111 -0
- package/dist/indexer/search-source.js +44 -10
- package/dist/indexer/semantic-status.js +5 -16
- package/dist/indexer/staleness-detect.js +447 -0
- package/dist/indexer/usage-events.js +12 -9
- package/dist/indexer/walker.js +28 -0
- package/dist/integrations/agent/builders.js +135 -0
- package/dist/integrations/agent/config.js +122 -230
- package/dist/integrations/agent/detect.js +3 -0
- package/dist/integrations/agent/index.js +7 -13
- package/dist/integrations/agent/model-aliases.js +55 -0
- package/dist/integrations/agent/profiles.js +70 -5
- package/dist/integrations/agent/prompts.js +150 -74
- package/dist/integrations/agent/runner.js +151 -0
- package/dist/integrations/agent/sdk-runner.js +126 -0
- package/dist/integrations/agent/spawn.js +118 -23
- package/dist/integrations/github.js +3 -0
- package/dist/integrations/lockfile.js +32 -69
- package/dist/integrations/session-logs/index.js +68 -0
- package/dist/integrations/session-logs/providers/claude-code.js +59 -0
- package/dist/integrations/session-logs/providers/opencode.js +55 -0
- package/dist/integrations/session-logs/types.js +4 -0
- package/dist/llm/call-ai.js +62 -0
- package/dist/llm/client.js +72 -124
- package/dist/llm/embedder.js +3 -19
- package/dist/llm/embedders/cache.js +3 -7
- package/dist/llm/embedders/local.js +3 -0
- package/dist/llm/embedders/remote.js +20 -8
- package/dist/llm/embedders/types.js +3 -7
- package/dist/llm/feature-gate.js +89 -48
- package/dist/llm/graph-extract.js +676 -70
- package/dist/llm/index-passes.js +9 -23
- package/dist/llm/memory-infer.js +52 -71
- package/dist/llm/metadata-enhance.js +42 -29
- package/dist/llm/prompts/graph-extract-user-prompt.md +35 -0
- package/dist/output/cli-hints-full.md +281 -0
- package/dist/output/cli-hints-short.md +65 -0
- package/dist/output/cli-hints.js +5 -318
- package/dist/output/context.js +3 -0
- package/dist/output/renderers.js +223 -256
- package/dist/output/shapes.js +150 -105
- package/dist/output/text.js +318 -30
- package/dist/registry/build-index.js +3 -0
- package/dist/registry/create-provider-registry.js +3 -0
- package/dist/registry/factory.js +3 -0
- package/dist/registry/origin-resolve.js +3 -0
- package/dist/registry/providers/index.js +3 -0
- package/dist/registry/providers/skills-sh.js +70 -49
- package/dist/registry/providers/static-index.js +53 -48
- package/dist/registry/providers/types.js +3 -24
- package/dist/registry/resolve.js +11 -16
- package/dist/registry/types.js +3 -0
- package/dist/scripts/migrate-storage.js +17307 -0
- package/dist/scripts/migrations/import-fs-improve-runs-to-db.js +8900 -0
- package/dist/scripts/migrations/v16-to-v17.js +141 -0
- package/dist/setup/detect.js +3 -0
- package/dist/setup/ripgrep-install.js +3 -0
- package/dist/setup/ripgrep-resolve.js +3 -0
- package/dist/setup/setup.js +775 -37
- package/dist/setup/steps.js +3 -15
- package/dist/sources/include.js +3 -0
- package/dist/sources/provider-factory.js +5 -12
- package/dist/sources/provider.js +3 -20
- package/dist/sources/providers/filesystem.js +19 -23
- package/dist/sources/providers/git.js +7 -5
- package/dist/sources/providers/index.js +3 -0
- package/dist/sources/providers/install-types.js +3 -13
- package/dist/sources/providers/npm.js +3 -4
- package/dist/sources/providers/provider-utils.js +3 -0
- package/dist/sources/providers/sync-from-ref.js +3 -11
- package/dist/sources/providers/tar-utils.js +3 -0
- package/dist/sources/providers/website.js +18 -22
- package/dist/sources/resolve.js +3 -0
- package/dist/sources/types.js +3 -0
- package/dist/sources/website-ingest.js +7 -0
- package/dist/tasks/backends/cron.js +203 -0
- package/dist/tasks/backends/exec-utils.js +28 -0
- package/dist/tasks/backends/index.js +24 -0
- package/dist/tasks/backends/launchd-template.xml +19 -0
- package/dist/tasks/backends/launchd.js +187 -0
- package/dist/tasks/backends/schtasks-template.xml +29 -0
- package/dist/tasks/backends/schtasks.js +215 -0
- package/dist/tasks/parser.js +211 -0
- package/dist/tasks/resolveAkmBin.js +87 -0
- package/dist/tasks/runner.js +458 -0
- package/dist/tasks/schedule.js +211 -0
- package/dist/tasks/schema.js +15 -0
- package/dist/tasks/validator.js +62 -0
- package/dist/version.js +3 -0
- package/dist/wiki/index-template.md +12 -0
- package/dist/wiki/ingest-workflow-template.md +54 -0
- package/dist/wiki/log-template.md +8 -0
- package/dist/wiki/schema-template.md +61 -0
- package/dist/wiki/wiki-templates.js +15 -0
- package/dist/wiki/wiki.js +13 -61
- package/dist/workflows/authoring.js +8 -25
- package/dist/workflows/cli.js +3 -0
- package/dist/workflows/db.js +140 -10
- package/dist/workflows/document-cache.js +3 -10
- package/dist/workflows/parser.js +3 -0
- package/dist/workflows/renderer.js +11 -3
- package/dist/workflows/runs.js +62 -91
- package/dist/workflows/schema.js +3 -0
- package/dist/workflows/scope-key.js +3 -0
- package/dist/workflows/validator.js +4 -8
- package/dist/workflows/workflow-template.md +24 -0
- package/docs/README.md +9 -2
- package/docs/data-and-telemetry.md +225 -0
- package/docs/migration/release-notes/0.7.0.md +1 -1
- package/docs/migration/release-notes/0.7.5.md +2 -2
- package/docs/migration/release-notes/0.8.0.md +48 -0
- package/docs/migration/v0.7-to-v0.8.md +1307 -0
- package/package.json +20 -8
- package/.github/LICENSE +0 -374
- package/dist/commands/install-audit.js +0 -381
- package/dist/templates/wiki-templates.js +0 -100
package/dist/commands/reflect.js
CHANGED
|
@@ -1,3 +1,6 @@
|
|
|
1
|
+
// This Source Code Form is subject to the terms of the Mozilla Public
|
|
2
|
+
// License, v. 2.0. If a copy of the MPL was not distributed with this
|
|
3
|
+
// file, You can obtain one at https://mozilla.org/MPL/2.0/.
|
|
1
4
|
/**
|
|
2
5
|
* `akm reflect [ref]` — proposal-producing agent command (#226).
|
|
3
6
|
*
|
|
@@ -19,16 +22,29 @@
|
|
|
19
22
|
* a committed asset, and the `accept` flow is the bridge.
|
|
20
23
|
*/
|
|
21
24
|
import fs from "node:fs";
|
|
25
|
+
import os from "node:os";
|
|
26
|
+
import path from "node:path";
|
|
22
27
|
import { parseAssetRef } from "../core/asset-ref";
|
|
28
|
+
import { assembleAssetFromString } from "../core/asset-serialize";
|
|
23
29
|
import { resolveStashDir } from "../core/common";
|
|
24
30
|
import { loadConfig } from "../core/config";
|
|
25
31
|
import { ConfigError, UsageError } from "../core/errors";
|
|
26
32
|
import { appendEvent, readEvents } from "../core/events";
|
|
33
|
+
import { parseFrontmatter } from "../core/frontmatter";
|
|
27
34
|
import { lintLessonContent } from "../core/lesson-lint";
|
|
28
|
-
import {
|
|
35
|
+
import { stripMarkdownFences } from "../core/markdown";
|
|
36
|
+
import { checkReflectSize } from "../core/proposal-quality-validators";
|
|
37
|
+
import { createProposal, isProposalSkipped, listProposals, } from "../core/proposals";
|
|
29
38
|
import { lookup } from "../indexer/indexer";
|
|
30
|
-
import {
|
|
31
|
-
import {
|
|
39
|
+
import { runAgent, } from "../integrations/agent";
|
|
40
|
+
import { resolveProcessAgentProfile } from "../integrations/agent/config";
|
|
41
|
+
import { buildReflectPrompt, parseAgentProposalPayload, } from "../integrations/agent/prompts";
|
|
42
|
+
import { resolveImproveProcessRunnerFromProfile } from "../integrations/agent/runner";
|
|
43
|
+
import { runOpencodeSdk } from "../integrations/agent/sdk-runner";
|
|
44
|
+
import { chatCompletion } from "../llm/client";
|
|
45
|
+
import { isLlmFeatureEnabled } from "../llm/feature-gate";
|
|
46
|
+
import { baseFailureFields, enoentHintMessage, isEnoentFailure, loadAgentConfigFromDisk, resolveAgentProfile, } from "./agent-support";
|
|
47
|
+
import { deriveLessonRef, runLessonQualityJudge } from "./distill";
|
|
32
48
|
const MAX_FEEDBACK_LINES = 10;
|
|
33
49
|
const MAX_GLOBAL_FEEDBACK_LINES = 20;
|
|
34
50
|
/**
|
|
@@ -45,7 +61,7 @@ function readRecentFeedback(ref) {
|
|
|
45
61
|
for (const event of result.events.slice(-limit)) {
|
|
46
62
|
const md = (event.metadata ?? {});
|
|
47
63
|
const signal = typeof md.signal === "string" ? md.signal : "?";
|
|
48
|
-
const note = typeof md.
|
|
64
|
+
const note = typeof md.reason === "string" ? md.reason : typeof md.note === "string" ? md.note : "";
|
|
49
65
|
const details = note ? `[${signal}] ${note}` : `[${signal}]`;
|
|
50
66
|
lines.push(!ref && event.ref ? `${event.ref} ${details}` : details);
|
|
51
67
|
}
|
|
@@ -55,6 +71,102 @@ function readRecentFeedback(ref) {
|
|
|
55
71
|
return [];
|
|
56
72
|
}
|
|
57
73
|
}
|
|
74
|
+
/** Cap on how many rejected proposals are collected for a single ref. */
const MAX_REJECTED_PROPOSALS = 3;
/**
 * Allow-list of asset types that reflect may operate on.
 *
 * Reflect emits `frontmatter + markdown body`. Applied to an asset whose
 * on-disk form is NOT markdown (executable scripts, vault env files, YAML
 * tasks), it would blindly prepend `---\n…\n---\n` and break the runtime
 * contract — e.g. a `.ts` script with a YAML preamble is a TypeScript syntax
 * error.
 *
 * An allow-list (rather than a deny-list) keeps newly registered asset types
 * excluded by default. To permit a custom registered type, add it to this set
 * explicitly.
 *
 * Observed regression: proposal `8737ab63` (May 2026) prepended frontmatter to
 * a `.ts` script file via reflect. This allow-list prevents that.
 */
export const REFLECT_ALLOWED_TYPES = new Set([
    "knowledge", "memory", "lesson", "wiki",
    "skill", "agent", "command", "workflow",
]);
/**
 * Identity / structural frontmatter fields the LLM must NEVER change.
 *
 * Renaming `name` on a skill silently breaks ref resolution because the ref
 * is derived from the on-disk path; the same reasoning applies to `ref`, `id`,
 * `slug`, and `type`. The reflect post-processor restores any of these fields
 * if the LLM tried to rewrite them.
 *
 * Observed regression: proposal `26941510` (May 2026) renamed
 * `skill:openpalm-stack-diagnostics`'s `name` field to `"diagnostic-checklist"`.
 */
const PROTECTED_FRONTMATTER_FIELDS = new Set([
    "name",
    "ref",
    "id",
    "slug",
    "type",
]);
|
|
113
|
+
/**
 * Read the most recent rejected proposals (at most MAX_REJECTED_PROPOSALS,
 * archive included) for a given ref from the proposal store.
 *
 * Best-effort: returns `[]` when `ref` is falsy or when `listProposals`
 * throws. The result is used to inject Reflexion-style verbal-RL context into
 * the reflect prompt so the agent avoids re-proposing already-refused content
 * (arXiv:2303.11366).
 *
 * @param stash Forwarded unchanged to `listProposals`.
 * @param ref   Asset ref whose rejected proposals are wanted.
 * @returns Array of `{ ref, reason, contentPreview }`, newest first, where
 *          `contentPreview` is the first 500 characters of the proposal
 *          content (empty string when the content is missing).
 */
function readRejectedProposals(stash, ref) {
    if (!ref)
        return [];
    let rejected;
    try {
        rejected = listProposals(stash, { ref, status: "rejected", includeArchive: true });
    }
    catch {
        // Proposal store missing or unreadable — this context is optional.
        return [];
    }
    if (!Array.isArray(rejected))
        return [];
    // An unparsable `updatedAt` must not yield NaN: a NaN comparator result
    // makes the sort order implementation-defined.
    const toMillis = (value) => {
        const ms = new Date(value ?? 0).getTime();
        return Number.isNaN(ms) ? 0 : ms;
    };
    // Copy before sorting so the array returned by the store is not mutated.
    return [...rejected]
        .filter((p) => p != null)
        .sort((a, b) => toMillis(b.updatedAt) - toMillis(a.updatedAt))
        .slice(0, MAX_REJECTED_PROPOSALS)
        .map((p) => {
            // A single malformed proposal must not discard the others.
            const content = p.payload?.content;
            return {
                ref: p.ref,
                reason: p.review?.reason ?? "no reason given",
                contentPreview: typeof content === "string" ? content.slice(0, 500) : "",
            };
        });
}
|
|
137
|
+
/**
 * Synthesize a tmp draft-file path for the agent/sdk file-write contract.
 *
 * Mirrors `src/commands/propose.ts:163-178` — when the runner is agent-CLI or
 * the OpenCode SDK, the agent is instructed to write the proposal body
 * directly to this file instead of inlining it in JSON on stdout. This
 * bypasses two known failure modes for long assets: (a) ARG_MAX truncation on
 * prompt round-trips through fenced JSON, and (b) embedded-JSON parser
 * brittleness on multi-KB bodies (e.g. the
 * `knowledge:systems/KOKORO_USAGE_GUIDE` 8.4KB payload that produced 4/5
 * `parse_error` in May 2026 reflect validation).
 *
 * The path lives under `os.tmpdir()` and embeds the sanitized ref (truncated
 * so the file name stays well below common 255-byte limits), the current
 * timestamp, and a 6-character random suffix, so concurrent reflect calls are
 * very unlikely to collide.
 *
 * NOTE(review): an earlier version of this comment said the function returns
 * `undefined` for the LLM HTTP runner. The function always returns a path;
 * presumably the caller skips it for that runner — confirm at the call site.
 * NOTE(review): `Math.random()` is not a secure source of randomness; the
 * suffix only reduces collisions, it does not make the path unguessable.
 *
 * @param ref Asset ref, or `undefined`/`null` (rendered as `no-ref`).
 * @returns Absolute-or-relative path as produced by `path.join(os.tmpdir(), …)`.
 */
function synthesizeReflectDraftPath(ref) {
    // Keeps the whole file name far below typical NAME_MAX (255) even for
    // very long refs: 12 (prefix) + 120 + timestamp + suffix + extension.
    const MAX_REF_CHARS = 120;
    const safeRef = (ref ?? "no-ref").replace(/[^a-z0-9_-]/gi, "_").slice(0, MAX_REF_CHARS);
    // `toString(36)` can yield fewer than 6 fractional digits (e.g. for 0.5),
    // so pad to a fixed width.
    const rand = Math.random().toString(36).slice(2, 8).padEnd(6, "0");
    return path.join(os.tmpdir(), `akm-reflect-${safeRef}-${Date.now()}-${rand}.md`);
}
|
|
159
|
+
/**
 * Heuristic: did the agent honour the file-write contract?
 *
 * The contract asks the agent to print a single `DRAFT_WRITTEN` line on stdout
 * once the draft file has been written. Because some agents print extra log
 * lines, the marker is accepted anywhere in the captured stdout (as a whole
 * word).
 *
 * @param stdout Captured agent stdout; falsy values yield `false`.
 * @returns `true` when the marker is present.
 */
function stdoutSignalsDraftWritten(stdout) {
    return stdout ? /\bDRAFT_WRITTEN\b/.test(stdout) : false;
}
|
|
58
170
|
/**
|
|
59
171
|
* Build schema/lint hints for the prompt. For lesson refs, run the lesson
|
|
60
172
|
* lint over the current content and surface any findings — they are a
|
|
@@ -68,45 +180,441 @@ function buildSchemaHints(type, content) {
|
|
|
68
180
|
const report = lintLessonContent(content, "reflect");
|
|
69
181
|
return report.findings.map((f) => `[${f.kind}] ${f.message}`);
|
|
70
182
|
}
|
|
71
|
-
function
|
|
183
|
+
/**
 * Report whether the frontmatter of `content` has a `sources` array that
 * contains `skillRef` (string entries are compared after trimming).
 */
function hasRelatedSkillSource(content, skillRef) {
    const frontmatter = parseFrontmatter(content);
    const declaredSources = frontmatter.data.sources;
    if (!Array.isArray(declaredSources))
        return false;
    for (const entry of declaredSources) {
        if (typeof entry === "string" && entry.trim() === skillRef)
            return true;
    }
    return false;
}
|
|
188
|
+
/**
 * Read a file as UTF-8 text, returning `undefined` when it does not exist or
 * cannot be read (e.g. it is a directory, or vanished after the existence
 * check).
 */
function readLessonFileBestEffort(filePath) {
    try {
        return fs.existsSync(filePath) ? fs.readFileSync(filePath, "utf8") : undefined;
    }
    catch {
        return undefined;
    }
}
/**
 * Collect lessons related to a skill ref. Every step is best-effort; a
 * failure in one source never prevents the others from contributing.
 *
 * Sources, in order:
 *   1. The lesson file at `<stash>/lessons/<derived>.md`, where the lesson ref
 *      comes from `deriveLessonRef(ref)`.
 *   2. Lesson refs named in `distill_invoked` events for `ref`
 *      (`metadata.lessonRef`), plus the derived ref, resolved via `lookup`.
 *   3. Any `*.md` file in `<stash>/lessons` whose frontmatter `sources` lists
 *      `ref`.
 *
 * Finally, when no `feedback` events exist for `ref`, lessons whose
 * frontmatter has `derived_from_reflect: true` are removed.
 *
 * @param stash     Stash root directory.
 * @param ref       Ref of the asset being reflected on.
 * @param parsedRef Parsed form of `ref`; only `type` is read here.
 * @returns Array of `{ ref, content }`; `[]` when `parsedRef.type` is not
 *          `"skill"`.
 */
async function readRelatedLessons(stash, ref, parsedRef) {
    if (parsedRef.type !== "skill")
        return [];
    const related = new Map();
    const derivedLessonRef = deriveLessonRef(ref);
    const candidateRefs = new Set([derivedLessonRef]);
    const lessonsDir = path.join(stash, "lessons");
    // 1. Derived lesson file. Guarded: the read used to sit outside any try and
    //    could throw out of this otherwise best-effort function.
    const derivedLessonPath = path.join(lessonsDir, `${derivedLessonRef.slice("lesson:".length)}.md`);
    const derivedContent = readLessonFileBestEffort(derivedLessonPath);
    if (derivedContent !== undefined) {
        related.set(derivedLessonRef, { ref: derivedLessonRef, content: derivedContent });
    }
    // 2. Lesson refs recorded by earlier distill runs for this ref.
    try {
        const distillEvents = readEvents({ type: "distill_invoked", ref }).events;
        for (const event of distillEvents) {
            const lessonRef = typeof event.metadata?.lessonRef === "string" ? event.metadata.lessonRef : undefined;
            if (lessonRef?.startsWith("lesson:"))
                candidateRefs.add(lessonRef);
        }
    }
    catch {
        // Best effort only.
    }
    for (const candidateRef of candidateRefs) {
        try {
            const entry = await lookup(parseAssetRef(candidateRef));
            const content = entry?.filePath ? readLessonFileBestEffort(entry.filePath) : undefined;
            if (content === undefined)
                continue;
            related.set(candidateRef, { ref: candidateRef, content });
        }
        catch {
            // Index miss is non-fatal.
        }
    }
    // 3. Lessons on disk that declare this ref as a source.
    let lessonFiles = [];
    try {
        if (fs.existsSync(lessonsDir))
            lessonFiles = fs.readdirSync(lessonsDir);
    }
    catch {
        // Best effort only.
    }
    for (const fileName of lessonFiles) {
        if (!fileName.endsWith(".md"))
            continue;
        // Per-file guard: one unreadable or unparsable entry must not abort the
        // scan of the remaining files.
        try {
            const content = fs.readFileSync(path.join(lessonsDir, fileName), "utf8");
            if (!hasRelatedSkillSource(content, ref))
                continue;
            const lessonRef = `lesson:${fileName.slice(0, -3)}`;
            if (!related.has(lessonRef)) {
                related.set(lessonRef, { ref: lessonRef, content });
            }
        }
        catch {
            // Skip this file; keep scanning.
        }
    }
    // R-4 / #373: Filter out lessons with `derived_from_reflect: true` unless
    // independent feedback exists for the skill. This prevents the echo-chamber
    // risk where reflect-output lessons feed back into the next reflect pass as
    // "independent" evidence, amplifying their own prior outputs over time.
    //
    // ExpeL arXiv:2308.10144: rules need differential evidence from independent
    // sources (success vs failure traces). A lesson that only ever appeared from
    // reflect-internal signals has no such differential signal.
    //
    // "Independent feedback" = any "feedback" events for the skill ref itself.
    let hasIndependentFeedback = false;
    try {
        hasIndependentFeedback = readEvents({ type: "feedback", ref }).events.length > 0;
    }
    catch {
        // Best effort — if we can't check, allow all lessons through.
        hasIndependentFeedback = true;
    }
    if (!hasIndependentFeedback) {
        // Deleting from a Map while iterating it is well-defined in JavaScript.
        for (const [lessonRef, lesson] of related.entries()) {
            try {
                if (parseFrontmatter(lesson.content).data.derived_from_reflect === true) {
                    related.delete(lessonRef);
                }
            }
            catch {
                // If we can't parse the frontmatter, keep the lesson (safe default).
            }
        }
    }
    return [...related.values()];
}
|
|
278
|
+
/**
 * Returns true only when `stdout` is a recognised AKM proposal-skip signal.
 *
 * Two accepted forms:
 *   1. Structured JSON (the whole trimmed stdout must parse as JSON):
 *      `{ skipped: true }` or `{ reason: "<known-skip-reason>" }`, where the
 *      known reasons are `duplicate_pending`, `content_hash_match`,
 *      `cooldown`, and `below_threshold`.
 *   2. Legacy text: stdout containing `proposal skipped` (case-insensitive).
 *
 * The previous `/cooldown/i` check was intentionally narrowed to avoid
 * false positives on real agent error messages that merely contain the word
 * "cooldown" (e.g. "rate limit cooldown exceeded"). Only the tightly scoped
 * forms above count as legitimate skip signals.
 */
function isStructuredCooldownSignal(stdout) {
    let decoded;
    let decodedOk = false;
    try {
        decoded = JSON.parse(stdout.trim());
        decodedOk = true;
    }
    catch {
        // Non-JSON stdout is never a structured cooldown signal.
    }
    if (decodedOk) {
        if (decoded?.skipped === true)
            return true;
        switch (decoded?.reason) {
            case "duplicate_pending":
            case "content_hash_match":
            case "cooldown":
            case "below_threshold":
                return true;
            default:
                break;
        }
    }
    // Legacy text signal emitted by older proposal output lines.
    return /proposal skipped/i.test(stdout);
}
|
|
305
|
+
/**
 * Fallback payload parser for reflect agent stdout (R-6 / #375).
 *
 * Used when the agent did not emit valid JSON (old-style agents, SDK mode
 * without structured output support): tries to recover a proposal payload
 * from raw markdown. Markdown fences are stripped and the result trimmed; the
 * text is accepted only if `looksLikeAssetContent` approves it for the type
 * prefix of `ref` (the part before the first `:`).
 *
 * Strictness rationale: the previous implementation accepted any markdown
 * starting with `#` or `---`, which admitted malformed / hallucinated content
 * as valid proposals. Structured output is preferred where the SDK supports
 * it; this tighter fallback is the safety net. Wiring structured output for
 * `sdkMode === true` is tracked separately, so for now this parser applies to
 * all modes.
 *
 * @param stdout  Raw agent stdout.
 * @param ref     Target asset ref; when falsy nothing is recovered.
 * @param sdkMode Forwarded to `looksLikeAssetContent`.
 * @returns `{ ref, content }` on success, otherwise `undefined`.
 */
function fallbackPayloadFromRawContent(stdout, ref, sdkMode = false) {
    if (ref) {
        const candidate = stripMarkdownFences(stdout).trim();
        if (candidate) {
            const [targetType] = ref.split(":");
            if (looksLikeAssetContent(candidate, sdkMode, targetType)) {
                return { ref, content: candidate };
            }
        }
    }
    return undefined;
}
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
335
|
+
/**
 * Decide whether raw agent output looks like a valid asset payload
 * (R-6 / #375).
 *
 * Tightened from the previous `startsWith("#") || startsWith("---")`:
 *
 *   - YAML frontmatter start (`---`): a closing `\n---` must exist and the
 *     block must contain a `description:` line. This rejects empty
 *     `---\n---\n` blocks and pure delimiter sequences.
 *   - Heading start (`#`): the text must have at least 2 non-blank lines in
 *     total (the heading plus at least one body line), which rejects bare
 *     title stubs.
 *   - In SDK mode (`sdkMode` truthy) with `targetType === "lesson"`, the
 *     frontmatter must additionally contain a `when_to_use:` line. The target
 *     ref type is used rather than a frontmatter `type:` key, which is
 *     non-standard.
 *
 * @param value      Trimmed candidate content.
 * @param sdkMode    Enables the extra lesson check.
 * @param targetType Type prefix of the target ref.
 * @returns `true` when `value` passes the checks above.
 */
function looksLikeAssetContent(value, sdkMode = false, targetType) {
    if (value.startsWith("#")) {
        let nonBlankCount = 0;
        for (const line of value.split("\n")) {
            if (line.trim().length > 0)
                nonBlankCount += 1;
        }
        return nonBlankCount >= 2;
    }
    if (!value.startsWith("---"))
        return false;
    // Search from index 4 so the opening fence's own line break is skipped.
    const closingFenceAt = value.indexOf("\n---", 4);
    if (closingFenceAt === -1)
        return false;
    const frontmatterBlock = value.slice(0, closingFenceAt + 4);
    if (!/^description\s*:/m.test(frontmatterBlock))
        return false;
    const lessonNeedsWhenToUse = sdkMode && targetType === "lesson";
    return lessonNeedsWhenToUse ? /^when_to_use\s*:/m.test(frontmatterBlock) : true;
}
|
|
86
|
-
|
|
87
|
-
|
|
373
|
+
/**
 * Split a markdown blob into its frontmatter text and body.
 *
 * @param raw Markdown text.
 * @returns `{ fmText, body }`:
 *   - `fmText` is the text between the opening and closing `---` fences
 *     (without the fences), `""` for an empty frontmatter block, or `null`
 *     when `raw` does not start with a frontmatter block.
 *   - `body` is everything after the closing fence (one line break after the
 *     fence is consumed), or `raw` unchanged when there is no frontmatter.
 */
function splitFrontmatter(raw) {
    // An empty block (`---` immediately followed by a closing `---` line) has
    // no content line between the fences, so the general pattern below cannot
    // match it; without this case the fences would leak into the body.
    const empty = raw.match(/^---\r?\n---[ \t]*(?:\r?\n([\s\S]*))?$/);
    if (empty)
        return { fmText: "", body: empty[1] ?? "" };
    const m = raw.match(/^---\r?\n([\s\S]*?)\r?\n---\r?\n?([\s\S]*)$/);
    if (!m)
        return { fmText: null, body: raw };
    return { fmText: m[1], body: m[2] };
}
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
384
|
+
/**
 * Serialize a sanitized frontmatter map into a YAML-subset block (without the
 * `---` fences) of the shape `parseFrontmatter` is meant to accept.
 * Conservative: strings, numbers, booleans, and arrays of scalars are written
 * plainly; anything else is JSON.stringified so each entry stays on one line.
 *
 * Why not `core/asset-serialize.ts#serializeFrontmatter`? The canonical helper
 * uses the full `yaml` library, which can emit `|`-block scalars or other
 * shapes that the project's hand-rolled `parseFrontmatter` subset parser
 * cannot read. Reflect output reads its own product back via that subset
 * parser, so this defensive serializer is kept here. The fence/body assembly
 * is shared via `assembleAssetFromString` from `core/asset-serialize.ts`.
 *
 * @param data Frontmatter key/value map. `undefined` values are omitted;
 *             `null` values are written as a bare `key:`.
 * @returns The block's lines joined with `\n` (empty string for an empty map).
 */
function serializeFrontmatter(data) {
    // Line breaks inside a value would break the line-based YAML subset, so any
    // string containing one is folded to a single line (whitespace runs become
    // one space). The reflect prompt forbids multi-line frontmatter values, so
    // this is defensive. Strings without line breaks pass through untouched.
    const foldToSingleLine = (text) => (/[\r\n]/.test(text) ? text.replace(/\s+/g, " ").trim() : text);
    const lines = [];
    for (const [key, value] of Object.entries(data)) {
        if (value === undefined)
            continue;
        if (value === null) {
            lines.push(`${key}:`);
            continue;
        }
        if (typeof value === "string") {
            lines.push(`${key}: ${foldToSingleLine(value)}`);
            continue;
        }
        if (typeof value === "number" || typeof value === "boolean") {
            lines.push(`${key}: ${String(value)}`);
            continue;
        }
        if (Array.isArray(value)) {
            lines.push(`${key}:`);
            for (const item of value) {
                if (item === null || item === undefined)
                    continue;
                if (typeof item === "string") {
                    // Same folding as scalar strings: a multi-line list item
                    // would otherwise spill onto extra lines of the block.
                    lines.push(` - ${foldToSingleLine(item)}`);
                }
                else if (typeof item === "number" || typeof item === "boolean") {
                    lines.push(` - ${String(item)}`);
                }
                else {
                    lines.push(` - ${JSON.stringify(item)}`);
                }
            }
            continue;
        }
        // Objects / unknowns → JSON-string fallback. Reviewer can re-shape on accept.
        lines.push(`${key}: ${JSON.stringify(value)}`);
    }
    return lines.join("\n");
}
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
436
|
+
/**
 * Reflect post-processor — enforces the safety rails described at the top of
 * this file:
 *
 *   1. Restore the source frontmatter so reflect never strips load-bearing
 *      `description`, `when_to_use`, `tags`, etc. The LLM is only allowed to
 *      change the markdown body. Frontmatter fields proposed by the LLM are
 *      treated as a *merge on top* of the source — concrete field renames /
 *      identity changes (`name`, `ref`, `id`, `slug`, `type`) are reverted.
 *   2. Reject responses that shrink or expand the body past the configured
 *      ratio thresholds, when the source body is large enough to be reliable.
 *   3. Drop any leading `---` frontmatter block the LLM produced inside the
 *      body — the prompt asks it to emit body only, and a stray YAML preamble
 *      on top of an executable-typed asset is dangerous.
 *
 * Caller branches:
 *   - On `reject`: surface as a failure with the reported reason.
 *   - Otherwise: substitute `content` (and optional `frontmatter`) into the
 *     proposal payload.
 *
 * Source-less / new-asset case (`sourceContent === undefined`): we still strip
 * the LLM's frontmatter block from `content` and re-emit a clean block built
 * from `payload.frontmatter` so identity fields can be enforced. Size guard
 * is skipped because there is no source to compare against.
 *
 * @param payload Proposal payload; `content` is read, and `frontmatter` is
 *   merged only when it is a plain (non-array) object.
 * @param sourceContent Current text of the target asset, or undefined/empty
 *   when there is no source.
 * @param targetRef Ref used only in the rejection message.
 * @returns `{ content, warnings, reject }` when the size check fails (content
 *   is the untouched `payload.content`); otherwise `{ content, warnings }` plus
 *   `frontmatter` when the merged frontmatter is non-empty.
 */
function sanitizeReflectPayload(payload, sourceContent, targetRef) {
    const warnings = [];
    const { fmText: sourceFmText, body: sourceBody } = sourceContent
        ? splitFrontmatter(sourceContent)
        : { fmText: null, body: "" };
    const sourceFm = sourceFmText !== null ? parseFrontmatter(sourceContent ?? "").data : {};
    const { fmText: llmFmText, body: rawLlmBody } = splitFrontmatter(payload.content);
    // Parse the LLM-emitted frontmatter (if any) so we can merge its non-identity
    // keys into the source frontmatter. An unparseable block is simply ignored.
    let llmFm = {};
    if (llmFmText !== null) {
        warnings.push("LLM emitted frontmatter in content; stripped and merged through identity guard.");
        try {
            // Copy so the `delete` below never mutates the parser's own object.
            llmFm = { ...parseFrontmatter(payload.content).data };
        }
        catch {
            llmFm = {};
        }
    }
    // Also accept the explicit `frontmatter` field on the payload. Arrays are
    // `typeof "object"` too; spreading one would inject index keys ("0", "1",
    // ...) into the asset's frontmatter, so only plain objects are merged.
    const explicitFm = payload.frontmatter;
    if (explicitFm && typeof explicitFm === "object" && !Array.isArray(explicitFm)) {
        llmFm = { ...llmFm, ...explicitFm };
    }
    // Strip protected identity fields from any LLM-supplied frontmatter — they
    // must come from the source asset, never from the LLM.
    for (const field of PROTECTED_FRONTMATTER_FIELDS) {
        if (field in llmFm && llmFm[field] !== sourceFm[field]) {
            warnings.push(`LLM attempted to change protected frontmatter field "${field}"; restored from source.`);
            delete llmFm[field];
        }
    }
    // Build the effective frontmatter: source overlaid with sanitized LLM fields.
    // Source fields always win on identity keys.
    const mergedFm = { ...sourceFm, ...llmFm };
    for (const field of PROTECTED_FRONTMATTER_FIELDS) {
        if (field in sourceFm) {
            mergedFm[field] = sourceFm[field];
        }
    }
    const cleanedBody = rawLlmBody.replace(/^\s+/, "");
    // Size guard — only when source body is meaningfully large. The pure
    // predicate lives in `core/proposal-quality-validators` so the same check
    // also runs inside `runProposalValidators` on `proposal accept`.
    const sizeOutcome = checkReflectSize(sourceBody, cleanedBody);
    if (!sizeOutcome.ok) {
        const isShrinkage = sizeOutcome.code === "EXCESSIVE_SHRINKAGE";
        const pct = (sizeOutcome.ratio * 100).toFixed(0);
        // NOTE(review): these limits are hard-coded in the message; confirm they
        // match the thresholds actually applied by `checkReflectSize`.
        const limit = isShrinkage ? "minimum 50%" : "maximum 200%";
        const cause = isShrinkage
            ? "Concrete content was likely deleted."
            : "Speculative material was likely added.";
        return {
            content: payload.content,
            warnings,
            reject: {
                reason: "parse_error",
                error: `Reflect rejected: ${sizeOutcome.code} — proposed body is ${pct}% of source (${limit}) for ref ${targetRef}. ${cause}`,
            },
        };
    }
    // Reassemble final content: merged frontmatter + cleaned body.
    // When there is no frontmatter at all (no source fm and no LLM fm), emit body
    // only so we don't add a stray `---` to e.g. a script asset that bypassed the
    // type guard via a custom registration.
    const hasFrontmatter = Object.keys(mergedFm).length > 0;
    const reassembled = hasFrontmatter
        ? assembleAssetFromString(serializeFrontmatter(mergedFm), cleanedBody)
        : cleanedBody;
    return {
        content: reassembled,
        ...(hasFrontmatter ? { frontmatter: mergedFm } : {}),
        warnings,
    };
}
|
|
536
|
+
/**
 * JSON Schema for structured reflect output. Passed to `chatCompletion` when
 * the connection has `supportsJsonSchema: true` so the model returns a strict
 * JSON object matching {@link AgentProposalPayload}.
 *
 * Only `ref` and `content` are required. Unknown top-level keys are rejected
 * (`additionalProperties: false`), while the optional `frontmatter` object
 * accepts arbitrary keys.
 */
export const REFLECT_JSON_SCHEMA = {
    type: "object",
    required: ["ref", "content"],
    additionalProperties: false,
    properties: {
        ref: { type: "string", description: "Asset ref in type:name format (e.g. lesson:my-lesson)." },
        content: { type: "string", description: "Full markdown content for the asset." },
        frontmatter: {
            type: "object",
            description: "Optional frontmatter key-value pairs to merge into the asset.",
            additionalProperties: true,
        },
        // Phase 6A (Advantage D6a): self-reported confidence in [0, 1]. When the
        // LLM is well-calibrated, scores at or above the configured threshold
        // (default 0.8) drive auto-accept in `akm improve`. Out-of-range or
        // non-finite values are clamped/dropped by the parser — the schema keeps
        // the field optional so older agents that don't emit a score still work.
        confidence: {
            type: "number",
            minimum: 0,
            maximum: 1,
            description: "Optional self-reported quality confidence in [0, 1]. Proposals with confidence >= the active threshold (default 0.8) may be auto-accepted by `akm improve`.",
        },
    },
};
|
|
566
|
+
/** Follow-up user message appended after the prior draft on refinement passes (Self-Refine loop). */
const REFLECT_CRITIQUE_PROMPT = "Your previous proposal is shown above. Please review it critically and provide an improved version that is more specific, actionable, and avoids any issues with the previous attempt. Return only the improved JSON proposal.";
/**
 * Run a single reflect iteration directly via the LLM API (v2 config path).
 *
 * Resolves to an {@link AgentRunResult}-shaped object so the call can sit in
 * the same dispatch loop as the agent-based runners. It never rejects: any
 * error thrown by the chat call is folded into a failure result.
 *
 * @param opts.prompt User prompt; a nullish value is sent as an empty string.
 * @param opts.connection Connection handed through to the chat function.
 * @param opts.priorDraft Previous draft; replayed as an assistant turn followed
 *   by the critique prompt, but only when `opts.iteration > 0`.
 * @param opts.iteration Zero-based refinement pass index.
 * @param opts.responseSchema Optional schema forwarded to `chatCompletion`
 *   (not forwarded to an injected `opts.chat`).
 * @param opts.chat Optional injected `(connection, messages)` chat function.
 * @returns On success `{ ok: true, stdout, stderr: "", durationMs, exitCode: 0 }`
 *   where `stdout` is the raw response; on failure `{ ok: false, stdout: "",
 *   stderr, durationMs, exitCode: 1, reason: "non_zero_exit", error }`.
 *
 * NOTE(review): `opts.timeoutMs` is supplied by the caller but is not read
 * anywhere in this function — confirm whether the timeout is meant to be
 * enforced here or via the connection.
 */
export async function runReflectViaLlm(opts) {
    const startedAt = Date.now();
    const elapsedMs = () => Date.now() - startedAt;
    const isRefinementPass = opts.priorDraft !== undefined && opts.iteration > 0;
    const messages = [
        { role: "user", content: opts.prompt ?? "" },
        ...(isRefinementPass
            ? [
                { role: "assistant", content: opts.priorDraft },
                { role: "user", content: REFLECT_CRITIQUE_PROMPT },
            ]
            : []),
    ];
    try {
        // Injected chat (test seam) takes two arguments and no schema; the
        // production call forwards the optional structured-output schema.
        const stdout = opts.chat
            ? await opts.chat(opts.connection, messages)
            : await chatCompletion(opts.connection, messages, opts.responseSchema !== undefined ? { responseSchema: opts.responseSchema } : undefined);
        return { ok: true, stdout, stderr: "", durationMs: elapsedMs(), exitCode: 0 };
    }
    catch (err) {
        const msg = err instanceof Error ? err.message : String(err);
        return {
            ok: false,
            stdout: "",
            stderr: msg,
            durationMs: elapsedMs(),
            exitCode: 1,
            reason: "non_zero_exit",
            error: msg,
        };
    }
}
|
|
99
614
|
function failureEnvelope(result, ref, fallbackReason = "non_zero_exit") {
    // Start from the shared failure fields (built by `baseFailureFields` from
    // the run result and the fallback reason), then attach `ref` only when the
    // caller supplied a truthy one.
    const envelope = { ...baseFailureFields(result, fallbackReason) };
    if (ref) {
        envelope.ref = ref;
    }
    return envelope;
}
|
|
112
620
|
export async function akmReflect(options = {}) {
|
|
@@ -121,99 +629,637 @@ export async function akmReflect(options = {}) {
|
|
|
121
629
|
...(options.profile ? { profile: options.profile } : {}),
|
|
122
630
|
},
|
|
123
631
|
});
|
|
632
|
+
// Fix #3 (observability 0.8.0): every failure path below MUST emit
|
|
633
|
+
// `reflect_completed` so observers can close the invoke/complete loop. The
|
|
634
|
+
// three success-side `reflect_completed` emit sites carry rich metadata
|
|
635
|
+
// (qualityRejected, sanitized, proposalId, etc.); the failure-side emits
|
|
636
|
+
// carry `{ok: false, reason}` plus the ref when known. Stable failure
|
|
637
|
+
// reasons line up with `AgentFailureReason`: "parse_error", "non_zero_exit",
|
|
638
|
+
// "cooldown", "timeout", "spawn_failed", "llm_*", plus the synthetic
|
|
639
|
+
// "ref_mismatch" / "enoent" / "draft_missing" subtypes for cases the agent
|
|
640
|
+
// surface conflates as "parse_error". Sub-reasons land in `subreason`.
|
|
641
|
+
const emitReflectFailed = (reason, subreason, ref, extra) => {
|
|
642
|
+
appendEvent({
|
|
643
|
+
eventType: "reflect_completed",
|
|
644
|
+
...(ref ? { ref } : {}),
|
|
645
|
+
metadata: {
|
|
646
|
+
source: "reflect",
|
|
647
|
+
ok: false,
|
|
648
|
+
reason,
|
|
649
|
+
subreason,
|
|
650
|
+
...(extra ?? {}),
|
|
651
|
+
},
|
|
652
|
+
});
|
|
653
|
+
};
|
|
124
654
|
// 2. Resolve target asset content (if a ref is supplied).
|
|
125
655
|
let assetContent;
|
|
126
656
|
let parsedRef;
|
|
127
657
|
if (options.ref) {
|
|
128
658
|
parsedRef = parseAssetRef(options.ref);
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
659
|
+
// 2a. Type guard — reflect only operates on asset types whose canonical
|
|
660
|
+
// shape is `frontmatter + markdown body`. Refuse non-markdown types
|
|
661
|
+
// (script / vault / task) up-front so reflect never prepends YAML to a
|
|
662
|
+
// `.ts` file or rewrites a `.env` blob as prose. See REFLECT_ALLOWED_TYPES.
|
|
663
|
+
if (!REFLECT_ALLOWED_TYPES.has(parsedRef.type)) {
|
|
664
|
+
emitReflectFailed("parse_error", "unsupported_type", options.ref, { type: parsedRef.type });
|
|
665
|
+
return {
|
|
666
|
+
schemaVersion: 1,
|
|
667
|
+
ok: false,
|
|
668
|
+
reason: "parse_error",
|
|
669
|
+
error: `Reflect refused: asset type "${parsedRef.type}" is not supported by reflect (only markdown-canonical types are allowed: ${[...REFLECT_ALLOWED_TYPES].sort().join(", ")}). Use \`akm propose\` or edit the file directly.`,
|
|
670
|
+
ref: options.ref,
|
|
671
|
+
exitCode: null,
|
|
672
|
+
};
|
|
134
673
|
}
|
|
135
|
-
|
|
136
|
-
//
|
|
674
|
+
if (options.assetContent !== undefined) {
|
|
675
|
+
// Test seam — caller pre-loaded the source content.
|
|
676
|
+
assetContent = options.assetContent;
|
|
677
|
+
}
|
|
678
|
+
else {
|
|
679
|
+
try {
|
|
680
|
+
const entry = await lookup(parsedRef);
|
|
681
|
+
if (entry?.filePath && fs.existsSync(entry.filePath)) {
|
|
682
|
+
assetContent = fs.readFileSync(entry.filePath, "utf8");
|
|
683
|
+
}
|
|
684
|
+
}
|
|
685
|
+
catch {
|
|
686
|
+
// Index miss is non-fatal — the agent can still propose a fresh asset.
|
|
687
|
+
}
|
|
137
688
|
}
|
|
138
689
|
}
|
|
139
690
|
// 3. Resolve agent profile. ConfigError surfaces as a thrown error so the
|
|
140
691
|
// CLI dispatcher renders the standard envelope.
|
|
692
|
+
//
|
|
693
|
+
// When an explicit --profile flag is given, honour it directly (existing
|
|
694
|
+
// behaviour). Otherwise use resolveProcessAgentProfile so that per-process
|
|
695
|
+
// agent config (agent.processes["reflect"]) is picked up automatically.
|
|
141
696
|
let profile;
|
|
697
|
+
let resolvedTimeoutMs = options.timeoutMs;
|
|
698
|
+
let runnerSpec;
|
|
142
699
|
try {
|
|
143
|
-
|
|
700
|
+
if (options.agentProfile) {
|
|
701
|
+
// Test seam: injected profile bypasses all config.
|
|
702
|
+
profile = options.agentProfile;
|
|
703
|
+
}
|
|
704
|
+
else if (options.runner) {
|
|
705
|
+
// Caller-provided RunnerSpec (used in tests and --dry-run-resolve).
|
|
706
|
+
runnerSpec = options.runner;
|
|
707
|
+
}
|
|
708
|
+
else {
|
|
709
|
+
const cfg = options.config ?? loadConfig();
|
|
710
|
+
const reflectProcess = cfg.profiles?.improve?.default?.processes?.reflect;
|
|
711
|
+
// Resolve the runner from the improve profile's reflect entry when present.
|
|
712
|
+
runnerSpec = resolveImproveProcessRunnerFromProfile(reflectProcess, cfg) ?? undefined;
|
|
713
|
+
if (runnerSpec) {
|
|
714
|
+
if (resolvedTimeoutMs === undefined && runnerSpec.timeoutMs !== undefined) {
|
|
715
|
+
resolvedTimeoutMs = runnerSpec.timeoutMs;
|
|
716
|
+
}
|
|
717
|
+
}
|
|
718
|
+
else {
|
|
719
|
+
if (options.profile) {
|
|
720
|
+
// Explicit --profile flag wins over process config.
|
|
721
|
+
profile = resolveAgentProfile(options);
|
|
722
|
+
}
|
|
723
|
+
else {
|
|
724
|
+
// Use per-process config resolution (falls back to defaults.agent).
|
|
725
|
+
const agent = options.agentConfig ?? loadAgentConfigFromDisk();
|
|
726
|
+
const processName = options.agentProcess ?? "reflect";
|
|
727
|
+
const resolved = resolveProcessAgentProfile(processName, agent);
|
|
728
|
+
profile = resolved.profile;
|
|
729
|
+
// Only apply process-resolved timeoutMs when caller didn't supply one.
|
|
730
|
+
if (resolvedTimeoutMs === undefined) {
|
|
731
|
+
resolvedTimeoutMs = resolved.timeoutMs;
|
|
732
|
+
}
|
|
733
|
+
}
|
|
734
|
+
}
|
|
735
|
+
}
|
|
144
736
|
}
|
|
145
737
|
catch (err) {
|
|
146
738
|
if (err instanceof ConfigError || err instanceof UsageError)
|
|
147
739
|
throw err;
|
|
148
740
|
throw err;
|
|
149
741
|
}
|
|
150
|
-
//
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
742
|
+
// Ensure profile is set for agent/sdk runners that don't use runnerSpec
|
|
743
|
+
if (!runnerSpec && !profile) {
|
|
744
|
+
const agent = options.agentConfig ?? loadAgentConfigFromDisk();
|
|
745
|
+
profile = resolveAgentProfile({ ...options, agentConfig: agent });
|
|
746
|
+
}
|
|
747
|
+
// Derive a display name for logging — either from the resolved profile or the runnerSpec.
|
|
748
|
+
const resolvedProfileName = profile?.name ??
|
|
749
|
+
(runnerSpec?.kind === "llm"
|
|
750
|
+
? `llm:${runnerSpec.connection.model}`
|
|
751
|
+
: runnerSpec?.kind !== undefined
|
|
752
|
+
? `${runnerSpec.kind}:${runnerSpec.profile?.name ?? "unknown"}`
|
|
753
|
+
: "unknown");
|
|
754
|
+
// 4. Build the shared prompt inputs — feedback, hints, lessons, rejected
|
|
755
|
+
// proposals. These are stable across refinement iterations; only the
|
|
756
|
+
// `priorDraft` field changes per-iteration (R-1 / #372).
|
|
154
757
|
const feedback = readRecentFeedback(options.ref);
|
|
155
758
|
const schemaHints = buildSchemaHints(parsedRef?.type ?? "", assetContent);
|
|
156
|
-
const
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
//
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
759
|
+
const relatedLessons = options.ref && parsedRef ? await readRelatedLessons(stash, options.ref, parsedRef) : [];
|
|
760
|
+
// Reflexion-style verbal-RL: inject rejected proposals so the agent avoids
|
|
761
|
+
// reproducing proposals that have already been reviewed and refused.
|
|
762
|
+
const rejectedProposals = readRejectedProposals(stash, options.ref);
|
|
763
|
+
// 5. Spawn the agent — with optional Self-Refine loop (R-1 / #372).
|
|
764
|
+
//
|
|
765
|
+
// maxRefineIters controls how many agent invocations are made:
|
|
766
|
+
// - 1 (default): single-shot, same as pre-R-1 behaviour
|
|
767
|
+
// - 2–3: on each subsequent pass, the prior draft is injected back into
|
|
768
|
+
// the prompt as Self-Refine critique context (arXiv:2303.17651)
|
|
769
|
+
//
|
|
770
|
+
// The loop exits early when the agent returns the same content as before
|
|
771
|
+
// (no-op refinement) to avoid wasting tokens on identical iterations.
|
|
772
|
+
const MAX_REFINE_ITERS = 3;
|
|
773
|
+
const maxRefineIters = Math.min(Math.max(1, options.maxRefineIters ?? 1), MAX_REFINE_ITERS);
|
|
774
|
+
const agentEnv = options.eventSource === "improve" ? { AKM_EVENT_SOURCE: "improve" } : {};
|
|
775
|
+
// Determine whether this dispatch can honour the file-write contract.
|
|
776
|
+
// Agent CLI + OpenCode SDK runners both have filesystem access; the direct
|
|
777
|
+
// LLM HTTP runner does NOT (see `src/llm/call-ai.ts:64-71`). The v1
|
|
778
|
+
// `profile.sdkMode` fallback also runs the SDK so it counts as file-writable.
|
|
779
|
+
// Test seams (`options.runAgentOptions.spawn`) emulate agent CLI behaviour so
|
|
780
|
+
// they participate as well — tests opt out by simply not writing the file.
|
|
781
|
+
const runnerSupportsFileWrite = runnerSpec ? runnerSpec.kind !== "llm" : true;
|
|
782
|
+
// Initialized to a sentinel; always overwritten in the first loop iteration
|
|
783
|
+
// (maxRefineIters is clamped to >= 1 above). TypeScript cannot prove a
|
|
784
|
+
// for-loop always runs at least once, so we use a type assertion here.
|
|
785
|
+
let result = {};
|
|
786
|
+
let priorDraft;
|
|
787
|
+
// Track every draft file path we synthesize so cleanup can remove them on
|
|
788
|
+
// every return path (success and failure). Mirrors propose's unlink pattern
|
|
789
|
+
// in `src/commands/propose.ts:215-226` but generalised to N refinement
|
|
790
|
+
// iterations. Always called via {@link cleanupDrafts} below.
|
|
791
|
+
const draftPathsToCleanup = [];
|
|
792
|
+
// Last iteration's draft path — read back if the agent wrote it.
|
|
793
|
+
let lastDraftPath;
|
|
794
|
+
// Best-effort unlink: tolerate already-deleted files (we may have unlinked
|
|
795
|
+
// an intermediate iteration's draft) and unwritable paths. Never throws —
|
|
796
|
+
// the proposal result is the source of truth for the caller.
|
|
797
|
+
const cleanupDrafts = () => {
|
|
798
|
+
for (const p of draftPathsToCleanup) {
|
|
799
|
+
try {
|
|
800
|
+
if (fs.existsSync(p))
|
|
801
|
+
fs.unlinkSync(p);
|
|
802
|
+
}
|
|
803
|
+
catch {
|
|
804
|
+
// Swallow — cleanup is best-effort.
|
|
805
|
+
}
|
|
806
|
+
}
|
|
171
807
|
};
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
// 6. Resolve the proposal content from stdout JSON.
|
|
808
|
+
// `payload` is populated inside the try (either by reading the draft file
|
|
809
|
+
// or parsing stdout JSON). Hoisted here so the post-try sections (R-3 ref
|
|
810
|
+
// guard, quality gate, sanitizer, createProposal) can use it after the
|
|
811
|
+
// drafts have been cleaned up.
|
|
177
812
|
let payload;
|
|
178
813
|
try {
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
814
|
+
for (let iter = 0; iter < maxRefineIters; iter++) {
|
|
815
|
+
// Synthesize a fresh tmp path per iteration so refinement passes never
|
|
816
|
+
// clobber an earlier draft (and so reading back is unambiguous).
|
|
817
|
+
const iterDraftPath = runnerSupportsFileWrite ? synthesizeReflectDraftPath(options.ref) : undefined;
|
|
818
|
+
if (iterDraftPath) {
|
|
819
|
+
draftPathsToCleanup.push(iterDraftPath);
|
|
820
|
+
lastDraftPath = iterDraftPath;
|
|
821
|
+
}
|
|
822
|
+
const prompt = buildReflectPrompt({
|
|
823
|
+
...(options.ref ? { ref: options.ref } : {}),
|
|
824
|
+
...(parsedRef?.type ? { type: parsedRef.type } : {}),
|
|
825
|
+
...(parsedRef?.name ? { name: parsedRef.name } : {}),
|
|
826
|
+
...(assetContent !== undefined ? { assetContent } : {}),
|
|
827
|
+
...(feedback.length > 0 ? { feedback } : {}),
|
|
828
|
+
...(schemaHints.length > 0 ? { schemaHints } : {}),
|
|
829
|
+
...(relatedLessons.length > 0 ? { relatedLessons } : {}),
|
|
830
|
+
...(options.task ? { task: options.task } : {}),
|
|
831
|
+
...(options.avoidPatterns && options.avoidPatterns.length > 0 ? { avoidPatterns: options.avoidPatterns } : {}),
|
|
832
|
+
...(rejectedProposals.length > 0 ? { rejectedProposals } : {}),
|
|
833
|
+
// R-1: inject prior draft as self-critique target on iterations > 0
|
|
834
|
+
...(priorDraft !== undefined ? { priorDraft } : {}),
|
|
835
|
+
// Issue A (#reflect-pipeline file-write contract): when the runner can
|
|
836
|
+
// touch the filesystem, instruct the agent to write the proposal body
|
|
837
|
+
// to a tmp file instead of inlining it in JSON. Avoids parse failures
|
|
838
|
+
// on long bodies (e.g. knowledge:systems/KOKORO_USAGE_GUIDE 8.4KB).
|
|
839
|
+
...(iterDraftPath ? { draftFilePath: iterDraftPath } : {}),
|
|
840
|
+
});
|
|
841
|
+
let iterResult;
|
|
842
|
+
if (options.runAgentOptions?.spawn) {
|
|
843
|
+
// Test seam: use raw runAgent with injected spawn so tests remain deterministic.
|
|
844
|
+
const resolvedProfile = profile;
|
|
845
|
+
if (!resolvedProfile) {
|
|
846
|
+
throw new Error("internal: reflect test-seam path requires a resolved agent profile");
|
|
847
|
+
}
|
|
848
|
+
const runOptions = {
|
|
849
|
+
stdio: "captured",
|
|
850
|
+
parseOutput: "text",
|
|
851
|
+
...(resolvedTimeoutMs !== undefined ? { timeoutMs: resolvedTimeoutMs } : {}),
|
|
852
|
+
...(Object.keys(agentEnv).length > 0 ? { env: agentEnv } : {}),
|
|
853
|
+
...(options.runAgentOptions ?? {}),
|
|
854
|
+
};
|
|
855
|
+
iterResult = await runAgent(resolvedProfile, prompt, runOptions);
|
|
856
|
+
}
|
|
857
|
+
else if (runnerSpec) {
|
|
858
|
+
// v2: dispatch through unified RunnerSpec
|
|
859
|
+
const runOptions = {
|
|
860
|
+
stdio: "captured",
|
|
861
|
+
parseOutput: "text",
|
|
862
|
+
...(Object.keys(agentEnv).length > 0 ? { env: agentEnv } : {}),
|
|
863
|
+
};
|
|
864
|
+
switch (runnerSpec.kind) {
|
|
865
|
+
case "llm":
|
|
866
|
+
// LLM HTTP path — `draftFilePath` is accepted for type symmetry
|
|
867
|
+
// (see `RunReflectViaLlmOptions.draftFilePath` docstring) but is
|
|
868
|
+
// intentionally a no-op. The prompt builder above also did not
|
|
869
|
+
// include the file-write contract for this kind, so the LLM is
|
|
870
|
+
// still asked for JSON via stdout.
|
|
871
|
+
iterResult = await runReflectViaLlm({
|
|
872
|
+
prompt,
|
|
873
|
+
connection: runnerSpec.connection,
|
|
874
|
+
timeoutMs: runnerSpec.timeoutMs ?? (typeof resolvedTimeoutMs === "number" ? resolvedTimeoutMs : undefined),
|
|
875
|
+
priorDraft,
|
|
876
|
+
iteration: iter,
|
|
877
|
+
responseSchema: REFLECT_JSON_SCHEMA,
|
|
878
|
+
chat: options.chat,
|
|
879
|
+
});
|
|
880
|
+
break;
|
|
881
|
+
case "sdk":
|
|
882
|
+
iterResult = await runOpencodeSdk(runnerSpec.profile, prompt ?? "", runOptions);
|
|
883
|
+
break;
|
|
884
|
+
case "agent":
|
|
885
|
+
iterResult = await runAgent(runnerSpec.profile, prompt, {
|
|
886
|
+
...runOptions,
|
|
887
|
+
...(runnerSpec.timeoutMs !== undefined ? { timeoutMs: runnerSpec.timeoutMs } : {}),
|
|
888
|
+
});
|
|
889
|
+
break;
|
|
890
|
+
}
|
|
891
|
+
}
|
|
892
|
+
else {
|
|
893
|
+
// Production path (v1): dispatch directly to the appropriate runner.
|
|
894
|
+
// The fallback at the end of step 3 guarantees `profile` is set whenever
|
|
895
|
+
// `runnerSpec` is undefined, but TS can't prove that across the loop +
|
|
896
|
+
// await boundary — narrow into a const.
|
|
897
|
+
const resolvedProfile = profile;
|
|
898
|
+
if (!resolvedProfile) {
|
|
899
|
+
throw new Error("internal: reflect v1 dispatch reached without a resolved agent profile or runnerSpec");
|
|
900
|
+
}
|
|
901
|
+
const runOptions = {
|
|
902
|
+
stdio: "captured",
|
|
903
|
+
parseOutput: "text",
|
|
904
|
+
...(resolvedTimeoutMs !== undefined ? { timeoutMs: resolvedTimeoutMs } : {}),
|
|
905
|
+
...(Object.keys(agentEnv).length > 0 ? { env: agentEnv } : {}),
|
|
906
|
+
};
|
|
907
|
+
iterResult = resolvedProfile.sdkMode
|
|
908
|
+
? await runOpencodeSdk(resolvedProfile, prompt ?? "", runOptions)
|
|
909
|
+
: await runAgent(resolvedProfile, prompt, runOptions);
|
|
910
|
+
}
|
|
911
|
+
result = iterResult;
|
|
912
|
+
if (!iterResult.ok)
|
|
913
|
+
break; // surface failure after loop
|
|
914
|
+
// On success, extract the draft content for the next iteration.
|
|
915
|
+
// If the agent returns the same content as the prior draft, stop early
|
|
916
|
+
// (no-op refinement) to avoid wasting tokens on identical iterations.
|
|
917
|
+
if (iter < maxRefineIters - 1) {
|
|
918
|
+
const nextDraft = iterResult.stdout ?? "";
|
|
919
|
+
if (priorDraft !== undefined && nextDraft === priorDraft)
|
|
920
|
+
break;
|
|
921
|
+
priorDraft = nextDraft;
|
|
922
|
+
}
|
|
185
923
|
}
|
|
186
|
-
|
|
924
|
+
const finalResult = result;
|
|
925
|
+
if (!finalResult.ok) {
|
|
926
|
+
// B3: ENOENT / not-found gives an actionable hint.
|
|
927
|
+
if (isEnoentFailure(finalResult)) {
|
|
928
|
+
emitReflectFailed("spawn_failed", "enoent", options.ref, {
|
|
929
|
+
...(finalResult.exitCode !== undefined ? { exitCode: finalResult.exitCode } : {}),
|
|
930
|
+
});
|
|
931
|
+
return {
|
|
932
|
+
...failureEnvelope(finalResult, options.ref),
|
|
933
|
+
error: enoentHintMessage(profile?.bin ?? resolvedProfileName),
|
|
934
|
+
};
|
|
935
|
+
}
|
|
936
|
+
const envelope = failureEnvelope(finalResult, options.ref);
|
|
937
|
+
emitReflectFailed(envelope.reason, "agent_crash", options.ref, {
|
|
938
|
+
...(envelope.exitCode !== null ? { exitCode: envelope.exitCode } : {}),
|
|
939
|
+
});
|
|
940
|
+
return envelope;
|
|
941
|
+
}
|
|
942
|
+
// Re-alias to `result` for the downstream code that references it.
|
|
943
|
+
result = finalResult;
|
|
944
|
+
// 6. Resolve the proposal content.
|
|
945
|
+
//
|
|
946
|
+
// Path A (file-write contract — preferred for agent/sdk runners on long
|
|
947
|
+
// assets): the agent wrote the body to `lastDraftPath` and printed
|
|
948
|
+
// `DRAFT_WRITTEN` on stdout. Load the body from disk and synthesize a
|
|
949
|
+
// payload. The `EXCESSIVE_EXPANSION`/schema-shape gates downstream still
|
|
950
|
+
// apply — they validate content, not transport.
|
|
951
|
+
//
|
|
952
|
+
// Path B (legacy JSON stdout): the agent inlined the proposal body in
|
|
953
|
+
// JSON on stdout. Falls through to `parseAgentProposalPayload`. Also the
|
|
954
|
+
// path used by the LLM HTTP runner, which cannot honour file-write.
|
|
955
|
+
const draftFileExists = lastDraftPath !== undefined && fs.existsSync(lastDraftPath) && fs.statSync(lastDraftPath).size > 0;
|
|
956
|
+
const draftSignaled = stdoutSignalsDraftWritten(result.stdout);
|
|
957
|
+
if (draftSignaled && lastDraftPath && !draftFileExists) {
|
|
958
|
+
// Agent claimed to write the draft but the file is missing or empty.
|
|
959
|
+
// Surface as a parse_error rather than silently falling through — the
|
|
960
|
+
// alternative would be parsing the `DRAFT_WRITTEN` sentinel as JSON,
|
|
961
|
+
// which is guaranteed to fail with a confusing message.
|
|
962
|
+
emitReflectFailed("parse_error", "draft_missing", options.ref, {
|
|
963
|
+
...(result.exitCode !== null ? { exitCode: result.exitCode } : {}),
|
|
964
|
+
});
|
|
187
965
|
return {
|
|
188
966
|
schemaVersion: 1,
|
|
189
967
|
ok: false,
|
|
190
968
|
reason: "parse_error",
|
|
191
|
-
error:
|
|
969
|
+
error: `Agent emitted DRAFT_WRITTEN but draft file is missing or empty (${lastDraftPath}). The file-write contract failed; either the agent's file tools are broken or the path was unwritable.`,
|
|
192
970
|
...(options.ref ? { ref: options.ref } : {}),
|
|
193
971
|
exitCode: result.exitCode,
|
|
194
972
|
stdout: result.stdout,
|
|
195
973
|
...(result.stderr ? { stderr: result.stderr } : {}),
|
|
196
974
|
};
|
|
197
975
|
}
|
|
976
|
+
if (draftFileExists && lastDraftPath) {
|
|
977
|
+
// Happy path: agent wrote the body to disk. Use the ref the caller
|
|
978
|
+
// supplied (or a placeholder when omitted — the R-3 ref-mismatch guard
|
|
979
|
+
// below has no effect when there is no expected ref).
|
|
980
|
+
const fileContent = fs.readFileSync(lastDraftPath, "utf8");
|
|
981
|
+
payload = {
|
|
982
|
+
ref: options.ref ?? "",
|
|
983
|
+
content: fileContent,
|
|
984
|
+
};
|
|
985
|
+
// The agent followed the file-write contract — `payload.ref` mirrors the
|
|
986
|
+
// caller's expected ref, so the R-3 guard below cannot fire. The agent
|
|
987
|
+
// had no opportunity to retarget the proposal. If the ref was omitted
|
|
988
|
+
// entirely, downstream `createProposal` will reject the empty ref.
|
|
989
|
+
}
|
|
990
|
+
else {
|
|
991
|
+
try {
|
|
992
|
+
payload = parseAgentProposalPayload(result.stdout ?? "");
|
|
993
|
+
}
|
|
994
|
+
catch (err) {
|
|
995
|
+
const fallback = fallbackPayloadFromRawContent(result.stdout ?? "", options.ref, profile?.sdkMode ?? false);
|
|
996
|
+
if (fallback) {
|
|
997
|
+
payload = fallback;
|
|
998
|
+
}
|
|
999
|
+
else {
|
|
1000
|
+
// Reclassify cooldown/skip messages that arrive as stdout text instead of
|
|
1001
|
+
// valid proposal JSON. These are legitimate skip signals, not parse failures,
|
|
1002
|
+
// and should not pollute reflectFailedActions or recentErrors injection.
|
|
1003
|
+
const stdoutText = result.stdout ?? "";
|
|
1004
|
+
const isCooldownSignal = isStructuredCooldownSignal(stdoutText);
|
|
1005
|
+
const reason = isCooldownSignal ? "cooldown" : "parse_error";
|
|
1006
|
+
emitReflectFailed(reason, isCooldownSignal ? "stdout_cooldown_signal" : "parse_error", options.ref, {
|
|
1007
|
+
...(result.exitCode !== null ? { exitCode: result.exitCode } : {}),
|
|
1008
|
+
});
|
|
1009
|
+
return {
|
|
1010
|
+
schemaVersion: 1,
|
|
1011
|
+
ok: false,
|
|
1012
|
+
reason,
|
|
1013
|
+
error: err instanceof Error ? err.message : String(err),
|
|
1014
|
+
...(options.ref ? { ref: options.ref } : {}),
|
|
1015
|
+
exitCode: result.exitCode,
|
|
1016
|
+
stdout: result.stdout,
|
|
1017
|
+
...(result.stderr ? { stderr: result.stderr } : {}),
|
|
1018
|
+
};
|
|
1019
|
+
}
|
|
1020
|
+
}
|
|
1021
|
+
}
|
|
198
1022
|
}
|
|
199
|
-
|
|
1023
|
+
finally {
|
|
1024
|
+
// Always remove tmp draft files — success, failure, or exception. Returns
|
|
1025
|
+
// inside the try above trigger this block before the function exits. Code
|
|
1026
|
+
// after this point uses the already-loaded `payload` and never touches the
|
|
1027
|
+
// draft paths.
|
|
1028
|
+
cleanupDrafts();
|
|
1029
|
+
}
|
|
1030
|
+
// 6b. Validate payload.ref === options.ref (R-3 / #366).
|
|
1031
|
+
// A hallucinating agent can silently retarget proposals to a different ref.
|
|
1032
|
+
// This guard normalises both refs through parseAssetRef so origin-prefix
|
|
1033
|
+
// differences do not cause false positives, then rejects mismatches.
|
|
1034
|
+
// References: CRITIC (arXiv:2305.11738), CoVe (arXiv:2309.11495).
|
|
1035
|
+
if (options.ref) {
|
|
1036
|
+
try {
|
|
1037
|
+
const expectedParsed = parseAssetRef(options.ref);
|
|
1038
|
+
const actualParsed = parseAssetRef(payload.ref);
|
|
1039
|
+
// Compare type + name (drop origin — agent may omit origin prefix).
|
|
1040
|
+
if (expectedParsed.type !== actualParsed.type || expectedParsed.name !== actualParsed.name) {
|
|
1041
|
+
emitReflectFailed("parse_error", "ref_mismatch", options.ref, {
|
|
1042
|
+
expectedRef: options.ref,
|
|
1043
|
+
actualRef: payload.ref,
|
|
1044
|
+
...(result.exitCode !== null ? { exitCode: result.exitCode } : {}),
|
|
1045
|
+
});
|
|
1046
|
+
return {
|
|
1047
|
+
schemaVersion: 1,
|
|
1048
|
+
ok: false,
|
|
1049
|
+
reason: "parse_error",
|
|
1050
|
+
error: `Agent retargeted proposal: expected ref "${options.ref}" but got "${payload.ref}". Proposal rejected to prevent silent ref hallucination.`,
|
|
1051
|
+
ref: options.ref,
|
|
1052
|
+
exitCode: result.exitCode,
|
|
1053
|
+
stdout: result.stdout,
|
|
1054
|
+
...(result.stderr ? { stderr: result.stderr } : {}),
|
|
1055
|
+
};
|
|
1056
|
+
}
|
|
1057
|
+
}
|
|
1058
|
+
catch {
|
|
1059
|
+
// parseAssetRef failure means the agent returned a malformed ref — already
|
|
1060
|
+
// caught downstream by createProposal; allow it to surface naturally.
|
|
1061
|
+
}
|
|
1062
|
+
}
|
|
1063
|
+
// 7. R-5 / #374: Apply the proposal quality gate when enabled.
|
|
1064
|
+
// Mirrors the lesson quality gate on distill proposals. The gate uses
|
|
1065
|
+
// `runLessonQualityJudge` from distill.ts and is gated behind either
|
|
1066
|
+
// `profiles.improve.default.processes.reflect.qualityGate.enabled` or
|
|
1067
|
+
// `profiles.improve.default.processes.distill.qualityGate.enabled` (the
|
|
1068
|
+
// `lesson_quality_gate` flag name is the legacy alias still accepted by
|
|
1069
|
+
// `isLlmFeatureEnabled`). Fail-open: any judge error passes through.
|
|
1070
|
+
// G-Eval (arXiv:2303.16634) — quality judgment before admission.
|
|
1071
|
+
const runtimeConfig = options.config ??
|
|
1072
|
+
(() => {
|
|
1073
|
+
try {
|
|
1074
|
+
return loadConfig();
|
|
1075
|
+
}
|
|
1076
|
+
catch {
|
|
1077
|
+
return undefined;
|
|
1078
|
+
}
|
|
1079
|
+
})();
|
|
1080
|
+
const chatFn = options.chat ?? chatCompletion;
|
|
1081
|
+
const qualityGateEnabled = isLlmFeatureEnabled(runtimeConfig, "proposal_quality_gate") ||
|
|
1082
|
+
isLlmFeatureEnabled(runtimeConfig, "lesson_quality_gate");
|
|
1083
|
+
if (qualityGateEnabled && runtimeConfig) {
|
|
1084
|
+
const assetContent = (() => {
|
|
1085
|
+
if (!options.ref)
|
|
1086
|
+
return null;
|
|
1087
|
+
try {
|
|
1088
|
+
const refParsed = parseAssetRef(options.ref);
|
|
1089
|
+
const candidates = [
|
|
1090
|
+
path.join(stash, `${refParsed.type}s`, `${refParsed.name}.md`),
|
|
1091
|
+
path.join(stash, `${refParsed.type}s`, refParsed.name, "index.md"),
|
|
1092
|
+
];
|
|
1093
|
+
for (const p of candidates) {
|
|
1094
|
+
if (fs.existsSync(p))
|
|
1095
|
+
return fs.readFileSync(p, "utf8");
|
|
1096
|
+
}
|
|
1097
|
+
return null;
|
|
1098
|
+
}
|
|
1099
|
+
catch {
|
|
1100
|
+
return null;
|
|
1101
|
+
}
|
|
1102
|
+
})();
|
|
1103
|
+
const judgeResult = await runLessonQualityJudge(runtimeConfig, payload.content, assetContent ?? "", chatFn);
|
|
1104
|
+
if (!judgeResult.pass) {
|
|
1105
|
+
// Quality gate rejected the proposal — surface as parse_error so the
|
|
1106
|
+
// improve orchestrator can log it and move on without crashing.
|
|
1107
|
+
appendEvent({
|
|
1108
|
+
eventType: "reflect_completed",
|
|
1109
|
+
ref: payload.ref,
|
|
1110
|
+
metadata: {
|
|
1111
|
+
source: "reflect",
|
|
1112
|
+
qualityRejected: true,
|
|
1113
|
+
qualityScore: judgeResult.score,
|
|
1114
|
+
qualityReason: judgeResult.reason,
|
|
1115
|
+
},
|
|
1116
|
+
});
|
|
1117
|
+
return {
|
|
1118
|
+
schemaVersion: 1,
|
|
1119
|
+
ok: false,
|
|
1120
|
+
reason: "parse_error",
|
|
1121
|
+
error: `Reflect proposal quality gate rejected: score=${judgeResult.score}, reason="${judgeResult.reason}"`,
|
|
1122
|
+
...(options.ref ? { ref: options.ref } : {}),
|
|
1123
|
+
exitCode: result.exitCode,
|
|
1124
|
+
};
|
|
1125
|
+
}
|
|
1126
|
+
}
|
|
1127
|
+
// 7b. Reflect content-preservation rails:
|
|
1128
|
+
// - Restore source frontmatter so reflect can never strip indexable
|
|
1129
|
+
// fields (`description`, `when_to_use`, `tags`, ...).
|
|
1130
|
+
// - Reset protected identity fields (`name`, `ref`, `id`, `slug`,
|
|
1131
|
+
// `type`) the LLM tried to change.
|
|
1132
|
+
// - Reject proposals that shrink/expand the body past safe ratios.
|
|
1133
|
+
//
|
|
1134
|
+
// See REFLECT_ALLOWED_TYPES / sanitizeReflectPayload for the underlying
|
|
1135
|
+
// hypotheses + observed regressions (`8737ab63`, `26941510`, and the
|
|
1136
|
+
// catastrophic-shrinkage cases from the May 2026 review).
|
|
1137
|
+
const sanitizeOutcome = sanitizeReflectPayload({ content: payload.content, ...(payload.frontmatter ? { frontmatter: payload.frontmatter } : {}) }, assetContent, payload.ref);
|
|
1138
|
+
if (sanitizeOutcome.reject) {
|
|
1139
|
+
appendEvent({
|
|
1140
|
+
eventType: "reflect_completed",
|
|
1141
|
+
ref: payload.ref,
|
|
1142
|
+
metadata: {
|
|
1143
|
+
source: "reflect",
|
|
1144
|
+
sanitized: true,
|
|
1145
|
+
rejected: true,
|
|
1146
|
+
rejectReason: sanitizeOutcome.reject.error,
|
|
1147
|
+
...(sanitizeOutcome.warnings.length > 0 ? { sanitizerWarnings: sanitizeOutcome.warnings } : {}),
|
|
1148
|
+
},
|
|
1149
|
+
});
|
|
1150
|
+
return {
|
|
1151
|
+
schemaVersion: 1,
|
|
1152
|
+
ok: false,
|
|
1153
|
+
reason: sanitizeOutcome.reject.reason,
|
|
1154
|
+
error: sanitizeOutcome.reject.error,
|
|
1155
|
+
...(options.ref ? { ref: options.ref } : {}),
|
|
1156
|
+
exitCode: result.exitCode,
|
|
1157
|
+
};
|
|
1158
|
+
}
|
|
1159
|
+
payload = {
|
|
1160
|
+
...payload,
|
|
1161
|
+
content: sanitizeOutcome.content,
|
|
1162
|
+
...(sanitizeOutcome.frontmatter ? { frontmatter: sanitizeOutcome.frontmatter } : {}),
|
|
1163
|
+
};
|
|
1164
|
+
// 8. Create the proposal. The proposal queue is the ONLY thing reflect
|
|
200
1165
|
// writes — promotion to a real asset is gated by `akm proposal accept`.
|
|
1166
|
+
//
|
|
1167
|
+
// R-4 / #373: Stamp `derived_from_reflect: true` in the frontmatter of any
|
|
1168
|
+
// lesson proposal generated by reflect. This provenance marker lets
|
|
1169
|
+
// `readRelatedLessons` exclude echo-chamber lessons (lessons that originate
|
|
1170
|
+
// from prior reflect runs on the same skill) unless independent feedback
|
|
1171
|
+
// evidence exists. ExpeL arXiv:2308.10144 — reject rules without success/
|
|
1172
|
+
// failure differential from independent evidence.
|
|
1173
|
+
const isLessonProposal = (() => {
|
|
1174
|
+
try {
|
|
1175
|
+
return parseAssetRef(payload.ref).type === "lesson";
|
|
1176
|
+
}
|
|
1177
|
+
catch {
|
|
1178
|
+
return false;
|
|
1179
|
+
}
|
|
1180
|
+
})();
|
|
1181
|
+
const basePayloadFrontmatter = payload.frontmatter ?? {};
|
|
1182
|
+
const payloadFrontmatterWithProvenance = isLessonProposal
|
|
1183
|
+
? { ...basePayloadFrontmatter, derived_from_reflect: true }
|
|
1184
|
+
: basePayloadFrontmatter;
|
|
1185
|
+
// Draft mode: skip DB persistence — the SC sampling loop in improve.ts persists
|
|
1186
|
+
// only the majority-vote winner (R-2 / #389). Return a synthetic proposal so
|
|
1187
|
+
// pickMajorityVote can compare content via Jaccard similarity.
|
|
1188
|
+
if (options.draftMode) {
|
|
1189
|
+
const draftProposal = {
|
|
1190
|
+
id: `sc-draft-${Date.now()}`,
|
|
1191
|
+
ref: payload.ref,
|
|
1192
|
+
source: "reflect",
|
|
1193
|
+
status: "pending",
|
|
1194
|
+
createdAt: new Date().toISOString(),
|
|
1195
|
+
updatedAt: new Date().toISOString(),
|
|
1196
|
+
payload: {
|
|
1197
|
+
content: payload.content,
|
|
1198
|
+
...(Object.keys(payloadFrontmatterWithProvenance).length > 0
|
|
1199
|
+
? { frontmatter: payloadFrontmatterWithProvenance }
|
|
1200
|
+
: {}),
|
|
1201
|
+
},
|
|
1202
|
+
// Phase 6A: preserve confidence on the synthetic draft so the SC majority
|
|
1203
|
+
// winner carries the score through to the persisted proposal.
|
|
1204
|
+
...(typeof payload.confidence === "number" ? { confidence: payload.confidence } : {}),
|
|
1205
|
+
};
|
|
1206
|
+
return {
|
|
1207
|
+
schemaVersion: 1,
|
|
1208
|
+
ok: true,
|
|
1209
|
+
proposal: draftProposal,
|
|
1210
|
+
ref: draftProposal.ref,
|
|
1211
|
+
agentProfile: resolvedProfileName,
|
|
1212
|
+
durationMs: result.durationMs,
|
|
1213
|
+
};
|
|
1214
|
+
}
|
|
201
1215
|
const createInput = {
|
|
202
1216
|
ref: payload.ref,
|
|
203
1217
|
source: "reflect",
|
|
204
1218
|
sourceRun: `reflect-${Date.now()}`,
|
|
205
1219
|
payload: {
|
|
206
1220
|
content: payload.content,
|
|
207
|
-
...(
|
|
1221
|
+
...(Object.keys(payloadFrontmatterWithProvenance).length > 0
|
|
1222
|
+
? { frontmatter: payloadFrontmatterWithProvenance }
|
|
1223
|
+
: {}),
|
|
208
1224
|
},
|
|
1225
|
+
// Phase 6A: forward LLM-reported confidence into the proposal record.
|
|
1226
|
+
// `parseAgentProposalPayload` already clamps to [0, 1] and drops non-
|
|
1227
|
+
// finite values; `createProposal` runs its own sanitizer as a safety net.
|
|
1228
|
+
...(typeof payload.confidence === "number" ? { confidence: payload.confidence } : {}),
|
|
209
1229
|
};
|
|
210
|
-
const
|
|
1230
|
+
const proposalResult = createProposal(stash, createInput, options.ctx);
|
|
1231
|
+
if (isProposalSkipped(proposalResult)) {
|
|
1232
|
+
// Dedup/cooldown guard fired — surface as a "cooldown" reason (not "parse_error")
|
|
1233
|
+
// so the improve orchestrator can distinguish legitimate skips from real failures
|
|
1234
|
+
// and exclude them from recentErrors/avoidPatterns injection.
|
|
1235
|
+
emitReflectFailed("cooldown", "proposal_skipped", options.ref, {
|
|
1236
|
+
proposalSkipReason: proposalResult.reason,
|
|
1237
|
+
});
|
|
1238
|
+
return {
|
|
1239
|
+
schemaVersion: 1,
|
|
1240
|
+
ok: false,
|
|
1241
|
+
reason: "cooldown",
|
|
1242
|
+
error: `Proposal skipped (${proposalResult.reason}): ${proposalResult.message}`,
|
|
1243
|
+
...(options.ref ? { ref: options.ref } : {}),
|
|
1244
|
+
exitCode: null,
|
|
1245
|
+
};
|
|
1246
|
+
}
|
|
1247
|
+
const proposal = proposalResult;
|
|
1248
|
+
appendEvent({
|
|
1249
|
+
eventType: "reflect_completed",
|
|
1250
|
+
ref: proposal.ref,
|
|
1251
|
+
metadata: {
|
|
1252
|
+
proposalId: proposal.id,
|
|
1253
|
+
source: "reflect",
|
|
1254
|
+
agentProfile: resolvedProfileName,
|
|
1255
|
+
},
|
|
1256
|
+
});
|
|
211
1257
|
return {
|
|
212
1258
|
schemaVersion: 1,
|
|
213
1259
|
ok: true,
|
|
214
1260
|
proposal,
|
|
215
1261
|
ref: proposal.ref,
|
|
216
|
-
agentProfile:
|
|
1262
|
+
agentProfile: resolvedProfileName,
|
|
217
1263
|
durationMs: result.durationMs,
|
|
218
1264
|
};
|
|
219
1265
|
}
|