akm-cli 0.8.0-rc2 → 0.8.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/{.github/CHANGELOG.md → CHANGELOG.md} +238 -3
- package/README.md +22 -6
- package/SECURITY.md +93 -0
- package/dist/assets/help/help-accept.md +12 -0
- package/dist/assets/help/help-improve.md +81 -0
- package/dist/{commands → assets}/help/help-proposals.md +7 -4
- package/dist/assets/help/help-reject.md +11 -0
- package/dist/{output → assets/hints}/cli-hints-full.md +60 -32
- package/dist/{output → assets/hints}/cli-hints-short.md +10 -7
- package/dist/assets/profiles/default.json +15 -0
- package/dist/assets/profiles/graph-refresh.json +13 -0
- package/dist/assets/profiles/memory-focus.json +12 -0
- package/dist/assets/profiles/quick.json +15 -0
- package/dist/assets/profiles/thorough.json +15 -0
- package/dist/assets/prompts/extract-session.md +80 -0
- package/dist/assets/prompts/graph-extract-user-prompt.md +35 -0
- package/dist/assets/tasks/graph-refresh-weekly.yml +10 -0
- package/dist/cli/config-migrate.js +144 -0
- package/dist/cli/config-validate.js +39 -0
- package/dist/cli/confirm.js +73 -0
- package/dist/cli/parse-args.js +93 -3
- package/dist/cli/shared.js +129 -0
- package/dist/cli.js +2141 -1268
- package/dist/commands/add-cli.js +279 -0
- package/dist/commands/agent-dispatch.js +20 -12
- package/dist/commands/agent-support.js +11 -5
- package/dist/commands/completions.js +3 -0
- package/dist/commands/config-cli.js +129 -517
- package/dist/commands/consolidate.js +1557 -147
- package/dist/commands/curate.js +44 -3
- package/dist/commands/db-cli.js +23 -0
- package/dist/commands/distill-promotion-policy.js +5 -3
- package/dist/commands/distill.js +906 -100
- package/dist/commands/env.js +213 -0
- package/dist/commands/eval-cases.js +3 -0
- package/dist/commands/events.js +3 -0
- package/dist/commands/extract-cli.js +127 -0
- package/dist/commands/extract-prompt.js +217 -0
- package/dist/commands/extract.js +477 -0
- package/dist/commands/feedback-cli.js +331 -0
- package/dist/commands/graph.js +260 -5
- package/dist/commands/health.js +1042 -55
- package/dist/commands/history.js +51 -16
- package/dist/commands/improve-auto-accept.js +97 -0
- package/dist/commands/improve-cli.js +236 -0
- package/dist/commands/improve-profiles.js +138 -0
- package/dist/commands/improve-result-file.js +167 -0
- package/dist/commands/improve.js +1736 -346
- package/dist/commands/info.js +26 -28
- package/dist/commands/init.js +49 -1
- package/dist/commands/installed-stashes.js +6 -23
- package/dist/commands/knowledge.js +3 -0
- package/dist/commands/lint/agent-linter.js +3 -0
- package/dist/commands/lint/base-linter.js +199 -5
- package/dist/commands/lint/command-linter.js +3 -0
- package/dist/commands/lint/default-linter.js +3 -0
- package/dist/commands/lint/env-key-rules.js +154 -0
- package/dist/commands/lint/index.js +92 -3
- package/dist/commands/lint/knowledge-linter.js +3 -0
- package/dist/commands/lint/markdown-insertion.js +343 -0
- package/dist/commands/lint/memory-linter.js +3 -0
- package/dist/commands/lint/registry.js +3 -0
- package/dist/commands/lint/skill-linter.js +3 -0
- package/dist/commands/lint/task-linter.js +15 -12
- package/dist/commands/lint/types.js +3 -0
- package/dist/commands/lint/workflow-linter.js +3 -0
- package/dist/commands/lint.js +3 -0
- package/dist/commands/migration-help.js +5 -2
- package/dist/commands/proposal-drain-policies.js +128 -0
- package/dist/commands/proposal-drain.js +477 -0
- package/dist/commands/proposal.js +60 -6
- package/dist/commands/propose.js +24 -19
- package/dist/commands/reflect.js +1004 -94
- package/dist/commands/registry-cli.js +150 -0
- package/dist/commands/registry-search.js +3 -0
- package/dist/commands/remember-cli.js +257 -0
- package/dist/commands/remember.js +15 -6
- package/dist/commands/schema-repair.js +88 -15
- package/dist/commands/search.js +99 -14
- package/dist/commands/secret.js +173 -0
- package/dist/commands/self-update.js +3 -0
- package/dist/commands/show.js +32 -13
- package/dist/commands/source-add.js +7 -35
- package/dist/commands/source-clone.js +3 -0
- package/dist/commands/source-manage.js +3 -0
- package/dist/commands/tasks.js +161 -95
- package/dist/commands/url-checker.js +3 -0
- package/dist/core/action-contributors.js +3 -0
- package/dist/core/asset-ref.js +13 -2
- package/dist/core/asset-registry.js +9 -2
- package/dist/core/asset-serialize.js +88 -0
- package/dist/core/asset-spec.js +61 -5
- package/dist/core/common.js +93 -5
- package/dist/core/concurrent.js +3 -0
- package/dist/core/config-io.js +347 -0
- package/dist/core/config-migration.js +622 -0
- package/dist/core/config-schema.js +558 -0
- package/dist/core/config-sources.js +108 -0
- package/dist/core/config-types.js +4 -0
- package/dist/core/config-walker.js +337 -0
- package/dist/core/config.js +366 -1077
- package/dist/core/errors.js +42 -20
- package/dist/core/events.js +31 -25
- package/dist/core/file-lock.js +104 -0
- package/dist/core/frontmatter.js +75 -10
- package/dist/core/lesson-lint.js +3 -0
- package/dist/core/markdown.js +3 -0
- package/dist/core/memory-belief.js +62 -0
- package/dist/core/memory-contradiction-detect.js +274 -0
- package/dist/core/memory-improve.js +142 -14
- package/dist/core/parse.js +3 -0
- package/dist/core/paths.js +218 -50
- package/dist/core/proposal-quality-validators.js +380 -0
- package/dist/core/proposal-validators.js +11 -3
- package/dist/core/proposals.js +464 -5
- package/dist/core/state-db.js +349 -56
- package/dist/core/text-truncation.js +107 -0
- package/dist/core/time.js +3 -0
- package/dist/core/tty.js +59 -0
- package/dist/core/warn.js +7 -2
- package/dist/core/write-source.js +12 -0
- package/dist/indexer/db-backup.js +391 -0
- package/dist/indexer/db-search.js +136 -28
- package/dist/indexer/db.js +661 -166
- package/dist/indexer/ensure-index.js +3 -0
- package/dist/indexer/file-context.js +3 -0
- package/dist/indexer/graph-boost.js +162 -40
- package/dist/indexer/graph-db.js +241 -51
- package/dist/indexer/graph-dedup.js +3 -7
- package/dist/indexer/graph-extraction.js +242 -149
- package/dist/indexer/index-context.js +3 -9
- package/dist/indexer/indexer.js +86 -16
- package/dist/indexer/llm-cache.js +24 -19
- package/dist/indexer/manifest.js +3 -0
- package/dist/indexer/matchers.js +184 -11
- package/dist/indexer/memory-inference.js +94 -50
- package/dist/indexer/metadata-contributors.js +3 -0
- package/dist/indexer/metadata.js +110 -50
- package/dist/indexer/path-resolver.js +3 -0
- package/dist/indexer/project-context.js +192 -0
- package/dist/indexer/ranking-contributors.js +134 -7
- package/dist/indexer/ranking.js +8 -1
- package/dist/indexer/search-fields.js +5 -9
- package/dist/indexer/search-hit-enrichers.js +91 -2
- package/dist/indexer/search-source.js +20 -1
- package/dist/indexer/semantic-status.js +4 -1
- package/dist/indexer/staleness-detect.js +447 -0
- package/dist/indexer/usage-events.js +12 -9
- package/dist/indexer/walker.js +3 -0
- package/dist/integrations/agent/builders.js +135 -0
- package/dist/integrations/agent/config.js +121 -401
- package/dist/integrations/agent/detect.js +3 -0
- package/dist/integrations/agent/index.js +6 -14
- package/dist/integrations/agent/model-aliases.js +55 -0
- package/dist/integrations/agent/profiles.js +3 -0
- package/dist/integrations/agent/prompts.js +137 -8
- package/dist/integrations/agent/runner.js +208 -0
- package/dist/integrations/agent/sdk-runner.js +8 -2
- package/dist/integrations/agent/spawn.js +54 -14
- package/dist/integrations/github.js +3 -0
- package/dist/integrations/lockfile.js +22 -51
- package/dist/integrations/session-logs/index.js +4 -0
- package/dist/integrations/session-logs/inline-refs.js +35 -0
- package/dist/integrations/session-logs/pre-filter.js +152 -0
- package/dist/integrations/session-logs/providers/claude-code.js +226 -0
- package/dist/integrations/session-logs/providers/opencode.js +231 -25
- package/dist/integrations/session-logs/types.js +3 -0
- package/dist/llm/call-ai.js +14 -26
- package/dist/llm/client.js +16 -2
- package/dist/llm/embedder.js +20 -29
- package/dist/llm/embedders/cache.js +3 -7
- package/dist/llm/embedders/local.js +42 -1
- package/dist/llm/embedders/remote.js +20 -8
- package/dist/llm/embedders/types.js +3 -7
- package/dist/llm/feature-gate.js +92 -56
- package/dist/llm/graph-extract.js +402 -31
- package/dist/llm/index-passes.js +44 -29
- package/dist/llm/memory-infer.js +30 -2
- package/dist/llm/metadata-enhance.js +3 -7
- package/dist/output/cli-hints.js +7 -4
- package/dist/output/context.js +60 -8
- package/dist/output/renderers.js +170 -194
- package/dist/output/shapes/curate.js +56 -0
- package/dist/output/shapes/distill.js +10 -0
- package/dist/output/shapes/env-list.js +19 -0
- package/dist/output/shapes/events.js +11 -0
- package/dist/output/shapes/helpers.js +424 -0
- package/dist/output/shapes/history.js +7 -0
- package/dist/output/shapes/passthrough.js +105 -0
- package/dist/output/shapes/proposal-accept.js +7 -0
- package/dist/output/shapes/proposal-diff.js +7 -0
- package/dist/output/shapes/proposal-list.js +7 -0
- package/dist/output/shapes/proposal-producer.js +11 -0
- package/dist/output/shapes/proposal-reject.js +7 -0
- package/dist/output/shapes/proposal-show.js +7 -0
- package/dist/output/shapes/registry-search.js +6 -0
- package/dist/output/shapes/registry.js +30 -0
- package/dist/output/shapes/search.js +6 -0
- package/dist/output/shapes/secret-list.js +19 -0
- package/dist/output/shapes/show.js +6 -0
- package/dist/output/shapes/vault-list.js +19 -0
- package/dist/output/shapes.js +51 -549
- package/dist/output/text/add.js +6 -0
- package/dist/output/text/clone.js +6 -0
- package/dist/output/text/config.js +6 -0
- package/dist/output/text/curate.js +6 -0
- package/dist/output/text/distill.js +7 -0
- package/dist/output/text/enable-disable.js +7 -0
- package/dist/output/text/events.js +10 -0
- package/dist/output/text/feedback.js +6 -0
- package/dist/output/text/helpers.js +1059 -0
- package/dist/output/text/history.js +7 -0
- package/dist/output/text/import.js +6 -0
- package/dist/output/text/index.js +6 -0
- package/dist/output/text/info.js +6 -0
- package/dist/output/text/init.js +6 -0
- package/dist/output/text/list.js +6 -0
- package/dist/output/text/proposal-producer.js +8 -0
- package/dist/output/text/proposal.js +12 -0
- package/dist/output/text/registry-commands.js +11 -0
- package/dist/output/text/registry.js +30 -0
- package/dist/output/text/remember.js +6 -0
- package/dist/output/text/remove.js +6 -0
- package/dist/output/text/save.js +6 -0
- package/dist/output/text/search.js +6 -0
- package/dist/output/text/show.js +6 -0
- package/dist/output/text/update.js +6 -0
- package/dist/output/text/upgrade.js +6 -0
- package/dist/output/text/vault.js +16 -0
- package/dist/output/text/wiki.js +15 -0
- package/dist/output/text/workflow.js +14 -0
- package/dist/output/text.js +44 -1329
- package/dist/registry/build-index.js +3 -0
- package/dist/registry/create-provider-registry.js +3 -0
- package/dist/registry/factory.js +4 -1
- package/dist/registry/origin-resolve.js +3 -0
- package/dist/registry/providers/index.js +3 -0
- package/dist/registry/providers/skills-sh.js +11 -2
- package/dist/registry/providers/static-index.js +10 -1
- package/dist/registry/providers/types.js +3 -24
- package/dist/registry/resolve.js +11 -16
- package/dist/registry/types.js +3 -0
- package/dist/scripts/migrate-storage.js +17767 -0
- package/dist/scripts/migrations/import-fs-improve-runs-to-db.js +9031 -0
- package/dist/scripts/migrations/v16-to-v17.js +141 -0
- package/dist/setup/detect.js +3 -0
- package/dist/setup/ripgrep-install.js +3 -0
- package/dist/setup/ripgrep-resolve.js +3 -0
- package/dist/setup/setup.js +306 -67
- package/dist/setup/steps.js +3 -15
- package/dist/sources/include.js +3 -0
- package/dist/sources/provider-factory.js +3 -11
- package/dist/sources/provider.js +3 -20
- package/dist/sources/providers/filesystem.js +19 -23
- package/dist/sources/providers/git.js +171 -21
- package/dist/sources/providers/index.js +3 -0
- package/dist/sources/providers/install-types.js +3 -13
- package/dist/sources/providers/npm.js +3 -4
- package/dist/sources/providers/provider-utils.js +3 -0
- package/dist/sources/providers/sync-from-ref.js +3 -11
- package/dist/sources/providers/tar-utils.js +3 -0
- package/dist/sources/providers/website.js +18 -22
- package/dist/sources/resolve.js +3 -0
- package/dist/sources/types.js +3 -0
- package/dist/sources/website-ingest.js +3 -0
- package/dist/tasks/backends/cron.js +3 -0
- package/dist/tasks/backends/exec-utils.js +3 -0
- package/dist/tasks/backends/index.js +3 -11
- package/dist/tasks/backends/launchd.js +4 -1
- package/dist/tasks/backends/schtasks.js +4 -1
- package/dist/tasks/parser.js +51 -38
- package/dist/tasks/resolveAkmBin.js +3 -0
- package/dist/tasks/runner.js +35 -9
- package/dist/tasks/schedule.js +20 -1
- package/dist/tasks/schema.js +5 -3
- package/dist/tasks/validator.js +6 -3
- package/dist/version.js +3 -0
- package/dist/wiki/wiki-templates.js +6 -3
- package/dist/wiki/wiki.js +4 -1
- package/dist/workflows/authoring.js +4 -1
- package/dist/workflows/cli.js +3 -0
- package/dist/workflows/db.js +140 -10
- package/dist/workflows/document-cache.js +3 -10
- package/dist/workflows/parser.js +3 -0
- package/dist/workflows/renderer.js +3 -0
- package/dist/workflows/runs.js +18 -1
- package/dist/workflows/schema.js +3 -0
- package/dist/workflows/scope-key.js +3 -0
- package/dist/workflows/validator.js +5 -9
- package/docs/README.md +7 -2
- package/docs/data-and-telemetry.md +225 -0
- package/docs/migration/release-notes/0.7.5.md +2 -2
- package/docs/migration/release-notes/0.8.0.md +57 -5
- package/docs/migration/v0.7-to-v0.8.md +1378 -0
- package/package.json +28 -11
- package/.github/LICENSE +0 -374
- package/dist/commands/help/help-accept.md +0 -9
- package/dist/commands/help/help-improve.md +0 -53
- package/dist/commands/help/help-reject.md +0 -8
- package/dist/commands/install-audit.js +0 -385
- package/dist/commands/vault.js +0 -310
- package/dist/indexer/match-contributors.js +0 -141
- package/dist/integrations/agent/pipeline.js +0 -39
- package/dist/integrations/agent/runners.js +0 -31
- package/dist/llm/prompts/graph-extract-user-prompt.md +0 -12
- /package/dist/{tasks → assets}/backends/launchd-template.xml +0 -0
- /package/dist/{tasks → assets}/backends/schtasks-template.xml +0 -0
- /package/dist/{commands → assets}/help/help-propose.md +0 -0
- /package/dist/{wiki → assets/wiki}/index-template.md +0 -0
- /package/dist/{wiki → assets/wiki}/ingest-workflow-template.md +0 -0
- /package/dist/{wiki → assets/wiki}/log-template.md +0 -0
- /package/dist/{wiki → assets/wiki}/schema-template.md +0 -0
- /package/dist/{workflows → assets/workflows}/workflow-template.md +0 -0
package/dist/commands/reflect.js
CHANGED
|
@@ -1,3 +1,6 @@
|
|
|
1
|
+
// This Source Code Form is subject to the terms of the Mozilla Public
|
|
2
|
+
// License, v. 2.0. If a copy of the MPL was not distributed with this
|
|
3
|
+
// file, You can obtain one at https://mozilla.org/MPL/2.0/.
|
|
1
4
|
/**
|
|
2
5
|
* `akm reflect [ref]` — proposal-producing agent command (#226).
|
|
3
6
|
*
|
|
@@ -19,22 +22,29 @@
|
|
|
19
22
|
* a committed asset, and the `accept` flow is the bridge.
|
|
20
23
|
*/
|
|
21
24
|
import fs from "node:fs";
|
|
25
|
+
import os from "node:os";
|
|
22
26
|
import path from "node:path";
|
|
23
27
|
import { parseAssetRef } from "../core/asset-ref";
|
|
28
|
+
import { assembleAssetFromString, serializeFrontmatter } from "../core/asset-serialize";
|
|
24
29
|
import { resolveStashDir } from "../core/common";
|
|
30
|
+
import { loadConfig } from "../core/config";
|
|
25
31
|
import { ConfigError, UsageError } from "../core/errors";
|
|
26
32
|
import { appendEvent, readEvents } from "../core/events";
|
|
27
33
|
import { parseFrontmatter } from "../core/frontmatter";
|
|
28
34
|
import { lintLessonContent } from "../core/lesson-lint";
|
|
29
35
|
import { stripMarkdownFences } from "../core/markdown";
|
|
30
|
-
import {
|
|
36
|
+
import { checkReflectSize } from "../core/proposal-quality-validators";
|
|
37
|
+
import { createProposal, isProposalSkipped, listProposals, } from "../core/proposals";
|
|
31
38
|
import { lookup } from "../indexer/indexer";
|
|
32
39
|
import { runAgent, } from "../integrations/agent";
|
|
33
40
|
import { resolveProcessAgentProfile } from "../integrations/agent/config";
|
|
34
|
-
import {
|
|
35
|
-
import {
|
|
41
|
+
import { buildReflectPrompt, extractDraftConfidence, parseAgentProposalPayload, } from "../integrations/agent/prompts";
|
|
42
|
+
import { resolveImproveProcessRunnerFromProfile } from "../integrations/agent/runner";
|
|
43
|
+
import { runOpencodeSdk } from "../integrations/agent/sdk-runner";
|
|
44
|
+
import { chatCompletion } from "../llm/client";
|
|
45
|
+
import { isLlmFeatureEnabled } from "../llm/feature-gate";
|
|
36
46
|
import { baseFailureFields, enoentHintMessage, isEnoentFailure, loadAgentConfigFromDisk, resolveAgentProfile, } from "./agent-support";
|
|
37
|
-
import { deriveLessonRef } from "./distill";
|
|
47
|
+
import { deriveLessonRef, runLessonQualityJudge } from "./distill";
|
|
38
48
|
const MAX_FEEDBACK_LINES = 10;
|
|
39
49
|
const MAX_GLOBAL_FEEDBACK_LINES = 20;
|
|
40
50
|
/**
|
|
@@ -61,6 +71,102 @@ function readRecentFeedback(ref) {
|
|
|
61
71
|
return [];
|
|
62
72
|
}
|
|
63
73
|
}
|
|
74
|
+
/** Cap on how many rejected proposals are read back per ref for the reflect prompt. */
const MAX_REJECTED_PROPOSALS = 3;
/**
 * Allow-list of asset types that reflect may target.
 *
 * Reflect produces `frontmatter + markdown body`. Applied to an asset whose
 * on-disk form is NOT markdown (executable scripts, vault env files, YAML
 * tasks) that shape means a `---\n…\n---\n` preamble is blindly prepended and
 * the runtime contract breaks — e.g. a `.ts` script with a YAML preamble is a
 * TypeScript syntax error.
 *
 * This is an allow-list, not a deny-list, so newly registered asset types stay
 * excluded until someone opts them in here explicitly.
 *
 * Observed regression: proposal `8737ab63` (May 2026) prepended frontmatter to
 * a `.ts` script file via reflect. This allow-list prevents that.
 */
const MARKDOWN_BACKED_ASSET_TYPES = [
    "knowledge",
    "memory",
    "lesson",
    "wiki",
    "skill",
    "agent",
    "command",
    "workflow",
];
export const REFLECT_ALLOWED_TYPES = new Set(MARKDOWN_BACKED_ASSET_TYPES);
|
|
101
|
+
/**
 * Frontmatter keys that express identity or structure and that the LLM must
 * NEVER be allowed to change.
 *
 * A skill's ref is derived from its on-disk path, so rewriting `name` silently
 * breaks ref resolution; the same reasoning applies to `ref`, `id`, `slug`
 * and `type`. The reflect post-processor in this file restores these fields
 * whenever the LLM tries to rewrite them.
 *
 * Observed regression: proposal `26941510` (May 2026) renamed
 * `skill:openpalm-stack-diagnostics`'s `name` field to `"diagnostic-checklist"`.
 */
const PROTECTED_FRONTMATTER_FIELDS = new Set([
    "name",
    "ref",
    "id",
    "slug",
    "type",
]);
|
|
113
|
+
/**
 * Read the most recently updated rejected proposals (archive included) for a
 * ref, newest first, capped at `MAX_REJECTED_PROPOSALS`.
 *
 * Used to inject Reflexion-style verbal-RL context into the reflect prompt so
 * the agent avoids re-proposing already-refused content (arXiv:2303.11366).
 *
 * Best-effort by design: a falsy `ref`, a failing `listProposals` call, or a
 * non-array result all yield `[]`. A single malformed record no longer
 * discards the whole list — it is kept with an empty `contentPreview` so its
 * rejection reason still reaches the prompt.
 *
 * @param stash Stash location, passed through unchanged to `listProposals`.
 * @param ref   Asset ref whose rejected proposals are wanted.
 * @returns Array of `{ ref, reason, contentPreview }`; `contentPreview` is at
 *          most the first 500 characters of the proposal content.
 */
function readRejectedProposals(stash, ref) {
    if (!ref)
        return [];
    let rejected;
    try {
        rejected = listProposals(stash, { ref, status: "rejected", includeArchive: true });
    }
    catch {
        // Proposal store missing or unreadable — nothing to report.
        return [];
    }
    if (!Array.isArray(rejected))
        return [];
    // Unparseable or missing timestamps sort as the epoch (oldest) instead of
    // feeding NaN into the comparator, which would make the order undefined.
    const updatedAtMs = (proposal) => {
        const ms = new Date(proposal.updatedAt ?? 0).getTime();
        return Number.isNaN(ms) ? 0 : ms;
    };
    // Copy before sorting: Array.prototype.sort works in place and the array
    // belongs to the proposal store.
    return rejected
        .filter((proposal) => proposal !== null && typeof proposal === "object")
        .sort((a, b) => updatedAtMs(b) - updatedAtMs(a))
        .slice(0, MAX_REJECTED_PROPOSALS)
        .map((proposal) => {
            const content = proposal.payload?.content;
            return {
                ref: proposal.ref,
                reason: proposal.review?.reason ?? "no reason given",
                contentPreview: typeof content === "string" ? content.slice(0, 500) : "",
            };
        });
}
|
|
137
|
+
/**
 * Synthesize a tmp draft-file path for the agent/sdk file-write contract.
 *
 * Mirrors `src/commands/propose.ts:163-178` — when the runner is agent-CLI or
 * the OpenCode SDK, the agent is instructed to write the proposal body to this
 * file instead of inlining it in JSON on stdout. This bypasses two known
 * failure modes for long assets: (a) ARG_MAX truncation on prompt round-trips
 * through fenced JSON, and (b) embedded-JSON parser brittleness on multi-KB
 * bodies (e.g. the `knowledge:systems/KOKORO_USAGE_GUIDE` 8.4KB payload that
 * produced 4/5 `parse_error` in May 2026 reflect validation).
 *
 * The path lives under `os.tmpdir()`. The file name embeds the sanitized ref
 * (every character outside `[A-Za-z0-9_-]` becomes `_`, capped at 100
 * characters so a long ref cannot push the name past filesystem file-name
 * limits), `Date.now()`, and a short random base-36 suffix, which makes
 * collisions between concurrent reflect calls unlikely.
 *
 * This function always returns a path and creates nothing on disk. Skipping
 * the draft file for the LLM HTTP runner — whose chat-completion transport has
 * no filesystem access (see `src/llm/call-ai.ts:64-71`) — is the caller's
 * responsibility.
 *
 * @param ref Asset ref being reflected on; `null`/`undefined` maps to `no-ref`.
 * @returns Absolute path of the form `<tmpdir>/akm-reflect-<ref>-<ms>-<rand>.md`.
 */
function synthesizeReflectDraftPath(ref) {
    const MAX_REF_SEGMENT_LENGTH = 100;
    const safeRef = (ref ?? "no-ref")
        .replace(/[^a-z0-9_-]/gi, "_")
        .slice(0, MAX_REF_SEGMENT_LENGTH);
    const rand = Math.random().toString(36).slice(2, 8);
    return path.join(os.tmpdir(), `akm-reflect-${safeRef}-${Date.now()}-${rand}.md`);
}
|
|
159
|
+
/**
 * Heuristically decide whether the agent honoured the file-write contract.
 *
 * Under that contract the agent prints a single `DRAFT_WRITTEN` line on stdout
 * once the draft file is written. Agents may print other log lines as well, so
 * the marker is accepted anywhere in the captured output, as long as it stands
 * alone as a word.
 *
 * @param stdout Captured agent stdout; any falsy value yields `false`.
 * @returns `true` when the `DRAFT_WRITTEN` marker is present.
 */
function stdoutSignalsDraftWritten(stdout) {
    const draftWrittenMarker = /\bDRAFT_WRITTEN\b/;
    return Boolean(stdout) && draftWrittenMarker.test(stdout);
}
|
|
64
170
|
/**
|
|
65
171
|
* Build schema/lint hints for the prompt. For lesson refs, run the lesson
|
|
66
172
|
* lint over the current content and surface any findings — they are a
|
|
@@ -132,20 +238,334 @@ async function readRelatedLessons(stash, ref, parsedRef) {
|
|
|
132
238
|
catch {
|
|
133
239
|
// Best effort only.
|
|
134
240
|
}
|
|
241
|
+
// R-4 / #373: Filter out lessons with `derived_from_reflect: true` unless
|
|
242
|
+
// independent feedback exists for the skill. This prevents the echo-chamber
|
|
243
|
+
// risk where reflect-output lessons feed back into the next reflect pass as
|
|
244
|
+
// "independent" evidence, amplifying their own prior outputs over time.
|
|
245
|
+
//
|
|
246
|
+
// ExpeL arXiv:2308.10144: rules need differential evidence from independent
|
|
247
|
+
// sources (success vs failure traces). A lesson that only ever appeared from
|
|
248
|
+
// reflect-internal signals has no such differential signal.
|
|
249
|
+
//
|
|
250
|
+
// "Independent feedback" = any usage_events "feedback" events for the skill
|
|
251
|
+
// ref itself, indicating a human or external system rated the skill.
|
|
252
|
+
let hasIndependentFeedback = false;
|
|
253
|
+
try {
|
|
254
|
+
const feedbackEventsForSkill = readEvents({ type: "feedback", ref }).events;
|
|
255
|
+
hasIndependentFeedback = feedbackEventsForSkill.length > 0;
|
|
256
|
+
}
|
|
257
|
+
catch {
|
|
258
|
+
// Best effort — if we can't check, allow all lessons through.
|
|
259
|
+
hasIndependentFeedback = true;
|
|
260
|
+
}
|
|
261
|
+
if (!hasIndependentFeedback) {
|
|
262
|
+
// No independent feedback: exclude all reflect-derived lessons to prevent
|
|
263
|
+
// echo-chamber amplification.
|
|
264
|
+
for (const [lessonRef, lesson] of related.entries()) {
|
|
265
|
+
try {
|
|
266
|
+
const lessonFm = parseFrontmatter(lesson.content);
|
|
267
|
+
if (lessonFm.data.derived_from_reflect === true) {
|
|
268
|
+
related.delete(lessonRef);
|
|
269
|
+
}
|
|
270
|
+
}
|
|
271
|
+
catch {
|
|
272
|
+
// If we can't parse the frontmatter, keep the lesson (safe default).
|
|
273
|
+
}
|
|
274
|
+
}
|
|
275
|
+
}
|
|
135
276
|
return [...related.values()];
|
|
136
277
|
}
|
|
137
|
-
|
|
278
|
+
/**
 * Returns true only when `stdout` is a recognised AKM proposal-skip signal.
 *
 * Two accepted forms:
 *   1. Structured JSON: the whole (trimmed) stdout parses as JSON with
 *      `skipped === true`, or with `reason` equal to one of
 *      `duplicate_pending`, `content_hash_match`, `cooldown`, `below_threshold`.
 *   2. Legacy text: stdout contains `proposal skipped` (case-insensitive).
 *
 * The earlier `/cooldown/i` check was intentionally narrowed to these forms to
 * avoid false positives on real agent error messages that merely mention the
 * word "cooldown" (e.g. "rate limit cooldown exceeded").
 *
 * @param stdout Captured agent stdout. Anything that is not a string is
 *               treated as "no signal".
 * @returns `true` when stdout matches one of the accepted skip forms.
 */
function isStructuredCooldownSignal(stdout) {
    // Reject non-strings up front rather than relying on a swallowed TypeError
    // from `.trim()` and on RegExp coercing `undefined` to "undefined".
    if (typeof stdout !== "string")
        return false;
    const knownSkipReasons = ["duplicate_pending", "content_hash_match", "cooldown", "below_threshold"];
    let parsed;
    try {
        parsed = JSON.parse(stdout.trim());
    }
    catch {
        // Non-JSON stdout is never a structured signal; fall through to the
        // legacy text check below.
        parsed = undefined;
    }
    if (parsed?.skipped === true)
        return true;
    if (typeof parsed?.reason === "string" && knownSkipReasons.includes(parsed.reason))
        return true;
    // Legacy text signal emitted by older proposal output lines.
    return /proposal skipped/i.test(stdout);
}
|
|
305
|
+
/**
 * Fallback payload parser for reflect agent stdout (R-6 / #375).
 *
 * Used when the agent did not emit valid JSON (old-style agents, SDK mode
 * without structured output support). Markdown fences are stripped from the
 * raw output, the result is trimmed, and it is accepted as proposal content
 * only if `looksLikeAssetContent` approves it for the ref's type (the part of
 * `ref` before the first `:`).
 *
 * The acceptance check is deliberately strict: the previous implementation took
 * any markdown starting with `#` or `---`, which admitted malformed or
 * hallucinated content as valid proposals.
 *
 * When `sdkMode === true`, structured output (tool-call schema) is the intended
 * replacement for this fallback; that wiring is tracked separately, so for now
 * this parser applies to all modes.
 *
 * @param stdout  Raw agent output.
 * @param ref     Target asset ref; when falsy nothing is recovered.
 * @param sdkMode Forwarded to `looksLikeAssetContent`.
 * @returns `{ ref, content }`, or `undefined` when there is no ref, the output
 *          is empty after stripping, or it does not look like asset content.
 */
function fallbackPayloadFromRawContent(stdout, ref, sdkMode = false) {
    const content = ref ? stripMarkdownFences(stdout).trim() : "";
    if (content === "")
        return undefined;
    const [targetType] = ref.split(":");
    return looksLikeAssetContent(content, sdkMode, targetType)
        ? { ref, content }
        : undefined;
}
|
|
147
|
-
|
|
148
|
-
|
|
335
|
+
/**
 * Decide whether raw agent output looks like a valid asset payload (R-6 / #375).
 *
 * Tightened from the previous `startsWith("#") || startsWith("---")`:
 *
 *   - Frontmatter start (`---`): a closing `\n---` must exist, and the block up
 *     to and including it must contain a line beginning with `description:`.
 *     Empty `---\n---\n` blocks and bare delimiter sequences are rejected.
 *   - Frontmatter start in SDK mode with `targetType === "lesson"`: the block
 *     must additionally contain a `when_to_use:` line. The ref's type is used
 *     rather than a frontmatter `type:` key, which is non-standard.
 *   - Heading start (`#`): the text must contain at least two non-blank lines
 *     in total (the heading plus one body line), which rejects title stubs
 *     such as `# Title\n`. The SDK-mode `when_to_use:` rule does not apply on
 *     this branch.
 *   - Anything else is rejected.
 *
 * @param value      Candidate content, already trimmed by the caller.
 * @param sdkMode    Whether the stricter SDK-mode lesson rule applies.
 * @param targetType Asset type taken from the target ref (e.g. `"lesson"`).
 * @returns `true` when the content passes the checks above.
 */
function looksLikeAssetContent(value, sdkMode = false, targetType) {
    if (!value.startsWith("---")) {
        if (!value.startsWith("#"))
            return false;
        let nonBlankLines = 0;
        for (const line of value.split("\n")) {
            if (line.trim().length > 0)
                nonBlankLines += 1;
        }
        return nonBlankLines >= 2;
    }
    // Search starts past the opening fence so it cannot match itself.
    const closingFenceAt = value.indexOf("\n---", 4);
    if (closingFenceAt === -1)
        return false;
    const frontmatterBlock = value.slice(0, closingFenceAt + 4);
    if (!/^description\s*:/m.test(frontmatterBlock))
        return false;
    const requiresWhenToUse = sdkMode && targetType === "lesson";
    return requiresWhenToUse ? /^when_to_use\s*:/m.test(frontmatterBlock) : true;
}
|
|
373
|
+
/**
 * Split a markdown blob into its frontmatter text and body.
 *
 * A frontmatter block is recognised only at the very start of `raw`: an
 * opening `---` line, at least one line of content, and a closing `---`
 * (LF or CRLF line endings). An empty block such as `---\n---\n` is therefore
 * not treated as frontmatter.
 *
 * @param raw Markdown text.
 * @returns `{ fmText, body }` — `fmText` is the text between the fences and
 *          `body` is everything after the closing fence. When no frontmatter
 *          block is found, `fmText` is `null` and `body` is `raw` unchanged.
 */
function splitFrontmatter(raw) {
    const parts = raw.match(/^---\r?\n([\s\S]*?)\r?\n---\r?\n?([\s\S]*)$/);
    return parts
        ? { fmText: parts[1], body: parts[2] }
        : { fmText: null, body: raw };
}
|
|
384
|
+
/**
 * Reflect post-processor — applies the reflect safety rails to a proposal
 * payload before it is stored.
 *
 *  1. Frontmatter: the source asset's frontmatter is the base. Frontmatter
 *     supplied by the LLM (a leading `---` block inside `payload.content`
 *     and/or the explicit `payload.frontmatter` object) is merged on top,
 *     except for the keys in `PROTECTED_FRONTMATTER_FIELDS`: those are removed
 *     from the LLM side when they differ and always restored from the source.
 *  2. Size guard: `checkReflectSize(sourceBody, cleanedBody)` decides whether
 *     the proposed body shrank or grew too much; a failing outcome produces a
 *     `reject` result instead of sanitized content.
 *  3. Body hygiene: a leading frontmatter block inside `payload.content` is
 *     stripped from the body, and leading whitespace is trimmed.
 *
 * When `sourceContent` is empty or undefined (new asset), the source
 * frontmatter is `{}` and the source body is `""`. The size check is still
 * invoked with that empty body — presumably `checkReflectSize` treats it as
 * too small to judge; confirm against that helper.
 *
 * @param {{content: string, frontmatter?: object}} payload - Parsed proposal
 *   payload from the agent/LLM. Not mutated.
 * @param {string|undefined} sourceContent - Current full text of the target
 *   asset (frontmatter + body), if it exists.
 * @param {string} targetRef - Asset ref, used only in the rejection message.
 * @returns {{content: string, warnings: string[], frontmatter?: object,
 *   reject?: {reason: string, error: string}}} On `reject`, `content` is the
 *   untouched `payload.content` and the caller should surface a failure.
 *   Otherwise `content` is the reassembled asset text and `frontmatter` (when
 *   non-empty) is the merged frontmatter object.
 */
function sanitizeReflectPayload(payload, sourceContent, targetRef) {
    const warnings = [];
    const { fmText: sourceFmText, body: sourceBody } = sourceContent
        ? splitFrontmatter(sourceContent)
        : { fmText: null, body: "" };
    const sourceFm = sourceFmText !== null ? parseFrontmatter(sourceContent ?? "").data : {};
    const { fmText: llmFmText, body: rawLlmBody } = splitFrontmatter(payload.content);
    // Parse the LLM-emitted frontmatter (if any) so its non-identity keys can
    // be merged into the source frontmatter.
    let llmFm = {};
    if (llmFmText !== null) {
        warnings.push("LLM emitted frontmatter in content; stripped and merged through identity guard.");
        try {
            llmFm = parseFrontmatter(payload.content).data;
        }
        catch {
            // The block is still stripped from the body, but none of its fields
            // can be merged. Say so instead of dropping them silently.
            warnings.push("LLM-emitted frontmatter in content could not be parsed; its fields were discarded.");
            llmFm = {};
        }
    }
    // Also accept the explicit `frontmatter` field on the payload. Arrays are
    // `typeof "object"` too, but spreading one would inject its indices as
    // frontmatter keys ("0", "1", ...), so they are refused explicitly.
    const explicitFm = payload.frontmatter;
    if (explicitFm && typeof explicitFm === "object") {
        if (Array.isArray(explicitFm)) {
            warnings.push("Payload `frontmatter` was an array, not a key-value object; ignored.");
        }
        else {
            llmFm = { ...llmFm, ...explicitFm };
        }
    }
    // Strip protected identity fields from any LLM-supplied frontmatter — they
    // must come from the source asset, never from the LLM.
    for (const field of PROTECTED_FRONTMATTER_FIELDS) {
        if (field in llmFm && llmFm[field] !== sourceFm[field]) {
            warnings.push(`LLM attempted to change protected frontmatter field "${field}"; restored from source.`);
            delete llmFm[field];
        }
    }
    // Effective frontmatter: source overlaid with the sanitized LLM fields.
    // Protected keys present in the source are re-applied last so the source
    // always wins on them.
    const mergedFm = { ...sourceFm, ...llmFm };
    for (const field of PROTECTED_FRONTMATTER_FIELDS) {
        if (field in sourceFm) {
            mergedFm[field] = sourceFm[field];
        }
    }
    const cleanedBody = rawLlmBody.replace(/^\s+/, "");
    // Size guard. The predicate itself lives in `checkReflectSize`; this
    // function only turns a failing outcome into a rejection envelope.
    const sizeOutcome = checkReflectSize(sourceBody, cleanedBody);
    if (!sizeOutcome.ok) {
        const pct = (sizeOutcome.ratio * 100).toFixed(0);
        const limit = sizeOutcome.code === "EXCESSIVE_SHRINKAGE" ? "minimum 50%" : "maximum 250%";
        const cause = sizeOutcome.code === "EXCESSIVE_SHRINKAGE"
            ? "Concrete content was likely deleted."
            : "Speculative material was likely added.";
        return {
            content: payload.content,
            warnings,
            reject: {
                // Content-policy guard hit (EXCESSIVE_SHRINKAGE / EXCESSIVE_EXPANSION):
                // the LLM responded, but its output was blocked. Reported under
                // `content_policy_reject` so guard hits can be told apart from
                // genuine LLM faults.
                reason: "content_policy_reject",
                error: `Reflect rejected: ${sizeOutcome.code} — proposed body is ${pct}% of source (${limit}) for ref ${targetRef}. ${cause}`,
            },
        };
    }
    // Reassemble final content: merged frontmatter + cleaned body. When there
    // is no frontmatter at all (neither source nor LLM), emit the body alone so
    // no stray `---` fence is added to an asset that never had one.
    const hasFrontmatter = Object.keys(mergedFm).length > 0;
    const reassembled = hasFrontmatter
        ? assembleAssetFromString(serializeFrontmatter(mergedFm), cleanedBody)
        : cleanedBody;
    return {
        content: reassembled,
        ...(hasFrontmatter ? { frontmatter: mergedFm } : {}),
        warnings,
    };
}
|
|
488
|
+
/**
 * JSON Schema describing the structured reflect response: an object with a
 * required `ref` and `content`, plus optional `frontmatter` and `confidence`.
 * Handed to `chatCompletion` as the response schema when the connection has
 * `supportsJsonSchema: true`, so the model returns a strict JSON object
 * matching {@link AgentProposalPayload}.
 */
export const REFLECT_JSON_SCHEMA = {
    type: "object",
    required: ["ref", "content"],
    additionalProperties: false,
    properties: {
        ref: {
            type: "string",
            description: "Asset ref in type:name format (e.g. lesson:my-lesson).",
        },
        content: {
            type: "string",
            description: "Full markdown content for the asset.",
        },
        // Free-form key/value bag; any keys are accepted at the schema level.
        frontmatter: {
            type: "object",
            description: "Optional frontmatter key-value pairs to merge into the asset.",
            additionalProperties: true,
        },
        // Phase 6A (Advantage D6a): self-reported confidence in [0, 1]. Scores
        // at or above the configured threshold (default 0.8) drive auto-accept
        // in `akm improve`. The parser clamps/drops out-of-range or non-finite
        // values; the field stays optional so agents that never emit a score
        // keep working.
        confidence: {
            type: "number",
            minimum: 0,
            maximum: 1,
            description: "Optional self-reported quality confidence in [0, 1]. Proposals with confidence >= the active threshold (default 0.8) may be auto-accepted by `akm improve`.",
        },
    },
};
|
|
518
|
+
/**
 * Self-Refine critique instruction. Sent as a follow-up user message after the
 * prior draft has been replayed as the assistant turn, asking the model for an
 * improved proposal.
 */
const REFLECT_CRITIQUE_PROMPT = [
    "Your previous proposal is shown above.",
    "Please review it critically and provide an improved version that is more specific, actionable, and avoids any issues with the previous attempt.",
    "Return only the improved JSON proposal.",
].join(" ");
|
|
520
|
+
/**
 * Perform one reflect iteration by calling the LLM API directly (v2 config
 * path) instead of spawning an agent.
 *
 * The conversation always starts with `opts.prompt` as a user message. When a
 * `priorDraft` is supplied and `opts.iteration > 0`, the draft is replayed as
 * an assistant turn followed by the critique prompt as a second user turn.
 *
 * The return value mirrors an {@link AgentRunResult} so it can be handled by
 * the same dispatch loop as agent-based runners. Errors are never thrown from
 * the chat call: they are folded into a failed result.
 *
 * @param {object} opts
 * @param {string} [opts.prompt] - Initial user prompt (defaults to "").
 * @param {*} opts.connection - Connection passed through to the chat function.
 * @param {string} [opts.priorDraft] - Previous iteration's raw output.
 * @param {number} [opts.iteration] - Zero-based iteration index.
 * @param {object} [opts.responseSchema] - Forwarded to `chatCompletion` when defined.
 * @param {number} [opts.maxTokens] - Forwarded to `chatCompletion` when defined.
 * @param {Function} [opts.chat] - Test seam; called as `chat(connection, messages)`
 *   in place of `chatCompletion` (no schema / token options are passed).
 * @returns {Promise<object>} `{ ok: true, stdout, stderr: "", durationMs, exitCode: 0 }`
 *   on success, or `{ ok: false, stdout: "", stderr, durationMs, exitCode: 1,
 *   reason: "non_zero_exit", error }` on failure.
 */
export async function runReflectViaLlm(opts) {
    const startedAt = Date.now();
    const conversation = [{ role: "user", content: opts.prompt ?? "" }];
    const isRefinementPass = opts.priorDraft !== undefined && opts.iteration > 0;
    if (isRefinementPass) {
        // Replay the earlier draft as the model's own turn, then ask for a critique.
        conversation.push(
            { role: "assistant", content: opts.priorDraft },
            { role: "user", content: REFLECT_CRITIQUE_PROMPT },
        );
    }
    try {
        let stdout;
        if (!opts.chat) {
            // Production path: full chatCompletion, forwarding the structured-output
            // schema and the hard max_tokens cap only when the caller set them.
            const completionOptions = {};
            if (opts.responseSchema !== undefined) {
                completionOptions.responseSchema = opts.responseSchema;
            }
            if (opts.maxTokens !== undefined) {
                completionOptions.maxTokens = opts.maxTokens;
            }
            stdout = await chatCompletion(opts.connection, conversation, completionOptions);
        }
        else {
            // Test seam: injected chat function (two-arg signature, no responseSchema).
            stdout = await opts.chat(opts.connection, conversation);
        }
        return {
            ok: true,
            stdout,
            stderr: "",
            durationMs: Date.now() - startedAt,
            exitCode: 0,
        };
    }
    catch (err) {
        // Report the failure in the result shape rather than propagating it.
        const detail = err instanceof Error ? err.message : String(err);
        return {
            ok: false,
            stdout: "",
            stderr: detail,
            durationMs: Date.now() - startedAt,
            exitCode: 1,
            reason: "non_zero_exit",
            error: detail,
        };
    }
}
|
|
150
570
|
function failureEnvelope(result, ref, fallbackReason = "non_zero_exit") {
|
|
151
571
|
return {
|
|
@@ -165,19 +585,67 @@ export async function akmReflect(options = {}) {
|
|
|
165
585
|
...(options.profile ? { profile: options.profile } : {}),
|
|
166
586
|
},
|
|
167
587
|
});
|
|
588
|
+
// Fix #3 (observability 0.8.0): every failure path below MUST emit
|
|
589
|
+
// `reflect_completed` so observers can close the invoke/complete loop. The
|
|
590
|
+
// three success-side `reflect_completed` emit sites carry rich metadata
|
|
591
|
+
// (qualityRejected, sanitized, proposalId, etc.); the failure-side emits
|
|
592
|
+
// carry `{ok: false, reason}` plus the ref when known. Stable failure
|
|
593
|
+
// reasons line up with `AgentFailureReason`: "parse_error", "non_zero_exit",
|
|
594
|
+
// "cooldown", "timeout", "spawn_failed", "llm_*", plus the synthetic
|
|
595
|
+
// "ref_mismatch" / "enoent" / "draft_missing" subtypes for cases the agent
|
|
596
|
+
// surface conflates as "parse_error". Sub-reasons land in `subreason`.
|
|
597
|
+
const emitReflectFailed = (reason, subreason, ref, extra) => {
|
|
598
|
+
appendEvent({
|
|
599
|
+
eventType: "reflect_completed",
|
|
600
|
+
...(ref ? { ref } : {}),
|
|
601
|
+
metadata: {
|
|
602
|
+
source: "reflect",
|
|
603
|
+
ok: false,
|
|
604
|
+
reason,
|
|
605
|
+
subreason,
|
|
606
|
+
...(extra ?? {}),
|
|
607
|
+
},
|
|
608
|
+
});
|
|
609
|
+
};
|
|
168
610
|
// 2. Resolve target asset content (if a ref is supplied).
|
|
169
611
|
let assetContent;
|
|
170
612
|
let parsedRef;
|
|
171
613
|
if (options.ref) {
|
|
172
614
|
parsedRef = parseAssetRef(options.ref);
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
615
|
+
// 2a. Type guard — reflect only operates on asset types whose canonical
|
|
616
|
+
// shape is `frontmatter + markdown body`. Refuse non-markdown types
|
|
617
|
+
// (script / vault / task) up-front so reflect never prepends YAML to a
|
|
618
|
+
// `.ts` file or rewrites a `.env` blob as prose. See REFLECT_ALLOWED_TYPES.
|
|
619
|
+
if (!REFLECT_ALLOWED_TYPES.has(parsedRef.type)) {
|
|
620
|
+
// Deterministic type-guard rejection — the LLM is never invoked. Emit
|
|
621
|
+
// with reason `unsupported_type` so the improve loop can route this to
|
|
622
|
+
// the `reflect-skipped` action bucket instead of `reflect-failed`. See
|
|
623
|
+
// `/tmp/akm-health-investigations/metrics-taxonomy-review.md` §1a
|
|
624
|
+
// ("Reflect refused asset type" — ~9% of reflect-failed events).
|
|
625
|
+
emitReflectFailed("unsupported_type", "unsupported_type", options.ref, { type: parsedRef.type });
|
|
626
|
+
return {
|
|
627
|
+
schemaVersion: 1,
|
|
628
|
+
ok: false,
|
|
629
|
+
reason: "unsupported_type",
|
|
630
|
+
error: `Reflect refused: asset type "${parsedRef.type}" is not supported by reflect (only markdown-canonical types are allowed: ${[...REFLECT_ALLOWED_TYPES].sort().join(", ")}). Use \`akm propose\` or edit the file directly.`,
|
|
631
|
+
ref: options.ref,
|
|
632
|
+
exitCode: null,
|
|
633
|
+
};
|
|
178
634
|
}
|
|
179
|
-
|
|
180
|
-
//
|
|
635
|
+
if (options.assetContent !== undefined) {
|
|
636
|
+
// Test seam — caller pre-loaded the source content.
|
|
637
|
+
assetContent = options.assetContent;
|
|
638
|
+
}
|
|
639
|
+
else {
|
|
640
|
+
try {
|
|
641
|
+
const entry = await lookup(parsedRef);
|
|
642
|
+
if (entry?.filePath && fs.existsSync(entry.filePath)) {
|
|
643
|
+
assetContent = fs.readFileSync(entry.filePath, "utf8");
|
|
644
|
+
}
|
|
645
|
+
}
|
|
646
|
+
catch {
|
|
647
|
+
// Index miss is non-fatal — the agent can still propose a fresh asset.
|
|
648
|
+
}
|
|
181
649
|
}
|
|
182
650
|
}
|
|
183
651
|
// 3. Resolve agent profile. ConfigError surfaces as a thrown error so the
|
|
@@ -188,24 +656,42 @@ export async function akmReflect(options = {}) {
|
|
|
188
656
|
// agent config (agent.processes["reflect"]) is picked up automatically.
|
|
189
657
|
let profile;
|
|
190
658
|
let resolvedTimeoutMs = options.timeoutMs;
|
|
659
|
+
let runnerSpec;
|
|
191
660
|
try {
|
|
192
661
|
if (options.agentProfile) {
|
|
193
662
|
// Test seam: injected profile bypasses all config.
|
|
194
663
|
profile = options.agentProfile;
|
|
195
664
|
}
|
|
196
|
-
else if (options.
|
|
197
|
-
//
|
|
198
|
-
|
|
665
|
+
else if (options.runner) {
|
|
666
|
+
// Caller-provided RunnerSpec (used in tests and --dry-run-resolve).
|
|
667
|
+
runnerSpec = options.runner;
|
|
199
668
|
}
|
|
200
669
|
else {
|
|
201
|
-
|
|
202
|
-
const
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
670
|
+
const cfg = options.config ?? loadConfig();
|
|
671
|
+
const reflectProcess = cfg.profiles?.improve?.default?.processes?.reflect;
|
|
672
|
+
// Resolve the runner from the improve profile's reflect entry when present.
|
|
673
|
+
runnerSpec = resolveImproveProcessRunnerFromProfile(reflectProcess, cfg) ?? undefined;
|
|
674
|
+
if (runnerSpec) {
|
|
675
|
+
if (resolvedTimeoutMs === undefined && runnerSpec.timeoutMs !== undefined) {
|
|
676
|
+
resolvedTimeoutMs = runnerSpec.timeoutMs;
|
|
677
|
+
}
|
|
678
|
+
}
|
|
679
|
+
else {
|
|
680
|
+
if (options.profile) {
|
|
681
|
+
// Explicit --profile flag wins over process config.
|
|
682
|
+
profile = resolveAgentProfile(options);
|
|
683
|
+
}
|
|
684
|
+
else {
|
|
685
|
+
// Use per-process config resolution (falls back to defaults.agent).
|
|
686
|
+
const agent = options.agentConfig ?? loadAgentConfigFromDisk();
|
|
687
|
+
const processName = options.agentProcess ?? "reflect";
|
|
688
|
+
const resolved = resolveProcessAgentProfile(processName, agent);
|
|
689
|
+
profile = resolved.profile;
|
|
690
|
+
// Only apply process-resolved timeoutMs when caller didn't supply one.
|
|
691
|
+
if (resolvedTimeoutMs === undefined) {
|
|
692
|
+
resolvedTimeoutMs = resolved.timeoutMs;
|
|
693
|
+
}
|
|
694
|
+
}
|
|
209
695
|
}
|
|
210
696
|
}
|
|
211
697
|
}
|
|
@@ -214,106 +700,530 @@ export async function akmReflect(options = {}) {
|
|
|
214
700
|
throw err;
|
|
215
701
|
throw err;
|
|
216
702
|
}
|
|
217
|
-
//
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
703
|
+
// Ensure profile is set for agent/sdk runners that don't use runnerSpec
|
|
704
|
+
if (!runnerSpec && !profile) {
|
|
705
|
+
const agent = options.agentConfig ?? loadAgentConfigFromDisk();
|
|
706
|
+
profile = resolveAgentProfile({ ...options, agentConfig: agent });
|
|
707
|
+
}
|
|
708
|
+
// Derive a display name for logging — either from the resolved profile or the runnerSpec.
|
|
709
|
+
const resolvedProfileName = profile?.name ??
|
|
710
|
+
(runnerSpec?.kind === "llm"
|
|
711
|
+
? `llm:${runnerSpec.connection.model}`
|
|
712
|
+
: runnerSpec?.kind !== undefined
|
|
713
|
+
? `${runnerSpec.kind}:${runnerSpec.profile?.name ?? "unknown"}`
|
|
714
|
+
: "unknown");
|
|
715
|
+
// 4. Build the shared prompt inputs — feedback, hints, lessons, rejected
|
|
716
|
+
// proposals. These are stable across refinement iterations; only the
|
|
717
|
+
// `priorDraft` field changes per-iteration (R-1 / #372).
|
|
221
718
|
const feedback = readRecentFeedback(options.ref);
|
|
222
719
|
const schemaHints = buildSchemaHints(parsedRef?.type ?? "", assetContent);
|
|
223
720
|
const relatedLessons = options.ref && parsedRef ? await readRelatedLessons(stash, options.ref, parsedRef) : [];
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
//
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
if (isEnoentFailure(result)) {
|
|
271
|
-
return { ...failureEnvelope(result, options.ref), error: enoentHintMessage(profile.bin) };
|
|
721
|
+
// Reflexion-style verbal-RL: inject rejected proposals so the agent avoids
|
|
722
|
+
// reproducing proposals that have already been reviewed and refused.
|
|
723
|
+
const rejectedProposals = readRejectedProposals(stash, options.ref);
|
|
724
|
+
// 5. Spawn the agent — with optional Self-Refine loop (R-1 / #372).
|
|
725
|
+
//
|
|
726
|
+
// maxRefineIters controls how many agent invocations are made:
|
|
727
|
+
// - 1 (default): single-shot, same as pre-R-1 behaviour
|
|
728
|
+
// - 2–3: on each subsequent pass, the prior draft is injected back into
|
|
729
|
+
// the prompt as Self-Refine critique context (arXiv:2303.17651)
|
|
730
|
+
//
|
|
731
|
+
// The loop exits early when the agent returns the same content as before
|
|
732
|
+
// (no-op refinement) to avoid wasting tokens on identical iterations.
|
|
733
|
+
const MAX_REFINE_ITERS = 3;
|
|
734
|
+
const maxRefineIters = Math.min(Math.max(1, options.maxRefineIters ?? 1), MAX_REFINE_ITERS);
|
|
735
|
+
const agentEnv = options.eventSource === "improve" ? { AKM_EVENT_SOURCE: "improve" } : {};
|
|
736
|
+
// Determine whether this dispatch can honour the file-write contract.
|
|
737
|
+
// Agent CLI + OpenCode SDK runners both have filesystem access; the direct
|
|
738
|
+
// LLM HTTP runner does NOT (see `src/llm/call-ai.ts:64-71`). The v1
|
|
739
|
+
// `profile.sdkMode` fallback also runs the SDK so it counts as file-writable.
|
|
740
|
+
// Test seams (`options.runAgentOptions.spawn`) emulate agent CLI behaviour so
|
|
741
|
+
// they participate as well — tests opt out by simply not writing the file.
|
|
742
|
+
const runnerSupportsFileWrite = runnerSpec ? runnerSpec.kind !== "llm" : true;
|
|
743
|
+
// Initialized to a sentinel; always overwritten in the first loop iteration
|
|
744
|
+
// (maxRefineIters is clamped to >= 1 above). TypeScript cannot prove a
|
|
745
|
+
// for-loop always runs at least once, so we use a type assertion here.
|
|
746
|
+
let result = {};
|
|
747
|
+
let priorDraft;
|
|
748
|
+
// Track every draft file path we synthesize so cleanup can remove them on
|
|
749
|
+
// every return path (success and failure). Mirrors propose's unlink pattern
|
|
750
|
+
// in `src/commands/propose.ts:215-226` but generalised to N refinement
|
|
751
|
+
// iterations. Always called via {@link cleanupDrafts} below.
|
|
752
|
+
const draftPathsToCleanup = [];
|
|
753
|
+
// Last iteration's draft path — read back if the agent wrote it.
|
|
754
|
+
let lastDraftPath;
|
|
755
|
+
// Best-effort unlink: tolerate already-deleted files (we may have unlinked
|
|
756
|
+
// an intermediate iteration's draft) and unwritable paths. Never throws —
|
|
757
|
+
// the proposal result is the source of truth for the caller.
|
|
758
|
+
const cleanupDrafts = () => {
|
|
759
|
+
for (const p of draftPathsToCleanup) {
|
|
760
|
+
try {
|
|
761
|
+
if (fs.existsSync(p))
|
|
762
|
+
fs.unlinkSync(p);
|
|
763
|
+
}
|
|
764
|
+
catch {
|
|
765
|
+
// Swallow — cleanup is best-effort.
|
|
766
|
+
}
|
|
272
767
|
}
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
//
|
|
768
|
+
};
|
|
769
|
+
// `payload` is populated inside the try (either by reading the draft file
|
|
770
|
+
// or parsing stdout JSON). Hoisted here so the post-try sections (R-3 ref
|
|
771
|
+
// guard, quality gate, sanitizer, createProposal) can use it after the
|
|
772
|
+
// drafts have been cleaned up.
|
|
276
773
|
let payload;
|
|
277
774
|
try {
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
|
|
775
|
+
for (let iter = 0; iter < maxRefineIters; iter++) {
|
|
776
|
+
// Synthesize a fresh tmp path per iteration so refinement passes never
|
|
777
|
+
// clobber an earlier draft (and so reading back is unambiguous).
|
|
778
|
+
const iterDraftPath = runnerSupportsFileWrite ? synthesizeReflectDraftPath(options.ref) : undefined;
|
|
779
|
+
if (iterDraftPath) {
|
|
780
|
+
draftPathsToCleanup.push(iterDraftPath);
|
|
781
|
+
lastDraftPath = iterDraftPath;
|
|
782
|
+
}
|
|
783
|
+
const { prompt, maxOutputChars } = buildReflectPrompt({
|
|
784
|
+
...(options.ref ? { ref: options.ref } : {}),
|
|
785
|
+
...(parsedRef?.type ? { type: parsedRef.type } : {}),
|
|
786
|
+
...(parsedRef?.name ? { name: parsedRef.name } : {}),
|
|
787
|
+
...(assetContent !== undefined ? { assetContent } : {}),
|
|
788
|
+
...(feedback.length > 0 ? { feedback } : {}),
|
|
789
|
+
...(schemaHints.length > 0 ? { schemaHints } : {}),
|
|
790
|
+
...(relatedLessons.length > 0 ? { relatedLessons } : {}),
|
|
791
|
+
...(options.task ? { task: options.task } : {}),
|
|
792
|
+
...(options.avoidPatterns && options.avoidPatterns.length > 0 ? { avoidPatterns: options.avoidPatterns } : {}),
|
|
793
|
+
...(rejectedProposals.length > 0 ? { rejectedProposals } : {}),
|
|
794
|
+
// R-1: inject prior draft as self-critique target on iterations > 0
|
|
795
|
+
...(priorDraft !== undefined ? { priorDraft } : {}),
|
|
796
|
+
// Issue A (#reflect-pipeline file-write contract): when the runner can
|
|
797
|
+
// touch the filesystem, instruct the agent to write the proposal body
|
|
798
|
+
// to a tmp file instead of inlining it in JSON. Avoids parse failures
|
|
799
|
+
// on long bodies (e.g. knowledge:systems/KOKORO_USAGE_GUIDE 8.4KB).
|
|
800
|
+
...(iterDraftPath ? { draftFilePath: iterDraftPath } : {}),
|
|
801
|
+
});
|
|
802
|
+
// Convert char ceiling → token cap for the LLM path: divide by 3 chars/token
|
|
803
|
+
// (conservative — most models are 3.5–4) and add 500-char overhead for the
|
|
804
|
+
// JSON wrapper and frontmatter block that surround the body in the response.
|
|
805
|
+
const maxTokensForLlm = maxOutputChars !== undefined ? Math.ceil((maxOutputChars + 500) / 3) : undefined;
|
|
806
|
+
let iterResult;
|
|
807
|
+
if (options.runAgentOptions?.spawn) {
|
|
808
|
+
// Test seam: use raw runAgent with injected spawn so tests remain deterministic.
|
|
809
|
+
const resolvedProfile = profile;
|
|
810
|
+
if (!resolvedProfile) {
|
|
811
|
+
throw new Error("internal: reflect test-seam path requires a resolved agent profile");
|
|
812
|
+
}
|
|
813
|
+
const runOptions = {
|
|
814
|
+
stdio: "captured",
|
|
815
|
+
parseOutput: "text",
|
|
816
|
+
...(resolvedTimeoutMs !== undefined ? { timeoutMs: resolvedTimeoutMs } : {}),
|
|
817
|
+
...(Object.keys(agentEnv).length > 0 ? { env: agentEnv } : {}),
|
|
818
|
+
...(options.runAgentOptions ?? {}),
|
|
819
|
+
};
|
|
820
|
+
iterResult = await runAgent(resolvedProfile, prompt, runOptions);
|
|
821
|
+
}
|
|
822
|
+
else if (runnerSpec) {
|
|
823
|
+
// v2: dispatch through unified RunnerSpec
|
|
824
|
+
const runOptions = {
|
|
825
|
+
stdio: "captured",
|
|
826
|
+
parseOutput: "text",
|
|
827
|
+
...(Object.keys(agentEnv).length > 0 ? { env: agentEnv } : {}),
|
|
828
|
+
};
|
|
829
|
+
switch (runnerSpec.kind) {
|
|
830
|
+
case "llm":
|
|
831
|
+
// LLM HTTP path — `draftFilePath` is accepted for type symmetry
|
|
832
|
+
// (see `RunReflectViaLlmOptions.draftFilePath` docstring) but is
|
|
833
|
+
// intentionally a no-op. The prompt builder above also did not
|
|
834
|
+
// include the file-write contract for this kind, so the LLM is
|
|
835
|
+
// still asked for JSON via stdout.
|
|
836
|
+
iterResult = await runReflectViaLlm({
|
|
837
|
+
prompt,
|
|
838
|
+
connection: runnerSpec.connection,
|
|
839
|
+
timeoutMs: runnerSpec.timeoutMs ?? (typeof resolvedTimeoutMs === "number" ? resolvedTimeoutMs : undefined),
|
|
840
|
+
priorDraft,
|
|
841
|
+
iteration: iter,
|
|
842
|
+
responseSchema: REFLECT_JSON_SCHEMA,
|
|
843
|
+
chat: options.chat,
|
|
844
|
+
...(maxTokensForLlm !== undefined ? { maxTokens: maxTokensForLlm } : {}),
|
|
845
|
+
});
|
|
846
|
+
break;
|
|
847
|
+
case "sdk":
|
|
848
|
+
iterResult = await runOpencodeSdk(runnerSpec.profile, prompt ?? "", runOptions);
|
|
849
|
+
break;
|
|
850
|
+
case "agent":
|
|
851
|
+
iterResult = await runAgent(runnerSpec.profile, prompt, {
|
|
852
|
+
...runOptions,
|
|
853
|
+
...(runnerSpec.timeoutMs !== undefined ? { timeoutMs: runnerSpec.timeoutMs } : {}),
|
|
854
|
+
});
|
|
855
|
+
break;
|
|
856
|
+
}
|
|
857
|
+
}
|
|
858
|
+
else {
|
|
859
|
+
// Production path (v1): dispatch directly to the appropriate runner.
|
|
860
|
+
// The fallback at the end of step 3 guarantees `profile` is set whenever
|
|
861
|
+
// `runnerSpec` is undefined, but TS can't prove that across the loop +
|
|
862
|
+
// await boundary — narrow into a const.
|
|
863
|
+
const resolvedProfile = profile;
|
|
864
|
+
if (!resolvedProfile) {
|
|
865
|
+
throw new Error("internal: reflect v1 dispatch reached without a resolved agent profile or runnerSpec");
|
|
866
|
+
}
|
|
867
|
+
const runOptions = {
|
|
868
|
+
stdio: "captured",
|
|
869
|
+
parseOutput: "text",
|
|
870
|
+
...(resolvedTimeoutMs !== undefined ? { timeoutMs: resolvedTimeoutMs } : {}),
|
|
871
|
+
...(Object.keys(agentEnv).length > 0 ? { env: agentEnv } : {}),
|
|
872
|
+
};
|
|
873
|
+
iterResult = resolvedProfile.sdkMode
|
|
874
|
+
? await runOpencodeSdk(resolvedProfile, prompt ?? "", runOptions)
|
|
875
|
+
: await runAgent(resolvedProfile, prompt, runOptions);
|
|
876
|
+
}
|
|
877
|
+
result = iterResult;
|
|
878
|
+
if (!iterResult.ok)
|
|
879
|
+
break; // surface failure after loop
|
|
880
|
+
// On success, extract the draft content for the next iteration.
|
|
881
|
+
// If the agent returns the same content as the prior draft, stop early
|
|
882
|
+
// (no-op refinement) to avoid wasting tokens on identical iterations.
|
|
883
|
+
if (iter < maxRefineIters - 1) {
|
|
884
|
+
const nextDraft = iterResult.stdout ?? "";
|
|
885
|
+
if (priorDraft !== undefined && nextDraft === priorDraft)
|
|
886
|
+
break;
|
|
887
|
+
priorDraft = nextDraft;
|
|
888
|
+
}
|
|
284
889
|
}
|
|
285
|
-
|
|
890
|
+
const finalResult = result;
|
|
891
|
+
if (!finalResult.ok) {
|
|
892
|
+
// B3: ENOENT / not-found gives an actionable hint.
|
|
893
|
+
if (isEnoentFailure(finalResult)) {
|
|
894
|
+
emitReflectFailed("spawn_failed", "enoent", options.ref, {
|
|
895
|
+
...(finalResult.exitCode !== undefined ? { exitCode: finalResult.exitCode } : {}),
|
|
896
|
+
});
|
|
897
|
+
return {
|
|
898
|
+
...failureEnvelope(finalResult, options.ref),
|
|
899
|
+
error: enoentHintMessage(profile?.bin ?? resolvedProfileName),
|
|
900
|
+
};
|
|
901
|
+
}
|
|
902
|
+
const envelope = failureEnvelope(finalResult, options.ref);
|
|
903
|
+
emitReflectFailed(envelope.reason, "agent_crash", options.ref, {
|
|
904
|
+
...(envelope.exitCode !== null ? { exitCode: envelope.exitCode } : {}),
|
|
905
|
+
});
|
|
906
|
+
return envelope;
|
|
907
|
+
}
|
|
908
|
+
// Re-alias to `result` for the downstream code that references it.
|
|
909
|
+
result = finalResult;
|
|
910
|
+
// 6. Resolve the proposal content.
|
|
911
|
+
//
|
|
912
|
+
// Path A (file-write contract — preferred for agent/sdk runners on long
|
|
913
|
+
// assets): the agent wrote the body to `lastDraftPath` and printed
|
|
914
|
+
// `DRAFT_WRITTEN` on stdout. Load the body from disk and synthesize a
|
|
915
|
+
// payload. The `EXCESSIVE_EXPANSION`/schema-shape gates downstream still
|
|
916
|
+
// apply — they validate content, not transport.
|
|
917
|
+
//
|
|
918
|
+
// Path B (legacy JSON stdout): the agent inlined the proposal body in
|
|
919
|
+
// JSON on stdout. Falls through to `parseAgentProposalPayload`. Also the
|
|
920
|
+
// path used by the LLM HTTP runner, which cannot honour file-write.
|
|
921
|
+
const draftFileExists = lastDraftPath !== undefined && fs.existsSync(lastDraftPath) && fs.statSync(lastDraftPath).size > 0;
|
|
922
|
+
const draftSignaled = stdoutSignalsDraftWritten(result.stdout);
|
|
923
|
+
if (draftSignaled && lastDraftPath && !draftFileExists) {
|
|
924
|
+
// Agent claimed to write the draft but the file is missing or empty.
|
|
925
|
+
// Surface as a parse_error rather than silently falling through — the
|
|
926
|
+
// alternative would be parsing the `DRAFT_WRITTEN` sentinel as JSON,
|
|
927
|
+
// which is guaranteed to fail with a confusing message.
|
|
928
|
+
emitReflectFailed("parse_error", "draft_missing", options.ref, {
|
|
929
|
+
...(result.exitCode !== null ? { exitCode: result.exitCode } : {}),
|
|
930
|
+
});
|
|
286
931
|
return {
|
|
287
932
|
schemaVersion: 1,
|
|
288
933
|
ok: false,
|
|
289
934
|
reason: "parse_error",
|
|
290
|
-
error:
|
|
935
|
+
error: `Agent emitted DRAFT_WRITTEN but draft file is missing or empty (${lastDraftPath}). The file-write contract failed; either the agent's file tools are broken or the path was unwritable.`,
|
|
291
936
|
...(options.ref ? { ref: options.ref } : {}),
|
|
292
937
|
exitCode: result.exitCode,
|
|
293
938
|
stdout: result.stdout,
|
|
294
939
|
...(result.stderr ? { stderr: result.stderr } : {}),
|
|
295
940
|
};
|
|
296
941
|
}
|
|
942
|
+
if (draftFileExists && lastDraftPath) {
|
|
943
|
+
// Happy path: agent wrote the body to disk. Use the ref the caller
|
|
944
|
+
// supplied (or a placeholder when omitted — the R-3 ref-mismatch guard
|
|
945
|
+
// below has no effect when there is no expected ref).
|
|
946
|
+
const fileContent = fs.readFileSync(lastDraftPath, "utf8");
|
|
947
|
+
// Phase 6A: file-write contract carries self-rated confidence on the
|
|
948
|
+
// `DRAFT_WRITTEN confidence=<n>` sentinel line. Extract it so the
|
|
949
|
+
// file-write path is on equal footing with the JSON-stdout path for
|
|
950
|
+
// auto-accept gating in `akm improve`.
|
|
951
|
+
const draftConfidence = extractDraftConfidence(result.stdout);
|
|
952
|
+
payload = {
|
|
953
|
+
ref: options.ref ?? "",
|
|
954
|
+
content: fileContent,
|
|
955
|
+
...(draftConfidence !== undefined ? { confidence: draftConfidence } : {}),
|
|
956
|
+
};
|
|
957
|
+
// The agent followed the file-write contract — `payload.ref` mirrors the
|
|
958
|
+
// caller's expected ref, so the R-3 guard below cannot fire. The agent
|
|
959
|
+
// had no opportunity to retarget the proposal. If the ref was omitted
|
|
960
|
+
// entirely, downstream `createProposal` will reject the empty ref.
|
|
961
|
+
}
|
|
962
|
+
else {
|
|
963
|
+
try {
|
|
964
|
+
payload = parseAgentProposalPayload(result.stdout ?? "");
|
|
965
|
+
}
|
|
966
|
+
catch (err) {
|
|
967
|
+
const fallback = fallbackPayloadFromRawContent(result.stdout ?? "", options.ref, profile?.sdkMode ?? false);
|
|
968
|
+
if (fallback) {
|
|
969
|
+
payload = fallback;
|
|
970
|
+
}
|
|
971
|
+
else {
|
|
972
|
+
// Reclassify cooldown/skip messages that arrive as stdout text instead of
|
|
973
|
+
// valid proposal JSON. These are legitimate skip signals, not parse failures,
|
|
974
|
+
// and should not pollute reflectFailedActions or recentErrors injection.
|
|
975
|
+
const stdoutText = result.stdout ?? "";
|
|
976
|
+
const isCooldownSignal = isStructuredCooldownSignal(stdoutText);
|
|
977
|
+
const reason = isCooldownSignal ? "cooldown" : "parse_error";
|
|
978
|
+
emitReflectFailed(reason, isCooldownSignal ? "stdout_cooldown_signal" : "parse_error", options.ref, {
|
|
979
|
+
...(result.exitCode !== null ? { exitCode: result.exitCode } : {}),
|
|
980
|
+
});
|
|
981
|
+
return {
|
|
982
|
+
schemaVersion: 1,
|
|
983
|
+
ok: false,
|
|
984
|
+
reason,
|
|
985
|
+
error: err instanceof Error ? err.message : String(err),
|
|
986
|
+
...(options.ref ? { ref: options.ref } : {}),
|
|
987
|
+
exitCode: result.exitCode,
|
|
988
|
+
stdout: result.stdout,
|
|
989
|
+
...(result.stderr ? { stderr: result.stderr } : {}),
|
|
990
|
+
};
|
|
991
|
+
}
|
|
992
|
+
}
|
|
993
|
+
}
|
|
297
994
|
}
|
|
298
|
-
|
|
995
|
+
finally {
|
|
996
|
+
// Always remove tmp draft files — success, failure, or exception. Returns
|
|
997
|
+
// inside the try above trigger this block before the function exits. Code
|
|
998
|
+
// after this point uses the already-loaded `payload` and never touches the
|
|
999
|
+
// draft paths.
|
|
1000
|
+
cleanupDrafts();
|
|
1001
|
+
}
|
|
1002
|
+
// 6b. Validate payload.ref === options.ref (R-3 / #366).
|
|
1003
|
+
// A hallucinating agent can silently retarget proposals to a different ref.
|
|
1004
|
+
// This guard normalises both refs through parseAssetRef so origin-prefix
|
|
1005
|
+
// differences do not cause false positives, then rejects mismatches.
|
|
1006
|
+
// References: CRITIC (arXiv:2305.11738), CoVe (arXiv:2309.11495).
|
|
1007
|
+
if (options.ref) {
|
|
1008
|
+
try {
|
|
1009
|
+
const expectedParsed = parseAssetRef(options.ref);
|
|
1010
|
+
const actualParsed = parseAssetRef(payload.ref);
|
|
1011
|
+
// Compare type + name (drop origin — agent may omit origin prefix).
|
|
1012
|
+
if (expectedParsed.type !== actualParsed.type || expectedParsed.name !== actualParsed.name) {
|
|
1013
|
+
emitReflectFailed("parse_error", "ref_mismatch", options.ref, {
|
|
1014
|
+
expectedRef: options.ref,
|
|
1015
|
+
actualRef: payload.ref,
|
|
1016
|
+
...(result.exitCode !== null ? { exitCode: result.exitCode } : {}),
|
|
1017
|
+
});
|
|
1018
|
+
return {
|
|
1019
|
+
schemaVersion: 1,
|
|
1020
|
+
ok: false,
|
|
1021
|
+
reason: "parse_error",
|
|
1022
|
+
error: `Agent retargeted proposal: expected ref "${options.ref}" but got "${payload.ref}". Proposal rejected to prevent silent ref hallucination.`,
|
|
1023
|
+
ref: options.ref,
|
|
1024
|
+
exitCode: result.exitCode,
|
|
1025
|
+
stdout: result.stdout,
|
|
1026
|
+
...(result.stderr ? { stderr: result.stderr } : {}),
|
|
1027
|
+
};
|
|
1028
|
+
}
|
|
1029
|
+
}
|
|
1030
|
+
catch {
|
|
1031
|
+
// parseAssetRef threw, so one of the two refs is malformed (most likely the
|
|
1032
|
+
// agent's). createProposal rejects malformed refs downstream; let it surface there.
|
|
1033
|
+
}
|
|
1034
|
+
}
|
|
1035
|
+
// 7. R-5 / #374: Apply the proposal quality gate when enabled.
|
|
1036
|
+
// Mirrors the lesson quality gate on distill proposals. The gate uses
|
|
1037
|
+
// `runLessonQualityJudge` from distill.ts and is gated behind either
|
|
1038
|
+
// `profiles.improve.default.processes.reflect.qualityGate.enabled` or
|
|
1039
|
+
// `profiles.improve.default.processes.distill.qualityGate.enabled` (the
|
|
1040
|
+
// `lesson_quality_gate` flag name is the legacy alias still accepted by
|
|
1041
|
+
// `isLlmFeatureEnabled`). Fail-open: any judge error passes through.
|
|
1042
|
+
// G-Eval (arXiv:2303.16634) — quality judgment before admission.
|
|
1043
|
+
const runtimeConfig = options.config ??
|
|
1044
|
+
(() => {
|
|
1045
|
+
try {
|
|
1046
|
+
return loadConfig();
|
|
1047
|
+
}
|
|
1048
|
+
catch {
|
|
1049
|
+
return undefined;
|
|
1050
|
+
}
|
|
1051
|
+
})();
|
|
1052
|
+
const chatFn = options.chat ?? chatCompletion;
|
|
1053
|
+
const qualityGateEnabled = isLlmFeatureEnabled(runtimeConfig, "proposal_quality_gate") ||
|
|
1054
|
+
isLlmFeatureEnabled(runtimeConfig, "lesson_quality_gate");
|
|
1055
|
+
if (qualityGateEnabled && runtimeConfig) {
|
|
1056
|
+
const assetContent = (() => {
|
|
1057
|
+
if (!options.ref)
|
|
1058
|
+
return null;
|
|
1059
|
+
try {
|
|
1060
|
+
const refParsed = parseAssetRef(options.ref);
|
|
1061
|
+
const candidates = [
|
|
1062
|
+
path.join(stash, `${refParsed.type}s`, `${refParsed.name}.md`),
|
|
1063
|
+
path.join(stash, `${refParsed.type}s`, refParsed.name, "index.md"),
|
|
1064
|
+
];
|
|
1065
|
+
for (const p of candidates) {
|
|
1066
|
+
if (fs.existsSync(p))
|
|
1067
|
+
return fs.readFileSync(p, "utf8");
|
|
1068
|
+
}
|
|
1069
|
+
return null;
|
|
1070
|
+
}
|
|
1071
|
+
catch {
|
|
1072
|
+
return null;
|
|
1073
|
+
}
|
|
1074
|
+
})();
|
|
1075
|
+
const judgeResult = await runLessonQualityJudge(runtimeConfig, payload.content, assetContent ?? "", chatFn);
|
|
1076
|
+
if (!judgeResult.pass) {
|
|
1077
|
+
// Quality gate rejected the proposal — surface as parse_error so the
|
|
1078
|
+
// improve orchestrator can log it and move on without crashing.
|
|
1079
|
+
appendEvent({
|
|
1080
|
+
eventType: "reflect_completed",
|
|
1081
|
+
ref: payload.ref,
|
|
1082
|
+
metadata: {
|
|
1083
|
+
source: "reflect",
|
|
1084
|
+
qualityRejected: true,
|
|
1085
|
+
qualityScore: judgeResult.score,
|
|
1086
|
+
qualityReason: judgeResult.reason,
|
|
1087
|
+
},
|
|
1088
|
+
});
|
|
1089
|
+
return {
|
|
1090
|
+
schemaVersion: 1,
|
|
1091
|
+
ok: false,
|
|
1092
|
+
reason: "parse_error",
|
|
1093
|
+
error: `Reflect proposal quality gate rejected: score=${judgeResult.score}, reason="${judgeResult.reason}"`,
|
|
1094
|
+
...(options.ref ? { ref: options.ref } : {}),
|
|
1095
|
+
exitCode: result.exitCode,
|
|
1096
|
+
};
|
|
1097
|
+
}
|
|
1098
|
+
}
|
|
1099
|
+
// 7b. Reflect content-preservation rails:
|
|
1100
|
+
// - Restore source frontmatter so reflect can never strip indexable
|
|
1101
|
+
// fields (`description`, `when_to_use`, `tags`, ...).
|
|
1102
|
+
// - Reset protected identity fields (`name`, `ref`, `id`, `slug`,
|
|
1103
|
+
// `type`) the LLM tried to change.
|
|
1104
|
+
// - Reject proposals that shrink/expand the body past safe ratios.
|
|
1105
|
+
//
|
|
1106
|
+
// See REFLECT_ALLOWED_TYPES / sanitizeReflectPayload for the underlying
|
|
1107
|
+
// hypotheses + observed regressions (`8737ab63`, `26941510`, and the
|
|
1108
|
+
// catastrophic-shrinkage cases from the May 2026 review).
|
|
1109
|
+
const sanitizeOutcome = sanitizeReflectPayload({ content: payload.content, ...(payload.frontmatter ? { frontmatter: payload.frontmatter } : {}) }, assetContent, payload.ref);
|
|
1110
|
+
if (sanitizeOutcome.reject) {
|
|
1111
|
+
appendEvent({
|
|
1112
|
+
eventType: "reflect_completed",
|
|
1113
|
+
ref: payload.ref,
|
|
1114
|
+
metadata: {
|
|
1115
|
+
source: "reflect",
|
|
1116
|
+
sanitized: true,
|
|
1117
|
+
rejected: true,
|
|
1118
|
+
rejectReason: sanitizeOutcome.reject.error,
|
|
1119
|
+
...(sanitizeOutcome.warnings.length > 0 ? { sanitizerWarnings: sanitizeOutcome.warnings } : {}),
|
|
1120
|
+
},
|
|
1121
|
+
});
|
|
1122
|
+
return {
|
|
1123
|
+
schemaVersion: 1,
|
|
1124
|
+
ok: false,
|
|
1125
|
+
reason: sanitizeOutcome.reject.reason,
|
|
1126
|
+
error: sanitizeOutcome.reject.error,
|
|
1127
|
+
...(options.ref ? { ref: options.ref } : {}),
|
|
1128
|
+
exitCode: result.exitCode,
|
|
1129
|
+
};
|
|
1130
|
+
}
|
|
1131
|
+
payload = {
|
|
1132
|
+
...payload,
|
|
1133
|
+
content: sanitizeOutcome.content,
|
|
1134
|
+
...(sanitizeOutcome.frontmatter ? { frontmatter: sanitizeOutcome.frontmatter } : {}),
|
|
1135
|
+
};
|
|
1136
|
+
// 8. Create the proposal. The proposal queue is the ONLY thing reflect
|
|
299
1137
|
// writes — promotion to a real asset is gated by `akm proposal accept`.
|
|
1138
|
+
//
|
|
1139
|
+
// R-4 / #373: Stamp `derived_from_reflect: true` in the frontmatter of any
|
|
1140
|
+
// lesson proposal generated by reflect. This provenance marker lets
|
|
1141
|
+
// `readRelatedLessons` exclude echo-chamber lessons (lessons that originate
|
|
1142
|
+
// from prior reflect runs on the same skill) unless independent feedback
|
|
1143
|
+
// evidence exists. ExpeL arXiv:2308.10144 — reject rules without success/
|
|
1144
|
+
// failure differential from independent evidence.
|
|
1145
|
+
const isLessonProposal = (() => {
|
|
1146
|
+
try {
|
|
1147
|
+
return parseAssetRef(payload.ref).type === "lesson";
|
|
1148
|
+
}
|
|
1149
|
+
catch {
|
|
1150
|
+
return false;
|
|
1151
|
+
}
|
|
1152
|
+
})();
|
|
1153
|
+
const basePayloadFrontmatter = payload.frontmatter ?? {};
|
|
1154
|
+
const payloadFrontmatterWithProvenance = isLessonProposal
|
|
1155
|
+
? { ...basePayloadFrontmatter, derived_from_reflect: true }
|
|
1156
|
+
: basePayloadFrontmatter;
|
|
1157
|
+
// Draft mode: skip DB persistence — the SC sampling loop in improve.ts persists
|
|
1158
|
+
// only the majority-vote winner (R-2 / #389). Return a synthetic proposal so
|
|
1159
|
+
// pickMajorityVote can compare content via Jaccard similarity.
|
|
1160
|
+
if (options.draftMode) {
|
|
1161
|
+
const draftProposal = {
|
|
1162
|
+
id: `sc-draft-${Date.now()}`,
|
|
1163
|
+
ref: payload.ref,
|
|
1164
|
+
source: "reflect",
|
|
1165
|
+
status: "pending",
|
|
1166
|
+
createdAt: new Date().toISOString(),
|
|
1167
|
+
updatedAt: new Date().toISOString(),
|
|
1168
|
+
payload: {
|
|
1169
|
+
content: payload.content,
|
|
1170
|
+
...(Object.keys(payloadFrontmatterWithProvenance).length > 0
|
|
1171
|
+
? { frontmatter: payloadFrontmatterWithProvenance }
|
|
1172
|
+
: {}),
|
|
1173
|
+
},
|
|
1174
|
+
// Phase 6A: preserve confidence on the synthetic draft so the SC majority
|
|
1175
|
+
// winner carries the score through to the persisted proposal.
|
|
1176
|
+
...(typeof payload.confidence === "number" ? { confidence: payload.confidence } : {}),
|
|
1177
|
+
};
|
|
1178
|
+
return {
|
|
1179
|
+
schemaVersion: 1,
|
|
1180
|
+
ok: true,
|
|
1181
|
+
proposal: draftProposal,
|
|
1182
|
+
ref: draftProposal.ref,
|
|
1183
|
+
agentProfile: resolvedProfileName,
|
|
1184
|
+
durationMs: result.durationMs,
|
|
1185
|
+
};
|
|
1186
|
+
}
|
|
300
1187
|
const createInput = {
|
|
301
1188
|
ref: payload.ref,
|
|
302
1189
|
source: "reflect",
|
|
303
1190
|
sourceRun: `reflect-${Date.now()}`,
|
|
304
1191
|
payload: {
|
|
305
1192
|
content: payload.content,
|
|
306
|
-
...(
|
|
1193
|
+
...(Object.keys(payloadFrontmatterWithProvenance).length > 0
|
|
1194
|
+
? { frontmatter: payloadFrontmatterWithProvenance }
|
|
1195
|
+
: {}),
|
|
307
1196
|
},
|
|
1197
|
+
// Phase 6A: forward LLM-reported confidence into the proposal record.
|
|
1198
|
+
// `parseAgentProposalPayload` already clamps to [0, 1] and drops non-
|
|
1199
|
+
// finite values; `createProposal` runs its own sanitizer as a safety net.
|
|
1200
|
+
...(typeof payload.confidence === "number" ? { confidence: payload.confidence } : {}),
|
|
308
1201
|
};
|
|
309
|
-
const
|
|
1202
|
+
const proposalResult = createProposal(stash, createInput, options.ctx);
|
|
1203
|
+
if (isProposalSkipped(proposalResult)) {
|
|
1204
|
+
// Dedup/cooldown guard fired — surface as a "cooldown" reason (not "parse_error")
|
|
1205
|
+
// so the improve orchestrator can distinguish legitimate skips from real failures
|
|
1206
|
+
// and exclude them from recentErrors/avoidPatterns injection.
|
|
1207
|
+
emitReflectFailed("cooldown", "proposal_skipped", options.ref, {
|
|
1208
|
+
proposalSkipReason: proposalResult.reason,
|
|
1209
|
+
});
|
|
1210
|
+
return {
|
|
1211
|
+
schemaVersion: 1,
|
|
1212
|
+
ok: false,
|
|
1213
|
+
reason: "cooldown",
|
|
1214
|
+
error: `Proposal skipped (${proposalResult.reason}): ${proposalResult.message}`,
|
|
1215
|
+
...(options.ref ? { ref: options.ref } : {}),
|
|
1216
|
+
exitCode: null,
|
|
1217
|
+
};
|
|
1218
|
+
}
|
|
1219
|
+
const proposal = proposalResult;
|
|
310
1220
|
appendEvent({
|
|
311
1221
|
eventType: "reflect_completed",
|
|
312
1222
|
ref: proposal.ref,
|
|
313
1223
|
metadata: {
|
|
314
1224
|
proposalId: proposal.id,
|
|
315
1225
|
source: "reflect",
|
|
316
|
-
agentProfile:
|
|
1226
|
+
agentProfile: resolvedProfileName,
|
|
317
1227
|
},
|
|
318
1228
|
});
|
|
319
1229
|
return {
|
|
@@ -321,7 +1231,7 @@ export async function akmReflect(options = {}) {
|
|
|
321
1231
|
ok: true,
|
|
322
1232
|
proposal,
|
|
323
1233
|
ref: proposal.ref,
|
|
324
|
-
agentProfile:
|
|
1234
|
+
agentProfile: resolvedProfileName,
|
|
325
1235
|
durationMs: result.durationMs,
|
|
326
1236
|
};
|
|
327
1237
|
}
|