akm-cli 0.8.0-rc2 → 0.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/{.github/CHANGELOG.md → CHANGELOG.md} +191 -3
- package/README.md +22 -6
- package/SECURITY.md +93 -0
- package/dist/cli/config-migrate.js +144 -0
- package/dist/cli/config-validate.js +39 -0
- package/dist/cli/confirm.js +73 -0
- package/dist/cli/parse-args.js +93 -3
- package/dist/cli/shared.js +129 -0
- package/dist/cli.js +2141 -1268
- package/dist/commands/add-cli.js +279 -0
- package/dist/commands/agent-dispatch.js +20 -12
- package/dist/commands/agent-support.js +11 -5
- package/dist/commands/completions.js +3 -0
- package/dist/commands/config-cli.js +129 -517
- package/dist/commands/consolidate.js +1533 -144
- package/dist/commands/curate.js +44 -3
- package/dist/commands/db-cli.js +23 -0
- package/dist/commands/distill-promotion-policy.js +5 -3
- package/dist/commands/distill.js +906 -100
- package/dist/commands/env.js +213 -0
- package/dist/commands/eval-cases.js +3 -0
- package/dist/commands/events.js +3 -0
- package/dist/commands/extract-cli.js +127 -0
- package/dist/commands/extract-prompt.js +204 -0
- package/dist/commands/extract.js +477 -0
- package/dist/commands/feedback-cli.js +331 -0
- package/dist/commands/graph.js +260 -5
- package/dist/commands/health.js +977 -51
- package/dist/commands/help/help-accept.md +6 -3
- package/dist/commands/help/help-improve.md +36 -8
- package/dist/commands/help/help-proposals.md +7 -4
- package/dist/commands/help/help-reject.md +5 -2
- package/dist/commands/history.js +51 -16
- package/dist/commands/improve-auto-accept.js +97 -0
- package/dist/commands/improve-cli.js +236 -0
- package/dist/commands/improve-profiles.js +184 -0
- package/dist/commands/improve-result-file.js +167 -0
- package/dist/commands/improve.js +1725 -332
- package/dist/commands/info.js +3 -0
- package/dist/commands/init.js +49 -1
- package/dist/commands/installed-stashes.js +6 -23
- package/dist/commands/knowledge.js +3 -0
- package/dist/commands/lint/agent-linter.js +3 -0
- package/dist/commands/lint/base-linter.js +199 -5
- package/dist/commands/lint/command-linter.js +3 -0
- package/dist/commands/lint/default-linter.js +3 -0
- package/dist/commands/lint/env-key-rules.js +154 -0
- package/dist/commands/lint/index.js +92 -3
- package/dist/commands/lint/knowledge-linter.js +3 -0
- package/dist/commands/lint/markdown-insertion.js +343 -0
- package/dist/commands/lint/memory-linter.js +3 -0
- package/dist/commands/lint/registry.js +3 -0
- package/dist/commands/lint/skill-linter.js +3 -0
- package/dist/commands/lint/task-linter.js +15 -12
- package/dist/commands/lint/types.js +3 -0
- package/dist/commands/lint/workflow-linter.js +3 -0
- package/dist/commands/lint.js +3 -0
- package/dist/commands/migration-help.js +5 -2
- package/dist/commands/proposal-drain-policies.js +128 -0
- package/dist/commands/proposal-drain.js +477 -0
- package/dist/commands/proposal.js +60 -6
- package/dist/commands/propose.js +24 -19
- package/dist/commands/reflect.js +1004 -94
- package/dist/commands/registry-cli.js +150 -0
- package/dist/commands/registry-search.js +3 -0
- package/dist/commands/remember-cli.js +257 -0
- package/dist/commands/remember.js +15 -6
- package/dist/commands/schema-repair.js +88 -15
- package/dist/commands/search.js +99 -14
- package/dist/commands/secret.js +173 -0
- package/dist/commands/self-update.js +3 -0
- package/dist/commands/show.js +32 -13
- package/dist/commands/source-add.js +7 -35
- package/dist/commands/source-clone.js +3 -0
- package/dist/commands/source-manage.js +3 -0
- package/dist/commands/tasks.js +161 -95
- package/dist/commands/url-checker.js +3 -0
- package/dist/core/action-contributors.js +3 -0
- package/dist/core/asset-ref.js +13 -2
- package/dist/core/asset-registry.js +9 -2
- package/dist/core/asset-serialize.js +88 -0
- package/dist/core/asset-spec.js +61 -5
- package/dist/core/common.js +93 -5
- package/dist/core/concurrent.js +3 -0
- package/dist/core/config-io.js +347 -0
- package/dist/core/config-migration.js +622 -0
- package/dist/core/config-schema.js +558 -0
- package/dist/core/config-sources.js +108 -0
- package/dist/core/config-types.js +4 -0
- package/dist/core/config-walker.js +337 -0
- package/dist/core/config.js +366 -1077
- package/dist/core/errors.js +42 -20
- package/dist/core/events.js +31 -25
- package/dist/core/file-lock.js +104 -0
- package/dist/core/frontmatter.js +75 -10
- package/dist/core/lesson-lint.js +3 -0
- package/dist/core/markdown.js +3 -0
- package/dist/core/memory-belief.js +62 -0
- package/dist/core/memory-contradiction-detect.js +274 -0
- package/dist/core/memory-improve.js +142 -14
- package/dist/core/parse.js +3 -0
- package/dist/core/paths.js +218 -50
- package/dist/core/proposal-quality-validators.js +380 -0
- package/dist/core/proposal-validators.js +11 -3
- package/dist/core/proposals.js +464 -5
- package/dist/core/state-db.js +349 -56
- package/dist/core/text-truncation.js +107 -0
- package/dist/core/time.js +3 -0
- package/dist/core/tty.js +59 -0
- package/dist/core/warn.js +7 -2
- package/dist/core/write-source.js +12 -0
- package/dist/indexer/db-backup.js +391 -0
- package/dist/indexer/db-search.js +136 -28
- package/dist/indexer/db.js +661 -166
- package/dist/indexer/ensure-index.js +3 -0
- package/dist/indexer/file-context.js +3 -0
- package/dist/indexer/graph-boost.js +162 -40
- package/dist/indexer/graph-db.js +241 -51
- package/dist/indexer/graph-dedup.js +3 -7
- package/dist/indexer/graph-extraction.js +242 -149
- package/dist/indexer/index-context.js +3 -9
- package/dist/indexer/indexer.js +84 -14
- package/dist/indexer/llm-cache.js +24 -19
- package/dist/indexer/manifest.js +3 -0
- package/dist/indexer/matchers.js +184 -11
- package/dist/indexer/memory-inference.js +94 -50
- package/dist/indexer/metadata-contributors.js +3 -0
- package/dist/indexer/metadata.js +110 -50
- package/dist/indexer/path-resolver.js +3 -0
- package/dist/indexer/project-context.js +192 -0
- package/dist/indexer/ranking-contributors.js +134 -7
- package/dist/indexer/ranking.js +8 -1
- package/dist/indexer/search-fields.js +5 -9
- package/dist/indexer/search-hit-enrichers.js +91 -2
- package/dist/indexer/search-source.js +20 -1
- package/dist/indexer/semantic-status.js +4 -1
- package/dist/indexer/staleness-detect.js +447 -0
- package/dist/indexer/usage-events.js +12 -9
- package/dist/indexer/walker.js +3 -0
- package/dist/integrations/agent/builders.js +135 -0
- package/dist/integrations/agent/config.js +121 -401
- package/dist/integrations/agent/detect.js +3 -0
- package/dist/integrations/agent/index.js +6 -14
- package/dist/integrations/agent/model-aliases.js +55 -0
- package/dist/integrations/agent/profiles.js +3 -0
- package/dist/integrations/agent/prompts.js +137 -8
- package/dist/integrations/agent/runner.js +208 -0
- package/dist/integrations/agent/sdk-runner.js +8 -2
- package/dist/integrations/agent/spawn.js +54 -14
- package/dist/integrations/github.js +3 -0
- package/dist/integrations/lockfile.js +22 -51
- package/dist/integrations/session-logs/index.js +4 -0
- package/dist/integrations/session-logs/inline-refs.js +35 -0
- package/dist/integrations/session-logs/pre-filter.js +152 -0
- package/dist/integrations/session-logs/providers/claude-code.js +226 -0
- package/dist/integrations/session-logs/providers/opencode.js +231 -25
- package/dist/integrations/session-logs/types.js +3 -0
- package/dist/llm/call-ai.js +14 -26
- package/dist/llm/client.js +16 -2
- package/dist/llm/embedder.js +20 -29
- package/dist/llm/embedders/cache.js +3 -7
- package/dist/llm/embedders/local.js +42 -1
- package/dist/llm/embedders/remote.js +20 -8
- package/dist/llm/embedders/types.js +3 -7
- package/dist/llm/feature-gate.js +92 -56
- package/dist/llm/graph-extract.js +401 -30
- package/dist/llm/index-passes.js +44 -29
- package/dist/llm/memory-infer.js +30 -2
- package/dist/llm/metadata-enhance.js +3 -7
- package/dist/llm/prompts/extract-session.md +80 -0
- package/dist/llm/prompts/graph-extract-user-prompt.md +24 -1
- package/dist/output/cli-hints-full.md +60 -32
- package/dist/output/cli-hints-short.md +10 -7
- package/dist/output/cli-hints.js +5 -2
- package/dist/output/context.js +60 -8
- package/dist/output/renderers.js +170 -194
- package/dist/output/shapes/curate.js +56 -0
- package/dist/output/shapes/distill.js +10 -0
- package/dist/output/shapes/env-list.js +19 -0
- package/dist/output/shapes/events.js +11 -0
- package/dist/output/shapes/helpers.js +424 -0
- package/dist/output/shapes/history.js +7 -0
- package/dist/output/shapes/passthrough.js +105 -0
- package/dist/output/shapes/proposal-accept.js +7 -0
- package/dist/output/shapes/proposal-diff.js +7 -0
- package/dist/output/shapes/proposal-list.js +7 -0
- package/dist/output/shapes/proposal-producer.js +11 -0
- package/dist/output/shapes/proposal-reject.js +7 -0
- package/dist/output/shapes/proposal-show.js +7 -0
- package/dist/output/shapes/registry-search.js +6 -0
- package/dist/output/shapes/registry.js +30 -0
- package/dist/output/shapes/search.js +6 -0
- package/dist/output/shapes/secret-list.js +19 -0
- package/dist/output/shapes/show.js +6 -0
- package/dist/output/shapes/vault-list.js +19 -0
- package/dist/output/shapes.js +51 -549
- package/dist/output/text/add.js +6 -0
- package/dist/output/text/clone.js +6 -0
- package/dist/output/text/config.js +6 -0
- package/dist/output/text/curate.js +6 -0
- package/dist/output/text/distill.js +7 -0
- package/dist/output/text/enable-disable.js +7 -0
- package/dist/output/text/events.js +10 -0
- package/dist/output/text/feedback.js +6 -0
- package/dist/output/text/helpers.js +1059 -0
- package/dist/output/text/history.js +7 -0
- package/dist/output/text/import.js +6 -0
- package/dist/output/text/index.js +6 -0
- package/dist/output/text/info.js +6 -0
- package/dist/output/text/init.js +6 -0
- package/dist/output/text/list.js +6 -0
- package/dist/output/text/proposal-producer.js +8 -0
- package/dist/output/text/proposal.js +12 -0
- package/dist/output/text/registry-commands.js +11 -0
- package/dist/output/text/registry.js +30 -0
- package/dist/output/text/remember.js +6 -0
- package/dist/output/text/remove.js +6 -0
- package/dist/output/text/save.js +6 -0
- package/dist/output/text/search.js +6 -0
- package/dist/output/text/show.js +6 -0
- package/dist/output/text/update.js +6 -0
- package/dist/output/text/upgrade.js +6 -0
- package/dist/output/text/vault.js +16 -0
- package/dist/output/text/wiki.js +15 -0
- package/dist/output/text/workflow.js +14 -0
- package/dist/output/text.js +44 -1329
- package/dist/registry/build-index.js +3 -0
- package/dist/registry/create-provider-registry.js +3 -0
- package/dist/registry/factory.js +4 -1
- package/dist/registry/origin-resolve.js +3 -0
- package/dist/registry/providers/index.js +3 -0
- package/dist/registry/providers/skills-sh.js +11 -2
- package/dist/registry/providers/static-index.js +10 -1
- package/dist/registry/providers/types.js +3 -24
- package/dist/registry/resolve.js +11 -16
- package/dist/registry/types.js +3 -0
- package/dist/scripts/migrate-storage.js +17767 -0
- package/dist/scripts/migrations/import-fs-improve-runs-to-db.js +9031 -0
- package/dist/scripts/migrations/v16-to-v17.js +141 -0
- package/dist/setup/detect.js +3 -0
- package/dist/setup/ripgrep-install.js +3 -0
- package/dist/setup/ripgrep-resolve.js +3 -0
- package/dist/setup/setup.js +306 -67
- package/dist/setup/steps.js +3 -15
- package/dist/sources/include.js +3 -0
- package/dist/sources/provider-factory.js +3 -11
- package/dist/sources/provider.js +3 -20
- package/dist/sources/providers/filesystem.js +19 -23
- package/dist/sources/providers/git.js +171 -21
- package/dist/sources/providers/index.js +3 -0
- package/dist/sources/providers/install-types.js +3 -13
- package/dist/sources/providers/npm.js +3 -4
- package/dist/sources/providers/provider-utils.js +3 -0
- package/dist/sources/providers/sync-from-ref.js +3 -11
- package/dist/sources/providers/tar-utils.js +3 -0
- package/dist/sources/providers/website.js +18 -22
- package/dist/sources/resolve.js +3 -0
- package/dist/sources/types.js +3 -0
- package/dist/sources/website-ingest.js +3 -0
- package/dist/tasks/backends/cron.js +3 -0
- package/dist/tasks/backends/exec-utils.js +3 -0
- package/dist/tasks/backends/index.js +3 -11
- package/dist/tasks/backends/launchd.js +3 -0
- package/dist/tasks/backends/schtasks.js +3 -0
- package/dist/tasks/parser.js +51 -38
- package/dist/tasks/resolveAkmBin.js +3 -0
- package/dist/tasks/runner.js +35 -9
- package/dist/tasks/schedule.js +20 -1
- package/dist/tasks/schema.js +5 -3
- package/dist/tasks/validator.js +6 -3
- package/dist/version.js +3 -0
- package/dist/wiki/wiki-templates.js +3 -0
- package/dist/wiki/wiki.js +3 -0
- package/dist/workflows/authoring.js +3 -0
- package/dist/workflows/cli.js +3 -0
- package/dist/workflows/db.js +140 -10
- package/dist/workflows/document-cache.js +3 -10
- package/dist/workflows/parser.js +3 -0
- package/dist/workflows/renderer.js +3 -0
- package/dist/workflows/runs.js +18 -1
- package/dist/workflows/schema.js +3 -0
- package/dist/workflows/scope-key.js +3 -0
- package/dist/workflows/validator.js +5 -9
- package/docs/README.md +7 -2
- package/docs/data-and-telemetry.md +225 -0
- package/docs/migration/release-notes/0.7.5.md +2 -2
- package/docs/migration/release-notes/0.8.0.md +57 -5
- package/docs/migration/v0.7-to-v0.8.md +1378 -0
- package/package.json +28 -11
- package/.github/LICENSE +0 -374
- package/dist/commands/install-audit.js +0 -385
- package/dist/commands/vault.js +0 -310
- package/dist/indexer/match-contributors.js +0 -141
- package/dist/integrations/agent/pipeline.js +0 -39
- package/dist/integrations/agent/runners.js +0 -31
package/dist/commands/distill.js
CHANGED
|
@@ -1,3 +1,6 @@
|
|
|
1
|
+
// This Source Code Form is subject to the terms of the Mozilla Public
|
|
2
|
+
// License, v. 2.0. If a copy of the MPL was not distributed with this
|
|
3
|
+
// file, You can obtain one at https://mozilla.org/MPL/2.0/.
|
|
1
4
|
/**
|
|
2
5
|
* `akm distill <ref>` — feedback distillation into lesson proposals (#228).
|
|
3
6
|
*
|
|
@@ -11,8 +14,10 @@
|
|
|
11
14
|
* # Architectural seams
|
|
12
15
|
*
|
|
13
16
|
* - **Single bounded in-tree LLM call.** Wrapped in {@link tryLlmFeature}
|
|
14
|
-
* under the `
|
|
15
|
-
*
|
|
17
|
+
* under the `distill` gate (v1 spec §14; 0.8.0 unified the orchestration
|
|
18
|
+
* and LLM-call gates under `processes.distill.enabled`). The wrapper
|
|
19
|
+
* enforces a hard timeout (default 600s / 10 min — overridable via
|
|
20
|
+
* `opts.timeoutMs`) and converts disable / throw / timeout
|
|
16
21
|
* into a `null` return from `fn`, which we treat as a graceful
|
|
17
22
|
* "skipped" outcome (exit 0, no proposal, `distill_invoked` event with
|
|
18
23
|
* `outcome: "skipped"`).
|
|
@@ -48,43 +53,47 @@
|
|
|
48
53
|
import fs from "node:fs";
|
|
49
54
|
import path from "node:path";
|
|
50
55
|
import { parseAssetRef } from "../core/asset-ref";
|
|
56
|
+
import { assembleAssetFromString } from "../core/asset-serialize";
|
|
51
57
|
import { resolveStashDir, timestampForFilename } from "../core/common";
|
|
52
|
-
import { loadConfig } from "../core/config";
|
|
58
|
+
import { getDefaultLlmConfig, loadConfig } from "../core/config";
|
|
53
59
|
import { ConfigError, UsageError } from "../core/errors";
|
|
54
60
|
import { appendEvent, readEvents } from "../core/events";
|
|
55
61
|
import { parseFrontmatter } from "../core/frontmatter";
|
|
56
62
|
import { lintLessonContent } from "../core/lesson-lint";
|
|
57
63
|
import { stripMarkdownFences } from "../core/markdown";
|
|
58
|
-
import { createProposal } from "../core/proposals";
|
|
64
|
+
import { createProposal, isProposalSkipped, listProposals, } from "../core/proposals";
|
|
59
65
|
import { warnVerbose } from "../core/warn";
|
|
60
66
|
import { resolveAssetPath } from "../indexer/path-resolver";
|
|
61
67
|
import { chatCompletion, parseEmbeddedJsonResponse } from "../llm/client";
|
|
62
68
|
import { isLlmFeatureEnabled, tryLlmFeature } from "../llm/feature-gate";
|
|
63
69
|
import { assessMemoryKnowledgePromotionCandidate, deriveKnowledgeRef } from "./distill-promotion-policy";
|
|
70
|
+
import { akmSearch } from "./search";
|
|
64
71
|
/**
|
|
65
|
-
*
|
|
66
|
-
*
|
|
67
|
-
*
|
|
72
|
+
* Asset-ref types that `akm distill` structurally refuses as inputs.
|
|
73
|
+
*
|
|
74
|
+
* Distill *produces* lessons from non-lesson sources (memory, skill, knowledge,
|
|
75
|
+
* etc.). Calling distill on an existing `lesson:*` ref would derive
|
|
76
|
+
* `lesson:lesson-<name>-lesson-lesson` (double `-lesson` suffix) — the
|
|
77
|
+
* recursive-ref defect observed across 323 archived rejected proposals.
|
|
78
|
+
*
|
|
79
|
+
* The runtime gate inside {@link akmDistill} still refuses these inputs
|
|
80
|
+
* defensively (returning an `outcome: "skipped"` envelope with `skipReason:
|
|
81
|
+
* "recursive_lesson_input"`). This exported set is the planner-side companion:
|
|
82
|
+
* callers that schedule distill attempts (e.g. `akm improve`'s distill queue)
|
|
83
|
+
* import it so refs of these types never enter the queue in the first place.
|
|
84
|
+
*
|
|
85
|
+
* Source of truth: this set drives the gate in `akmDistill` and is consumed
|
|
86
|
+
* directly by the improve planner. Adding a new structurally-refused input
|
|
87
|
+
* type means updating this constant — the planner picks the change up for
|
|
88
|
+
* free.
|
|
89
|
+
*/
|
|
90
|
+
export const DISTILL_REFUSED_INPUT_TYPES = new Set(["lesson"]);
|
|
91
|
+
/**
|
|
92
|
+
* Returns true when `type` is structurally refused as an input by
|
|
93
|
+
* {@link akmDistill}. See {@link DISTILL_REFUSED_INPUT_TYPES}.
|
|
68
94
|
*/
|
|
69
|
-
function
|
|
70
|
-
|
|
71
|
-
if (extras.proposalKind !== undefined)
|
|
72
|
-
meta.proposalKind = extras.proposalKind;
|
|
73
|
-
if (extras.proposalId !== undefined)
|
|
74
|
-
meta.proposalId = extras.proposalId;
|
|
75
|
-
if (extras.proposalRef !== undefined)
|
|
76
|
-
meta.proposalRef = extras.proposalRef;
|
|
77
|
-
if (extras.score !== undefined)
|
|
78
|
-
meta.score = extras.score;
|
|
79
|
-
if (extras.reason !== undefined)
|
|
80
|
-
meta.reason = extras.reason;
|
|
81
|
-
if (extras.findingKinds !== undefined)
|
|
82
|
-
meta.findingKinds = extras.findingKinds;
|
|
83
|
-
if (extras.filteredFeedbackCount !== undefined)
|
|
84
|
-
meta.filteredFeedbackCount = extras.filteredFeedbackCount;
|
|
85
|
-
if (extras.sourceRun !== undefined)
|
|
86
|
-
meta.sourceRun = extras.sourceRun;
|
|
87
|
-
return meta;
|
|
95
|
+
export function isDistillRefusedInputType(type) {
|
|
96
|
+
return DISTILL_REFUSED_INPUT_TYPES.has(type);
|
|
88
97
|
}
|
|
89
98
|
// ── Lesson-ref derivation ───────────────────────────────────────────────────
|
|
90
99
|
/** Derive the proposed lesson ref from the input ref. See module docblock. */
|
|
@@ -103,6 +112,14 @@ export function deriveLessonRef(inputRef) {
|
|
|
103
112
|
.replace(/^-|-$/g, "");
|
|
104
113
|
return `lesson:${safe}-lesson`;
|
|
105
114
|
}
|
|
115
|
+
// ── Content quality validators ──────────────────────────────────────────────
|
|
116
|
+
//
|
|
117
|
+
// The actual implementations now live in `core/proposal-quality-validators.ts`
|
|
118
|
+
// so the same checks run inside `runProposalValidators` on `proposal accept`.
|
|
119
|
+
// We re-export the public-facing helpers here so existing imports
|
|
120
|
+
// (`from "../src/commands/distill"`) continue to resolve.
|
|
121
|
+
import { detectDoubleFrontmatter, isValidDescription, isValidWhenToUse } from "../core/proposal-quality-validators";
|
|
122
|
+
export { detectDoubleFrontmatter, isValidDescription, isValidWhenToUse };
|
|
106
123
|
// ── Prompt assembly ─────────────────────────────────────────────────────────
|
|
107
124
|
const LESSON_SYSTEM_PROMPT = [
|
|
108
125
|
"You are the akm `distill` distiller.",
|
|
@@ -110,40 +127,232 @@ const LESSON_SYSTEM_PROMPT = [
|
|
|
110
127
|
"concise *lesson* an agent should remember next time it works on this",
|
|
111
128
|
"asset's domain.",
|
|
112
129
|
"",
|
|
113
|
-
"
|
|
114
|
-
"
|
|
115
|
-
" description: <one-line summary of what the lesson teaches>",
|
|
116
|
-
" when_to_use: <one-line trigger that should make a caller apply it>",
|
|
117
|
-
" ---",
|
|
130
|
+
"YOUR RESPONSE MUST START EXACTLY WITH `---` ON THE VERY FIRST LINE.",
|
|
131
|
+
"DO NOT output any prose, explanation, or code fences before or after.",
|
|
118
132
|
"",
|
|
119
|
-
"
|
|
133
|
+
"Required output format — copy this structure exactly:",
|
|
134
|
+
"---",
|
|
135
|
+
"description: <one complete sentence (ending with `.`) summarising what the lesson teaches>",
|
|
136
|
+
"when_to_use: <one complete sentence describing the concrete trigger condition>",
|
|
137
|
+
"---",
|
|
120
138
|
"",
|
|
121
|
-
"
|
|
122
|
-
"
|
|
139
|
+
"<lesson body — plain markdown, 1–3 short paragraphs of practical guidance>",
|
|
140
|
+
"",
|
|
141
|
+
"## description field (MANDATORY)",
|
|
142
|
+
"- A single complete sentence in present tense, 80-200 chars, NO markdown.",
|
|
143
|
+
"- Self-contained: a reviewer must understand the lesson from this field alone.",
|
|
144
|
+
'- DO NOT start with "When ", "If ", or a connector word — that belongs in when_to_use.',
|
|
145
|
+
'- DO NOT copy a section heading ("Key takeaways", "For example", "Key pitfalls").',
|
|
146
|
+
"- DO NOT begin with a numbered list marker, code fence, or markdown heading.",
|
|
147
|
+
"",
|
|
148
|
+
'GOOD: "Always validate ref existence before promoting a memory to knowledge; missing refs surface as silent 404s during accept."',
|
|
149
|
+
'BAD: "Key pitfalls"',
|
|
150
|
+
'BAD: "When working with the akm CLI"',
|
|
151
|
+
'BAD: "For example, you might..."',
|
|
152
|
+
'BAD: "1. Check the file"',
|
|
153
|
+
"",
|
|
154
|
+
"RULES:",
|
|
155
|
+
"- `when_to_use` MUST be a complete sentence describing a concrete trigger. Never write `When working with <asset-name>` — that is circular and useless.",
|
|
156
|
+
"- `description` and `when_to_use` MUST differ from each other.",
|
|
157
|
+
"- The lesson body MUST be non-empty markdown prose. Do NOT restate `description:` or `when_to_use:` inside the body (no `**description:** ...` or `**when_to_use:** ...` lines — the frontmatter is the only place those keys belong).",
|
|
158
|
+
"- Do NOT emit a second `---` fence after the opening frontmatter — there are exactly two `---` lines in the output, both belonging to the single frontmatter block at the top.",
|
|
159
|
+
"- Do NOT reproduce the source asset verbatim — distil what a caller needs to know.",
|
|
160
|
+
"- Output ONLY the lesson file. No preamble, no code fences, no trailing prose.",
|
|
123
161
|
].join("\n");
|
|
124
162
|
const KNOWLEDGE_SYSTEM_PROMPT = [
|
|
125
163
|
"You are the akm `distill` distiller.",
|
|
126
164
|
"Given an asset and recent feedback events about it, produce a concise",
|
|
127
165
|
"*knowledge* markdown document capturing the durable, reusable facts.",
|
|
128
166
|
"Prefer stable guidance over narrative recap.",
|
|
129
|
-
"
|
|
130
|
-
"
|
|
131
|
-
"
|
|
132
|
-
"
|
|
167
|
+
"",
|
|
168
|
+
"YOUR RESPONSE MUST START EXACTLY WITH `---` ON THE VERY FIRST LINE.",
|
|
169
|
+
"DO NOT output any prose, explanation, or code fences before or after.",
|
|
170
|
+
"",
|
|
171
|
+
"Required output format:",
|
|
172
|
+
"---",
|
|
173
|
+
"description: <one-line summary of the knowledge asset>",
|
|
174
|
+
"tags: [<tag1>, <tag2>]",
|
|
175
|
+
"---",
|
|
176
|
+
"",
|
|
177
|
+
"# <Title>",
|
|
178
|
+
"",
|
|
179
|
+
"<body — structured markdown, durable facts only>",
|
|
180
|
+
"",
|
|
181
|
+
"RULES:",
|
|
182
|
+
"- `description` MUST be a non-empty single-line string.",
|
|
183
|
+
"- Include a meaningful markdown body with a `# Title` heading.",
|
|
184
|
+
"- Output ONLY the knowledge file. No preamble, no code fences, no trailing prose.",
|
|
133
185
|
].join("\n");
|
|
186
|
+
// ── Structured-output schemas (responseSchema lift) ─────────────────────────
|
|
187
|
+
//
|
|
188
|
+
// PR 1 of the asset-writers decision (see knowledge:projects/akm/
|
|
189
|
+
// asset-writers-investigation/00-synthesis): on providers that honour
|
|
190
|
+
// `response_format: json_schema`, ask the LLM for a typed JSON object and
|
|
191
|
+
// re-assemble the markdown locally. The previous "emit raw markdown with
|
|
192
|
+
// embedded frontmatter" path remains as a fallback for providers that ignore
|
|
193
|
+
// the schema (and for the `chat` test seam, which is wired to return strings
|
|
194
|
+
// today). Shape-level rejection codes — MALFORMED_FRONTMATTER_BLOCK,
|
|
195
|
+
// FRONTMATTER_NOT_OBJECT, INVALID_YAML, UNBALANCED_CODE_FENCE — become
|
|
196
|
+
// unreachable on the structured path. Content-quality validators
|
|
197
|
+
// (isValidDescription / isValidWhenToUse) keep firing post-assembly because
|
|
198
|
+
// the LLM still controls the string contents of typed fields.
|
|
199
|
+
/**
|
|
200
|
+
* JSON Schema for structured lesson distillation. Mirrors the LESSON_SYSTEM_PROMPT
|
|
201
|
+
* frontmatter contract. Required: description, when_to_use, body. Optional:
|
|
202
|
+
* tags (string array) so providers that volunteer categorisation hints survive
|
|
203
|
+
* the round-trip without being rejected as additionalProperties.
|
|
204
|
+
*/
|
|
205
|
+
export const DISTILL_LESSON_JSON_SCHEMA = {
|
|
206
|
+
type: "object",
|
|
207
|
+
required: ["description", "when_to_use", "body"],
|
|
208
|
+
additionalProperties: false,
|
|
209
|
+
properties: {
|
|
210
|
+
description: {
|
|
211
|
+
type: "string",
|
|
212
|
+
minLength: 10,
|
|
213
|
+
description: "Single complete sentence (80-200 chars) summarising what the lesson teaches. No markdown, no leading 'When'/'If'.",
|
|
214
|
+
},
|
|
215
|
+
when_to_use: {
|
|
216
|
+
type: "string",
|
|
217
|
+
minLength: 10,
|
|
218
|
+
description: "Single complete sentence describing the concrete trigger condition for the lesson.",
|
|
219
|
+
},
|
|
220
|
+
body: {
|
|
221
|
+
type: "string",
|
|
222
|
+
minLength: 1,
|
|
223
|
+
description: "Lesson body — plain markdown, 1-3 short paragraphs of practical guidance.",
|
|
224
|
+
},
|
|
225
|
+
tags: {
|
|
226
|
+
type: "array",
|
|
227
|
+
items: { type: "string" },
|
|
228
|
+
description: "Optional tag list. Empty array is allowed; the post-processor drops it if empty.",
|
|
229
|
+
},
|
|
230
|
+
},
|
|
231
|
+
};
|
|
232
|
+
/**
|
|
233
|
+
* JSON Schema for structured knowledge distillation. Mirrors the
|
|
234
|
+
* KNOWLEDGE_SYSTEM_PROMPT contract. Required: description, body. Optional:
|
|
235
|
+
* tags, sources.
|
|
236
|
+
*/
|
|
237
|
+
export const DISTILL_KNOWLEDGE_JSON_SCHEMA = {
|
|
238
|
+
type: "object",
|
|
239
|
+
required: ["description", "body"],
|
|
240
|
+
additionalProperties: false,
|
|
241
|
+
properties: {
|
|
242
|
+
description: {
|
|
243
|
+
type: "string",
|
|
244
|
+
minLength: 1,
|
|
245
|
+
description: "One-line summary of the knowledge asset.",
|
|
246
|
+
},
|
|
247
|
+
body: {
|
|
248
|
+
type: "string",
|
|
249
|
+
minLength: 1,
|
|
250
|
+
description: "Knowledge body — structured markdown with a `# Title` heading and durable facts only.",
|
|
251
|
+
},
|
|
252
|
+
tags: {
|
|
253
|
+
type: "array",
|
|
254
|
+
items: { type: "string" },
|
|
255
|
+
description: "Optional tag list. Empty array is allowed; the post-processor drops it if empty.",
|
|
256
|
+
},
|
|
257
|
+
sources: {
|
|
258
|
+
type: "array",
|
|
259
|
+
items: { type: "string" },
|
|
260
|
+
description: "Optional list of source refs the knowledge was distilled from.",
|
|
261
|
+
},
|
|
262
|
+
},
|
|
263
|
+
};
|
|
264
|
+
/**
|
|
265
|
+
* Assemble a markdown asset from a structured-output payload. Returns `null`
|
|
266
|
+
* when the payload is missing required fields — the caller then falls through
|
|
267
|
+
* to the prompt-contract markdown path. We deliberately do NOT validate
|
|
268
|
+
* content quality here (isValidDescription / isValidWhenToUse run downstream
|
|
269
|
+
* on the assembled content); this helper only catches shape-level emptiness
|
|
270
|
+
* that the schema may not have rejected (e.g. a provider that ignored
|
|
271
|
+
* `minLength` but still returned the field).
|
|
272
|
+
*/
|
|
273
|
+
export function assembleStructuredDistillMarkdown(payload, kind) {
|
|
274
|
+
if (payload === null || typeof payload !== "object")
|
|
275
|
+
return null;
|
|
276
|
+
const description = typeof payload.description === "string" ? payload.description.trim() : "";
|
|
277
|
+
const body = typeof payload.body === "string" ? payload.body.trim() : "";
|
|
278
|
+
if (description.length === 0 || body.length === 0)
|
|
279
|
+
return null;
|
|
280
|
+
const fm = { description };
|
|
281
|
+
if (kind === "lesson") {
|
|
282
|
+
const whenToUse = typeof payload.when_to_use === "string" ? payload.when_to_use.trim() : "";
|
|
283
|
+
if (whenToUse.length === 0)
|
|
284
|
+
return null;
|
|
285
|
+
fm.when_to_use = whenToUse;
|
|
286
|
+
}
|
|
287
|
+
if (Array.isArray(payload.tags)) {
|
|
288
|
+
const tags = payload.tags.filter((t) => typeof t === "string" && t.trim().length > 0);
|
|
289
|
+
if (tags.length > 0)
|
|
290
|
+
fm.tags = tags;
|
|
291
|
+
}
|
|
292
|
+
if (kind === "knowledge" && Array.isArray(payload.sources)) {
|
|
293
|
+
const sources = payload.sources.filter((s) => typeof s === "string" && s.trim().length > 0);
|
|
294
|
+
if (sources.length > 0)
|
|
295
|
+
fm.sources = sources;
|
|
296
|
+
}
|
|
297
|
+
const fmLines = Object.entries(fm)
|
|
298
|
+
.map(([k, v]) => {
|
|
299
|
+
if (Array.isArray(v))
|
|
300
|
+
return `${k}: [${v.map((s) => JSON.stringify(s)).join(", ")}]`;
|
|
301
|
+
return `${k}: ${JSON.stringify(v)}`;
|
|
302
|
+
})
|
|
303
|
+
.join("\n");
|
|
304
|
+
return assembleAssetFromString(fmLines, body);
|
|
305
|
+
}
|
|
134
306
|
function validateKnowledgeContent(content, inputRef) {
|
|
307
|
+
const findings = [];
|
|
135
308
|
const parsed = parseFrontmatter(content);
|
|
136
|
-
if (parsed.content.trim().length
|
|
137
|
-
|
|
138
|
-
return [
|
|
139
|
-
{
|
|
309
|
+
if (parsed.content.trim().length === 0) {
|
|
310
|
+
findings.push({
|
|
140
311
|
kind: "missing-body",
|
|
141
312
|
field: "body",
|
|
142
313
|
message: `Distilled knowledge for ${inputRef} must include a non-empty markdown body.`,
|
|
143
|
-
}
|
|
144
|
-
|
|
314
|
+
});
|
|
315
|
+
}
|
|
316
|
+
// Knowledge proposals don't strictly require a description, but if one is
|
|
317
|
+
// present it must be a real summary — not a placeholder like `---` or a
|
|
318
|
+
// truncated heading. Without this check, distill can land knowledge assets
|
|
319
|
+
// with `description: ---` (observed in the wild when the LLM has nothing
|
|
320
|
+
// meaningful to say about a session-checkpoint memory).
|
|
321
|
+
const fm = (parsed.data ?? {});
|
|
322
|
+
if (fm.description !== undefined) {
|
|
323
|
+
// Knowledge can legitimately mention the topic name in its description, so
|
|
324
|
+
// suppress the ref-restatement heuristic that's tuned for lesson assets.
|
|
325
|
+
const descCheck = isValidDescription(fm.description, inputRef, { skipRefTailCheck: true });
|
|
326
|
+
if (!descCheck.ok) {
|
|
327
|
+
findings.push({
|
|
328
|
+
kind: "invalid-description",
|
|
329
|
+
field: "description",
|
|
330
|
+
message: `Distilled knowledge for ${inputRef} has an invalid description: ${descCheck.reason}.`,
|
|
331
|
+
});
|
|
332
|
+
}
|
|
333
|
+
}
|
|
334
|
+
// Double-frontmatter pollution shows up in knowledge too — the LLM sometimes
|
|
335
|
+
// re-emits the source asset's frontmatter inside its own response, leaving
|
|
336
|
+
// two `---`-delimited blocks back-to-back.
|
|
337
|
+
const dfm = detectDoubleFrontmatter(content);
|
|
338
|
+
if (dfm) {
|
|
339
|
+
findings.push({
|
|
340
|
+
kind: dfm.kind,
|
|
341
|
+
field: "body",
|
|
342
|
+
message: `Distilled knowledge for ${inputRef}: ${dfm.message}`,
|
|
343
|
+
});
|
|
344
|
+
}
|
|
345
|
+
return findings;
|
|
145
346
|
}
|
|
146
|
-
/**
|
|
347
|
+
/**
|
|
348
|
+
* Pure: build the user-prompt body. Exported for tests.
|
|
349
|
+
*
|
|
350
|
+
* D-3 (#371): restructures the feedback section from raw JSON event lines into
|
|
351
|
+
* a Reflexion-style verbal contrast (`## What worked` / `## What failed`).
|
|
352
|
+
* The verbal format allows LLMs to use feedback as gradient signal rather than
|
|
353
|
+
* just metadata — capturing the +8% AlfWorld lift from arXiv:2303.11366 and
|
|
354
|
+
* the contrast-based rule-learning gain from ExpeL arXiv:2308.10144.
|
|
355
|
+
*/
|
|
147
356
|
export function buildDistillPrompt(input) {
|
|
148
357
|
const lines = [];
|
|
149
358
|
lines.push(`Asset ref: ${input.inputRef}`);
|
|
@@ -159,23 +368,128 @@ export function buildDistillPrompt(input) {
|
|
|
159
368
|
lines.push("(asset is not currently indexed; distil from feedback signal alone)");
|
|
160
369
|
}
|
|
161
370
|
lines.push("");
|
|
162
|
-
lines.push("Recent feedback events (most recent last):");
|
|
163
371
|
if (input.feedback.length === 0) {
|
|
164
|
-
lines.push("(no feedback events recorded — distil from the asset itself)");
|
|
372
|
+
lines.push("Recent feedback: (no feedback events recorded — distil from the asset itself)");
|
|
165
373
|
}
|
|
166
374
|
else {
|
|
375
|
+
// D-3 (#371): verbal contrast format for Reflexion verbal-gradient lift.
|
|
376
|
+
// Partition events into positive ("what worked") and negative ("what failed").
|
|
377
|
+
const positive = [];
|
|
378
|
+
const negative = [];
|
|
379
|
+
const neutral = [];
|
|
167
380
|
for (const event of input.feedback) {
|
|
168
|
-
const meta = event.metadata
|
|
169
|
-
|
|
381
|
+
const meta = (event.metadata ?? {});
|
|
382
|
+
const signal = typeof meta.signal === "string" ? meta.signal : undefined;
|
|
383
|
+
const reason = typeof meta.reason === "string" ? meta.reason : "";
|
|
384
|
+
const note = typeof meta.note === "string" ? meta.note : "";
|
|
385
|
+
const detail = reason || note;
|
|
386
|
+
const line = detail ? `- ${event.ts}: ${detail}` : `- ${event.ts}: feedback received`;
|
|
387
|
+
if (signal === "positive")
|
|
388
|
+
positive.push(line);
|
|
389
|
+
else if (signal === "negative")
|
|
390
|
+
negative.push(line);
|
|
391
|
+
else
|
|
392
|
+
neutral.push(`- ${event.ts} ${event.eventType}${event.metadata ? ` ${JSON.stringify(event.metadata)}` : ""}`);
|
|
393
|
+
}
|
|
394
|
+
if (positive.length > 0 || negative.length > 0) {
|
|
395
|
+
if (positive.length > 0) {
|
|
396
|
+
lines.push("## What worked");
|
|
397
|
+
for (const l of positive)
|
|
398
|
+
lines.push(l);
|
|
399
|
+
lines.push("");
|
|
400
|
+
}
|
|
401
|
+
if (negative.length > 0) {
|
|
402
|
+
lines.push("## What failed");
|
|
403
|
+
for (const l of negative)
|
|
404
|
+
lines.push(l);
|
|
405
|
+
lines.push("");
|
|
406
|
+
}
|
|
407
|
+
if (neutral.length > 0) {
|
|
408
|
+
lines.push("## Other signals");
|
|
409
|
+
for (const l of neutral)
|
|
410
|
+
lines.push(l);
|
|
411
|
+
lines.push("");
|
|
412
|
+
}
|
|
413
|
+
}
|
|
414
|
+
else {
|
|
415
|
+
// No positive/negative signals — fall back to the pre-D3 flat format for
|
|
416
|
+
// non-feedback event types (e.g. reflect_invoked, distill_invoked).
|
|
417
|
+
lines.push("Recent feedback events (most recent last):");
|
|
418
|
+
for (const event of input.feedback) {
|
|
419
|
+
const meta = event.metadata ? ` ${JSON.stringify(event.metadata)}` : "";
|
|
420
|
+
lines.push(`- ${event.ts} ${event.eventType}${meta}`);
|
|
421
|
+
}
|
|
422
|
+
lines.push("");
|
|
170
423
|
}
|
|
171
424
|
}
|
|
172
|
-
|
|
173
|
-
|
|
425
|
+
if (input.rejectedProposals && input.rejectedProposals.length > 0) {
|
|
426
|
+
lines.push("");
|
|
427
|
+
lines.push("Previously rejected proposals for this ref (Reflexion context):");
|
|
428
|
+
lines.push("The following proposals were already reviewed and rejected. " +
|
|
429
|
+
"Your new proposal MUST differ meaningfully in approach, framing, or evidence.");
|
|
430
|
+
for (const rp of input.rejectedProposals) {
|
|
431
|
+
lines.push(`- Rejection reason: ${rp.reason}`);
|
|
432
|
+
if (rp.contentPreview) {
|
|
433
|
+
lines.push(` Content preview: ${rp.contentPreview.slice(0, 200).replace(/\n/g, " ")}`);
|
|
434
|
+
}
|
|
435
|
+
}
|
|
436
|
+
}
|
|
437
|
+
if (input.proposalKind === "knowledge") {
|
|
438
|
+
lines.push("Produce the knowledge markdown file now. Start your response with `---` on the first line, followed by a `description:` field whose value is a 1-sentence summary (20–400 chars). Never use placeholder values like `---`, `tbd`, `n/a`, or a single dash. If the source has nothing meaningful to summarize, do NOT produce a proposal — return an empty response instead. The frontmatter block ends with a second `---` line; do not emit any additional `---` fences in the body.");
|
|
439
|
+
}
|
|
440
|
+
else {
|
|
441
|
+
lines.push("Produce the lesson markdown file now. Start your response with `---` on the first line, followed by `description:` and `when_to_use:` fields. Both must be real one-sentence summaries (20–400 chars) — never placeholder values like `---`, `tbd`, or `n/a`. The frontmatter block ends with a second `---` line; do not emit any additional `---` fences in the body.");
|
|
442
|
+
}
|
|
174
443
|
return lines.join("\n");
|
|
175
444
|
}
|
|
445
|
+
// ── D-4 / #390: Top-3 similar lessons retrieval ──────────────────────────────
|
|
446
|
+
/**
|
|
447
|
+
* Default implementation: use akmSearch to find top-N similar lesson assets.
|
|
448
|
+
* Returns empty array when search fails or returns no results.
|
|
449
|
+
* Requires embedding configured for semantic similarity; degrades gracefully.
|
|
450
|
+
*/
|
|
451
|
+
async function fetchTopSimilarLessons(query, n, _stashDir) {
|
|
452
|
+
try {
|
|
453
|
+
const result = await akmSearch({
|
|
454
|
+
query,
|
|
455
|
+
type: "lesson",
|
|
456
|
+
limit: n,
|
|
457
|
+
skipLogging: true,
|
|
458
|
+
eventSource: "improve",
|
|
459
|
+
});
|
|
460
|
+
const hits = result?.hits ?? [];
|
|
461
|
+
return hits
|
|
462
|
+
.filter((h) => "path" in h && typeof h.path === "string")
|
|
463
|
+
.slice(0, n)
|
|
464
|
+
.map((h) => {
|
|
465
|
+
let content = "";
|
|
466
|
+
try {
|
|
467
|
+
if (h.path && fs.existsSync(h.path)) {
|
|
468
|
+
content = fs.readFileSync(h.path, "utf8");
|
|
469
|
+
}
|
|
470
|
+
}
|
|
471
|
+
catch {
|
|
472
|
+
/* best-effort */
|
|
473
|
+
}
|
|
474
|
+
return { ref: h.ref, content };
|
|
475
|
+
});
|
|
476
|
+
}
|
|
477
|
+
catch {
|
|
478
|
+
return [];
|
|
479
|
+
}
|
|
480
|
+
}
|
|
176
481
|
// ── LLM-as-judge quality gate (P2-B) ────────────────────────────────────────
|
|
177
|
-
|
|
178
|
-
|
|
482
|
+
/**
|
|
483
|
+
* D-4 / #390: Build the LLM-as-judge prompt.
|
|
484
|
+
*
|
|
485
|
+
* When similarLessons are provided (top-3 by embedding similarity), they are
|
|
486
|
+
* included in the context so the judge can lower the score for near-duplicates.
|
|
487
|
+
* Voyager arXiv:2305.16291 — skill library admission requires similarity check
|
|
488
|
+
* against the existing library. A-MEM arXiv:2502.12110 — new notes are checked
|
|
489
|
+
* against existing notes before linking.
|
|
490
|
+
*/
|
|
491
|
+
function buildJudgePrompt(lessonContent, sourceContent, similarLessons) {
|
|
492
|
+
const lines = [
|
|
179
493
|
"You are evaluating a proposed lesson asset for an akm knowledge base.",
|
|
180
494
|
"",
|
|
181
495
|
"Score this lesson on each criterion from 1 (poor) to 5 (excellent):",
|
|
@@ -187,26 +501,51 @@ function buildJudgePrompt(lessonContent, sourceContent) {
|
|
|
187
501
|
"```",
|
|
188
502
|
sourceContent.slice(0, 2000),
|
|
189
503
|
"```",
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
"
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
504
|
+
];
|
|
505
|
+
if (similarLessons && similarLessons.length > 0) {
|
|
506
|
+
lines.push("");
|
|
507
|
+
lines.push("Existing similar lessons (top-3 by similarity). Rate lower if the proposed lesson is substantially similar to any of these:");
|
|
508
|
+
for (const sl of similarLessons) {
|
|
509
|
+
lines.push(`\nExisting lesson ref: ${sl.ref}`);
|
|
510
|
+
lines.push("```");
|
|
511
|
+
lines.push(sl.content.slice(0, 500));
|
|
512
|
+
lines.push("```");
|
|
513
|
+
}
|
|
514
|
+
}
|
|
515
|
+
lines.push("");
|
|
516
|
+
lines.push("Proposed lesson content:");
|
|
517
|
+
lines.push("```");
|
|
518
|
+
lines.push(lessonContent.slice(0, 1000));
|
|
519
|
+
lines.push("```");
|
|
520
|
+
lines.push("");
|
|
521
|
+
lines.push('Return ONLY valid JSON, no prose: {"score": <average score 1-5 as float>, "reason": "<one sentence>"}');
|
|
522
|
+
return lines.join("\n");
|
|
198
523
|
}
|
|
199
|
-
|
|
200
|
-
|
|
524
|
+
/**
|
|
525
|
+
* Run the LLM-as-judge quality gate on a proposal's content.
|
|
526
|
+
*
|
|
527
|
+
* Exported so reflect.ts can apply the same gate to reflect proposals (R-5 / #374).
|
|
528
|
+
* Gated by the flag name `lesson_quality_gate` (or its alias
|
|
529
|
+
* `proposal_quality_gate`) via {@link isLlmFeatureEnabled} — which reads
|
|
530
|
+
* `profiles.improve.default.processes.distill.qualityGate.enabled` (and the
|
|
531
|
+
* corresponding `.reflect.qualityGate.enabled` for proposals).
|
|
532
|
+
*
|
|
533
|
+
* Fail-open: returns `pass: true` on timeout, parse failure, or missing LLM.
|
|
534
|
+
*/
|
|
535
|
+
export async function runLessonQualityJudge(config, lessonContent, sourceContent, chat,
|
|
536
|
+
/** D-4 / #390: top-3 similar existing lessons for dedup check. */
|
|
537
|
+
similarLessons) {
|
|
538
|
+
const llmConfig = getDefaultLlmConfig(config);
|
|
539
|
+
if (!llmConfig) {
|
|
201
540
|
return { pass: true, score: -1, reason: "no LLM configured — passing through" };
|
|
202
541
|
}
|
|
203
|
-
const judgeLlmConfig =
|
|
542
|
+
const judgeLlmConfig = llmConfig.judgeModel ? { ...llmConfig, model: llmConfig.judgeModel } : llmConfig;
|
|
204
543
|
const JUDGE_TIMEOUT_MS = 8_000;
|
|
205
544
|
try {
|
|
206
545
|
const raw = await Promise.race([
|
|
207
546
|
chat(judgeLlmConfig, [
|
|
208
547
|
{ role: "system", content: "Return only valid JSON. No prose." },
|
|
209
|
-
{ role: "user", content: buildJudgePrompt(lessonContent, sourceContent) },
|
|
548
|
+
{ role: "user", content: buildJudgePrompt(lessonContent, sourceContent, similarLessons) },
|
|
210
549
|
]),
|
|
211
550
|
new Promise((_, reject) => setTimeout(() => reject(new Error("judge timeout")), JUDGE_TIMEOUT_MS)),
|
|
212
551
|
]);
|
|
@@ -214,7 +553,20 @@ async function runLessonQualityJudge(config, lessonContent, sourceContent, chat)
|
|
|
214
553
|
if (!parsed || typeof parsed.score !== "number") {
|
|
215
554
|
return { pass: true, score: -1, reason: "judge parse failed — passing through" };
|
|
216
555
|
}
|
|
217
|
-
|
|
556
|
+
// D-5 / #388: Three-band system (MT-Bench arXiv:2306.05685 — ~±0.5 judge variance).
|
|
557
|
+
// >= 3.5: auto-queue as pending (pass: true)
|
|
558
|
+
// 2.5–3.5: review-needed band — uncertain, escalate to human (reviewNeeded: true)
|
|
559
|
+
// < 2.5: auto-reject (pass: false)
|
|
560
|
+
const score = parsed.score;
|
|
561
|
+
const reason = parsed.reason ?? "";
|
|
562
|
+
if (score >= 3.5) {
|
|
563
|
+
return { pass: true, score, reason };
|
|
564
|
+
}
|
|
565
|
+
if (score >= 2.5) {
|
|
566
|
+
// Uncertainty band: treat as failed for auto-queuing but flag for review.
|
|
567
|
+
return { pass: false, score, reason, reviewNeeded: true };
|
|
568
|
+
}
|
|
569
|
+
return { pass: false, score, reason };
|
|
218
570
|
}
|
|
219
571
|
catch {
|
|
220
572
|
return { pass: true, score: -1, reason: "judge failed — passing through" };
|
|
@@ -234,15 +586,17 @@ async function runLessonQualityJudge(config, lessonContent, sourceContent, chat)
|
|
|
234
586
|
* @param extraMeta - Optional additional metadata for the event.
|
|
235
587
|
*/
|
|
236
588
|
function writeQualityRejection(stash, inputRef, lessonRef, content, score, reason, extraMeta = {}) {
|
|
589
|
+
// D-5 / #388: reviewNeeded flag selects "review_needed" vs "quality_rejected" outcome.
|
|
590
|
+
const outcome = extraMeta.reviewNeeded ? "review_needed" : "quality_rejected";
|
|
237
591
|
const rejectDir = path.join(stash, ".akm", "distill-rejected");
|
|
238
592
|
fs.mkdirSync(rejectDir, { recursive: true });
|
|
239
593
|
const ts = timestampForFilename();
|
|
240
|
-
fs.writeFileSync(path.join(rejectDir, `${ts}-${lessonRef}.md`), `---\nscore: ${score}\nreason: ${reason}\n---\n\n${content}`, "utf8");
|
|
594
|
+
fs.writeFileSync(path.join(rejectDir, `${ts}-${lessonRef}.md`), `---\nscore: ${score}\nreason: ${reason}\noutcome: ${outcome}\n---\n\n${content}`, "utf8");
|
|
241
595
|
appendEvent({
|
|
242
596
|
eventType: "distill_invoked",
|
|
243
597
|
ref: inputRef,
|
|
244
598
|
metadata: {
|
|
245
|
-
outcome
|
|
599
|
+
outcome,
|
|
246
600
|
lessonRef,
|
|
247
601
|
score,
|
|
248
602
|
reason,
|
|
@@ -252,7 +606,7 @@ function writeQualityRejection(stash, inputRef, lessonRef, content, score, reaso
|
|
|
252
606
|
return {
|
|
253
607
|
schemaVersion: 1,
|
|
254
608
|
ok: true,
|
|
255
|
-
outcome
|
|
609
|
+
outcome,
|
|
256
610
|
inputRef,
|
|
257
611
|
lessonRef,
|
|
258
612
|
score,
|
|
@@ -272,13 +626,47 @@ export async function akmDistill(options) {
|
|
|
272
626
|
throw new UsageError("Asset ref is required. Usage: akm distill <ref>", "MISSING_REQUIRED_ARGUMENT");
|
|
273
627
|
}
|
|
274
628
|
// Validate the ref shape up front so a typo never reaches the LLM.
|
|
275
|
-
parseAssetRef(inputRef);
|
|
629
|
+
const parsedInputRef = parseAssetRef(inputRef);
|
|
276
630
|
const targetKind = options.proposalKind ?? "lesson";
|
|
631
|
+
// Recursive-distillation guard. Distill produces *lessons* from non-lesson
|
|
632
|
+
// sources (memory, skill, knowledge, etc.). Calling distill on an existing
|
|
633
|
+
// lesson would derive `lesson:lesson-<name>-lesson-lesson` (double `-lesson`
|
|
634
|
+
// suffix) and route a "lesson of a lesson" through the proposal queue —
|
|
635
|
+
// observed in 323 reviewed archived proposals as the recursive-ref defect.
|
|
636
|
+
// Refuse the input here so the improve loop (or other callers) get a clean
|
|
637
|
+
// skipped outcome instead of producing nonsense refs.
|
|
638
|
+
//
|
|
639
|
+
// The refused-type set is exported as {@link DISTILL_REFUSED_INPUT_TYPES} so
|
|
640
|
+
// the improve planner can skip these refs before queuing distill attempts;
|
|
641
|
+
// this runtime check stays as a defensive backstop for direct callers.
|
|
642
|
+
if (isDistillRefusedInputType(parsedInputRef.type)) {
|
|
643
|
+
const skippedRef = `lesson:${parsedInputRef.name}`;
|
|
644
|
+
appendEvent({
|
|
645
|
+
eventType: "distill_invoked",
|
|
646
|
+
ref: inputRef,
|
|
647
|
+
metadata: {
|
|
648
|
+
outcome: "skipped",
|
|
649
|
+
lessonRef: skippedRef,
|
|
650
|
+
message: "distill refuses lesson inputs — lessons are the distilled form, not a source",
|
|
651
|
+
skipReason: "recursive_lesson_input",
|
|
652
|
+
},
|
|
653
|
+
});
|
|
654
|
+
return {
|
|
655
|
+
schemaVersion: 1,
|
|
656
|
+
ok: true,
|
|
657
|
+
outcome: "skipped",
|
|
658
|
+
inputRef,
|
|
659
|
+
lessonRef: skippedRef,
|
|
660
|
+
message: "Distill refuses lesson inputs — lessons are the distilled form, not a source.",
|
|
661
|
+
};
|
|
662
|
+
}
|
|
277
663
|
const config = options.config ?? loadConfig();
|
|
278
664
|
const stash = options.stashDir ?? resolveStashDir();
|
|
279
665
|
const chat = options.chat ?? chatCompletion;
|
|
280
666
|
const lookup = options.lookupFn ?? defaultLookup;
|
|
281
667
|
const readEventsImpl = options.readEventsFn ?? readEvents;
|
|
668
|
+
// D-4 / #390: similar-lessons retrieval seam (test-injectable).
|
|
669
|
+
const fetchSimilarLessonsFn = options.fetchSimilarLessonsFn ?? ((query, n) => fetchTopSimilarLessons(query, n, options.stashDir));
|
|
282
670
|
// Best-effort load: when the asset is not yet indexed we still proceed —
|
|
283
671
|
// the LLM is asked to distil from "available signal" (feedback alone).
|
|
284
672
|
let assetContent = null;
|
|
@@ -324,33 +712,161 @@ export async function akmDistill(options) {
|
|
|
324
712
|
})),
|
|
325
713
|
});
|
|
326
714
|
if (promotion?.promote && promotion.content && (targetKind === "knowledge" || targetKind === "auto")) {
|
|
715
|
+
// D-1 / #369: When the destination knowledge file already exists, route
|
|
716
|
+
// through the LLM for contradiction resolution instead of silently
|
|
717
|
+
// overwriting. Follows mem0 ADD/UPDATE/DELETE/NOOP pattern (arXiv:2504.19413 §3.2)
|
|
718
|
+
// and A-MEM dynamic linking (arXiv:2502.12110).
|
|
719
|
+
let resolvedPromotionContent = promotion.content;
|
|
720
|
+
const existingKnowledgePath = await lookup(promotion.knowledgeRef);
|
|
721
|
+
const existingKnowledgeContent = existingKnowledgePath && fs.existsSync(existingKnowledgePath)
|
|
722
|
+
? (() => {
|
|
723
|
+
try {
|
|
724
|
+
return fs.readFileSync(existingKnowledgePath, "utf8");
|
|
725
|
+
}
|
|
726
|
+
catch {
|
|
727
|
+
return null;
|
|
728
|
+
}
|
|
729
|
+
})()
|
|
730
|
+
: null;
|
|
731
|
+
if (existingKnowledgeContent && config && getDefaultLlmConfig(config)) {
|
|
732
|
+
// Existing content found: call LLM for contradiction-resolution merge.
|
|
733
|
+
const mergePrompt = [
|
|
734
|
+
"You are merging two versions of a knowledge document.",
|
|
735
|
+
"Existing content is already committed; new content comes from a memory distillation run.",
|
|
736
|
+
"Choose one of: ADD (combine both), UPDATE (replace existing with new), NOOP (keep existing unchanged).",
|
|
737
|
+
'Return ONLY valid JSON: {"action": "ADD"|"UPDATE"|"NOOP", "content": "<merged markdown if ADD/UPDATE, empty string if NOOP>"}',
|
|
738
|
+
"",
|
|
739
|
+
"## Existing knowledge content",
|
|
740
|
+
"```",
|
|
741
|
+
existingKnowledgeContent.slice(0, 3000),
|
|
742
|
+
"```",
|
|
743
|
+
"",
|
|
744
|
+
"## New content from distillation",
|
|
745
|
+
"```",
|
|
746
|
+
promotion.content.slice(0, 3000),
|
|
747
|
+
"```",
|
|
748
|
+
].join("\n");
|
|
749
|
+
try {
|
|
750
|
+
const mergeLlm = getDefaultLlmConfig(config);
|
|
751
|
+
if (!mergeLlm) {
|
|
752
|
+
throw new ConfigError("LLM is not configured for distillation merge.", "LLM_NOT_CONFIGURED");
|
|
753
|
+
}
|
|
754
|
+
const mergeResponse = await chat(mergeLlm, [
|
|
755
|
+
{ role: "system", content: "Return only valid JSON. No prose." },
|
|
756
|
+
{ role: "user", content: mergePrompt },
|
|
757
|
+
]);
|
|
758
|
+
const mergeResult = parseEmbeddedJsonResponse(mergeResponse);
|
|
759
|
+
if (mergeResult?.action === "NOOP") {
|
|
760
|
+
// Existing content is authoritative — no update needed.
|
|
761
|
+
appendEvent({
|
|
762
|
+
eventType: "distill_invoked",
|
|
763
|
+
ref: inputRef,
|
|
764
|
+
metadata: {
|
|
765
|
+
outcome: "skipped",
|
|
766
|
+
lessonRef: promotion.knowledgeRef,
|
|
767
|
+
message: "D-1: LLM resolved destination conflict as NOOP — existing content kept",
|
|
768
|
+
},
|
|
769
|
+
});
|
|
770
|
+
return {
|
|
771
|
+
schemaVersion: 1,
|
|
772
|
+
ok: true,
|
|
773
|
+
outcome: "skipped",
|
|
774
|
+
inputRef,
|
|
775
|
+
lessonRef: promotion.knowledgeRef,
|
|
776
|
+
message: "Existing knowledge content unchanged (contradiction resolution: NOOP)",
|
|
777
|
+
};
|
|
778
|
+
}
|
|
779
|
+
if (mergeResult?.action && (mergeResult.action === "ADD" || mergeResult.action === "UPDATE")) {
|
|
780
|
+
if (mergeResult.content?.trim()) {
|
|
781
|
+
resolvedPromotionContent = mergeResult.content;
|
|
782
|
+
}
|
|
783
|
+
}
|
|
784
|
+
}
|
|
785
|
+
catch {
|
|
786
|
+
// LLM merge failed — fall through with the original promotion content.
|
|
787
|
+
// The reviewer will see both versions in the proposal diff.
|
|
788
|
+
}
|
|
789
|
+
}
|
|
790
|
+
else if (existingKnowledgeContent && config && !getDefaultLlmConfig(config)) {
|
|
791
|
+
// No LLM configured: include existing content as context in the proposal
|
|
792
|
+
// so the reviewer can do the contradiction resolution manually.
|
|
793
|
+
resolvedPromotionContent = [
|
|
794
|
+
promotion.content,
|
|
795
|
+
"",
|
|
796
|
+
"---",
|
|
797
|
+
"<!-- D-1 / #369: Existing knowledge content is shown below for reviewer reference. -->",
|
|
798
|
+
"<!-- Review: decide whether to ADD (merge), UPDATE (replace), or NOOP (keep existing). -->",
|
|
799
|
+
"",
|
|
800
|
+
"## Existing content (for reviewer reference)",
|
|
801
|
+
"",
|
|
802
|
+
existingKnowledgeContent,
|
|
803
|
+
].join("\n");
|
|
804
|
+
}
|
|
327
805
|
// Apply quality gate to fast-path knowledge promotion (Risk 4 fix).
|
|
806
|
+
// D-5 / #388: Three-band system — review_needed band queues to proposal
|
|
807
|
+
// queue with review_needed outcome rather than auto-rejecting.
|
|
808
|
+
let knowledgeJudgeConfidence;
|
|
328
809
|
if (isLlmFeatureEnabled(config, "lesson_quality_gate")) {
|
|
329
|
-
|
|
810
|
+
// D-4 / #390: retrieve top-3 similar lessons for dedup check in judge.
|
|
811
|
+
const similarLessons = await fetchSimilarLessonsFn(resolvedPromotionContent.slice(0, 500), 3);
|
|
812
|
+
const judgeResult = await runLessonQualityJudge(config, resolvedPromotionContent, assetContent ?? "", chat, similarLessons.length > 0 ? similarLessons : undefined);
|
|
330
813
|
if (!judgeResult.pass) {
|
|
331
|
-
|
|
814
|
+
if (judgeResult.reviewNeeded) {
|
|
815
|
+
// Uncertainty band (2.5–3.5): queue as review_needed instead of rejecting.
|
|
816
|
+
return writeQualityRejection(stash, inputRef, promotion.knowledgeRef, resolvedPromotionContent, judgeResult.score, judgeResult.reason, { reviewNeeded: true });
|
|
817
|
+
}
|
|
818
|
+
return writeQualityRejection(stash, inputRef, promotion.knowledgeRef, resolvedPromotionContent, judgeResult.score, judgeResult.reason);
|
|
332
819
|
}
|
|
820
|
+
// Normalize 1-5 judge score to [0, 1]. Score of -1 means pass-through
|
|
821
|
+
// (no LLM / timeout / parse failure) — leave confidence undefined so
|
|
822
|
+
// the auto-accept gate treats the proposal as unscored and skips it.
|
|
823
|
+
if (judgeResult.score > 0)
|
|
824
|
+
knowledgeJudgeConfidence = judgeResult.score / 5;
|
|
333
825
|
}
|
|
334
|
-
const knowledgeParsed = parseFrontmatter(
|
|
335
|
-
const
|
|
826
|
+
const knowledgeParsed = parseFrontmatter(resolvedPromotionContent);
|
|
827
|
+
const proposalResult = createProposal(stash, {
|
|
336
828
|
ref: promotion.knowledgeRef,
|
|
337
829
|
source: "distill",
|
|
338
830
|
...(options.sourceRun !== undefined ? { sourceRun: options.sourceRun } : {}),
|
|
339
831
|
payload: {
|
|
340
|
-
content:
|
|
832
|
+
content: resolvedPromotionContent,
|
|
341
833
|
...(Object.keys(knowledgeParsed.data).length > 0 ? { frontmatter: knowledgeParsed.data } : {}),
|
|
342
834
|
},
|
|
835
|
+
...(knowledgeJudgeConfidence !== undefined ? { confidence: knowledgeJudgeConfidence } : {}),
|
|
343
836
|
}, options.ctx);
|
|
837
|
+
if (isProposalSkipped(proposalResult)) {
|
|
838
|
+
appendEvent({
|
|
839
|
+
eventType: "distill_invoked",
|
|
840
|
+
ref: inputRef,
|
|
841
|
+
metadata: {
|
|
842
|
+
outcome: "skipped",
|
|
843
|
+
lessonRef: promotion.knowledgeRef,
|
|
844
|
+
message: proposalResult.message,
|
|
845
|
+
skipReason: proposalResult.reason,
|
|
846
|
+
},
|
|
847
|
+
});
|
|
848
|
+
return {
|
|
849
|
+
schemaVersion: 1,
|
|
850
|
+
ok: true,
|
|
851
|
+
outcome: "skipped",
|
|
852
|
+
inputRef,
|
|
853
|
+
lessonRef: promotion.knowledgeRef,
|
|
854
|
+
message: proposalResult.message,
|
|
855
|
+
};
|
|
856
|
+
}
|
|
857
|
+
const proposal = proposalResult;
|
|
344
858
|
appendEvent({
|
|
345
859
|
eventType: "distill_invoked",
|
|
346
860
|
ref: inputRef,
|
|
347
|
-
metadata:
|
|
861
|
+
metadata: {
|
|
862
|
+
outcome: "queued",
|
|
863
|
+
lessonRef: promotion.knowledgeRef,
|
|
348
864
|
proposalRef: promotion.knowledgeRef,
|
|
349
865
|
proposalKind: "knowledge",
|
|
350
866
|
proposalId: proposal.id,
|
|
351
867
|
...(options.sourceRun !== undefined ? { sourceRun: options.sourceRun } : {}),
|
|
352
868
|
...(exclusionSet.size > 0 ? { filteredFeedbackCount } : {}),
|
|
353
|
-
}
|
|
869
|
+
},
|
|
354
870
|
});
|
|
355
871
|
return {
|
|
356
872
|
schemaVersion: 1,
|
|
@@ -367,51 +883,244 @@ export async function akmDistill(options) {
|
|
|
367
883
|
}
|
|
368
884
|
const effectiveProposalKind = targetKind === "knowledge" ? "knowledge" : "lesson";
|
|
369
885
|
const effectiveLessonRef = effectiveProposalKind === "knowledge" ? deriveKnowledgeRef(inputRef) : deriveLessonRef(inputRef);
|
|
370
|
-
|
|
886
|
+
// Inject last 1–3 rejected proposals for this ref as Reflexion-style
|
|
887
|
+
// verbal-RL context so the LLM avoids regenerating refused proposals.
|
|
888
|
+
const MAX_REJECTED_PROPOSALS = 3;
|
|
889
|
+
const rejectedForRef = listProposals(stash, { ref: inputRef, status: "rejected", includeArchive: true })
|
|
890
|
+
.sort((a, b) => new Date(b.updatedAt ?? 0).getTime() - new Date(a.updatedAt ?? 0).getTime())
|
|
891
|
+
.slice(0, MAX_REJECTED_PROPOSALS)
|
|
892
|
+
.map((p) => ({
|
|
893
|
+
reason: p.review?.reason ?? "no reason given",
|
|
894
|
+
contentPreview: p.payload.content.slice(0, 500),
|
|
895
|
+
}));
|
|
896
|
+
const userPrompt = buildDistillPrompt({
|
|
897
|
+
inputRef,
|
|
898
|
+
assetContent,
|
|
899
|
+
feedback,
|
|
900
|
+
proposalKind: effectiveProposalKind,
|
|
901
|
+
...(rejectedForRef.length > 0 ? { rejectedProposals: rejectedForRef } : {}),
|
|
902
|
+
});
|
|
371
903
|
const messages = [
|
|
372
904
|
{ role: "system", content: effectiveProposalKind === "knowledge" ? KNOWLEDGE_SYSTEM_PROMPT : LESSON_SYSTEM_PROMPT },
|
|
373
905
|
{ role: "user", content: userPrompt },
|
|
374
906
|
];
|
|
375
|
-
// Single bounded LLM call. The wrapper handles the gate-check,
|
|
376
|
-
// timeout, and error fallback (returning `null`).
|
|
377
|
-
|
|
378
|
-
|
|
907
|
+
// Single bounded LLM call. The wrapper handles the gate-check, 600s
|
|
908
|
+
// (10 min) default timeout, and error fallback (returning `null`).
|
|
909
|
+
//
|
|
910
|
+
// Capture the fallback reason so we can distinguish "config gate is off"
|
|
911
|
+
// (no LLM was called — operator action required) from "LLM call was made
|
|
912
|
+
// but returned no usable output" (transport/timeout/empty — observability).
|
|
913
|
+
// The previous conflated message ("disabled or the LLM call failed") gave
|
|
914
|
+
// operators no signal to act on; a 108-run audit found 100% of skipped
|
|
915
|
+
// outcomes were actually the config-gate-off branch.
|
|
916
|
+
//
|
|
917
|
+
// responseSchema lift (PR 1, asset-writers-investigation §5): on the
|
|
918
|
+
// production path (no test `chat` seam) we pass the lesson/knowledge JSON
|
|
919
|
+
// schema to `chatCompletion`. Providers with `supportsJsonSchema: true`
|
|
920
|
+
// return a typed JSON object the post-call code re-assembles into markdown,
|
|
921
|
+
// bypassing the four shape-level rejection codes the validator log catches.
|
|
922
|
+
// The test seam keeps its two-arg signature, so injected fakes still pin
|
|
923
|
+
// markdown responses verbatim and the existing assertion suite is unchanged.
|
|
924
|
+
const distillSchema = effectiveProposalKind === "knowledge" ? DISTILL_KNOWLEDGE_JSON_SCHEMA : DISTILL_LESSON_JSON_SCHEMA;
|
|
925
|
+
let fallbackReason;
|
|
926
|
+
const raw = await tryLlmFeature("distill", config, async () => {
|
|
927
|
+
const distillLlm = getDefaultLlmConfig(config);
|
|
928
|
+
if (!distillLlm) {
|
|
379
929
|
// No LLM connection configured — treat as gate-disabled. Throwing
|
|
380
930
|
// here lets `tryLlmFeature` route us through the "error" fallback,
|
|
381
931
|
// which is the same graceful skipped path.
|
|
382
|
-
throw new ConfigError("No LLM connection configured. Set `llm
|
|
932
|
+
throw new ConfigError("No LLM connection configured. Set `defaults.llm` and a profile under `profiles.llm`.", "LLM_NOT_CONFIGURED");
|
|
383
933
|
}
|
|
384
|
-
|
|
934
|
+
// Production path: pass the JSON schema so providers that honour
|
|
935
|
+
// `response_format: json_schema` enforce shape upstream. Providers that
|
|
936
|
+
// ignore the option fall through to the prompt-contract markdown path.
|
|
937
|
+
if (options.chat === undefined) {
|
|
938
|
+
return chatCompletion(distillLlm, messages, { responseSchema: distillSchema });
|
|
939
|
+
}
|
|
940
|
+
// Test seam: preserve the two-arg signature so existing fake `chat`
|
|
941
|
+
// functions (which return markdown strings) continue to work.
|
|
942
|
+
return chat(distillLlm, messages);
|
|
385
943
|
}, null, {
|
|
386
944
|
onFallback: (evt) => {
|
|
945
|
+
fallbackReason = evt.reason;
|
|
387
946
|
// Log the fallback reason; the caller (raw === null path) handles
|
|
388
947
|
// emitting the distill_invoked event so we don't double-emit here.
|
|
389
948
|
warnVerbose(`[akm] LLM fallback for ${evt.feature}: ${evt.reason}`);
|
|
390
949
|
},
|
|
391
950
|
});
|
|
392
951
|
if (raw === null || raw.trim() === "") {
|
|
952
|
+
// Distinguish "config gate disabled" from "LLM call failed". For the
|
|
953
|
+
// config-disabled branch, we ALSO suppress the `distill_invoked` event
|
|
954
|
+
// because no LLM work was actually invoked — emitting the event causes
|
|
955
|
+
// the planner to accumulate phantom invocations that drown out real
|
|
956
|
+
// signal.
|
|
957
|
+
if (fallbackReason === "disabled") {
|
|
958
|
+
return {
|
|
959
|
+
schemaVersion: 1,
|
|
960
|
+
ok: true,
|
|
961
|
+
outcome: "config_disabled",
|
|
962
|
+
inputRef,
|
|
963
|
+
lessonRef: effectiveLessonRef,
|
|
964
|
+
proposalRef: effectiveLessonRef,
|
|
965
|
+
proposalKind: effectiveProposalKind,
|
|
966
|
+
message: "distill is disabled in config; enable processes.distill.enabled to activate.",
|
|
967
|
+
...(exclusionSet.size > 0 ? { filteredFeedbackCount, feedbackFullyFiltered } : {}),
|
|
968
|
+
};
|
|
969
|
+
}
|
|
970
|
+
// LLM was actually invoked but produced nothing usable (transport error,
|
|
971
|
+
// timeout, or empty/whitespace response). Emit the event so the failure
|
|
972
|
+
// is observable.
|
|
393
973
|
appendEvent({
|
|
394
974
|
eventType: "distill_invoked",
|
|
395
975
|
ref: inputRef,
|
|
396
|
-
metadata:
|
|
976
|
+
metadata: {
|
|
977
|
+
outcome: "llm_failed",
|
|
978
|
+
lessonRef: effectiveLessonRef,
|
|
397
979
|
proposalKind: effectiveProposalKind,
|
|
398
980
|
...(exclusionSet.size > 0 ? { filteredFeedbackCount } : {}),
|
|
399
|
-
}
|
|
981
|
+
},
|
|
400
982
|
});
|
|
401
983
|
return {
|
|
402
984
|
schemaVersion: 1,
|
|
403
985
|
ok: true,
|
|
404
|
-
outcome: "
|
|
986
|
+
outcome: "llm_failed",
|
|
405
987
|
inputRef,
|
|
406
988
|
lessonRef: effectiveLessonRef,
|
|
407
989
|
proposalRef: effectiveLessonRef,
|
|
408
990
|
proposalKind: effectiveProposalKind,
|
|
409
|
-
message: "
|
|
991
|
+
message: "LLM call returned no usable output (timeout, empty, or error).",
|
|
410
992
|
...(exclusionSet.size > 0 ? { filteredFeedbackCount, feedbackFullyFiltered } : {}),
|
|
411
993
|
};
|
|
412
994
|
}
|
|
413
|
-
//
|
|
414
|
-
|
|
995
|
+
// Structured-output path: when the provider honoured the JSON schema, `raw`
|
|
996
|
+
// is a JSON object string (not a markdown blob). Try to parse it and assemble
|
|
997
|
+
// the canonical `---\nfm\n---\n\nbody` form before falling through to the
|
|
998
|
+
// legacy markdown pipeline. Failure here (non-JSON response, missing
|
|
999
|
+
// required field, unexpected types) is non-fatal — we drop down to the
|
|
1000
|
+
// markdown path which has its own auto-repair + lint pass.
|
|
1001
|
+
let content;
|
|
1002
|
+
const structuredCandidate = parseEmbeddedJsonResponse(raw);
|
|
1003
|
+
const structuredAssembled = structuredCandidate && !Array.isArray(structuredCandidate)
|
|
1004
|
+
? assembleStructuredDistillMarkdown(structuredCandidate, effectiveProposalKind)
|
|
1005
|
+
: null;
|
|
1006
|
+
if (structuredAssembled !== null) {
|
|
1007
|
+
content = structuredAssembled;
|
|
1008
|
+
}
|
|
1009
|
+
else {
|
|
1010
|
+
// Strip any stray fence the LLM might have added around the markdown.
|
|
1011
|
+
content = stripMarkdownFences(raw);
|
|
1012
|
+
}
|
|
1013
|
+
// Auto-repair missing frontmatter fields before hard-failing. Small models
|
|
1014
|
+
// frequently produce a good lesson body but omit the YAML header entirely.
|
|
1015
|
+
// Rather than discarding valid content, we extract description/when_to_use
|
|
1016
|
+
// from the body and prepend the required frontmatter block.
|
|
1017
|
+
//
|
|
1018
|
+
// IMPORTANT: We do NOT synthesise placeholder strings here. If the body
|
|
1019
|
+
// does not contain text that passes the post-LLM validators
|
|
1020
|
+
// (`isValidDescription` / `isValidWhenToUse`), we leave the field missing
|
|
1021
|
+
// and let the lesson lint reject the proposal as `validation_failed`.
|
|
1022
|
+
// Emitting placeholders like `"Lesson distilled from <ref>"` or
|
|
1023
|
+
// `"When working with <slug>"` is what produced the systematic broken
|
|
1024
|
+
// proposals observed across 323 archived rejections.
|
|
1025
|
+
if (effectiveProposalKind !== "knowledge") {
|
|
1026
|
+
const parsed = parseFrontmatter(content);
|
|
1027
|
+
const fm = (parsed.data ?? {});
|
|
1028
|
+
const missingDesc = typeof fm.description !== "string" || !fm.description.trim();
|
|
1029
|
+
const missingWtu = typeof fm.when_to_use !== "string" || !fm.when_to_use.trim();
|
|
1030
|
+
if (missingDesc || missingWtu) {
|
|
1031
|
+
const body = parsed.content.trim();
|
|
1032
|
+
// Strip markdown formatting tokens from a line so extracted text is clean.
|
|
1033
|
+
const stripMd = (l) => l
|
|
1034
|
+
.replace(/\*\*([^*]+)\*\*/g, "$1")
|
|
1035
|
+
.replace(/\*([^*]+)\*/g, "$1")
|
|
1036
|
+
.replace(/`([^`]+)`/g, "$1")
|
|
1037
|
+
.replace(/^[#*\->_]+\s*/, "")
|
|
1038
|
+
.replace(/:\s*$/, "")
|
|
1039
|
+
.trim();
|
|
1040
|
+
// Skip lines that look like YAML field assignments (key: value) or frontmatter delimiters.
|
|
1041
|
+
// These appear when the LLM leaks frontmatter content into the body, causing
|
|
1042
|
+
// auto-repair to produce description: "description: Key Takeaways".
|
|
1043
|
+
const isYamlLike = (l) => /^---/.test(l) || /^[a-z_]+:\s/i.test(l);
|
|
1044
|
+
const bodyLines = body.split("\n").map(stripMd);
|
|
1045
|
+
// Extract description: first body line that BOTH looks like prose AND
|
|
1046
|
+
// passes isValidDescription. If nothing qualifies, leave the field
|
|
1047
|
+
// missing — the lint pass will reject the proposal cleanly.
|
|
1048
|
+
let descLine;
|
|
1049
|
+
for (const l of bodyLines) {
|
|
1050
|
+
if (isYamlLike(l))
|
|
1051
|
+
continue;
|
|
1052
|
+
if (l.length <= 10 || l.length >= 400)
|
|
1053
|
+
continue;
|
|
1054
|
+
if (isValidDescription(l, inputRef).ok) {
|
|
1055
|
+
descLine = l;
|
|
1056
|
+
break;
|
|
1057
|
+
}
|
|
1058
|
+
}
|
|
1059
|
+
// Extract when_to_use: a line starting with "When" / "Use when" / "Apply when"
|
|
1060
|
+
// that ALSO passes isValidWhenToUse (rejects circular fallbacks).
|
|
1061
|
+
let wtuLine;
|
|
1062
|
+
for (const l of bodyLines) {
|
|
1063
|
+
if (!/^(when |use when|apply when)/i.test(l))
|
|
1064
|
+
continue;
|
|
1065
|
+
if (l.length >= 400)
|
|
1066
|
+
continue;
|
|
1067
|
+
if (isValidWhenToUse(l, inputRef).ok) {
|
|
1068
|
+
wtuLine = l;
|
|
1069
|
+
break;
|
|
1070
|
+
}
|
|
1071
|
+
}
|
|
1072
|
+
const repairedFm = {
|
|
1073
|
+
...fm,
|
|
1074
|
+
...(missingDesc && descLine ? { description: descLine } : {}),
|
|
1075
|
+
...(missingWtu && wtuLine ? { when_to_use: wtuLine } : {}),
|
|
1076
|
+
};
|
|
1077
|
+
const fmLines = Object.entries(repairedFm)
|
|
1078
|
+
.map(([k, v]) => `${k}: ${JSON.stringify(v)}`)
|
|
1079
|
+
.join("\n");
|
|
1080
|
+
// Only rewrite content if we actually have at least one field to write.
|
|
1081
|
+
// Otherwise leave the original content for the lint pass to reject.
|
|
1082
|
+
if (Object.keys(repairedFm).length > 0) {
|
|
1083
|
+
content = assembleAssetFromString(fmLines, body);
|
|
1084
|
+
}
|
|
1085
|
+
}
|
|
1086
|
+
}
|
|
1087
|
+
// Description ↔ when_to_use auto-swap normalization (recover ~93% of
|
|
1088
|
+
// qwen-9b's `^when\b/i` rejections at zero LLM cost). When the LLM emits
|
|
1089
|
+
// a conditional-framed description ("When X happens, do Y") and the
|
|
1090
|
+
// when_to_use field is non-empty and looks like a declarative description,
|
|
1091
|
+
// the two fields are mis-fielded — exactly what `isValidDescription`'s
|
|
1092
|
+
// error message says ("that pattern belongs in when_to_use"). We swap
|
|
1093
|
+
// them and revalidate; the swap is committed only if BOTH fields pass
|
|
1094
|
+
// their respective validators afterwards. If revalidation still fails,
|
|
1095
|
+
// we fall through to the existing reject path.
|
|
1096
|
+
let descriptionSwapped = 0;
|
|
1097
|
+
if (effectiveProposalKind !== "knowledge") {
|
|
1098
|
+
const parsedSwap = parseFrontmatter(content);
|
|
1099
|
+
const fmSwap = (parsedSwap.data ?? {});
|
|
1100
|
+
const descRaw = typeof fmSwap.description === "string" ? fmSwap.description.trim() : "";
|
|
1101
|
+
const wtuRaw = typeof fmSwap.when_to_use === "string" ? fmSwap.when_to_use.trim() : "";
|
|
1102
|
+
const descStartsConditional = /^(when|if)\b/i.test(descRaw);
|
|
1103
|
+
const wtuStartsConditional = /^(when|if)\b/i.test(wtuRaw);
|
|
1104
|
+
if (descStartsConditional && !wtuStartsConditional && wtuRaw.length > 0) {
|
|
1105
|
+
// Try the swap and revalidate. The when_to_use validator requires the
|
|
1106
|
+
// value not match `/^when working with\b/i` (the circular fallback) —
|
|
1107
|
+
// a real description rarely does, so this usually passes.
|
|
1108
|
+
const swappedDescCheck = isValidDescription(wtuRaw, inputRef);
|
|
1109
|
+
const swappedWtuCheck = isValidWhenToUse(descRaw, inputRef);
|
|
1110
|
+
if (swappedDescCheck.ok && swappedWtuCheck.ok) {
|
|
1111
|
+
const swappedFm = {
|
|
1112
|
+
...fmSwap,
|
|
1113
|
+
description: wtuRaw,
|
|
1114
|
+
when_to_use: descRaw,
|
|
1115
|
+
};
|
|
1116
|
+
const swappedFmLines = Object.entries(swappedFm)
|
|
1117
|
+
.map(([k, v]) => `${k}: ${JSON.stringify(v)}`)
|
|
1118
|
+
.join("\n");
|
|
1119
|
+
content = assembleAssetFromString(swappedFmLines, parsedSwap.content);
|
|
1120
|
+
descriptionSwapped = 1;
|
|
1121
|
+
}
|
|
1122
|
+
}
|
|
1123
|
+
}
|
|
415
1124
|
// Parse + lint the lesson before creating the proposal. The lint is the
|
|
416
1125
|
// canonical gate for required frontmatter (v1 spec §13). On failure we
|
|
417
1126
|
// surface a structured error and exit non-zero — but still emit
|
|
@@ -419,15 +1128,61 @@ export async function akmDistill(options) {
|
|
|
419
1128
|
const findings = effectiveProposalKind === "knowledge"
|
|
420
1129
|
? validateKnowledgeContent(content, inputRef)
|
|
421
1130
|
: lintLessonContent(content, `distill:${inputRef}`).findings;
|
|
1131
|
+
// Additional quality validators run only on lessons. lesson-lint checks
|
|
1132
|
+
// "field is present and non-empty"; these reject the systematic failure
|
|
1133
|
+
// modes observed across 323 archived rejected proposals:
|
|
1134
|
+
// - description is a body fragment, section heading, or placeholder
|
|
1135
|
+
// - when_to_use is the circular "When working with <ref>" fallback
|
|
1136
|
+
// - description == when_to_use (LLM duplicated a single sentence)
|
|
1137
|
+
// - body contains a second pseudo-frontmatter block
|
|
1138
|
+
if (effectiveProposalKind !== "knowledge" && findings.length === 0) {
|
|
1139
|
+
const parsedQC = parseFrontmatter(content);
|
|
1140
|
+
const fmQC = (parsedQC.data ?? {});
|
|
1141
|
+
const descCheck = isValidDescription(fmQC.description, inputRef);
|
|
1142
|
+
if (!descCheck.ok) {
|
|
1143
|
+
findings.push({
|
|
1144
|
+
kind: "invalid-description",
|
|
1145
|
+
field: "description",
|
|
1146
|
+
message: `Distilled lesson for ${inputRef} has an invalid description: ${descCheck.reason}.`,
|
|
1147
|
+
});
|
|
1148
|
+
}
|
|
1149
|
+
const wtuCheck = isValidWhenToUse(fmQC.when_to_use, inputRef);
|
|
1150
|
+
if (!wtuCheck.ok) {
|
|
1151
|
+
findings.push({
|
|
1152
|
+
kind: "invalid-when_to_use",
|
|
1153
|
+
field: "when_to_use",
|
|
1154
|
+
message: `Distilled lesson for ${inputRef} has an invalid when_to_use: ${wtuCheck.reason}.`,
|
|
1155
|
+
});
|
|
1156
|
+
}
|
|
1157
|
+
// description and when_to_use must say different things.
|
|
1158
|
+
if (descCheck.ok &&
|
|
1159
|
+
wtuCheck.ok &&
|
|
1160
|
+
typeof fmQC.description === "string" &&
|
|
1161
|
+
typeof fmQC.when_to_use === "string" &&
|
|
1162
|
+
fmQC.description.trim().toLowerCase() === fmQC.when_to_use.trim().toLowerCase()) {
|
|
1163
|
+
findings.push({
|
|
1164
|
+
kind: "description-equals-when_to_use",
|
|
1165
|
+
field: "description",
|
|
1166
|
+
message: `Distilled lesson for ${inputRef} has identical description and when_to_use.`,
|
|
1167
|
+
});
|
|
1168
|
+
}
|
|
1169
|
+
// Double-frontmatter / pseudo-frontmatter pollution in the body.
|
|
1170
|
+
const dfm = detectDoubleFrontmatter(content);
|
|
1171
|
+
if (dfm) {
|
|
1172
|
+
findings.push({ kind: dfm.kind, field: "body", message: `Distilled lesson for ${inputRef}: ${dfm.message}` });
|
|
1173
|
+
}
|
|
1174
|
+
}
|
|
422
1175
|
if (findings.length > 0) {
|
|
423
1176
|
appendEvent({
|
|
424
1177
|
eventType: "distill_invoked",
|
|
425
1178
|
ref: inputRef,
|
|
426
|
-
metadata:
|
|
1179
|
+
metadata: {
|
|
1180
|
+
outcome: "validation_failed",
|
|
1181
|
+
lessonRef: effectiveLessonRef,
|
|
427
1182
|
proposalKind: effectiveProposalKind,
|
|
428
1183
|
findingKinds: findings.map((f) => f.kind),
|
|
429
1184
|
...(exclusionSet.size > 0 ? { filteredFeedbackCount } : {}),
|
|
430
|
-
}
|
|
1185
|
+
},
|
|
431
1186
|
});
|
|
432
1187
|
const message = findings.map((f) => f.message).join("\n");
|
|
433
1188
|
throw new UsageError(`Distilled ${effectiveProposalKind} failed validation:\n${message}`, "MISSING_REQUIRED_ARGUMENT", effectiveProposalKind === "knowledge"
|
|
@@ -436,35 +1191,85 @@ export async function akmDistill(options) {
|
|
|
436
1191
|
}
|
|
437
1192
|
// LLM-as-judge quality gate (P2-B). Only active when the feature flag is
|
|
438
1193
|
// explicitly enabled. Fail-open: judge failures always pass through.
|
|
1194
|
+
// D-5 / #388: Three-band system — review_needed band queues a proposal
|
|
1195
|
+
// with review_needed outcome rather than auto-rejecting.
|
|
1196
|
+
let lessonJudgeConfidence;
|
|
439
1197
|
if (isLlmFeatureEnabled(config, "lesson_quality_gate")) {
|
|
440
|
-
|
|
1198
|
+
// D-4 / #390: retrieve top-3 similar lessons for dedup check in judge.
|
|
1199
|
+
const similarLessons = await fetchSimilarLessonsFn(content.slice(0, 500), 3);
|
|
1200
|
+
const judgeResult = await runLessonQualityJudge(config, content, assetContent ?? "", chat, similarLessons.length > 0 ? similarLessons : undefined);
|
|
441
1201
|
if (!judgeResult.pass) {
|
|
1202
|
+
if (judgeResult.reviewNeeded) {
|
|
1203
|
+
return writeQualityRejection(stash, inputRef, effectiveLessonRef, content, judgeResult.score, judgeResult.reason, {
|
|
1204
|
+
reviewNeeded: true,
|
|
1205
|
+
...(exclusionSet.size > 0 ? { filteredFeedbackCount, feedbackFullyFiltered } : {}),
|
|
1206
|
+
});
|
|
1207
|
+
}
|
|
442
1208
|
return writeQualityRejection(stash, inputRef, effectiveLessonRef, content, judgeResult.score, judgeResult.reason, exclusionSet.size > 0 ? { filteredFeedbackCount, feedbackFullyFiltered } : {});
|
|
443
1209
|
}
|
|
1210
|
+
// Normalize 1-5 judge score to [0, 1]. Score of -1 means pass-through
|
|
1211
|
+
// (no LLM / timeout / parse failure) — leave confidence undefined so
|
|
1212
|
+
// the auto-accept gate treats the proposal as unscored and skips it.
|
|
1213
|
+
if (judgeResult.score > 0)
|
|
1214
|
+
lessonJudgeConfidence = judgeResult.score / 5;
|
|
444
1215
|
}
|
|
445
1216
|
// Round-trip the parsed frontmatter so the proposal carries it as a
|
|
446
1217
|
// structured payload alongside the raw content (matches the shape used by
|
|
447
1218
|
// other proposal sources).
|
|
1219
|
+
//
|
|
1220
|
+
// D-7 / #398: Inject `sources: [inputRef]` into the LLM-path proposal
|
|
1221
|
+
// frontmatter when the field is absent, providing reviewers with provenance
|
|
1222
|
+
// without requiring them to open event history. A-MEM arXiv:2502.12110 —
|
|
1223
|
+
// all notes carry explicit provenance links.
|
|
448
1224
|
const parsed = parseFrontmatter(content);
|
|
449
|
-
const
|
|
1225
|
+
const frontmatterWithSources = { ...parsed.data };
|
|
1226
|
+
if (!Array.isArray(frontmatterWithSources.sources) || frontmatterWithSources.sources.length === 0) {
|
|
1227
|
+
frontmatterWithSources.sources = [inputRef];
|
|
1228
|
+
}
|
|
1229
|
+
const proposalResult2 = createProposal(stash, {
|
|
450
1230
|
ref: effectiveLessonRef,
|
|
451
1231
|
source: "distill",
|
|
452
1232
|
...(options.sourceRun !== undefined ? { sourceRun: options.sourceRun } : {}),
|
|
453
1233
|
payload: {
|
|
454
1234
|
content,
|
|
455
|
-
|
|
1235
|
+
frontmatter: frontmatterWithSources,
|
|
456
1236
|
},
|
|
1237
|
+
...(lessonJudgeConfidence !== undefined ? { confidence: lessonJudgeConfidence } : {}),
|
|
457
1238
|
}, options.ctx);
|
|
1239
|
+
if (isProposalSkipped(proposalResult2)) {
|
|
1240
|
+
appendEvent({
|
|
1241
|
+
eventType: "distill_invoked",
|
|
1242
|
+
ref: inputRef,
|
|
1243
|
+
metadata: {
|
|
1244
|
+
outcome: "skipped",
|
|
1245
|
+
lessonRef: effectiveLessonRef,
|
|
1246
|
+
message: proposalResult2.message,
|
|
1247
|
+
skipReason: proposalResult2.reason,
|
|
1248
|
+
},
|
|
1249
|
+
});
|
|
1250
|
+
return {
|
|
1251
|
+
schemaVersion: 1,
|
|
1252
|
+
ok: true,
|
|
1253
|
+
outcome: "skipped",
|
|
1254
|
+
inputRef,
|
|
1255
|
+
lessonRef: effectiveLessonRef,
|
|
1256
|
+
message: proposalResult2.message,
|
|
1257
|
+
};
|
|
1258
|
+
}
|
|
1259
|
+
const proposal2 = proposalResult2;
|
|
458
1260
|
appendEvent({
|
|
459
1261
|
eventType: "distill_invoked",
|
|
460
1262
|
ref: inputRef,
|
|
461
|
-
metadata:
|
|
1263
|
+
metadata: {
|
|
1264
|
+
outcome: "queued",
|
|
1265
|
+
lessonRef: effectiveLessonRef,
|
|
462
1266
|
proposalRef: effectiveLessonRef,
|
|
463
1267
|
proposalKind: effectiveProposalKind,
|
|
464
|
-
proposalId:
|
|
1268
|
+
proposalId: proposal2.id,
|
|
465
1269
|
...(options.sourceRun !== undefined ? { sourceRun: options.sourceRun } : {}),
|
|
466
1270
|
...(exclusionSet.size > 0 ? { filteredFeedbackCount } : {}),
|
|
467
|
-
|
|
1271
|
+
...(descriptionSwapped > 0 ? { descriptionSwapped } : {}),
|
|
1272
|
+
},
|
|
468
1273
|
});
|
|
469
1274
|
return {
|
|
470
1275
|
schemaVersion: 1,
|
|
@@ -474,9 +1279,10 @@ export async function akmDistill(options) {
|
|
|
474
1279
|
lessonRef: effectiveLessonRef,
|
|
475
1280
|
proposalRef: effectiveLessonRef,
|
|
476
1281
|
proposalKind: effectiveProposalKind,
|
|
477
|
-
proposalId:
|
|
478
|
-
proposal,
|
|
1282
|
+
proposalId: proposal2.id,
|
|
1283
|
+
proposal: proposal2,
|
|
479
1284
|
...(exclusionSet.size > 0 ? { filteredFeedbackCount, feedbackFullyFiltered } : {}),
|
|
1285
|
+
...(descriptionSwapped > 0 ? { descriptionSwapped } : {}),
|
|
480
1286
|
};
|
|
481
1287
|
}
|
|
482
1288
|
// ── Helpers ─────────────────────────────────────────────────────────────────
|