akm-cli 0.9.0-beta.54 → 0.9.0-beta.55

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (101)
  1. package/dist/cli.js +5 -3
  2. package/dist/commands/agent/contribute-cli.js +2 -3
  3. package/dist/commands/env/env-cli.js +187 -202
  4. package/dist/commands/env/secret-cli.js +109 -121
  5. package/dist/commands/feedback-cli.js +152 -155
  6. package/dist/commands/health/advisories.js +151 -0
  7. package/dist/commands/health/improve-metrics.js +754 -0
  8. package/dist/commands/health/llm-usage.js +65 -0
  9. package/dist/commands/health/md-report.js +103 -0
  10. package/dist/commands/health/metrics.js +278 -0
  11. package/dist/commands/health/task-runs.js +135 -0
  12. package/dist/commands/health/types.js +18 -0
  13. package/dist/commands/health/windows.js +196 -0
  14. package/dist/commands/health.js +14 -1624
  15. package/dist/commands/improve/anti-collapse.js +170 -0
  16. package/dist/commands/improve/collapse-detector.js +3 -2
  17. package/dist/commands/improve/consolidate.js +636 -633
  18. package/dist/commands/improve/dedup.js +1 -1
  19. package/dist/commands/improve/distill/content-repair.js +202 -0
  20. package/dist/commands/improve/distill/promote-memory.js +228 -0
  21. package/dist/commands/improve/distill/quality-gate.js +233 -0
  22. package/dist/commands/improve/distill-guards.js +127 -0
  23. package/dist/commands/improve/distill.js +49 -575
  24. package/dist/commands/improve/extract-cli.js +74 -76
  25. package/dist/commands/improve/extract.js +6 -4
  26. package/dist/commands/improve/hot-probation.js +45 -0
  27. package/dist/commands/improve/improve-auto-accept.js +3 -2
  28. package/dist/commands/improve/improve-cli.js +14 -13
  29. package/dist/commands/improve/improve-result-file.js +2 -1
  30. package/dist/commands/improve/improve.js +6 -5
  31. package/dist/commands/improve/loop-stages.js +19 -21
  32. package/dist/commands/improve/preparation.js +4 -2
  33. package/dist/commands/improve/procedural.js +10 -31
  34. package/dist/commands/improve/recombine.js +19 -43
  35. package/dist/commands/improve/reflect.js +1 -1
  36. package/dist/commands/improve/schema-similarity-gate.js +168 -0
  37. package/dist/commands/improve/shared.js +48 -0
  38. package/dist/commands/observability-cli.js +4 -4
  39. package/dist/commands/proposal/drain-policies.js +2 -2
  40. package/dist/commands/proposal/drain.js +1 -1
  41. package/dist/commands/proposal/legacy-import.js +115 -0
  42. package/dist/commands/proposal/proposal-cli.js +3 -3
  43. package/dist/commands/proposal/proposal.js +2 -1
  44. package/dist/commands/proposal/propose.js +1 -1
  45. package/dist/commands/proposal/repository.js +829 -0
  46. package/dist/commands/proposal/validators/proposals.js +5 -920
  47. package/dist/commands/read/remember-cli.js +132 -137
  48. package/dist/commands/read/search-cli.js +1 -1
  49. package/dist/commands/registry-cli.js +76 -87
  50. package/dist/commands/sources/add-cli.js +90 -94
  51. package/dist/commands/sources/history.js +1 -1
  52. package/dist/commands/sources/schema-repair.js +1 -1
  53. package/dist/commands/sources/sources-cli.js +3 -3
  54. package/dist/commands/sources/stash-cli.js +1 -1
  55. package/dist/commands/tasks/tasks-cli.js +1 -2
  56. package/dist/commands/wiki-cli.js +2 -3
  57. package/dist/core/common.js +3 -3
  58. package/dist/core/config/config-schema.js +6 -0
  59. package/dist/core/deep-merge.js +38 -0
  60. package/dist/core/events.js +2 -1
  61. package/dist/core/logs-db.js +8 -13
  62. package/dist/core/paths.js +14 -14
  63. package/dist/core/state-db.js +13 -1140
  64. package/dist/indexer/db/db.js +66 -709
  65. package/dist/indexer/db/entry-mapper.js +41 -0
  66. package/dist/indexer/db/schema.js +516 -0
  67. package/dist/indexer/feedback/utility-policy.js +85 -0
  68. package/dist/indexer/graph/graph-extraction.js +2 -1
  69. package/dist/indexer/index-writer-lock.js +9 -0
  70. package/dist/indexer/indexer.js +78 -23
  71. package/dist/indexer/search/fts-query.js +51 -0
  72. package/dist/integrations/agent/spawn.js +15 -66
  73. package/dist/output/text/helpers.js +13 -0
  74. package/dist/scripts/migrate-storage.js +6891 -7436
  75. package/dist/scripts/migrations/import-fs-improve-runs-to-db.js +44 -43
  76. package/dist/setup/legacy-config.js +106 -0
  77. package/dist/setup/prompt.js +57 -0
  78. package/dist/setup/providers.js +14 -0
  79. package/dist/setup/semantic-assets.js +124 -0
  80. package/dist/setup/setup.js +24 -1607
  81. package/dist/setup/steps/connection.js +734 -0
  82. package/dist/setup/steps/output.js +31 -0
  83. package/dist/setup/steps/platforms.js +124 -0
  84. package/dist/setup/steps/semantic.js +27 -0
  85. package/dist/setup/steps/sources.js +222 -0
  86. package/dist/setup/steps/stashdir.js +42 -0
  87. package/dist/setup/steps/tasks.js +152 -0
  88. package/dist/storage/repositories/canaries-repository.js +107 -0
  89. package/dist/storage/repositories/consolidation-repository.js +38 -0
  90. package/dist/storage/repositories/embeddings-repository.js +72 -0
  91. package/dist/storage/repositories/events-repository.js +187 -0
  92. package/dist/storage/repositories/extract-sessions-repository.js +96 -0
  93. package/dist/storage/repositories/improve-runs-repository.js +130 -0
  94. package/dist/storage/repositories/index-db.js +4 -7
  95. package/dist/storage/repositories/proposals-repository.js +220 -0
  96. package/dist/storage/repositories/recombine-repository.js +213 -0
  97. package/dist/storage/repositories/task-history-repository.js +93 -0
  98. package/dist/storage/sqlite-pragmas.js +3 -3
  99. package/dist/tasks/runner.js +2 -1
  100. package/package.json +1 -1
  101. package/dist/commands/improve/homeostatic.js +0 -497
@@ -40,9 +40,9 @@ import path from "node:path";
40
40
  import { parseAssetRef } from "../../core/asset/asset-ref.js";
41
41
  import { assembleAssetFromString, serializeFrontmatter } from "../../core/asset/asset-serialize.js";
42
42
  import { parseFrontmatter } from "../../core/asset/frontmatter.js";
43
- import { getBodyEmbeddings, upsertBodyEmbeddings } from "../../core/state-db.js";
44
43
  import { warn } from "../../core/warn.js";
45
44
  import { cosineSimilarity, embedBatch, resolveEmbeddingModelId } from "../../llm/embedder.js";
45
+ import { getBodyEmbeddings, upsertBodyEmbeddings } from "../../storage/repositories/embeddings-repository.js";
46
46
  /** Default strict cosine floor — high enough to skip distinct-but-related memories. */
47
47
  export const DEFAULT_DEDUP_COSINE_THRESHOLD = 0.97;
48
48
  /**
@@ -0,0 +1,202 @@
1
+ // This Source Code Form is subject to the terms of the Mozilla Public
2
+ // License, v. 2.0. If a copy of the MPL was not distributed with this
3
+ // file, You can obtain one at https://mozilla.org/MPL/2.0/.
4
+ /**
5
+ * Pure content-repair + quality-validation stages for `akm distill`.
6
+ *
7
+ * Extracted verbatim from the inline body of `akmDistill` so each normalization
8
+ * pass is an independently testable unit. Every function is a pure transform of
9
+ * `(content, inputRef) → content | findings` with no I/O — logic is
10
+ * byte-identical to the pre-extraction inline code. The lesson-path guard
11
+ * (`effectiveProposalKind !== "knowledge"`) stays in the caller; these helpers
12
+ * assume the lesson path.
13
+ */
14
+ import { assembleAssetFromString } from "../../../core/asset/asset-serialize.js";
15
+ import { parseFrontmatter } from "../../../core/asset/frontmatter.js";
16
+ import { repairTruncatedDescription } from "../../../core/text-truncation.js";
17
+ import { detectDoubleFrontmatter, isValidDescription, isValidWhenToUse, } from "../../proposal/validators/proposal-quality-validators.js";
/**
 * Auto-repair missing lesson frontmatter before the lint pass hard-fails.
 *
 * Small models frequently emit a usable lesson body but omit the YAML header.
 * When `description` and/or `when_to_use` is absent (or blank), this scans the
 * markdown-stripped body lines for text that passes the post-LLM validators
 * and writes the recovered value(s) into a rebuilt frontmatter block.
 *
 * IMPORTANT: no placeholder strings are ever synthesised. When no body line
 * satisfies `isValidDescription` / `isValidWhenToUse`, the field stays missing
 * so the lesson lint can reject the proposal. Placeholders such as
 * `"Lesson distilled from <ref>"` or `"When working with <slug>"` are what
 * produced the systematic broken proposals seen in archived rejections.
 *
 * Unlike the earlier inline logic, the input is returned untouched whenever
 * nothing could be recovered. The old emptiness check looked at the merged
 * frontmatter, so any unrelated pre-existing key caused a pointless rewrite.
 *
 * @param {string} content  Raw lesson text, with or without frontmatter.
 * @param {string} inputRef Ref being distilled; forwarded to the validators.
 * @returns {string} Repaired asset text, or `content` itself when nothing changed.
 */
export function autoRepairLessonFrontmatter(content, inputRef) {
    const parsed = parseFrontmatter(content);
    const fm = parsed.data ?? {};
    const missingDesc = typeof fm.description !== "string" || !fm.description.trim();
    const missingWtu = typeof fm.when_to_use !== "string" || !fm.when_to_use.trim();
    if (!missingDesc && !missingWtu) {
        return content;
    }
    const body = parsed.content.trim();
    // Strip markdown formatting tokens from a line so extracted text is clean.
    const stripMd = (line) => line
        .replace(/\*\*([^*]+)\*\*/g, "$1")
        .replace(/\*([^*]+)\*/g, "$1")
        .replace(/`([^`]+)`/g, "$1")
        .replace(/^[#*\->_]+\s*/, "")
        .replace(/:\s*$/, "")
        .trim();
    // Lines that look like YAML assignments (key: value) or frontmatter
    // delimiters show up when the LLM leaks frontmatter into the body; using
    // them would yield values like description: "description: Key Takeaways".
    const isYamlLike = (line) => /^---/.test(line) || /^[a-z_]+:\s/i.test(line);
    const bodyLines = body.split("\n").map(stripMd);
    // description: first prose-looking line (11–399 chars) that the
    // description validator accepts. Only searched when the field is missing.
    const descLine = missingDesc
        ? bodyLines.find((line) => !isYamlLike(line) &&
            line.length > 10 &&
            line.length < 400 &&
            isValidDescription(line, inputRef).ok)
        : undefined;
    // when_to_use: first line opening with "When " / "Use when" / "Apply when"
    // that the when_to_use validator accepts (rejects circular fallbacks).
    const wtuLine = missingWtu
        ? bodyLines.find((line) => /^(when |use when|apply when)/i.test(line) &&
            line.length < 400 &&
            isValidWhenToUse(line, inputRef).ok)
        : undefined;
    // Nothing recovered: leave the original bytes for the lint pass to reject.
    if (descLine === undefined && wtuLine === undefined) {
        return content;
    }
    const repairedFm = {
        ...fm,
        ...(descLine !== undefined ? { description: descLine } : {}),
        ...(wtuLine !== undefined ? { when_to_use: wtuLine } : {}),
    };
    // JSON-encode each value so quotes/colons in the text stay valid YAML scalars.
    const fmLines = Object.entries(repairedFm)
        .map(([key, value]) => `${key}: ${JSON.stringify(value)}`)
        .join("\n");
    return assembleAssetFromString(fmLines, body);
}
/**
 * Description ↔ when_to_use auto-swap normalization.
 *
 * Handles the mis-fielded case where the LLM put a conditional sentence
 * ("When X happens, do Y" / "If ...") into `description` while `when_to_use`
 * holds a non-empty, non-conditional sentence. The two values are exchanged
 * only when the exchanged pair passes both validators; in every other case
 * the original content is handed back untouched.
 *
 * @param {string} content  Lesson text including frontmatter.
 * @param {string} inputRef Ref being distilled; forwarded to the validators.
 * @returns {{content: string, swapped: number}} `swapped` is 1 when the
 *   fields were exchanged and the asset re-assembled, otherwise 0.
 */
export function autoSwapDescriptionWhenToUse(content, inputRef) {
    const untouched = { content, swapped: 0 };
    const { data, content: body } = parseFrontmatter(content);
    const frontmatter = data ?? {};
    const trimmedOrEmpty = (value) => (typeof value === "string" ? value.trim() : "");
    const description = trimmedOrEmpty(frontmatter.description);
    const whenToUse = trimmedOrEmpty(frontmatter.when_to_use);
    const conditionalOpening = /^(when|if)\b/i;
    // Candidate only when description reads as a trigger and when_to_use is a
    // non-empty sentence that does not.
    const isSwapCandidate = conditionalOpening.test(description) &&
        !conditionalOpening.test(whenToUse) &&
        whenToUse.length > 0;
    if (!isSwapCandidate) {
        return untouched;
    }
    // Revalidate each value in its new slot before committing the swap.
    const asDescription = isValidDescription(whenToUse, inputRef);
    const asWhenToUse = isValidWhenToUse(description, inputRef);
    if (!(asDescription.ok && asWhenToUse.ok)) {
        return untouched;
    }
    const exchanged = { ...frontmatter, description: whenToUse, when_to_use: description };
    const serialized = Object.entries(exchanged)
        .map(([key, value]) => `${key}: ${JSON.stringify(value)}`)
        .join("\n");
    return { content: assembleAssetFromString(serialized, body), swapped: 1 };
}
/**
 * Post-generation truncation repair (#556).
 *
 * Passes the frontmatter `description` and the lesson body to
 * `repairTruncatedDescription`; when that helper returns a different string,
 * the asset is re-assembled with the repaired description. Runs before the
 * lint + quality validators. When the description is absent/empty or the
 * helper returns it unchanged, the input is returned as-is.
 *
 * @param {string} content Lesson text including frontmatter.
 * @returns {string} The original `content`, or a re-assembled asset carrying
 *   the repaired description.
 */
export function repairLessonDescriptionTruncation(content) {
    const { data, content: body } = parseFrontmatter(content);
    const frontmatter = data ?? {};
    const { description } = frontmatter;
    if (typeof description !== "string" || description === "") {
        return content;
    }
    const completed = repairTruncatedDescription(description, body);
    if (completed === description) {
        return content;
    }
    const serialized = Object.entries({ ...frontmatter, description: completed })
        .map(([key, value]) => `${key}: ${JSON.stringify(value)}`)
        .join("\n");
    return assembleAssetFromString(serialized, body);
}
/**
 * Extra quality checks for lessons that already passed lesson-lint.
 *
 * lesson-lint only verifies that fields are present and non-empty; this pass
 * reports, in order:
 *   1. a description rejected by `isValidDescription`
 *   2. a when_to_use rejected by `isValidWhenToUse`
 *   3. description and when_to_use that are equal after trim + lower-casing
 *      (checked only when both passed their own validator)
 *   4. double / pseudo frontmatter reported by `detectDoubleFrontmatter`
 *
 * @param {string} content  Lesson text including frontmatter.
 * @param {string} inputRef Ref being distilled; used in finding messages.
 * @returns {Array<{kind: string, field: string, message: string}>} Findings;
 *   empty when the lesson is clean.
 */
export function collectLessonQualityFindings(content, inputRef) {
    const frontmatter = parseFrontmatter(content).data ?? {};
    const { description, when_to_use: whenToUse } = frontmatter;
    const issues = [];
    const descriptionVerdict = isValidDescription(description, inputRef);
    if (!descriptionVerdict.ok) {
        issues.push({
            kind: "invalid-description",
            field: "description",
            message: `Distilled lesson for ${inputRef} has an invalid description: ${descriptionVerdict.reason}.`,
        });
    }
    const whenToUseVerdict = isValidWhenToUse(whenToUse, inputRef);
    if (!whenToUseVerdict.ok) {
        issues.push({
            kind: "invalid-when_to_use",
            field: "when_to_use",
            message: `Distilled lesson for ${inputRef} has an invalid when_to_use: ${whenToUseVerdict.reason}.`,
        });
    }
    // The two fields must say different things; compare case-insensitively.
    const canonical = (text) => text.trim().toLowerCase();
    const bothUsable = descriptionVerdict.ok &&
        whenToUseVerdict.ok &&
        typeof description === "string" &&
        typeof whenToUse === "string";
    if (bothUsable && canonical(description) === canonical(whenToUse)) {
        issues.push({
            kind: "description-equals-when_to_use",
            field: "description",
            message: `Distilled lesson for ${inputRef} has identical description and when_to_use.`,
        });
    }
    // Double-frontmatter / pseudo-frontmatter pollution in the body.
    const pollution = detectDoubleFrontmatter(content);
    if (pollution) {
        issues.push({
            kind: pollution.kind,
            field: "body",
            message: `Distilled lesson for ${inputRef}: ${pollution.message}`,
        });
    }
    return issues;
}
@@ -0,0 +1,228 @@
1
+ // This Source Code Form is subject to the terms of the Mozilla Public
2
+ // License, v. 2.0. If a copy of the MPL was not distributed with this
3
+ // file, You can obtain one at https://mozilla.org/MPL/2.0/.
4
+ /**
5
+ * Memory→knowledge promotion branch for `akm distill`.
6
+ *
7
+ * This is an entire second command that used to be inlined inside `akmDistill`:
8
+ * when a `memory:*` ref is reinforced enough (per the deterministic stability
9
+ * heuristic in `distill-promotion-policy`), distill graduates it into a
10
+ * `knowledge:*` proposal instead of a lesson. The branch owns its own LLM
11
+ * contradiction-merge (mem0 ADD/UPDATE/NOOP), quality gate, proposal creation,
12
+ * and `distill_invoked` event emit.
13
+ *
14
+ * {@link promoteMemoryToKnowledge} returns the finished {@link AkmDistillResult}
15
+ * when the branch fired, or `null` when the ref is not a promotion candidate —
16
+ * in which case the caller falls through to the ordinary lesson/knowledge LLM
17
+ * path. Logic is byte-identical to the pre-extraction inline code.
18
+ */
19
+ import fs from "node:fs";
20
+ import { parseFrontmatter } from "../../../core/asset/frontmatter.js";
21
+ import { getDefaultLlmConfig } from "../../../core/config/config.js";
22
+ import { ConfigError } from "../../../core/errors.js";
23
+ import { appendEvent } from "../../../core/events.js";
24
+ import { parseEmbeddedJsonResponse } from "../../../llm/client.js";
25
+ import { isLlmFeatureEnabled } from "../../../llm/feature-gate.js";
26
+ import { createProposal, isProposalSkipped } from "../../proposal/repository.js";
27
+ import { assessMemoryKnowledgePromotionCandidate } from "../distill-promotion-policy.js";
28
+ import { persistOutputEncodingSalience, runLessonQualityJudge, writeQualityRejection } from "./quality-gate.js";
/**
 * Run the memory→knowledge promotion branch of `akm distill`.
 *
 * Flow:
 *   1. Ask `assessMemoryKnowledgePromotionCandidate` whether the ref should be
 *      promoted (never when `targetKind` is "lesson"). Not a candidate → `null`,
 *      and the caller continues with the ordinary lesson/knowledge path.
 *   2. If the destination knowledge file already has content, resolve the
 *      conflict: via an LLM ADD/UPDATE/NOOP merge when an LLM is configured,
 *      otherwise by appending the existing content for the reviewer.
 *   3. Optionally run the lesson quality judge (`lesson_quality_gate` feature).
 *   4. Create the proposal, persist output salience, and emit a
 *      `distill_invoked` event.
 *
 * @param {object} ctx Distill context. Fields read here: `targetKind`,
 *   `inputRef`, `assetContent`, `config`, `chat`, `stash`, `lookup`,
 *   `fetchSimilarLessonsFn`, `existingRefVocabulary`, `outcomeWeightEnabled`,
 *   `eligMeta`, `exclusionSetSize`, `filteredFeedbackCount`,
 *   `feedbackFullyFiltered`, `filteredEvents`, `eligibilitySource`,
 *   `sourceRun`, `proposalsCtx`.
 * @returns {Promise<object|null>} The finished distill result (queued,
 *   skipped, or whatever `writeQualityRejection` returns), or `null` when the
 *   ref is not a promotion candidate.
 */
export async function promoteMemoryToKnowledge(ctx) {
    const { targetKind, inputRef, assetContent, config, chat, stash, lookup, fetchSimilarLessonsFn, existingRefVocabulary, outcomeWeightEnabled, eligMeta, exclusionSetSize, filteredFeedbackCount, feedbackFullyFiltered, } = ctx;
    const promotion = targetKind === "lesson"
        ? null
        : assessMemoryKnowledgePromotionCandidate({
            inputRef,
            assetContent,
            // Only the metadata of each feedback event is forwarded.
            feedbackEvents: ctx.filteredEvents.map((event) => ({
                ...(event.metadata !== undefined ? { metadata: event.metadata } : {}),
            })),
        });
    if (!(promotion?.promote && promotion.content && (targetKind === "knowledge" || targetKind === "auto"))) {
        return null;
    }
    // D-1 / #369: When the destination knowledge file already exists, route
    // through the LLM for contradiction resolution instead of silently
    // overwriting. Follows mem0 ADD/UPDATE/DELETE/NOOP pattern (arXiv:2504.19413 §3.2)
    // and A-MEM dynamic linking (arXiv:2502.12110).
    let resolvedPromotionContent = promotion.content;
    const existingKnowledgePath = await lookup(promotion.knowledgeRef);
    // Single guarded read: a missing or unreadable file simply means "no
    // existing content", so a separate existence check adds nothing.
    let existingKnowledgeContent = null;
    if (existingKnowledgePath) {
        try {
            existingKnowledgeContent = fs.readFileSync(existingKnowledgePath, "utf8");
        }
        catch {
            existingKnowledgeContent = null;
        }
    }
    const hasDestinationConflict = Boolean(existingKnowledgeContent && config);
    // Resolve the LLM config once; it selects between the merge and the
    // reviewer-reference branches below.
    const mergeLlm = hasDestinationConflict ? getDefaultLlmConfig(config) : undefined;
    if (hasDestinationConflict && mergeLlm) {
        // Existing content found: call LLM for contradiction-resolution merge.
        const mergePrompt = [
            "You are merging two versions of a knowledge document.",
            "Existing content is already committed; new content comes from a memory distillation run.",
            "Choose one of: ADD (combine both), UPDATE (replace existing with new), NOOP (keep existing unchanged).",
            'Return ONLY valid JSON: {"action": "ADD"|"UPDATE"|"NOOP", "content": "<merged markdown if ADD/UPDATE, empty string if NOOP>"}',
            "",
            "## Existing knowledge content",
            "```",
            existingKnowledgeContent.slice(0, 3000),
            "```",
            "",
            "## New content from distillation",
            "```",
            promotion.content.slice(0, 3000),
            "```",
        ].join("\n");
        try {
            const mergeResponse = await chat(mergeLlm, [
                { role: "system", content: "Return only valid JSON. No prose." },
                { role: "user", content: mergePrompt },
            ]);
            const mergeResult = parseEmbeddedJsonResponse(mergeResponse);
            const action = mergeResult?.action;
            if (action === "NOOP") {
                // Existing content is authoritative — no update needed.
                appendEvent({
                    eventType: "distill_invoked",
                    ref: inputRef,
                    metadata: {
                        outcome: "skipped",
                        lessonRef: promotion.knowledgeRef,
                        message: "D-1: LLM resolved destination conflict as NOOP — existing content kept",
                        ...eligMeta,
                    },
                });
                return {
                    schemaVersion: 1,
                    ok: true,
                    outcome: "skipped",
                    inputRef,
                    lessonRef: promotion.knowledgeRef,
                    message: "Existing knowledge content unchanged (contradiction resolution: NOOP)",
                };
            }
            // ADD/UPDATE only takes effect when the LLM returned non-blank content.
            if ((action === "ADD" || action === "UPDATE") && mergeResult.content?.trim()) {
                resolvedPromotionContent = mergeResult.content;
            }
        }
        catch {
            // Deliberate best-effort: LLM merge failed — fall through with the
            // original promotion content. The reviewer will see both versions
            // in the proposal diff.
        }
    }
    else if (hasDestinationConflict) {
        // No LLM configured: include existing content as context in the proposal
        // so the reviewer can do the contradiction resolution manually.
        resolvedPromotionContent = [
            promotion.content,
            "",
            "---",
            "<!-- D-1 / #369: Existing knowledge content is shown below for reviewer reference. -->",
            "<!-- Review: decide whether to ADD (merge), UPDATE (replace), or NOOP (keep existing). -->",
            "",
            "## Existing content (for reviewer reference)",
            "",
            existingKnowledgeContent,
        ].join("\n");
    }
    // Apply quality gate to fast-path knowledge promotion (Risk 4 fix).
    // D-5 / #388: Three-band system — review_needed band queues to proposal
    // queue with review_needed outcome rather than auto-rejecting.
    let knowledgeJudgeConfidence;
    if (isLlmFeatureEnabled(config, "lesson_quality_gate")) {
        // D-4 / #390: retrieve top-3 similar lessons for dedup check in judge.
        const similarLessons = await fetchSimilarLessonsFn(resolvedPromotionContent.slice(0, 500), 3);
        const judgeResult = await runLessonQualityJudge(config, resolvedPromotionContent, assetContent ?? "", chat, similarLessons.length > 0 ? similarLessons : undefined);
        if (!judgeResult.pass) {
            // Uncertainty band (2.5–3.5) is queued as review_needed; anything
            // else that failed the judge is a plain rejection.
            const rejectionOptions = judgeResult.reviewNeeded ? { reviewNeeded: true } : {};
            return writeQualityRejection(stash, inputRef, promotion.knowledgeRef, resolvedPromotionContent, judgeResult.score, judgeResult.reason, rejectionOptions, ctx.eligibilitySource);
        }
        // Normalize 1-5 judge score to [0, 1]. Score of -1 means pass-through
        // (no LLM / timeout / parse failure) — leave confidence undefined so
        // the auto-accept gate treats the proposal as unscored and skips it.
        if (judgeResult.score > 0) {
            knowledgeJudgeConfidence = judgeResult.score / 5;
        }
    }
    // Guard against a nullish `data`, matching the other distill helpers.
    const knowledgeFrontmatter = parseFrontmatter(resolvedPromotionContent).data ?? {};
    const proposalResult = createProposal(stash, {
        ref: promotion.knowledgeRef,
        source: "distill",
        ...(ctx.sourceRun !== undefined ? { sourceRun: ctx.sourceRun } : {}),
        payload: {
            content: resolvedPromotionContent,
            ...(Object.keys(knowledgeFrontmatter).length > 0 ? { frontmatter: knowledgeFrontmatter } : {}),
        },
        ...(knowledgeJudgeConfidence !== undefined ? { confidence: knowledgeJudgeConfidence } : {}),
        // Attribution tagging: persist the eligibility lane on the proposal.
        ...(ctx.eligibilitySource ? { eligibilitySource: ctx.eligibilitySource } : {}),
    }, ctx.proposalsCtx);
    if (isProposalSkipped(proposalResult)) {
        appendEvent({
            eventType: "distill_invoked",
            ref: inputRef,
            metadata: {
                outcome: "skipped",
                lessonRef: promotion.knowledgeRef,
                message: proposalResult.message,
                skipReason: proposalResult.reason,
                ...eligMeta,
            },
        });
        return {
            schemaVersion: 1,
            ok: true,
            outcome: "skipped",
            inputRef,
            lessonRef: promotion.knowledgeRef,
            message: proposalResult.message,
        };
    }
    const proposal = proposalResult;
    // G4: content-score the distilled OUTPUT so it carries a real encoding
    // salience (encoding_source='content') from creation.
    persistOutputEncodingSalience(promotion.knowledgeRef, resolvedPromotionContent, existingRefVocabulary, outcomeWeightEnabled);
    appendEvent({
        eventType: "distill_invoked",
        ref: inputRef,
        metadata: {
            outcome: "queued",
            lessonRef: promotion.knowledgeRef,
            proposalRef: promotion.knowledgeRef,
            proposalKind: "knowledge",
            proposalId: proposal.id,
            // R3: judge verdicts are longitudinally queryable, not just a one-shot
            // proposal.confidence write (normalized 1–5 score / 5).
            ...(knowledgeJudgeConfidence !== undefined ? { judgeConfidence: knowledgeJudgeConfidence } : {}),
            ...(ctx.sourceRun !== undefined ? { sourceRun: ctx.sourceRun } : {}),
            ...(exclusionSetSize > 0 ? { filteredFeedbackCount } : {}),
            ...eligMeta,
        },
    });
    return {
        schemaVersion: 1,
        ok: true,
        outcome: "queued",
        inputRef,
        lessonRef: promotion.knowledgeRef,
        proposalRef: promotion.knowledgeRef,
        proposalKind: "knowledge",
        proposalId: proposal.id,
        proposal,
        ...(exclusionSetSize > 0 ? { filteredFeedbackCount, feedbackFullyFiltered } : {}),
    };
}
@@ -0,0 +1,233 @@
1
+ // This Source Code Form is subject to the terms of the Mozilla Public
2
+ // License, v. 2.0. If a copy of the MPL was not distributed with this
3
+ // file, You can obtain one at https://mozilla.org/MPL/2.0/.
4
+ /**
5
+ * Distill quality-gate cluster — LLM-as-judge, quality-rejection envelope
6
+ * writer, and output-salience persistence. Extracted verbatim from
7
+ * `distill.ts` so the main `akmDistill` orchestrator and the memory→knowledge
8
+ * promotion branch (`promote-memory.ts`) can share the same helpers without a
9
+ * circular import. Logic is byte-identical to the pre-extraction inline code.
10
+ */
11
+ import fs from "node:fs";
12
+ import path from "node:path";
13
+ import { parseAssetRef } from "../../../core/asset/asset-ref.js";
14
+ import { timestampForFilename } from "../../../core/common.js";
15
+ import { getDefaultLlmConfig } from "../../../core/config/config.js";
16
+ import { appendEvent } from "../../../core/events.js";
17
+ import { withStateDb } from "../../../core/state-db.js";
18
+ import { parseEmbeddedJsonResponse } from "../../../llm/client.js";
19
+ import { akmSearch } from "../../read/search.js";
20
+ import { scoreEncodingSalience } from "../encoding-salience.js";
21
+ import { computeSalience, upsertAssetSalience } from "../salience.js";
22
+ // ── D-4 / #390: Top-3 similar lessons retrieval ──────────────────────────────
23
/**
 * Look up existing lesson assets that best match `query` through `akmSearch`.
 *
 * The search is restricted to `type: "lesson"`, capped at `n` results, and run
 * with `skipLogging: true` and `eventSource: "improve"`. Only hits that carry a
 * string `path` are kept. Each kept hit's file is read as UTF-8; a missing or
 * unreadable file yields an empty `content` rather than an error.
 *
 * @param query - Search text handed to `akmSearch`.
 * @param n - Maximum number of lessons to return.
 * @param _stashDir - Not used by this implementation.
 * @returns Array of `{ ref, content }` objects; empty when the search throws
 *   or produces no usable hits.
 */
export async function fetchTopSimilarLessons(query, n, _stashDir) {
    // Best-effort read: any filesystem problem collapses to an empty body.
    const readLessonBody = (filePath) => {
        try {
            return filePath && fs.existsSync(filePath) ? fs.readFileSync(filePath, "utf8") : "";
        }
        catch {
            return "";
        }
    };
    try {
        const searchResult = await akmSearch({
            query,
            type: "lesson",
            limit: n,
            skipLogging: true,
            eventSource: "improve",
        });
        const candidates = searchResult?.hits ?? [];
        const withPath = candidates.filter((hit) => "path" in hit && typeof hit.path === "string");
        const similar = [];
        for (const hit of withPath.slice(0, n)) {
            similar.push({ ref: hit.ref, content: readLessonBody(hit.path) });
        }
        return similar;
    }
    catch {
        // Search failures are non-fatal for the caller: report "nothing similar".
        return [];
    }
}
58
+ // ── LLM-as-judge quality gate (P2-B) ────────────────────────────────────────
59
/**
 * D-4 / #390: Assemble the LLM-as-judge prompt for a proposed lesson.
 *
 * The prompt is a newline-joined sequence of: the scoring rubric, the source
 * asset (first 2000 characters), an optional block of existing similar lessons
 * (first 500 characters each), the proposed lesson (first 1000 characters), and
 * the JSON-only answer instruction. The similar-lessons block is emitted only
 * when `similarLessons` is non-empty, so the judge can score near-duplicates
 * lower.
 *
 * Background cited by the original authors: Voyager arXiv:2305.16291 (skill
 * library admission requires a similarity check) and A-MEM arXiv:2502.12110
 * (new notes are checked against existing notes before linking).
 *
 * @param lessonContent - Body of the proposed lesson.
 * @param sourceContent - Body of the asset the lesson was distilled from.
 * @param similarLessons - Optional list of `{ ref, content }` existing lessons.
 * @returns The complete prompt text.
 */
export function buildJudgePrompt(lessonContent, sourceContent, similarLessons) {
    const FENCE = "```";
    // Wrap a text fragment in its own fenced block (three prompt lines).
    const fenced = (text) => [FENCE, text, FENCE];
    const rubricAndSource = [
        "You are evaluating a proposed lesson asset for an akm knowledge base.",
        "",
        "Score this lesson on each criterion from 1 (poor) to 5 (excellent):",
        "1. NOVELTY: Does the lesson add information not already present in the source asset?",
        "2. ACTIONABILITY: Can an agent follow this lesson without additional context?",
        "3. NON-REDUNDANCY: Is this lesson meaningfully different from what the source already says?",
        "",
        "Source asset content:",
        ...fenced(sourceContent.slice(0, 2000)),
    ];
    const hasSimilar = Boolean(similarLessons && similarLessons.length > 0);
    const similarSection = hasSimilar
        ? [
            "",
            "Existing similar lessons (top-3 by similarity). Rate lower if the proposed lesson is substantially similar to any of these:",
            ...[...similarLessons].flatMap((existing) => [
                // The leading "\n" yields a blank line before each existing lesson.
                `\nExisting lesson ref: ${existing.ref}`,
                ...fenced(existing.content.slice(0, 500)),
            ]),
        ]
        : [];
    const proposalAndInstruction = [
        "",
        "Proposed lesson content:",
        ...fenced(lessonContent.slice(0, 1000)),
        "",
        'Return ONLY valid JSON, no prose: {"score": <average score 1-5 as float>, "reason": "<one sentence>"}',
    ];
    return [...rubricAndSource, ...similarSection, ...proposalAndInstruction].join("\n");
}
101
/**
 * Run the LLM-as-judge quality gate on a proposal's content.
 *
 * Exported so reflect.ts can apply the same gate to reflect proposals (R-5 / #374).
 * This function does not check any feature flag itself; per the original note,
 * callers gate it with `lesson_quality_gate` (alias `proposal_quality_gate`).
 *
 * Fail-open: returns `pass: true` with `score: -1` when no LLM is configured,
 * when the judge call throws or exceeds the 8 s timeout, or when the reply has
 * no usable numeric `score`.
 *
 * @param config - Configuration passed to `getDefaultLlmConfig`.
 * @param lessonContent - Body of the proposed lesson.
 * @param sourceContent - Body of the asset the lesson was distilled from.
 * @param chat - Function `(llmConfig, messages)` that resolves to the raw model reply.
 * @param similarLessons - D-4 / #390: top-3 similar existing lessons for the dedup check.
 * @returns `{ pass, score, reason }`, plus `reviewNeeded: true` in the 2.5–3.5 band.
 */
export async function runLessonQualityJudge(config, lessonContent, sourceContent, chat,
/** D-4 / #390: top-3 similar existing lessons for dedup check. */
similarLessons) {
    const llmConfig = getDefaultLlmConfig(config);
    if (!llmConfig) {
        return { pass: true, score: -1, reason: "no LLM configured — passing through" };
    }
    // A dedicated judge model, when configured, replaces the default model.
    const judgeLlmConfig = llmConfig.judgeModel ? { ...llmConfig, model: llmConfig.judgeModel } : llmConfig;
    const JUDGE_TIMEOUT_MS = 8_000;
    let timeoutHandle;
    try {
        const judgeReply = chat(judgeLlmConfig, [
            { role: "system", content: "Return only valid JSON. No prose." },
            { role: "user", content: buildJudgePrompt(lessonContent, sourceContent, similarLessons) },
        ]);
        const timeout = new Promise((_, reject) => {
            timeoutHandle = setTimeout(() => reject(new Error("judge timeout")), JUDGE_TIMEOUT_MS);
        });
        const raw = await Promise.race([judgeReply, timeout]);
        const parsed = parseEmbeddedJsonResponse(raw);
        // A NaN/Infinity score is as unusable as a missing one: fail open instead
        // of letting it fall through the band comparisons into an auto-reject.
        if (!parsed || typeof parsed.score !== "number" || !Number.isFinite(parsed.score)) {
            return { pass: true, score: -1, reason: "judge parse failed — passing through" };
        }
        // D-5 / #388: Three-band system (MT-Bench arXiv:2306.05685 — ~±0.5 judge variance).
        //   >= 3.5: auto-queue as pending (pass: true)
        //   2.5–3.5: review-needed band — uncertain, escalate to human (reviewNeeded: true)
        //   < 2.5: auto-reject (pass: false)
        const score = parsed.score;
        const reason = parsed.reason ?? "";
        if (score >= 3.5) {
            return { pass: true, score, reason };
        }
        if (score >= 2.5) {
            // Uncertainty band: treat as failed for auto-queuing but flag for review.
            return { pass: false, score, reason, reviewNeeded: true };
        }
        return { pass: false, score, reason };
    }
    catch {
        return { pass: true, score: -1, reason: "judge failed — passing through" };
    }
    finally {
        // Without this the pending timer keeps the event loop alive for up to
        // JUDGE_TIMEOUT_MS after the judge has already answered.
        clearTimeout(timeoutHandle);
    }
}
152
+ // ── Quality-rejection helper ─────────────────────────────────────────────────
153
/**
 * Write a rejected lesson to `.akm/distill-rejected/`, append a `distill_invoked`
 * event, and return the matching result envelope.
 *
 * The outcome is `"review_needed"` when `extraMeta.reviewNeeded` is truthy and
 * `"quality_rejected"` otherwise (D-5 / #388). The rejected content is stored as
 * `<timestamp>-<lessonRef>.md` with a small front-matter header; path separators
 * in `lessonRef` are replaced with `_` so the file always lands directly inside
 * the reject directory.
 *
 * @param stash - Root stash directory.
 * @param inputRef - The original input ref (for the event).
 * @param lessonRef - The proposed lesson/knowledge ref.
 * @param content - The raw content that failed the quality gate.
 * @param score - Quality score from the judge.
 * @param reason - Human-readable rejection reason.
 * @param extraMeta - Optional additional metadata; spread into both the event
 *   metadata and the returned envelope.
 * @param eligibilitySource - Optional eligibility lane, recorded on the event only.
 * @returns Envelope `{ schemaVersion: 1, ok: true, outcome, inputRef, lessonRef,
 *   score, reason, ...extraMeta }`.
 */
export function writeQualityRejection(stash, inputRef, lessonRef, content, score, reason, extraMeta = {}, eligibilitySource) {
    // D-5 / #388: reviewNeeded flag selects "review_needed" vs "quality_rejected" outcome.
    const outcome = extraMeta.reviewNeeded ? "review_needed" : "quality_rejected";
    const rejectDir = path.join(stash, ".akm", "distill-rejected");
    fs.mkdirSync(rejectDir, { recursive: true });
    const ts = timestampForFilename();
    // A ref containing "/" or "\" would otherwise address a sub-directory that
    // does not exist (ENOENT) or step outside the reject directory.
    const fileSafeRef = String(lessonRef).replace(/[\\/]+/g, "_");
    // Front matter is line-oriented: keep the reason on a single line so a
    // multi-line judge reply cannot terminate or corrupt the header.
    const headerReason = String(reason).replace(/\s*[\r\n]+\s*/g, " ");
    const rejectedFile = path.join(rejectDir, `${ts}-${fileSafeRef}.md`);
    fs.writeFileSync(rejectedFile, `---\nscore: ${score}\nreason: ${headerReason}\noutcome: ${outcome}\n---\n\n${content}`, "utf8");
    appendEvent({
        eventType: "distill_invoked",
        ref: inputRef,
        metadata: {
            outcome,
            lessonRef,
            score,
            reason,
            ...extraMeta,
            // Attribution tagging: stamp the eligibility lane so distill_invoked can be
            // sliced by lane downstream. See EligibilitySource.
            ...(eligibilitySource ? { eligibilitySource } : {}),
        },
    });
    return {
        schemaVersion: 1,
        ok: true,
        outcome,
        inputRef,
        lessonRef,
        score,
        reason,
        ...extraMeta,
    };
}
197
/**
 * G4 — score the body of a freshly distilled OUTPUT (lesson/knowledge proposal)
 * and store the resulting salience vector through `upsertAssetSalience`.
 *
 * The asset type comes from `parseAssetRef(ref)`. The content score is computed
 * by `scoreEncodingSalience` with `revisionCount: 0`, then fed to
 * `computeSalience` with `retrievalFreq: 0` inside a `withStateDb` callback.
 *
 * Per the original note, lessons are refused as distill INPUTS
 * (`DISTILL_REFUSED_INPUT_TYPES`), so this creation-time write is their only
 * chance to get a content-derived encoding score.
 *
 * Best-effort: every error is swallowed so scoring can never block or fail the
 * proposal flow.
 *
 * @param ref - Asset ref of the distilled output.
 * @param body - Content to score.
 * @param existingRefVocabulary - Forwarded unchanged to `scoreEncodingSalience`.
 * @param outcomeWeightEnabled - Forwarded to `computeSalience`; carries the
 *   operator opt-out (`improve.salience.outcomeWeightEnabled: false`) so rows
 *   written here use the same weighting as the preparation path.
 */
export function persistOutputEncodingSalience(ref, body, existingRefVocabulary,
// Operator opt-out (improve.salience.outcomeWeightEnabled: false) must apply
// here too, or distill-written rank_score rows would use WS-2 weights while
// preparation uses parity weights — inconsistent salience semantics.
outcomeWeightEnabled) {
    try {
        const { type } = parseAssetRef(ref);
        // revisionCount 0: a freshly distilled output IS a first encounter.
        const encoding = scoreEncodingSalience({ body, type, existingRefVocabulary, revisionCount: 0 });
        const storeVector = (stateDb) => {
            const salienceVector = computeSalience({
                ref,
                type,
                retrievalFreq: 0,
                encodingSalience: encoding.score,
                outcomeWeightEnabled,
            });
            upsertAssetSalience(stateDb, ref, salienceVector);
        };
        withStateDb(storeVector);
    }
    catch {
        // Best-effort — scoring must never block proposal creation.
    }
}