akm-cli 0.8.0-rc2 → 0.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/{.github/CHANGELOG.md → CHANGELOG.md} +191 -3
- package/README.md +22 -6
- package/SECURITY.md +93 -0
- package/dist/cli/config-migrate.js +144 -0
- package/dist/cli/config-validate.js +39 -0
- package/dist/cli/confirm.js +73 -0
- package/dist/cli/parse-args.js +93 -3
- package/dist/cli/shared.js +129 -0
- package/dist/cli.js +2141 -1268
- package/dist/commands/add-cli.js +279 -0
- package/dist/commands/agent-dispatch.js +20 -12
- package/dist/commands/agent-support.js +11 -5
- package/dist/commands/completions.js +3 -0
- package/dist/commands/config-cli.js +129 -517
- package/dist/commands/consolidate.js +1533 -144
- package/dist/commands/curate.js +44 -3
- package/dist/commands/db-cli.js +23 -0
- package/dist/commands/distill-promotion-policy.js +5 -3
- package/dist/commands/distill.js +906 -100
- package/dist/commands/env.js +213 -0
- package/dist/commands/eval-cases.js +3 -0
- package/dist/commands/events.js +3 -0
- package/dist/commands/extract-cli.js +127 -0
- package/dist/commands/extract-prompt.js +204 -0
- package/dist/commands/extract.js +477 -0
- package/dist/commands/feedback-cli.js +331 -0
- package/dist/commands/graph.js +260 -5
- package/dist/commands/health.js +977 -51
- package/dist/commands/help/help-accept.md +6 -3
- package/dist/commands/help/help-improve.md +36 -8
- package/dist/commands/help/help-proposals.md +7 -4
- package/dist/commands/help/help-reject.md +5 -2
- package/dist/commands/history.js +51 -16
- package/dist/commands/improve-auto-accept.js +97 -0
- package/dist/commands/improve-cli.js +236 -0
- package/dist/commands/improve-profiles.js +184 -0
- package/dist/commands/improve-result-file.js +167 -0
- package/dist/commands/improve.js +1725 -332
- package/dist/commands/info.js +3 -0
- package/dist/commands/init.js +49 -1
- package/dist/commands/installed-stashes.js +6 -23
- package/dist/commands/knowledge.js +3 -0
- package/dist/commands/lint/agent-linter.js +3 -0
- package/dist/commands/lint/base-linter.js +199 -5
- package/dist/commands/lint/command-linter.js +3 -0
- package/dist/commands/lint/default-linter.js +3 -0
- package/dist/commands/lint/env-key-rules.js +154 -0
- package/dist/commands/lint/index.js +92 -3
- package/dist/commands/lint/knowledge-linter.js +3 -0
- package/dist/commands/lint/markdown-insertion.js +343 -0
- package/dist/commands/lint/memory-linter.js +3 -0
- package/dist/commands/lint/registry.js +3 -0
- package/dist/commands/lint/skill-linter.js +3 -0
- package/dist/commands/lint/task-linter.js +15 -12
- package/dist/commands/lint/types.js +3 -0
- package/dist/commands/lint/workflow-linter.js +3 -0
- package/dist/commands/lint.js +3 -0
- package/dist/commands/migration-help.js +5 -2
- package/dist/commands/proposal-drain-policies.js +128 -0
- package/dist/commands/proposal-drain.js +477 -0
- package/dist/commands/proposal.js +60 -6
- package/dist/commands/propose.js +24 -19
- package/dist/commands/reflect.js +1004 -94
- package/dist/commands/registry-cli.js +150 -0
- package/dist/commands/registry-search.js +3 -0
- package/dist/commands/remember-cli.js +257 -0
- package/dist/commands/remember.js +15 -6
- package/dist/commands/schema-repair.js +88 -15
- package/dist/commands/search.js +99 -14
- package/dist/commands/secret.js +173 -0
- package/dist/commands/self-update.js +3 -0
- package/dist/commands/show.js +32 -13
- package/dist/commands/source-add.js +7 -35
- package/dist/commands/source-clone.js +3 -0
- package/dist/commands/source-manage.js +3 -0
- package/dist/commands/tasks.js +161 -95
- package/dist/commands/url-checker.js +3 -0
- package/dist/core/action-contributors.js +3 -0
- package/dist/core/asset-ref.js +13 -2
- package/dist/core/asset-registry.js +9 -2
- package/dist/core/asset-serialize.js +88 -0
- package/dist/core/asset-spec.js +61 -5
- package/dist/core/common.js +93 -5
- package/dist/core/concurrent.js +3 -0
- package/dist/core/config-io.js +347 -0
- package/dist/core/config-migration.js +622 -0
- package/dist/core/config-schema.js +558 -0
- package/dist/core/config-sources.js +108 -0
- package/dist/core/config-types.js +4 -0
- package/dist/core/config-walker.js +337 -0
- package/dist/core/config.js +366 -1077
- package/dist/core/errors.js +42 -20
- package/dist/core/events.js +31 -25
- package/dist/core/file-lock.js +104 -0
- package/dist/core/frontmatter.js +75 -10
- package/dist/core/lesson-lint.js +3 -0
- package/dist/core/markdown.js +3 -0
- package/dist/core/memory-belief.js +62 -0
- package/dist/core/memory-contradiction-detect.js +274 -0
- package/dist/core/memory-improve.js +142 -14
- package/dist/core/parse.js +3 -0
- package/dist/core/paths.js +218 -50
- package/dist/core/proposal-quality-validators.js +380 -0
- package/dist/core/proposal-validators.js +11 -3
- package/dist/core/proposals.js +464 -5
- package/dist/core/state-db.js +349 -56
- package/dist/core/text-truncation.js +107 -0
- package/dist/core/time.js +3 -0
- package/dist/core/tty.js +59 -0
- package/dist/core/warn.js +7 -2
- package/dist/core/write-source.js +12 -0
- package/dist/indexer/db-backup.js +391 -0
- package/dist/indexer/db-search.js +136 -28
- package/dist/indexer/db.js +661 -166
- package/dist/indexer/ensure-index.js +3 -0
- package/dist/indexer/file-context.js +3 -0
- package/dist/indexer/graph-boost.js +162 -40
- package/dist/indexer/graph-db.js +241 -51
- package/dist/indexer/graph-dedup.js +3 -7
- package/dist/indexer/graph-extraction.js +242 -149
- package/dist/indexer/index-context.js +3 -9
- package/dist/indexer/indexer.js +84 -14
- package/dist/indexer/llm-cache.js +24 -19
- package/dist/indexer/manifest.js +3 -0
- package/dist/indexer/matchers.js +184 -11
- package/dist/indexer/memory-inference.js +94 -50
- package/dist/indexer/metadata-contributors.js +3 -0
- package/dist/indexer/metadata.js +110 -50
- package/dist/indexer/path-resolver.js +3 -0
- package/dist/indexer/project-context.js +192 -0
- package/dist/indexer/ranking-contributors.js +134 -7
- package/dist/indexer/ranking.js +8 -1
- package/dist/indexer/search-fields.js +5 -9
- package/dist/indexer/search-hit-enrichers.js +91 -2
- package/dist/indexer/search-source.js +20 -1
- package/dist/indexer/semantic-status.js +4 -1
- package/dist/indexer/staleness-detect.js +447 -0
- package/dist/indexer/usage-events.js +12 -9
- package/dist/indexer/walker.js +3 -0
- package/dist/integrations/agent/builders.js +135 -0
- package/dist/integrations/agent/config.js +121 -401
- package/dist/integrations/agent/detect.js +3 -0
- package/dist/integrations/agent/index.js +6 -14
- package/dist/integrations/agent/model-aliases.js +55 -0
- package/dist/integrations/agent/profiles.js +3 -0
- package/dist/integrations/agent/prompts.js +137 -8
- package/dist/integrations/agent/runner.js +208 -0
- package/dist/integrations/agent/sdk-runner.js +8 -2
- package/dist/integrations/agent/spawn.js +54 -14
- package/dist/integrations/github.js +3 -0
- package/dist/integrations/lockfile.js +22 -51
- package/dist/integrations/session-logs/index.js +4 -0
- package/dist/integrations/session-logs/inline-refs.js +35 -0
- package/dist/integrations/session-logs/pre-filter.js +152 -0
- package/dist/integrations/session-logs/providers/claude-code.js +226 -0
- package/dist/integrations/session-logs/providers/opencode.js +231 -25
- package/dist/integrations/session-logs/types.js +3 -0
- package/dist/llm/call-ai.js +14 -26
- package/dist/llm/client.js +16 -2
- package/dist/llm/embedder.js +20 -29
- package/dist/llm/embedders/cache.js +3 -7
- package/dist/llm/embedders/local.js +42 -1
- package/dist/llm/embedders/remote.js +20 -8
- package/dist/llm/embedders/types.js +3 -7
- package/dist/llm/feature-gate.js +92 -56
- package/dist/llm/graph-extract.js +401 -30
- package/dist/llm/index-passes.js +44 -29
- package/dist/llm/memory-infer.js +30 -2
- package/dist/llm/metadata-enhance.js +3 -7
- package/dist/llm/prompts/extract-session.md +80 -0
- package/dist/llm/prompts/graph-extract-user-prompt.md +24 -1
- package/dist/output/cli-hints-full.md +60 -32
- package/dist/output/cli-hints-short.md +10 -7
- package/dist/output/cli-hints.js +5 -2
- package/dist/output/context.js +60 -8
- package/dist/output/renderers.js +170 -194
- package/dist/output/shapes/curate.js +56 -0
- package/dist/output/shapes/distill.js +10 -0
- package/dist/output/shapes/env-list.js +19 -0
- package/dist/output/shapes/events.js +11 -0
- package/dist/output/shapes/helpers.js +424 -0
- package/dist/output/shapes/history.js +7 -0
- package/dist/output/shapes/passthrough.js +105 -0
- package/dist/output/shapes/proposal-accept.js +7 -0
- package/dist/output/shapes/proposal-diff.js +7 -0
- package/dist/output/shapes/proposal-list.js +7 -0
- package/dist/output/shapes/proposal-producer.js +11 -0
- package/dist/output/shapes/proposal-reject.js +7 -0
- package/dist/output/shapes/proposal-show.js +7 -0
- package/dist/output/shapes/registry-search.js +6 -0
- package/dist/output/shapes/registry.js +30 -0
- package/dist/output/shapes/search.js +6 -0
- package/dist/output/shapes/secret-list.js +19 -0
- package/dist/output/shapes/show.js +6 -0
- package/dist/output/shapes/vault-list.js +19 -0
- package/dist/output/shapes.js +51 -549
- package/dist/output/text/add.js +6 -0
- package/dist/output/text/clone.js +6 -0
- package/dist/output/text/config.js +6 -0
- package/dist/output/text/curate.js +6 -0
- package/dist/output/text/distill.js +7 -0
- package/dist/output/text/enable-disable.js +7 -0
- package/dist/output/text/events.js +10 -0
- package/dist/output/text/feedback.js +6 -0
- package/dist/output/text/helpers.js +1059 -0
- package/dist/output/text/history.js +7 -0
- package/dist/output/text/import.js +6 -0
- package/dist/output/text/index.js +6 -0
- package/dist/output/text/info.js +6 -0
- package/dist/output/text/init.js +6 -0
- package/dist/output/text/list.js +6 -0
- package/dist/output/text/proposal-producer.js +8 -0
- package/dist/output/text/proposal.js +12 -0
- package/dist/output/text/registry-commands.js +11 -0
- package/dist/output/text/registry.js +30 -0
- package/dist/output/text/remember.js +6 -0
- package/dist/output/text/remove.js +6 -0
- package/dist/output/text/save.js +6 -0
- package/dist/output/text/search.js +6 -0
- package/dist/output/text/show.js +6 -0
- package/dist/output/text/update.js +6 -0
- package/dist/output/text/upgrade.js +6 -0
- package/dist/output/text/vault.js +16 -0
- package/dist/output/text/wiki.js +15 -0
- package/dist/output/text/workflow.js +14 -0
- package/dist/output/text.js +44 -1329
- package/dist/registry/build-index.js +3 -0
- package/dist/registry/create-provider-registry.js +3 -0
- package/dist/registry/factory.js +4 -1
- package/dist/registry/origin-resolve.js +3 -0
- package/dist/registry/providers/index.js +3 -0
- package/dist/registry/providers/skills-sh.js +11 -2
- package/dist/registry/providers/static-index.js +10 -1
- package/dist/registry/providers/types.js +3 -24
- package/dist/registry/resolve.js +11 -16
- package/dist/registry/types.js +3 -0
- package/dist/scripts/migrate-storage.js +17767 -0
- package/dist/scripts/migrations/import-fs-improve-runs-to-db.js +9031 -0
- package/dist/scripts/migrations/v16-to-v17.js +141 -0
- package/dist/setup/detect.js +3 -0
- package/dist/setup/ripgrep-install.js +3 -0
- package/dist/setup/ripgrep-resolve.js +3 -0
- package/dist/setup/setup.js +306 -67
- package/dist/setup/steps.js +3 -15
- package/dist/sources/include.js +3 -0
- package/dist/sources/provider-factory.js +3 -11
- package/dist/sources/provider.js +3 -20
- package/dist/sources/providers/filesystem.js +19 -23
- package/dist/sources/providers/git.js +171 -21
- package/dist/sources/providers/index.js +3 -0
- package/dist/sources/providers/install-types.js +3 -13
- package/dist/sources/providers/npm.js +3 -4
- package/dist/sources/providers/provider-utils.js +3 -0
- package/dist/sources/providers/sync-from-ref.js +3 -11
- package/dist/sources/providers/tar-utils.js +3 -0
- package/dist/sources/providers/website.js +18 -22
- package/dist/sources/resolve.js +3 -0
- package/dist/sources/types.js +3 -0
- package/dist/sources/website-ingest.js +3 -0
- package/dist/tasks/backends/cron.js +3 -0
- package/dist/tasks/backends/exec-utils.js +3 -0
- package/dist/tasks/backends/index.js +3 -11
- package/dist/tasks/backends/launchd.js +3 -0
- package/dist/tasks/backends/schtasks.js +3 -0
- package/dist/tasks/parser.js +51 -38
- package/dist/tasks/resolveAkmBin.js +3 -0
- package/dist/tasks/runner.js +35 -9
- package/dist/tasks/schedule.js +20 -1
- package/dist/tasks/schema.js +5 -3
- package/dist/tasks/validator.js +6 -3
- package/dist/version.js +3 -0
- package/dist/wiki/wiki-templates.js +3 -0
- package/dist/wiki/wiki.js +3 -0
- package/dist/workflows/authoring.js +3 -0
- package/dist/workflows/cli.js +3 -0
- package/dist/workflows/db.js +140 -10
- package/dist/workflows/document-cache.js +3 -10
- package/dist/workflows/parser.js +3 -0
- package/dist/workflows/renderer.js +3 -0
- package/dist/workflows/runs.js +18 -1
- package/dist/workflows/schema.js +3 -0
- package/dist/workflows/scope-key.js +3 -0
- package/dist/workflows/validator.js +5 -9
- package/docs/README.md +7 -2
- package/docs/data-and-telemetry.md +225 -0
- package/docs/migration/release-notes/0.7.5.md +2 -2
- package/docs/migration/release-notes/0.8.0.md +57 -5
- package/docs/migration/v0.7-to-v0.8.md +1378 -0
- package/package.json +28 -11
- package/.github/LICENSE +0 -374
- package/dist/commands/install-audit.js +0 -385
- package/dist/commands/vault.js +0 -310
- package/dist/indexer/match-contributors.js +0 -141
- package/dist/integrations/agent/pipeline.js +0 -39
- package/dist/integrations/agent/runners.js +0 -31
package/dist/commands/improve.js
CHANGED
|
@@ -1,26 +1,43 @@
|
|
|
1
|
+
// This Source Code Form is subject to the terms of the Mozilla Public
|
|
2
|
+
// License, v. 2.0. If a copy of the MPL was not distributed with this
|
|
3
|
+
// file, You can obtain one at https://mozilla.org/MPL/2.0/.
|
|
1
4
|
import fs from "node:fs";
|
|
2
5
|
import path from "node:path";
|
|
3
6
|
import { makeAssetRef, parseAssetRef } from "../core/asset-ref";
|
|
4
|
-
import {
|
|
5
|
-
import {
|
|
7
|
+
import { daysToMs, isAssetType } from "../core/common";
|
|
8
|
+
import { getDefaultLlmConfig, loadConfig } from "../core/config";
|
|
9
|
+
import { ConfigError, NotFoundError, rethrowIfTestIsolationError, UsageError } from "../core/errors";
|
|
6
10
|
import { appendEvent, readEvents } from "../core/events";
|
|
11
|
+
import { probeLock, releaseLock, tryAcquireLockSync } from "../core/file-lock";
|
|
7
12
|
import { parseFrontmatter } from "../core/frontmatter";
|
|
13
|
+
import { detectAndWriteContradictions } from "../core/memory-contradiction-detect";
|
|
8
14
|
import { analyzeMemoryCleanup, applyMemoryCleanup, } from "../core/memory-improve";
|
|
9
15
|
import { getDbPath } from "../core/paths";
|
|
10
|
-
import { listProposals } from "../core/proposals";
|
|
16
|
+
import { createProposal, expireStaleProposals, getProposal, isProposalSkipped, listProposals, purgeOrphanProposals, } from "../core/proposals";
|
|
17
|
+
import { openStateDatabase, purgeOldEvents, purgeOldImproveRuns } from "../core/state-db";
|
|
11
18
|
import { info, warn } from "../core/warn";
|
|
12
|
-
import { closeDatabase, getAllEntries, getRetrievalCounts, getUtilityScoresByIds, getZeroResultSearches, openDatabase, openExistingDatabase, } from "../indexer/db";
|
|
19
|
+
import { closeDatabase, getAllEntries, getEntryCount, getRetrievalCounts, getUtilityScoresByIds, getZeroResultSearches, openDatabase, openExistingDatabase, } from "../indexer/db";
|
|
13
20
|
import { ensureIndex } from "../indexer/ensure-index";
|
|
14
21
|
import { runGraphExtractionPass } from "../indexer/graph-extraction";
|
|
15
22
|
import { akmIndex } from "../indexer/indexer";
|
|
16
23
|
import { runMemoryInferencePass, } from "../indexer/memory-inference";
|
|
17
24
|
import { resolveAssetPath } from "../indexer/path-resolver";
|
|
18
25
|
import { getWritableStashDirs, resolveSourceEntries } from "../indexer/search-source";
|
|
19
|
-
import {
|
|
26
|
+
import { runStalenessDetectionPass } from "../indexer/staleness-detect";
|
|
27
|
+
import { resolveImproveProcessRunnerFromProfile, resolveTriageJudgmentRunner } from "../integrations/agent/runner";
|
|
28
|
+
import { getAvailableHarnesses, getExecutionLogCandidates } from "../integrations/session-logs";
|
|
29
|
+
import { isLlmFeatureEnabled, isProcessEnabled } from "../llm/feature-gate";
|
|
30
|
+
import { isGitBackedStash, resolveWritableOverride, saveGitStash } from "../sources/providers/git";
|
|
20
31
|
import { akmConsolidate } from "./consolidate";
|
|
21
|
-
import { akmDistill, deriveLessonRef } from "./distill";
|
|
32
|
+
import { akmDistill, deriveLessonRef, isDistillRefusedInputType } from "./distill";
|
|
33
|
+
import { deriveKnowledgeRef } from "./distill-promotion-policy";
|
|
22
34
|
import { countEvalCases, writeEvalCase } from "./eval-cases";
|
|
35
|
+
import { akmExtract } from "./extract";
|
|
36
|
+
import { makeGateConfig, resolveExtractConfidence, runAutoAcceptGate } from "./improve-auto-accept";
|
|
37
|
+
import { isProfileFilteredForAllPasses, resolveImproveProfile, resolveProcessEnabled, shouldSkipRef, } from "./improve-profiles";
|
|
23
38
|
import { akmLint } from "./lint/index";
|
|
39
|
+
import { drainProposals } from "./proposal-drain";
|
|
40
|
+
import { resolveDrainPolicy } from "./proposal-drain-policies";
|
|
24
41
|
import { akmReflect } from "./reflect";
|
|
25
42
|
import { runSchemaRepairPass } from "./schema-repair";
|
|
26
43
|
import { checkDeadUrls } from "./url-checker";
|
|
@@ -33,10 +50,47 @@ function resolveImproveScope(scope) {
|
|
|
33
50
|
return { mode: "ref", value: trimmed };
|
|
34
51
|
}
|
|
35
52
|
catch {
|
|
53
|
+
if (!isAssetType(trimmed)) {
|
|
54
|
+
throw new UsageError(`Unknown asset type: "${trimmed}". Valid types: memory, knowledge, skill, lesson, workflow, agent, command, script, wiki, env, vault, task.\n` +
|
|
55
|
+
`If you passed --format to akm improve, that flag is not supported — use it with akm search or akm show instead.`, "INVALID_FLAG_VALUE");
|
|
56
|
+
}
|
|
36
57
|
return { mode: "type", value: trimmed };
|
|
37
58
|
}
|
|
38
59
|
}
|
|
39
|
-
|
|
60
|
+
/**
 * Build the commit message for the end-of-run stash sync by substituting
 * `{token}` placeholders in `template` with values taken from this run.
 *
 * Recognised tokens (all derived from `result` and `nowMs`):
 *   {timestamp}  `YYYY-MM-DD HH:MM:SS` (UTC)
 *   {date}       `YYYY-MM-DD` (UTC)
 *   {time}       `HH:MM:SS` (UTC)
 *   {scope}      `result.scope.value`, or `result.scope.mode` when the value is nullish
 *   {refs}       `result.plannedRefs.length`
 *   {accepted}   `result.gateAutoAcceptedCount`, or 0 when that field is nullish
 *
 * Any `{word}` whose name is not listed above is copied through unchanged, so
 * templates keep working when new tokens are introduced and a stray brace in a
 * message does no harm.
 *
 * NOTE(review): the rendered text is expected to be handed to `saveGitStash`,
 * which is said to run it through `sanitizeCommitMessage` (collapsing
 * newlines/control characters). That step is not visible in this function —
 * confirm before relying on it.
 *
 * The clock is supplied via `nowMs` instead of being read here, so identical
 * arguments always yield an identical message.
 *
 * @param {string} template Commit-message template containing `{token}` placeholders.
 * @param {object} result Improve-run result; `scope`, `plannedRefs` and
 *   `gateAutoAcceptedCount` are the only fields read.
 * @param {number} nowMs Epoch milliseconds used for the date/time tokens.
 * @returns {string} The template with every recognised token expanded.
 */
export function renderSyncCommitMessage(template, result, nowMs) {
    const isoStamp = new Date(nowMs).toISOString();
    const datePart = isoStamp.substring(0, 10);
    const clockPart = isoStamp.substring(11, 19);
    // A Map keeps lookups limited to the tokens declared here; names such as
    // `constructor` can never resolve to an inherited property.
    const replacements = new Map([
        ["timestamp", `${datePart} ${clockPart}`],
        ["date", datePart],
        ["time", clockPart],
        ["scope", result.scope.value ?? result.scope.mode],
        ["refs", String(result.plannedRefs.length)],
        ["accepted", String(result.gateAutoAcceptedCount ?? 0)],
    ]);
    return template.replace(/\{(\w+)\}/g, (placeholder, name) => {
        if (!replacements.has(name)) {
            return placeholder;
        }
        return replacements.get(name);
    });
}
|
|
93
|
+
async function collectEligibleRefs(scope, stashDir, improveProfile) {
|
|
40
94
|
if (scope.mode === "ref" && scope.value) {
|
|
41
95
|
const parsed = parseAssetRef(scope.value);
|
|
42
96
|
const writableDirs = new Set(getWritableStashDirs(stashDir).map((dir) => path.resolve(dir)));
|
|
@@ -45,6 +99,7 @@ async function collectEligibleRefs(scope, stashDir) {
|
|
|
45
99
|
return {
|
|
46
100
|
plannedRefs: [],
|
|
47
101
|
memorySummary: { eligible: 0, derived: 0 },
|
|
102
|
+
profileFilteredRefs: [],
|
|
48
103
|
};
|
|
49
104
|
}
|
|
50
105
|
return {
|
|
@@ -53,6 +108,7 @@ async function collectEligibleRefs(scope, stashDir) {
|
|
|
53
108
|
eligible: parsed.type === "memory" ? 1 : 0,
|
|
54
109
|
derived: parsed.type === "memory" && parsed.name.endsWith(".derived") ? 1 : 0,
|
|
55
110
|
},
|
|
111
|
+
profileFilteredRefs: [],
|
|
56
112
|
};
|
|
57
113
|
}
|
|
58
114
|
let sources;
|
|
@@ -60,10 +116,10 @@ async function collectEligibleRefs(scope, stashDir) {
|
|
|
60
116
|
sources = resolveSourceEntries(stashDir);
|
|
61
117
|
}
|
|
62
118
|
catch {
|
|
63
|
-
return { plannedRefs: [], memorySummary: { eligible: 0, derived: 0 } };
|
|
119
|
+
return { plannedRefs: [], memorySummary: { eligible: 0, derived: 0 }, profileFilteredRefs: [] };
|
|
64
120
|
}
|
|
65
121
|
if (sources.length === 0) {
|
|
66
|
-
return { plannedRefs: [], memorySummary: { eligible: 0, derived: 0 } };
|
|
122
|
+
return { plannedRefs: [], memorySummary: { eligible: 0, derived: 0 }, profileFilteredRefs: [] };
|
|
67
123
|
}
|
|
68
124
|
// Only operate on writable sources — never mutate read-only registry caches
|
|
69
125
|
// or remote stashes that the user did not mark writable.
|
|
@@ -86,30 +142,57 @@ async function collectEligibleRefs(scope, stashDir) {
|
|
|
86
142
|
return isEntryInWritableSource(indexed.stashDir, indexed.filePath, writableDirSet);
|
|
87
143
|
});
|
|
88
144
|
const planned = new Map();
|
|
145
|
+
const profileFiltered = new Map();
|
|
89
146
|
let memoryEligible = 0;
|
|
90
147
|
let memoryDerived = 0;
|
|
91
148
|
for (const indexed of entries) {
|
|
92
149
|
const ref = makeAssetRef(indexed.entry.type, indexed.entry.name);
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
150
|
+
const isDerived = indexed.entry.name.endsWith(".derived");
|
|
151
|
+
// `.derived` memories are LLM-inferred and intentionally skip reflect
|
|
152
|
+
// (see the synthetic `derived-memory-reflect-skipped` branch in the
|
|
153
|
+
// improve loop). Enqueueing them here just produced one synthetic skip
|
|
154
|
+
// per derived memory per hour with no real work — pure churn observed
|
|
155
|
+
// 2026-05-21: 11 derived refs re-planned every hour during idle periods.
|
|
156
|
+
// The cleanup phase (analyzeMemoryCleanup) inspects derived memories
|
|
157
|
+
// independently of `plannedRefs`, so dropping them here loses nothing.
|
|
158
|
+
if (!isDerived && !planned.has(ref) && !profileFiltered.has(ref)) {
|
|
159
|
+
// 2026-05-27: extend the .derived precedent to profile-incompatible
|
|
160
|
+
// refs. If every per-ref pass (reflect + distill) on the active
|
|
161
|
+
// profile would refuse this ref, drop it from `plannedRefs`. The
|
|
162
|
+
// caller emits `improve_skipped { reason: profile_filtered_all_passes }`
|
|
163
|
+
// once `eventsCtx` is available so the audit trail is preserved in a
|
|
164
|
+
// single event per ref instead of 2× synthetic actions per run.
|
|
165
|
+
// Background: see /tmp/akm-health-investigations/planner-profile-metrics-deep-analysis.md
|
|
166
|
+
if (improveProfile && isProfileFilteredForAllPasses(ref, improveProfile)) {
|
|
167
|
+
profileFiltered.set(ref, {
|
|
168
|
+
ref,
|
|
169
|
+
reason: "profile_filtered_all_passes",
|
|
170
|
+
});
|
|
171
|
+
}
|
|
172
|
+
else {
|
|
173
|
+
planned.set(ref, {
|
|
174
|
+
ref,
|
|
175
|
+
reason: scope.mode === "type" ? "scope-type" : indexed.entry.type === "memory" ? "memory-cleanup" : "scope-type",
|
|
176
|
+
});
|
|
177
|
+
}
|
|
98
178
|
}
|
|
99
179
|
if (indexed.entry.type === "memory") {
|
|
100
180
|
memoryEligible += 1;
|
|
101
|
-
if (
|
|
181
|
+
if (isDerived)
|
|
102
182
|
memoryDerived += 1;
|
|
103
183
|
}
|
|
104
184
|
}
|
|
105
185
|
return {
|
|
106
186
|
plannedRefs: [...planned.values()],
|
|
107
187
|
memorySummary: { eligible: memoryEligible, derived: memoryDerived },
|
|
188
|
+
profileFilteredRefs: [...profileFiltered.values()],
|
|
108
189
|
};
|
|
109
190
|
}
|
|
110
191
|
catch (error) {
|
|
192
|
+
// The bun-test isolation guard must never be downgraded to "empty plan".
|
|
193
|
+
rethrowIfTestIsolationError(error);
|
|
111
194
|
if (error instanceof NotFoundError || error instanceof Error) {
|
|
112
|
-
return { plannedRefs: [], memorySummary: { eligible: 0, derived: 0 } };
|
|
195
|
+
return { plannedRefs: [], memorySummary: { eligible: 0, derived: 0 }, profileFilteredRefs: [] };
|
|
113
196
|
}
|
|
114
197
|
throw error;
|
|
115
198
|
}
|
|
@@ -172,15 +255,37 @@ function memoryCleanupParentRef(scope, stashDir) {
|
|
|
172
255
|
}
|
|
173
256
|
return makeAssetRef("memory", parsed.name.slice(0, -".derived".length));
|
|
174
257
|
}
|
|
175
|
-
function filterRemovedPlannedRefs(plannedRefs, archivedRefs) {
|
|
176
|
-
if (archivedRefs.length === 0)
|
|
177
|
-
return plannedRefs;
|
|
178
|
-
const removed = new Set(archivedRefs);
|
|
179
|
-
return plannedRefs.filter((planned) => !removed.has(planned.ref));
|
|
180
|
-
}
|
|
181
258
|
/**
 * Report whether `ref` points at a lesson asset.
 *
 * Lesson assets are the only ones that need lesson-schema validation
 * (description + when_to_use). Memories take their own distill path through
 * `shouldDistillMemoryRef`, and every other type is handled by reflect rather
 * than distill.
 *
 * @param {string} ref Asset ref, parsed with `parseAssetRef`.
 * @returns {boolean} `true` only when the parsed type is exactly `"lesson"`.
 */
function isLessonCandidate(ref) {
    const { type: assetType } = parseAssetRef(ref);
    return assetType === "lesson";
}
|
|
264
|
+
/**
 * Planner-side gate deciding whether `ref` should be placed on the distill queue.
 *
 * Distill turns non-lesson sources into lessons. The decision has two steps:
 *
 *   1. A ref whose type is rejected by `isDistillRefusedInputType` is never
 *      queued. Per the original design note this is the
 *      `DISTILL_REFUSED_INPUT_TYPES` set (currently `lesson:*`): distill
 *      refuses those at runtime, so queuing them only yields a no-op
 *      `skipped` outcome per ref per hour.
 *   2. Every other ref is eligible exactly when {@link shouldDistillMemoryRef}
 *      accepts it (the memory→lesson/knowledge promotion path).
 *
 * History: before the commit "fix(improve): drop distill-refused types from
 * planner" the gate called `isLessonCandidate(ref)` directly, which is true
 * *only* for `lesson:*` refs — precisely the set distill refuses. Each hourly
 * run therefore re-queued the same lesson refs, got the same skip message
 * back, and did no work. See
 * `tests/commands/improve-distill-planner-skip-lessons.test.ts`.
 *
 * @param {string} ref Asset ref to evaluate.
 * @param {string} [stashDir] Forwarded unchanged to `shouldDistillMemoryRef`.
 * @returns {boolean} `false` for refused types, otherwise whatever
 *   `shouldDistillMemoryRef(ref, stashDir)` returns.
 */
function isDistillCandidateRef(ref, stashDir) {
    const { type: assetType } = parseAssetRef(ref);
    return isDistillRefusedInputType(assetType) ? false : shouldDistillMemoryRef(ref, stashDir);
}
|
|
185
290
|
function shouldDistillMemoryRef(ref, stashDir) {
|
|
186
291
|
const parsed = parseAssetRef(ref);
|
|
@@ -200,13 +305,115 @@ function shouldDistillMemoryRef(ref, stashDir) {
|
|
|
200
305
|
}
|
|
201
306
|
return !parsed.name.endsWith(".derived");
|
|
202
307
|
}
|
|
308
|
+
// ── Signal-delta eligibility helpers (0.8.0) ────────────────────────────────
|
|
309
|
+
//
|
|
310
|
+
// The 0.8.0 redesign replaced flat time-based cooldowns for reflect/distill
|
|
311
|
+
// with a *signal-delta* gate: a ref is re-eligible iff new feedback has
|
|
312
|
+
// landed since the last proposal was generated for it. These helpers build
|
|
313
|
+
// the two timestamp maps the gate needs in bulk, so the planner avoids
|
|
314
|
+
// N+1 queries across the full postCleanupRefs set.
|
|
315
|
+
/**
 * Latest feedback event timestamp per ref in the active window.
 *
 * All `feedback` events newer than `sinceIso` are fetched with a single
 * `readEvents` call and indexed by ref, keeping the maximum `ts` seen for each
 * ref that appears in `refs`.
 *
 * Only events with a meaningful payload count as "signal": a string
 * `metadata.signal` (positive/negative) OR a string `metadata.note` (a
 * free-form annotation). Events whose metadata is missing, `null`, or carries
 * neither field are ignored, so a stray `akm feedback <ref>` invocation
 * without a flag does not trigger downstream re-processing.
 *
 * @param {string[]} refs Refs to index; an empty list short-circuits without
 *   reading any events.
 * @param {string} sinceIso Lower bound passed to `readEvents` as `since`.
 * @returns {Map<string, string>} Map from ref to the greatest `ts` string among
 *   its signal-bearing feedback events. Refs without such events are absent.
 */
function buildLatestFeedbackTsMap(refs, sinceIso) {
    const out = new Map();
    if (refs.length === 0)
        return out;
    const refSet = new Set(refs);
    const { events } = readEvents({ type: "feedback", since: sinceIso });
    for (const e of events) {
        const ref = e.ref;
        if (!ref || !refSet.has(ref))
            continue;
        // Optional chaining tolerates both `undefined` and `null` metadata; the
        // previous `meta !== undefined` guard threw a TypeError on `null`.
        const meta = e.metadata;
        const hasSignal = typeof meta?.signal === "string" || typeof meta?.note === "string";
        if (!hasSignal)
            continue;
        // Timestamps are compared as strings — assumes they share one sortable
        // format (e.g. ISO-8601); TODO confirm against the event writer.
        const ts = e.ts ?? "";
        if (ts > (out.get(ref) ?? ""))
            out.set(ref, ts);
    }
    return out;
}
|
|
345
|
+
/**
 * Latest proposal timestamp per input ref for one producer (`'reflect'` or
 * `'distill'`).
 *
 * The timestamps come from the matching `*_invoked` events obtained through
 * `readEvents`: `reflect_invoked` when `source` is `"reflect"`, and
 * `distill_invoked` for any other value. Per the original design note these
 * events are emitted at proposal creation time and carry the *input* asset ref
 * (memory:foo, skill:bar, etc.) directly. They are preferred over
 * `listProposals` because distill proposals are keyed by the derived
 * lesson/knowledge ref rather than the source memory, and joining back
 * through the payload would be fragile.
 *
 * @param {string[]} refs Input refs of interest; an empty list short-circuits
 *   without reading any events.
 * @param {string} source Producer name selecting the event type.
 * @returns {Map<string, string>} Map from ref to the greatest `ts` string among
 *   its counted events. Refs without counted events are absent.
 */
function buildLatestProposalTsMap(refs, source) {
    const latestByRef = new Map();
    if (refs.length === 0) {
        return latestByRef;
    }
    const wantedRefs = new Set(refs);
    const isReflect = source === "reflect";
    const { events } = readEvents({ type: isReflect ? "reflect_invoked" : "distill_invoked" });
    // distill_invoked outcomes that advance the signal-delta cursor. Attempts
    // that never got as far as a real proposal (config_disabled / parse-error
    // outcomes) are deliberately left out.
    const countedDistillOutcomes = ["queued", "skipped", "validation_failed"];
    for (const event of events) {
        const eventRef = event.ref;
        const relevant = Boolean(eventRef) && wantedRefs.has(eventRef);
        if (!relevant) {
            continue;
        }
        if (!isReflect && !countedDistillOutcomes.includes(event.metadata?.outcome)) {
            continue;
        }
        const knownTs = latestByRef.get(eventRef) ?? "";
        const eventTs = event.ts ?? "";
        if (eventTs > knownTs) {
            latestByRef.set(eventRef, eventTs);
        }
    }
    return latestByRef;
}
|
|
379
|
+
/**
 * Signal-delta eligibility predicate.
 *
 * A ref qualifies when it has a feedback timestamp AND either has no recorded
 * proposal timestamp for this producer or its feedback timestamp is strictly
 * greater than that proposal timestamp.
 *
 * A ref with no feedback entry is never eligible here. According to the
 * planner notes, never-touched-but-frequently-read assets are picked up
 * separately by the high-retrieval fallback (`noFeedbackCandidates`), which is
 * outside this function.
 *
 * @param {string} ref Asset ref to test.
 * @param {Map<string, string>} latestFeedback Ref → latest feedback timestamp.
 * @param {Map<string, string>} lastProposal Ref → latest proposal timestamp.
 * @returns {boolean} Whether new feedback has landed since the last proposal.
 */
function isSignalDeltaEligible(ref, latestFeedback, lastProposal) {
    const feedbackTs = latestFeedback.get(ref);
    const proposalTs = lastProposal.get(ref);
    // Missing (or empty) feedback rules the ref out; a missing (or empty)
    // proposal timestamp means nothing has been proposed yet.
    return Boolean(feedbackTs) && (!proposalTs || feedbackTs > proposalTs);
}
|
|
203
398
|
export async function akmImprove(options = {}) {
|
|
204
399
|
const scope = resolveImproveScope(options.scope);
|
|
205
|
-
const { plannedRefs, memorySummary } = await collectEligibleRefs(scope, options.stashDir);
|
|
206
400
|
const reflectFn = options.reflectFn ?? akmReflect;
|
|
207
401
|
const distillFn = options.distillFn ?? akmDistill;
|
|
208
402
|
const ensureIndexFn = options.ensureIndexFn ?? ensureIndex;
|
|
209
403
|
const reindexFn = options.reindexFn ?? akmIndex;
|
|
404
|
+
const drainProposalsFn = options.drainProposalsFn ?? drainProposals;
|
|
405
|
+
// Resolve the improve profile for this run. Profile drives type filtering,
|
|
406
|
+
// process gating, and default autoAccept/limit values.
|
|
407
|
+
const _earlyConfig = options.config ?? loadConfig();
|
|
408
|
+
const improveProfile = resolveImproveProfile(options.profile, _earlyConfig);
|
|
409
|
+
// Apply profile defaults — CLI flags take precedence over profile defaults.
|
|
410
|
+
// Rebuild options with effective values so all downstream stage functions
|
|
411
|
+
// automatically pick up the profile-driven defaults.
|
|
412
|
+
options = {
|
|
413
|
+
...options,
|
|
414
|
+
autoAccept: options.autoAccept ?? improveProfile.autoAccept,
|
|
415
|
+
limit: options.limit ?? improveProfile.limit,
|
|
416
|
+
};
|
|
210
417
|
let primaryStashDir;
|
|
211
418
|
try {
|
|
212
419
|
primaryStashDir = resolveSourceEntries(options.stashDir)[0]?.path;
|
|
@@ -214,63 +421,284 @@ export async function akmImprove(options = {}) {
|
|
|
214
421
|
catch {
|
|
215
422
|
primaryStashDir = undefined;
|
|
216
423
|
}
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
: undefined;
|
|
224
|
-
if (options.dryRun) {
|
|
225
|
-
const result = {
|
|
226
|
-
schemaVersion: 1,
|
|
227
|
-
ok: true,
|
|
228
|
-
scope,
|
|
229
|
-
dryRun: true,
|
|
230
|
-
...(guidance ? { guidance } : {}),
|
|
231
|
-
memorySummary,
|
|
232
|
-
...(memoryCleanupPlan ? { memoryCleanup: shapeMemoryCleanup(memoryCleanupPlan) } : {}),
|
|
233
|
-
plannedRefs,
|
|
234
|
-
};
|
|
235
|
-
return result;
|
|
236
|
-
}
|
|
424
|
+
// Phase 4 lock hoist (§7): the `improve.lock` setup is hoisted ABOVE
|
|
425
|
+
// ensureIndex/collectEligibleRefs so the triage pre-pass (and improve's own
|
|
426
|
+
// queue writes) run fully serialized under the lock. The dry-run early-return
|
|
427
|
+
// below still skips the lock and triage (the lock+triage block is gated on
|
|
428
|
+
// `!options.dryRun`); contradiction-detection and memory-cleanup analysis,
|
|
429
|
+
// which previously ran before the lock, now sit after it for free.
|
|
237
430
|
const resolvedLockPath = primaryStashDir
|
|
238
431
|
? path.join(primaryStashDir, ".akm", "improve.lock")
|
|
239
432
|
: path.join(options.stashDir ?? ".", ".akm", "improve.lock");
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
433
|
+
const MAX_LOCK_AGE_MS = 4 * 60 * 60 * 1000; // 4 hours
|
|
434
|
+
const acquireLock = () => {
|
|
435
|
+
fs.mkdirSync(path.dirname(resolvedLockPath), { recursive: true });
|
|
436
|
+
const lockPayload = () => JSON.stringify({ pid: process.pid, startedAt: new Date().toISOString() });
|
|
437
|
+
if (tryAcquireLockSync(resolvedLockPath, lockPayload()))
|
|
438
|
+
return;
|
|
439
|
+
// Lock file already exists — probe to determine whether it's still held
|
|
440
|
+
// or whether the prior run died without cleaning up.
|
|
441
|
+
const probe = probeLock(resolvedLockPath, { staleAfterMs: MAX_LOCK_AGE_MS });
|
|
442
|
+
const rawContent = probe.state === "absent" ? undefined : probe.rawContent;
|
|
443
|
+
const lock = rawContent
|
|
444
|
+
? (() => {
|
|
445
|
+
try {
|
|
446
|
+
return JSON.parse(rawContent);
|
|
447
|
+
}
|
|
448
|
+
catch {
|
|
449
|
+
return null;
|
|
450
|
+
}
|
|
451
|
+
})()
|
|
452
|
+
: null;
|
|
453
|
+
if (probe.state === "stale") {
|
|
454
|
+
// O-7 / #394: Emit improve_lock_recovered event before recovery so the
|
|
455
|
+
// audit trail records the abnormal prior-run exit (Temporal/Airflow pattern).
|
|
456
|
+
try {
|
|
457
|
+
appendEvent({
|
|
458
|
+
eventType: "improve_lock_recovered",
|
|
459
|
+
metadata: {
|
|
460
|
+
stalePid: lock?.pid ?? null,
|
|
461
|
+
lockedAt: lock?.startedAt ?? null,
|
|
462
|
+
recoveredAt: new Date().toISOString(),
|
|
463
|
+
lockAgeMs: probe.ageMs ?? null,
|
|
464
|
+
reason: probe.reason === "pid_dead" ? "pid_not_alive" : probe.reason,
|
|
465
|
+
},
|
|
466
|
+
});
|
|
467
|
+
}
|
|
468
|
+
catch {
|
|
469
|
+
/* event emission is best-effort; never block lock recovery */
|
|
470
|
+
}
|
|
471
|
+
releaseLock(resolvedLockPath);
|
|
472
|
+
if (tryAcquireLockSync(resolvedLockPath, lockPayload()))
|
|
473
|
+
return;
|
|
474
|
+
throw new ConfigError(`akm improve is already running. Delete ${resolvedLockPath} to force.`, "INVALID_CONFIG_FILE");
|
|
475
|
+
}
|
|
476
|
+
throw new ConfigError(`akm improve is already running (PID ${lock?.pid}, started ${lock?.startedAt}). Delete ${resolvedLockPath} to force.`, "INVALID_CONFIG_FILE");
|
|
477
|
+
};
|
|
478
|
+
// Phase 4 lock-leak guard (§7 ordering hazard): hoisting `improve.lock` above
|
|
479
|
+
// the pre-index region (so the triage pre-pass runs under it) means the lock is
|
|
480
|
+
// held while ensureIndex / collectEligibleRefs / contradiction-detection /
|
|
481
|
+
// memory-cleanup analysis run — but the main protecting `try { … } finally {
|
|
482
|
+
// unlinkSync(resolvedLockPath) }` does not begin until after them. A throw in
|
|
483
|
+
// any of those steps would leak the lock. We close that window by wrapping the
|
|
484
|
+
// whole region in a try whose catch releases the lock (when held) and
|
|
485
|
+
// re-throws. The values this region computes are declared in the outer scope so
|
|
486
|
+
// they remain visible to the main run below. The dry-run path never sets
|
|
487
|
+
// `lockAcquired`, so its early return releases nothing.
|
|
488
|
+
let lockAcquired = false;
|
|
489
|
+
const releaseLockOnError = () => {
|
|
490
|
+
if (!lockAcquired)
|
|
491
|
+
return;
|
|
243
492
|
try {
|
|
244
|
-
|
|
493
|
+
fs.unlinkSync(resolvedLockPath);
|
|
245
494
|
}
|
|
246
495
|
catch {
|
|
247
|
-
|
|
496
|
+
// best-effort release on the error path
|
|
248
497
|
}
|
|
249
|
-
|
|
498
|
+
lockAcquired = false;
|
|
499
|
+
};
|
|
500
|
+
const preEnsureCleanupWarnings = [];
|
|
501
|
+
let plannedRefs;
|
|
502
|
+
let memorySummary;
|
|
503
|
+
let profileFilteredRefs;
|
|
504
|
+
let memoryCleanupPlan;
|
|
505
|
+
let guidance;
|
|
506
|
+
try {
|
|
507
|
+
// Acquire the lock and run the triage pre-pass for non-dry-run executions.
|
|
508
|
+
// The dry-run branch below produces plannedRefs/memorySummary WITHOUT the lock
|
|
509
|
+
// or triage (decision: dry-run never mutates the queue).
|
|
510
|
+
if (!options.dryRun) {
|
|
511
|
+
acquireLock();
|
|
512
|
+
lockAcquired = true;
|
|
513
|
+
// Phase 4 triage pre-pass (§7, §13): drain the standing pending backlog
|
|
514
|
+
// BEFORE ensureIndex so improve generates fresh proposals against a cleared
|
|
515
|
+
// queue (no `duplicate_pending` collisions) and ensureIndex absorbs triage's
|
|
516
|
+
// promotions for free. Gated on the triage process being enabled (opt-in,
|
|
517
|
+
// defaults off) and on a whole-stash / type-scoped run — a single-ref
|
|
518
|
+
// `akm improve skill:x` must never drain the whole queue. Best-effort: a
|
|
519
|
+
// triage failure is a non-fatal warning, never an abort (mirrors the
|
|
520
|
+
// contradiction-detection pass below).
|
|
521
|
+
if (primaryStashDir && resolveProcessEnabled("triage", improveProfile)) {
|
|
522
|
+
if (scope.mode === "ref") {
|
|
523
|
+
warn("[improve] triage pre-pass skipped (single-ref scope never drains the whole queue)");
|
|
524
|
+
}
|
|
525
|
+
else {
|
|
526
|
+
try {
|
|
527
|
+
const triageConfig = improveProfile.processes?.triage;
|
|
528
|
+
const policy = resolveDrainPolicy(triageConfig?.policy);
|
|
529
|
+
const applyMode = triageConfig?.applyMode ?? "queue";
|
|
530
|
+
const maxAccepts = triageConfig?.maxAcceptsPerRun ?? 25;
|
|
531
|
+
const judgment = triageConfig?.judgment
|
|
532
|
+
? resolveTriageJudgmentRunner(triageConfig.judgment, _earlyConfig)
|
|
533
|
+
: null;
|
|
534
|
+
await drainProposalsFn({
|
|
535
|
+
stashDir: primaryStashDir,
|
|
536
|
+
policy,
|
|
537
|
+
applyMode,
|
|
538
|
+
maxAccepts,
|
|
539
|
+
dryRun: false,
|
|
540
|
+
// No fresh ids exist yet — triage runs before improve generates any.
|
|
541
|
+
excludeIds: new Set(),
|
|
542
|
+
...(triageConfig?.maxDiffLines !== undefined ? { maxDiffLines: triageConfig.maxDiffLines } : {}),
|
|
543
|
+
judgment,
|
|
544
|
+
});
|
|
545
|
+
}
|
|
546
|
+
catch (err) {
|
|
547
|
+
// Non-fatal: triage is a best-effort pre-pass and must never abort improve.
|
|
548
|
+
warn(`[improve] triage pre-pass failed (non-fatal): ${err instanceof Error ? err.message : String(err)}`);
|
|
549
|
+
}
|
|
550
|
+
}
|
|
551
|
+
}
|
|
552
|
+
}
|
|
553
|
+
// #339 fix: ensureIndex MUST run BEFORE collectEligibleRefs. The eligible-ref
|
|
554
|
+
// query reads the `entries` table; if a DB version upgrade just dropped that
|
|
555
|
+
// table (or the index is otherwise empty), the prior run order silently
|
|
556
|
+
// returned plannedRefs=[] and the improve loop no-op'd. Hoisting the call
|
|
557
|
+
// here repopulates the index first so the subsequent query sees fresh data.
|
|
558
|
+
if (primaryStashDir) {
|
|
559
|
+
// Probe pre-ensureIndex entry count to drive the loud-fail warning below.
|
|
560
|
+
// Best-effort: a missing DB / unreadable schema is the fresh-install case
|
|
561
|
+
// and not a bug — we silently skip the probe.
|
|
562
|
+
let preEnsureEntryCount;
|
|
250
563
|
try {
|
|
251
|
-
|
|
252
|
-
|
|
564
|
+
const dbPath = getDbPath();
|
|
565
|
+
if (fs.existsSync(dbPath)) {
|
|
566
|
+
const probeDb = openExistingDatabase();
|
|
567
|
+
try {
|
|
568
|
+
preEnsureEntryCount = getEntryCount(probeDb);
|
|
569
|
+
}
|
|
570
|
+
finally {
|
|
571
|
+
closeDatabase(probeDb);
|
|
572
|
+
}
|
|
573
|
+
}
|
|
253
574
|
}
|
|
254
575
|
catch (err) {
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
576
|
+
rethrowIfTestIsolationError(err);
|
|
577
|
+
// best-effort; leave preEnsureEntryCount undefined
|
|
578
|
+
}
|
|
579
|
+
try {
|
|
580
|
+
await ensureIndexFn(primaryStashDir);
|
|
581
|
+
}
|
|
582
|
+
catch (err) {
|
|
583
|
+
preEnsureCleanupWarnings.push(`ensureIndex failed: ${err instanceof Error ? err.message : String(err)}`);
|
|
584
|
+
}
|
|
585
|
+
// #339 loud-fail: if the index was empty pre-ensureIndex but is now
|
|
586
|
+
// populated, a version-upgrade-triggered rebuild just happened. Surface
|
|
587
|
+
// that on stderr so the improve run is not silently masked by stale
|
|
588
|
+
// index state. Zero-before AND zero-after is the empty-stash case and
|
|
589
|
+
// is intentionally not warned (not a bug).
|
|
590
|
+
if (preEnsureEntryCount === 0) {
|
|
591
|
+
try {
|
|
592
|
+
const probeDb = openExistingDatabase();
|
|
593
|
+
let postCount = 0;
|
|
594
|
+
try {
|
|
595
|
+
postCount = getEntryCount(probeDb);
|
|
596
|
+
}
|
|
597
|
+
finally {
|
|
598
|
+
closeDatabase(probeDb);
|
|
599
|
+
}
|
|
600
|
+
if (postCount > 0) {
|
|
601
|
+
warn("[improve] index was empty after DB version upgrade — repopulating before continuing");
|
|
602
|
+
}
|
|
603
|
+
}
|
|
604
|
+
catch (err) {
|
|
605
|
+
rethrowIfTestIsolationError(err);
|
|
606
|
+
// best-effort
|
|
607
|
+
}
|
|
258
608
|
}
|
|
259
609
|
}
|
|
260
|
-
|
|
610
|
+
({ plannedRefs, memorySummary, profileFilteredRefs } = await collectEligibleRefs(scope, options.stashDir, improveProfile));
|
|
611
|
+
const cleanupParentRef = memoryCleanupParentRef(scope, options.stashDir);
|
|
612
|
+
// M-1 (#367): Run contradiction-detection BEFORE analyzeMemoryCleanup so
|
|
613
|
+
// the SCC resolver in resolveFamilyContradictions has edges to work on.
|
|
614
|
+
// Best-effort: failures are warnings, never fatal.
|
|
615
|
+
if (primaryStashDir && shouldAnalyzeMemoryCleanup(scope, memorySummary.eligible, primaryStashDir)) {
|
|
261
616
|
try {
|
|
262
|
-
|
|
617
|
+
// Reuse the config resolved at the top of the run instead of a second load.
|
|
618
|
+
await detectAndWriteContradictions(primaryStashDir, _earlyConfig);
|
|
263
619
|
}
|
|
264
|
-
catch {
|
|
265
|
-
//
|
|
620
|
+
catch (err) {
|
|
621
|
+
// Non-fatal: contradiction detection is a best-effort pass.
|
|
622
|
+
warn(`[improve] contradiction detection failed (non-fatal): ${err instanceof Error ? err.message : String(err)}`);
|
|
266
623
|
}
|
|
267
624
|
}
|
|
625
|
+
memoryCleanupPlan = shouldAnalyzeMemoryCleanup(scope, memorySummary.eligible, primaryStashDir)
|
|
626
|
+
? analyzeMemoryCleanup(primaryStashDir, cleanupParentRef ? { parentRef: cleanupParentRef } : undefined)
|
|
627
|
+
: undefined;
|
|
628
|
+
guidance =
|
|
629
|
+
memorySummary.eligible > 0
|
|
630
|
+
? "Improve folds memory cleanup into the same proposal queue: speculative promotions still go through reflect/distill proposals, while high-confidence redundant derived memories are moved into a recoverable cleanup archive instead of being left active in the stash."
|
|
631
|
+
: undefined;
|
|
632
|
+
if (options.dryRun) {
|
|
633
|
+
const result = {
|
|
634
|
+
schemaVersion: 1,
|
|
635
|
+
ok: true,
|
|
636
|
+
scope,
|
|
637
|
+
dryRun: true,
|
|
638
|
+
...(guidance ? { guidance } : {}),
|
|
639
|
+
memorySummary,
|
|
640
|
+
...(memoryCleanupPlan ? { memoryCleanup: shapeMemoryCleanup(memoryCleanupPlan) } : {}),
|
|
641
|
+
plannedRefs,
|
|
642
|
+
...(profileFilteredRefs.length > 0 ? { profileFilteredRefs } : {}),
|
|
643
|
+
};
|
|
644
|
+
return result;
|
|
645
|
+
}
|
|
268
646
|
}
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
647
|
+
catch (err) {
|
|
648
|
+
releaseLockOnError();
|
|
649
|
+
throw err;
|
|
650
|
+
}
|
|
651
|
+
// FIX 2 (lock-leak window): everything from here on runs UNDER the lock that
|
|
652
|
+
// `acquireLock()` just took. The single `try { … } finally { unlinkSync(lock) }`
|
|
653
|
+
// below now spans the budget-timer setup, `openStateDatabase()`, and the
|
|
654
|
+
// `profileFilteredRefs` audit-event loop too — regions that previously sat in
|
|
655
|
+
// the gap between the lock-acquire catch (above) and the main try. A throw in
|
|
656
|
+
// any of them used to leak the lock (blocking the next improve up to 4h);
|
|
657
|
+
// now the finally releases it exactly once. The dry-run path already returned
|
|
658
|
+
// above without acquiring the lock, so it never reaches this finally; the
|
|
659
|
+
// best-effort `unlinkSync` is a no-op when no lock file exists.
|
|
272
660
|
const startMs = Date.now();
|
|
661
|
+
const budgetMs = options.timeoutMs ?? 2 * 60 * 60 * 1000; // default 2 hours
|
|
662
|
+
// O-1 (#364): Create a shared AbortController derived from startMs + budgetMs.
|
|
663
|
+
// Every async seam receives this signal so a hung sub-call cannot extend the
|
|
664
|
+
// run past the declared budget.
|
|
665
|
+
// References: Anthropic *Building Effective Agents* (2024); CoALA §5 (arXiv:2309.02427).
|
|
666
|
+
const budgetAbortController = new AbortController();
|
|
667
|
+
// Declared in the outer scope so the `finally` can clear the timer even if a
|
|
668
|
+
// throw occurs before/after it is armed. Defaults to a no-op until armed.
|
|
669
|
+
let clearBudgetTimer = () => { };
|
|
670
|
+
// I1: open a single state.db connection for the entire improve run so all
|
|
671
|
+
// appendEvent calls reuse one handle instead of open/migrate/close per call.
|
|
672
|
+
let eventsDb;
|
|
673
|
+
// `eventsCtx` is read by the main catch (improve_failed) and finally, so it
|
|
674
|
+
// lives in the outer scope. It is always assigned at the top of the try.
|
|
675
|
+
let eventsCtx = {};
|
|
273
676
|
try {
|
|
677
|
+
const budgetTimer = setTimeout(() => budgetAbortController.abort("improve budget exhausted"), budgetMs);
|
|
678
|
+
// Clear the timer when the run ends to avoid keeping the event loop alive.
|
|
679
|
+
clearBudgetTimer = () => clearTimeout(budgetTimer);
|
|
680
|
+
try {
|
|
681
|
+
eventsDb = openStateDatabase();
|
|
682
|
+
eventsCtx = { db: eventsDb };
|
|
683
|
+
}
|
|
684
|
+
catch (err) {
|
|
685
|
+
rethrowIfTestIsolationError(err);
|
|
686
|
+
// If we cannot open state.db up-front, fall back to per-call opens.
|
|
687
|
+
eventsCtx = {};
|
|
688
|
+
}
|
|
689
|
+
// 2026-05-27: emit `improve_skipped` audit events for refs the planner
|
|
690
|
+
// pre-filtered (reflect AND distill both refuse them under the active
|
|
691
|
+
// profile). One event per ref so the existing improve_skipped histogram in
|
|
692
|
+
// `health.ts#improveSummary.skipReasons` accumulates the right count under
|
|
693
|
+
// the new `profile_filtered_all_passes` reason code. See
|
|
694
|
+
// `/tmp/akm-health-investigations/planner-profile-metrics-deep-analysis.md`.
|
|
695
|
+
for (const filtered of profileFilteredRefs) {
|
|
696
|
+
appendEvent({
|
|
697
|
+
eventType: "improve_skipped",
|
|
698
|
+
ref: filtered.ref,
|
|
699
|
+
metadata: { reason: "profile_filtered_all_passes" },
|
|
700
|
+
}, eventsCtx);
|
|
701
|
+
}
|
|
274
702
|
const preparation = await runImprovePreparationStage({
|
|
275
703
|
scope,
|
|
276
704
|
options,
|
|
@@ -278,12 +706,25 @@ export async function akmImprove(options = {}) {
|
|
|
278
706
|
memoryCleanupPlan,
|
|
279
707
|
primaryStashDir,
|
|
280
708
|
memorySummary,
|
|
281
|
-
ensureIndexFn,
|
|
282
709
|
reindexFn,
|
|
283
710
|
startMs,
|
|
284
711
|
budgetMs,
|
|
712
|
+
eventsCtx,
|
|
713
|
+
initialCleanupWarnings: preEnsureCleanupWarnings,
|
|
714
|
+
improveProfile,
|
|
285
715
|
});
|
|
286
|
-
|
|
716
|
+
// D6: pre-load all proposal_rejected events from the last 30 days once,
|
|
717
|
+
// so the per-asset loop can use a Map lookup instead of N DB round trips.
|
|
718
|
+
const REJECTED_PROPOSAL_WINDOW_MS = daysToMs(30);
|
|
719
|
+
const rejectedProposalSince = new Date(Date.now() - REJECTED_PROPOSAL_WINDOW_MS).toISOString();
|
|
720
|
+
const allRejectedProposalEvents = readEvents({ type: "proposal_rejected", since: rejectedProposalSince }).events;
|
|
721
|
+
const rejectedProposalsByRef = new Map();
|
|
722
|
+
for (const e of allRejectedProposalEvents) {
|
|
723
|
+
if (e.ref && (!rejectedProposalsByRef.has(e.ref) || e.ts > (rejectedProposalsByRef.get(e.ref)?.ts ?? ""))) {
|
|
724
|
+
rejectedProposalsByRef.set(e.ref, e);
|
|
725
|
+
}
|
|
726
|
+
}
|
|
727
|
+
const { reflectsWithErrorContext, memoryRefsForInference, gateAutoAcceptedCount: loopGateCount, } = await runImproveLoopStage({
|
|
287
728
|
scope,
|
|
288
729
|
options,
|
|
289
730
|
primaryStashDir,
|
|
@@ -293,11 +734,16 @@ export async function akmImprove(options = {}) {
|
|
|
293
734
|
actions: preparation.actions,
|
|
294
735
|
signalBearingSet: preparation.signalBearingSet,
|
|
295
736
|
distillCooledRefs: preparation.distillCooledRefs,
|
|
737
|
+
distillOnlyRefs: preparation.distillOnlyRefs,
|
|
296
738
|
recentErrors: preparation.recentErrors,
|
|
739
|
+
rejectedProposalsByRef,
|
|
740
|
+
utilityMap: preparation.utilityMap,
|
|
297
741
|
startMs,
|
|
298
742
|
budgetMs,
|
|
743
|
+
eventsCtx,
|
|
744
|
+
improveProfile,
|
|
299
745
|
});
|
|
300
|
-
const { allWarnings, consolidation, deadUrls, memoryInference, graphExtraction, maintenanceActions } = await runImprovePostLoopStage({
|
|
746
|
+
const { allWarnings, consolidation, deadUrls, memoryInference, graphExtraction, stalenessDetection, maintenanceActions, memoryInferenceDurationMs, graphExtractionDurationMs, orphansPurged, proposalsExpired, gateAutoAcceptedCount: postLoopGateCount, } = await runImprovePostLoopStage({
|
|
301
747
|
scope,
|
|
302
748
|
options,
|
|
303
749
|
primaryStashDir,
|
|
@@ -307,6 +753,10 @@ export async function akmImprove(options = {}) {
|
|
|
307
753
|
memorySummary,
|
|
308
754
|
memoryRefsForInference,
|
|
309
755
|
reindexFn,
|
|
756
|
+
eventsCtx,
|
|
757
|
+
// O-1 (#364): propagate wall-clock budget signal to post-loop maintenance.
|
|
758
|
+
budgetSignal: budgetAbortController.signal,
|
|
759
|
+
improveProfile,
|
|
310
760
|
});
|
|
311
761
|
const finalActions = maintenanceActions && maintenanceActions.length > 0
|
|
312
762
|
? [...preparation.actions, ...maintenanceActions]
|
|
@@ -340,39 +790,146 @@ export async function akmImprove(options = {}) {
|
|
|
340
790
|
}
|
|
341
791
|
: {}),
|
|
342
792
|
plannedRefs: preparation.actionableRefs,
|
|
793
|
+
...(profileFilteredRefs.length > 0 ? { profileFilteredRefs } : {}),
|
|
343
794
|
actions: finalActions,
|
|
344
795
|
...(preparation.validationFailures.length > 0 ? { validationFailures: preparation.validationFailures } : {}),
|
|
345
796
|
...(preparation.schemaRepairs.length > 0 ? { schemaRepairs: preparation.schemaRepairs } : {}),
|
|
346
797
|
...(consolidation.processed > 0 || consolidation.warnings.length > 0 ? { consolidation } : {}),
|
|
347
798
|
...(preparation.lintSummary !== undefined ? { lintSummary: preparation.lintSummary } : {}),
|
|
348
799
|
...(preparation.memoryIndexHealth !== undefined ? { memoryIndexHealth: preparation.memoryIndexHealth } : {}),
|
|
349
|
-
feedbackRatioUsed: preparation.feedbackRatioUsed,
|
|
350
800
|
...(preparation.coverageGaps.length > 0 ? { coverageGaps: preparation.coverageGaps } : {}),
|
|
351
801
|
...(preparation.executionLogCandidates.length > 0
|
|
352
802
|
? { executionLogCandidates: preparation.executionLogCandidates }
|
|
353
803
|
: {}),
|
|
804
|
+
...(preparation.extract && preparation.extract.length > 0 ? { extract: preparation.extract } : {}),
|
|
354
805
|
...(primaryStashDir !== undefined ? { evalCasesWritten: countEvalCases(primaryStashDir) } : {}),
|
|
355
806
|
...(deadUrls !== undefined && deadUrls.length > 0 ? { deadUrls } : {}),
|
|
356
|
-
...(
|
|
807
|
+
...(reflectsWithErrorContext > 0 ? { reflectsWithErrorContext } : {}),
|
|
357
808
|
...(memoryInference ? { memoryInference } : {}),
|
|
358
809
|
...(graphExtraction ? { graphExtraction } : {}),
|
|
810
|
+
// Per-phase wall-clock durations. Surfaced at the top level of the
|
|
811
|
+
// envelope (not nested) because `health.ts`'s `wallTime.byPhase`
|
|
812
|
+
// aggregator and the existing `memoryInference.durationMs` /
|
|
813
|
+
// `graphExtraction.durationMs` health buckets all read
|
|
814
|
+
// `result.{memoryInferenceDurationMs,graphExtractionDurationMs}`
|
|
815
|
+
// directly. Mirrors how `consolidation.durationMs` is surfaced inside
|
|
816
|
+
// the consolidation sub-object (different convention because the
|
|
817
|
+
// consolidation result type already owns that field). Phases that did
|
|
818
|
+
// not run (zero duration) are omitted so the aggregator's
|
|
819
|
+
// "phase actually ran" filter (`> 0`) excludes them from the median/p95
|
|
820
|
+
// sample. Plumbed in d1273d0's follow-up — see
|
|
821
|
+
// `/tmp/akm-health-investigations/metrics-taxonomy-review.md` §1k / §3.
|
|
822
|
+
...(memoryInferenceDurationMs > 0 ? { memoryInferenceDurationMs } : {}),
|
|
823
|
+
...(graphExtractionDurationMs > 0 ? { graphExtractionDurationMs } : {}),
|
|
824
|
+
...(stalenessDetection ? { stalenessDetection } : {}),
|
|
825
|
+
...(orphansPurged !== undefined ? { orphansPurged } : {}),
|
|
826
|
+
...(proposalsExpired !== undefined && proposalsExpired > 0 ? { proposalsExpired } : {}),
|
|
827
|
+
reflectCooldownActions: finalActions.filter((a) => a.mode === "reflect-cooldown").length,
|
|
828
|
+
reflectSkippedActions: finalActions.filter((a) => a.mode === "reflect-skipped").length,
|
|
829
|
+
reflectGuardRejectedActions: finalActions.filter((a) => a.mode === "reflect-guard-rejected").length,
|
|
830
|
+
...(() => {
|
|
831
|
+
const t = preparation.gateAutoAcceptedCount + loopGateCount + postLoopGateCount;
|
|
832
|
+
return t > 0 ? { gateAutoAcceptedCount: t } : {};
|
|
833
|
+
})(),
|
|
359
834
|
};
|
|
360
835
|
if (!result.dryRun)
|
|
361
|
-
emitImproveCompletedEvent(result
|
|
836
|
+
emitImproveCompletedEvent(result, {
|
|
837
|
+
memoryInferenceDurationMs,
|
|
838
|
+
graphExtractionDurationMs,
|
|
839
|
+
totalDurationMs: Date.now() - startMs,
|
|
840
|
+
warningCount: allWarnings.length,
|
|
841
|
+
orphansPurged: orphansPurged ?? 0,
|
|
842
|
+
}, eventsCtx);
|
|
843
|
+
// End-of-run BATCH auto-sync. Recognition is decoupled from the per-write
|
|
844
|
+
// path (see write-source.ts case-3): the primary stash writes as a
|
|
845
|
+
// filesystem source during the run, then is committed in one shot here via
|
|
846
|
+
// the same `saveGitStash` that `akm sync` calls. Gated on a non-dry-run, a
|
|
847
|
+
// git-backed primary stash (by `.git`, not by remote), and sync not
|
|
848
|
+
// disabled. A sync failure is NON-FATAL — it never fails a successful run
|
|
849
|
+
// (mirrors the contradiction-detection best-effort pattern).
|
|
850
|
+
const effectiveSync = { ...improveProfile.sync, ...options.sync };
|
|
851
|
+
if (!result.dryRun && primaryStashDir && effectiveSync.enabled !== false && isGitBackedStash(primaryStashDir)) {
|
|
852
|
+
const saveGitStashFn = options.saveGitStashFn ?? saveGitStash;
|
|
853
|
+
// Reuse the config resolved at the top of the run (`_earlyConfig`) instead
|
|
854
|
+
// of a second loadConfig(); the writable derivation is shared with
|
|
855
|
+
// `akm sync` via resolveWritableOverride().
|
|
856
|
+
const writableOverride = resolveWritableOverride(_earlyConfig);
|
|
857
|
+
const push = effectiveSync.push !== false;
|
|
858
|
+
// `sync.message` may contain `{token}` placeholders (timestamp/date/time/
|
|
859
|
+
// scope/refs/accepted) expanded against this run's results; the default
|
|
860
|
+
// template has no tokens so it renders verbatim.
|
|
861
|
+
const message = renderSyncCommitMessage(effectiveSync.message ?? "akm improve auto-sync", result, Date.now());
|
|
862
|
+
try {
|
|
863
|
+
// Pass primaryStashDir as the explicit commit target so the gate above
|
|
864
|
+
// (which validated primaryStashDir via isGitBackedStash) and the commit
|
|
865
|
+
// operate on the SAME directory — avoids divergence when a caller passes
|
|
866
|
+
// a non-default options.stashDir (FIX 9).
|
|
867
|
+
const syncResult = saveGitStashFn(undefined, message, writableOverride, { push, repoDir: primaryStashDir });
|
|
868
|
+
result.sync = {
|
|
869
|
+
committed: syncResult.committed,
|
|
870
|
+
pushed: syncResult.pushed,
|
|
871
|
+
skipped: syncResult.skipped,
|
|
872
|
+
...(syncResult.reason !== undefined ? { reason: syncResult.reason } : {}),
|
|
873
|
+
};
|
|
874
|
+
appendEvent({
|
|
875
|
+
eventType: "stash_synced",
|
|
876
|
+
metadata: {
|
|
877
|
+
committed: syncResult.committed,
|
|
878
|
+
pushed: syncResult.pushed,
|
|
879
|
+
skipped: syncResult.skipped,
|
|
880
|
+
reason: syncResult.reason ?? null,
|
|
881
|
+
},
|
|
882
|
+
}, eventsCtx);
|
|
883
|
+
}
|
|
884
|
+
catch (syncErr) {
|
|
885
|
+
const reason = syncErr instanceof Error ? syncErr.message : String(syncErr);
|
|
886
|
+
warn(`improve: end-of-run stash sync failed (non-fatal): ${reason}`);
|
|
887
|
+
result.sync = { committed: false, pushed: false, skipped: true, reason };
|
|
888
|
+
appendEvent({
|
|
889
|
+
eventType: "stash_synced",
|
|
890
|
+
metadata: { committed: false, pushed: false, skipped: true, reason },
|
|
891
|
+
}, eventsCtx);
|
|
892
|
+
}
|
|
893
|
+
}
|
|
362
894
|
return result;
|
|
363
895
|
}
|
|
896
|
+
catch (err) {
|
|
897
|
+
// D3: emit improve_failed on unexpected crash so dashboards can detect failures.
|
|
898
|
+
appendEvent({
|
|
899
|
+
eventType: "improve_failed",
|
|
900
|
+
ref: scope.mode === "ref" ? scope.value : `improve:${scope.mode}:${scope.value ?? "all"}`,
|
|
901
|
+
metadata: {
|
|
902
|
+
error: err instanceof Error ? err.message : String(err),
|
|
903
|
+
durationMs: Date.now() - startMs,
|
|
904
|
+
},
|
|
905
|
+
}, eventsCtx);
|
|
906
|
+
throw err;
|
|
907
|
+
}
|
|
364
908
|
finally {
|
|
909
|
+
// O-1 (#364): Clear the budget abort timer so it does not keep the event
|
|
910
|
+
// loop alive after the run completes.
|
|
911
|
+
clearBudgetTimer();
|
|
365
912
|
try {
|
|
366
913
|
fs.unlinkSync(resolvedLockPath);
|
|
367
914
|
}
|
|
368
915
|
catch {
|
|
369
916
|
// ignore
|
|
370
917
|
}
|
|
918
|
+
// I1: close the long-lived state.db connection opened at the top of the run.
|
|
919
|
+
try {
|
|
920
|
+
eventsDb?.close();
|
|
921
|
+
}
|
|
922
|
+
catch {
|
|
923
|
+
// ignore — DB may already be closed
|
|
924
|
+
}
|
|
371
925
|
}
|
|
372
926
|
}
|
|
373
|
-
function emitImproveCompletedEvent(result) {
|
|
927
|
+
function emitImproveCompletedEvent(result, durations, eventsCtx) {
|
|
374
928
|
const actionCounts = {
|
|
375
929
|
reflect: 0,
|
|
930
|
+
reflectFailed: 0,
|
|
931
|
+
reflectCooldown: 0,
|
|
932
|
+
reflectSkipped: 0,
|
|
376
933
|
distill: 0,
|
|
377
934
|
distillSkipped: 0,
|
|
378
935
|
memoryPrune: 0,
|
|
@@ -385,6 +942,15 @@ function emitImproveCompletedEvent(result) {
|
|
|
385
942
|
case "reflect":
|
|
386
943
|
actionCounts.reflect += 1;
|
|
387
944
|
break;
|
|
945
|
+
case "reflect-failed":
|
|
946
|
+
actionCounts.reflectFailed += 1;
|
|
947
|
+
break;
|
|
948
|
+
case "reflect-cooldown":
|
|
949
|
+
actionCounts.reflectCooldown += 1;
|
|
950
|
+
break;
|
|
951
|
+
case "reflect-skipped":
|
|
952
|
+
actionCounts.reflectSkipped += 1;
|
|
953
|
+
break;
|
|
388
954
|
case "distill":
|
|
389
955
|
actionCounts.distill += 1;
|
|
390
956
|
break;
|
|
@@ -407,7 +973,9 @@ function emitImproveCompletedEvent(result) {
|
|
|
407
973
|
}
|
|
408
974
|
appendEvent({
|
|
409
975
|
eventType: "improve_completed",
|
|
410
|
-
ref: result.scope.mode === "ref"
|
|
976
|
+
ref: result.scope.mode === "ref"
|
|
977
|
+
? result.scope.value
|
|
978
|
+
: `improve:${result.scope.mode}:${result.scope.value ?? "all"}`,
|
|
411
979
|
metadata: {
|
|
412
980
|
plannedRefs: result.plannedRefs.length,
|
|
413
981
|
reflectActions: actionCounts.reflect,
|
|
@@ -417,8 +985,10 @@ function emitImproveCompletedEvent(result) {
|
|
|
417
985
|
memoryInferenceActions: actionCounts.memoryInference,
|
|
418
986
|
graphExtractionActions: actionCounts.graphExtraction,
|
|
419
987
|
errorActions: actionCounts.error,
|
|
420
|
-
|
|
421
|
-
|
|
988
|
+
reflectFailedActions: actionCounts.reflectFailed,
|
|
989
|
+
reflectCooldownActions: actionCounts.reflectCooldown,
|
|
990
|
+
reflectSkippedActions: actionCounts.reflectSkipped,
|
|
991
|
+
reflectsWithErrorContext: result.reflectsWithErrorContext ?? 0,
|
|
422
992
|
coverageGapCount: result.coverageGaps?.length ?? 0,
|
|
423
993
|
executionLogCandidateCount: result.executionLogCandidates?.length ?? 0,
|
|
424
994
|
evalCasesWritten: result.evalCasesWritten ?? 0,
|
|
@@ -434,16 +1004,31 @@ function emitImproveCompletedEvent(result) {
|
|
|
434
1004
|
consolidationProcessed: result.consolidation?.processed ?? 0,
|
|
435
1005
|
consolidationDurationMs: result.consolidation?.durationMs ?? 0,
|
|
436
1006
|
memoryInferenceWrites: result.memoryInference?.writtenFacts ?? 0,
|
|
437
|
-
memoryInferenceDurationMs:
|
|
1007
|
+
memoryInferenceDurationMs: durations.memoryInferenceDurationMs,
|
|
438
1008
|
graphExtractionExtractedFiles: result.graphExtraction?.quality.extractedFiles ?? 0,
|
|
439
|
-
graphExtractionDurationMs:
|
|
1009
|
+
graphExtractionDurationMs: durations.graphExtractionDurationMs,
|
|
1010
|
+
// New metrics for tuning the improve loop.
|
|
1011
|
+
...(durations.totalDurationMs !== undefined ? { durationMs: durations.totalDurationMs } : {}),
|
|
1012
|
+
...(durations.warningCount !== undefined ? { warningCount: durations.warningCount } : {}),
|
|
1013
|
+
...(durations.orphansPurged !== undefined ? { orphansPurged: durations.orphansPurged } : {}),
|
|
1014
|
+
...(result.graphExtraction?.quality
|
|
1015
|
+
? {
|
|
1016
|
+
graphCoverage: result.graphExtraction.quality.extractionCoverage,
|
|
1017
|
+
graphDensity: result.graphExtraction.quality.density,
|
|
1018
|
+
graphEntities: result.graphExtraction.quality.entityCount,
|
|
1019
|
+
}
|
|
1020
|
+
: {}),
|
|
440
1021
|
},
|
|
441
|
-
});
|
|
1022
|
+
}, eventsCtx);
|
|
442
1023
|
}
|
|
443
1024
|
async function runImprovePreparationStage(args) {
|
|
444
|
-
const { scope, options, plannedRefs, memoryCleanupPlan, primaryStashDir,
|
|
1025
|
+
const { scope, options, plannedRefs, memoryCleanupPlan, primaryStashDir, reindexFn, startMs, budgetMs, eventsCtx, initialCleanupWarnings,
|
|
1026
|
+
// improveProfile is part of the preparation-stage signature for future use
|
|
1027
|
+
// (per-process gating moved into the in-loop stage). Kept here so the
|
|
1028
|
+
// signature does not drift away from the rest of the planner stack.
|
|
1029
|
+
improveProfile: _improveProfile, } = args;
|
|
445
1030
|
const actions = [];
|
|
446
|
-
const cleanupWarnings = [];
|
|
1031
|
+
const cleanupWarnings = initialCleanupWarnings ? [...initialCleanupWarnings] : [];
|
|
447
1032
|
// Phase 0 — MEMORY.md budget check (200-line cap; warn at 180)
|
|
448
1033
|
let memoryIndexHealth;
|
|
449
1034
|
if (primaryStashDir) {
|
|
@@ -471,19 +1056,90 @@ async function runImprovePreparationStage(args) {
|
|
|
471
1056
|
catch {
|
|
472
1057
|
// best-effort
|
|
473
1058
|
}
|
|
474
|
-
|
|
475
|
-
|
|
476
|
-
|
|
477
|
-
|
|
1059
|
+
// Phase 0.4 — session-extract pass.
|
|
1060
|
+
//
|
|
1061
|
+
// Reads native session files (claude-code JSONL, opencode storage tree)
|
|
1062
|
+
// through the SessionLogHarness registry, pre-filters noise, and asks a
|
|
1063
|
+
// bounded in-tree LLM to produce candidate memory/lesson/knowledge
|
|
1064
|
+
// proposals for content the agent did NOT preserve via inline `akm remember`
|
|
1065
|
+
// / `akm feedback` invocations. Replaces the akm-plugin session-checkpoint
|
|
1066
|
+
// hook with an on-demand pull pipeline.
|
|
1067
|
+
//
|
|
1068
|
+
// Default-on; opt out via `profiles.improve.default.processes.extract.enabled: false`.
|
|
1069
|
+
// Each available harness gets one call with the default --since window;
|
|
1070
|
+
// already-seen sessions (tracked in state.db.extract_sessions_seen) are
|
|
1071
|
+
// skipped automatically so re-runs don't burn LLM calls on unchanged data.
|
|
1072
|
+
//
|
|
1073
|
+
// Failures are non-fatal — one harness throwing doesn't abort improve.
|
|
1074
|
+
// The extract envelope's own `warnings` field surfaces what went wrong.
|
|
1075
|
+
let extractResults;
|
|
1076
|
+
let gateAutoAcceptedCount = 0;
|
|
1077
|
+
const extractConfig = options.config ?? loadConfig();
|
|
1078
|
+
const extractGateCfg = makeGateConfig("extract", {
|
|
1079
|
+
globalThreshold: options.autoAccept,
|
|
1080
|
+
dryRun: options.dryRun ?? false,
|
|
1081
|
+
stashDir: primaryStashDir,
|
|
1082
|
+
config: extractConfig,
|
|
1083
|
+
eventsCtx,
|
|
478
1084
|
});
|
|
479
|
-
if (
|
|
480
|
-
|
|
481
|
-
|
|
1085
|
+
if (isLlmFeatureEnabled(extractConfig, "session_extraction")) {
|
|
1086
|
+
const availableHarnesses = getAvailableHarnesses();
|
|
1087
|
+
if (availableHarnesses.length > 0) {
|
|
1088
|
+
extractResults = [];
|
|
1089
|
+
for (const h of availableHarnesses) {
|
|
1090
|
+
try {
|
|
1091
|
+
const result = await akmExtract({
|
|
1092
|
+
type: h.name,
|
|
1093
|
+
...(primaryStashDir !== undefined ? { stashDir: primaryStashDir } : {}),
|
|
1094
|
+
config: extractConfig,
|
|
1095
|
+
dryRun: options.dryRun ?? false,
|
|
1096
|
+
});
|
|
1097
|
+
extractResults.push(result);
|
|
1098
|
+
gateAutoAcceptedCount += (await runAutoAcceptGate(primaryStashDir
|
|
1099
|
+
? result.proposals.map((proposalId) => {
|
|
1100
|
+
const proposal = getProposal(primaryStashDir, proposalId);
|
|
1101
|
+
return { proposalId, confidence: resolveExtractConfidence(proposal) };
|
|
1102
|
+
})
|
|
1103
|
+
: [], extractGateCfg)).promoted.length;
|
|
1104
|
+
}
|
|
1105
|
+
catch (err) {
|
|
1106
|
+
const msg = err instanceof Error ? err.message : String(err);
|
|
1107
|
+
cleanupWarnings.push(`extract(${h.name}) failed: ${msg}`);
|
|
1108
|
+
}
|
|
1109
|
+
}
|
|
1110
|
+
if (extractResults.length === 0) {
|
|
1111
|
+
// All harnesses threw — clear so the envelope's `extract` field is
|
|
1112
|
+
// absent rather than misleadingly empty.
|
|
1113
|
+
extractResults = undefined;
|
|
1114
|
+
}
|
|
482
1115
|
}
|
|
483
|
-
|
|
484
|
-
|
|
1116
|
+
}
|
|
1117
|
+
// Backlog drain: gate any pending extract proposals that weren't created in
|
|
1118
|
+
// this run (i.e. pre-date the gate or were produced by a run that timed out
|
|
1119
|
+
// before the gate fired). Without this, eligible proposals accumulate
|
|
1120
|
+
// indefinitely — the fresh-gate only covers the current run's output.
|
|
1121
|
+
if (primaryStashDir && !options.dryRun && options.autoAccept !== undefined) {
|
|
1122
|
+
const freshIds = new Set((extractResults ?? []).flatMap((r) => r.proposals));
|
|
1123
|
+
const backlog = listProposals(primaryStashDir, { status: "pending" }).filter((p) => p.source === "extract" && !freshIds.has(p.id));
|
|
1124
|
+
if (backlog.length > 0) {
|
|
1125
|
+
const backlogCandidates = backlog.map((p) => ({
|
|
1126
|
+
proposalId: p.id,
|
|
1127
|
+
confidence: resolveExtractConfidence(p),
|
|
1128
|
+
}));
|
|
1129
|
+
gateAutoAcceptedCount += (await runAutoAcceptGate(backlogCandidates, extractGateCfg)).promoted.length;
|
|
485
1130
|
}
|
|
486
1131
|
}
|
|
1132
|
+
// eligibleCount = raw pre-filter count (before cooldown/signal/cleanup filters).
|
|
1133
|
+
// improve_completed.plannedRefs = post-filter count of refs that actually entered the loop.
|
|
1134
|
+
appendEvent({
|
|
1135
|
+
eventType: "improve_invoked",
|
|
1136
|
+
ref: scope.mode === "ref" ? scope.value : `improve:${scope.mode}:${scope.value ?? "all"}`,
|
|
1137
|
+
metadata: { scope, dryRun: options.dryRun ?? false, eligibleCount: plannedRefs.length },
|
|
1138
|
+
}, eventsCtx);
|
|
1139
|
+
// ensureIndex now runs in akmImprove() BEFORE collectEligibleRefs so the
|
|
1140
|
+
// eligible-ref query sees a populated `entries` table on the very first
|
|
1141
|
+
// pass after a DB version upgrade (#339). Any failure messages from that
|
|
1142
|
+
// earlier call were threaded in via args.initialCleanupWarnings.
|
|
487
1143
|
let appliedCleanup;
|
|
488
1144
|
try {
|
|
489
1145
|
appliedCleanup =
|
|
@@ -493,94 +1149,12 @@ async function runImprovePreparationStage(args) {
|
|
|
493
1149
|
cleanupWarnings.push(`applyMemoryCleanup failed: ${err instanceof Error ? err.message : String(err)}`);
|
|
494
1150
|
}
|
|
495
1151
|
const archivedRefs = appliedCleanup?.archived.map((record) => record.ref) ?? [];
|
|
496
|
-
const
|
|
497
|
-
|
|
498
|
-
//
|
|
499
|
-
//
|
|
500
|
-
//
|
|
501
|
-
|
|
502
|
-
const feedbackSinceCutoff = new Date(Date.now() - FEEDBACK_SIGNAL_WINDOW_DAYS * 24 * 60 * 60 * 1000).toISOString();
|
|
503
|
-
const signalFiltered = postCleanupRefs.filter((candidate) => {
|
|
504
|
-
const { events } = readEvents({ type: "feedback", ref: candidate.ref });
|
|
505
|
-
return events.some((e) => (e.ts ?? "") >= feedbackSinceCutoff &&
|
|
506
|
-
((e.metadata !== undefined && typeof e.metadata.signal === "string") ||
|
|
507
|
-
(e.metadata !== undefined && typeof e.metadata.note === "string")));
|
|
508
|
-
});
|
|
509
|
-
// P0-A: also surface zero-feedback assets that have been retrieved many times.
|
|
510
|
-
const RETRIEVAL_COUNT_THRESHOLD = options.minRetrievalCount ?? 5;
|
|
511
|
-
const signalBearingSet = new Set(signalFiltered.map((r) => r.ref));
|
|
512
|
-
const noFeedbackCandidates = postCleanupRefs.filter((r) => !signalBearingSet.has(r.ref));
|
|
513
|
-
let highRetrievalRefs = [];
|
|
514
|
-
let dbForRetrieval;
|
|
515
|
-
try {
|
|
516
|
-
dbForRetrieval = openExistingDatabase();
|
|
517
|
-
const showEventCount = dbForRetrieval.prepare("SELECT COUNT(*) AS cnt FROM usage_events WHERE event_type = 'show'").get().cnt;
|
|
518
|
-
if (showEventCount === 0) {
|
|
519
|
-
warn("Warning: show events not yet in usage_events — zero-feedback fallback will match only search-retrieved assets.");
|
|
520
|
-
}
|
|
521
|
-
const retrievalCounts = getRetrievalCounts(dbForRetrieval, noFeedbackCandidates.map((r) => r.ref));
|
|
522
|
-
highRetrievalRefs = noFeedbackCandidates.filter((r) => (retrievalCounts.get(r.ref) ?? 0) >= RETRIEVAL_COUNT_THRESHOLD);
|
|
523
|
-
}
|
|
524
|
-
catch {
|
|
525
|
-
// best-effort: if DB unavailable, highRetrievalRefs stays empty
|
|
526
|
-
}
|
|
527
|
-
finally {
|
|
528
|
-
if (dbForRetrieval)
|
|
529
|
-
closeDatabase(dbForRetrieval);
|
|
530
|
-
}
|
|
531
|
-
// If the user explicitly scoped to a single ref, always act on it —
|
|
532
|
-
// skip the signal/retrieval filter entirely. The filter exists to avoid
|
|
533
|
-
// noisy "improve everything" runs; it should not gate an intentional
|
|
534
|
-
// per-ref invocation where the user's explicit choice is the signal.
|
|
535
|
-
//
|
|
536
|
-
// For type/all scope with no signals yet (fresh environment), fall back
|
|
537
|
-
// to all postCleanupRefs so that the first improve run is not a no-op.
|
|
538
|
-
const signalAndRetrievalRefs = [...signalFiltered, ...highRetrievalRefs];
|
|
539
|
-
const mergedRefs = scope.mode === "ref"
|
|
540
|
-
? postCleanupRefs
|
|
541
|
-
: options.requireFeedbackSignal
|
|
542
|
-
? signalFiltered
|
|
543
|
-
: signalAndRetrievalRefs.length === 0
|
|
544
|
-
? postCleanupRefs
|
|
545
|
-
: signalAndRetrievalRefs;
|
|
546
|
-
const utilityMap = buildUtilityMap(mergedRefs);
|
|
547
|
-
// Load feedback ratio per ref and blend into sort key
|
|
548
|
-
const feedbackRatios = new Map();
|
|
549
|
-
for (const ref of mergedRefs) {
|
|
550
|
-
const { events } = readEvents({ type: "feedback", ref: ref.ref });
|
|
551
|
-
const positive = events.filter((e) => e.metadata?.signal === "positive").length;
|
|
552
|
-
const negative = events.filter((e) => e.metadata?.signal === "negative").length;
|
|
553
|
-
const total = positive + negative;
|
|
554
|
-
// ratio = negative proportion (high = needs more improvement)
|
|
555
|
-
feedbackRatios.set(ref.ref, total > 0 ? negative / total : 0);
|
|
556
|
-
}
|
|
557
|
-
// Sort: combine utility (desc) with feedback negativity (desc) — high-negative assets rank higher
|
|
558
|
-
const sorted = [...mergedRefs].sort((a, b) => {
|
|
559
|
-
const utilA = utilityMap.get(a.ref) ?? 0;
|
|
560
|
-
const utilB = utilityMap.get(b.ref) ?? 0;
|
|
561
|
-
const ratioA = feedbackRatios.get(a.ref) ?? 0;
|
|
562
|
-
const ratioB = feedbackRatios.get(b.ref) ?? 0;
|
|
563
|
-
// Combined score: 70% utility, 30% negative ratio
|
|
564
|
-
const scoreA = utilA * 0.7 + ratioA * 0.3;
|
|
565
|
-
const scoreB = utilB * 0.7 + ratioB * 0.3;
|
|
566
|
-
return scoreB - scoreA;
|
|
567
|
-
});
|
|
568
|
-
const feedbackRatioUsed = true;
|
|
569
|
-
// Phase 0: surface coverage gaps from zero-result search queries
|
|
570
|
-
let coverageGaps = [];
|
|
571
|
-
try {
|
|
572
|
-
const dbForGaps = openExistingDatabase();
|
|
573
|
-
try {
|
|
574
|
-
coverageGaps = getZeroResultSearches(dbForGaps);
|
|
575
|
-
}
|
|
576
|
-
finally {
|
|
577
|
-
closeDatabase(dbForGaps);
|
|
578
|
-
}
|
|
579
|
-
}
|
|
580
|
-
catch {
|
|
581
|
-
// best-effort
|
|
582
|
-
}
|
|
583
|
-
const actionableRefs = options.limit ? sorted.slice(0, options.limit) : sorted;
|
|
1152
|
+
const removed = new Set(archivedRefs);
|
|
1153
|
+
const postCleanupRefs = archivedRefs.length === 0 ? plannedRefs : plannedRefs.filter((r) => !removed.has(r.ref));
|
|
1154
|
+
// ── Phase 1: validation pass + schema repair (run on full postCleanupRefs) ──
|
|
1155
|
+
// Identifies refs whose on-disk asset has structural problems. Validation
|
|
1156
|
+
// failures are excluded from every downstream bucket. Run early so the
|
|
1157
|
+
// cooldown partition operates on a clean set.
|
|
584
1158
|
if (appliedCleanup) {
|
|
585
1159
|
for (const candidate of memoryCleanupPlan?.pruneCandidates ?? []) {
|
|
586
1160
|
const archived = appliedCleanup.archived.find((record) => record.ref === candidate.ref);
|
|
@@ -602,13 +1176,16 @@ async function runImprovePreparationStage(args) {
|
|
|
602
1176
|
}
|
|
603
1177
|
}
|
|
604
1178
|
const validationFailures = [];
|
|
605
|
-
for (const candidate of
|
|
1179
|
+
for (const candidate of postCleanupRefs) {
|
|
606
1180
|
try {
|
|
607
1181
|
const filePath = await findAssetFilePath(candidate.ref, options.stashDir);
|
|
608
1182
|
if (!filePath) {
|
|
609
1183
|
validationFailures.push({ ref: candidate.ref, reason: "file not found on disk" });
|
|
610
1184
|
continue;
|
|
611
1185
|
}
|
|
1186
|
+
if (path.extname(filePath).toLowerCase() !== ".md") {
|
|
1187
|
+
continue;
|
|
1188
|
+
}
|
|
612
1189
|
if (isLessonCandidate(candidate.ref)) {
|
|
613
1190
|
const raw = fs.readFileSync(filePath, "utf8");
|
|
614
1191
|
const fm = parseFrontmatter(raw).data;
|
|
@@ -621,7 +1198,7 @@ async function runImprovePreparationStage(args) {
|
|
|
621
1198
|
}
|
|
622
1199
|
}
|
|
623
1200
|
if (validationFailures.length > 0) {
|
|
624
|
-
info(`[improve] ${validationFailures.length} assets have validation issues (will
|
|
1201
|
+
info(`[improve] ${validationFailures.length} assets have validation issues (will attempt schema repair):`);
|
|
625
1202
|
for (const f of validationFailures)
|
|
626
1203
|
info(` ${f.ref}: ${f.reason}`);
|
|
627
1204
|
}
|
|
@@ -630,7 +1207,7 @@ async function runImprovePreparationStage(args) {
|
|
|
630
1207
|
// Schema repair pass: attempt to fix validation failures via LLM before skipping.
|
|
631
1208
|
if (validationFailures.length > 0 && options.repairValidationFailures !== false) {
|
|
632
1209
|
const baseConfigForRepair = options.config ?? loadConfig();
|
|
633
|
-
const llmCfg = baseConfigForRepair
|
|
1210
|
+
const llmCfg = getDefaultLlmConfig(baseConfigForRepair);
|
|
634
1211
|
if (llmCfg) {
|
|
635
1212
|
const result = await runSchemaRepairPass(validationFailures, {
|
|
636
1213
|
startMs,
|
|
@@ -645,6 +1222,9 @@ async function runImprovePreparationStage(args) {
|
|
|
645
1222
|
}
|
|
646
1223
|
}
|
|
647
1224
|
const validationFailureRefs = new Set(validationFailures.filter((f) => !repairedRefs.has(f.ref)).map((f) => f.ref));
|
|
1225
|
+
if (repairedRefs.size > 0) {
|
|
1226
|
+
info(`[improve] schema repair fixed ${repairedRefs.size}/${validationFailures.length} validation failures; ${validationFailureRefs.size} remain`);
|
|
1227
|
+
}
|
|
648
1228
|
// Phase 0.5 — structural hygiene pass
|
|
649
1229
|
let lintSummary;
|
|
650
1230
|
if (primaryStashDir) {
|
|
@@ -656,106 +1236,311 @@ async function runImprovePreparationStage(args) {
|
|
|
656
1236
|
// lint is best-effort; never block improve
|
|
657
1237
|
}
|
|
658
1238
|
}
|
|
659
|
-
|
|
1239
|
+
// O-5 / #378: Per-originator rolling error windows.
|
|
1240
|
+
// Reflexion (arXiv:2303.11366) warns that cross-task verbal critique
|
|
1241
|
+
// contamination degrades below single-shot baseline. Each originator key
|
|
1242
|
+
// ("schema-repair", "reflect") maintains its own rolling window so that
|
|
1243
|
+
// schema-repair failures are not injected as avoidPatterns into reflect calls.
|
|
1244
|
+
const recentErrors = {};
|
|
660
1245
|
const RECENT_ERRORS_CAP = 3;
|
|
661
|
-
//
|
|
1246
|
+
// Helper: append an error to an originator's rolling window, dropping the oldest entry once the window exceeds RECENT_ERRORS_CAP.
|
|
1247
|
+
function pushRecentError(originator, msg) {
|
|
1248
|
+
if (!recentErrors[originator])
|
|
1249
|
+
recentErrors[originator] = [];
|
|
1250
|
+
recentErrors[originator].push(msg);
|
|
1251
|
+
if (recentErrors[originator].length > RECENT_ERRORS_CAP)
|
|
1252
|
+
recentErrors[originator].shift();
|
|
1253
|
+
}
|
|
1254
|
+
// Seed schema-repair originator window from any schema-repair errors.
|
|
662
1255
|
for (const repair of schemaRepairs) {
|
|
663
1256
|
if (repair.outcome === "error") {
|
|
664
1257
|
const errMsg = repair.error ?? `schema repair error: ${repair.reason}`;
|
|
665
|
-
|
|
666
|
-
if (recentErrors.length > RECENT_ERRORS_CAP)
|
|
667
|
-
recentErrors.shift();
|
|
1258
|
+
pushRecentError("schema-repair", errMsg);
|
|
668
1259
|
}
|
|
669
1260
|
}
|
|
670
|
-
// ──
|
|
671
|
-
//
|
|
672
|
-
//
|
|
673
|
-
//
|
|
674
|
-
//
|
|
675
|
-
//
|
|
1261
|
+
// ── Phase 2: signal-delta eligibility sets built EARLY ────────────────────
|
|
1262
|
+
// 0.8.0 replaces the flat time-based cooldowns (which produced synchronised
|
|
1263
|
+
// waves whenever many refs cooled at the same instant — see the 2026-05-26
|
|
1264
|
+
// 54-ref simultaneous-reflect incident) with a *signal-delta* gate:
|
|
1265
|
+
//
|
|
1266
|
+
// reflectEligible(ref) ≡ latestFeedbackTs(ref) > lastReflectProposalTs(ref)
|
|
1267
|
+
// distillEligible(ref) ≡ latestFeedbackTs(ref) > lastDistillProposalTs(ref)
|
|
676
1268
|
//
|
|
677
|
-
//
|
|
678
|
-
//
|
|
679
|
-
//
|
|
680
|
-
|
|
681
|
-
|
|
682
|
-
|
|
1269
|
+
// i.e. a ref is re-eligible iff new feedback has landed since the last
|
|
1270
|
+
// proposal was generated for it. Stable content with no new signal stays
|
|
1271
|
+
// out of the queue regardless of clock time; a sudden burst of feedback
|
|
1272
|
+
// surfaces only the refs that the burst actually touches.
|
|
1273
|
+
//
|
|
1274
|
+
// The 30-day FEEDBACK_SIGNAL_WINDOW_DAYS bound still applies — only feedback
|
|
1275
|
+
// events newer than that count as "current signal". Ancient one-off
|
|
1276
|
+
// negatives don't permanently lock a ref into every run.
|
|
1277
|
+
//
|
|
1278
|
+
// High-retrieval refs (P0-A path) use a simpler "eligible once" rule: a
|
|
1279
|
+
// ref with no feedback signal but retrievalCount ≥ threshold is eligible
|
|
1280
|
+
// exactly once (no prior reflect proposal). Subsequent re-eligibility for
|
|
1281
|
+
// those refs requires either a new feedback event (then the normal
|
|
1282
|
+
// signal-delta gate applies) or human action. Documented limitation: this
|
|
1283
|
+
// path does not re-fire on retrieval-count growth alone in 0.8.0; storing
|
|
1284
|
+
// the retrieval count in proposal metadata for proper delta-tracking is
|
|
1285
|
+
// captured as future work.
|
|
1286
|
+
const FEEDBACK_SIGNAL_WINDOW_DAYS = 30;
|
|
1287
|
+
const feedbackSinceCutoff = new Date(Date.now() - daysToMs(FEEDBACK_SIGNAL_WINDOW_DAYS)).toISOString();
|
|
1288
|
+
// Build the three timestamp maps once across the entire postCleanupRefs set.
|
|
1289
|
+
// Per-ref queries would be N+1 and the planner is already the hottest path
|
|
1290
|
+
// in `akm improve`.
|
|
1291
|
+
const candidateRefs = postCleanupRefs.filter((r) => !validationFailureRefs.has(r.ref)).map((r) => r.ref);
|
|
1292
|
+
const latestFeedbackTs = buildLatestFeedbackTsMap(candidateRefs, feedbackSinceCutoff);
|
|
1293
|
+
const lastReflectProposalTs = buildLatestProposalTsMap(candidateRefs, "reflect");
|
|
1294
|
+
const lastDistillProposalTs = buildLatestProposalTsMap(candidateRefs, "distill");
|
|
1295
|
+
// Refs the distill signal-delta gate rejected at planning time. The main
|
|
1296
|
+
// loop reads this to skip distill for these refs without re-checking
|
|
1297
|
+
// eligibility per iteration.
|
|
683
1298
|
const distillCooledRefs = new Set();
|
|
684
|
-
|
|
685
|
-
|
|
686
|
-
|
|
687
|
-
|
|
688
|
-
|
|
689
|
-
|
|
690
|
-
|
|
691
|
-
|
|
692
|
-
|
|
693
|
-
|
|
694
|
-
|
|
695
|
-
|
|
696
|
-
|
|
697
|
-
|
|
698
|
-
|
|
699
|
-
|
|
700
|
-
|
|
701
|
-
|
|
702
|
-
|
|
703
|
-
|
|
704
|
-
|
|
705
|
-
latestReflect.set(e.ref, e.ts ?? "");
|
|
706
|
-
}
|
|
707
|
-
for (const [ref, lastTs] of latestReflect) {
|
|
708
|
-
if (!lastTs)
|
|
709
|
-
continue;
|
|
710
|
-
const hasAccepted = (promotedTs.get(ref) ?? "") > lastTs;
|
|
711
|
-
const hasRejected = (rejectedTs.get(ref) ?? "") > lastTs;
|
|
712
|
-
let effectiveCooldownDays = REFLECT_COOLDOWN_DAYS;
|
|
713
|
-
if (hasAccepted)
|
|
714
|
-
continue;
|
|
715
|
-
else if (hasRejected)
|
|
716
|
-
effectiveCooldownDays = Math.min(REFLECT_COOLDOWN_DAYS, 3);
|
|
717
|
-
if (Date.now() - new Date(lastTs).getTime() < effectiveCooldownDays * 24 * 60 * 60 * 1000) {
|
|
718
|
-
reflectCooledRefs.add(ref);
|
|
719
|
-
}
|
|
720
|
-
}
|
|
1299
|
+
const preCooldownCount = postCleanupRefs.length;
|
|
1300
|
+
// ── Phase 3: partition postCleanupRefs by signal-delta eligibility ────────
|
|
1301
|
+
// Three buckets (validation failures are excluded entirely):
|
|
1302
|
+
// eligibleRefs — reflect signal-delta passes (full reflect+distill
|
|
1303
|
+
// loop path; distill guard remains in the loop for
|
|
1304
|
+
// refs that fail the distill signal-delta gate).
|
|
1305
|
+
// distillOnlyRefs — reflect blocked but distill signal-delta passes
|
|
1306
|
+
// AND ref is a distill candidate.
|
|
1307
|
+
// fullySkippedCount — neither gate passes → synthetic skip action
|
|
1308
|
+
// + improve_skipped event, excluded from sort.
|
|
1309
|
+
const eligibleRefs = [];
|
|
1310
|
+
const distillOnlyRefs = [];
|
|
1311
|
+
let fullySkippedCount = 0;
|
|
1312
|
+
// O-2 (#365): explicit --scope <ref> bypasses every gate (user intent wins).
|
|
1313
|
+
const scopeRefBypass = scope.mode === "ref";
|
|
1314
|
+
for (const r of postCleanupRefs) {
|
|
1315
|
+
if (validationFailureRefs.has(r.ref))
|
|
1316
|
+
continue;
|
|
1317
|
+
if (scopeRefBypass) {
|
|
1318
|
+
eligibleRefs.push(r);
|
|
1319
|
+
continue;
|
|
721
1320
|
}
|
|
722
|
-
|
|
723
|
-
|
|
724
|
-
|
|
725
|
-
|
|
726
|
-
|
|
727
|
-
|
|
728
|
-
|
|
1321
|
+
const reflectOk = isSignalDeltaEligible(r.ref, latestFeedbackTs, lastReflectProposalTs);
|
|
1322
|
+
const distillOk = isSignalDeltaEligible(r.ref, latestFeedbackTs, lastDistillProposalTs);
|
|
1323
|
+
const isDistillCandidate = isDistillCandidateRef(r.ref, options.stashDir);
|
|
1324
|
+
if (reflectOk) {
|
|
1325
|
+
if (!distillOk && isDistillCandidate) {
|
|
1326
|
+
// Reflect passes the gate, distill does not — emit the synthetic
|
|
1327
|
+
// distill-skipped action and event up-front so the in-loop guard
|
|
1328
|
+
// does not have to re-derive eligibility.
|
|
1329
|
+
distillCooledRefs.add(r.ref);
|
|
1330
|
+
actions.push({ ref: r.ref, mode: "distill-skipped", result: { ok: true, reason: "distill signal-delta" } });
|
|
1331
|
+
appendEvent({
|
|
1332
|
+
eventType: "improve_skipped",
|
|
1333
|
+
ref: r.ref,
|
|
1334
|
+
metadata: { reason: "distill_no_new_signal" },
|
|
1335
|
+
}, eventsCtx);
|
|
729
1336
|
}
|
|
730
|
-
|
|
731
|
-
|
|
732
|
-
|
|
733
|
-
|
|
1337
|
+
else if (!distillOk) {
|
|
1338
|
+
// Not a distill candidate AND distill gate doesn't pass — just mark
|
|
1339
|
+
// distillCooled so the loop's distill section is a no-op.
|
|
1340
|
+
distillCooledRefs.add(r.ref);
|
|
734
1341
|
}
|
|
1342
|
+
eligibleRefs.push(r);
|
|
735
1343
|
}
|
|
736
|
-
|
|
737
|
-
|
|
738
|
-
|
|
739
|
-
|
|
740
|
-
|
|
741
|
-
|
|
1344
|
+
else if (distillOk && isDistillCandidate) {
|
|
1345
|
+
// Reflect blocked but distill passes → distill-only bucket.
|
|
1346
|
+
distillOnlyRefs.push(r);
|
|
1347
|
+
}
|
|
1348
|
+
else {
|
|
1349
|
+
// Reflect is blocked and distill is either blocked or not applicable (not a distill candidate) — fully skipped.
|
|
1350
|
+
fullySkippedCount++;
|
|
742
1351
|
actions.push({
|
|
743
1352
|
ref: r.ref,
|
|
744
1353
|
mode: "distill-skipped",
|
|
745
|
-
result: { ok: true, reason: "
|
|
1354
|
+
result: { ok: true, reason: "no new signal since last proposal" },
|
|
746
1355
|
});
|
|
747
|
-
appendEvent({ eventType: "improve_skipped", ref: r.ref, metadata: { reason: "
|
|
1356
|
+
appendEvent({ eventType: "improve_skipped", ref: r.ref, metadata: { reason: "no_new_signal" } }, eventsCtx);
|
|
1357
|
+
}
|
|
1358
|
+
}
|
|
1359
|
+
// ── Phase 4: signal/feedback/utility/sort on the reduced set ──────────────
|
|
1360
|
+
// Everything from here works only on (eligibleRefs ∪ distillOnlyRefs). The
|
|
1361
|
+
// fully-skipped bucket has already been routed and emitted; we deliberately
|
|
1362
|
+
// avoid spending DB/CPU on refs that cannot enter the loop.
|
|
1363
|
+
const processableRefs = [...eligibleRefs, ...distillOnlyRefs];
|
|
1364
|
+
// Gap 6: only surface feedback signals from the last 30 days so that
|
|
1365
|
+
// ancient one-off feedback events don't permanently lock an asset into
|
|
1366
|
+
// every improve run. Assets with only stale signals fall through to the
|
|
1367
|
+
// high-retrieval path (P0-A) or are skipped until new signals arrive.
|
|
1368
|
+
// (FEEDBACK_SIGNAL_WINDOW_DAYS / feedbackSinceCutoff are already defined in
|
|
1369
|
+
// Phase 2 above for the signal-delta gate; we reuse them here.)
|
|
1370
|
+
// Pre-compute feedback summary per ref in a single pass so we don't issue
|
|
1371
|
+
// two readEvents({type:"feedback", ref}) per asset (one for signal filtering,
|
|
1372
|
+
// one for ratio computation).
|
|
1373
|
+
const feedbackSummary = new Map();
|
|
1374
|
+
for (const candidate of processableRefs) {
|
|
1375
|
+
const { events } = readEvents({ type: "feedback", ref: candidate.ref });
|
|
1376
|
+
let hasSignal = false;
|
|
1377
|
+
let positive = 0;
|
|
1378
|
+
let negative = 0;
|
|
1379
|
+
for (const e of events) {
|
|
1380
|
+
if (!hasSignal &&
|
|
1381
|
+
(e.ts ?? "") >= feedbackSinceCutoff &&
|
|
1382
|
+
e.metadata !== undefined &&
|
|
1383
|
+
(typeof e.metadata.signal === "string" || typeof e.metadata.note === "string")) {
|
|
1384
|
+
hasSignal = true;
|
|
1385
|
+
}
|
|
1386
|
+
if (e.metadata?.signal === "positive")
|
|
1387
|
+
positive++;
|
|
1388
|
+
else if (e.metadata?.signal === "negative")
|
|
1389
|
+
negative++;
|
|
1390
|
+
}
|
|
1391
|
+
feedbackSummary.set(candidate.ref, { hasSignal, positive, negative });
|
|
1392
|
+
}
|
|
1393
|
+
const signalFiltered = processableRefs.filter((candidate) => feedbackSummary.get(candidate.ref)?.hasSignal === true);
|
|
1394
|
+
// P0-A: also surface zero-feedback assets that have been retrieved many times.
|
|
1395
|
+
const RETRIEVAL_COUNT_THRESHOLD = options.minRetrievalCount ?? 5;
|
|
1396
|
+
const signalBearingSet = new Set(signalFiltered.map((r) => r.ref));
|
|
1397
|
+
const noFeedbackCandidates = processableRefs.filter((r) => !signalBearingSet.has(r.ref));
|
|
1398
|
+
let highRetrievalRefs = [];
|
|
1399
|
+
let dbForRetrieval;
|
|
1400
|
+
try {
|
|
1401
|
+
dbForRetrieval = openExistingDatabase();
|
|
1402
|
+
const showEventCount = dbForRetrieval.prepare("SELECT COUNT(*) AS cnt FROM usage_events WHERE event_type = 'show'").get().cnt;
|
|
1403
|
+
if (showEventCount === 0) {
|
|
1404
|
+
warn("Warning: show events not yet in usage_events — zero-feedback fallback will match only search-retrieved assets.");
|
|
1405
|
+
}
|
|
1406
|
+
const retrievalCounts = getRetrievalCounts(dbForRetrieval, noFeedbackCandidates.map((r) => r.ref));
|
|
1407
|
+
// High-retrieval signal-delta (simplified rule, 0.8.0): a no-feedback
|
|
1408
|
+
// ref qualifies exactly once — when retrievalCount ≥ threshold AND no
|
|
1409
|
+
// prior reflect proposal exists for it. Once a reflect proposal is on
|
|
1410
|
+
// record, subsequent re-eligibility requires explicit feedback (which
|
|
1411
|
+
// flows through the normal signal-delta gate above). Tracking growth in
|
|
1412
|
+
// retrieval count would require persisting the count in proposal
|
|
1413
|
+
// metadata; deferred to a follow-up.
|
|
1414
|
+
highRetrievalRefs = noFeedbackCandidates.filter((r) => (retrievalCounts.get(r.ref) ?? 0) >= RETRIEVAL_COUNT_THRESHOLD && !lastReflectProposalTs.has(r.ref));
|
|
1415
|
+
}
|
|
1416
|
+
catch (err) {
|
|
1417
|
+
rethrowIfTestIsolationError(err);
|
|
1418
|
+
// best-effort: if DB unavailable, highRetrievalRefs stays empty
|
|
1419
|
+
}
|
|
1420
|
+
finally {
|
|
1421
|
+
if (dbForRetrieval)
|
|
1422
|
+
closeDatabase(dbForRetrieval);
|
|
1423
|
+
}
|
|
1424
|
+
// If the user explicitly scoped to a single ref, always act on it —
|
|
1425
|
+
// skip the signal/retrieval filter entirely. The filter exists to avoid
|
|
1426
|
+
// noisy "improve everything" runs; it should not gate an intentional
|
|
1427
|
+
// per-ref invocation where the user's explicit choice is the signal.
|
|
1428
|
+
//
|
|
1429
|
+
// For type/all scope: only process refs with usage signals (recent feedback
|
|
1430
|
+
// or sufficient retrievals). A stash with no signals has 0 eligible refs —
|
|
1431
|
+
// usage is the gate. Run `akm feedback <ref> --positive` or retrieve assets
|
|
1432
|
+
// to bring them into the eligible pool.
|
|
1433
|
+
const signalAndRetrievalRefs = [...signalFiltered, ...highRetrievalRefs];
|
|
1434
|
+
const mergedRefs = scope.mode === "ref" ? processableRefs : options.requireFeedbackSignal ? signalFiltered : signalAndRetrievalRefs;
|
|
1435
|
+
const utilityMap = buildUtilityMap(mergedRefs);
|
|
1436
|
+
// Load feedback ratio per ref from the pre-computed summary (no extra DB pass).
|
|
1437
|
+
const feedbackRatios = new Map();
|
|
1438
|
+
for (const ref of mergedRefs) {
|
|
1439
|
+
const summary = feedbackSummary.get(ref.ref);
|
|
1440
|
+
const positive = summary?.positive ?? 0;
|
|
1441
|
+
const negative = summary?.negative ?? 0;
|
|
1442
|
+
const total = positive + negative;
|
|
1443
|
+
// ratio = negative proportion (high = needs more improvement)
|
|
1444
|
+
feedbackRatios.set(ref.ref, total > 0 ? negative / total : 0);
|
|
1445
|
+
}
|
|
1446
|
+
// Sort: combine utility (desc) with feedback negativity (desc) — high-negative assets rank higher
|
|
1447
|
+
const sorted = [...mergedRefs].sort((a, b) => {
|
|
1448
|
+
const utilA = utilityMap.get(a.ref) ?? 0;
|
|
1449
|
+
const utilB = utilityMap.get(b.ref) ?? 0;
|
|
1450
|
+
const ratioA = feedbackRatios.get(a.ref) ?? 0;
|
|
1451
|
+
const ratioB = feedbackRatios.get(b.ref) ?? 0;
|
|
1452
|
+
// Combined score: 70% utility, 30% negative ratio
|
|
1453
|
+
const scoreA = utilA * 0.7 + ratioA * 0.3;
|
|
1454
|
+
const scoreB = utilB * 0.7 + ratioB * 0.3;
|
|
1455
|
+
return scoreB - scoreA;
|
|
1456
|
+
});
|
|
1457
|
+
// Phase 0: surface coverage gaps from zero-result search queries
|
|
1458
|
+
let coverageGaps = [];
|
|
1459
|
+
try {
|
|
1460
|
+
const dbForGaps = openExistingDatabase();
|
|
1461
|
+
try {
|
|
1462
|
+
coverageGaps = getZeroResultSearches(dbForGaps);
|
|
1463
|
+
}
|
|
1464
|
+
finally {
|
|
1465
|
+
closeDatabase(dbForGaps);
|
|
1466
|
+
}
|
|
1467
|
+
}
|
|
1468
|
+
catch (err) {
|
|
1469
|
+
rethrowIfTestIsolationError(err);
|
|
1470
|
+
// best-effort
|
|
1471
|
+
}
|
|
1472
|
+
// actionableRefs is the post-cooldown, post-validation, post-signal, post-sort
|
|
1473
|
+
// set — i.e. the genuinely processable refs in priority order. Note: this is
|
|
1474
|
+
// a semantic shift from earlier code where actionableRefs was the pre-cooldown
|
|
1475
|
+
// sorted set; the new meaning matches reality and is documented on
|
|
1476
|
+
// ImprovePreparationResult.actionableRefs.
|
|
1477
|
+
//
|
|
1478
|
+
// Final guard: drop any candidate whose backing file is no longer on disk.
|
|
1479
|
+
// Phase 1 validation captures missing files at the start of preparation, but
|
|
1480
|
+
// the gap between that check and dispatch can be minutes on large stashes —
|
|
1481
|
+
// long enough for a checkpoint / git checkout / external cleanup to delete
|
|
1482
|
+
// the asset. Empirically (improve-critical-review 2026-05-20) the single
|
|
1483
|
+
// biggest reject category was "Asset no longer exists on disk" (604/1407 =
|
|
1484
|
+
// 43%), meaning reflect/distill was producing proposals against deleted refs.
|
|
1485
|
+
// A cheap existsSync per surviving candidate eliminates that wasted work.
|
|
1486
|
+
const assetMissingOnDisk = [];
|
|
1487
|
+
const existsCheckedActionable = [];
|
|
1488
|
+
for (const candidate of sorted) {
|
|
1489
|
+
const filePath = await findAssetFilePath(candidate.ref, options.stashDir);
|
|
1490
|
+
if (filePath && fs.existsSync(filePath)) {
|
|
1491
|
+
existsCheckedActionable.push(candidate);
|
|
1492
|
+
}
|
|
1493
|
+
else {
|
|
1494
|
+
assetMissingOnDisk.push(candidate.ref);
|
|
1495
|
+
appendEvent({ eventType: "improve_skipped", ref: candidate.ref, metadata: { reason: "asset_missing_on_disk" } }, eventsCtx);
|
|
1496
|
+
}
|
|
1497
|
+
}
|
|
1498
|
+
const actionableRefs = existsCheckedActionable;
|
|
1499
|
+
// Re-split actionableRefs (sorted) into reflect-path vs distill-only-path while
|
|
1500
|
+
// preserving sort order. distillOnlyRefs participate in the sort so --limit
|
|
1501
|
+
// picks them by score, not by arbitrary position.
|
|
1502
|
+
const distillOnlyRefSetForSort = new Set(distillOnlyRefs.map((r) => r.ref));
|
|
1503
|
+
const reflectAndDistillRefsAfterSort = [];
|
|
1504
|
+
const distillOnlyRefsAfterSort = [];
|
|
1505
|
+
for (const r of actionableRefs) {
|
|
1506
|
+
if (distillOnlyRefSetForSort.has(r.ref)) {
|
|
1507
|
+
distillOnlyRefsAfterSort.push(r);
|
|
748
1508
|
}
|
|
1509
|
+
else {
|
|
1510
|
+
reflectAndDistillRefsAfterSort.push(r);
|
|
1511
|
+
}
|
|
1512
|
+
}
|
|
1513
|
+
// ── Phase 5: --limit applies to the post-cooldown actionable set ──────────
|
|
1514
|
+
const allLoopRefs = [...reflectAndDistillRefsAfterSort, ...distillOnlyRefsAfterSort];
|
|
1515
|
+
const loopRefs = options.limit ? allLoopRefs.slice(0, options.limit) : allLoopRefs;
|
|
1516
|
+
// Update the returned distillOnlyRefs to the sorted order so callers see the
|
|
1517
|
+
// ranked view (loop stage uses it as a Set so order is irrelevant, but the
|
|
1518
|
+
// shape change keeps downstream consumers consistent).
|
|
1519
|
+
const distillOnlyRefsResult = distillOnlyRefsAfterSort;
|
|
1520
|
+
const totalReflectBlocked = fullySkippedCount + distillOnlyRefs.length;
|
|
1521
|
+
if (totalReflectBlocked > 0) {
|
|
1522
|
+
info(`[improve] ${totalReflectBlocked} of ${preCooldownCount} indexed refs blocked by reflect signal-delta ` +
|
|
1523
|
+
`(${fullySkippedCount} fully skipped, ${distillOnlyRefs.length} routed to distill-only)`);
|
|
1524
|
+
}
|
|
1525
|
+
if (signalAndRetrievalRefs.length > 0) {
|
|
1526
|
+
info(`[improve] ${signalAndRetrievalRefs.length} refs with usage signals (${signalFiltered.length} feedback, ${highRetrievalRefs.length} high-retrieval)`);
|
|
749
1527
|
}
|
|
750
1528
|
if (validationFailureRefs.size > 0) {
|
|
751
|
-
info(`[improve] ${validationFailureRefs.size}
|
|
1529
|
+
info(`[improve] ${validationFailureRefs.size} with validation failures excluded`);
|
|
752
1530
|
}
|
|
1531
|
+
if (assetMissingOnDisk.length > 0) {
|
|
1532
|
+
info(`[improve] ${assetMissingOnDisk.length} candidates dropped — file not on disk`);
|
|
1533
|
+
}
|
|
1534
|
+
const deferredCount = actionableRefs.length - loopRefs.length;
|
|
1535
|
+
info(`[improve] ${actionableRefs.length} actionable; ${loopRefs.length} will be processed` +
|
|
1536
|
+
(options.limit && deferredCount > 0 ? ` (--limit ${options.limit} applied; ${deferredCount} deferred)` : ""));
|
|
753
1537
|
return {
|
|
754
1538
|
actions,
|
|
755
1539
|
cleanupWarnings,
|
|
756
1540
|
appliedCleanup,
|
|
757
1541
|
memoryIndexHealth,
|
|
758
1542
|
executionLogCandidates,
|
|
1543
|
+
extract: extractResults,
|
|
759
1544
|
actionableRefs,
|
|
760
1545
|
signalBearingSet,
|
|
761
1546
|
validationFailures,
|
|
@@ -763,18 +1548,108 @@ async function runImprovePreparationStage(args) {
|
|
|
763
1548
|
lintSummary,
|
|
764
1549
|
loopRefs,
|
|
765
1550
|
distillCooledRefs,
|
|
766
|
-
|
|
1551
|
+
distillOnlyRefs: distillOnlyRefsResult,
|
|
767
1552
|
coverageGaps,
|
|
768
1553
|
recentErrors,
|
|
1554
|
+
utilityMap,
|
|
1555
|
+
gateAutoAcceptedCount,
|
|
769
1556
|
};
|
|
770
1557
|
}
|
|
1558
|
+
// TODO(refactor): 13 args including `actions`/`recentErrors` mutation channels. Restructure into immutable plan + mutable context objects — deferred to dedicated refactor with isolated testing.
|
|
771
1559
|
async function runImproveLoopStage(args) {
|
|
772
|
-
const { scope, options, primaryStashDir, reflectFn, distillFn, loopRefs, actions, signalBearingSet, distillCooledRefs, recentErrors, startMs, budgetMs, } = args;
|
|
1560
|
+
const { scope, options, primaryStashDir, reflectFn, distillFn, loopRefs, actions, signalBearingSet, distillCooledRefs, distillOnlyRefs, recentErrors, rejectedProposalsByRef, utilityMap, startMs, budgetMs, eventsCtx, improveProfile, } = args;
|
|
1561
|
+
// O-1 (#364): compute remaining budget at call time so each sub-call
|
|
1562
|
+
// receives only its fair share of the wall-clock budget.
|
|
1563
|
+
const remainingBudgetMs = () => Math.max(0, budgetMs - (Date.now() - startMs));
|
|
773
1564
|
const RECENT_ERRORS_CAP = 3;
|
|
774
|
-
|
|
1565
|
+
// R-2 / #389: Self-Consistency multi-sample voting helpers.
|
|
1566
|
+
// Wang et al. arXiv:2203.11171 — N=3 samples beat single-shot on reasoning tasks.
|
|
1567
|
+
const SC_THRESHOLD = options.selfConsistencyThreshold ?? 0.7;
|
|
1568
|
+
const SC_N = Math.min(Math.max(2, options.selfConsistencyN ?? 3), 5);
|
|
1569
|
+
/**
 * Jaccard similarity between the whitespace-delimited token sets of two strings.
 *
 * Each input is split on runs of whitespace, empty fragments are discarded,
 * and duplicate tokens collapse because the tokens are held in a Set.
 *
 * @param {string} a - First text to compare.
 * @param {string} b - Second text to compare.
 * @returns {number} |A ∩ B| / |A ∪ B|, a value in [0, 1]. When neither string
 *   contains any token (both empty or whitespace-only) the result is 1, i.e.
 *   the two are treated as identical. When only one side has tokens the
 *   result is 0.
 */
function jaccardSimilarity(a, b) {
    const tokenize = (text) => new Set(text.split(/\s+/).filter(Boolean));
    const tokensA = tokenize(a);
    const tokensB = tokenize(b);
    // Two token-less inputs would otherwise be 0/0; score them as a perfect match.
    if (tokensA.size === 0 && tokensB.size === 0) {
        return 1;
    }
    let shared = 0;
    for (const token of tokensA) {
        if (tokensB.has(token)) {
            shared += 1;
        }
    }
    // Inclusion–exclusion gives the union size without building a third Set.
    const unionSize = tokensA.size + tokensB.size - shared;
    return unionSize > 0 ? shared / unionSize : 0;
}
|
|
1586
|
+
/**
 * Select the representative result among several reflect samples.
 *
 * Only results with a truthy `ok` take part in the vote. Each of them is
 * scored by its mean `jaccardSimilarity` (over `proposal.payload.content`,
 * with a missing content treated as "") against every other successful
 * result; the highest mean wins, and on a tie the earliest sample is kept.
 *
 * @param {Array<object>} results - Sample results in the order they were produced.
 * @returns {object} The winning successful result. With exactly one
 *   successful result that result is returned as-is. With none, the first
 *   entry of `results` is returned, or a synthetic failure record
 *   (`reason: "non_zero_exit"`) when `results` is empty.
 */
function pickMajorityVote(results) {
    const okResults = results.filter((sample) => sample.ok);
    if (okResults.length === 0) {
        return (results[0] ?? {
            schemaVersion: 1,
            ok: false,
            reason: "non_zero_exit",
            error: "all samples failed",
            exitCode: null,
        });
    }
    if (okResults.length === 1) {
        return okResults[0];
    }
    const contentOf = (sample) => sample.proposal.payload.content ?? "";
    const peerCount = okResults.length - 1;
    let winnerIdx = 0;
    let winnerScore = -1;
    okResults.forEach((candidate, i) => {
        let similaritySum = 0;
        okResults.forEach((peer, j) => {
            if (j !== i) {
                similaritySum += jaccardSimilarity(contentOf(candidate), contentOf(peer));
            }
        });
        const meanSimilarity = similaritySum / peerCount;
        // Strict comparison: an equal score never displaces an earlier sample.
        if (meanSimilarity > winnerScore) {
            winnerScore = meanSimilarity;
            winnerIdx = i;
        }
    });
    return okResults[winnerIdx] ?? okResults[0];
}
|
|
1620
|
+
// O-5 / #378: record an error in the rolling window kept for one originator.
// `recentErrors` is keyed by originator; each list holds at most
// RECENT_ERRORS_CAP messages, with the oldest dropped once the cap is exceeded.
function pushRecentError(originator, msg) {
    // Create the originator's list on first use.
    const bucket = recentErrors[originator] || (recentErrors[originator] = []);
    bucket.push(msg);
    if (bucket.length > RECENT_ERRORS_CAP) {
        bucket.shift();
    }
}
|
|
1628
|
+
// Build a Set for O(1) membership test — these refs skip the reflect call (Bug D2).
|
|
1629
|
+
const distillOnlyRefSet = new Set(distillOnlyRefs.map((r) => r.ref));
|
|
775
1630
|
let completedCount = 0;
|
|
776
|
-
let
|
|
1631
|
+
let reflectsWithErrorContext = 0;
|
|
777
1632
|
const memoryRefsForInference = new Set();
|
|
1633
|
+
// Pre-load all pending proposals once instead of querying per asset in the loop.
|
|
1634
|
+
const dedupeStashDirForProposals = primaryStashDir ?? options.stashDir;
|
|
1635
|
+
const pendingProposalRefSet = new Set(dedupeStashDirForProposals
|
|
1636
|
+
? listProposals(dedupeStashDirForProposals, { status: "pending" }).map((p) => p.ref)
|
|
1637
|
+
: []);
|
|
1638
|
+
let gateAutoAcceptedCount = 0;
|
|
1639
|
+
const reflectGateCfg = makeGateConfig("reflect", {
|
|
1640
|
+
globalThreshold: options.autoAccept,
|
|
1641
|
+
dryRun: options.dryRun ?? false,
|
|
1642
|
+
stashDir: primaryStashDir,
|
|
1643
|
+
config: options.config ?? loadConfig(),
|
|
1644
|
+
eventsCtx,
|
|
1645
|
+
});
|
|
1646
|
+
const distillGateCfg = makeGateConfig("distill", {
|
|
1647
|
+
globalThreshold: options.autoAccept,
|
|
1648
|
+
dryRun: options.dryRun ?? false,
|
|
1649
|
+
stashDir: primaryStashDir,
|
|
1650
|
+
config: options.config ?? loadConfig(),
|
|
1651
|
+
eventsCtx,
|
|
1652
|
+
});
|
|
778
1653
|
for (const planned of loopRefs) {
|
|
779
1654
|
if (Date.now() - startMs >= budgetMs) {
|
|
780
1655
|
const remaining = loopRefs.length - completedCount;
|
|
@@ -786,7 +1661,15 @@ async function runImproveLoopStage(args) {
|
|
|
786
1661
|
reason: "budget_exhausted",
|
|
787
1662
|
remaining,
|
|
788
1663
|
},
|
|
789
|
-
});
|
|
1664
|
+
}, eventsCtx);
|
|
1665
|
+
// B11: Emit improve_skipped for all remaining assets that will not be processed.
|
|
1666
|
+
for (const remainingRef of loopRefs.slice(completedCount + 1)) {
|
|
1667
|
+
appendEvent({
|
|
1668
|
+
eventType: "improve_skipped",
|
|
1669
|
+
ref: remainingRef.ref,
|
|
1670
|
+
metadata: { reason: "budget_exhausted_batch", remaining: loopRefs.length - completedCount - 1 },
|
|
1671
|
+
}, eventsCtx);
|
|
1672
|
+
}
|
|
790
1673
|
actions.push({
|
|
791
1674
|
ref: planned.ref,
|
|
792
1675
|
mode: "error",
|
|
@@ -795,59 +1678,243 @@ async function runImproveLoopStage(args) {
|
|
|
795
1678
|
break;
|
|
796
1679
|
}
|
|
797
1680
|
try {
|
|
798
|
-
|
|
799
|
-
|
|
800
|
-
|
|
1681
|
+
// Bug D2: distillOnlyRefs skip the reflect call but still run the distill path.
|
|
1682
|
+
// Bug D1: in-loop distill-cooldown check removed — distill-cooled candidates
|
|
1683
|
+
// have their synthetic actions emitted in runImprovePreparationStage.
|
|
1684
|
+
const isDistillOnly = distillOnlyRefSet.has(planned.ref);
|
|
1685
|
+
const parsedPlannedRef = parseAssetRef(planned.ref);
|
|
1686
|
+
// B6: derived memories are machine-generated; skip reflect to avoid noisy proposals.
|
|
1687
|
+
// shouldDistillMemoryRef already returns false for .derived refs, so the distill
|
|
1688
|
+
// path is also a no-op for them — we just avoid unnecessary agent spawns.
|
|
1689
|
+
// D2: distillOnlyRefs also skip the reflect call (reflect-cooled, distill path only).
|
|
1690
|
+
if (!isDistillOnly && !planned.ref.endsWith(".derived")) {
|
|
1691
|
+
// Type guard: skip reflect for unsupported types (script, vault, task, etc.)
|
|
1692
|
+
// and raw wiki directories, driven by the active improve profile.
|
|
1693
|
+
const reflectSkip = shouldSkipRef(planned.ref, "reflect", improveProfile);
|
|
1694
|
+
if (reflectSkip.skip) {
|
|
1695
|
+
actions.push({
|
|
1696
|
+
ref: planned.ref,
|
|
1697
|
+
mode: "reflect-skipped",
|
|
1698
|
+
result: { ok: true, reason: reflectSkip.reason },
|
|
1699
|
+
});
|
|
1700
|
+
}
|
|
1701
|
+
else {
|
|
1702
|
+
// O-5 / #378: only inject reflect-originator errors into the reflect call.
|
|
1703
|
+
// Cross-task errors (e.g. schema-repair) must NOT contaminate reflect prompts.
|
|
1704
|
+
const reflectErrors = recentErrors.reflect ?? [];
|
|
1705
|
+
if (reflectErrors.length > 0)
|
|
1706
|
+
reflectsWithErrorContext++;
|
|
1707
|
+
// O-1 (#364): pass remaining budget as timeoutMs so the agent spawn is
|
|
1708
|
+
// bounded by the wall-clock deadline rather than the default per-profile timeout.
|
|
1709
|
+
const reflectBudgetMs = remainingBudgetMs();
|
|
1710
|
+
// Wire profile.processes.reflect.{mode, profile, timeoutMs} into the reflect
|
|
1711
|
+
// dispatch when present. Falls back to akmReflect's own config-based resolution
|
|
1712
|
+
// (profiles.improve.<name>.processes.reflect → defaults.llm) when the profile
|
|
1713
|
+
// does not specify.
|
|
1714
|
+
const reflectProfileRunner = resolveImproveProcessRunnerFromProfile(improveProfile.processes?.reflect, options.config ?? loadConfig());
|
|
1715
|
+
const reflectCallArgs = {
|
|
1716
|
+
ref: planned.ref,
|
|
1717
|
+
task: options.task,
|
|
1718
|
+
...(options.stashDir ? { stashDir: options.stashDir } : {}),
|
|
1719
|
+
...(reflectErrors.length > 0 ? { avoidPatterns: [...reflectErrors] } : {}),
|
|
1720
|
+
agentProcess: options.agentProcess ?? "reflect",
|
|
1721
|
+
eventSource: "improve",
|
|
1722
|
+
...(reflectBudgetMs > 0 ? { timeoutMs: reflectBudgetMs } : {}),
|
|
1723
|
+
...(reflectProfileRunner ? { runner: reflectProfileRunner } : {}),
|
|
1724
|
+
};
|
|
1725
|
+
// R-2 / #389: Self-consistency multi-sample voting for high-utility refs.
|
|
1726
|
+
// Self-Consistency arXiv:2203.11171 — N=3 samples beat single-shot quality.
|
|
1727
|
+
const refUtility = utilityMap.get(planned.ref) ?? 0;
|
|
1728
|
+
const useConsistency = refUtility >= SC_THRESHOLD && SC_N >= 2;
|
|
1729
|
+
let reflectResult;
|
|
1730
|
+
if (useConsistency) {
|
|
1731
|
+
const samples = [];
|
|
1732
|
+
for (let s = 0; s < SC_N; s++) {
|
|
1733
|
+
if (remainingBudgetMs() <= 0)
|
|
1734
|
+
break;
|
|
1735
|
+
// draftMode: skip DB write so each sample doesn't create a proposal.
|
|
1736
|
+
samples.push(await reflectFn({ ...reflectCallArgs, draftMode: true }));
|
|
1737
|
+
}
|
|
1738
|
+
const winner = pickMajorityVote(samples.length > 0 ? samples : [await reflectFn({ ...reflectCallArgs, draftMode: true })]);
|
|
1739
|
+
// Persist only the majority-vote winner as a single real proposal.
|
|
1740
|
+
if (winner.ok && primaryStashDir) {
|
|
1741
|
+
const persistResult = createProposal(primaryStashDir, {
|
|
1742
|
+
ref: winner.proposal.ref,
|
|
1743
|
+
source: "reflect",
|
|
1744
|
+
sourceRun: `reflect-sc-${Date.now()}`,
|
|
1745
|
+
payload: winner.proposal.payload,
|
|
1746
|
+
});
|
|
1747
|
+
reflectResult = isProposalSkipped(persistResult)
|
|
1748
|
+
? {
|
|
1749
|
+
schemaVersion: 1,
|
|
1750
|
+
ok: false,
|
|
1751
|
+
reason: "cooldown",
|
|
1752
|
+
error: `SC proposal skipped: ${persistResult.message}`,
|
|
1753
|
+
ref: winner.ref,
|
|
1754
|
+
exitCode: null,
|
|
1755
|
+
}
|
|
1756
|
+
: { ...winner, proposal: persistResult };
|
|
1757
|
+
}
|
|
1758
|
+
else {
|
|
1759
|
+
reflectResult = winner;
|
|
1760
|
+
}
|
|
1761
|
+
}
|
|
1762
|
+
else {
|
|
1763
|
+
reflectResult = await reflectFn(reflectCallArgs);
|
|
1764
|
+
}
|
|
1765
|
+
const isCooldown = !reflectResult.ok && reflectResult.reason === "cooldown";
|
|
1766
|
+
// Content-policy guard hits (reflect size-rail rejections) are NOT
|
|
1767
|
+
// LLM faults — the agent responded fine, the downstream guard
|
|
1768
|
+
// blocked the output. Route them to a distinct `reflect-guard-rejected`
|
|
1769
|
+
// mode so health metrics can split deterministic guard hits out of
|
|
1770
|
+
// true LLM failures. See
|
|
1771
|
+
// `/tmp/akm-health-investigations/metrics-taxonomy-review.md` §1a.
|
|
1772
|
+
const isGuardReject = !reflectResult.ok && reflectResult.reason === "content_policy_reject";
|
|
1773
|
+
// Type-guard rejection (reflect refused a script/vault/task ref) is
|
|
1774
|
+
// also NOT an LLM failure — the LLM is never invoked. Route to the
|
|
1775
|
+
// existing `reflect-skipped` bucket so it does not inflate the
|
|
1776
|
+
// failure-rate numerator. ~9% of `reflect-failed` events in the
|
|
1777
|
+
// user's stack were this case; see review §1a row "Reflect refused
|
|
1778
|
+
// asset type".
|
|
1779
|
+
const isTypeRefused = !reflectResult.ok && reflectResult.reason === "unsupported_type";
|
|
1780
|
+
actions.push({
|
|
1781
|
+
ref: planned.ref,
|
|
1782
|
+
mode: reflectResult.ok
|
|
1783
|
+
? "reflect"
|
|
1784
|
+
: isCooldown
|
|
1785
|
+
? "reflect-cooldown"
|
|
1786
|
+
: isGuardReject
|
|
1787
|
+
? "reflect-guard-rejected"
|
|
1788
|
+
: isTypeRefused
|
|
1789
|
+
? "reflect-skipped"
|
|
1790
|
+
: "reflect-failed",
|
|
1791
|
+
result: reflectResult,
|
|
1792
|
+
});
|
|
1793
|
+
// Cooldown skips, guard rejects, and type-refused skips are not
|
|
1794
|
+
// failures — do not pollute recentErrors with them (those get
|
|
1795
|
+
// injected as `avoidPatterns` into the next reflect prompt). Guard
|
|
1796
|
+
// rejects ARE worth showing the LLM as a learn-signal so the next
|
|
1797
|
+
// iteration sees "your last expansion was too large"; type-refused
|
|
1798
|
+
// is deterministic and adds no learning signal.
|
|
1799
|
+
if (!reflectResult.ok && !isCooldown && !isTypeRefused) {
|
|
1800
|
+
const errMsg = reflectResult.error ?? reflectResult.reason ?? "unknown reflect error";
|
|
1801
|
+
pushRecentError("reflect", errMsg);
|
|
1802
|
+
}
|
|
1803
|
+
// improve_reflect_outcome — per-asset metric for tuning the reflect path.
|
|
1804
|
+
appendEvent({
|
|
1805
|
+
eventType: "improve_reflect_outcome",
|
|
1806
|
+
ref: planned.ref,
|
|
1807
|
+
metadata: {
|
|
1808
|
+
ok: reflectResult.ok,
|
|
1809
|
+
durationMs: reflectResult.ok ? reflectResult.durationMs : undefined,
|
|
1810
|
+
agentProfile: reflectResult.ok ? reflectResult.agentProfile : undefined,
|
|
1811
|
+
reason: reflectResult.ok ? undefined : reflectResult.reason,
|
|
1812
|
+
},
|
|
1813
|
+
}, eventsCtx);
|
|
1814
|
+
if (reflectResult.ok) {
|
|
1815
|
+
gateAutoAcceptedCount += (await runAutoAcceptGate([{ proposalId: reflectResult.proposal.id, confidence: reflectResult.proposal.confidence }], reflectGateCfg)).promoted.length;
|
|
1816
|
+
}
|
|
1817
|
+
} // end else (reflect type/profile check)
|
|
1818
|
+
}
|
|
1819
|
+
else if (!isDistillOnly && planned.ref.endsWith(".derived")) {
|
|
1820
|
+
// B6: .derived refs skip reflect; record synthetic skip action.
|
|
801
1821
|
actions.push({
|
|
802
1822
|
ref: planned.ref,
|
|
803
1823
|
mode: "distill-skipped",
|
|
804
|
-
result: { ok: true, reason: "
|
|
1824
|
+
result: { ok: true, reason: "derived-memory-reflect-skipped" },
|
|
805
1825
|
});
|
|
806
|
-
completedCount++;
|
|
807
1826
|
appendEvent({
|
|
808
1827
|
eventType: "improve_skipped",
|
|
809
1828
|
ref: planned.ref,
|
|
810
|
-
metadata: { reason: "
|
|
811
|
-
});
|
|
812
|
-
info(`[improve] ${completedCount}/${loopRefs.length} ${planned.ref} (distill cooldown)`);
|
|
813
|
-
continue;
|
|
1829
|
+
metadata: { reason: "derived_memory_reflect_skipped" },
|
|
1830
|
+
}, eventsCtx);
|
|
814
1831
|
}
|
|
815
|
-
|
|
816
|
-
crossStepErrorsInjected++;
|
|
817
|
-
const reflectResult = await reflectFn({
|
|
818
|
-
ref: planned.ref,
|
|
819
|
-
task: options.task,
|
|
820
|
-
...(options.stashDir ? { stashDir: options.stashDir } : {}),
|
|
821
|
-
...(recentErrors.length > 0 ? { avoidPatterns: [...recentErrors] } : {}),
|
|
822
|
-
agentProcess: options.agentProcess ?? "reflect",
|
|
823
|
-
});
|
|
824
|
-
actions.push({ ref: planned.ref, mode: "reflect", result: reflectResult });
|
|
825
|
-
if (!reflectResult.ok) {
|
|
826
|
-
const errMsg = reflectResult.error ?? reflectResult.reason ?? "unknown reflect error";
|
|
827
|
-
recentErrors.push(errMsg);
|
|
828
|
-
if (recentErrors.length > RECENT_ERRORS_CAP)
|
|
829
|
-
recentErrors.shift();
|
|
830
|
-
}
|
|
831
|
-
const parsedPlannedRef = parseAssetRef(planned.ref);
|
|
1832
|
+
// isDistillOnly refs: no reflect action emitted — proceed directly to distill path below.
|
|
832
1833
|
const hasRecentFeedbackSignal = signalBearingSet.has(planned.ref);
|
|
833
1834
|
const explicitRefScope = scope.mode === "ref";
|
|
834
|
-
|
|
835
|
-
|
|
836
|
-
|
|
1835
|
+
// Profile gate: apply the full type-filter / raw-wiki / disabled rules to
|
|
1836
|
+
// distill so callers who configure `profile.processes.distill.allowedTypes`
|
|
1837
|
+
// or land on raw-wiki refs get a recorded skip action instead of silently
|
|
1838
|
+
// proceeding.
|
|
1839
|
+
const distillSkip = shouldSkipRef(planned.ref, "distill", improveProfile);
|
|
1840
|
+
if (distillSkip.skip) {
|
|
1841
|
+
actions.push({
|
|
1842
|
+
ref: planned.ref,
|
|
1843
|
+
mode: "distill-skipped",
|
|
1844
|
+
result: { ok: true, reason: distillSkip.reason },
|
|
1845
|
+
});
|
|
1846
|
+
completedCount++;
|
|
1847
|
+
info(`[improve] ${completedCount}/${loopRefs.length} ${planned.ref}`);
|
|
1848
|
+
continue;
|
|
1849
|
+
}
|
|
1850
|
+
// See `isDistillCandidateRef` — excludes `lesson:*` (and anything else in
|
|
1851
|
+
// DISTILL_REFUSED_INPUT_TYPES) so distill never gets queued for an input
|
|
1852
|
+
// it will refuse.
|
|
1853
|
+
const shouldAttemptDistill = isDistillCandidateRef(planned.ref, options.stashDir);
|
|
1854
|
+
const skipMemoryDistillForWeakSignal = !isDistillOnly && parsedPlannedRef.type === "memory" && !hasRecentFeedbackSignal && !explicitRefScope;
|
|
1855
|
+
// distillCooledRefs guard: pre-filter emitted synthetic actions for distill-candidate
|
|
1856
|
+
// refs; non-candidate refs in the set are blocked here.
|
|
1857
|
+
// O-2 (#365): bypass the distill cooldown when the user explicitly targeted
|
|
1858
|
+
// this ref via --scope — their intent overrides unattended-run policies.
|
|
1859
|
+
if (shouldAttemptDistill &&
|
|
1860
|
+
!skipMemoryDistillForWeakSignal &&
|
|
1861
|
+
(!distillCooledRefs.has(planned.ref) || explicitRefScope)) {
|
|
1862
|
+
// TODO(refactor): single call site needs both lesson+knowledge refs for proposal dedup. If a third target ref type is added, extract deriveAllTargetRefs(inputRef): string[].
|
|
837
1863
|
const lessonRef = deriveLessonRef(planned.ref);
|
|
1864
|
+
const knowledgeRef = deriveKnowledgeRef(planned.ref);
|
|
838
1865
|
const dedupeStashDir = primaryStashDir ?? options.stashDir;
|
|
839
1866
|
if (dedupeStashDir) {
|
|
840
|
-
|
|
841
|
-
|
|
1867
|
+
// B2: check both lesson ref and knowledge ref since auto-promoted memories
|
|
1868
|
+
// create knowledge: proposals, not lesson: proposals.
|
|
1869
|
+
const hasExistingPending = pendingProposalRefSet.has(lessonRef) || pendingProposalRefSet.has(knowledgeRef);
|
|
1870
|
+
if (hasExistingPending) {
|
|
842
1871
|
actions.push({
|
|
843
1872
|
ref: planned.ref,
|
|
844
1873
|
mode: "distill-skipped",
|
|
845
1874
|
result: { ok: true, reason: "pending proposal exists" },
|
|
846
1875
|
});
|
|
1876
|
+
appendEvent({
|
|
1877
|
+
eventType: "improve_skipped",
|
|
1878
|
+
ref: planned.ref,
|
|
1879
|
+
metadata: { reason: "pending_proposal_exists" },
|
|
1880
|
+
}, eventsCtx);
|
|
847
1881
|
completedCount++;
|
|
848
1882
|
info(`[improve] ${completedCount}/${loopRefs.length} ${planned.ref}`);
|
|
849
1883
|
continue;
|
|
850
1884
|
}
|
|
1885
|
+
// D-2 (#370): reject-aware cooldown for distill. When the reviewer
|
|
1886
|
+
// recently rejected a distilled lesson or knowledge proposal for this
|
|
1887
|
+
// asset, skip re-distillation for a 1-day grace window. Prevents the
|
|
1888
|
+
// same rejected proposal from being regenerated immediately. The
|
|
1889
|
+
// window is fixed (the 0.8.0 redesign moved per-ref cooldowns to
|
|
1890
|
+
// signal-delta gates and dropped --distill-cooldown-days; a short
|
|
1891
|
+
// reject grace is preserved here so a fresh rejection isn't
|
|
1892
|
+
// overridden by the same run).
|
|
1893
|
+
// References: ExpeL arXiv:2308.10144, STaR arXiv:2203.14465.
|
|
1894
|
+
const DISTILL_REJECT_COOLDOWN_MS = daysToMs(1);
|
|
1895
|
+
const recentlyRejectedLesson = !explicitRefScope && // O-2: bypass when --scope <ref> is explicit
|
|
1896
|
+
(rejectedProposalsByRef.has(lessonRef) || rejectedProposalsByRef.has(knowledgeRef));
|
|
1897
|
+
if (recentlyRejectedLesson) {
|
|
1898
|
+
const rejectedEntry = rejectedProposalsByRef.get(lessonRef) ?? rejectedProposalsByRef.get(knowledgeRef);
|
|
1899
|
+
const rejectedAgeMs = rejectedEntry ? Date.now() - new Date(rejectedEntry.ts).getTime() : 0;
|
|
1900
|
+
if (rejectedAgeMs < DISTILL_REJECT_COOLDOWN_MS) {
|
|
1901
|
+
actions.push({
|
|
1902
|
+
ref: planned.ref,
|
|
1903
|
+
mode: "distill-skipped",
|
|
1904
|
+
result: { ok: true, reason: "distill reject grace window" },
|
|
1905
|
+
});
|
|
1906
|
+
appendEvent({
|
|
1907
|
+
eventType: "improve_skipped",
|
|
1908
|
+
ref: planned.ref,
|
|
1909
|
+
metadata: {
|
|
1910
|
+
reason: "distill_reject_grace_window",
|
|
1911
|
+
},
|
|
1912
|
+
}, eventsCtx);
|
|
1913
|
+
completedCount++;
|
|
1914
|
+
info(`[improve] ${completedCount}/${loopRefs.length} ${planned.ref}`);
|
|
1915
|
+
continue;
|
|
1916
|
+
}
|
|
1917
|
+
}
|
|
851
1918
|
}
|
|
852
1919
|
const distillResult = await distillFn({
|
|
853
1920
|
ref: planned.ref,
|
|
@@ -855,6 +1922,9 @@ async function runImproveLoopStage(args) {
|
|
|
855
1922
|
...(options.stashDir ? { stashDir: options.stashDir } : {}),
|
|
856
1923
|
});
|
|
857
1924
|
actions.push({ ref: planned.ref, mode: "distill", result: distillResult });
|
|
1925
|
+
if (distillResult.outcome === "queued" && distillResult.proposal) {
|
|
1926
|
+
gateAutoAcceptedCount += (await runAutoAcceptGate([{ proposalId: distillResult.proposal.id, confidence: distillResult.proposal.confidence }], distillGateCfg)).promoted.length;
|
|
1927
|
+
}
|
|
858
1928
|
if (parsedPlannedRef.type === "memory") {
|
|
859
1929
|
const promotedToKnowledge = distillResult.outcome === "queued" && distillResult.proposalKind === "knowledge";
|
|
860
1930
|
if (!promotedToKnowledge)
|
|
@@ -874,17 +1944,18 @@ async function runImproveLoopStage(args) {
|
|
|
874
1944
|
slug: `${slug}-${Date.now()}`,
|
|
875
1945
|
});
|
|
876
1946
|
}
|
|
877
|
-
|
|
878
|
-
|
|
1947
|
+
// D6: use pre-loaded map instead of per-iteration DB query
|
|
1948
|
+
const rejectedProposalEvent = rejectedProposalsByRef.get(planned.ref);
|
|
1949
|
+
if (rejectedProposalEvent && primaryStashDir) {
|
|
879
1950
|
const slug = planned.ref
|
|
880
1951
|
.replace(/[^a-z0-9]/gi, "-")
|
|
881
1952
|
.toLowerCase()
|
|
882
1953
|
.slice(0, 60);
|
|
883
1954
|
writeEvalCase(primaryStashDir, {
|
|
884
1955
|
ref: planned.ref,
|
|
885
|
-
failureReason:
|
|
1956
|
+
failureReason: rejectedProposalEvent.metadata?.reason ?? "proposal rejected",
|
|
886
1957
|
assetType: parseAssetRef(planned.ref).type ?? "unknown",
|
|
887
|
-
rejectedAt: new Date(
|
|
1958
|
+
rejectedAt: new Date(rejectedProposalEvent.ts).getTime(),
|
|
888
1959
|
source: "proposal_rejected",
|
|
889
1960
|
slug: `${slug}-rejected`,
|
|
890
1961
|
});
|
|
@@ -900,51 +1971,111 @@ async function runImproveLoopStage(args) {
|
|
|
900
1971
|
eventType: "improve_skipped",
|
|
901
1972
|
ref: planned.ref,
|
|
902
1973
|
metadata: { reason: "memory_distill_requires_feedback" },
|
|
903
|
-
});
|
|
1974
|
+
}, eventsCtx);
|
|
904
1975
|
}
|
|
905
1976
|
}
|
|
906
1977
|
catch (err) {
|
|
907
|
-
|
|
908
|
-
|
|
909
|
-
|
|
910
|
-
|
|
911
|
-
|
|
1978
|
+
// B7: UsageError thrown by akmDistill on validation_failed should be recorded
|
|
1979
|
+
// as mode:"distill" with outcome:"validation_failed", NOT as a generic error.
|
|
1980
|
+
// The distill_invoked event was already emitted inside akmDistill before the throw.
|
|
1981
|
+
if (err instanceof UsageError) {
|
|
1982
|
+
actions.push({
|
|
1983
|
+
ref: planned.ref,
|
|
1984
|
+
mode: "distill",
|
|
1985
|
+
result: { ok: false, outcome: "validation_failed", error: err.message },
|
|
1986
|
+
});
|
|
1987
|
+
}
|
|
1988
|
+
else {
|
|
1989
|
+
actions.push({
|
|
1990
|
+
ref: planned.ref,
|
|
1991
|
+
mode: "error",
|
|
1992
|
+
result: { ok: false, error: err instanceof Error ? err.message : String(err) },
|
|
1993
|
+
});
|
|
1994
|
+
}
|
|
912
1995
|
}
|
|
913
1996
|
completedCount++;
|
|
914
1997
|
info(`[improve] ${completedCount}/${loopRefs.length} ${planned.ref}`);
|
|
915
1998
|
}
|
|
916
|
-
return {
|
|
1999
|
+
return { reflectsWithErrorContext, memoryRefsForInference, gateAutoAcceptedCount };
|
|
917
2000
|
}
|
|
918
2001
|
async function runImprovePostLoopStage(args) {
|
|
919
|
-
const { scope, options, primaryStashDir, actionableRefs, appliedCleanup, cleanupWarnings, memorySummary, memoryRefsForInference, reindexFn, } = args;
|
|
2002
|
+
const { scope, options, primaryStashDir, actionableRefs, appliedCleanup, cleanupWarnings, memorySummary, memoryRefsForInference, reindexFn, eventsCtx, budgetSignal, improveProfile, } = args;
|
|
920
2003
|
const allWarnings = [...cleanupWarnings, ...(appliedCleanup?.warnings ?? [])];
|
|
921
2004
|
const baseConfig = options.config ?? loadConfig();
|
|
922
2005
|
const MEMORY_VOLUME_THRESHOLD = options.memoryVolumeConsolidationThreshold ?? 100;
|
|
923
|
-
const hasLlm = !!(baseConfig.llm || baseConfig.agent);
|
|
2006
|
+
const hasLlm = !!(baseConfig.defaults?.llm || baseConfig.defaults?.agent);
|
|
924
2007
|
const volumeTriggered = typeof memorySummary.eligible === "number" && memorySummary.eligible > MEMORY_VOLUME_THRESHOLD && hasLlm;
|
|
2008
|
+
// When volume triggers a consolidation pass, force-enable the consolidate
|
|
2009
|
+
// process on the default improve profile so the gate accepts the run even
|
|
2010
|
+
// if the user's config disabled it. We synthesise a new profile override
|
|
2011
|
+
// rather than mutating connection settings.
|
|
925
2012
|
const consolidationConfig = volumeTriggered
|
|
926
2013
|
? {
|
|
927
2014
|
...baseConfig,
|
|
928
|
-
|
|
929
|
-
|
|
930
|
-
|
|
931
|
-
|
|
932
|
-
|
|
2015
|
+
profiles: {
|
|
2016
|
+
...(baseConfig.profiles ?? {}),
|
|
2017
|
+
improve: {
|
|
2018
|
+
...(baseConfig.profiles?.improve ?? {}),
|
|
2019
|
+
default: {
|
|
2020
|
+
...(baseConfig.profiles?.improve?.default ?? {}),
|
|
2021
|
+
processes: {
|
|
2022
|
+
...(baseConfig.profiles?.improve?.default?.processes ?? {}),
|
|
2023
|
+
consolidate: {
|
|
2024
|
+
...(baseConfig.profiles?.improve?.default?.processes?.consolidate ?? {}),
|
|
2025
|
+
enabled: true,
|
|
2026
|
+
},
|
|
2027
|
+
},
|
|
933
2028
|
},
|
|
934
|
-
}
|
|
935
|
-
|
|
2029
|
+
},
|
|
2030
|
+
},
|
|
936
2031
|
}
|
|
937
2032
|
: baseConfig;
|
|
938
|
-
|
|
939
|
-
|
|
2033
|
+
// 0.8.0 pool-delta gate for consolidate: re-eligible iff at least one
|
|
2034
|
+
// memory file has been updated since the most recent successful
|
|
2035
|
+
// consolidate_completed event. Time-based cooldowns produced the same
|
|
2036
|
+
// synchronised-wave failure mode the reflect/distill cooldowns did; the
|
|
2037
|
+
// pool-delta gate ties consolidation to actual work-to-do.
|
|
940
2038
|
const recentConsolidations = readEvents({ type: "consolidate_completed" });
|
|
941
2039
|
const lastConsolidation = recentConsolidations.events
|
|
942
2040
|
.filter((e) => e.metadata?.processed && Number(e.metadata.processed) > 0)
|
|
943
2041
|
.sort((a, b) => new Date(b.ts ?? 0).getTime() - new Date(a.ts ?? 0).getTime())[0];
|
|
944
|
-
const
|
|
945
|
-
|
|
946
|
-
|
|
947
|
-
|
|
2042
|
+
const lastConsolidateTs = lastConsolidation?.ts;
|
|
2043
|
+
// Pool-delta: any memory file with mtime > lastConsolidateTs flags work to do.
|
|
2044
|
+
// Using file mtime keeps this query DB-free and matches what the indexer
|
|
2045
|
+
// already uses as the canonical `memory.updated_at` proxy.
|
|
2046
|
+
//
|
|
2047
|
+
// Bootstrap: when no successful consolidate_completed event has ever been
|
|
2048
|
+
// recorded, we cannot evaluate the pool-delta — treat as eligible so a
|
|
2049
|
+
// fresh stash runs consolidate once before the steady-state gate kicks in.
|
|
2050
|
+
const memoryUpdatedAfterLastConsolidate = (() => {
|
|
2051
|
+
if (volumeTriggered)
|
|
2052
|
+
return true; // volume override forces the run regardless.
|
|
2053
|
+
if (!lastConsolidateTs)
|
|
2054
|
+
return true; // bootstrap path: never consolidated.
|
|
2055
|
+
if (!primaryStashDir)
|
|
2056
|
+
return false;
|
|
2057
|
+
const memoriesDir = path.join(primaryStashDir, "memories");
|
|
2058
|
+
if (!fs.existsSync(memoriesDir))
|
|
2059
|
+
return false;
|
|
2060
|
+
try {
|
|
2061
|
+
return fs.readdirSync(memoriesDir).some((f) => {
|
|
2062
|
+
if (!f.endsWith(".md"))
|
|
2063
|
+
return false;
|
|
2064
|
+
try {
|
|
2065
|
+
return fs.statSync(path.join(memoriesDir, f)).mtime.toISOString() > lastConsolidateTs;
|
|
2066
|
+
}
|
|
2067
|
+
catch {
|
|
2068
|
+
return false;
|
|
2069
|
+
}
|
|
2070
|
+
});
|
|
2071
|
+
}
|
|
2072
|
+
catch {
|
|
2073
|
+
return false;
|
|
2074
|
+
}
|
|
2075
|
+
})();
|
|
2076
|
+
const consolidationOnCooldown = !volumeTriggered && !memoryUpdatedAfterLastConsolidate;
|
|
2077
|
+
// Profile gate: if profile explicitly disables consolidate, skip the entire pass.
|
|
2078
|
+
const consolidateDisabledByProfile = improveProfile?.processes?.consolidate?.enabled === false;
|
|
948
2079
|
let consolidation = {
|
|
949
2080
|
schemaVersion: 1,
|
|
950
2081
|
ok: true,
|
|
@@ -956,38 +2087,78 @@ async function runImprovePostLoopStage(args) {
|
|
|
956
2087
|
merged: 0,
|
|
957
2088
|
deleted: 0,
|
|
958
2089
|
promoted: [],
|
|
2090
|
+
contradicted: 0,
|
|
959
2091
|
warnings: [],
|
|
960
2092
|
durationMs: 0,
|
|
961
2093
|
};
|
|
962
|
-
|
|
2094
|
+
let gateAutoAcceptedCount = 0;
|
|
2095
|
+
const consolidateGateCfg = makeGateConfig("consolidate", {
|
|
2096
|
+
globalThreshold: options.autoAccept,
|
|
2097
|
+
dryRun: options.dryRun ?? false,
|
|
2098
|
+
stashDir: primaryStashDir,
|
|
2099
|
+
config: consolidationConfig,
|
|
2100
|
+
eventsCtx,
|
|
2101
|
+
}, { minimumThreshold: 95 });
|
|
2102
|
+
if (consolidateDisabledByProfile) {
|
|
2103
|
+
info("[improve] consolidation skipped (disabled by improve profile)");
|
|
2104
|
+
}
|
|
2105
|
+
else if (!consolidationOnCooldown) {
|
|
963
2106
|
consolidation = await akmConsolidate({
|
|
964
2107
|
...options.consolidateOptions,
|
|
965
2108
|
config: consolidationConfig,
|
|
966
2109
|
stashDir: options.stashDir,
|
|
967
2110
|
autoTriggered: volumeTriggered,
|
|
968
|
-
|
|
2111
|
+
// Tie consolidate proposals back to this improve invocation so
|
|
2112
|
+
// accept-rate-per-run aggregation works. Mirrors reflect/propose/extract.
|
|
2113
|
+
sourceRun: `consolidate-${Date.now()}`,
|
|
2114
|
+
// Incremental consolidation: in steady state (not bootstrap, not volume-
|
|
2115
|
+
// triggered) pass the last-consolidation timestamp so akmConsolidate skips
|
|
2116
|
+
// chunks with no memory changed since then. Converts consolidation cost
|
|
2117
|
+
// from O(pool) to O(changed clusters) — the fix for the rising p95 tail
|
|
2118
|
+
// where full-pool re-judging produced 5–10 min runs that promoted ~0.
|
|
2119
|
+
// undefined → full pass (bootstrap, or volume-triggered large-pool sweep).
|
|
2120
|
+
incrementalSince: volumeTriggered ? undefined : lastConsolidateTs,
|
|
2121
|
+
maxChunkSize: improveProfile?.processes?.consolidate?.maxChunkSize,
|
|
2122
|
+
// Honor profile.autoAccept (already merged into options.autoAccept at the
|
|
2123
|
+
// top of akmImprove). The CLI parser always supplies 90 when --auto-accept
|
|
2124
|
+
// is absent, so ?? 90 is not needed here and would prevent --auto-accept=false
|
|
2125
|
+
// (which maps to undefined) from disabling consolidation auto-accept.
|
|
2126
|
+
// options.consolidateOptions.autoAccept (if explicitly provided by caller)
|
|
2127
|
+
// still wins because the `??` below checks it before falling back to options.autoAccept.
|
|
2128
|
+
autoAccept: options.consolidateOptions?.autoAccept ?? options.autoAccept,
|
|
969
2129
|
});
|
|
2130
|
+
gateAutoAcceptedCount += (await runAutoAcceptGate(consolidation.promoted.map((proposalId) => {
|
|
2131
|
+
try {
|
|
2132
|
+
if (!primaryStashDir)
|
|
2133
|
+
return { proposalId, confidence: undefined };
|
|
2134
|
+
const proposal = getProposal(primaryStashDir, proposalId);
|
|
2135
|
+
return { proposalId, confidence: proposal.confidence };
|
|
2136
|
+
}
|
|
2137
|
+
catch {
|
|
2138
|
+
return { proposalId, confidence: undefined };
|
|
2139
|
+
}
|
|
2140
|
+
}), consolidateGateCfg)).promoted.length;
|
|
970
2141
|
if (consolidation.processed > 0) {
|
|
971
2142
|
appendEvent({
|
|
972
2143
|
eventType: "consolidate_completed",
|
|
973
2144
|
ref: "memory:_consolidation",
|
|
974
2145
|
metadata: { processed: consolidation.processed, merged: consolidation.merged },
|
|
975
|
-
});
|
|
2146
|
+
}, eventsCtx);
|
|
976
2147
|
}
|
|
977
2148
|
}
|
|
978
2149
|
else {
|
|
979
|
-
const daysAgo = Math.round((Date.now() - new Date(lastConsolidation?.ts ?? 0).getTime()) / 86400000);
|
|
980
2150
|
appendEvent({
|
|
981
2151
|
eventType: "improve_skipped",
|
|
982
2152
|
ref: "memory:_consolidation",
|
|
983
2153
|
metadata: {
|
|
984
|
-
reason: "
|
|
985
|
-
cooldownDays: 14,
|
|
2154
|
+
reason: "consolidation_no_memory_updates",
|
|
986
2155
|
lastEventTs: lastConsolidation?.ts ?? null,
|
|
987
2156
|
},
|
|
988
|
-
});
|
|
989
|
-
info(
|
|
2157
|
+
}, eventsCtx);
|
|
2158
|
+
info("[improve] consolidation skipped (no memory updates since last run)");
|
|
990
2159
|
}
|
|
2160
|
+
// D9: track whether consolidation wrote any data so graph extraction can reindex if needed
|
|
2161
|
+
const consolidationRan = !consolidateDisabledByProfile && !consolidationOnCooldown && consolidation.processed > 0;
|
|
991
2162
|
info("[improve] post-loop maintenance starting");
|
|
992
2163
|
const maintenanceResult = await runImproveMaintenancePasses({
|
|
993
2164
|
options,
|
|
@@ -996,6 +2167,11 @@ async function runImprovePostLoopStage(args) {
|
|
|
996
2167
|
memoryRefsForInference,
|
|
997
2168
|
allWarnings,
|
|
998
2169
|
reindexFn,
|
|
2170
|
+
consolidationRan,
|
|
2171
|
+
// O-1 (#364): forward the budget signal to memory inference + graph extraction.
|
|
2172
|
+
budgetSignal,
|
|
2173
|
+
eventsCtx,
|
|
2174
|
+
improveProfile,
|
|
999
2175
|
});
|
|
1000
2176
|
let deadUrls;
|
|
1001
2177
|
if (scope.mode === "all" && primaryStashDir && actionableRefs.length > 0) {
|
|
@@ -1027,39 +2203,74 @@ async function runImprovePostLoopStage(args) {
|
|
|
1027
2203
|
deadUrls,
|
|
1028
2204
|
...(maintenanceResult.memoryInference ? { memoryInference: maintenanceResult.memoryInference } : {}),
|
|
1029
2205
|
...(maintenanceResult.graphExtraction ? { graphExtraction: maintenanceResult.graphExtraction } : {}),
|
|
2206
|
+
...(maintenanceResult.stalenessDetection ? { stalenessDetection: maintenanceResult.stalenessDetection } : {}),
|
|
1030
2207
|
...(maintenanceResult.actions && maintenanceResult.actions.length > 0
|
|
1031
2208
|
? { maintenanceActions: maintenanceResult.actions }
|
|
1032
2209
|
: {}),
|
|
2210
|
+
memoryInferenceDurationMs: maintenanceResult.memoryInferenceDurationMs,
|
|
2211
|
+
graphExtractionDurationMs: maintenanceResult.graphExtractionDurationMs,
|
|
2212
|
+
orphansPurged: maintenanceResult.orphansPurged,
|
|
2213
|
+
proposalsExpired: maintenanceResult.proposalsExpired,
|
|
2214
|
+
gateAutoAcceptedCount,
|
|
1033
2215
|
};
|
|
1034
2216
|
}
|
|
2217
|
+
// TODO(refactor): mutates the passed-in `allWarnings` array as a hidden side channel. Return warnings in ImproveMaintenanceResult and merge in caller — invasive signature change deferred to next refactor pass.
|
|
1035
2218
|
async function runImproveMaintenancePasses(args) {
|
|
1036
|
-
const { options, primaryStashDir, memoryRefsForInference, allWarnings, reindexFn } = args;
|
|
2219
|
+
const { options, primaryStashDir, memoryRefsForInference, allWarnings, reindexFn, consolidationRan, budgetSignal, eventsCtx, improveProfile, } = args;
|
|
1037
2220
|
if (!primaryStashDir)
|
|
1038
|
-
return {};
|
|
2221
|
+
return { memoryInferenceDurationMs: 0, graphExtractionDurationMs: 0 };
|
|
1039
2222
|
const config = options.config ?? loadConfig();
|
|
1040
2223
|
const sources = resolveSourceEntries(options.stashDir, config);
|
|
1041
2224
|
const memoryInferenceFn = options.memoryInferenceFn ?? runMemoryInferencePass;
|
|
1042
2225
|
const graphExtractionFn = options.graphExtractionFn ?? runGraphExtractionPass;
|
|
2226
|
+
const stalenessDetectionFn = options.stalenessDetectionFn ?? runStalenessDetectionPass;
|
|
1043
2227
|
let db;
|
|
1044
2228
|
let memoryInference;
|
|
1045
2229
|
let graphExtraction;
|
|
2230
|
+
let stalenessDetection;
|
|
1046
2231
|
let reindexedAfterInference = false;
|
|
1047
2232
|
const actions = [];
|
|
2233
|
+
let memoryInferenceDurationMs = 0;
|
|
2234
|
+
let graphExtractionDurationMs = 0;
|
|
2235
|
+
let orphansPurged = 0;
|
|
2236
|
+
let proposalsExpired = 0;
|
|
1048
2237
|
try {
|
|
1049
2238
|
db = openDatabase(getDbPath(), config.embedding?.dimension ? { embeddingDim: config.embedding.dimension } : undefined);
|
|
1050
|
-
|
|
1051
|
-
|
|
2239
|
+
// Memory inference candidate-discovery (post-Item 9 fix from
|
|
2240
|
+
// memory:akm-improve-critical-review-2026-05-20). Previously this pass
|
|
2241
|
+
// was gated on memoryRefsForInference.size > 0 AND passed those refs as a
|
|
2242
|
+
// candidateRefs filter. But memoryRefsForInference is populated from refs
|
|
2243
|
+
// distilled THIS RUN — by the time that happens, those parents are
|
|
2244
|
+
// already split (`inferenceProcessed: true`) and `isPendingMemory` excludes
|
|
2245
|
+
// them. The genuinely-pending parents in the stash never entered the
|
|
2246
|
+
// filter. Result: 0/0/0 for 25 consecutive runs.
|
|
2247
|
+
//
|
|
2248
|
+
// Fix: always run the pass when the feature is enabled; let the pass's
|
|
2249
|
+
// own `collectPendingMemories` + `isPendingMemory` predicate find
|
|
2250
|
+
// candidates from the filesystem-of-truth. The this-run set is still
|
|
2251
|
+
// logged as a hint but no longer used as a filter.
|
|
2252
|
+
const memoryInferenceDisabledByProfile = improveProfile?.processes?.memoryInference?.enabled === false;
|
|
2253
|
+
if (memoryInferenceDisabledByProfile) {
|
|
2254
|
+
info("[improve] memory inference skipped (disabled by improve profile)");
|
|
2255
|
+
}
|
|
2256
|
+
else {
|
|
2257
|
+
const hintRefs = memoryRefsForInference.size;
|
|
2258
|
+
info(hintRefs > 0
|
|
2259
|
+
? `[improve] memory inference starting (${hintRefs} hint refs touched this run; pass discovers all pending)`
|
|
2260
|
+
: "[improve] memory inference starting (discovering pending parents)");
|
|
2261
|
+
const inferenceStart = Date.now();
|
|
1052
2262
|
try {
|
|
1053
|
-
|
|
2263
|
+
// O-1 (#364): pass budget signal so a hung inference call is cancelled.
|
|
2264
|
+
memoryInference = await memoryInferenceFn(config, sources, budgetSignal, db, false, (event) => {
|
|
1054
2265
|
const current = event.currentRef ? ` ${event.currentRef}` : "";
|
|
1055
2266
|
info(`[improve] memory inference ${event.processed}/${event.total}${current} (written ${event.writtenFacts}, skipped ${event.skippedNoFacts})`);
|
|
1056
|
-
}, {
|
|
1057
|
-
candidateRefs: memoryRefsForInference,
|
|
1058
2267
|
});
|
|
2268
|
+
memoryInferenceDurationMs = Date.now() - inferenceStart;
|
|
1059
2269
|
actions.push({ ref: "memory:_inference", mode: "memory-inference", result: memoryInference });
|
|
1060
2270
|
info(`[improve] memory inference complete (${memoryInference.writtenFacts} facts written from ${memoryInference.splitParents} parents)`);
|
|
1061
2271
|
}
|
|
1062
2272
|
catch (err) {
|
|
2273
|
+
memoryInferenceDurationMs = Date.now() - inferenceStart;
|
|
1063
2274
|
allWarnings.push(`memory inference failed: ${err instanceof Error ? err.message : String(err)}`);
|
|
1064
2275
|
}
|
|
1065
2276
|
}
|
|
@@ -1074,24 +2285,200 @@ async function runImproveMaintenancePasses(args) {
|
|
|
1074
2285
|
allWarnings.push(`reindex after memory inference failed: ${err instanceof Error ? err.message : String(err)}`);
|
|
1075
2286
|
}
|
|
1076
2287
|
}
|
|
1077
|
-
|
|
2288
|
+
const graphEnabled = isProcessEnabled("index", "graph_extraction", config);
|
|
2289
|
+
const graphExtractionDisabledByProfile = improveProfile?.processes?.graphExtraction?.enabled === false;
|
|
2290
|
+
// Build the set of refs actually touched this run.
|
|
2291
|
+
const touchedRefs = new Set();
|
|
2292
|
+
for (const r of args.actionableRefs)
|
|
2293
|
+
touchedRefs.add(r.ref);
|
|
2294
|
+
for (const r of memoryRefsForInference)
|
|
2295
|
+
touchedRefs.add(r);
|
|
2296
|
+
// INVARIANT: graph extraction must never run on the full corpus from the
|
|
2297
|
+
// improve post-loop. Full-corpus scans belong in `akm index`. We enforce
|
|
2298
|
+
// this by ALWAYS passing `candidatePaths` (possibly an empty Set) to the
|
|
2299
|
+
// extractor — never `undefined`. With an empty Set, the extractor's
|
|
2300
|
+
// filter (graph-extraction.ts ~L452) rejects every file and returns the
|
|
2301
|
+
// empty result without scanning. The pass is still invoked so that the
|
|
2302
|
+
// action is recorded, the D9 post-consolidation reindex still fires, and
|
|
2303
|
+
// mock injection (graphExtractionFn) used by tests stays exercised.
|
|
2304
|
+
if (graphExtractionDisabledByProfile) {
|
|
2305
|
+
info("[improve] graph extraction skipped (disabled by improve profile)");
|
|
2306
|
+
}
|
|
2307
|
+
else if (sources.length > 0 && graphEnabled) {
|
|
1078
2308
|
info("[improve] graph extraction starting");
|
|
2309
|
+
const extractionStart = Date.now();
|
|
1079
2310
|
try {
|
|
2311
|
+
// D9: if consolidation ran but memory inference did not reindex, force a reindex
|
|
2312
|
+
// so graph extraction sees current DB state after consolidation writes.
|
|
2313
|
+
if (consolidationRan && !reindexedAfterInference) {
|
|
2314
|
+
info("[improve] reindexing after consolidation (graph extraction needs current state)");
|
|
2315
|
+
try {
|
|
2316
|
+
await reindexFn({ stashDir: primaryStashDir });
|
|
2317
|
+
reindexedAfterInference = true;
|
|
2318
|
+
info("[improve] reindex after consolidation complete");
|
|
2319
|
+
}
|
|
2320
|
+
catch (err) {
|
|
2321
|
+
allWarnings.push(`reindex after consolidation failed: ${err instanceof Error ? err.message : String(err)}`);
|
|
2322
|
+
}
|
|
2323
|
+
}
|
|
1080
2324
|
if (db && reindexedAfterInference) {
|
|
1081
2325
|
closeDatabase(db);
|
|
1082
2326
|
db = openDatabase(getDbPath(), config.embedding?.dimension ? { embeddingDim: config.embedding.dimension } : undefined);
|
|
1083
2327
|
}
|
|
1084
|
-
|
|
2328
|
+
// Resolve touched refs to absolute file paths. Empty Set is intentional
|
|
2329
|
+
// when no refs were touched — see INVARIANT above.
|
|
2330
|
+
const candidatePaths = new Set();
|
|
2331
|
+
if (primaryStashDir && touchedRefs.size > 0) {
|
|
2332
|
+
const writableDirSet = new Set(getWritableStashDirs(primaryStashDir).map((d) => path.resolve(d)));
|
|
2333
|
+
const resolved = await Promise.all([...touchedRefs].map((ref) => findAssetFilePath(ref, primaryStashDir, writableDirSet).catch(() => null)));
|
|
2334
|
+
for (const p of resolved) {
|
|
2335
|
+
if (typeof p === "string" && p.length > 0)
|
|
2336
|
+
candidatePaths.add(p);
|
|
2337
|
+
}
|
|
2338
|
+
}
|
|
2339
|
+
const progressHandler = (event) => {
|
|
1085
2340
|
const current = event.currentPath ? ` ${path.basename(event.currentPath)}` : "";
|
|
1086
2341
|
info(`[improve] graph extraction ${event.processed}/${event.total}${current} (extracted ${event.extracted}, entities ${event.totalEntities}, relations ${event.totalRelations})`);
|
|
2342
|
+
};
|
|
2343
|
+
// O-1 (#364): pass budget signal so a hung graph extraction call is cancelled.
|
|
2344
|
+
graphExtraction = await graphExtractionFn(config, sources, budgetSignal, db, false, progressHandler, {
|
|
2345
|
+
candidatePaths,
|
|
1087
2346
|
});
|
|
2347
|
+
graphExtractionDurationMs = Date.now() - extractionStart;
|
|
1088
2348
|
actions.push({ ref: "graph:_artifact", mode: "graph-extraction", result: graphExtraction });
|
|
1089
2349
|
info(`[improve] graph extraction complete (${graphExtraction.quality.extractedFiles} files, ${graphExtraction.quality.entityCount} entities, ${graphExtraction.quality.relationCount} relations)`);
|
|
1090
2350
|
}
|
|
1091
2351
|
catch (err) {
|
|
2352
|
+
graphExtractionDurationMs = Date.now() - extractionStart;
|
|
1092
2353
|
allWarnings.push(`graph extraction failed: ${err instanceof Error ? err.message : String(err)}`);
|
|
1093
2354
|
}
|
|
1094
2355
|
}
|
|
2356
|
+
else if (sources.length > 0 && !graphEnabled) {
|
|
2357
|
+
info("[improve] graph extraction skipped (features.index.graph_extraction is disabled)");
|
|
2358
|
+
}
|
|
2359
|
+
// Orphan proposal purge — reject pending reflect proposals whose target
|
|
2360
|
+
// asset no longer exists on disk. Runs after graph extraction so newly
|
|
2361
|
+
// promoted assets from accept flows during this run are already present.
|
|
2362
|
+
if (primaryStashDir) {
|
|
2363
|
+
try {
|
|
2364
|
+
const purgeResult = purgeOrphanProposals(primaryStashDir, sources.map((s) => s.path));
|
|
2365
|
+
orphansPurged = purgeResult.rejected;
|
|
2366
|
+
if (purgeResult.rejected > 0) {
|
|
2367
|
+
info(`[improve] orphan purge: ${purgeResult.rejected}/${purgeResult.checked} orphaned proposals rejected (${purgeResult.durationMs}ms)`);
|
|
2368
|
+
}
|
|
2369
|
+
appendEvent({
|
|
2370
|
+
eventType: "proposal_orphan_purge",
|
|
2371
|
+
ref: "proposals:_orphan-purge",
|
|
2372
|
+
metadata: {
|
|
2373
|
+
checked: purgeResult.checked,
|
|
2374
|
+
rejected: purgeResult.rejected,
|
|
2375
|
+
durationMs: purgeResult.durationMs,
|
|
2376
|
+
byType: purgeResult.byType,
|
|
2377
|
+
orphans: purgeResult.orphans.map((o) => o.ref),
|
|
2378
|
+
},
|
|
2379
|
+
}, eventsCtx);
|
|
2380
|
+
}
|
|
2381
|
+
catch (err) {
|
|
2382
|
+
allWarnings.push(`orphan purge failed: ${err instanceof Error ? err.message : String(err)}`);
|
|
2383
|
+
}
|
|
2384
|
+
// Phase 6B (Advantage D6b): expire pending proposals that have aged past
|
|
2385
|
+
// the retention window. Runs AFTER orphan purge so we never double-archive
|
|
2386
|
+
// a proposal that orphan-purge already moved. `expireStaleProposals` emits
|
|
2387
|
+
// its own per-proposal `proposal_expired` events; we additionally emit a
|
|
2388
|
+
// single roll-up event here for parity with the orphan-purge surface.
|
|
2389
|
+
try {
|
|
2390
|
+
const expireResult = expireStaleProposals(primaryStashDir, config);
|
|
2391
|
+
proposalsExpired = expireResult.expired;
|
|
2392
|
+
if (expireResult.expired > 0) {
|
|
2393
|
+
info(`[improve] expiration: ${expireResult.expired}/${expireResult.checked} pending proposals expired ` +
|
|
2394
|
+
`(retention=${expireResult.retentionDays}d, ${expireResult.durationMs}ms)`);
|
|
2395
|
+
}
|
|
2396
|
+
appendEvent({
|
|
2397
|
+
eventType: "proposal_expiration_pass",
|
|
2398
|
+
ref: "proposals:_expiration",
|
|
2399
|
+
metadata: {
|
|
2400
|
+
checked: expireResult.checked,
|
|
2401
|
+
expired: expireResult.expired,
|
|
2402
|
+
durationMs: expireResult.durationMs,
|
|
2403
|
+
retentionDays: expireResult.retentionDays,
|
|
2404
|
+
expiredProposals: expireResult.expiredProposals,
|
|
2405
|
+
},
|
|
2406
|
+
}, eventsCtx);
|
|
2407
|
+
}
|
|
2408
|
+
catch (err) {
|
|
2409
|
+
allWarnings.push(`proposal expiration failed: ${err instanceof Error ? err.message : String(err)}`);
|
|
2410
|
+
}
|
|
2411
|
+
}
|
|
2412
|
+
// Fix #2 (observability 0.8.0): trim the events table in state.db so it
|
|
2413
|
+
// doesn't grow unbounded. `akm health` writes a `health_probe` row on every
|
|
2414
|
+
// invocation, and every command surface emits at least one event besides —
|
|
2415
|
+
// without this trim, state.db is a permanent append-only log. Config key
|
|
2416
|
+
// `improve.eventRetentionDays` (default 90, set 0 to disable) controls the
|
|
2417
|
+
// window. `purgeOldEvents()` opens its own state.db handle separate from
|
|
2418
|
+
// the index `db` above (different SQLite file).
|
|
2419
|
+
{
|
|
2420
|
+
const retentionDays = typeof config.improve?.eventRetentionDays === "number" ? config.improve.eventRetentionDays : 90;
|
|
2421
|
+
if (retentionDays > 0) {
|
|
2422
|
+
let stateDb;
|
|
2423
|
+
try {
|
|
2424
|
+
stateDb = openStateDatabase();
|
|
2425
|
+
const purgedCount = purgeOldEvents(stateDb, retentionDays);
|
|
2426
|
+
if (purgedCount > 0) {
|
|
2427
|
+
info(`[improve] events purge: ${purgedCount} event(s) older than ${retentionDays}d removed from state.db`);
|
|
2428
|
+
}
|
|
2429
|
+
appendEvent({
|
|
2430
|
+
eventType: "events_purged",
|
|
2431
|
+
ref: "events:_purge",
|
|
2432
|
+
metadata: { purgedCount, retentionDays },
|
|
2433
|
+
}, eventsCtx);
|
|
2434
|
+
// improve_runs uses the same retention window as events — both are
|
|
2435
|
+
// observability/audit data, both grow append-only, both have a
|
|
2436
|
+
// dedicated purge helper. Mirroring the events purge here means a
|
|
2437
|
+
// single retention knob (improve.eventRetentionDays) governs both.
|
|
2438
|
+
const improveRunsPurged = purgeOldImproveRuns(stateDb, retentionDays);
|
|
2439
|
+
if (improveRunsPurged > 0) {
|
|
2440
|
+
info(`[improve] improve_runs purge: ${improveRunsPurged} run(s) older than ${retentionDays}d removed from state.db`);
|
|
2441
|
+
}
|
|
2442
|
+
appendEvent({
|
|
2443
|
+
eventType: "improve_runs_purged",
|
|
2444
|
+
ref: "improve_runs:_purge",
|
|
2445
|
+
metadata: { purgedCount: improveRunsPurged, retentionDays },
|
|
2446
|
+
}, eventsCtx);
|
|
2447
|
+
}
|
|
2448
|
+
catch (err) {
|
|
2449
|
+
allWarnings.push(`events purge failed: ${err instanceof Error ? err.message : String(err)}`);
|
|
2450
|
+
}
|
|
2451
|
+
finally {
|
|
2452
|
+
if (stateDb) {
|
|
2453
|
+
try {
|
|
2454
|
+
stateDb.close();
|
|
2455
|
+
}
|
|
2456
|
+
catch {
|
|
2457
|
+
// best-effort
|
|
2458
|
+
}
|
|
2459
|
+
}
|
|
2460
|
+
}
|
|
2461
|
+
}
|
|
2462
|
+
}
|
|
2463
|
+
// Phase 4A (staleness detection). Activates the `deprecated` belief-state
|
|
2464
|
+
// machinery shipped in Phase 1A. Default OFF — gated by
|
|
2465
|
+
// `features.index.staleness_detection.enabled`. Runs after orphan purge
|
|
2466
|
+
// and before the URL check (which lives in the outer caller).
|
|
2467
|
+
if (sources.length > 0) {
|
|
2468
|
+
try {
|
|
2469
|
+
stalenessDetection = await stalenessDetectionFn(config, sources, budgetSignal, db);
|
|
2470
|
+
if (stalenessDetection.considered > 0) {
|
|
2471
|
+
info(`[improve] staleness detection complete (considered ${stalenessDetection.considered}, ` +
|
|
2472
|
+
`deprecated ${stalenessDetection.deprecated}, confirmed ${stalenessDetection.confirmed}, ` +
|
|
2473
|
+
`skipped ${stalenessDetection.skipped}, ${stalenessDetection.durationMs}ms)`);
|
|
2474
|
+
}
|
|
2475
|
+
for (const w of stalenessDetection.warnings)
|
|
2476
|
+
allWarnings.push(`[improve] staleness detection: ${w}`);
|
|
2477
|
+
}
|
|
2478
|
+
catch (err) {
|
|
2479
|
+
allWarnings.push(`staleness detection failed: ${err instanceof Error ? err.message : String(err)}`);
|
|
2480
|
+
}
|
|
2481
|
+
}
|
|
1095
2482
|
}
|
|
1096
2483
|
finally {
|
|
1097
2484
|
if (db)
|
|
@@ -1100,7 +2487,12 @@ async function runImproveMaintenancePasses(args) {
|
|
|
1100
2487
|
return {
|
|
1101
2488
|
...(memoryInference ? { memoryInference } : {}),
|
|
1102
2489
|
...(graphExtraction ? { graphExtraction } : {}),
|
|
2490
|
+
...(stalenessDetection ? { stalenessDetection } : {}),
|
|
1103
2491
|
...(actions.length > 0 ? { actions } : {}),
|
|
2492
|
+
memoryInferenceDurationMs,
|
|
2493
|
+
graphExtractionDurationMs,
|
|
2494
|
+
orphansPurged,
|
|
2495
|
+
proposalsExpired,
|
|
1104
2496
|
};
|
|
1105
2497
|
}
|
|
1106
2498
|
function shouldAnalyzeMemoryCleanup(scope, eligibleMemories, primaryStashDir) {
|
|
@@ -1141,7 +2533,7 @@ function buildUtilityMap(refs) {
|
|
|
1141
2533
|
}
|
|
1142
2534
|
const ids = [...idToRef.keys()];
|
|
1143
2535
|
if (ids.length > 0) {
|
|
1144
|
-
const scores = getUtilityScoresByIds(db, ids);
|
|
2536
|
+
const { global: scores } = getUtilityScoresByIds(db, ids);
|
|
1145
2537
|
for (const [id, score] of scores) {
|
|
1146
2538
|
const ref = idToRef.get(id);
|
|
1147
2539
|
if (ref)
|
|
@@ -1149,7 +2541,8 @@ function buildUtilityMap(refs) {
|
|
|
1149
2541
|
}
|
|
1150
2542
|
}
|
|
1151
2543
|
}
|
|
1152
|
-
catch {
|
|
2544
|
+
catch (err) {
|
|
2545
|
+
rethrowIfTestIsolationError(err);
|
|
1153
2546
|
// best-effort: if DB unavailable, all utilities default to 0
|
|
1154
2547
|
}
|
|
1155
2548
|
finally {
|