akm-cli 0.9.0-beta.56 → 0.9.0-beta.58
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/assets/prompts/extract-session.md +5 -1
- package/dist/cli/config-migrate.js +7 -1
- package/dist/commands/config-cli.js +8 -11
- package/dist/commands/health/stash-exposure.js +46 -0
- package/dist/commands/health/windows.js +6 -7
- package/dist/commands/health.js +31 -10
- package/dist/commands/improve/collapse-detector.js +2 -1
- package/dist/commands/improve/consolidate.js +207 -159
- package/dist/commands/improve/distill/promote-memory.js +4 -3
- package/dist/commands/improve/distill/quality-gate.js +7 -4
- package/dist/commands/improve/distill-promotion-policy.js +826 -167
- package/dist/commands/improve/distill.js +26 -12
- package/dist/commands/improve/extract-prompt.js +16 -2
- package/dist/commands/improve/extract.js +16 -8
- package/dist/commands/improve/improve-auto-accept.js +22 -1
- package/dist/commands/improve/loop-stages.js +7 -2
- package/dist/commands/improve/memory/memory-belief.js +14 -15
- package/dist/commands/improve/memory/memory-contradiction-detect.js +60 -32
- package/dist/commands/improve/memory/memory-improve.js +27 -27
- package/dist/commands/improve/preparation.js +4 -0
- package/dist/commands/improve/procedural.js +1 -0
- package/dist/commands/improve/recombine.js +1 -0
- package/dist/commands/improve/reflect-noise.js +1 -1
- package/dist/commands/improve/reflect.js +4 -3
- package/dist/commands/improve/shared.js +9 -6
- package/dist/commands/proposal/drain-policies.js +4 -2
- package/dist/commands/read/remember-cli.js +1 -1
- package/dist/commands/read/show.js +15 -0
- package/dist/commands/remember.js +11 -12
- package/dist/commands/sources/init.js +5 -1
- package/dist/commands/sources/stash-skeleton.js +34 -0
- package/dist/core/asset/frontmatter.js +22 -0
- package/dist/core/common.js +1 -15
- package/dist/core/config/config-io.js +10 -1
- package/dist/core/config/config-migration.js +2 -15
- package/dist/core/config/config-schema.js +15 -3
- package/dist/core/config/config.js +22 -14
- package/dist/core/paths.js +4 -4
- package/dist/core/time.js +53 -0
- package/dist/indexer/db/db.js +51 -46
- package/dist/indexer/indexer.js +77 -65
- package/dist/indexer/search/db-search.js +41 -6
- package/dist/indexer/search/ranking-contributors.js +14 -8
- package/dist/indexer/search/search-source.js +15 -3
- package/dist/integrations/agent/profiles.js +7 -1
- package/dist/llm/feature-gate.js +4 -8
- package/dist/output/renderers.js +4 -0
- package/dist/scripts/migrate-storage.js +84 -60
- package/dist/scripts/migrations/import-fs-improve-runs-to-db.js +6 -0
- package/dist/storage/repositories/registry-cache.js +2 -1
- package/dist/storage/repositories/registry-index-cache-repository.js +46 -0
- package/dist/workflows/runtime/runs.js +6 -1
- package/package.json +1 -1
|
@@ -10,12 +10,13 @@ import { parseAssetRef } from "../../core/asset/asset-ref.js";
|
|
|
10
10
|
import { assembleAssetFromString, serializeFrontmatter } from "../../core/asset/asset-serialize.js";
|
|
11
11
|
import { parseFrontmatter } from "../../core/asset/frontmatter.js";
|
|
12
12
|
import { resolveStashDir, timestampForFilename } from "../../core/common.js";
|
|
13
|
-
import { getDefaultLlmConfig, loadConfig } from "../../core/config/config.js";
|
|
13
|
+
import { getDefaultLlmConfig, getImproveProcessConfig, loadConfig } from "../../core/config/config.js";
|
|
14
14
|
import { ConfigError } from "../../core/errors.js";
|
|
15
15
|
// Note: appendEvent import removed (WS-3a: archive TTL machinery retired)
|
|
16
16
|
import { parseEmbeddedJsonResponse } from "../../core/parse.js";
|
|
17
17
|
import { resolveStashStandards } from "../../core/standards/resolve-stash-standards.js";
|
|
18
18
|
import { detectTruncatedDescription } from "../../core/text-truncation.js";
|
|
19
|
+
import { DURATION_UNITS, parseDuration } from "../../core/time.js";
|
|
19
20
|
import { createProposal, isProposalSkipped, listProposals } from "../proposal/repository.js";
|
|
20
21
|
import { hasSupersededStatus, MERGE_ABSOLUTE_FLOOR_CHARS, MERGE_SHRINK_RATIO_MIN, validateProposalFrontmatter, } from "../proposal/validators/proposal-quality-validators.js";
|
|
21
22
|
import { checkGenerationGuard, checkLexicalDiversity, checkMergeInformationFloor, computeMergedGeneration, readAssetGeneration, } from "./anti-collapse.js";
|
|
@@ -507,8 +508,8 @@ function archiveMemory(filePath, stashDir, ref, reason, opIndex, supersededBy, w
|
|
|
507
508
|
* silent 400s from LM Studio). The investigation lives at
|
|
508
509
|
* `/tmp/akm-health-investigations/consolidation-no-op.md`.
|
|
509
510
|
*/
|
|
510
|
-
function resolveConsolidateLlmConfig(config) {
|
|
511
|
-
const consolidateProcess = config
|
|
511
|
+
function resolveConsolidateLlmConfig(config, activeProfile) {
|
|
512
|
+
const consolidateProcess = getImproveProcessConfig(config, "consolidate", activeProfile);
|
|
512
513
|
const runnerSpec = resolveImproveProcessRunnerFromProfile(consolidateProcess, config);
|
|
513
514
|
if (runnerSpec && runnerIsLlm(runnerSpec)) {
|
|
514
515
|
return runnerSpec.connection;
|
|
@@ -605,9 +606,49 @@ export async function akmConsolidate(opts = {}) {
|
|
|
605
606
|
sharedStateDb?.close();
|
|
606
607
|
}
|
|
607
608
|
}
|
|
608
|
-
|
|
609
|
-
|
|
610
|
-
|
|
609
|
+
/**
 * Create fresh, zeroed accounting accumulators for one consolidate run.
 *
 * The returned object is mutated in place by its consumers. It carries:
 *  - `judgedNoAction`      counter, starts at 0
 *  - `failedChunkMemories` counter, starts at 0
 *  - `totalChunksFailed`   counter, starts at 0
 *  - `skipReasons`         array of `{ ref, skips: [{ op, reason }] }` entries,
 *                          at most one entry per ref
 *  - `skipReasonByRef`     Map from ref to its entry in `skipReasons`
 *  - `judgedNoActionRefs`  Set of refs currently counted in `judgedNoAction`
 *  - `pushSkipReason(op, ref, reason)` records one skip for `ref`
 *
 * `pushSkipReason` closes over the accumulator object rather than `this`, so
 * it keeps working when handed around as a bare function reference.
 *
 * @returns {object} a new, independent accounting object
 */
function createConsolidateAccounting() {
    const acc = {
        judgedNoAction: 0,
        failedChunkMemories: 0,
        totalChunksFailed: 0,
        skipReasons: [],
        skipReasonByRef: new Map(),
        judgedNoActionRefs: new Set(),
        pushSkipReason: (op, ref, reason) => {
            // 2026-05-27 cross-chunk double-count fix: if `ref` already contributed
            // to judgedNoAction in its own chunk (a different chunk proposed an op
            // for it that is now being rejected here), promote it from the
            // judgedNoAction bucket into the more specific skipReason bucket.
            // Preserves the invariant: processed == actioned + judgedNoAction +
            // Σ(skipReasons) + failedChunkMemories.
            if (acc.judgedNoActionRefs.delete(ref)) {
                acc.judgedNoAction--;
            }
            const existing = acc.skipReasonByRef.get(ref);
            if (existing) {
                // Already counted once for accounting. Append the extra skip to the
                // ref's grouped entry for observability without adding a new array
                // entry (which would break the accounting invariant).
                existing.skips.push({ op, reason });
                return;
            }
            // First skip for this ref: register one grouped entry in both the
            // lookup map and the ordered list (same object in both).
            const entry = { ref, skips: [{ op, reason }] };
            acc.skipReasonByRef.set(ref, entry);
            acc.skipReasons.push(entry);
        },
    };
    return acc;
}
|
|
643
|
+
/**
|
|
644
|
+
* Pass 1 — narrow the memory pool before any LLM work: drop stale DB entries,
|
|
645
|
+
* partition hot-probation assets, run the deterministic dedup pre-pass, apply
|
|
646
|
+
* incremental-since and judged-state-cache narrowing, and cap to `opts.limit`
|
|
647
|
+
* (oldest-modified first). Returns an early envelope when the pool empties at
|
|
648
|
+
* any stage; otherwise returns the narrowed pool and the state the plan/apply
|
|
649
|
+
* passes consume. Behavior-identical to the former inlined narrowing block.
|
|
650
|
+
*/
|
|
651
|
+
async function narrowConsolidationPool(opts, config, stashDir, startMs, warnings, sharedStateDb) {
|
|
611
652
|
let memories = loadMemoriesForSource(opts.target, stashDir, warnings);
|
|
612
653
|
// Pre-flight: filter out stale DB entries whose files no longer exist on
|
|
613
654
|
// disk. Without this, memories deleted by a prior run (but not yet
|
|
@@ -632,7 +673,7 @@ async function akmConsolidateInner(opts, config, stashDir, startMs, sourceRun, w
|
|
|
632
673
|
// (the flag that causes extract to tag new extractions as hot-probation).
|
|
633
674
|
// Without that flag no assets will ever carry the hot-probation marker, so
|
|
634
675
|
// running the filter loop would be pure unnecessary I/O over the full corpus.
|
|
635
|
-
const hotProbationEnabled = config.
|
|
676
|
+
const hotProbationEnabled = getImproveProcessConfig(config, "extract", opts.improveProfile)?.hotProbation
|
|
636
677
|
?.enabled === true;
|
|
637
678
|
let hotProbationCount = 0;
|
|
638
679
|
if (hotProbationEnabled) {
|
|
@@ -685,26 +726,32 @@ async function akmConsolidateInner(opts, config, stashDir, startMs, sourceRun, w
|
|
|
685
726
|
}
|
|
686
727
|
}
|
|
687
728
|
if (memories.length === 0) {
|
|
688
|
-
return
|
|
689
|
-
|
|
690
|
-
|
|
691
|
-
// #617: the deterministic dedup pre-pass may have emptied the pool by
|
|
692
|
-
// collapsing every remaining memory into a canonical. Surface those
|
|
693
|
-
// collapses in `deleted` so the run reports the work it actually did.
|
|
694
|
-
deleted: dedupCollapsed,
|
|
695
|
-
warnings,
|
|
696
|
-
durationMs: Date.now() - startMs,
|
|
697
|
-
});
|
|
698
|
-
}
|
|
699
|
-
if (opts.incrementalSince) {
|
|
700
|
-
memories = narrowToIncrementalCandidates(memories, opts.incrementalSince, warnings, opts.neighborsPerChanged);
|
|
701
|
-
if (memories.length === 0) {
|
|
702
|
-
return makeConsolidateResult({
|
|
729
|
+
return {
|
|
730
|
+
done: true,
|
|
731
|
+
result: makeConsolidateResult({
|
|
703
732
|
dryRun: opts.dryRun ?? false,
|
|
704
733
|
target: opts.target ?? stashDir,
|
|
734
|
+
// #617: the deterministic dedup pre-pass may have emptied the pool by
|
|
735
|
+
// collapsing every remaining memory into a canonical. Surface those
|
|
736
|
+
// collapses in `deleted` so the run reports the work it actually did.
|
|
737
|
+
deleted: dedupCollapsed,
|
|
705
738
|
warnings,
|
|
706
739
|
durationMs: Date.now() - startMs,
|
|
707
|
-
})
|
|
740
|
+
}),
|
|
741
|
+
};
|
|
742
|
+
}
|
|
743
|
+
if (opts.incrementalSince) {
|
|
744
|
+
memories = narrowToIncrementalCandidates(memories, opts.incrementalSince, warnings, opts.neighborsPerChanged);
|
|
745
|
+
if (memories.length === 0) {
|
|
746
|
+
return {
|
|
747
|
+
done: true,
|
|
748
|
+
result: makeConsolidateResult({
|
|
749
|
+
dryRun: opts.dryRun ?? false,
|
|
750
|
+
target: opts.target ?? stashDir,
|
|
751
|
+
warnings,
|
|
752
|
+
durationMs: Date.now() - startMs,
|
|
753
|
+
}),
|
|
754
|
+
};
|
|
708
755
|
}
|
|
709
756
|
}
|
|
710
757
|
// WS-5 perf telemetry accumulators. These are collected throughout the run and
|
|
@@ -769,13 +816,16 @@ async function akmConsolidateInner(opts, config, stashDir, startMs, sourceRun, w
|
|
|
769
816
|
warnings.push(`Judged-state cache: skipped ${skipped} memor${skipped === 1 ? "y" : "ies"} judged-unchanged (no LLM); ${memories.length} remain for judging.`);
|
|
770
817
|
}
|
|
771
818
|
if (memories.length === 0) {
|
|
772
|
-
return
|
|
773
|
-
|
|
774
|
-
|
|
775
|
-
|
|
776
|
-
|
|
777
|
-
|
|
778
|
-
|
|
819
|
+
return {
|
|
820
|
+
done: true,
|
|
821
|
+
result: makeConsolidateResult({
|
|
822
|
+
dryRun: opts.dryRun ?? false,
|
|
823
|
+
target: opts.target ?? stashDir,
|
|
824
|
+
deleted: dedupCollapsed,
|
|
825
|
+
warnings,
|
|
826
|
+
durationMs: Date.now() - startMs,
|
|
827
|
+
}),
|
|
828
|
+
};
|
|
779
829
|
}
|
|
780
830
|
}
|
|
781
831
|
if (opts.limit === undefined && memories.length > 150) {
|
|
@@ -802,13 +852,25 @@ async function akmConsolidateInner(opts, config, stashDir, startMs, sourceRun, w
|
|
|
802
852
|
warnings.push(`Consolidation: pool capped at ${opts.limit} of ${memories.length} memories (limit option, oldest-modified first).`);
|
|
803
853
|
memories = memories.slice(0, opts.limit);
|
|
804
854
|
}
|
|
855
|
+
return { done: false, memories, dedupCollapsed, perfMs, judgedCacheEnabled, currentHashByName };
|
|
856
|
+
}
|
|
857
|
+
/**
|
|
858
|
+
* Pass 2 — turn the narrowed pool into an executable plan. Sizes chunks to the
|
|
859
|
+
* model context window, clusters by embedding similarity, injects the
|
|
860
|
+
* anti-collapse random fraction, applies the cold-start budget cap, runs the
|
|
861
|
+
* per-chunk LLM calls (with retry + failure-rate abort), records judged-state
|
|
862
|
+
* cache outcomes, and reconciles the per-chunk op arrays via {@link mergePlans}.
|
|
863
|
+
* Populates `accounting` in place. Behavior-identical to the former inlined
|
|
864
|
+
* plan-generation block.
|
|
865
|
+
*/
|
|
866
|
+
async function planConsolidation(opts, config, stashDir, startMs, memories, warnings, sharedStateDb, judgedCacheEnabled, currentHashByName, accounting) {
|
|
805
867
|
// Consolidation always uses the HTTP LLM client directly — never the agent
|
|
806
868
|
// CLI. The agent CLI is for interactive agent sessions (reflect, propose);
|
|
807
869
|
// structured JSON generation works better and faster via HTTP.
|
|
808
870
|
//
|
|
809
871
|
// Honor `profiles.improve.default.processes.consolidate.profile` first; fall
|
|
810
872
|
// back to the default LLM. See {@link resolveConsolidateLlmConfig}.
|
|
811
|
-
const llmConfig = resolveConsolidateLlmConfig(config);
|
|
873
|
+
const llmConfig = resolveConsolidateLlmConfig(config, opts.improveProfile);
|
|
812
874
|
const isHttpPath = !!llmConfig;
|
|
813
875
|
// Chunk sizing: derive a safe chunk size from the configured model context
|
|
814
876
|
// window so that the full prompt (system prompt + chunk user prompt) never
|
|
@@ -841,7 +903,7 @@ async function akmConsolidateInner(opts, config, stashDir, startMs, sourceRun, w
|
|
|
841
903
|
// DEFAULT ON since R5 — opt out via antiCollapse.enabled: false.
|
|
842
904
|
let finalClusteredMemories = clusteredMemories;
|
|
843
905
|
{
|
|
844
|
-
const antiCollapseForCluster = config.
|
|
906
|
+
const antiCollapseForCluster = getImproveProcessConfig(config, "consolidate", opts.improveProfile)?.antiCollapse ?? {};
|
|
845
907
|
if (antiCollapseForCluster.enabled !== false && clusteredMemories.length > 2) {
|
|
846
908
|
const fraction = antiCollapseForCluster.randomClusterFraction ?? 0.05;
|
|
847
909
|
const randomCount = Math.max(1, Math.floor(clusteredMemories.length * fraction));
|
|
@@ -932,41 +994,13 @@ async function akmConsolidateInner(opts, config, stashDir, startMs, sourceRun, w
|
|
|
932
994
|
// per chunk).
|
|
933
995
|
const standardsContext = resolveStashStandards(stashDir);
|
|
934
996
|
const chunkOpsArrays = [];
|
|
935
|
-
// Structured skip-reason histogram (2026-05-26): every deterministic
|
|
936
|
-
// post-LLM op rejection site below also calls `pushSkipReason` so the
|
|
937
|
-
// health rollup can aggregate without regex-parsing English warning
|
|
938
|
-
// strings. See `/tmp/akm-health-investigations/tuning-reasons-investigation.md` §Q2.
|
|
939
|
-
const skipReasons = [];
|
|
940
|
-
// Per-ref grouping of skipReasons entries. A ref occupies exactly one
|
|
941
|
-
// accounting bucket and therefore exactly one skipReasons array entry;
|
|
942
|
-
// subsequent skip ops for the same ref append to that entry's `skips[]`
|
|
943
|
-
// rather than pushing a second array entry (that would inflate
|
|
944
|
-
// Σ(skipReasons) and break the invariant by +1 per duplicate).
|
|
945
|
-
const skipReasonByRef = new Map();
|
|
946
|
-
const pushSkipReason = (op, ref, reason) => {
|
|
947
|
-
// 2026-05-27 cross-chunk double-count fix: if `ref` already contributed
|
|
948
|
-
// to judgedNoAction in its own chunk (a different chunk proposed an op
|
|
949
|
-
// for it that is now being rejected here), promote it from the
|
|
950
|
-
// judgedNoAction bucket into the more specific skipReason bucket.
|
|
951
|
-
// Preserves the invariant: processed == actioned + judgedNoAction +
|
|
952
|
-
// Σ(skipReasons) + failedChunkMemories.
|
|
953
|
-
if (judgedNoActionRefs.delete(ref))
|
|
954
|
-
judgedNoAction--;
|
|
955
|
-
const existing = skipReasonByRef.get(ref);
|
|
956
|
-
if (existing) {
|
|
957
|
-
// Already counted once for accounting. Append the extra skip to the
|
|
958
|
-
// ref's grouped entry for observability without adding a new array
|
|
959
|
-
// entry (which would break the accounting invariant).
|
|
960
|
-
existing.skips.push({ op, reason });
|
|
961
|
-
return;
|
|
962
|
-
}
|
|
963
|
-
const entry = { ref, skips: [{ op, reason }] };
|
|
964
|
-
skipReasonByRef.set(ref, entry);
|
|
965
|
-
skipReasons.push(entry);
|
|
966
|
-
};
|
|
967
997
|
// judgedNoAction tracks memories the LLM saw inside a chunk but proposed
|
|
968
998
|
// no op for. Computed per chunk as `chunk.length − unique(targetRefs in ops)`.
|
|
969
|
-
|
|
999
|
+
// The structured skip-reason histogram (2026-05-26) plus the cross-chunk
|
|
1000
|
+
// double-count fixes now live on `accounting`; every deterministic post-LLM
|
|
1001
|
+
// op rejection site calls `accounting.pushSkipReason`. See
|
|
1002
|
+
// `/tmp/akm-health-investigations/tuning-reasons-investigation.md` §Q2.
|
|
1003
|
+
//
|
|
970
1004
|
// Judged-state cache (#581): coarse outcome per memory NAME the LLM actually
|
|
971
1005
|
// judged in a successfully-parsed chunk this run. "actioned" = an op targeted
|
|
972
1006
|
// it; "no_action" = the LLM saw it and proposed nothing. Populated only when
|
|
@@ -974,24 +1008,12 @@ async function akmConsolidateInner(opts, config, stashDir, startMs, sourceRun, w
|
|
|
974
1008
|
// step is a no-op). Memories in failed/aborted chunks are NOT recorded, so a
|
|
975
1009
|
// transient LLM failure never poisons the cache into skipping them next run.
|
|
976
1010
|
const judgedOutcomeByName = new Map();
|
|
977
|
-
// 2026-05-27 cross-chunk double-count fix: refs that contributed to
|
|
978
|
-
// judgedNoAction in their own chunk. When a different chunk's op references
|
|
979
|
-
// one of these as a secondary and that op later fails, the ref would land
|
|
980
|
-
// in BOTH judgedNoAction and skipReasons (delta +1 per occurrence). Track
|
|
981
|
-
// the set so the merge-failure path can decrement and re-bucket.
|
|
982
|
-
const judgedNoActionRefs = new Set();
|
|
983
|
-
// 2026-05-26 accounting-leak fix: memories that belong to a chunk whose
|
|
984
|
-
// LLM call failed before any per-chunk noAction calculation runs. They
|
|
985
|
-
// would otherwise vanish from the envelope's accounting (no judgedNoAction
|
|
986
|
-
// bump, no skipReasons entry, no actioned counter).
|
|
987
|
-
let failedChunkMemories = 0;
|
|
988
1011
|
// C-6 / #392: Replace two-consecutive-failures abort with failure-rate threshold.
|
|
989
1012
|
// Consecutive-count policies are brittle against transient LM Studio reloads:
|
|
990
1013
|
// two transient failures abort the run even though the next chunk would succeed.
|
|
991
1014
|
// Rate-based abort (≥50% failure over ≥4 chunks) is more robust.
|
|
992
1015
|
// Tanenbaum, Distributed Systems §8 — rate-based policies with minimum sample sizes.
|
|
993
1016
|
let totalChunksProcessed = 0;
|
|
994
|
-
let totalChunksFailed = 0;
|
|
995
1017
|
const ABORT_MIN_CHUNKS = 4;
|
|
996
1018
|
const ABORT_FAILURE_RATE = 0.5;
|
|
997
1019
|
for (let chunkIdx = 0; chunkIdx < chunks.length; chunkIdx++) {
|
|
@@ -1004,13 +1026,13 @@ async function akmConsolidateInner(opts, config, stashDir, startMs, sourceRun, w
|
|
|
1004
1026
|
warnings.push(msg);
|
|
1005
1027
|
// Account for memories in unprocessed chunks.
|
|
1006
1028
|
for (let i = chunkIdx; i < chunks.length; i++) {
|
|
1007
|
-
failedChunkMemories += chunks[i].length;
|
|
1029
|
+
accounting.failedChunkMemories += chunks[i].length;
|
|
1008
1030
|
}
|
|
1009
1031
|
break;
|
|
1010
1032
|
}
|
|
1011
1033
|
// Abort if failure rate >= 50% over at least 4 processed chunks.
|
|
1012
1034
|
if (totalChunksProcessed >= ABORT_MIN_CHUNKS) {
|
|
1013
|
-
const failureRate = totalChunksFailed / totalChunksProcessed;
|
|
1035
|
+
const failureRate = accounting.totalChunksFailed / totalChunksProcessed;
|
|
1014
1036
|
if (failureRate >= ABORT_FAILURE_RATE) {
|
|
1015
1037
|
const skipped = chunks.length - chunkIdx;
|
|
1016
1038
|
const abortMsg = `Consolidation aborted — failure rate ${(failureRate * 100).toFixed(0)}% over ${totalChunksProcessed} chunks (>= ${ABORT_FAILURE_RATE * 100}% threshold). LLM may be unavailable. ${skipped} chunk(s) skipped.`;
|
|
@@ -1021,7 +1043,7 @@ async function akmConsolidateInner(opts, config, stashDir, startMs, sourceRun, w
|
|
|
1021
1043
|
// rejected). Without this, the accounting invariant fails by
|
|
1022
1044
|
// `Σ(unattempted_chunk.length)` whenever the abort fires.
|
|
1023
1045
|
for (let i = chunkIdx; i < chunks.length; i++) {
|
|
1024
|
-
failedChunkMemories += chunks[i].length;
|
|
1046
|
+
accounting.failedChunkMemories += chunks[i].length;
|
|
1025
1047
|
}
|
|
1026
1048
|
break;
|
|
1027
1049
|
}
|
|
@@ -1039,8 +1061,8 @@ async function akmConsolidateInner(opts, config, stashDir, startMs, sourceRun, w
|
|
|
1039
1061
|
// LLM-failure-rate abort policy — no request was attempted.
|
|
1040
1062
|
if (chunk.length > 0 && chunk.every((m) => isHotCapturedMemory(m.filePath))) {
|
|
1041
1063
|
for (const m of chunk)
|
|
1042
|
-
judgedNoActionRefs.add(`memory:${m.name}`);
|
|
1043
|
-
judgedNoAction += chunk.length;
|
|
1064
|
+
accounting.judgedNoActionRefs.add(`memory:${m.name}`);
|
|
1065
|
+
accounting.judgedNoAction += chunk.length;
|
|
1044
1066
|
warn(`[consolidate] chunk ${chunkIdx + 1}/${chunks.length}: all ${chunk.length} memories are captureMode: hot — skipping LLM (judged no-action).`);
|
|
1045
1067
|
continue;
|
|
1046
1068
|
}
|
|
@@ -1077,12 +1099,12 @@ async function akmConsolidateInner(opts, config, stashDir, startMs, sourceRun, w
|
|
|
1077
1099
|
warn(retry.error ?? `chunk ${chunkIdx + 1} failed after retry`);
|
|
1078
1100
|
warnings.push(retry.error ?? `chunk ${chunkIdx + 1} failed after retry`);
|
|
1079
1101
|
totalChunksProcessed++;
|
|
1080
|
-
totalChunksFailed++;
|
|
1102
|
+
accounting.totalChunksFailed++;
|
|
1081
1103
|
// Account for the chunk's memories under the failed-chunk bucket.
|
|
1082
1104
|
// judgedNoAction does NOT run on this path (it's after the success
|
|
1083
1105
|
// guards) so without this the accounting invariant breaks on every
|
|
1084
1106
|
// chunk-level transport/parse failure.
|
|
1085
|
-
failedChunkMemories += chunk.length;
|
|
1107
|
+
accounting.failedChunkMemories += chunk.length;
|
|
1086
1108
|
continue;
|
|
1087
1109
|
}
|
|
1088
1110
|
raw = retry;
|
|
@@ -1099,8 +1121,8 @@ async function akmConsolidateInner(opts, config, stashDir, startMs, sourceRun, w
|
|
|
1099
1121
|
warn(`Chunk ${chunkIdx + 1}: invalid plan from AI — skipping.${hint}`);
|
|
1100
1122
|
warnings.push(`Chunk ${chunkIdx + 1}: invalid plan from AI — skipping.${hint}`);
|
|
1101
1123
|
totalChunksProcessed++;
|
|
1102
|
-
totalChunksFailed++;
|
|
1103
|
-
failedChunkMemories += chunk.length;
|
|
1124
|
+
accounting.totalChunksFailed++;
|
|
1125
|
+
accounting.failedChunkMemories += chunk.length;
|
|
1104
1126
|
continue;
|
|
1105
1127
|
}
|
|
1106
1128
|
totalChunksProcessed++; // success
|
|
@@ -1140,7 +1162,7 @@ async function akmConsolidateInner(opts, config, stashDir, startMs, sourceRun, w
|
|
|
1140
1162
|
const memRef = `memory:${m.name}`;
|
|
1141
1163
|
if (!targetRefs.has(memRef)) {
|
|
1142
1164
|
chunkNoAction++;
|
|
1143
|
-
judgedNoActionRefs.add(memRef);
|
|
1165
|
+
accounting.judgedNoActionRefs.add(memRef);
|
|
1144
1166
|
// Judged-state cache (#581): the LLM saw this memory and proposed
|
|
1145
1167
|
// nothing → record judged-unchanged so the next run can skip it.
|
|
1146
1168
|
if (judgedCacheEnabled)
|
|
@@ -1151,7 +1173,7 @@ async function akmConsolidateInner(opts, config, stashDir, startMs, sourceRun, w
|
|
|
1151
1173
|
judgedOutcomeByName.set(m.name, "actioned");
|
|
1152
1174
|
}
|
|
1153
1175
|
}
|
|
1154
|
-
judgedNoAction += chunkNoAction;
|
|
1176
|
+
accounting.judgedNoAction += chunkNoAction;
|
|
1155
1177
|
chunkOpsArrays.push(ops);
|
|
1156
1178
|
}
|
|
1157
1179
|
// ── Judged-state cache recording (#581) ─────────────────────────────────────
|
|
@@ -1199,63 +1221,16 @@ async function akmConsolidateInner(opts, config, stashDir, startMs, sourceRun, w
|
|
|
1199
1221
|
const knownRefs = new Set(memories.map((m) => `memory:${m.name}`));
|
|
1200
1222
|
const { ops: allOps, warnings: mergeWarnings } = mergePlans(chunkOpsArrays, knownRefs);
|
|
1201
1223
|
warnings.push(...mergeWarnings);
|
|
1202
|
-
|
|
1203
|
-
|
|
1204
|
-
|
|
1205
|
-
|
|
1206
|
-
|
|
1207
|
-
|
|
1208
|
-
|
|
1209
|
-
|
|
1210
|
-
|
|
1211
|
-
|
|
1212
|
-
skipReasons,
|
|
1213
|
-
// No merge has executed on the preview path — the per-secondary tally is
|
|
1214
|
-
// provably still 0 here (it only increments in the op-execution loop).
|
|
1215
|
-
mergedSecondaries: 0,
|
|
1216
|
-
failedChunkMemories,
|
|
1217
|
-
planned: allOps,
|
|
1218
|
-
warnings,
|
|
1219
|
-
durationMs: Date.now() - startMs,
|
|
1220
|
-
});
|
|
1221
|
-
}
|
|
1222
|
-
warn(`[consolidate] plan: ${allOps.length} operation(s)`);
|
|
1223
|
-
// -- HTTP path: warn about quality and confirm unless auto-accepted --------
|
|
1224
|
-
if (isHttpPath) {
|
|
1225
|
-
warnings.push("Running on HTTP path — plan generated from truncated memory excerpts; quality may vary.");
|
|
1226
|
-
// Per-proposal confidence gating is handled by the caller (improve.ts)
|
|
1227
|
-
// via runAutoAcceptGate after this function returns. The gate reads
|
|
1228
|
-
// proposal.confidence (forwarded from op.confidence above) and applies
|
|
1229
|
-
// a minimumThreshold floor of 95 for consolidate's destructive ops.
|
|
1230
|
-
// Here we only gate the interactive-confirm path for manual/HTTP invocations.
|
|
1231
|
-
if (opts.autoAccept === undefined && allOps.length > 0) {
|
|
1232
|
-
const n = allOps.length;
|
|
1233
|
-
// Non-interactive contexts (CI / test runners / piped stdin) must not
|
|
1234
|
-
// block on an unanswerable prompt. Default to a non-destructive "no"
|
|
1235
|
-
// so callers in those contexts get the same "aborted, preview only"
|
|
1236
|
-
// shape they'd get from explicit user dismissal. AKM_NON_INTERACTIVE
|
|
1237
|
-
// lets callers force this path even when stdin happens to be a TTY.
|
|
1238
|
-
const nonInteractive = process.stdin.isTTY === false || process.env.AKM_NON_INTERACTIVE === "1";
|
|
1239
|
-
const answer = nonInteractive ? false : await promptConfirm(`Apply ${n} operations? [y/N] `);
|
|
1240
|
-
if (!answer) {
|
|
1241
|
-
return makeConsolidateResult({
|
|
1242
|
-
previewOnly: true,
|
|
1243
|
-
target: sourceName,
|
|
1244
|
-
processed: memories.length,
|
|
1245
|
-
failedChunks: totalChunksFailed,
|
|
1246
|
-
totalChunks: chunks.length,
|
|
1247
|
-
judgedNoAction,
|
|
1248
|
-
skipReasons,
|
|
1249
|
-
// No merge executed on the abort path — mergedSecondaries is still 0.
|
|
1250
|
-
mergedSecondaries: 0,
|
|
1251
|
-
failedChunkMemories,
|
|
1252
|
-
planned: allOps,
|
|
1253
|
-
warnings: [...warnings, nonInteractive ? "Non-interactive context: skipped apply." : "Aborted by user."],
|
|
1254
|
-
durationMs: Date.now() - startMs,
|
|
1255
|
-
});
|
|
1256
|
-
}
|
|
1257
|
-
}
|
|
1258
|
-
}
|
|
1224
|
+
return { allOps, totalChunks: chunks.length, llmPoolSize, embedTelemetry, isHttpPath, sourceName };
|
|
1225
|
+
}
|
|
1226
|
+
/**
|
|
1227
|
+
* Pass 3 — execute the reconciled plan against the filesystem: resolve the
|
|
1228
|
+
* write target, journal the batch, dispatch each op to its handler, then commit
|
|
1229
|
+
* the batch at the boundary and clean up the journal. Mutates `accounting` via
|
|
1230
|
+
* the op-handlers' `pushSkipReason`. Behavior-identical to the former inlined
|
|
1231
|
+
* write block. Never invoked on the dry-run or aborted-confirm paths.
|
|
1232
|
+
*/
|
|
1233
|
+
async function applyConsolidationPlan(config, stashDir, sourceRun, memories, warnings, allOps, accounting, dedupCollapsed, activeProfile) {
|
|
1259
1234
|
// -- Phase B + writes -------------------------------------------------------
|
|
1260
1235
|
const target = resolveWriteTarget(config);
|
|
1261
1236
|
const timestamp = timestampForFilename();
|
|
@@ -1282,6 +1257,7 @@ async function akmConsolidateInner(opts, config, stashDir, startMs, sourceRun, w
|
|
|
1282
1257
|
}
|
|
1283
1258
|
const opCtx = {
|
|
1284
1259
|
config,
|
|
1260
|
+
improveProfile: activeProfile,
|
|
1285
1261
|
stashDir,
|
|
1286
1262
|
sourceRun,
|
|
1287
1263
|
target,
|
|
@@ -1291,7 +1267,7 @@ async function akmConsolidateInner(opts, config, stashDir, startMs, sourceRun, w
|
|
|
1291
1267
|
promotedSourceRefs,
|
|
1292
1268
|
warnings,
|
|
1293
1269
|
counts,
|
|
1294
|
-
pushSkipReason,
|
|
1270
|
+
pushSkipReason: accounting.pushSkipReason,
|
|
1295
1271
|
};
|
|
1296
1272
|
// Thin dispatch over the op discriminator — each branch is now an isolated,
|
|
1297
1273
|
// independently-testable handler that mutates `opCtx`.
|
|
@@ -1335,6 +1311,76 @@ async function akmConsolidateInner(opts, config, stashDir, startMs, sourceRun, w
|
|
|
1335
1311
|
const totalChanged = merged + deleted + dedupCollapsed;
|
|
1336
1312
|
warnings.push(`Changed ${totalChanged} file(s) this run. Recover any via git if needed (git history is the backstop).`);
|
|
1337
1313
|
}
|
|
1314
|
+
return { merged, deleted, contradicted, mergeFloorViolations, mergedSecondaries, promoted };
|
|
1315
|
+
}
|
|
1316
|
+
async function akmConsolidateInner(opts, config, stashDir, startMs, sourceRun, warnings, sharedStateDb) {
|
|
1317
|
+
// -- Pass 1: narrow the memory pool (may early-return an envelope) ----------
|
|
1318
|
+
const narrowed = await narrowConsolidationPool(opts, config, stashDir, startMs, warnings, sharedStateDb);
|
|
1319
|
+
if (narrowed.done)
|
|
1320
|
+
return narrowed.result;
|
|
1321
|
+
const { memories, dedupCollapsed, perfMs, judgedCacheEnabled, currentHashByName } = narrowed;
|
|
1322
|
+
// -- Pass 2: build the LLM plan (populates the shared accounting counters) ---
|
|
1323
|
+
const accounting = createConsolidateAccounting();
|
|
1324
|
+
const { allOps, totalChunks, llmPoolSize, embedTelemetry, isHttpPath, sourceName } = await planConsolidation(opts, config, stashDir, startMs, memories, warnings, sharedStateDb, judgedCacheEnabled, currentHashByName, accounting);
|
|
1325
|
+
// -- Dry-run: show AI plan without executing any writes --------------------
|
|
1326
|
+
if (opts.dryRun) {
|
|
1327
|
+
return makeConsolidateResult({
|
|
1328
|
+
dryRun: true,
|
|
1329
|
+
previewOnly: true,
|
|
1330
|
+
target: sourceName,
|
|
1331
|
+
processed: memories.length,
|
|
1332
|
+
failedChunks: accounting.totalChunksFailed,
|
|
1333
|
+
totalChunks,
|
|
1334
|
+
judgedNoAction: accounting.judgedNoAction,
|
|
1335
|
+
skipReasons: accounting.skipReasons,
|
|
1336
|
+
// No merge has executed on the preview path — the per-secondary tally is
|
|
1337
|
+
// provably still 0 here (it only increments in the op-execution loop).
|
|
1338
|
+
mergedSecondaries: 0,
|
|
1339
|
+
failedChunkMemories: accounting.failedChunkMemories,
|
|
1340
|
+
planned: allOps,
|
|
1341
|
+
warnings,
|
|
1342
|
+
durationMs: Date.now() - startMs,
|
|
1343
|
+
});
|
|
1344
|
+
}
|
|
1345
|
+
warn(`[consolidate] plan: ${allOps.length} operation(s)`);
|
|
1346
|
+
// -- HTTP path: warn about quality and confirm unless auto-accepted --------
|
|
1347
|
+
if (isHttpPath) {
|
|
1348
|
+
warnings.push("Running on HTTP path — plan generated from truncated memory excerpts; quality may vary.");
|
|
1349
|
+
// Per-proposal confidence gating is handled by the caller (improve.ts)
|
|
1350
|
+
// via runAutoAcceptGate after this function returns. The gate reads
|
|
1351
|
+
// proposal.confidence (forwarded from op.confidence above) and applies
|
|
1352
|
+
// a minimumThreshold floor of 95 for consolidate's destructive ops.
|
|
1353
|
+
// Here we only gate the interactive-confirm path for manual/HTTP invocations.
|
|
1354
|
+
if (opts.autoAccept === undefined && allOps.length > 0) {
|
|
1355
|
+
const n = allOps.length;
|
|
1356
|
+
// Non-interactive contexts (CI / test runners / piped stdin) must not
|
|
1357
|
+
// block on an unanswerable prompt. Default to a non-destructive "no"
|
|
1358
|
+
// so callers in those contexts get the same "aborted, preview only"
|
|
1359
|
+
// shape they'd get from explicit user dismissal. AKM_NON_INTERACTIVE
|
|
1360
|
+
// lets callers force this path even when stdin happens to be a TTY.
|
|
1361
|
+
const nonInteractive = process.stdin.isTTY === false || process.env.AKM_NON_INTERACTIVE === "1";
|
|
1362
|
+
const answer = nonInteractive ? false : await promptConfirm(`Apply ${n} operations? [y/N] `);
|
|
1363
|
+
if (!answer) {
|
|
1364
|
+
return makeConsolidateResult({
|
|
1365
|
+
previewOnly: true,
|
|
1366
|
+
target: sourceName,
|
|
1367
|
+
processed: memories.length,
|
|
1368
|
+
failedChunks: accounting.totalChunksFailed,
|
|
1369
|
+
totalChunks,
|
|
1370
|
+
judgedNoAction: accounting.judgedNoAction,
|
|
1371
|
+
skipReasons: accounting.skipReasons,
|
|
1372
|
+
// No merge executed on the abort path — mergedSecondaries is still 0.
|
|
1373
|
+
mergedSecondaries: 0,
|
|
1374
|
+
failedChunkMemories: accounting.failedChunkMemories,
|
|
1375
|
+
planned: allOps,
|
|
1376
|
+
warnings: [...warnings, nonInteractive ? "Non-interactive context: skipped apply." : "Aborted by user."],
|
|
1377
|
+
durationMs: Date.now() - startMs,
|
|
1378
|
+
});
|
|
1379
|
+
}
|
|
1380
|
+
}
|
|
1381
|
+
}
|
|
1382
|
+
// -- Pass 3: execute the plan against the filesystem ------------------------
|
|
1383
|
+
const { merged, deleted, contradicted, mergeFloorViolations, mergedSecondaries, promoted } = await applyConsolidationPlan(config, stashDir, sourceRun, memories, warnings, allOps, accounting, dedupCollapsed, opts.improveProfile);
|
|
1338
1384
|
const runDurationMs = Date.now() - startMs;
|
|
1339
1385
|
const budgetFraction = opts.runBudgetMs !== undefined && opts.runBudgetMs > 0 ? runDurationMs / opts.runBudgetMs : undefined;
|
|
1340
1386
|
return {
|
|
@@ -1353,12 +1399,12 @@ async function akmConsolidateInner(opts, config, stashDir, startMs, sourceRun, w
|
|
|
1353
1399
|
promoted,
|
|
1354
1400
|
contradicted,
|
|
1355
1401
|
mergeFloorViolations,
|
|
1356
|
-
failedChunks: totalChunksFailed,
|
|
1357
|
-
totalChunks
|
|
1358
|
-
judgedNoAction,
|
|
1359
|
-
skipReasons,
|
|
1402
|
+
failedChunks: accounting.totalChunksFailed,
|
|
1403
|
+
totalChunks,
|
|
1404
|
+
judgedNoAction: accounting.judgedNoAction,
|
|
1405
|
+
skipReasons: accounting.skipReasons,
|
|
1360
1406
|
mergedSecondaries,
|
|
1361
|
-
failedChunkMemories,
|
|
1407
|
+
failedChunkMemories: accounting.failedChunkMemories,
|
|
1362
1408
|
warnings,
|
|
1363
1409
|
durationMs: runDurationMs,
|
|
1364
1410
|
perfTelemetry: {
|
|
@@ -1456,7 +1502,7 @@ export async function handleMergeOp(op, opIndex, ctx) {
|
|
|
1456
1502
|
emitMergeFailureSkips("merge_read_failed");
|
|
1457
1503
|
return;
|
|
1458
1504
|
}
|
|
1459
|
-
const mergeResult = await generateMergedContent(config, op.primary, primaryBody, op.secondaries, memoryByRef);
|
|
1505
|
+
const mergeResult = await generateMergedContent(config, op.primary, primaryBody, op.secondaries, memoryByRef, ctx.improveProfile);
|
|
1460
1506
|
if ("error" in mergeResult) {
|
|
1461
1507
|
warnings.push(`Merge: ${mergeResult.error} for ${mergeResult.detail}.`);
|
|
1462
1508
|
emitMergeFailureSkips(mergeResult.error);
|
|
@@ -1521,7 +1567,7 @@ export async function handleMergeOp(op, opIndex, ctx) {
|
|
|
1521
1567
|
// to merge two assets both above generation N (default 2) — prevents the
|
|
1522
1568
|
// pipeline from building ever-deeper LLM-merged trees that lose the
|
|
1523
1569
|
// source fidelity of the original episodes.
|
|
1524
|
-
const antiCollapseConfig = config.
|
|
1570
|
+
const antiCollapseConfig = getImproveProcessConfig(config, "consolidate", ctx.improveProfile)?.antiCollapse ?? {};
|
|
1525
1571
|
if (antiCollapseConfig.enabled !== false) {
|
|
1526
1572
|
const allParticipants = [op.primary, ...op.secondaries];
|
|
1527
1573
|
// One read per participant: generation counter, stripped body (for the
|
|
@@ -1989,11 +2035,13 @@ async function checkPreEmitDedup(opts) {
|
|
|
1989
2035
|
* doesn't match the pattern (assumed to already be an ISO timestamp).
|
|
1990
2036
|
*/
|
|
1991
2037
|
function parseSinceToIso(since) {
|
|
1992
|
-
|
|
1993
|
-
|
|
2038
|
+
// Canonical CLI unit grammar: `m` = minutes, `M` = months (see core/time.ts
|
|
2039
|
+
// DURATION_UNITS). Non-matching input is returned unchanged (assumed to
|
|
2040
|
+
// already be an ISO timestamp).
|
|
2041
|
+
const ms = parseDuration(since, DURATION_UNITS);
|
|
2042
|
+
if (ms === null)
|
|
1994
2043
|
return since;
|
|
1995
|
-
|
|
1996
|
-
return new Date(Date.now() - parseInt(m[1], 10) * multiplier).toISOString();
|
|
2044
|
+
return new Date(Date.now() - ms).toISOString();
|
|
1997
2045
|
}
|
|
1998
2046
|
export function narrowToIncrementalCandidates(memories, since, warnings, neighborsPerChanged = 5) {
|
|
1999
2047
|
const sinceIso = parseSinceToIso(since);
|
|
@@ -2105,7 +2153,7 @@ function loadMemoriesForSource(source, stashDir, warnings) {
|
|
|
2105
2153
|
}
|
|
2106
2154
|
return memories;
|
|
2107
2155
|
}
|
|
2108
|
-
async function generateMergedContent(config, primaryRef, primaryBody, secondaryRefs, memoryByRef) {
|
|
2156
|
+
async function generateMergedContent(config, primaryRef, primaryBody, secondaryRefs, memoryByRef, activeProfile) {
|
|
2109
2157
|
// Only handle single-secondary merges per design (one call per merge op)
|
|
2110
2158
|
const secRef = secondaryRefs[0];
|
|
2111
2159
|
const secEntry = memoryByRef.get(secRef);
|
|
@@ -2149,7 +2197,7 @@ async function generateMergedContent(config, primaryRef, primaryBody, secondaryR
|
|
|
2149
2197
|
.join("\n");
|
|
2150
2198
|
// Use the same per-process profile resolution as the chunk-plan call above
|
|
2151
2199
|
// so the merge generation step doesn't silently revert to the default LLM.
|
|
2152
|
-
const llmConfig = resolveConsolidateLlmConfig(config);
|
|
2200
|
+
const llmConfig = resolveConsolidateLlmConfig(config, activeProfile);
|
|
2153
2201
|
const result = await tryLlmFeature("memory_consolidation", config, async () => {
|
|
2154
2202
|
if (!llmConfig)
|
|
2155
2203
|
return { ok: false, error: "No LLM configured for consolidation" };
|
|
@@ -152,9 +152,10 @@ export async function promoteMemoryToKnowledge(ctx) {
|
|
|
152
152
|
}
|
|
153
153
|
return writeQualityRejection(stash, inputRef, promotion.knowledgeRef, resolvedPromotionContent, judgeResult.score, judgeResult.reason, {}, ctx.eligibilitySource);
|
|
154
154
|
}
|
|
155
|
-
// Normalize 1-5 judge score to [0, 1].
|
|
156
|
-
// (
|
|
157
|
-
//
|
|
155
|
+
// Normalize 1-5 judge score to [0, 1]. Only a real passing verdict reaches
|
|
156
|
+
// here (07 P0-2: the judge now fails CLOSED on no-LLM / timeout / parse
|
|
157
|
+
// failure, so those return pass:false and early-return above). The score>0
|
|
158
|
+
// guard defensively leaves confidence undefined for any non-positive score.
|
|
158
159
|
if (judgeResult.score > 0)
|
|
159
160
|
knowledgeJudgeConfidence = judgeResult.score / 5;
|
|
160
161
|
}
|
|
@@ -107,14 +107,17 @@ export function buildJudgePrompt(lessonContent, sourceContent, similarLessons) {
|
|
|
107
107
|
* `profiles.improve.default.processes.distill.qualityGate.enabled` (and the
|
|
108
108
|
* corresponding `.reflect.qualityGate.enabled` for proposals).
|
|
109
109
|
*
|
|
110
|
-
* Fail-
|
|
110
|
+
* Fail-CLOSED (07 P0-2): returns `pass: false` (score -1) on timeout, parse
|
|
111
|
+
* failure, or missing LLM. Minted content that cannot be judged is rejected,
|
|
112
|
+
* not passed through — an unverifiable judge must never wave content into the
|
|
113
|
+
* stash. The rejection is `quality_rejected`, not `review_needed`.
|
|
111
114
|
*/
|
|
112
115
|
export async function runLessonQualityJudge(config, lessonContent, sourceContent, chat,
|
|
113
116
|
/** D-4 / #390: top-3 similar existing lessons for dedup check. */
|
|
114
117
|
similarLessons) {
|
|
115
118
|
const llmConfig = getDefaultLlmConfig(config);
|
|
116
119
|
if (!llmConfig) {
|
|
117
|
-
return { pass:
|
|
120
|
+
return { pass: false, score: -1, reason: "no LLM configured — cannot judge, failing closed" };
|
|
118
121
|
}
|
|
119
122
|
const judgeLlmConfig = llmConfig.judgeModel ? { ...llmConfig, model: llmConfig.judgeModel } : llmConfig;
|
|
120
123
|
const JUDGE_TIMEOUT_MS = 8_000;
|
|
@@ -128,7 +131,7 @@ similarLessons) {
|
|
|
128
131
|
]);
|
|
129
132
|
const parsed = parseEmbeddedJsonResponse(raw);
|
|
130
133
|
if (!parsed || typeof parsed.score !== "number") {
|
|
131
|
-
return { pass:
|
|
134
|
+
return { pass: false, score: -1, reason: "judge parse failed — cannot judge, failing closed" };
|
|
132
135
|
}
|
|
133
136
|
// D-5 / #388: Three-band system (MT-Bench arXiv:2306.05685 — ~±0.5 judge variance).
|
|
134
137
|
// >= 3.5: auto-queue as pending (pass: true)
|
|
@@ -146,7 +149,7 @@ similarLessons) {
|
|
|
146
149
|
return { pass: false, score, reason };
|
|
147
150
|
}
|
|
148
151
|
catch {
|
|
149
|
-
return { pass:
|
|
152
|
+
return { pass: false, score: -1, reason: "judge timeout/error — cannot judge, failing closed" };
|
|
150
153
|
}
|
|
151
154
|
}
|
|
152
155
|
// ── Quality-rejection helper ─────────────────────────────────────────────────
|