akm-cli 0.9.0-beta.53 → 0.9.0-beta.55
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli/clack.js +56 -0
- package/dist/cli/confirm.js +1 -1
- package/dist/cli.js +5 -3
- package/dist/commands/agent/contribute-cli.js +2 -3
- package/dist/commands/env/env-cli.js +187 -202
- package/dist/commands/env/secret-cli.js +109 -121
- package/dist/commands/feedback-cli.js +152 -155
- package/dist/commands/health/advisories.js +151 -0
- package/dist/commands/health/html-report.js +33 -10
- package/dist/commands/health/improve-metrics.js +754 -0
- package/dist/commands/health/llm-usage.js +65 -0
- package/dist/commands/health/md-report.js +103 -0
- package/dist/commands/health/metrics.js +278 -0
- package/dist/commands/health/task-runs.js +135 -0
- package/dist/commands/health/types.js +18 -0
- package/dist/commands/health/windows.js +196 -0
- package/dist/commands/health.js +15 -1492
- package/dist/commands/improve/anti-collapse.js +170 -0
- package/dist/commands/improve/collapse-detector.js +3 -2
- package/dist/commands/improve/consolidate.js +636 -633
- package/dist/commands/improve/dedup.js +1 -1
- package/dist/commands/improve/distill/content-repair.js +202 -0
- package/dist/commands/improve/distill/promote-memory.js +228 -0
- package/dist/commands/improve/distill/quality-gate.js +233 -0
- package/dist/commands/improve/distill-guards.js +127 -0
- package/dist/commands/improve/distill.js +49 -575
- package/dist/commands/improve/extract-cli.js +74 -76
- package/dist/commands/improve/extract.js +6 -4
- package/dist/commands/improve/hot-probation.js +45 -0
- package/dist/commands/improve/improve-auto-accept.js +3 -2
- package/dist/commands/improve/improve-cli.js +14 -13
- package/dist/commands/improve/improve-result-file.js +2 -1
- package/dist/commands/improve/improve.js +6 -5
- package/dist/commands/improve/loop-stages.js +19 -21
- package/dist/commands/improve/outcome-loop.js +18 -16
- package/dist/commands/improve/preparation.js +23 -5
- package/dist/commands/improve/procedural.js +10 -31
- package/dist/commands/improve/recombine.js +19 -43
- package/dist/commands/improve/reflect.js +1 -1
- package/dist/commands/improve/schema-similarity-gate.js +168 -0
- package/dist/commands/improve/shared.js +48 -0
- package/dist/commands/observability-cli.js +4 -4
- package/dist/commands/proposal/drain-policies.js +2 -2
- package/dist/commands/proposal/drain.js +1 -1
- package/dist/commands/proposal/legacy-import.js +115 -0
- package/dist/commands/proposal/proposal-cli.js +3 -3
- package/dist/commands/proposal/proposal.js +2 -1
- package/dist/commands/proposal/propose.js +1 -1
- package/dist/commands/proposal/repository.js +829 -0
- package/dist/commands/proposal/validators/proposals.js +5 -920
- package/dist/commands/read/curate.js +4 -4
- package/dist/commands/read/remember-cli.js +132 -137
- package/dist/commands/read/search-cli.js +7 -5
- package/dist/commands/read/search.js +7 -3
- package/dist/commands/read/show.js +3 -5
- package/dist/commands/registry-cli.js +76 -87
- package/dist/commands/sources/add-cli.js +91 -95
- package/dist/commands/sources/history.js +1 -1
- package/dist/commands/sources/init.js +12 -0
- package/dist/commands/sources/schema-repair.js +1 -1
- package/dist/commands/sources/sources-cli.js +3 -3
- package/dist/commands/sources/stash-cli.js +2 -2
- package/dist/commands/tasks/default-tasks.js +12 -0
- package/dist/commands/tasks/tasks-cli.js +1 -2
- package/dist/commands/wiki-cli.js +2 -3
- package/dist/core/common.js +3 -3
- package/dist/core/config/config-schema.js +6 -0
- package/dist/core/config/config.js +12 -0
- package/dist/core/deep-merge.js +38 -0
- package/dist/core/events.js +2 -1
- package/dist/core/logs-db.js +8 -13
- package/dist/core/paths.js +14 -14
- package/dist/core/state-db.js +13 -1140
- package/dist/core/warn.js +21 -0
- package/dist/indexer/db/db.js +72 -709
- package/dist/indexer/db/entry-mapper.js +41 -0
- package/dist/indexer/db/schema.js +516 -0
- package/dist/indexer/ensure-index.js +3 -2
- package/dist/indexer/feedback/utility-policy.js +85 -0
- package/dist/indexer/graph/graph-extraction.js +2 -1
- package/dist/indexer/index-writer-lock.js +18 -0
- package/dist/indexer/indexer.js +94 -27
- package/dist/indexer/read-preflight.js +23 -0
- package/dist/indexer/search/fts-query.js +51 -0
- package/dist/indexer/walk/walker.js +21 -13
- package/dist/integrations/agent/detect.js +9 -0
- package/dist/integrations/agent/index.js +1 -1
- package/dist/integrations/agent/spawn.js +15 -66
- package/dist/llm/client.js +12 -0
- package/dist/llm/embedder.js +26 -2
- package/dist/llm/embedders/local.js +7 -1
- package/dist/output/text/helpers.js +13 -0
- package/dist/scripts/migrate-storage.js +6903 -7424
- package/dist/scripts/migrations/import-fs-improve-runs-to-db.js +49 -44
- package/dist/setup/detect.js +9 -0
- package/dist/setup/legacy-config.js +106 -0
- package/dist/setup/prompt.js +57 -0
- package/dist/setup/providers.js +14 -0
- package/dist/setup/registry-stash-loader.js +12 -0
- package/dist/setup/semantic-assets.js +124 -0
- package/dist/setup/setup.js +25 -1608
- package/dist/setup/steps/connection.js +734 -0
- package/dist/setup/steps/output.js +31 -0
- package/dist/setup/steps/platforms.js +124 -0
- package/dist/setup/steps/semantic.js +27 -0
- package/dist/setup/steps/sources.js +222 -0
- package/dist/setup/steps/stashdir.js +42 -0
- package/dist/setup/steps/tasks.js +152 -0
- package/dist/storage/repositories/canaries-repository.js +107 -0
- package/dist/storage/repositories/consolidation-repository.js +38 -0
- package/dist/storage/repositories/embeddings-repository.js +72 -0
- package/dist/storage/repositories/events-repository.js +187 -0
- package/dist/storage/repositories/extract-sessions-repository.js +96 -0
- package/dist/storage/repositories/improve-runs-repository.js +130 -0
- package/dist/storage/repositories/index-db.js +4 -7
- package/dist/storage/repositories/proposals-repository.js +220 -0
- package/dist/storage/repositories/recombine-repository.js +213 -0
- package/dist/storage/repositories/task-history-repository.js +93 -0
- package/dist/storage/sqlite-pragmas.js +3 -3
- package/dist/tasks/backends/index.js +9 -0
- package/dist/tasks/runner.js +11 -1
- package/package.json +2 -2
- package/dist/commands/improve/homeostatic.js +0 -497
package/dist/commands/health.js
CHANGED
|
@@ -1,23 +1,25 @@
|
|
|
1
1
|
// This Source Code Form is subject to the terms of the Mozilla Public
|
|
2
2
|
// License, v. 2.0. If a copy of the MPL was not distributed with this
|
|
3
3
|
// file, You can obtain one at https://mozilla.org/MPL/2.0/.
|
|
4
|
-
import fs from "node:fs";
|
|
5
4
|
import { loadConfig } from "../core/config/config.js";
|
|
6
5
|
import { ConfigError, UsageError } from "../core/errors.js";
|
|
7
|
-
import {
|
|
8
|
-
import {
|
|
6
|
+
import { readEvents } from "../core/events.js";
|
|
7
|
+
import { openLogsDatabase } from "../core/logs-db.js";
|
|
9
8
|
import { getStateDbPathInDataDir } from "../core/paths.js";
|
|
10
|
-
import {
|
|
9
|
+
import { listExistingTableNames, openStateDatabase } from "../core/state-db.js";
|
|
11
10
|
import { parseSinceToIso } from "../core/time.js";
|
|
12
11
|
import { readSemanticStatus } from "../indexer/search/semantic-status.js";
|
|
13
12
|
import { getExecutionLogCandidates } from "../integrations/session-logs/index.js";
|
|
14
|
-
import {
|
|
13
|
+
import { queryTaskHistory } from "../storage/repositories/task-history-repository.js";
|
|
14
|
+
import { collectImproveAdvisories } from "./health/advisories.js";
|
|
15
15
|
import { HEALTH_CHECKS } from "./health/checks.js";
|
|
16
|
-
import {
|
|
16
|
+
import { buildImproveSkipSummary, computeWallTimeStats, parseTaskMetadata, roundRate, summarizeImproveCompleted, summarizeImproveRuns, } from "./health/improve-metrics.js";
|
|
17
|
+
import { readLlmUsageAggregate } from "./health/llm-usage.js";
|
|
18
|
+
import { computeDegradationMetrics, computeDenominatorFixedCoverage, computeEnrichmentMintingRollup, probeStateDbRoundTrip, readCalibration, } from "./health/metrics.js";
|
|
19
|
+
import { buildPerRunSummaries } from "./health/task-runs.js";
|
|
20
|
+
import { ACTIVE_RUN_WARN_MS, IMPROVE_COMPLETED_EVENT, } from "./health/types.js";
|
|
21
|
+
import { buildWindowMetrics, computeDeltas, partitionLogBackedRows, resolveWindowCompare } from "./health/windows.js";
|
|
17
22
|
const DEFAULT_SINCE_MS = 24 * 60 * 60 * 1000;
|
|
18
|
-
const IMPROVE_COMPLETED_EVENT = "improve_completed";
|
|
19
|
-
const HEALTH_PROBE_EVENT = "health_probe";
|
|
20
|
-
const ACTIVE_RUN_WARN_MS = 15 * 60 * 1000;
|
|
21
23
|
export function parseHealthSince(since) {
|
|
22
24
|
if (since === undefined || since.trim() === "") {
|
|
23
25
|
return new Date(Date.now() - DEFAULT_SINCE_MS).toISOString();
|
|
@@ -35,1318 +37,6 @@ export function parseHealthSince(since) {
|
|
|
35
37
|
}
|
|
36
38
|
return parseSinceToIso(trimmed);
|
|
37
39
|
}
|
|
38
|
-
function roundRate(value) {
|
|
39
|
-
return Number(value.toFixed(4));
|
|
40
|
-
}
|
|
41
|
-
function parseTaskMetadata(row) {
|
|
42
|
-
try {
|
|
43
|
-
return JSON.parse(row.metadata_json);
|
|
44
|
-
}
|
|
45
|
-
catch {
|
|
46
|
-
return {};
|
|
47
|
-
}
|
|
48
|
-
}
|
|
49
|
-
function createUnknownImproveMetrics() {
|
|
50
|
-
return {
|
|
51
|
-
invoked: 0,
|
|
52
|
-
completed: 0,
|
|
53
|
-
skipped: 0,
|
|
54
|
-
skipReasons: {},
|
|
55
|
-
plannedRefs: 0,
|
|
56
|
-
profileFilteredRefs: 0,
|
|
57
|
-
actions: {
|
|
58
|
-
reflect: { ok: 0, failed: 0, cooldown: 0, skipped: 0, guardRejected: 0, skippedByReason: {} },
|
|
59
|
-
distill: {
|
|
60
|
-
queued: 0,
|
|
61
|
-
llmFailed: 0,
|
|
62
|
-
qualityRejected: 0,
|
|
63
|
-
judgeRejected: 0,
|
|
64
|
-
validatorRejected: 0,
|
|
65
|
-
configDisabled: 0,
|
|
66
|
-
skipped: 0,
|
|
67
|
-
skippedByReason: {},
|
|
68
|
-
deferred: 0,
|
|
69
|
-
deferredByReason: {},
|
|
70
|
-
},
|
|
71
|
-
memoryPrune: 0,
|
|
72
|
-
memoryInference: 0,
|
|
73
|
-
graphExtraction: 0,
|
|
74
|
-
error: 0,
|
|
75
|
-
},
|
|
76
|
-
autoAccept: { promoted: 0, validationFailed: 0 },
|
|
77
|
-
calibration: summarizeCalibration([]),
|
|
78
|
-
reflectsWithErrorContext: 0,
|
|
79
|
-
coverageGapCount: 0,
|
|
80
|
-
evalCasesWritten: 0,
|
|
81
|
-
deadUrlCount: 0,
|
|
82
|
-
memorySummary: { eligible: 0, derived: 0 },
|
|
83
|
-
memoryCleanup: {
|
|
84
|
-
pruneCandidates: 0,
|
|
85
|
-
contradictionCandidates: 0,
|
|
86
|
-
beliefStateTransitions: 0,
|
|
87
|
-
consolidationCandidates: 0,
|
|
88
|
-
archived: 0,
|
|
89
|
-
warnings: 0,
|
|
90
|
-
},
|
|
91
|
-
consolidation: {
|
|
92
|
-
ran: false,
|
|
93
|
-
processed: 0,
|
|
94
|
-
promoted: 0,
|
|
95
|
-
merged: 0,
|
|
96
|
-
deleted: 0,
|
|
97
|
-
contradicted: 0,
|
|
98
|
-
judgedNoAction: 0,
|
|
99
|
-
mergedSecondaries: 0,
|
|
100
|
-
failedChunkMemories: 0,
|
|
101
|
-
skipReasons: {},
|
|
102
|
-
failedChunks: 0,
|
|
103
|
-
totalChunks: 0,
|
|
104
|
-
durationMs: 0,
|
|
105
|
-
},
|
|
106
|
-
memoryInference: {
|
|
107
|
-
ran: false,
|
|
108
|
-
considered: 0,
|
|
109
|
-
cacheHits: 0,
|
|
110
|
-
retryAttempts: 0,
|
|
111
|
-
freshAttempts: 0,
|
|
112
|
-
splitParents: 0,
|
|
113
|
-
written: 0,
|
|
114
|
-
skippedNoFacts: 0,
|
|
115
|
-
skippedChildExists: 0,
|
|
116
|
-
skippedAborted: 0,
|
|
117
|
-
unaccounted: 0,
|
|
118
|
-
htmlErrorCount: 0,
|
|
119
|
-
yieldEligibleRuns: 0,
|
|
120
|
-
yieldEligibleConsidered: 0,
|
|
121
|
-
yieldEligibleWritten: 0,
|
|
122
|
-
yieldRate: 0,
|
|
123
|
-
durationMs: 0,
|
|
124
|
-
writes: 0,
|
|
125
|
-
},
|
|
126
|
-
graphExtraction: {
|
|
127
|
-
ran: false,
|
|
128
|
-
extractedFiles: 0,
|
|
129
|
-
entities: 0,
|
|
130
|
-
relations: 0,
|
|
131
|
-
cacheHits: 0,
|
|
132
|
-
cacheMisses: 0,
|
|
133
|
-
cacheHitRate: 0,
|
|
134
|
-
truncations: 0,
|
|
135
|
-
failures: 0,
|
|
136
|
-
htmlErrors: 0,
|
|
137
|
-
retryAttempts: 0,
|
|
138
|
-
nonArrayBatchFailures: 0,
|
|
139
|
-
durationMs: 0,
|
|
140
|
-
},
|
|
141
|
-
sessionExtraction: {
|
|
142
|
-
ran: false,
|
|
143
|
-
sessionsScanned: 0,
|
|
144
|
-
sessionsExtracted: 0,
|
|
145
|
-
sessionsSkipped: 0,
|
|
146
|
-
proposalsCreated: 0,
|
|
147
|
-
warnings: 0,
|
|
148
|
-
durationMs: 0,
|
|
149
|
-
},
|
|
150
|
-
wallTime: {
|
|
151
|
-
count: 0,
|
|
152
|
-
medianMs: 0,
|
|
153
|
-
p95Ms: 0,
|
|
154
|
-
minMs: 0,
|
|
155
|
-
maxMs: 0,
|
|
156
|
-
byPhase: {
|
|
157
|
-
consolidation: { count: 0, totalMs: 0, medianMs: 0, p95Ms: 0 },
|
|
158
|
-
memoryInference: { count: 0, totalMs: 0, medianMs: 0, p95Ms: 0 },
|
|
159
|
-
graphExtraction: { count: 0, totalMs: 0, medianMs: 0, p95Ms: 0 },
|
|
160
|
-
},
|
|
161
|
-
},
|
|
162
|
-
perfTelemetry: {
|
|
163
|
-
dedupPoolSize: 0,
|
|
164
|
-
llmPoolSize: 0,
|
|
165
|
-
judgedCacheSkipped: 0,
|
|
166
|
-
embedMs: 0,
|
|
167
|
-
embedCacheHits: 0,
|
|
168
|
-
embedCacheMisses: 0,
|
|
169
|
-
overBudgetRuns: 0,
|
|
170
|
-
runsWithTelemetry: 0,
|
|
171
|
-
},
|
|
172
|
-
coverage: {
|
|
173
|
-
rate: Number.NaN,
|
|
174
|
-
eligibleFraction: Number.NaN,
|
|
175
|
-
acceptedProposals: 0,
|
|
176
|
-
totalAssets: 0,
|
|
177
|
-
},
|
|
178
|
-
};
|
|
179
|
-
}
|
|
180
|
-
function toFiniteNumber(value) {
|
|
181
|
-
if (typeof value === "number" && Number.isFinite(value))
|
|
182
|
-
return value;
|
|
183
|
-
if (typeof value === "string" && value.trim()) {
|
|
184
|
-
const parsed = Number(value);
|
|
185
|
-
if (Number.isFinite(parsed))
|
|
186
|
-
return parsed;
|
|
187
|
-
}
|
|
188
|
-
return 0;
|
|
189
|
-
}
|
|
190
|
-
/**
|
|
191
|
-
* Event-derived metrics. Only `completed` and skipReasons/invoked are sourced
|
|
192
|
-
* from events in v2 — the richer fields come from {@link summarizeImproveRuns}.
|
|
193
|
-
* The function still receives `improve_completed` events so that the completed
|
|
194
|
-
* count reflects the canonical event stream (it lines up 1:1 with improve_runs
|
|
195
|
-
* rows in practice, but the events table remains the system-of-record for the
|
|
196
|
-
* existence of a run).
|
|
197
|
-
*/
|
|
198
|
-
function summarizeImproveCompleted(events) {
|
|
199
|
-
const metrics = createUnknownImproveMetrics();
|
|
200
|
-
metrics.completed = events.length;
|
|
201
|
-
return metrics;
|
|
202
|
-
}
|
|
203
|
-
/**
|
|
204
|
-
* Project a single `improve_runs.result_json` envelope into an accumulator-shaped
|
|
205
|
-
* ImproveHealthMetrics. The aggregator merges these per-row metrics into one
|
|
206
|
-
* window-level metric.
|
|
207
|
-
*/
|
|
208
|
-
function projectRunMetrics(result) {
|
|
209
|
-
const metrics = createUnknownImproveMetrics();
|
|
210
|
-
// plannedRefs (array of {ref, reason})
|
|
211
|
-
const plannedRefs = result.plannedRefs;
|
|
212
|
-
if (Array.isArray(plannedRefs))
|
|
213
|
-
metrics.plannedRefs += plannedRefs.length;
|
|
214
|
-
// profileFilteredRefs (array of {ref, reason}) — 2026-05-27: pre-filter
|
|
215
|
-
// bucket from `collectEligibleRefs` so the metric reflects work the
|
|
216
|
-
// planner dropped before signal-delta / per-pass dispatch.
|
|
217
|
-
const profileFilteredRefs = result.profileFilteredRefs;
|
|
218
|
-
if (Array.isArray(profileFilteredRefs))
|
|
219
|
-
metrics.profileFilteredRefs += profileFilteredRefs.length;
|
|
220
|
-
// actions: split reflect / distill by outcome, count others.
|
|
221
|
-
const actions = result.actions;
|
|
222
|
-
if (Array.isArray(actions)) {
|
|
223
|
-
for (const action of actions) {
|
|
224
|
-
const mode = typeof action.mode === "string" ? action.mode : "";
|
|
225
|
-
switch (mode) {
|
|
226
|
-
case "reflect":
|
|
227
|
-
metrics.actions.reflect.ok += 1;
|
|
228
|
-
break;
|
|
229
|
-
case "reflect-failed":
|
|
230
|
-
metrics.actions.reflect.failed += 1;
|
|
231
|
-
break;
|
|
232
|
-
case "reflect-cooldown":
|
|
233
|
-
metrics.actions.reflect.cooldown += 1;
|
|
234
|
-
break;
|
|
235
|
-
case "reflect-skipped": {
|
|
236
|
-
metrics.actions.reflect.skipped += 1;
|
|
237
|
-
const r = action.result;
|
|
238
|
-
const reason = typeof r?.reason === "string" && r.reason.trim() ? r.reason : "unknown";
|
|
239
|
-
metrics.actions.reflect.skippedByReason[reason] = (metrics.actions.reflect.skippedByReason[reason] ?? 0) + 1;
|
|
240
|
-
break;
|
|
241
|
-
}
|
|
242
|
-
case "reflect-guard-rejected":
|
|
243
|
-
metrics.actions.reflect.guardRejected += 1;
|
|
244
|
-
break;
|
|
245
|
-
case "distill": {
|
|
246
|
-
const r = action.result;
|
|
247
|
-
const outcome = typeof r?.outcome === "string" ? r.outcome : "";
|
|
248
|
-
switch (outcome) {
|
|
249
|
-
case "queued":
|
|
250
|
-
metrics.actions.distill.queued += 1;
|
|
251
|
-
break;
|
|
252
|
-
case "llm_failed":
|
|
253
|
-
metrics.actions.distill.llmFailed += 1;
|
|
254
|
-
break;
|
|
255
|
-
case "quality_rejected":
|
|
256
|
-
case "review_needed":
|
|
257
|
-
metrics.actions.distill.qualityRejected += 1;
|
|
258
|
-
metrics.actions.distill.judgeRejected += 1;
|
|
259
|
-
break;
|
|
260
|
-
case "validation_failed":
|
|
261
|
-
metrics.actions.distill.qualityRejected += 1;
|
|
262
|
-
metrics.actions.distill.validatorRejected += 1;
|
|
263
|
-
break;
|
|
264
|
-
case "config_disabled":
|
|
265
|
-
metrics.actions.distill.configDisabled += 1;
|
|
266
|
-
break;
|
|
267
|
-
case "skipped": {
|
|
268
|
-
// Previously dropped on the floor. The four sub-paths that emit
|
|
269
|
-
// `outcome: "skipped"` (see distill.ts:893, 1024, 1120, 1576):
|
|
270
|
-
// - recursive_lesson_input (type guard refused a lesson input)
|
|
271
|
-
// - conflict_noop (LLM resolved destination conflict as NOOP)
|
|
272
|
-
// - proposal-skipped cooldown / dedup at persistence
|
|
273
|
-
// 465 events/7d in the user's live stack. The result message
|
|
274
|
-
// typically encodes the reason; we also accept an explicit
|
|
275
|
-
// `skipReason` field when downstream code sets it.
|
|
276
|
-
metrics.actions.distill.deferred += 1;
|
|
277
|
-
const explicitReason = typeof r?.skipReason === "string" ? r.skipReason : undefined;
|
|
278
|
-
const msg = typeof r?.message === "string" ? r.message : "";
|
|
279
|
-
let reason = explicitReason ?? "unknown";
|
|
280
|
-
if (!explicitReason) {
|
|
281
|
-
if (/lesson inputs/i.test(msg))
|
|
282
|
-
reason = "recursive_lesson_input";
|
|
283
|
-
else if (/NOOP/.test(msg))
|
|
284
|
-
reason = "conflict_noop";
|
|
285
|
-
else if (/cooldown/i.test(msg))
|
|
286
|
-
reason = "proposal_cooldown";
|
|
287
|
-
else if (/content[_ ]?hash/i.test(msg))
|
|
288
|
-
reason = "content_hash_match";
|
|
289
|
-
}
|
|
290
|
-
metrics.actions.distill.deferredByReason[reason] =
|
|
291
|
-
(metrics.actions.distill.deferredByReason[reason] ?? 0) + 1;
|
|
292
|
-
break;
|
|
293
|
-
}
|
|
294
|
-
default:
|
|
295
|
-
break;
|
|
296
|
-
}
|
|
297
|
-
break;
|
|
298
|
-
}
|
|
299
|
-
case "distill-skipped": {
|
|
300
|
-
metrics.actions.distill.skipped += 1;
|
|
301
|
-
const r = action.result;
|
|
302
|
-
const reason = typeof r?.reason === "string" && r.reason.trim() ? r.reason : "unknown";
|
|
303
|
-
metrics.actions.distill.skippedByReason[reason] = (metrics.actions.distill.skippedByReason[reason] ?? 0) + 1;
|
|
304
|
-
break;
|
|
305
|
-
}
|
|
306
|
-
case "memory-prune":
|
|
307
|
-
metrics.actions.memoryPrune += 1;
|
|
308
|
-
break;
|
|
309
|
-
case "memory-inference":
|
|
310
|
-
metrics.actions.memoryInference += 1;
|
|
311
|
-
break;
|
|
312
|
-
case "graph-extraction":
|
|
313
|
-
metrics.actions.graphExtraction += 1;
|
|
314
|
-
break;
|
|
315
|
-
case "error":
|
|
316
|
-
metrics.actions.error += 1;
|
|
317
|
-
break;
|
|
318
|
-
}
|
|
319
|
-
}
|
|
320
|
-
}
|
|
321
|
-
metrics.autoAccept.promoted += toFiniteNumber(result.gateAutoAcceptedCount);
|
|
322
|
-
metrics.autoAccept.validationFailed += toFiniteNumber(result.gateAutoAcceptFailedCount);
|
|
323
|
-
metrics.reflectsWithErrorContext += toFiniteNumber(result.reflectsWithErrorContext);
|
|
324
|
-
if (Array.isArray(result.coverageGaps))
|
|
325
|
-
metrics.coverageGapCount += result.coverageGaps.length;
|
|
326
|
-
metrics.evalCasesWritten += toFiniteNumber(result.evalCasesWritten);
|
|
327
|
-
if (Array.isArray(result.deadUrls))
|
|
328
|
-
metrics.deadUrlCount += result.deadUrls.length;
|
|
329
|
-
const memorySummary = result.memorySummary;
|
|
330
|
-
if (memorySummary) {
|
|
331
|
-
metrics.memorySummary.eligible += toFiniteNumber(memorySummary.eligible);
|
|
332
|
-
metrics.memorySummary.derived += toFiniteNumber(memorySummary.derived);
|
|
333
|
-
}
|
|
334
|
-
const memoryCleanup = result.memoryCleanup;
|
|
335
|
-
if (memoryCleanup) {
|
|
336
|
-
if (Array.isArray(memoryCleanup.pruneCandidates))
|
|
337
|
-
metrics.memoryCleanup.pruneCandidates += memoryCleanup.pruneCandidates.length;
|
|
338
|
-
if (Array.isArray(memoryCleanup.contradictionCandidates))
|
|
339
|
-
metrics.memoryCleanup.contradictionCandidates += memoryCleanup.contradictionCandidates.length;
|
|
340
|
-
if (Array.isArray(memoryCleanup.beliefStateTransitions))
|
|
341
|
-
metrics.memoryCleanup.beliefStateTransitions += memoryCleanup.beliefStateTransitions.length;
|
|
342
|
-
if (Array.isArray(memoryCleanup.consolidationCandidates))
|
|
343
|
-
metrics.memoryCleanup.consolidationCandidates += memoryCleanup.consolidationCandidates.length;
|
|
344
|
-
if (Array.isArray(memoryCleanup.archived))
|
|
345
|
-
metrics.memoryCleanup.archived += memoryCleanup.archived.length;
|
|
346
|
-
if (Array.isArray(memoryCleanup.warnings))
|
|
347
|
-
metrics.memoryCleanup.warnings += memoryCleanup.warnings.length;
|
|
348
|
-
}
|
|
349
|
-
const consolidation = result.consolidation;
|
|
350
|
-
if (consolidation) {
|
|
351
|
-
metrics.consolidation.processed += toFiniteNumber(consolidation.processed);
|
|
352
|
-
metrics.consolidation.merged += toFiniteNumber(consolidation.merged);
|
|
353
|
-
metrics.consolidation.deleted += toFiniteNumber(consolidation.deleted);
|
|
354
|
-
metrics.consolidation.contradicted += toFiniteNumber(consolidation.contradicted);
|
|
355
|
-
if (Array.isArray(consolidation.promoted))
|
|
356
|
-
metrics.consolidation.promoted += consolidation.promoted.length;
|
|
357
|
-
metrics.consolidation.failedChunks += toFiniteNumber(consolidation.failedChunks);
|
|
358
|
-
metrics.consolidation.totalChunks += toFiniteNumber(consolidation.totalChunks);
|
|
359
|
-
metrics.consolidation.durationMs += toFiniteNumber(consolidation.durationMs);
|
|
360
|
-
metrics.consolidation.judgedNoAction += toFiniteNumber(consolidation.judgedNoAction);
|
|
361
|
-
metrics.consolidation.mergedSecondaries += toFiniteNumber(consolidation.mergedSecondaries);
|
|
362
|
-
metrics.consolidation.failedChunkMemories += toFiniteNumber(consolidation.failedChunkMemories);
|
|
363
|
-
// Structured emitter (new on this branch): consolidate.ts now pushes
|
|
364
|
-
// per-ref grouped `{ref, skips: [{op, reason}]}` entries to `skipReasons`
|
|
365
|
-
// for every deterministic post-LLM rejection. Each ref appears once but
|
|
366
|
-
// may carry multiple skips; aggregate every reason. Pre-fix envelopes have
|
|
367
|
-
// neither field, so be defensive.
|
|
368
|
-
const skipReasons = consolidation.skipReasons;
|
|
369
|
-
if (Array.isArray(skipReasons)) {
|
|
370
|
-
for (const entry of skipReasons) {
|
|
371
|
-
if (!entry || typeof entry !== "object")
|
|
372
|
-
continue;
|
|
373
|
-
const skips = entry.skips;
|
|
374
|
-
if (!Array.isArray(skips))
|
|
375
|
-
continue;
|
|
376
|
-
for (const skip of skips) {
|
|
377
|
-
if (!skip || typeof skip !== "object")
|
|
378
|
-
continue;
|
|
379
|
-
const reason = skip.reason;
|
|
380
|
-
if (typeof reason !== "string" || !reason.trim())
|
|
381
|
-
continue;
|
|
382
|
-
metrics.consolidation.skipReasons[reason] = (metrics.consolidation.skipReasons[reason] ?? 0) + 1;
|
|
383
|
-
}
|
|
384
|
-
}
|
|
385
|
-
}
|
|
386
|
-
// WS-5: extract perf telemetry from the consolidation envelope.
|
|
387
|
-
// Pre-WS-5 envelopes lack `perfTelemetry`; be defensive.
|
|
388
|
-
const perf = consolidation.perfTelemetry;
|
|
389
|
-
if (perf) {
|
|
390
|
-
metrics.perfTelemetry.runsWithTelemetry += 1;
|
|
391
|
-
metrics.perfTelemetry.dedupPoolSize += toFiniteNumber(perf.dedupPoolSize);
|
|
392
|
-
metrics.perfTelemetry.llmPoolSize += toFiniteNumber(perf.llmPoolSize);
|
|
393
|
-
metrics.perfTelemetry.judgedCacheSkipped += toFiniteNumber(perf.judgedCacheSkipped);
|
|
394
|
-
metrics.perfTelemetry.embedMs += toFiniteNumber(perf.embedMs);
|
|
395
|
-
metrics.perfTelemetry.embedCacheHits += toFiniteNumber(perf.embedCacheHits);
|
|
396
|
-
metrics.perfTelemetry.embedCacheMisses += toFiniteNumber(perf.embedCacheMisses);
|
|
397
|
-
const budgetFrac = toFiniteNumber(perf.estimatedBudgetFractionUsed);
|
|
398
|
-
if (budgetFrac > 1.0)
|
|
399
|
-
metrics.perfTelemetry.overBudgetRuns += 1;
|
|
400
|
-
}
|
|
401
|
-
}
|
|
402
|
-
const memoryInference = result.memoryInference;
|
|
403
|
-
if (memoryInference) {
|
|
404
|
-
const considered = toFiniteNumber(memoryInference.considered);
|
|
405
|
-
const writtenFacts = toFiniteNumber(memoryInference.writtenFacts);
|
|
406
|
-
metrics.memoryInference.considered += considered;
|
|
407
|
-
metrics.memoryInference.cacheHits += toFiniteNumber(memoryInference.cacheHits);
|
|
408
|
-
metrics.memoryInference.retryAttempts += toFiniteNumber(memoryInference.retryAttempts);
|
|
409
|
-
metrics.memoryInference.splitParents += toFiniteNumber(memoryInference.splitParents);
|
|
410
|
-
metrics.memoryInference.written += writtenFacts;
|
|
411
|
-
metrics.memoryInference.skippedNoFacts += toFiniteNumber(memoryInference.skippedNoFacts);
|
|
412
|
-
metrics.memoryInference.skippedChildExists += toFiniteNumber(memoryInference.skippedChildExists);
|
|
413
|
-
metrics.memoryInference.skippedAborted += toFiniteNumber(memoryInference.skippedAborted);
|
|
414
|
-
metrics.memoryInference.unaccounted += toFiniteNumber(memoryInference.unaccounted);
|
|
415
|
-
metrics.memoryInference.htmlErrorCount += toFiniteNumber(memoryInference.htmlErrorCount);
|
|
416
|
-
// Yield-rate gating: pre-cache-feature envelopes lack the `cacheHits`
|
|
417
|
-
// field entirely. Treating their `considered` as freshAttempts (since
|
|
418
|
-
// cacheHits=0) is mathematically tempting but operationally wrong —
|
|
419
|
-
// historical runs with the legacy schema have no cache instrumentation
|
|
420
|
-
// and the SUM dragged the reported rate to ~14% in local data. Only
|
|
421
|
-
// contribute to the yield aggregate when the envelope actually carries
|
|
422
|
-
// the field. See investigation 2026-05-26.
|
|
423
|
-
if (Object.hasOwn(memoryInference, "cacheHits")) {
|
|
424
|
-
metrics.memoryInference.yieldEligibleRuns += 1;
|
|
425
|
-
metrics.memoryInference.yieldEligibleConsidered += considered;
|
|
426
|
-
metrics.memoryInference.yieldEligibleWritten += writtenFacts;
|
|
427
|
-
}
|
|
428
|
-
}
|
|
429
|
-
metrics.memoryInference.durationMs += toFiniteNumber(result.memoryInferenceDurationMs);
|
|
430
|
-
const graphExtraction = result.graphExtraction;
|
|
431
|
-
if (graphExtraction) {
|
|
432
|
-
const quality = graphExtraction.quality;
|
|
433
|
-
if (quality)
|
|
434
|
-
metrics.graphExtraction.extractedFiles += toFiniteNumber(quality.extractedFiles);
|
|
435
|
-
metrics.graphExtraction.entities += toFiniteNumber(graphExtraction.totalEntities);
|
|
436
|
-
metrics.graphExtraction.relations += toFiniteNumber(graphExtraction.totalRelations);
|
|
437
|
-
const telemetry = graphExtraction.telemetry;
|
|
438
|
-
if (telemetry) {
|
|
439
|
-
metrics.graphExtraction.cacheHits += toFiniteNumber(telemetry.cacheHits);
|
|
440
|
-
metrics.graphExtraction.cacheMisses += toFiniteNumber(telemetry.cacheMisses);
|
|
441
|
-
metrics.graphExtraction.truncations += toFiniteNumber(telemetry.truncationCount);
|
|
442
|
-
metrics.graphExtraction.failures += toFiniteNumber(telemetry.failureCount);
|
|
443
|
-
metrics.graphExtraction.htmlErrors += toFiniteNumber(telemetry.htmlErrorCount);
|
|
444
|
-
metrics.graphExtraction.retryAttempts += toFiniteNumber(telemetry.retryAttempts);
|
|
445
|
-
metrics.graphExtraction.nonArrayBatchFailures += toFiniteNumber(telemetry.nonArrayBatchFailures);
|
|
446
|
-
}
|
|
447
|
-
}
|
|
448
|
-
metrics.graphExtraction.durationMs += toFiniteNumber(result.graphExtractionDurationMs);
|
|
449
|
-
if (Array.isArray(result.extract)) {
|
|
450
|
-
for (const e of result.extract) {
|
|
451
|
-
metrics.sessionExtraction.sessionsScanned += toFiniteNumber(e.sessionsProcessed);
|
|
452
|
-
metrics.sessionExtraction.sessionsSkipped += toFiniteNumber(e.sessionsSkipped);
|
|
453
|
-
if (Array.isArray(e.sessions)) {
|
|
454
|
-
metrics.sessionExtraction.sessionsExtracted += e.sessions.filter((s) => Array.isArray(s.proposalIds) && s.proposalIds.length > 0).length;
|
|
455
|
-
}
|
|
456
|
-
metrics.sessionExtraction.proposalsCreated += Array.isArray(e.proposals) ? e.proposals.length : 0;
|
|
457
|
-
metrics.sessionExtraction.warnings += Array.isArray(e.warnings) ? e.warnings.length : 0;
|
|
458
|
-
metrics.sessionExtraction.durationMs += toFiniteNumber(e.durationMs);
|
|
459
|
-
}
|
|
460
|
-
}
|
|
461
|
-
return metrics;
|
|
462
|
-
}
|
|
463
|
-
/**
|
|
464
|
-
* Finalize derived flags and rates on an accumulator. Used both for the
|
|
465
|
-
* window-level aggregate and for each per-run row in --detail per-run mode
|
|
466
|
-
* so the single-row metrics still expose `ran` / `yieldRate` / `cacheHitRate`.
|
|
467
|
-
*/
|
|
468
|
-
function finalizeImproveMetrics(metrics) {
|
|
469
|
-
metrics.consolidation.ran =
|
|
470
|
-
metrics.consolidation.processed > 0 ||
|
|
471
|
-
metrics.consolidation.durationMs > 0 ||
|
|
472
|
-
metrics.consolidation.promoted > 0 ||
|
|
473
|
-
metrics.consolidation.merged > 0 ||
|
|
474
|
-
metrics.consolidation.deleted > 0 ||
|
|
475
|
-
metrics.consolidation.contradicted > 0 ||
|
|
476
|
-
metrics.consolidation.totalChunks > 0;
|
|
477
|
-
metrics.memoryInference.ran =
|
|
478
|
-
metrics.memoryInference.considered > 0 ||
|
|
479
|
-
metrics.memoryInference.written > 0 ||
|
|
480
|
-
metrics.memoryInference.durationMs > 0;
|
|
481
|
-
metrics.memoryInference.writes = metrics.memoryInference.written;
|
|
482
|
-
// Yield denominator excludes cache hits AND legacy (pre-cacheHits-field)
|
|
483
|
-
// envelopes. Only runs whose envelope carries a `cacheHits` field
|
|
484
|
-
// contribute to freshAttempts/yieldRate; legacy rows remain in
|
|
485
|
-
// `considered`/`written` for totals but are excluded from the rate so
|
|
486
|
-
// they cannot drag it down. See ImproveHealthMetrics.memoryInference
|
|
487
|
-
// jsdoc for the rationale.
|
|
488
|
-
metrics.memoryInference.freshAttempts = Math.max(0, metrics.memoryInference.yieldEligibleConsidered -
|
|
489
|
-
metrics.memoryInference.cacheHits -
|
|
490
|
-
metrics.memoryInference.skippedAborted);
|
|
491
|
-
metrics.memoryInference.yieldRate =
|
|
492
|
-
metrics.memoryInference.freshAttempts > 0
|
|
493
|
-
? roundRate(metrics.memoryInference.yieldEligibleWritten / metrics.memoryInference.freshAttempts)
|
|
494
|
-
: 0;
|
|
495
|
-
metrics.graphExtraction.ran =
|
|
496
|
-
metrics.graphExtraction.extractedFiles > 0 ||
|
|
497
|
-
metrics.graphExtraction.entities > 0 ||
|
|
498
|
-
metrics.graphExtraction.durationMs > 0;
|
|
499
|
-
const cacheTotal = metrics.graphExtraction.cacheHits + metrics.graphExtraction.cacheMisses;
|
|
500
|
-
metrics.graphExtraction.cacheHitRate = cacheTotal > 0 ? roundRate(metrics.graphExtraction.cacheHits / cacheTotal) : 0;
|
|
501
|
-
metrics.sessionExtraction.ran =
|
|
502
|
-
metrics.sessionExtraction.sessionsScanned > 0 ||
|
|
503
|
-
metrics.sessionExtraction.proposalsCreated > 0 ||
|
|
504
|
-
metrics.sessionExtraction.durationMs > 0;
|
|
505
|
-
}
|
|
506
|
-
/**
 * Fold the metrics of a single run (`src`) into the accumulator (`dst`),
 * mutating `dst` in place. Nothing is returned.
 *
 * Every counter named below is added with `+=`; the per-reason maps are added
 * key by key, with a key that is missing from `dst` starting at 0. Cumulative
 * rates are not computed here — they are recomputed by finalizeImproveMetrics.
 *
 * Deliberately NOT merged:
 *   - `profileFilteredRefs`: recomputed against the stash on every run, so it
 *     is a snapshot rather than an increment; summing it re-counts the same
 *     refs each run. summarizeImproveRuns takes it from the most recent run.
 *   - `memorySummary`: a whole-stash snapshot recorded on every run; summing it
 *     inflates it roughly N times. summarizeImproveRuns takes it from the most
 *     recent run.
 *   - `coverage.totalAssets`: also a snapshot.
 *     NOTE(review): the original note says it is set from the most recent run
 *     in summarizeImproveRuns; confirm where that assignment actually happens.
 *
 * @param {object} dst Accumulator to update.
 * @param {object} src Metrics projected from one run.
 */
function mergeImproveMetrics(dst, src) {
    // Adds source[field] onto target[field] for each listed field, in list order.
    const addFields = (target, source, fields) => {
        for (const field of fields) {
            target[field] += source[field];
        }
    };
    // Adds every entry of a reason → count map onto the target map.
    const addReasonCounts = (target, source) => {
        for (const [reason, count] of Object.entries(source)) {
            target[reason] = (target[reason] ?? 0) + count;
        }
    };

    addFields(dst, src, ["plannedRefs"]);

    const dstReflect = dst.actions.reflect;
    const srcReflect = src.actions.reflect;
    addFields(dstReflect, srcReflect, ["ok", "failed", "cooldown", "skipped", "guardRejected"]);
    addReasonCounts(dstReflect.skippedByReason, srcReflect.skippedByReason);

    const dstDistill = dst.actions.distill;
    const srcDistill = src.actions.distill;
    addFields(dstDistill, srcDistill, [
        "queued",
        "llmFailed",
        "qualityRejected",
        "judgeRejected",
        "validatorRejected",
        "configDisabled",
        "skipped",
    ]);
    addReasonCounts(dstDistill.skippedByReason, srcDistill.skippedByReason);
    addFields(dstDistill, srcDistill, ["deferred"]);
    addReasonCounts(dstDistill.deferredByReason, srcDistill.deferredByReason);

    addFields(dst.actions, src.actions, ["memoryPrune", "memoryInference", "graphExtraction", "error"]);
    addFields(dst.autoAccept, src.autoAccept, ["promoted", "validationFailed"]);
    addFields(dst, src, ["reflectsWithErrorContext", "coverageGapCount", "evalCasesWritten", "deadUrlCount"]);

    addFields(dst.memoryCleanup, src.memoryCleanup, [
        "pruneCandidates",
        "contradictionCandidates",
        "beliefStateTransitions",
        "consolidationCandidates",
        "archived",
        "warnings",
    ]);

    addFields(dst.consolidation, src.consolidation, [
        "processed",
        "promoted",
        "merged",
        "deleted",
        "contradicted",
        "failedChunks",
        "totalChunks",
        "durationMs",
        "judgedNoAction",
        "mergedSecondaries",
        "failedChunkMemories",
    ]);
    addReasonCounts(dst.consolidation.skipReasons, src.consolidation.skipReasons);

    addFields(dst.memoryInference, src.memoryInference, [
        "considered",
        "cacheHits",
        "splitParents",
        "written",
        "skippedNoFacts",
        "skippedChildExists",
        "skippedAborted",
        "unaccounted",
        "htmlErrorCount",
        "yieldEligibleRuns",
        "yieldEligibleConsidered",
        "yieldEligibleWritten",
        "durationMs",
    ]);

    addFields(dst.graphExtraction, src.graphExtraction, [
        "extractedFiles",
        "entities",
        "relations",
        "cacheHits",
        "cacheMisses",
        "truncations",
        "failures",
        "htmlErrors",
        "nonArrayBatchFailures",
        "durationMs",
    ]);

    addFields(dst.sessionExtraction, src.sessionExtraction, [
        "sessionsScanned",
        "sessionsExtracted",
        "sessionsSkipped",
        "proposalsCreated",
        "warnings",
        "durationMs",
    ]);

    // WS-5: perf telemetry counters are plain additive sums.
    addFields(dst.perfTelemetry, src.perfTelemetry, [
        "dedupPoolSize",
        "llmPoolSize",
        "judgedCacheSkipped",
        "embedMs",
        "embedCacheHits",
        "embedCacheMisses",
        "overBudgetRuns",
        "runsWithTelemetry",
    ]);

    // coverage: only acceptedProposals is additive; totalAssets is left untouched.
    addFields(dst.coverage, src.coverage, ["acceptedProposals"]);
}
|
|
614
|
-
/**
 * Aggregate the improve runs returned by `queryImproveRuns(db, since, until)`
 * into one metrics object.
 *
 * Each row's `result_json` is parsed, projected with projectRunMetrics and
 * merged into a fresh accumulator. Rows whose JSON cannot be parsed, or parses
 * to something that is not an object (for example `null`), are skipped for
 * metrics but still count toward `runCount`.
 *
 * `memorySummary` and `profileFilteredRefs` are per-run snapshots, so the
 * window value is taken from the row with the latest parseable `started_at`
 * instead of being summed.
 *
 * @param db Database handle, passed through to queryImproveRuns.
 * @param since Window start, passed through to queryImproveRuns.
 * @param until Window end, passed through to queryImproveRuns.
 * @returns {{ metrics: object, runCount: number }} Finalized metrics plus the
 *   number of rows returned by the query.
 */
function summarizeImproveRuns(db, since, until) {
    const accum = createUnknownImproveMetrics();
    const rows = queryImproveRuns(db, since, until);
    // Per-phase wall-time samples: one entry per envelope on which the phase
    // reported a positive duration. Phases that did not run are omitted (NOT
    // recorded as 0) so the median/p95 reflect actual phase work.
    const phaseDurations = {
        consolidation: [],
        memoryInference: [],
        graphExtraction: [],
    };
    let latestStartMs = Number.NEGATIVE_INFINITY;
    let latestMemorySummary;
    let latestProfileFilteredRefs = 0;
    for (const row of rows) {
        let result;
        try {
            result = JSON.parse(row.result_json);
        }
        catch {
            continue;
        }
        // JSON.parse also succeeds for `null`, numbers, strings and booleans.
        // None of those can carry run metrics, and reading properties off
        // `null` below would throw, so treat them like an unparseable row.
        if (typeof result !== "object" || result === null)
            continue;
        const perRow = projectRunMetrics(result);
        mergeImproveMetrics(accum, perRow);
        const startMs = new Date(row.started_at).getTime();
        // `>=` lets a later row with an identical timestamp win the tie.
        if (Number.isFinite(startMs) && startMs >= latestStartMs) {
            latestStartMs = startMs;
            latestMemorySummary = perRow.memorySummary;
            latestProfileFilteredRefs = perRow.profileFilteredRefs;
        }
        // consolidation keeps its duration inside its sub-object; the other two
        // phases are read from top-level `*DurationMs` keys on the envelope.
        const consolMs = toFiniteNumber(result.consolidation?.durationMs);
        if (consolMs > 0)
            phaseDurations.consolidation.push(consolMs);
        const memMs = toFiniteNumber(result.memoryInferenceDurationMs);
        if (memMs > 0)
            phaseDurations.memoryInference.push(memMs);
        const graphMs = toFiniteNumber(result.graphExtractionDurationMs);
        if (graphMs > 0)
            phaseDurations.graphExtraction.push(graphMs);
    }
    finalizeImproveMetrics(accum);
    if (latestMemorySummary)
        accum.memorySummary = latestMemorySummary;
    accum.profileFilteredRefs = latestProfileFilteredRefs;
    accum.wallTime.byPhase = {
        consolidation: summarizePhaseDurations(phaseDurations.consolidation),
        memoryInference: summarizePhaseDurations(phaseDurations.memoryInference),
        graphExtraction: summarizePhaseDurations(phaseDurations.graphExtraction),
    };
    return { metrics: accum, runCount: rows.length };
}
|
|
672
|
-
/**
 * Reduce a list of per-envelope phase durations to the `wallTime.byPhase.*`
 * shape: `{ count, totalMs, medianMs, p95Ms }`.
 *
 * Quantiles are read from the ascending-sorted samples at index
 * `min(n - 1, floor(q * n))` — the same picker computeWallTimeStats uses, so
 * the per-phase and top-level figures are comparable. An empty list yields all
 * zeros. The input array is not modified.
 *
 * @param {number[]} samples Durations in milliseconds.
 */
function summarizePhaseDurations(samples) {
    const sampleCount = samples.length;
    if (sampleCount === 0) {
        return { count: 0, totalMs: 0, medianMs: 0, p95Ms: 0 };
    }
    const ascending = [...samples];
    ascending.sort((lhs, rhs) => lhs - rhs);
    const quantile = (q) => {
        const index = Math.min(ascending.length - 1, Math.floor(q * ascending.length));
        return ascending[index] ?? 0;
    };
    let totalMs = 0;
    for (const value of ascending) {
        totalMs += value;
    }
    return {
        count: ascending.length,
        totalMs,
        medianMs: quantile(0.5),
        p95Ms: quantile(0.95),
    };
}
|
|
691
|
-
/**
 * Project one improve_runs row, plus an externally computed wall time and task
 * id, into a single per-run summary. Used by `akm health --detail per-run`.
 *
 * The row's `result_json` is parsed; when it cannot be parsed, or parses to a
 * non-object value such as `null`, an empty result is used so the per-stage
 * rollups come out as whatever projectRunMetrics produces for `{}`.
 *
 * @param {object} row improve_runs row (`id`, `started_at`, `completed_at`,
 *   `ok`, `scope_mode`, `scope_value`, `result_json`).
 * @param {number} wallTimeMs Wall time to report for the run.
 * @param {string} taskId Task the run is attributed to.
 * @returns {object} The per-run summary.
 */
function projectImproveRunSummary(row, wallTimeMs, taskId) {
    let result = {};
    try {
        const parsed = JSON.parse(row.result_json);
        // JSON.parse accepts `null` and primitives too; only an object can
        // carry the fields read below, and `null.orphansPurged` would throw.
        if (typeof parsed === "object" && parsed !== null) {
            result = parsed;
        }
    }
    catch {
        // fall through with the empty result
    }
    const perRow = projectRunMetrics(result);
    finalizeImproveMetrics(perRow);
    const orphansPurged = toFiniteNumber(result.orphansPurged);
    const lintSummary = result.lintSummary;
    const lintFixed = lintSummary ? toFiniteNumber(lintSummary.fixed) : 0;
    const lintFlagged = lintSummary ? toFiniteNumber(lintSummary.flagged) : 0;
    return {
        id: row.id,
        startedAt: row.started_at,
        completedAt: row.completed_at,
        wallTimeMs,
        ok: row.ok === 1,
        scope: {
            mode: row.scope_mode,
            // `value` is only present when the row has a truthy scope_value.
            ...(row.scope_value ? { value: row.scope_value } : {}),
        },
        taskId,
        actions: perRow.actions,
        memorySummary: perRow.memorySummary,
        memoryCleanup: perRow.memoryCleanup,
        consolidation: perRow.consolidation,
        memoryInference: perRow.memoryInference,
        graphExtraction: perRow.graphExtraction,
        reflectsWithErrorContext: perRow.reflectsWithErrorContext,
        evalCasesWritten: perRow.evalCasesWritten,
        orphansPurged,
        lintFixed,
        lintFlagged,
    };
}
|
|
733
|
-
/**
 * Load completed task intervals for the window via
 * `queryCompletedTaskIntervals`, converting each row to
 * `{ startMs, endMs, durationMs }`.
 *
 * The query window is widened by 5 minutes on each side. Per the original
 * rationale, the cron task wraps `akm improve`: the task's `started_at` fires
 * first and the matching `improve_runs.started_at` is written later (after
 * config load, planning, etc.), so an improve_runs row can sit inside the
 * window while its enclosing task started just before the window opened.
 *
 * Rows with an unparseable start or end, or an end earlier than the start, are
 * dropped. Intervals come back in the order the query returned them.
 * NOTE(review): callers are described as relying on ascending start order;
 * nothing here sorts, so that must be guaranteed by the query — confirm.
 *
 * @param db Database handle, passed through to the query.
 * @param since Window start (anything `new Date()` accepts).
 * @param [until] Window end; when falsy or unparseable no upper bound is sent.
 * @returns {{ startMs: number, endMs: number, durationMs: number }[]}
 */
function loadTaskIntervals(db, since, until) {
    const WIDEN_MS = 5 * 60 * 1000;
    const sinceMs = new Date(since).getTime();
    const untilMs = until ? new Date(until).getTime() : Number.POSITIVE_INFINITY;
    const widenedSince = new Date(sinceMs - WIDEN_MS).toISOString();
    let widenedUntil;
    if (Number.isFinite(untilMs)) {
        widenedUntil = new Date(untilMs + WIDEN_MS).toISOString();
    }
    const intervals = [];
    for (const taskRow of queryCompletedTaskIntervals(db, widenedSince, widenedUntil)) {
        const startMs = new Date(taskRow.started_at).getTime();
        const endMs = new Date(taskRow.completed_at).getTime();
        const usable = Number.isFinite(startMs) && Number.isFinite(endMs) && endMs >= startMs;
        if (usable) {
            intervals.push({ startMs, endMs, durationMs: endMs - startMs });
        }
    }
    return intervals;
}
|
|
761
|
-
/**
 * Return the first interval whose `[startMs, endMs + 1s]` range contains
 * `timestampMs`, or `undefined` when none does.
 *
 * Per the original rationale: the task wraps `akm improve`, so an improve
 * run's `started_at` normally falls inside the enclosing task's interval, and
 * no match is expected for manually invoked runs that the `akm-improve` task
 * did not launch. The one-second allowance on the upper bound absorbs clock
 * skew between the wrapper's `completed_at` write and the run's `started_at`.
 *
 * The list is scanned front to back because it is expected to be small.
 *
 * @param {number} timestampMs Epoch milliseconds to locate.
 * @param {Iterable<{ startMs: number, endMs: number }>} intervals Candidates.
 */
function findContainingTaskInterval(timestampMs, intervals) {
    const UPPER_SLOP_MS = 1000;
    for (const candidate of intervals) {
        const startsInTime = candidate.startMs <= timestampMs;
        const endsLateEnough = timestampMs <= candidate.endMs + UPPER_SLOP_MS;
        if (startsInTime && endsLateEnough) {
            return candidate;
        }
    }
    return undefined;
}
|
|
782
|
-
/**
 * Load task_history rows whose `task_id` begins with `akm-improve` (the
 * scheduled improve tasks, e.g. `akm-improve-frequent`,
 * `akm-improve-proactive-weekly`) and convert each to
 * `{ taskId, startMs, endMs }`. Used to attribute an improve run to the task
 * that launched it.
 *
 * The query window is widened by 5 minutes on each side so a task that fired
 * just before the window opened still matches a run inside it. As in
 * loadTaskIntervals, an `until` that is missing or cannot be parsed means no
 * upper bound is sent, rather than throwing from `toISOString()`.
 *
 * Rows with an unparseable `started_at` are dropped. `endMs` is taken from
 * `completed_at`, falling back to `failed_at`, and is `NaN` when neither is
 * set.
 *
 * @param db Database handle, passed through to queryTaskHistory.
 * @param since Window start (anything `new Date()` accepts).
 * @param [until] Window end.
 * @returns {{ taskId: string, startMs: number, endMs: number }[]}
 */
function loadImproveTaskRuns(db, since, until) {
    const WIDEN_MS = 5 * 60 * 1000;
    const sinceMs = new Date(since).getTime();
    const untilMs = until ? new Date(until).getTime() : Number.NaN;
    const widenedSince = new Date(sinceMs - WIDEN_MS).toISOString();
    // Only a finite upper bound can be serialized; NaN would make
    // toISOString() throw a RangeError.
    const widenedUntil = Number.isFinite(untilMs) ? new Date(untilMs + WIDEN_MS).toISOString() : undefined;
    const runs = [];
    for (const row of queryTaskHistory(db, { since: widenedSince, until: widenedUntil })) {
        if (!row.task_id.startsWith("akm-improve"))
            continue;
        const startMs = new Date(row.started_at).getTime();
        if (!Number.isFinite(startMs))
            continue;
        const endIso = row.completed_at ?? row.failed_at;
        const endMs = endIso ? new Date(endIso).getTime() : Number.NaN;
        runs.push({ taskId: row.task_id, startMs, endMs });
    }
    return runs;
}
|
|
807
|
-
/**
 * Attribute an improve run to the scheduled task that launched it.
 *
 * A task is a candidate when its start is within 5 minutes of the run's start.
 * Each candidate is scored by the absolute start difference, plus the absolute
 * end difference when both the run and the task have a known end; the lowest
 * score wins and the earliest candidate wins ties. Port of the health-report
 * skill's `match_task_id`.
 *
 * @param startedAt Run start (anything `new Date()` accepts).
 * @param completedAt Run end; may be falsy when unknown.
 * @param {Iterable<{ taskId: string, startMs: number, endMs: number }>} taskRuns
 * @returns {string} The matched task id, or `"manual"` when the run start is
 *   unparseable or no task qualifies.
 */
function matchImproveTaskId(startedAt, completedAt, taskRuns) {
    const MATCH_WINDOW_MS = 5 * 60 * 1000;
    const runStartMs = new Date(startedAt).getTime();
    if (!Number.isFinite(runStartMs)) {
        return "manual";
    }
    const runEndMs = completedAt ? new Date(completedAt).getTime() : Number.NaN;
    const runEndKnown = Number.isFinite(runEndMs);
    let winner;
    let lowestScore = Number.POSITIVE_INFINITY;
    for (const candidate of taskRuns) {
        const startGap = Math.abs(candidate.startMs - runStartMs);
        if (startGap > MATCH_WINDOW_MS) {
            continue;
        }
        const endGap = runEndKnown && Number.isFinite(candidate.endMs)
            ? Math.abs(candidate.endMs - runEndMs)
            : 0;
        const score = startGap + endGap;
        if (score < lowestScore) {
            lowestScore = score;
            winner = candidate.taskId;
        }
    }
    return winner ?? "manual";
}
|
|
834
|
-
/**
 * Build one per-run summary for every improve_runs row in the window.
 *
 * Wall time comes from the row itself whenever `completed_at` is strictly
 * later than `started_at`; that is the authoritative figure and also covers
 * manually invoked runs with no enclosing task_history row. Only when the row
 * has no positive delta (legacy/backfill rows where both timestamps are equal)
 * is the duration of the containing task interval used, and 0 when there is
 * no such interval.
 *
 * @param db Database handle, passed through to the query helpers.
 * @param since Window start.
 * @param until Window end.
 * @returns {object[]} Summaries in query order.
 */
function buildPerRunSummaries(db, since, until) {
    const runRows = queryImproveRuns(db, since, until);
    const taskIntervals = loadTaskIntervals(db, since, until);
    const improveTaskRuns = loadImproveTaskRuns(db, since, until);
    // Wall time for one row, following the preference order described above.
    const resolveWallTimeMs = (row) => {
        const startMs = new Date(row.started_at).getTime();
        const endMs = new Date(row.completed_at).getTime();
        const startKnown = Number.isFinite(startMs);
        if (startKnown && Number.isFinite(endMs) && endMs > startMs) {
            return endMs - startMs;
        }
        const enclosing = startKnown ? findContainingTaskInterval(startMs, taskIntervals) : undefined;
        return enclosing?.durationMs ?? 0;
    };
    const summaries = [];
    for (const row of runRows) {
        const wallTimeMs = resolveWallTimeMs(row);
        const taskId = matchImproveTaskId(row.started_at, row.completed_at, improveTaskRuns);
        summaries.push(projectImproveRunSummary(row, wallTimeMs, taskId));
    }
    return summaries;
}
|
|
862
|
-
/**
 * Build the all-zero `wallTime.byPhase` value: one
 * `{ count, totalMs, medianMs, p95Ms }` record for each of the three tracked
 * phases. Every call returns fresh objects.
 */
function emptyPhaseStats() {
    const zeroStats = () => ({ count: 0, totalMs: 0, medianMs: 0, p95Ms: 0 });
    return {
        consolidation: zeroStats(),
        memoryInference: zeroStats(),
        graphExtraction: zeroStats(),
    };
}
|
|
869
|
-
/**
 * Summarize run wall times into
 * `{ count, medianMs, p95Ms, minMs, maxMs, byPhase }`.
 *
 * Quantiles are read from the ascending-sorted durations at index
 * `min(n - 1, floor(q * n))`. With no durations every figure is 0. `byPhase`
 * is passed through unchanged, defaulting to emptyPhaseStats() when nullish.
 * The input array is not modified.
 *
 * @param {number[]} durationsMs Per-run wall times in milliseconds.
 * @param {object} [byPhase] Precomputed per-phase stats.
 */
function computeWallTimeStats(durationsMs, byPhase) {
    const phaseStats = byPhase ?? emptyPhaseStats();
    if (durationsMs.length === 0) {
        return { count: 0, medianMs: 0, p95Ms: 0, minMs: 0, maxMs: 0, byPhase: phaseStats };
    }
    const ascending = [...durationsMs];
    ascending.sort((lhs, rhs) => lhs - rhs);
    const lastIndex = ascending.length - 1;
    const valueAt = (index) => ascending[index] ?? 0;
    const quantile = (q) => valueAt(Math.min(lastIndex, Math.floor(q * ascending.length)));
    return {
        count: ascending.length,
        medianMs: quantile(0.5),
        p95Ms: quantile(0.95),
        minMs: valueAt(0),
        maxMs: valueAt(lastIndex),
        byPhase: phaseStats,
    };
}
|
|
884
|
-
/**
 * Roll improve skip events up into `{ skipped, skipReasons }`.
 *
 * Two kinds of skip event are handled:
 *   - Per-occurrence (no usable `metadata.count`): one event per skipped ref,
 *     so each event adds 1 to its reason.
 *   - Aggregated snapshot (`metadata.count` is a finite number >= 0): a single
 *     per-run event whose count is the number of refs that hit a stable,
 *     whole-stash condition that run (`no_new_signal`,
 *     `profile_filtered_all_passes`). Every run re-counts the same stable set,
 *     so summing across the window would count it once per run. Only the last
 *     such event per reason is kept; this assumes events arrive in
 *     chronological order, making the last one the most recent run's value.
 *
 * A snapshot of 0 is a real value: it replaces any earlier snapshot for that
 * reason and is not mistaken for a per-occurrence skip. Reasons whose total
 * comes out as 0 are left out of `skipReasons`.
 *
 * A missing, non-string or blank `metadata.reason` is reported as "unknown".
 *
 * @param {Iterable<{ metadata?: { reason?: unknown, count?: unknown } }>} events
 * @returns {{ skipped: number, skipReasons: Record<string, number> }}
 */
function buildImproveSkipSummary(events) {
    const occurrenceCounts = {};
    const latestSnapshot = {};
    for (const event of events) {
        const rawReason = event.metadata?.reason;
        const reason = typeof rawReason === "string" && rawReason.trim() ? rawReason : "unknown";
        const rawCount = event.metadata?.count;
        if (typeof rawCount === "number" && Number.isFinite(rawCount) && rawCount >= 0) {
            // Overwrite so the most recent run's snapshot wins, including 0.
            latestSnapshot[reason] = rawCount;
        }
        else {
            occurrenceCounts[reason] = (occurrenceCounts[reason] ?? 0) + 1;
        }
    }
    const skipReasons = { ...occurrenceCounts };
    for (const [reason, count] of Object.entries(latestSnapshot)) {
        const total = (skipReasons[reason] ?? 0) + count;
        if (total > 0) {
            skipReasons[reason] = total;
        }
    }
    const skipped = Object.values(skipReasons).reduce((sum, value) => sum + value, 0);
    return { skipped, skipReasons };
}
|
|
914
|
-
/**
 * Check that the state database accepts a write and serves it back.
 *
 * Records the current next offset, appends one probe event, then reads probe
 * events from that offset onward. The probe passes when at least one event
 * comes back and the next offset has moved; otherwise an error message is
 * returned. `durationMs` covers the append plus the read-back.
 *
 * @param {string} stateDbPath Path handed to readEvents/appendEvent as `dbPath`.
 * @returns {{ ok: boolean, durationMs: number, error?: string }}
 */
function probeStateDbRoundTrip(stateDbPath) {
    const dbOptions = { dbPath: stateDbPath };
    const offsetBefore = readEvents({}, dbOptions).nextOffset;
    const startedAtMs = Date.now();
    appendEvent({ eventType: HEALTH_PROBE_EVENT, ref: "health:probe", metadata: { source: "akm health" } }, dbOptions);
    const readBack = readEvents({ sinceOffset: offsetBefore, type: HEALTH_PROBE_EVENT, ref: "health:probe" }, dbOptions);
    const durationMs = Date.now() - startedAtMs;
    const probeMissing = readBack.events.length === 0 || readBack.nextOffset <= offsetBefore;
    if (probeMissing) {
        return { ok: false, durationMs, error: "probe event was not readable after append" };
    }
    return { ok: true, durationMs };
}
|
|
925
|
-
/**
 * Expand a `--window-compare <duration>` shorthand into two adjacent windows:
 * `current` covers the last `<duration>` up to now, and `prior` covers the
 * `<duration>` immediately before it (its `until` equals `current.since`).
 *
 * The duration is a positive integer followed by `d`, `h` or `m` (days, hours,
 * minutes; case-insensitive, surrounding whitespace ignored). Duration syntax
 * matches {@link parseHealthSince}.
 *
 * @param {string} duration Shorthand such as `24h`, `7d` or `30m`.
 * @param {() => number} [now] Clock returning epoch milliseconds; injectable
 *   for tests.
 * @returns {{ name: string, since: string, until: string }[]} `[current, prior]`
 *   with ISO-8601 timestamps.
 * @throws {UsageError} `INVALID_FLAG_VALUE` when the value is malformed, not
 *   positive, or so large that the window falls outside the representable
 *   date range.
 */
function resolveWindowCompare(duration, now = () => Date.now()) {
    const durationMatch = duration.trim().match(/^(\d+)([dhm])$/i);
    if (!durationMatch) {
        throw new UsageError("--window-compare must be a duration like '24h', '7d', or '30m'.", "INVALID_FLAG_VALUE");
    }
    const amount = Number.parseInt(durationMatch[1] ?? "0", 10);
    const unit = (durationMatch[2] ?? "h").toLowerCase();
    if (!Number.isFinite(amount) || amount <= 0) {
        throw new UsageError("--window-compare must be a positive duration.", "INVALID_FLAG_VALUE");
    }
    const multiplier = unit === "h" ? 60 * 60 * 1000 : unit === "m" ? 60 * 1000 : 24 * 60 * 60 * 1000;
    const ms = amount * multiplier;
    const nowMs = now();
    // Date#toISOString throws a RangeError for an out-of-range time value, so
    // report an oversized duration as a usage error like every other bad value.
    const toIso = (epochMs) => {
        const date = new Date(epochMs);
        if (Number.isNaN(date.getTime())) {
            throw new UsageError("--window-compare duration is too large to resolve into a valid time window.", "INVALID_FLAG_VALUE");
        }
        return date.toISOString();
    };
    const currentSince = toIso(nowMs - ms);
    const currentUntil = toIso(nowMs);
    const priorSince = toIso(nowMs - 2 * ms);
    const priorUntil = currentSince;
    return [
        { name: "current", since: currentSince, until: currentUntil },
        { name: "prior", since: priorSince, until: priorUntil },
    ];
}
|
|
952
|
-
/**
 * Parse one repeatable `--windows` value of the form
 * `name=...,since=...,until=...` into `{ name, since, until? }`.
 *
 * The value is split on commas; empty segments are ignored and each remaining
 * segment is split at its first `=`, with key and value trimmed. `name` and
 * `since` are required, `until` is optional, and any other key is read but not
 * returned. When a key repeats, the last occurrence wins.
 *
 * @param {string} raw The flag value.
 * @returns {{ name: string, since: string, until?: string }}
 * @throws {UsageError} `INVALID_FLAG_VALUE` when a segment has no `=`, or when
 *   `name` or `since` is missing or empty.
 */
export function parseWindowSpec(raw) {
    const fields = {};
    const segments = raw
        .split(",")
        .map((piece) => piece.trim())
        .filter((piece) => piece !== "");
    for (const segment of segments) {
        const separatorAt = segment.indexOf("=");
        if (separatorAt < 0) {
            throw new UsageError(`--windows entry must be a comma-separated list of key=value pairs: ${raw}`, "INVALID_FLAG_VALUE");
        }
        const key = segment.slice(0, separatorAt).trim();
        fields[key] = segment.slice(separatorAt + 1).trim();
    }
    if (!fields.name) {
        throw new UsageError(`--windows entry is missing required 'name': ${raw}`, "INVALID_FLAG_VALUE");
    }
    if (!fields.since) {
        throw new UsageError(`--windows entry is missing required 'since': ${raw}`, "INVALID_FLAG_VALUE");
    }
    const spec = { name: fields.name, since: fields.since };
    if (fields.until) {
        spec.until = fields.until;
    }
    return spec;
}
|
|
982
|
-
/**
 * Dotted metric paths compared between windows by computeDeltas. The list is
 * fixed; paths are resolved against a window result with readNumericPath.
 */
const INTERESTING_DELTA_PATHS = [
    // reflect / distill actions
    "improve.actions.reflect.failed",
    "improve.actions.reflect.guardRejected",
    "improve.actions.distill.llmFailed",
    "improve.actions.distill.queued",
    "improve.actions.distill.deferred",
    // consolidation
    "improve.consolidation.promoted",
    // memory inference
    "improve.memoryInference.written",
    "improve.memoryInference.yieldRate",
    "improve.memoryInference.skippedNoFacts",
    "improve.memoryInference.htmlErrorCount",
    // graph extraction
    "improve.graphExtraction.cacheHitRate",
    "improve.graphExtraction.failures",
    "improve.graphExtraction.htmlErrors",
    "improve.graphExtraction.nonArrayBatchFailures",
    // session extraction
    "improve.sessionExtraction.sessionsScanned",
    "improve.sessionExtraction.proposalsCreated",
    // auto-accept
    "improve.autoAccept.promoted",
    "improve.autoAccept.validationFailed",
    // wall time
    "improve.wallTime.medianMs",
    "improve.wallTime.p95Ms",
];
|
|
1005
|
-
/**
 * Read the value at a dotted path (e.g. `"improve.wallTime.p95Ms"`) inside
 * `obj`, returning it only when it is a finite number.
 *
 * Returns 0 when a step of the path lands on something that is not an object,
 * when the path does not exist, or when the final value is not a finite
 * number.
 *
 * @param {unknown} obj Root value to walk.
 * @param {string} path Property names separated by `.`.
 * @returns {number}
 */
function readNumericPath(obj, path) {
    let node = obj;
    for (const segment of path.split(".")) {
        const canDescend = typeof node === "object" && node !== null;
        if (!canDescend) {
            return 0;
        }
        node = node[segment];
    }
    // Number.isFinite is false for every non-number, so no typeof check is needed.
    return Number.isFinite(node) ? node : 0;
}
|
|
1015
|
-
/**
 * Compare two window results along every path in INTERESTING_DELTA_PATHS.
 *
 * For each path the numeric value is read from `first` (`from`) and `last`
 * (`to`) with readNumericPath. Paths where both values are 0 are omitted. The
 * rest are reported as `{ from, to, pctChange }`, where `pctChange` is the
 * percentage change relative to `from`, rounded to two decimals, or the string
 * `"+inf"` when `from` is 0 (in which case `to` is necessarily non-zero).
 *
 * @param {object} first Baseline window result.
 * @param {object} last Window result compared against the baseline.
 * @returns {Record<string, { from: number, to: number, pctChange: number | string }>}
 */
function computeDeltas(first, last) {
    const deltas = {};
    for (const metricPath of INTERESTING_DELTA_PATHS) {
        const from = readNumericPath(first, metricPath);
        const to = readNumericPath(last, metricPath);
        const bothZero = from === 0 && to === 0;
        if (bothZero) {
            continue;
        }
        // With a zero baseline the relative change is undefined; the both-zero
        // case was skipped above, so `to` is non-zero here.
        const pctChange = from === 0
            ? "+inf"
            : Number((((to - from) / from) * 100).toFixed(2));
        deltas[metricPath] = { from, to, pctChange };
    }
    return deltas;
}
|
|
1033
|
-
/**
 * Split task_history rows into those that should have a log (`log_path` is not
 * null) and, among those, the ones whose log is actually backed.
 *
 * A row counts as backed when logs.db holds entries for its run id (#579 — the
 * DB is the primary record), or, failing that, when the file at `log_path`
 * exists on disk — the transitional check for rows written before logs.db
 * existed. When `logsDb` is not provided (logs.db could not be opened) only
 * the file check applies.
 *
 * @param {object[]} taskRows task_history rows (`task_id`, `started_at`,
 *   `log_path`).
 * @param [logsDb] Open logs.db handle, if available.
 * @returns {{ withLogs: object[], backed: object[] }}
 */
function partitionLogBackedRows(taskRows, logsDb) {
    const runIdOf = (row) => buildTaskRunId(row.task_id, row.started_at);
    const withLogs = taskRows.filter((row) => row.log_path !== null);
    let loggedRunIds;
    if (logsDb) {
        loggedRunIds = getLoggedRunIds(logsDb, withLogs.map(runIdOf));
    }
    else {
        loggedRunIds = new Set();
    }
    // Every row in `withLogs` already has a non-null log_path, so the file
    // check can use it directly.
    const backed = withLogs.filter((row) => {
        if (loggedRunIds.has(runIdOf(row))) {
            return true;
        }
        return fs.existsSync(row.log_path);
    });
    return { withLogs, backed };
}
|
|
1049
|
-
/**
 * Key under which `llm_usage` events that carry no stage — i.e. were recorded
 * outside any stage scope — are grouped in the per-stage breakdown.
 */
const UNATTRIBUTED_STAGE = "unattributed";
|
|
1051
|
-
/**
 * Build a fresh, all-zero LLM usage record: call count, total wall time and
 * the four token counters.
 */
function emptyLlmUsageStageAggregate() {
    const counters = [
        "calls",
        "totalDurationMs",
        "promptTokens",
        "completionTokens",
        "totalTokens",
        "reasoningTokens",
    ];
    const zeroed = {};
    for (const counter of counters) {
        zeroed[counter] = 0;
    }
    return zeroed;
}
|
|
1061
|
-
/**
 * Build a fresh, all-zero window-level LLM usage aggregate: the same counters
 * as a per-stage record plus an empty `byStage` map.
 */
function emptyLlmUsageAggregate() {
    const windowTotals = emptyLlmUsageStageAggregate();
    return Object.assign({}, windowTotals, { byStage: {} });
}
|
|
1064
|
-
/**
 * Aggregate `llm_usage` events (#576) into a window total plus a per-stage
 * breakdown of call count, wall time and token usage.
 *
 * Every event counts as one call toward both the window total and its stage.
 * Duration and token fields are read from `event.metadata` through
 * toFiniteNumber, so fields missing from a best-effort record contribute
 * whatever that helper returns for them (0, per the original note). Events
 * whose `metadata.stage` is not a non-empty string are grouped under
 * {@link UNATTRIBUTED_STAGE}.
 *
 * @param {Iterable<{ metadata?: object }>} events
 * @returns {object} Window totals with a `byStage` map of per-stage totals.
 */
function summarizeLlmUsage(events) {
    const aggregate = emptyLlmUsageAggregate();
    for (const event of events) {
        const meta = event.metadata ?? {};
        const hasStage = typeof meta.stage === "string" && meta.stage;
        const stageKey = hasStage ? meta.stage : UNATTRIBUTED_STAGE;
        if (!aggregate.byStage[stageKey]) {
            aggregate.byStage[stageKey] = emptyLlmUsageStageAggregate();
        }
        const stage = aggregate.byStage[stageKey];
        // One event's contribution, applied identically to both totals.
        const contribution = {
            calls: 1,
            totalDurationMs: toFiniteNumber(meta.durationMs),
            promptTokens: toFiniteNumber(meta.promptTokens),
            completionTokens: toFiniteNumber(meta.completionTokens),
            totalTokens: toFiniteNumber(meta.totalTokens),
            reasoningTokens: toFiniteNumber(meta.reasoningTokens),
        };
        for (const target of [aggregate, stage]) {
            for (const [field, amount] of Object.entries(contribution)) {
                target[field] += amount;
            }
        }
    }
    return aggregate;
}
|
|
1096
|
-
/**
 * Read `LLM_USAGE_EVENT` events from the state DB at `stateDbPath` and
 * summarise them with `summarizeLlmUsage`.
 *
 * @param stateDbPath - Path handed to `readEvents` as `dbPath`.
 * @param since - Window start, forwarded to `readEvents`; also used as the
 *   timestamp of any event that has no `ts`.
 * @param until - Optional exclusive window end. `undefined` means no upper
 *   bound is applied.
 * @returns The aggregate produced by `summarizeLlmUsage`.
 */
function readLlmUsageAggregate(stateDbPath, since, until) {
    const { events } = readEvents({ since, type: LLM_USAGE_EVENT }, { dbPath: stateDbPath });
    if (until === undefined) {
        return summarizeLlmUsage(events);
    }
    // Parse the bound once instead of once per event.
    const untilMs = new Date(until).getTime();
    const inWindow = events.filter((event) => new Date(event.ts ?? since).getTime() < untilMs);
    return summarizeLlmUsage(inWindow);
}
|
|
1104
|
-
/**
 * Summarise auto-accept gate calibration (#612) for the window
 * `[since, until)`.
 *
 * All proposal gate decisions are loaded from the open state.db, turned into
 * calibration samples restricted to the window, and then aggregated.
 *
 * @param db - Open state.db connection.
 * @param since - Window start.
 * @param until - Optional window end; the key is omitted from the window
 *   options entirely when `undefined`.
 * @returns Whatever `summarizeCalibration` returns for the samples.
 */
function readCalibration(db, since, until) {
    const bounds = { since };
    if (until !== undefined) {
        bounds.until = until;
    }
    const samples = gateDecisionsToSamples(listProposalGateDecisions(db), bounds);
    return summarizeCalibration(samples);
}
|
|
1115
|
-
// ── WS-5 Observability helpers ───────────────────────────────────────────────
|
|
1116
|
-
/**
 * Compute WS-5 denominator-fixed coverage metrics.
 *
 * `coverage = accepted_proposals / total_assets` (Part V §3).
 * The denominator is the TOTAL stash size (not the moving eligible set) so
 * more-inclusive WS-1 ranking cannot spuriously inflate coverage.
 * `eligibleFraction = eligible_assets / total_assets` is reported separately.
 *
 * Accepted proposals are counted only when their `updatedAt` falls within
 * `[since, until)`; the bounds are compared as strings, so both sides are
 * expected to use the same ISO-8601 format. A proposal without `updatedAt`
 * is treated as the empty string.
 *
 * If listing proposals throws (e.g. the table does not exist on an older
 * install) the accepted count stays 0 rather than failing the caller.
 *
 * @param db - Open state.db connection.
 * @param totalAssets - Total stash asset count (eligible + derived) from the
 *   most recent run's memorySummary. Any value that is not a finite number
 *   greater than 0 (0, NaN, negative, Infinity, undefined) means "denominator
 *   unknown": both rates are NaN and `totalAssets` is reported as 0.
 * @param eligibleAssets - Eligible (non-derived) asset count from the most recent run.
 * @param since - Window start (ISO-8601). Proposals accepted before this are excluded.
 * @param until - Window end (ISO-8601, exclusive). Absent = open-ended.
 * @param stashDir - Optional: scope accepted proposals to one stash. Absent = all stashes.
 * @returns `{ rate, eligibleFraction, acceptedProposals, totalAssets }`.
 */
function computeDenominatorFixedCoverage(db, totalAssets, eligibleAssets, since, until, stashDir) {
    let acceptedProposals = 0;
    try {
        const query = { status: "accepted", ...(stashDir ? { stashDir } : {}) };
        acceptedProposals = listStateProposals(db, query).filter((p) => {
            const updatedAt = p.updatedAt ?? "";
            if (updatedAt < since)
                return false;
            if (until !== undefined && updatedAt >= until)
                return false;
            return true;
        }).length;
    }
    catch {
        // Fail open: table may not exist on older installs.
    }
    // A non-positive or non-finite denominator cannot produce a meaningful
    // rate, so report it as unknown instead of dividing by it.
    const denominatorKnown = Number.isFinite(totalAssets) && totalAssets > 0;
    if (!denominatorKnown) {
        return {
            rate: Number.NaN,
            eligibleFraction: Number.NaN,
            acceptedProposals,
            totalAssets: 0,
        };
    }
    return {
        rate: roundRate(acceptedProposals / totalAssets),
        eligibleFraction: roundRate(eligibleAssets / totalAssets),
        acceptedProposals,
        totalAssets,
    };
}
|
|
1169
|
-
/**
 * Compute WS-5 per-run degradation metrics (Part V §4).
 *
 * Health VIEWS only — reads from state.db tables populated by prior improve
 * runs. Each of the four sections is wrapped in its own try/catch, so a
 * missing table (pre-WS-1/2 install) only leaves that section at its default.
 *
 * @param db - Open state.db connection (must expose `prepare(sql).all()`).
 * @param since - Window start (ISO-8601).
 * @param until - Window end (ISO-8601, exclusive). When `undefined` the
 *   oracle spot-check is open-ended, matching the coverage helper; the value
 *   is passed to `queryImproveRuns` unchanged.
 * @returns `{ corpusCentroidDistance, entrenchmentFlagged,
 *   mergeFidelityContradictionRate, highGenerationFraction, oracleSpotCheck }`.
 *   `corpusCentroidDistance` / `highGenerationFraction` are NaN and
 *   `entrenchmentFlagged` is `undefined` when they could not be computed.
 */
function computeDegradationMetrics(db, since, until) {
    // (a) Corpus diversity — concentration of retrieval_salience among the
    // top-100 assets by rank_score. This is a single-snapshot metric;
    // consecutive-run centroid distance requires cross-run history not yet stored.
    let corpusCentroidDistance = Number.NaN;
    let entrenchmentFlagged;
    try {
        const rows = db
            .prepare(`SELECT retrieval_salience FROM asset_salience
                 ORDER BY rank_score DESC LIMIT 100`)
            .all();
        if (rows.length >= 5) {
            const vals = rows.map((r) => r.retrieval_salience).sort((a, b) => a - b);
            const n = vals.length;
            // Sum |xi - xj| over every unordered pair (i < j), without
            // allocating a sub-array per row.
            let sumAbsDiff = 0;
            for (let i = 0; i < n; i += 1) {
                let rowSum = 0;
                for (let j = i + 1; j < n; j += 1) {
                    rowSum += Math.abs(vals[i] - vals[j]);
                }
                sumAbsDiff += rowSum;
            }
            const mean = vals.reduce((a, b) => a + b, 0) / n;
            // The textbook Gini divides the sum over ALL ordered pairs by
            // 2·n²·mean. Because each pair is counted once here, this value is
            // half of that and tops out near 0.5 (one dominant asset, the rest
            // near 0). 0 still means perfectly even salience.
            const gini = mean > 0 ? sumAbsDiff / (2 * n * n * mean) : 0;
            // Reported as a "distance from uniform" diversity proxy.
            // Threshold: >0.35 flags entrenchment; it is calibrated against the
            // halved scale above, so the formula must not be "corrected"
            // without re-tuning it.
            corpusCentroidDistance = roundRate(gini);
            entrenchmentFlagged = gini > 0.35;
        }
    }
    catch {
        // Table not present (pre-WS-1 install) — leave NaN.
    }
    // (b) Merge fidelity — proxy: total consolidation.contradicted divided by
    // total consolidation.processed across the window's improve runs, read
    // from each run's result_json. (The full "merge proposal → later
    // contradiction" correlation requires cross-run history.)
    let mergeFidelityContradictionRate = 0;
    try {
        let totalContradicted = 0;
        let totalProcessed = 0;
        for (const row of queryImproveRuns(db, since, until)) {
            try {
                const cons = JSON.parse(row.result_json).consolidation;
                if (cons) {
                    totalContradicted += toFiniteNumber(cons.contradicted);
                    totalProcessed += toFiniteNumber(cons.processed);
                }
            }
            catch {
                // Skip malformed rows.
            }
        }
        if (totalProcessed > 0) {
            mergeFidelityContradictionRate = roundRate(totalContradicted / totalProcessed);
        }
    }
    catch {
        // Fail open.
    }
    // (c) Generation distribution — generation is NOT currently stored in
    // asset_salience (it's in frontmatter), so consecutive_no_ops >= 2 is used
    // as a maturity proxy.
    // TODO(0.10+): store generation in asset_salience for proper tracking.
    let highGenerationFraction = Number.NaN;
    try {
        const genRows = db.prepare("SELECT consecutive_no_ops FROM asset_salience").all();
        if (genRows.length > 0) {
            const highGen = genRows.filter((r) => r.consecutive_no_ops >= 2).length;
            highGenerationFraction = roundRate(highGen / genRows.length);
        }
    }
    catch {
        // Table not present.
    }
    // (d) Oracle spot-check — up to 5 accepted proposals from the window,
    // spread evenly across the list rather than taken from its head.
    const oracleSpotCheck = [];
    try {
        const accepted = listStateProposals(db, { status: "accepted" }).filter((p) => {
            const updatedAt = p.updatedAt ?? "";
            return updatedAt >= since && (until === undefined || updatedAt < until);
        });
        const sampleSize = Math.min(5, accepted.length);
        for (let k = 0; k < sampleSize; k += 1) {
            // floor(k·len / sampleSize) yields strictly increasing indices
            // covering the whole list (e.g. 0,1,3,5,7 for 9 items).
            const p = accepted[Math.floor((k * accepted.length) / sampleSize)];
            if (p) {
                oracleSpotCheck.push({
                    proposalId: p.id,
                    ref: p.ref,
                    source: p.source ?? "unknown",
                    acceptedAt: p.updatedAt ?? p.createdAt ?? "",
                });
            }
        }
    }
    catch {
        // Fail open.
    }
    return {
        corpusCentroidDistance,
        entrenchmentFlagged,
        mergeFidelityContradictionRate,
        highGenerationFraction,
        oracleSpotCheck,
    };
}
|
|
1295
|
-
/**
 * Build the health metrics for one time window.
 *
 * Combines task-history rows, improve events, improve-run summaries,
 * calibration, coverage, degradation and LLM usage into
 * `{ improve, metrics, runs }`.
 *
 * @param db - Open state.db connection.
 * @param stateDbPath - Path passed to `readEvents` / `readLlmUsageAggregate`.
 * @param since - Window start.
 * @param until - Window end (exclusive). When it does not parse to a finite
 *   time (e.g. `undefined`), task rows AND improve events are treated as
 *   open-ended; previously only task rows were, and the event counts silently
 *   dropped to 0.
 * @param now - Clock returning epoch milliseconds; sampled once per call.
 * @param logsDb - Optional logs DB handle forwarded to `partitionLogBackedRows`.
 * @returns `{ improve: improveSummary, metrics, runs: runCount }`.
 */
function buildWindowMetrics(db, stateDbPath, since, until, now = () => Date.now(), logsDb) {
    // Parse the upper bound once; every window filter below shares it.
    const untilMs = new Date(until).getTime();
    const isBeforeUntil = (ts) => !Number.isFinite(untilMs) || new Date(ts).getTime() < untilMs;
    // Events without a timestamp are dated at the window start.
    const readWindowEvents = (type) => readEvents({ since, type }, { dbPath: stateDbPath }).events.filter((event) => isBeforeUntil(event.ts ?? since));
    const taskRows = queryTaskHistory(db, { since }).filter((row) => isBeforeUntil(row.started_at));
    const { withLogs: taskRowsWithLogs, backed: existingLogRows } = partitionLogBackedRows(taskRows, logsDb);
    const failedTaskRows = taskRows.filter((row) => row.status === "failed");
    const activeRows = taskRows.filter((row) => row.status === "active");
    const nowMs = now();
    const stuckActiveRuns = activeRows.filter((row) => nowMs - new Date(row.started_at).getTime() > ACTIVE_RUN_WARN_MS).length;
    const promptRows = taskRows.filter((row) => row.target_kind === "prompt");
    // A prompt run counts as failed when its metadata carries a non-empty reason.
    const promptFailures = promptRows.filter((row) => {
        const detail = parseTaskMetadata(row).detail;
        return typeof detail?.reason === "string" && detail.reason.length > 0;
    });
    // With nothing expected to have a log, backing is vacuously complete.
    const logBackingRate = taskRowsWithLogs.length === 0 ? 1 : existingLogRows.length / taskRowsWithLogs.length;
    const taskFailRate = taskRows.length === 0 ? 0 : failedTaskRows.length / taskRows.length;
    const agentFailureRate = promptRows.length === 0 ? 0 : promptFailures.length / promptRows.length;
    const improveInvoked = readWindowEvents("improve_invoked").length;
    const improveCompletedEvents = readWindowEvents(IMPROVE_COMPLETED_EVENT);
    const improveSkippedEvents = readWindowEvents("improve_skipped");
    const eventsMetrics = summarizeImproveCompleted(improveCompletedEvents);
    const { metrics: improveSummary, runCount } = summarizeImproveRuns(db, since, until);
    improveSummary.invoked = improveInvoked;
    improveSummary.completed = eventsMetrics.completed;
    const skipSummary = buildImproveSkipSummary(improveSkippedEvents);
    improveSummary.skipped = skipSummary.skipped;
    improveSummary.skipReasons = skipSummary.skipReasons;
    // Preserve the per-phase aggregation computed by summarizeImproveRuns and
    // derive top-level wall times from the same improve-runs window so counts
    // and percentiles stay aligned with per-run reporting.
    const perRunSummaries = buildPerRunSummaries(db, since, until);
    const wallTimes = perRunSummaries.map((run) => run.wallTimeMs).filter((ms) => Number.isFinite(ms) && ms > 0);
    improveSummary.wallTime = computeWallTimeStats(wallTimes, improveSummary.wallTime.byPhase);
    improveSummary.calibration = readCalibration(db, since, until);
    // WS-5: denominator-fixed coverage from the summary's memorySummary
    // (totalAssets = eligible + derived — the fixed denominator).
    const totalAssets = improveSummary.memorySummary.eligible + improveSummary.memorySummary.derived;
    improveSummary.coverage = computeDenominatorFixedCoverage(db, totalAssets, improveSummary.memorySummary.eligible, since, until);
    // WS-5: per-run degradation metrics (corpus diversity, merge fidelity,
    // generation distribution, oracle spot-check). Health VIEWS only.
    const degradation = computeDegradationMetrics(db, since, until);
    if (degradation) {
        improveSummary.degradation = degradation;
    }
    const metrics = {
        taskFailRate: roundRate(taskFailRate),
        agentFailureRate: roundRate(agentFailureRate),
        stuckActiveRuns,
        logBackingRate: roundRate(logBackingRate),
        probeRoundTripMs: null,
        llmUsage: readLlmUsageAggregate(stateDbPath, since, until),
    };
    return { improve: improveSummary, metrics, runs: runCount };
}
|
|
1350
40
|
function validateAkmHealthOptions(options) {
|
|
1351
41
|
if (options.groupBy !== undefined && options.groupBy !== "run") {
|
|
1352
42
|
throw new UsageError(`Invalid value for --group-by: ${options.groupBy}. Expected: run`, "INVALID_FLAG_VALUE");
|
|
@@ -1447,77 +137,8 @@ export function akmHealth(options = {}) {
|
|
|
1447
137
|
if (degradationMain) {
|
|
1448
138
|
improveSummary.degradation = degradationMain;
|
|
1449
139
|
}
|
|
1450
|
-
|
|
1451
|
-
|
|
1452
|
-
// rich in-session signal is no longer deferrable.
|
|
1453
|
-
const proxyInvertedEvents = readEvents({ since, type: "outcome_proxy_inverted" }, { dbPath: stateDbPath }).events;
|
|
1454
|
-
if (proxyInvertedEvents.length > 0) {
|
|
1455
|
-
const lastEvent = proxyInvertedEvents[proxyInvertedEvents.length - 1];
|
|
1456
|
-
const correlation = typeof lastEvent.metadata?.correlation === "number" ? lastEvent.metadata.correlation.toFixed(3) : "unknown";
|
|
1457
|
-
advisories.push({
|
|
1458
|
-
name: "outcome-proxy-adequacy",
|
|
1459
|
-
status: "warn",
|
|
1460
|
-
kind: "deterministic",
|
|
1461
|
-
confidence: "high",
|
|
1462
|
-
message: `WS-2 outcome proxy inverted (${proxyInvertedEvents.length} event(s) in window). ` +
|
|
1463
|
-
`corr(outcome_score, accepted_change_rate) = ${correlation} < −0.3. ` +
|
|
1464
|
-
"Popular assets are also the most-needing-improvement assets — " +
|
|
1465
|
-
"the retrieval-based proxy is inverted. " +
|
|
1466
|
-
"The 0.10+ rich in-session outcome signal is no longer deferrable. See plan §WS-2.",
|
|
1467
|
-
});
|
|
1468
|
-
}
|
|
1469
|
-
// R5 collapse/churn detector: surface any collapse_detector_alert events
|
|
1470
|
-
// in the health window, plus the latest cycle row's headline numbers so
|
|
1471
|
-
// the operator can act without opening the DB. `unknown` when the detector
|
|
1472
|
-
// has never produced a cycle row (no consolidate/recombine work yet).
|
|
1473
|
-
try {
|
|
1474
|
-
// Reuse the already-open state.db handle (readEvents supports a
|
|
1475
|
-
// borrowed connection) — no extra open/migrate/close per health call.
|
|
1476
|
-
const collapseAlertEvents = readEvents({ since, type: "collapse_detector_alert" }, { dbPath: stateDbPath, db }).events;
|
|
1477
|
-
const latestCycle = getLatestCycleMetrics(db);
|
|
1478
|
-
const cycleSummary = latestCycle
|
|
1479
|
-
? `Latest cycle (${latestCycle.ts}, ${latestCycle.pass}): mean canary recall ${latestCycle.mean_recall.toFixed(3)}, ` +
|
|
1480
|
-
`distinct-content ratio ${latestCycle.distinct_content_ratio.toFixed(3)}, ` +
|
|
1481
|
-
`${latestCycle.accepted_actions} accepted action(s).`
|
|
1482
|
-
: "";
|
|
1483
|
-
if (collapseAlertEvents.length > 0) {
|
|
1484
|
-
const kinds = [...new Set(collapseAlertEvents.map((e) => String(e.metadata?.kind ?? "unknown")))];
|
|
1485
|
-
const collapseKinds = kinds.filter((k) => k.startsWith("collapse"));
|
|
1486
|
-
advisories.push({
|
|
1487
|
-
name: "collapse-churn-detector",
|
|
1488
|
-
status: "warn",
|
|
1489
|
-
kind: "deterministic",
|
|
1490
|
-
// Collapse kinds are measured, not inferred; churn/merge-floor
|
|
1491
|
-
// volume thresholds are still being tuned (design doc §7).
|
|
1492
|
-
confidence: collapseKinds.length > 0 ? "high" : "medium",
|
|
1493
|
-
message: `R5 detector fired ${collapseAlertEvents.length} alert(s) in window (kinds: ${kinds.join(", ")}). ` +
|
|
1494
|
-
`${cycleSummary} See docs/design/improve-collapse-churn-detector-design.md §6.3 runbook queries.`,
|
|
1495
|
-
});
|
|
1496
|
-
}
|
|
1497
|
-
else if (latestCycle) {
|
|
1498
|
-
advisories.push({
|
|
1499
|
-
name: "collapse-churn-detector",
|
|
1500
|
-
status: "pass",
|
|
1501
|
-
kind: "deterministic",
|
|
1502
|
-
confidence: "high",
|
|
1503
|
-
message: `No collapse/churn alerts in window. ${cycleSummary}`,
|
|
1504
|
-
});
|
|
1505
|
-
}
|
|
1506
|
-
else {
|
|
1507
|
-
advisories.push({
|
|
1508
|
-
name: "collapse-churn-detector",
|
|
1509
|
-
status: "unknown",
|
|
1510
|
-
kind: "deterministic",
|
|
1511
|
-
confidence: "high",
|
|
1512
|
-
message: "No detector cycle rows yet — the collapse/churn detector runs only on improve cycles " +
|
|
1513
|
-
"where consolidate/recombine did work (synthesis lanes may be idle).",
|
|
1514
|
-
});
|
|
1515
|
-
}
|
|
1516
|
-
}
|
|
1517
|
-
catch {
|
|
1518
|
-
// Table may predate migration 016 in odd mixed-version setups — advisory
|
|
1519
|
-
// is best-effort and must never fail the health command.
|
|
1520
|
-
}
|
|
140
|
+
improveSummary.enrichmentMinting = computeEnrichmentMintingRollup(db, since, until);
|
|
141
|
+
advisories.push(...collectImproveAdvisories(db, stateDbPath, since, improveSummary));
|
|
1521
142
|
let sessionLogEntries = [];
|
|
1522
143
|
try {
|
|
1523
144
|
const sinceDays = Math.max(0, Math.ceil((now() - new Date(since).getTime()) / (24 * 60 * 60 * 1000)));
|
|
@@ -1648,103 +269,5 @@ export function akmHealth(options = {}) {
|
|
|
1648
269
|
}
|
|
1649
270
|
}
|
|
1650
271
|
}
|
|
1651
|
-
//
|
|
1652
|
-
|
|
1653
|
-
return s.length >= width ? s : s + " ".repeat(width - s.length);
|
|
1654
|
-
}
|
|
1655
|
-
/**
 * Render headers and rows as a plain-text table with left-aligned,
 * space-padded columns.
 *
 * Column widths are the longest cell (or header) in each column. They are
 * computed with a fold rather than `Math.max(...cells)`, so the number of
 * rows is not limited by the engine's maximum argument count.
 *
 * @param headers - Column titles (strings).
 * @param rows - Array of rows; each row is an array of string cells. A
 *   missing (null/undefined) cell is rendered as an empty string.
 * @returns The header line followed by one line per row, joined with "\n".
 */
function renderTable(headers, rows) {
    const widths = headers.map((header, col) => rows.reduce((widest, row) => Math.max(widest, (row[col] ?? "").length), header.length));
    // Cells beyond the header count have no recorded width and are not padded.
    const formatLine = (cells) => cells.map((cell, col) => padRight(cell ?? "", widths[col] ?? 0)).join(" ");
    return [formatLine(headers), ...rows.map(formatLine)].join("\n");
}
|
|
1664
|
-
/**
 * Format `--detail per-run` rows as an aligned text table via `renderTable`.
 *
 * The column layout was inherited from the retired
 * `scripts/improve-stats/runs-detail` bash helper and is kept unchanged so
 * existing operator habits still apply.
 *
 * Columns: ts | ok | actions | refl_ok/fail/cd/skip |
 *   distill_q/llm-fail/qrej/cfg/skip | cons_proc/promo/merge/del |
 *   mem_cons/written/skip | graph_f/e/r | orphans | lint_f/fl
 *
 * The `actions` column is the sum of all reflect and distill counters plus
 * `memoryPrune`, `memoryInference`, `graphExtraction` and `error`.
 *
 * @param runs - Per-run summaries to render, one table row each.
 * @returns The rendered table text.
 */
export function renderRunsDetailMd(runs) {
    const headers = [
        "ts",
        "ok",
        "actions",
        "refl_ok/fail/cd/skip",
        "distill_q/llm-fail/qrej/cfg/skip",
        "cons_proc/promo/merge/del",
        "mem_cons/written/skip",
        "graph_f/e/r",
        "orphans",
        "lint_f/fl",
    ];
    // Stringify each part first so the text matches template interpolation.
    const slashed = (parts) => parts.map((part) => `${part}`).join("/");
    const toRow = (run) => {
        const { actions, consolidation, memoryInference, graphExtraction } = run;
        const reflectCounts = [
            actions.reflect.ok,
            actions.reflect.failed,
            actions.reflect.cooldown,
            actions.reflect.skipped,
        ];
        const distillCounts = [
            actions.distill.queued,
            actions.distill.llmFailed,
            actions.distill.qualityRejected,
            actions.distill.configDisabled,
            actions.distill.skipped,
        ];
        // Left-to-right fold with no seed, i.e. the same addition chain as a
        // single `a + b + c + …` expression.
        const totalActions = [
            ...reflectCounts,
            ...distillCounts,
            actions.memoryPrune,
            actions.memoryInference,
            actions.graphExtraction,
            actions.error,
        ].reduce((sum, count) => sum + count);
        return [
            run.startedAt,
            String(run.ok),
            String(totalActions),
            slashed(reflectCounts),
            slashed(distillCounts),
            slashed([consolidation.processed, consolidation.promoted, consolidation.merged, consolidation.deleted]),
            slashed([memoryInference.considered, memoryInference.written, memoryInference.skippedNoFacts]),
            slashed([graphExtraction.extractedFiles, graphExtraction.entities, graphExtraction.relations]),
            String(run.orphansPurged),
            slashed([run.lintFixed, run.lintFlagged]),
        ];
    };
    return renderTable(headers, runs.map(toRow));
}
|
|
1716
|
-
/**
 * Render a window-compare comparison as a side-by-side metric table with a
 * trailing delta column.
 *
 * One row is produced per entry of `INTERESTING_DELTA_PATHS`: the metric
 * path, each window's value for it (via `readNumericPath`), then the delta.
 * The delta cell is:
 *   - "—" when `deltas` has no (truthy) entry for the path;
 *   - "<pct>%" for a numeric `pctChange`, prefixed with "+" when positive;
 *   - additionally prefixed with "!" when the change is positive AND the
 *     metric is one where growth is bad (failure counts, wall time, …);
 *   - `String(pctChange)` when `pctChange` is not a number.
 *
 * @param windows - Window summaries; each needs a `name`. Empty → "".
 * @param deltas - Optional map from metric path to `{ pctChange }`.
 * @returns The table text produced by `renderTable`, or "" for no windows.
 */
export function renderWindowCompareMd(windows, deltas) {
    if (windows.length === 0)
        return "";
    const headers = ["metric", ...windows.map((w) => w.name), "delta"];
    // Metrics for which an increase is a regression.
    const badIfPositive = new Set([
        "improve.actions.reflect.failed",
        "improve.actions.distill.llmFailed",
        "improve.graphExtraction.failures",
        "improve.graphExtraction.nonArrayBatchFailures",
        "improve.wallTime.medianMs",
        "improve.wallTime.p95Ms",
        "improve.memoryInference.skippedNoFacts",
    ]);
    const rows = [];
    for (const path of INTERESTING_DELTA_PATHS) {
        const values = windows.map((w) => String(readNumericPath(w, path)));
        const delta = deltas?.[path];
        let deltaStr = "—";
        if (delta) {
            const pct = delta.pctChange;
            if (typeof pct === "number") {
                const increased = pct > 0;
                const marker = increased && badIfPositive.has(path) ? "!" : "";
                const sign = increased ? "+" : "";
                deltaStr = `${marker}${sign}${pct}%`;
            }
            else {
                deltaStr = String(pct);
            }
        }
        rows.push([path, ...values, deltaStr]);
    }
    return renderTable(headers, rows);
}
|
|
272
|
+
// Markdown renderers (renderRunsDetailMd / renderWindowCompareMd) live in
|
|
273
|
+
// health/md-report.ts, mirroring the HTML extraction in health/html-report.ts.
|