akm-cli 0.9.0-beta.54 → 0.9.0-beta.55
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli.js +5 -3
- package/dist/commands/agent/contribute-cli.js +2 -3
- package/dist/commands/env/env-cli.js +187 -202
- package/dist/commands/env/secret-cli.js +109 -121
- package/dist/commands/feedback-cli.js +152 -155
- package/dist/commands/health/advisories.js +151 -0
- package/dist/commands/health/improve-metrics.js +754 -0
- package/dist/commands/health/llm-usage.js +65 -0
- package/dist/commands/health/md-report.js +103 -0
- package/dist/commands/health/metrics.js +278 -0
- package/dist/commands/health/task-runs.js +135 -0
- package/dist/commands/health/types.js +18 -0
- package/dist/commands/health/windows.js +196 -0
- package/dist/commands/health.js +14 -1624
- package/dist/commands/improve/anti-collapse.js +170 -0
- package/dist/commands/improve/collapse-detector.js +3 -2
- package/dist/commands/improve/consolidate.js +636 -633
- package/dist/commands/improve/dedup.js +1 -1
- package/dist/commands/improve/distill/content-repair.js +202 -0
- package/dist/commands/improve/distill/promote-memory.js +228 -0
- package/dist/commands/improve/distill/quality-gate.js +233 -0
- package/dist/commands/improve/distill-guards.js +127 -0
- package/dist/commands/improve/distill.js +49 -575
- package/dist/commands/improve/extract-cli.js +74 -76
- package/dist/commands/improve/extract.js +6 -4
- package/dist/commands/improve/hot-probation.js +45 -0
- package/dist/commands/improve/improve-auto-accept.js +3 -2
- package/dist/commands/improve/improve-cli.js +14 -13
- package/dist/commands/improve/improve-result-file.js +2 -1
- package/dist/commands/improve/improve.js +6 -5
- package/dist/commands/improve/loop-stages.js +19 -21
- package/dist/commands/improve/preparation.js +4 -2
- package/dist/commands/improve/procedural.js +10 -31
- package/dist/commands/improve/recombine.js +19 -43
- package/dist/commands/improve/reflect.js +1 -1
- package/dist/commands/improve/schema-similarity-gate.js +168 -0
- package/dist/commands/improve/shared.js +48 -0
- package/dist/commands/observability-cli.js +4 -4
- package/dist/commands/proposal/drain-policies.js +2 -2
- package/dist/commands/proposal/drain.js +1 -1
- package/dist/commands/proposal/legacy-import.js +115 -0
- package/dist/commands/proposal/proposal-cli.js +3 -3
- package/dist/commands/proposal/proposal.js +2 -1
- package/dist/commands/proposal/propose.js +1 -1
- package/dist/commands/proposal/repository.js +829 -0
- package/dist/commands/proposal/validators/proposals.js +5 -920
- package/dist/commands/read/remember-cli.js +132 -137
- package/dist/commands/read/search-cli.js +1 -1
- package/dist/commands/registry-cli.js +76 -87
- package/dist/commands/sources/add-cli.js +90 -94
- package/dist/commands/sources/history.js +1 -1
- package/dist/commands/sources/schema-repair.js +1 -1
- package/dist/commands/sources/sources-cli.js +3 -3
- package/dist/commands/sources/stash-cli.js +1 -1
- package/dist/commands/tasks/tasks-cli.js +1 -2
- package/dist/commands/wiki-cli.js +2 -3
- package/dist/core/common.js +3 -3
- package/dist/core/config/config-schema.js +6 -0
- package/dist/core/deep-merge.js +38 -0
- package/dist/core/events.js +2 -1
- package/dist/core/logs-db.js +8 -13
- package/dist/core/paths.js +14 -14
- package/dist/core/state-db.js +13 -1140
- package/dist/indexer/db/db.js +66 -709
- package/dist/indexer/db/entry-mapper.js +41 -0
- package/dist/indexer/db/schema.js +516 -0
- package/dist/indexer/feedback/utility-policy.js +85 -0
- package/dist/indexer/graph/graph-extraction.js +2 -1
- package/dist/indexer/index-writer-lock.js +9 -0
- package/dist/indexer/indexer.js +78 -23
- package/dist/indexer/search/fts-query.js +51 -0
- package/dist/integrations/agent/spawn.js +15 -66
- package/dist/output/text/helpers.js +13 -0
- package/dist/scripts/migrate-storage.js +6891 -7436
- package/dist/scripts/migrations/import-fs-improve-runs-to-db.js +44 -43
- package/dist/setup/legacy-config.js +106 -0
- package/dist/setup/prompt.js +57 -0
- package/dist/setup/providers.js +14 -0
- package/dist/setup/semantic-assets.js +124 -0
- package/dist/setup/setup.js +24 -1607
- package/dist/setup/steps/connection.js +734 -0
- package/dist/setup/steps/output.js +31 -0
- package/dist/setup/steps/platforms.js +124 -0
- package/dist/setup/steps/semantic.js +27 -0
- package/dist/setup/steps/sources.js +222 -0
- package/dist/setup/steps/stashdir.js +42 -0
- package/dist/setup/steps/tasks.js +152 -0
- package/dist/storage/repositories/canaries-repository.js +107 -0
- package/dist/storage/repositories/consolidation-repository.js +38 -0
- package/dist/storage/repositories/embeddings-repository.js +72 -0
- package/dist/storage/repositories/events-repository.js +187 -0
- package/dist/storage/repositories/extract-sessions-repository.js +96 -0
- package/dist/storage/repositories/improve-runs-repository.js +130 -0
- package/dist/storage/repositories/index-db.js +4 -7
- package/dist/storage/repositories/proposals-repository.js +220 -0
- package/dist/storage/repositories/recombine-repository.js +213 -0
- package/dist/storage/repositories/task-history-repository.js +93 -0
- package/dist/storage/sqlite-pragmas.js +3 -3
- package/dist/tasks/runner.js +2 -1
- package/package.json +1 -1
- package/dist/commands/improve/homeostatic.js +0 -497
|
@@ -0,0 +1,754 @@
|
|
|
1
|
+
// This Source Code Form is subject to the terms of the Mozilla Public
|
|
2
|
+
// License, v. 2.0. If a copy of the MPL was not distributed with this
|
|
3
|
+
// file, You can obtain one at https://mozilla.org/MPL/2.0/.
|
|
4
|
+
import { queryImproveRuns } from "../../storage/repositories/improve-runs-repository.js";
|
|
5
|
+
import { summarizeCalibration } from "../improve/calibration.js";
|
|
6
|
+
/**
 * Round a numeric rate to at most four fractional digits.
 *
 * @param {number} value - Number to round; it must provide `toFixed`.
 * @returns {number} `value` formatted with four decimals and converted back to a number.
 */
export function roundRate(value) {
    const fourDecimals = value.toFixed(4);
    return Number(fourDecimals);
}
|
|
9
|
+
/**
 * Parse the JSON metadata column of a task row.
 *
 * Always returns a plain object. An empty object is returned when the row or
 * its `metadata_json` cannot be read or parsed, and also when the JSON parses
 * to something that is not an object (`null`, a number, a string, a boolean
 * or an array), so callers can read properties without a null check.
 *
 * @param {{ metadata_json?: unknown }} row - Row carrying a `metadata_json` value.
 * @returns {object} The parsed metadata object, or `{}` as a fallback.
 */
export function parseTaskMetadata(row) {
    let parsed;
    try {
        parsed = JSON.parse(row.metadata_json);
    }
    catch {
        // Best-effort: unreadable or malformed metadata is treated as empty.
        return {};
    }
    // JSON.parse(null) yields null and JSON.parse("42") yields 42; neither is
    // usable as a metadata record.
    if (parsed === null || typeof parsed !== "object" || Array.isArray(parsed))
        return {};
    return parsed;
}
|
|
17
|
+
/**
 * Build a fresh improve-metrics accumulator with every field at its empty
 * value: counters at 0, `ran` flags false, per-reason maps empty, and the
 * coverage ratios NaN. `calibration` is whatever `summarizeCalibration([])`
 * returns. Each call returns new objects, so accumulators never share state.
 *
 * @returns {object} A new metrics accumulator.
 */
function createUnknownImproveMetrics() {
    // Object whose listed keys are all 0, in the given order.
    const zeroCounters = (keys) => Object.fromEntries(keys.map((key) => [key, 0]));
    const emptyPhaseTiming = () => zeroCounters(["count", "totalMs", "medianMs", "p95Ms"]);
    const reflectActions = {
        ...zeroCounters(["ok", "failed", "cooldown", "skipped", "guardRejected"]),
        skippedByReason: {},
    };
    const distillActions = {
        ...zeroCounters([
            "queued",
            "llmFailed",
            "qualityRejected",
            "judgeRejected",
            "validatorRejected",
            "configDisabled",
            "skipped",
        ]),
        skippedByReason: {},
        deferred: 0,
        deferredByReason: {},
    };
    const actions = {
        reflect: reflectActions,
        distill: distillActions,
        ...zeroCounters(["memoryPrune", "memoryInference", "graphExtraction", "error"]),
    };
    const memoryCleanup = zeroCounters([
        "pruneCandidates",
        "contradictionCandidates",
        "beliefStateTransitions",
        "consolidationCandidates",
        "archived",
        "warnings",
    ]);
    const consolidation = {
        ran: false,
        ...zeroCounters([
            "processed",
            "promoted",
            "merged",
            "deleted",
            "contradicted",
            "judgedNoAction",
            "mergedSecondaries",
            "failedChunkMemories",
        ]),
        skipReasons: {},
        ...zeroCounters(["failedChunks", "totalChunks", "durationMs"]),
    };
    const memoryInference = {
        ran: false,
        ...zeroCounters([
            "considered",
            "cacheHits",
            "retryAttempts",
            "freshAttempts",
            "splitParents",
            "written",
            "skippedNoFacts",
            "skippedChildExists",
            "skippedAborted",
            "unaccounted",
            "htmlErrorCount",
            "yieldEligibleRuns",
            "yieldEligibleConsidered",
            "yieldEligibleWritten",
            "yieldRate",
            "durationMs",
            "writes",
        ]),
    };
    const graphExtraction = {
        ran: false,
        ...zeroCounters([
            "extractedFiles",
            "entities",
            "relations",
            "cacheHits",
            "cacheMisses",
            "cacheHitRate",
            "truncations",
            "failures",
            "htmlErrors",
            "retryAttempts",
            "nonArrayBatchFailures",
            "durationMs",
        ]),
    };
    const sessionExtraction = {
        ran: false,
        ...zeroCounters([
            "sessionsScanned",
            "sessionsExtracted",
            "sessionsSkipped",
            "proposalsCreated",
            "warnings",
            "durationMs",
        ]),
    };
    const wallTime = {
        ...zeroCounters(["count", "medianMs", "p95Ms", "minMs", "maxMs"]),
        byPhase: {
            consolidation: emptyPhaseTiming(),
            memoryInference: emptyPhaseTiming(),
            graphExtraction: emptyPhaseTiming(),
        },
    };
    const perfTelemetry = zeroCounters([
        "dedupPoolSize",
        "llmPoolSize",
        "judgedCacheSkipped",
        "embedMs",
        "embedCacheHits",
        "embedCacheMisses",
        "overBudgetRuns",
        "runsWithTelemetry",
    ]);
    const coverage = {
        rate: Number.NaN,
        eligibleFraction: Number.NaN,
        acceptedProposals: 0,
        distinctRefs: 0,
        churnRatio: Number.NaN,
        totalAssets: 0,
    };
    return {
        invoked: 0,
        completed: 0,
        skipped: 0,
        skipReasons: {},
        plannedRefs: 0,
        profileFilteredRefs: 0,
        actions,
        autoAccept: zeroCounters(["promoted", "validationFailed"]),
        calibration: summarizeCalibration([]),
        reflectsWithErrorContext: 0,
        coverageGapCount: 0,
        evalCasesWritten: 0,
        deadUrlCount: 0,
        memorySummary: zeroCounters(["eligible", "derived"]),
        memoryCleanup,
        consolidation,
        memoryInference,
        graphExtraction,
        sessionExtraction,
        wallTime,
        perfTelemetry,
        coverage,
    };
}
|
|
150
|
+
/**
 * Coerce a loosely-typed value to a finite number.
 *
 * Finite numbers pass through unchanged. Non-blank strings are converted with
 * `Number()` and accepted only when the result is finite. Everything else
 * (non-finite numbers, blank or unparsable strings, other types) yields 0.
 *
 * @param {unknown} value - Value to coerce.
 * @returns {number} A finite number, or 0 when `value` cannot be used.
 */
export function toFiniteNumber(value) {
    switch (typeof value) {
        case "number":
            return Number.isFinite(value) ? value : 0;
        case "string": {
            if (!value.trim())
                return 0;
            const numeric = Number(value);
            return Number.isFinite(numeric) ? numeric : 0;
        }
        default:
            return 0;
    }
}
|
|
160
|
+
/**
 * Event-derived metrics. Starts from an empty accumulator and sets only
 * `completed`, to the number of events supplied; every other field keeps its
 * empty value.
 *
 * @param {ArrayLike<unknown>} events - Events to count; only `length` is read.
 * @returns {object} A new metrics accumulator with `completed` filled in.
 */
export function summarizeImproveCompleted(events) {
    return Object.assign(createUnknownImproveMetrics(), { completed: events.length });
}
|
|
173
|
+
/**
 * Project a single `improve_runs.result_json` envelope into an accumulator-shaped
 * metrics object. The aggregator merges these per-row metrics into one
 * window-level metric.
 *
 * Every field of the envelope is optional: arrays are counted only when they
 * really are arrays, numbers go through `toFiniteNumber`, and malformed
 * entries inside `actions`, `extract`, `sessions` and `consolidation.skipReasons`
 * are ignored instead of throwing.
 *
 * @param {object} result - Parsed run envelope.
 * @returns {object} A fresh metrics accumulator populated from `result`.
 *   Derived flags and rates (`ran`, `yieldRate`, `cacheHitRate`) are not
 *   computed here.
 */
function projectRunMetrics(result) {
    const metrics = createUnknownImproveMetrics();
    // Add one to a per-reason tally. The own-property check keeps inherited
    // members (e.g. a reason named "constructor") from being read as a count.
    const bumpReason = (bucket, reason) => {
        bucket[reason] = (Object.hasOwn(bucket, reason) ? bucket[reason] : 0) + 1;
    };
    // `reason` of an action result, or "unknown" when missing or blank.
    const reasonOrUnknown = (payload) => typeof payload?.reason === "string" && payload.reason.trim() ? payload.reason : "unknown";
    // plannedRefs (array of {ref, reason})
    const plannedRefs = result.plannedRefs;
    if (Array.isArray(plannedRefs))
        metrics.plannedRefs += plannedRefs.length;
    // profileFilteredRefs (array of {ref, reason}) — 2026-05-27: pre-filter
    // bucket from `collectEligibleRefs` so the metric reflects work the
    // planner dropped before signal-delta / per-pass dispatch.
    const profileFilteredRefs = result.profileFilteredRefs;
    if (Array.isArray(profileFilteredRefs))
        metrics.profileFilteredRefs += profileFilteredRefs.length;
    // actions: split reflect / distill by outcome, count others.
    const actions = result.actions;
    if (Array.isArray(actions)) {
        const reflect = metrics.actions.reflect;
        const distill = metrics.actions.distill;
        for (const action of actions) {
            // A null or non-object entry has no mode and is skipped by the switch.
            const mode = typeof action?.mode === "string" ? action.mode : "";
            const r = action?.result;
            switch (mode) {
                case "reflect":
                    reflect.ok += 1;
                    break;
                case "reflect-failed":
                    reflect.failed += 1;
                    break;
                case "reflect-cooldown":
                    reflect.cooldown += 1;
                    break;
                case "reflect-skipped":
                    reflect.skipped += 1;
                    bumpReason(reflect.skippedByReason, reasonOrUnknown(r));
                    break;
                case "reflect-guard-rejected":
                    reflect.guardRejected += 1;
                    break;
                case "distill": {
                    const outcome = typeof r?.outcome === "string" ? r.outcome : "";
                    switch (outcome) {
                        case "queued":
                            distill.queued += 1;
                            break;
                        case "llm_failed":
                            distill.llmFailed += 1;
                            break;
                        case "quality_rejected":
                        case "review_needed":
                            // Both outcomes count as a quality rejection by the judge.
                            distill.qualityRejected += 1;
                            distill.judgeRejected += 1;
                            break;
                        case "validation_failed":
                            distill.qualityRejected += 1;
                            distill.validatorRejected += 1;
                            break;
                        case "config_disabled":
                            distill.configDisabled += 1;
                            break;
                        case "skipped": {
                            // Skipped distill outcomes are tallied as `deferred`. A
                            // non-blank explicit `skipReason` wins; otherwise the reason
                            // is inferred from the result message:
                            //   - "lesson inputs"  -> recursive_lesson_input
                            //   - "NOOP"           -> conflict_noop
                            //   - "cooldown"       -> proposal_cooldown
                            //   - "content hash"   -> content_hash_match
                            // and falls back to "unknown".
                            distill.deferred += 1;
                            // A blank skipReason is treated as absent so it cannot
                            // create an empty-string bucket.
                            const explicitReason = typeof r?.skipReason === "string" && r.skipReason.trim() ? r.skipReason : undefined;
                            const msg = typeof r?.message === "string" ? r.message : "";
                            let reason = explicitReason ?? "unknown";
                            if (explicitReason === undefined) {
                                if (/lesson inputs/i.test(msg))
                                    reason = "recursive_lesson_input";
                                else if (/NOOP/.test(msg))
                                    reason = "conflict_noop";
                                else if (/cooldown/i.test(msg))
                                    reason = "proposal_cooldown";
                                else if (/content[_ ]?hash/i.test(msg))
                                    reason = "content_hash_match";
                            }
                            bumpReason(distill.deferredByReason, reason);
                            break;
                        }
                        default:
                            break;
                    }
                    break;
                }
                case "distill-skipped":
                    distill.skipped += 1;
                    bumpReason(distill.skippedByReason, reasonOrUnknown(r));
                    break;
                case "memory-prune":
                    metrics.actions.memoryPrune += 1;
                    break;
                case "memory-inference":
                    metrics.actions.memoryInference += 1;
                    break;
                case "graph-extraction":
                    metrics.actions.graphExtraction += 1;
                    break;
                case "error":
                    metrics.actions.error += 1;
                    break;
            }
        }
    }
    metrics.autoAccept.promoted += toFiniteNumber(result.gateAutoAcceptedCount);
    metrics.autoAccept.validationFailed += toFiniteNumber(result.gateAutoAcceptFailedCount);
    metrics.reflectsWithErrorContext += toFiniteNumber(result.reflectsWithErrorContext);
    if (Array.isArray(result.coverageGaps))
        metrics.coverageGapCount += result.coverageGaps.length;
    metrics.evalCasesWritten += toFiniteNumber(result.evalCasesWritten);
    if (Array.isArray(result.deadUrls))
        metrics.deadUrlCount += result.deadUrls.length;
    const memorySummary = result.memorySummary;
    if (memorySummary) {
        metrics.memorySummary.eligible += toFiniteNumber(memorySummary.eligible);
        metrics.memorySummary.derived += toFiniteNumber(memorySummary.derived);
    }
    const memoryCleanup = result.memoryCleanup;
    if (memoryCleanup) {
        // Each cleanup field is a list in the envelope; only its length is kept.
        if (Array.isArray(memoryCleanup.pruneCandidates))
            metrics.memoryCleanup.pruneCandidates += memoryCleanup.pruneCandidates.length;
        if (Array.isArray(memoryCleanup.contradictionCandidates))
            metrics.memoryCleanup.contradictionCandidates += memoryCleanup.contradictionCandidates.length;
        if (Array.isArray(memoryCleanup.beliefStateTransitions))
            metrics.memoryCleanup.beliefStateTransitions += memoryCleanup.beliefStateTransitions.length;
        if (Array.isArray(memoryCleanup.consolidationCandidates))
            metrics.memoryCleanup.consolidationCandidates += memoryCleanup.consolidationCandidates.length;
        if (Array.isArray(memoryCleanup.archived))
            metrics.memoryCleanup.archived += memoryCleanup.archived.length;
        if (Array.isArray(memoryCleanup.warnings))
            metrics.memoryCleanup.warnings += memoryCleanup.warnings.length;
    }
    const consolidation = result.consolidation;
    if (consolidation) {
        metrics.consolidation.processed += toFiniteNumber(consolidation.processed);
        metrics.consolidation.merged += toFiniteNumber(consolidation.merged);
        metrics.consolidation.deleted += toFiniteNumber(consolidation.deleted);
        metrics.consolidation.contradicted += toFiniteNumber(consolidation.contradicted);
        // `promoted` is a list in the envelope, unlike the numeric counters.
        if (Array.isArray(consolidation.promoted))
            metrics.consolidation.promoted += consolidation.promoted.length;
        metrics.consolidation.failedChunks += toFiniteNumber(consolidation.failedChunks);
        metrics.consolidation.totalChunks += toFiniteNumber(consolidation.totalChunks);
        metrics.consolidation.durationMs += toFiniteNumber(consolidation.durationMs);
        metrics.consolidation.judgedNoAction += toFiniteNumber(consolidation.judgedNoAction);
        metrics.consolidation.mergedSecondaries += toFiniteNumber(consolidation.mergedSecondaries);
        metrics.consolidation.failedChunkMemories += toFiniteNumber(consolidation.failedChunkMemories);
        // Structured emitter: `skipReasons` holds per-ref grouped
        // `{ref, skips: [{op, reason}]}` entries. Each ref appears once but may
        // carry multiple skips; aggregate every reason. Older envelopes have
        // neither field, so be defensive.
        const skipReasons = consolidation.skipReasons;
        if (Array.isArray(skipReasons)) {
            for (const entry of skipReasons) {
                if (!entry || typeof entry !== "object")
                    continue;
                const skips = entry.skips;
                if (!Array.isArray(skips))
                    continue;
                for (const skip of skips) {
                    if (!skip || typeof skip !== "object")
                        continue;
                    const reason = skip.reason;
                    if (typeof reason !== "string" || !reason.trim())
                        continue;
                    bumpReason(metrics.consolidation.skipReasons, reason);
                }
            }
        }
        // WS-5: extract perf telemetry from the consolidation envelope.
        // Pre-WS-5 envelopes lack `perfTelemetry`; be defensive.
        const perf = consolidation.perfTelemetry;
        if (perf) {
            metrics.perfTelemetry.runsWithTelemetry += 1;
            metrics.perfTelemetry.dedupPoolSize += toFiniteNumber(perf.dedupPoolSize);
            metrics.perfTelemetry.llmPoolSize += toFiniteNumber(perf.llmPoolSize);
            metrics.perfTelemetry.judgedCacheSkipped += toFiniteNumber(perf.judgedCacheSkipped);
            metrics.perfTelemetry.embedMs += toFiniteNumber(perf.embedMs);
            metrics.perfTelemetry.embedCacheHits += toFiniteNumber(perf.embedCacheHits);
            metrics.perfTelemetry.embedCacheMisses += toFiniteNumber(perf.embedCacheMisses);
            // A budget fraction above 1.0 marks the run as over budget.
            const budgetFrac = toFiniteNumber(perf.estimatedBudgetFractionUsed);
            if (budgetFrac > 1.0)
                metrics.perfTelemetry.overBudgetRuns += 1;
        }
    }
    const memoryInference = result.memoryInference;
    if (memoryInference) {
        const considered = toFiniteNumber(memoryInference.considered);
        const writtenFacts = toFiniteNumber(memoryInference.writtenFacts);
        metrics.memoryInference.considered += considered;
        metrics.memoryInference.cacheHits += toFiniteNumber(memoryInference.cacheHits);
        metrics.memoryInference.retryAttempts += toFiniteNumber(memoryInference.retryAttempts);
        metrics.memoryInference.splitParents += toFiniteNumber(memoryInference.splitParents);
        metrics.memoryInference.written += writtenFacts;
        metrics.memoryInference.skippedNoFacts += toFiniteNumber(memoryInference.skippedNoFacts);
        metrics.memoryInference.skippedChildExists += toFiniteNumber(memoryInference.skippedChildExists);
        metrics.memoryInference.skippedAborted += toFiniteNumber(memoryInference.skippedAborted);
        metrics.memoryInference.unaccounted += toFiniteNumber(memoryInference.unaccounted);
        metrics.memoryInference.htmlErrorCount += toFiniteNumber(memoryInference.htmlErrorCount);
        // Yield-rate gating: pre-cache-feature envelopes lack the `cacheHits`
        // field entirely. Treating their `considered` as freshAttempts (since
        // cacheHits=0) is mathematically tempting but operationally wrong —
        // historical runs with the legacy schema have no cache instrumentation
        // and the SUM dragged the reported rate to ~14% in local data. Only
        // contribute to the yield aggregate when the envelope actually carries
        // the field. See investigation 2026-05-26.
        if (Object.hasOwn(memoryInference, "cacheHits")) {
            metrics.memoryInference.yieldEligibleRuns += 1;
            metrics.memoryInference.yieldEligibleConsidered += considered;
            metrics.memoryInference.yieldEligibleWritten += writtenFacts;
        }
    }
    // Phase durations live at the top level of the envelope.
    metrics.memoryInference.durationMs += toFiniteNumber(result.memoryInferenceDurationMs);
    const graphExtraction = result.graphExtraction;
    if (graphExtraction) {
        const quality = graphExtraction.quality;
        if (quality)
            metrics.graphExtraction.extractedFiles += toFiniteNumber(quality.extractedFiles);
        metrics.graphExtraction.entities += toFiniteNumber(graphExtraction.totalEntities);
        metrics.graphExtraction.relations += toFiniteNumber(graphExtraction.totalRelations);
        const telemetry = graphExtraction.telemetry;
        if (telemetry) {
            metrics.graphExtraction.cacheHits += toFiniteNumber(telemetry.cacheHits);
            metrics.graphExtraction.cacheMisses += toFiniteNumber(telemetry.cacheMisses);
            metrics.graphExtraction.truncations += toFiniteNumber(telemetry.truncationCount);
            metrics.graphExtraction.failures += toFiniteNumber(telemetry.failureCount);
            metrics.graphExtraction.htmlErrors += toFiniteNumber(telemetry.htmlErrorCount);
            metrics.graphExtraction.retryAttempts += toFiniteNumber(telemetry.retryAttempts);
            metrics.graphExtraction.nonArrayBatchFailures += toFiniteNumber(telemetry.nonArrayBatchFailures);
        }
    }
    metrics.graphExtraction.durationMs += toFiniteNumber(result.graphExtractionDurationMs);
    if (Array.isArray(result.extract)) {
        for (const e of result.extract) {
            if (!e || typeof e !== "object")
                continue;
            // The envelope's `sessionsProcessed` feeds the `sessionsScanned` metric.
            metrics.sessionExtraction.sessionsScanned += toFiniteNumber(e.sessionsProcessed);
            metrics.sessionExtraction.sessionsSkipped += toFiniteNumber(e.sessionsSkipped);
            if (Array.isArray(e.sessions)) {
                // A session counts as extracted only when it produced at least one proposal.
                metrics.sessionExtraction.sessionsExtracted += e.sessions.filter((s) => Array.isArray(s?.proposalIds) && s.proposalIds.length > 0).length;
            }
            metrics.sessionExtraction.proposalsCreated += Array.isArray(e.proposals) ? e.proposals.length : 0;
            metrics.sessionExtraction.warnings += Array.isArray(e.warnings) ? e.warnings.length : 0;
            metrics.sessionExtraction.durationMs += toFiniteNumber(e.durationMs);
        }
    }
    return metrics;
}
|
|
433
|
+
/**
 * Fill in the derived flags and rates on a metrics accumulator, in place.
 * Works the same for a window-level aggregate and for a single run's metrics,
 * so either exposes `ran` / `yieldRate` / `cacheHitRate`.
 *
 * @param {object} metrics - Accumulator to finalize; mutated.
 * @returns {void}
 */
function finalizeImproveMetrics(metrics) {
    const { consolidation, memoryInference, graphExtraction, sessionExtraction } = metrics;
    const anyPositive = (...amounts) => amounts.some((amount) => amount > 0);
    // A phase "ran" when any of its activity counters is positive.
    consolidation.ran = anyPositive(consolidation.processed, consolidation.durationMs, consolidation.promoted, consolidation.merged, consolidation.deleted, consolidation.contradicted, consolidation.totalChunks);
    memoryInference.ran = anyPositive(memoryInference.considered, memoryInference.written, memoryInference.durationMs);
    // `writes` mirrors `written`.
    memoryInference.writes = memoryInference.written;
    // The yield denominator is built from the yield-eligible `considered`
    // total, minus cache hits and aborted skips, clamped at zero. The rate is
    // reported as 0 when there were no fresh attempts.
    const fresh = Math.max(0, memoryInference.yieldEligibleConsidered - memoryInference.cacheHits - memoryInference.skippedAborted);
    memoryInference.freshAttempts = fresh;
    if (fresh > 0) {
        memoryInference.yieldRate = roundRate(memoryInference.yieldEligibleWritten / fresh);
    }
    else {
        memoryInference.yieldRate = 0;
    }
    graphExtraction.ran = anyPositive(graphExtraction.extractedFiles, graphExtraction.entities, graphExtraction.durationMs);
    const cacheLookups = graphExtraction.cacheHits + graphExtraction.cacheMisses;
    if (cacheLookups > 0) {
        graphExtraction.cacheHitRate = roundRate(graphExtraction.cacheHits / cacheLookups);
    }
    else {
        graphExtraction.cacheHitRate = 0;
    }
    sessionExtraction.ran = anyPositive(sessionExtraction.sessionsScanned, sessionExtraction.proposalsCreated, sessionExtraction.durationMs);
}
|
|
476
|
+
/**
|
|
477
|
+
* Merge per-row metrics from `src` into accumulator `dst`. All numeric fields
|
|
478
|
+
* are additive; cumulative rates are recomputed by finalizeImproveMetrics.
|
|
479
|
+
*/
|
|
480
|
+
function mergeImproveMetrics(dst, src) {
|
|
481
|
+
dst.plannedRefs += src.plannedRefs;
|
|
482
|
+
// profileFilteredRefs is the count of refs the planner drops up-front for the
|
|
483
|
+
// active profile — recomputed against the (stable) stash every run, so it is a
|
|
484
|
+
// snapshot, NOT a per-run increment. Summing it re-counts the same refs each
|
|
485
|
+
// run (the ~2.4M bug). Set from the most recent run in summarizeImproveRuns.
|
|
486
|
+
dst.actions.reflect.ok += src.actions.reflect.ok;
|
|
487
|
+
dst.actions.reflect.failed += src.actions.reflect.failed;
|
|
488
|
+
dst.actions.reflect.cooldown += src.actions.reflect.cooldown;
|
|
489
|
+
dst.actions.reflect.skipped += src.actions.reflect.skipped;
|
|
490
|
+
dst.actions.reflect.guardRejected += src.actions.reflect.guardRejected;
|
|
491
|
+
for (const [reason, count] of Object.entries(src.actions.reflect.skippedByReason)) {
|
|
492
|
+
dst.actions.reflect.skippedByReason[reason] = (dst.actions.reflect.skippedByReason[reason] ?? 0) + count;
|
|
493
|
+
}
|
|
494
|
+
dst.actions.distill.queued += src.actions.distill.queued;
|
|
495
|
+
dst.actions.distill.llmFailed += src.actions.distill.llmFailed;
|
|
496
|
+
dst.actions.distill.qualityRejected += src.actions.distill.qualityRejected;
|
|
497
|
+
dst.actions.distill.judgeRejected += src.actions.distill.judgeRejected;
|
|
498
|
+
dst.actions.distill.validatorRejected += src.actions.distill.validatorRejected;
|
|
499
|
+
dst.actions.distill.configDisabled += src.actions.distill.configDisabled;
|
|
500
|
+
dst.actions.distill.skipped += src.actions.distill.skipped;
|
|
501
|
+
for (const [reason, count] of Object.entries(src.actions.distill.skippedByReason)) {
|
|
502
|
+
dst.actions.distill.skippedByReason[reason] = (dst.actions.distill.skippedByReason[reason] ?? 0) + count;
|
|
503
|
+
}
|
|
504
|
+
dst.actions.distill.deferred += src.actions.distill.deferred;
|
|
505
|
+
for (const [reason, count] of Object.entries(src.actions.distill.deferredByReason)) {
|
|
506
|
+
dst.actions.distill.deferredByReason[reason] = (dst.actions.distill.deferredByReason[reason] ?? 0) + count;
|
|
507
|
+
}
|
|
508
|
+
dst.actions.memoryPrune += src.actions.memoryPrune;
|
|
509
|
+
dst.actions.memoryInference += src.actions.memoryInference;
|
|
510
|
+
dst.actions.graphExtraction += src.actions.graphExtraction;
|
|
511
|
+
dst.actions.error += src.actions.error;
|
|
512
|
+
dst.autoAccept.promoted += src.autoAccept.promoted;
|
|
513
|
+
dst.autoAccept.validationFailed += src.autoAccept.validationFailed;
|
|
514
|
+
dst.reflectsWithErrorContext += src.reflectsWithErrorContext;
|
|
515
|
+
dst.coverageGapCount += src.coverageGapCount;
|
|
516
|
+
dst.evalCasesWritten += src.evalCasesWritten;
|
|
517
|
+
dst.deadUrlCount += src.deadUrlCount;
|
|
518
|
+
// NOTE: memorySummary (derived/eligible) is a WHOLE-STASH snapshot recorded on
|
|
519
|
+
// every run, NOT a per-run increment — summing it across the window inflates
|
|
520
|
+
// it ~N× (the 1.2M-eligible bug). It is set from the most recent run in
|
|
521
|
+
// summarizeImproveRuns instead, so it is intentionally not merged here.
|
|
522
|
+
dst.memoryCleanup.pruneCandidates += src.memoryCleanup.pruneCandidates;
|
|
523
|
+
dst.memoryCleanup.contradictionCandidates += src.memoryCleanup.contradictionCandidates;
|
|
524
|
+
dst.memoryCleanup.beliefStateTransitions += src.memoryCleanup.beliefStateTransitions;
|
|
525
|
+
dst.memoryCleanup.consolidationCandidates += src.memoryCleanup.consolidationCandidates;
|
|
526
|
+
dst.memoryCleanup.archived += src.memoryCleanup.archived;
|
|
527
|
+
dst.memoryCleanup.warnings += src.memoryCleanup.warnings;
|
|
528
|
+
dst.consolidation.processed += src.consolidation.processed;
|
|
529
|
+
dst.consolidation.promoted += src.consolidation.promoted;
|
|
530
|
+
dst.consolidation.merged += src.consolidation.merged;
|
|
531
|
+
dst.consolidation.deleted += src.consolidation.deleted;
|
|
532
|
+
dst.consolidation.contradicted += src.consolidation.contradicted;
|
|
533
|
+
dst.consolidation.failedChunks += src.consolidation.failedChunks;
|
|
534
|
+
dst.consolidation.totalChunks += src.consolidation.totalChunks;
|
|
535
|
+
dst.consolidation.durationMs += src.consolidation.durationMs;
|
|
536
|
+
dst.consolidation.judgedNoAction += src.consolidation.judgedNoAction;
|
|
537
|
+
dst.consolidation.mergedSecondaries += src.consolidation.mergedSecondaries;
|
|
538
|
+
dst.consolidation.failedChunkMemories += src.consolidation.failedChunkMemories;
|
|
539
|
+
for (const [reason, count] of Object.entries(src.consolidation.skipReasons)) {
|
|
540
|
+
dst.consolidation.skipReasons[reason] = (dst.consolidation.skipReasons[reason] ?? 0) + count;
|
|
541
|
+
}
|
|
542
|
+
dst.memoryInference.considered += src.memoryInference.considered;
|
|
543
|
+
dst.memoryInference.cacheHits += src.memoryInference.cacheHits;
|
|
544
|
+
dst.memoryInference.splitParents += src.memoryInference.splitParents;
|
|
545
|
+
dst.memoryInference.written += src.memoryInference.written;
|
|
546
|
+
dst.memoryInference.skippedNoFacts += src.memoryInference.skippedNoFacts;
|
|
547
|
+
dst.memoryInference.skippedChildExists += src.memoryInference.skippedChildExists;
|
|
548
|
+
dst.memoryInference.skippedAborted += src.memoryInference.skippedAborted;
|
|
549
|
+
dst.memoryInference.unaccounted += src.memoryInference.unaccounted;
|
|
550
|
+
dst.memoryInference.htmlErrorCount += src.memoryInference.htmlErrorCount;
|
|
551
|
+
dst.memoryInference.yieldEligibleRuns += src.memoryInference.yieldEligibleRuns;
|
|
552
|
+
dst.memoryInference.yieldEligibleConsidered += src.memoryInference.yieldEligibleConsidered;
|
|
553
|
+
dst.memoryInference.yieldEligibleWritten += src.memoryInference.yieldEligibleWritten;
|
|
554
|
+
dst.memoryInference.durationMs += src.memoryInference.durationMs;
|
|
555
|
+
dst.graphExtraction.extractedFiles += src.graphExtraction.extractedFiles;
|
|
556
|
+
dst.graphExtraction.entities += src.graphExtraction.entities;
|
|
557
|
+
dst.graphExtraction.relations += src.graphExtraction.relations;
|
|
558
|
+
dst.graphExtraction.cacheHits += src.graphExtraction.cacheHits;
|
|
559
|
+
dst.graphExtraction.cacheMisses += src.graphExtraction.cacheMisses;
|
|
560
|
+
dst.graphExtraction.truncations += src.graphExtraction.truncations;
|
|
561
|
+
dst.graphExtraction.failures += src.graphExtraction.failures;
|
|
562
|
+
dst.graphExtraction.htmlErrors += src.graphExtraction.htmlErrors;
|
|
563
|
+
dst.graphExtraction.nonArrayBatchFailures += src.graphExtraction.nonArrayBatchFailures;
|
|
564
|
+
dst.graphExtraction.durationMs += src.graphExtraction.durationMs;
|
|
565
|
+
dst.sessionExtraction.sessionsScanned += src.sessionExtraction.sessionsScanned;
|
|
566
|
+
dst.sessionExtraction.sessionsExtracted += src.sessionExtraction.sessionsExtracted;
|
|
567
|
+
dst.sessionExtraction.sessionsSkipped += src.sessionExtraction.sessionsSkipped;
|
|
568
|
+
dst.sessionExtraction.proposalsCreated += src.sessionExtraction.proposalsCreated;
|
|
569
|
+
dst.sessionExtraction.warnings += src.sessionExtraction.warnings;
|
|
570
|
+
dst.sessionExtraction.durationMs += src.sessionExtraction.durationMs;
|
|
571
|
+
// WS-5: merge perf telemetry (additive sums).
|
|
572
|
+
dst.perfTelemetry.dedupPoolSize += src.perfTelemetry.dedupPoolSize;
|
|
573
|
+
dst.perfTelemetry.llmPoolSize += src.perfTelemetry.llmPoolSize;
|
|
574
|
+
dst.perfTelemetry.judgedCacheSkipped += src.perfTelemetry.judgedCacheSkipped;
|
|
575
|
+
dst.perfTelemetry.embedMs += src.perfTelemetry.embedMs;
|
|
576
|
+
dst.perfTelemetry.embedCacheHits += src.perfTelemetry.embedCacheHits;
|
|
577
|
+
dst.perfTelemetry.embedCacheMisses += src.perfTelemetry.embedCacheMisses;
|
|
578
|
+
dst.perfTelemetry.overBudgetRuns += src.perfTelemetry.overBudgetRuns;
|
|
579
|
+
dst.perfTelemetry.runsWithTelemetry += src.perfTelemetry.runsWithTelemetry;
|
|
580
|
+
// coverage: acceptedProposals is additive; totalAssets is a snapshot (like memorySummary).
|
|
581
|
+
// totalAssets is intentionally NOT merged here — set from the most recent run in summarizeImproveRuns.
|
|
582
|
+
dst.coverage.acceptedProposals += src.coverage.acceptedProposals;
|
|
583
|
+
}
|
|
584
|
+
/**
 * Summarize the improve runs returned by `queryImproveRuns(db, since, until)`
 * into a single metrics object.
 *
 * Additive counters are folded together with `mergeImproveMetrics`. Values that
 * are whole-stash snapshots (`memorySummary`, `profileFilteredRefs`) are taken
 * from the row with the latest parseable `started_at` instead of being summed.
 * Per-phase wall-time samples are collected per envelope and summarized into
 * `metrics.wallTime.byPhase`.
 *
 * Rows whose `result_json` cannot be parsed, or parses to something that is
 * not an object (e.g. `null`), contribute nothing to the metrics.
 *
 * @param db - forwarded unchanged to `queryImproveRuns`.
 * @param since - forwarded unchanged to `queryImproveRuns`.
 * @param until - forwarded unchanged to `queryImproveRuns`.
 * @returns {{ metrics: object, runCount: number }} `runCount` is the number of
 *   rows returned by the query, including rows that were skipped as malformed.
 */
export function summarizeImproveRuns(db, since, until) {
    const accum = createUnknownImproveMetrics();
    const rows = queryImproveRuns(db, since, until);
    // Per-phase wall-time samples. Each entry is one envelope's durationMs for
    // that phase. Phases that did not run on a given envelope are simply
    // omitted (NOT counted as 0) so the median/p95 reflect actual phase work.
    const phaseDurations = {
        consolidation: [],
        memoryInference: [],
        graphExtraction: [],
    };
    // memorySummary is a whole-stash snapshot per run, so the window value is the
    // MOST RECENT run's snapshot (current state) — not a sum across runs.
    let latestStartMs = Number.NEGATIVE_INFINITY;
    let latestMemorySummary;
    let latestProfileFilteredRefs = 0;
    for (const row of rows) {
        let result;
        try {
            result = JSON.parse(row.result_json);
        }
        catch {
            continue;
        }
        // JSON.parse succeeds for "null" (and for a null input, which is coerced
        // to the string "null") as well as for bare primitives. Such a value is
        // not a run envelope; reading properties off null would throw and abort
        // the whole summary, so treat it like an unparseable row.
        if (result === null || typeof result !== "object") {
            continue;
        }
        const perRow = projectRunMetrics(result);
        mergeImproveMetrics(accum, perRow);
        const startMs = new Date(row.started_at).getTime();
        // `>=` so that, among rows sharing a timestamp, the later row wins.
        if (Number.isFinite(startMs) && startMs >= latestStartMs) {
            latestStartMs = startMs;
            latestMemorySummary = perRow.memorySummary;
            latestProfileFilteredRefs = perRow.profileFilteredRefs;
        }
        // Collect per-phase durations directly off the envelope. consolidation's
        // duration lives inside the sub-object; memoryInference and graphExtraction
        // expose top-level *DurationMs keys (`memoryInferenceDurationMs`,
        // `graphExtractionDurationMs`) when they actually ran on that envelope.
        const consol = result.consolidation;
        const consolMs = toFiniteNumber(consol?.durationMs);
        if (consolMs > 0) {
            phaseDurations.consolidation.push(consolMs);
        }
        const memMs = toFiniteNumber(result.memoryInferenceDurationMs);
        if (memMs > 0) {
            phaseDurations.memoryInference.push(memMs);
        }
        const graphMs = toFiniteNumber(result.graphExtractionDurationMs);
        if (graphMs > 0) {
            phaseDurations.graphExtraction.push(graphMs);
        }
    }
    finalizeImproveMetrics(accum);
    // Only replace the default snapshot when at least one row supplied one.
    if (latestMemorySummary) {
        accum.memorySummary = latestMemorySummary;
    }
    accum.profileFilteredRefs = latestProfileFilteredRefs;
    accum.wallTime.byPhase = {
        consolidation: summarizePhaseDurations(phaseDurations.consolidation),
        memoryInference: summarizePhaseDurations(phaseDurations.memoryInference),
        graphExtraction: summarizePhaseDurations(phaseDurations.graphExtraction),
    };
    return { metrics: accum, runCount: rows.length };
}
|
|
642
|
+
/**
 * Reduce a list of per-envelope phase durations to the `wallTime.byPhase.*`
 * shape: `{ count, totalMs, medianMs, p95Ms }`.
 *
 * The median and p95 are picked from the ascending-sorted samples at index
 * `floor(q * length)`, clamped to the last element. An empty list yields
 * all-zero stats. The input array is not modified.
 *
 * @param {number[]} samples - duration samples for one phase.
 * @returns {{ count: number, totalMs: number, medianMs: number, p95Ms: number }}
 */
export function summarizePhaseDurations(samples) {
    if (samples.length === 0) {
        return { count: 0, totalMs: 0, medianMs: 0, p95Ms: 0 };
    }
    // Sort a copy so the caller's array keeps its order.
    const ascending = [...samples];
    ascending.sort((left, right) => left - right);
    const size = ascending.length;
    const quantile = (fraction) => {
        const index = Math.min(size - 1, Math.floor(fraction * size));
        return ascending[index] ?? 0;
    };
    let totalMs = 0;
    for (const ms of ascending) {
        totalMs = totalMs + ms;
    }
    return {
        count: size,
        totalMs,
        medianMs: quantile(0.5),
        p95Ms: quantile(0.95),
    };
}
|
|
661
|
+
/**
 * Project an improve_runs row + wall-time lookup into a single ImproveRunSummary.
 * Used by `akm health --detail per-run`.
 *
 * The row's `result_json` is parsed as the run envelope. If it cannot be
 * parsed, or parses to something that is not an object (e.g. `null`), an
 * empty envelope is used so the summary is still produced from it.
 *
 * @param row - improve_runs row; reads `id`, `started_at`, `completed_at`,
 *   `ok`, `scope_mode`, `scope_value` and `result_json`.
 * @param wallTimeMs - passed through unchanged as `wallTimeMs`.
 * @param taskId - passed through unchanged as `taskId`.
 * @returns the per-run summary object.
 */
export function projectImproveRunSummary(row, wallTimeMs, taskId) {
    let result = {};
    try {
        const parsed = JSON.parse(row.result_json);
        // JSON.parse succeeds for "null" (and for a null input, coerced to
        // "null") and for bare primitives. Reading properties off null would
        // throw below, so only accept a real object as the envelope.
        if (parsed !== null && typeof parsed === "object") {
            result = parsed;
        }
    }
    catch {
        // fall through with empty result so per-stage rollups are zeros
    }
    const perRow = projectRunMetrics(result);
    finalizeImproveMetrics(perRow);
    const orphansPurged = toFiniteNumber(result.orphansPurged);
    const lintSummary = result.lintSummary;
    const lintFixed = lintSummary ? toFiniteNumber(lintSummary.fixed) : 0;
    const lintFlagged = lintSummary ? toFiniteNumber(lintSummary.flagged) : 0;
    return {
        id: row.id,
        startedAt: row.started_at,
        completedAt: row.completed_at,
        wallTimeMs,
        ok: row.ok === 1,
        scope: {
            mode: row.scope_mode,
            // `value` is only present when the row carries a truthy scope_value.
            ...(row.scope_value ? { value: row.scope_value } : {}),
        },
        taskId,
        actions: perRow.actions,
        memorySummary: perRow.memorySummary,
        memoryCleanup: perRow.memoryCleanup,
        consolidation: perRow.consolidation,
        memoryInference: perRow.memoryInference,
        graphExtraction: perRow.graphExtraction,
        reflectsWithErrorContext: perRow.reflectsWithErrorContext,
        evalCasesWritten: perRow.evalCasesWritten,
        orphansPurged,
        lintFixed,
        lintFlagged,
    };
}
|
|
703
|
+
/**
 * Build the all-zero `byPhase` stats object with one entry each for
 * consolidation, memoryInference and graphExtraction. Every call returns
 * fresh objects, and each phase gets its own stats object.
 */
function emptyPhaseStats() {
    const zeroStats = () => ({ count: 0, totalMs: 0, medianMs: 0, p95Ms: 0 });
    const consolidation = zeroStats();
    const memoryInference = zeroStats();
    const graphExtraction = zeroStats();
    return { consolidation, memoryInference, graphExtraction };
}
|
|
710
|
+
/**
 * Compute count / median / p95 / min / max over a list of run durations.
 *
 * The median and p95 are picked from the ascending-sorted durations at index
 * `floor(q * length)`, clamped to the last element. An empty list yields
 * all-zero stats. The input array is not modified.
 *
 * @param {number[]} durationsMs - duration samples.
 * @param [byPhase] - per-phase stats to attach as `byPhase`; when null or
 *   undefined, the result of `emptyPhaseStats()` is attached instead.
 * @returns {{ count: number, medianMs: number, p95Ms: number, minMs: number, maxMs: number, byPhase: object }}
 */
export function computeWallTimeStats(durationsMs, byPhase) {
    const phase = byPhase ?? emptyPhaseStats();
    if (durationsMs.length === 0) {
        return { count: 0, medianMs: 0, p95Ms: 0, minMs: 0, maxMs: 0, byPhase: phase };
    }
    // Sort a copy so the caller's array keeps its order.
    const ascending = [...durationsMs];
    ascending.sort((left, right) => left - right);
    const size = ascending.length;
    const lastIndex = size - 1;
    const valueAt = (index) => ascending[index] ?? 0;
    const quantile = (fraction) => valueAt(Math.min(lastIndex, Math.floor(fraction * size)));
    return {
        count: size,
        medianMs: quantile(0.5),
        p95Ms: quantile(0.95),
        minMs: valueAt(0),
        maxMs: valueAt(lastIndex),
        byPhase: phase,
    };
}
|
|
725
|
+
/**
 * Tally improve skip events by reason.
 *
 * Two kinds of skip events:
 *  - Per-occurrence (no usable `count`): one event per skipped ref → SUM is correct.
 *  - Aggregated snapshot (carries a positive finite `count`): a single per-run
 *    event whose count is the number of refs that hit a STABLE, whole-stash
 *    condition that run (`no_new_signal`, `profile_filtered_all_passes`). Each
 *    run re-counts the same stable set, so summing across the window re-counts
 *    it N times (the 2.7M / 3M inflation). For these we keep the MOST RECENT
 *    run's count — the current snapshot — matching how
 *    memorySummary/profileFilteredRefs are handled. Events arrive in
 *    chronological (offset) order, so the last count-bearing event per reason
 *    is the latest run's value.
 *
 * A reason that is missing, not a string, or blank is reported as "unknown".
 * An event whose `count` is not a positive finite number is treated as a
 * per-occurrence event and adds 1.
 *
 * @param events - iterable of events; reads `metadata.reason` and `metadata.count`.
 * @returns {{ skipped: number, skipReasons: Record<string, number> }} per-reason
 *   totals (summed occurrences plus latest snapshot) and their grand total.
 */
export function buildImproveSkipSummary(events) {
    // Maps rather than plain objects: reasons are arbitrary strings, and a
    // reason such as "constructor" or "__proto__" would otherwise collide with
    // inherited Object.prototype members and corrupt (or drop) the tally.
    const summed = new Map();
    const latestSnapshot = new Map();
    for (const event of events) {
        const rawReason = event.metadata?.reason;
        const reason = typeof rawReason === "string" && rawReason.trim() ? rawReason : "unknown";
        const rawCount = event.metadata?.count;
        if (typeof rawCount === "number" && Number.isFinite(rawCount) && rawCount > 0) {
            latestSnapshot.set(reason, rawCount); // overwrite → keeps the latest run's snapshot
        }
        else {
            summed.set(reason, (summed.get(reason) ?? 0) + 1);
        }
    }
    const totals = new Map(summed);
    for (const [reason, count] of latestSnapshot) {
        totals.set(reason, (totals.get(reason) ?? 0) + count);
    }
    // Object.fromEntries defines own data properties, so even a "__proto__"
    // reason is stored as a regular key on the returned plain object.
    const skipReasons = Object.fromEntries(totals);
    const skipped = Object.values(skipReasons).reduce((total, count) => total + count, 0);
    return { skipped, skipReasons };
}
|