akm-cli 0.9.0-beta.53 → 0.9.0-beta.55

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (123) hide show
  1. package/dist/cli/clack.js +56 -0
  2. package/dist/cli/confirm.js +1 -1
  3. package/dist/cli.js +5 -3
  4. package/dist/commands/agent/contribute-cli.js +2 -3
  5. package/dist/commands/env/env-cli.js +187 -202
  6. package/dist/commands/env/secret-cli.js +109 -121
  7. package/dist/commands/feedback-cli.js +152 -155
  8. package/dist/commands/health/advisories.js +151 -0
  9. package/dist/commands/health/html-report.js +33 -10
  10. package/dist/commands/health/improve-metrics.js +754 -0
  11. package/dist/commands/health/llm-usage.js +65 -0
  12. package/dist/commands/health/md-report.js +103 -0
  13. package/dist/commands/health/metrics.js +278 -0
  14. package/dist/commands/health/task-runs.js +135 -0
  15. package/dist/commands/health/types.js +18 -0
  16. package/dist/commands/health/windows.js +196 -0
  17. package/dist/commands/health.js +15 -1492
  18. package/dist/commands/improve/anti-collapse.js +170 -0
  19. package/dist/commands/improve/collapse-detector.js +3 -2
  20. package/dist/commands/improve/consolidate.js +636 -633
  21. package/dist/commands/improve/dedup.js +1 -1
  22. package/dist/commands/improve/distill/content-repair.js +202 -0
  23. package/dist/commands/improve/distill/promote-memory.js +228 -0
  24. package/dist/commands/improve/distill/quality-gate.js +233 -0
  25. package/dist/commands/improve/distill-guards.js +127 -0
  26. package/dist/commands/improve/distill.js +49 -575
  27. package/dist/commands/improve/extract-cli.js +74 -76
  28. package/dist/commands/improve/extract.js +6 -4
  29. package/dist/commands/improve/hot-probation.js +45 -0
  30. package/dist/commands/improve/improve-auto-accept.js +3 -2
  31. package/dist/commands/improve/improve-cli.js +14 -13
  32. package/dist/commands/improve/improve-result-file.js +2 -1
  33. package/dist/commands/improve/improve.js +6 -5
  34. package/dist/commands/improve/loop-stages.js +19 -21
  35. package/dist/commands/improve/outcome-loop.js +18 -16
  36. package/dist/commands/improve/preparation.js +23 -5
  37. package/dist/commands/improve/procedural.js +10 -31
  38. package/dist/commands/improve/recombine.js +19 -43
  39. package/dist/commands/improve/reflect.js +1 -1
  40. package/dist/commands/improve/schema-similarity-gate.js +168 -0
  41. package/dist/commands/improve/shared.js +48 -0
  42. package/dist/commands/observability-cli.js +4 -4
  43. package/dist/commands/proposal/drain-policies.js +2 -2
  44. package/dist/commands/proposal/drain.js +1 -1
  45. package/dist/commands/proposal/legacy-import.js +115 -0
  46. package/dist/commands/proposal/proposal-cli.js +3 -3
  47. package/dist/commands/proposal/proposal.js +2 -1
  48. package/dist/commands/proposal/propose.js +1 -1
  49. package/dist/commands/proposal/repository.js +829 -0
  50. package/dist/commands/proposal/validators/proposals.js +5 -920
  51. package/dist/commands/read/curate.js +4 -4
  52. package/dist/commands/read/remember-cli.js +132 -137
  53. package/dist/commands/read/search-cli.js +7 -5
  54. package/dist/commands/read/search.js +7 -3
  55. package/dist/commands/read/show.js +3 -5
  56. package/dist/commands/registry-cli.js +76 -87
  57. package/dist/commands/sources/add-cli.js +91 -95
  58. package/dist/commands/sources/history.js +1 -1
  59. package/dist/commands/sources/init.js +12 -0
  60. package/dist/commands/sources/schema-repair.js +1 -1
  61. package/dist/commands/sources/sources-cli.js +3 -3
  62. package/dist/commands/sources/stash-cli.js +2 -2
  63. package/dist/commands/tasks/default-tasks.js +12 -0
  64. package/dist/commands/tasks/tasks-cli.js +1 -2
  65. package/dist/commands/wiki-cli.js +2 -3
  66. package/dist/core/common.js +3 -3
  67. package/dist/core/config/config-schema.js +6 -0
  68. package/dist/core/config/config.js +12 -0
  69. package/dist/core/deep-merge.js +38 -0
  70. package/dist/core/events.js +2 -1
  71. package/dist/core/logs-db.js +8 -13
  72. package/dist/core/paths.js +14 -14
  73. package/dist/core/state-db.js +13 -1140
  74. package/dist/core/warn.js +21 -0
  75. package/dist/indexer/db/db.js +72 -709
  76. package/dist/indexer/db/entry-mapper.js +41 -0
  77. package/dist/indexer/db/schema.js +516 -0
  78. package/dist/indexer/ensure-index.js +3 -2
  79. package/dist/indexer/feedback/utility-policy.js +85 -0
  80. package/dist/indexer/graph/graph-extraction.js +2 -1
  81. package/dist/indexer/index-writer-lock.js +18 -0
  82. package/dist/indexer/indexer.js +94 -27
  83. package/dist/indexer/read-preflight.js +23 -0
  84. package/dist/indexer/search/fts-query.js +51 -0
  85. package/dist/indexer/walk/walker.js +21 -13
  86. package/dist/integrations/agent/detect.js +9 -0
  87. package/dist/integrations/agent/index.js +1 -1
  88. package/dist/integrations/agent/spawn.js +15 -66
  89. package/dist/llm/client.js +12 -0
  90. package/dist/llm/embedder.js +26 -2
  91. package/dist/llm/embedders/local.js +7 -1
  92. package/dist/output/text/helpers.js +13 -0
  93. package/dist/scripts/migrate-storage.js +6903 -7424
  94. package/dist/scripts/migrations/import-fs-improve-runs-to-db.js +49 -44
  95. package/dist/setup/detect.js +9 -0
  96. package/dist/setup/legacy-config.js +106 -0
  97. package/dist/setup/prompt.js +57 -0
  98. package/dist/setup/providers.js +14 -0
  99. package/dist/setup/registry-stash-loader.js +12 -0
  100. package/dist/setup/semantic-assets.js +124 -0
  101. package/dist/setup/setup.js +25 -1608
  102. package/dist/setup/steps/connection.js +734 -0
  103. package/dist/setup/steps/output.js +31 -0
  104. package/dist/setup/steps/platforms.js +124 -0
  105. package/dist/setup/steps/semantic.js +27 -0
  106. package/dist/setup/steps/sources.js +222 -0
  107. package/dist/setup/steps/stashdir.js +42 -0
  108. package/dist/setup/steps/tasks.js +152 -0
  109. package/dist/storage/repositories/canaries-repository.js +107 -0
  110. package/dist/storage/repositories/consolidation-repository.js +38 -0
  111. package/dist/storage/repositories/embeddings-repository.js +72 -0
  112. package/dist/storage/repositories/events-repository.js +187 -0
  113. package/dist/storage/repositories/extract-sessions-repository.js +96 -0
  114. package/dist/storage/repositories/improve-runs-repository.js +130 -0
  115. package/dist/storage/repositories/index-db.js +4 -7
  116. package/dist/storage/repositories/proposals-repository.js +220 -0
  117. package/dist/storage/repositories/recombine-repository.js +213 -0
  118. package/dist/storage/repositories/task-history-repository.js +93 -0
  119. package/dist/storage/sqlite-pragmas.js +3 -3
  120. package/dist/tasks/backends/index.js +9 -0
  121. package/dist/tasks/runner.js +11 -1
  122. package/package.json +2 -2
  123. package/dist/commands/improve/homeostatic.js +0 -497
@@ -1,23 +1,25 @@
1
1
  // This Source Code Form is subject to the terms of the Mozilla Public
2
2
  // License, v. 2.0. If a copy of the MPL was not distributed with this
3
3
  // file, You can obtain one at https://mozilla.org/MPL/2.0/.
4
- import fs from "node:fs";
5
4
  import { loadConfig } from "../core/config/config.js";
6
5
  import { ConfigError, UsageError } from "../core/errors.js";
7
- import { appendEvent, readEvents } from "../core/events.js";
8
- import { buildTaskRunId, getLoggedRunIds, openLogsDatabase } from "../core/logs-db.js";
6
+ import { readEvents } from "../core/events.js";
7
+ import { openLogsDatabase } from "../core/logs-db.js";
9
8
  import { getStateDbPathInDataDir } from "../core/paths.js";
10
- import { getLatestCycleMetrics, listExistingTableNames, listProposalGateDecisions, listStateProposals, openStateDatabase, queryCompletedTaskIntervals, queryImproveRuns, queryTaskHistory, } from "../core/state-db.js";
9
+ import { listExistingTableNames, openStateDatabase } from "../core/state-db.js";
11
10
  import { parseSinceToIso } from "../core/time.js";
12
11
  import { readSemanticStatus } from "../indexer/search/semantic-status.js";
13
12
  import { getExecutionLogCandidates } from "../integrations/session-logs/index.js";
14
- import { LLM_USAGE_EVENT } from "../llm/usage-persist.js";
13
+ import { queryTaskHistory } from "../storage/repositories/task-history-repository.js";
14
+ import { collectImproveAdvisories } from "./health/advisories.js";
15
15
  import { HEALTH_CHECKS } from "./health/checks.js";
16
- import { gateDecisionsToSamples, summarizeCalibration } from "./improve/calibration.js";
16
+ import { buildImproveSkipSummary, computeWallTimeStats, parseTaskMetadata, roundRate, summarizeImproveCompleted, summarizeImproveRuns, } from "./health/improve-metrics.js";
17
+ import { readLlmUsageAggregate } from "./health/llm-usage.js";
18
+ import { computeDegradationMetrics, computeDenominatorFixedCoverage, computeEnrichmentMintingRollup, probeStateDbRoundTrip, readCalibration, } from "./health/metrics.js";
19
+ import { buildPerRunSummaries } from "./health/task-runs.js";
20
+ import { ACTIVE_RUN_WARN_MS, IMPROVE_COMPLETED_EVENT, } from "./health/types.js";
21
+ import { buildWindowMetrics, computeDeltas, partitionLogBackedRows, resolveWindowCompare } from "./health/windows.js";
17
22
  const DEFAULT_SINCE_MS = 24 * 60 * 60 * 1000;
18
- const IMPROVE_COMPLETED_EVENT = "improve_completed";
19
- const HEALTH_PROBE_EVENT = "health_probe";
20
- const ACTIVE_RUN_WARN_MS = 15 * 60 * 1000;
21
23
  export function parseHealthSince(since) {
22
24
  if (since === undefined || since.trim() === "") {
23
25
  return new Date(Date.now() - DEFAULT_SINCE_MS).toISOString();
@@ -35,1318 +37,6 @@ export function parseHealthSince(since) {
35
37
  }
36
38
  return parseSinceToIso(trimmed);
37
39
  }
38
- function roundRate(value) {
39
- return Number(value.toFixed(4));
40
- }
41
- function parseTaskMetadata(row) {
42
- try {
43
- return JSON.parse(row.metadata_json);
44
- }
45
- catch {
46
- return {};
47
- }
48
- }
49
- function createUnknownImproveMetrics() {
50
- return {
51
- invoked: 0,
52
- completed: 0,
53
- skipped: 0,
54
- skipReasons: {},
55
- plannedRefs: 0,
56
- profileFilteredRefs: 0,
57
- actions: {
58
- reflect: { ok: 0, failed: 0, cooldown: 0, skipped: 0, guardRejected: 0, skippedByReason: {} },
59
- distill: {
60
- queued: 0,
61
- llmFailed: 0,
62
- qualityRejected: 0,
63
- judgeRejected: 0,
64
- validatorRejected: 0,
65
- configDisabled: 0,
66
- skipped: 0,
67
- skippedByReason: {},
68
- deferred: 0,
69
- deferredByReason: {},
70
- },
71
- memoryPrune: 0,
72
- memoryInference: 0,
73
- graphExtraction: 0,
74
- error: 0,
75
- },
76
- autoAccept: { promoted: 0, validationFailed: 0 },
77
- calibration: summarizeCalibration([]),
78
- reflectsWithErrorContext: 0,
79
- coverageGapCount: 0,
80
- evalCasesWritten: 0,
81
- deadUrlCount: 0,
82
- memorySummary: { eligible: 0, derived: 0 },
83
- memoryCleanup: {
84
- pruneCandidates: 0,
85
- contradictionCandidates: 0,
86
- beliefStateTransitions: 0,
87
- consolidationCandidates: 0,
88
- archived: 0,
89
- warnings: 0,
90
- },
91
- consolidation: {
92
- ran: false,
93
- processed: 0,
94
- promoted: 0,
95
- merged: 0,
96
- deleted: 0,
97
- contradicted: 0,
98
- judgedNoAction: 0,
99
- mergedSecondaries: 0,
100
- failedChunkMemories: 0,
101
- skipReasons: {},
102
- failedChunks: 0,
103
- totalChunks: 0,
104
- durationMs: 0,
105
- },
106
- memoryInference: {
107
- ran: false,
108
- considered: 0,
109
- cacheHits: 0,
110
- retryAttempts: 0,
111
- freshAttempts: 0,
112
- splitParents: 0,
113
- written: 0,
114
- skippedNoFacts: 0,
115
- skippedChildExists: 0,
116
- skippedAborted: 0,
117
- unaccounted: 0,
118
- htmlErrorCount: 0,
119
- yieldEligibleRuns: 0,
120
- yieldEligibleConsidered: 0,
121
- yieldEligibleWritten: 0,
122
- yieldRate: 0,
123
- durationMs: 0,
124
- writes: 0,
125
- },
126
- graphExtraction: {
127
- ran: false,
128
- extractedFiles: 0,
129
- entities: 0,
130
- relations: 0,
131
- cacheHits: 0,
132
- cacheMisses: 0,
133
- cacheHitRate: 0,
134
- truncations: 0,
135
- failures: 0,
136
- htmlErrors: 0,
137
- retryAttempts: 0,
138
- nonArrayBatchFailures: 0,
139
- durationMs: 0,
140
- },
141
- sessionExtraction: {
142
- ran: false,
143
- sessionsScanned: 0,
144
- sessionsExtracted: 0,
145
- sessionsSkipped: 0,
146
- proposalsCreated: 0,
147
- warnings: 0,
148
- durationMs: 0,
149
- },
150
- wallTime: {
151
- count: 0,
152
- medianMs: 0,
153
- p95Ms: 0,
154
- minMs: 0,
155
- maxMs: 0,
156
- byPhase: {
157
- consolidation: { count: 0, totalMs: 0, medianMs: 0, p95Ms: 0 },
158
- memoryInference: { count: 0, totalMs: 0, medianMs: 0, p95Ms: 0 },
159
- graphExtraction: { count: 0, totalMs: 0, medianMs: 0, p95Ms: 0 },
160
- },
161
- },
162
- perfTelemetry: {
163
- dedupPoolSize: 0,
164
- llmPoolSize: 0,
165
- judgedCacheSkipped: 0,
166
- embedMs: 0,
167
- embedCacheHits: 0,
168
- embedCacheMisses: 0,
169
- overBudgetRuns: 0,
170
- runsWithTelemetry: 0,
171
- },
172
- coverage: {
173
- rate: Number.NaN,
174
- eligibleFraction: Number.NaN,
175
- acceptedProposals: 0,
176
- totalAssets: 0,
177
- },
178
- };
179
- }
180
- function toFiniteNumber(value) {
181
- if (typeof value === "number" && Number.isFinite(value))
182
- return value;
183
- if (typeof value === "string" && value.trim()) {
184
- const parsed = Number(value);
185
- if (Number.isFinite(parsed))
186
- return parsed;
187
- }
188
- return 0;
189
- }
190
- /**
191
- * Event-derived metrics. Only `completed` and skipReasons/invoked are sourced
192
- * from events in v2 — the richer fields come from {@link summarizeImproveRuns}.
193
- * The function still receives `improve_completed` events so that the completed
194
- * count reflects the canonical event stream (it lines up 1:1 with improve_runs
195
- * rows in practice, but the events table remains the system-of-record for the
196
- * existence of a run).
197
- */
198
- function summarizeImproveCompleted(events) {
199
- const metrics = createUnknownImproveMetrics();
200
- metrics.completed = events.length;
201
- return metrics;
202
- }
203
- /**
204
- * Project a single `improve_runs.result_json` envelope into an accumulator-shaped
205
- * ImproveHealthMetrics. The aggregator merges these per-row metrics into one
206
- * window-level metric.
207
- */
208
- function projectRunMetrics(result) {
209
- const metrics = createUnknownImproveMetrics();
210
- // plannedRefs (array of {ref, reason})
211
- const plannedRefs = result.plannedRefs;
212
- if (Array.isArray(plannedRefs))
213
- metrics.plannedRefs += plannedRefs.length;
214
- // profileFilteredRefs (array of {ref, reason}) — 2026-05-27: pre-filter
215
- // bucket from `collectEligibleRefs` so the metric reflects work the
216
- // planner dropped before signal-delta / per-pass dispatch.
217
- const profileFilteredRefs = result.profileFilteredRefs;
218
- if (Array.isArray(profileFilteredRefs))
219
- metrics.profileFilteredRefs += profileFilteredRefs.length;
220
- // actions: split reflect / distill by outcome, count others.
221
- const actions = result.actions;
222
- if (Array.isArray(actions)) {
223
- for (const action of actions) {
224
- const mode = typeof action.mode === "string" ? action.mode : "";
225
- switch (mode) {
226
- case "reflect":
227
- metrics.actions.reflect.ok += 1;
228
- break;
229
- case "reflect-failed":
230
- metrics.actions.reflect.failed += 1;
231
- break;
232
- case "reflect-cooldown":
233
- metrics.actions.reflect.cooldown += 1;
234
- break;
235
- case "reflect-skipped": {
236
- metrics.actions.reflect.skipped += 1;
237
- const r = action.result;
238
- const reason = typeof r?.reason === "string" && r.reason.trim() ? r.reason : "unknown";
239
- metrics.actions.reflect.skippedByReason[reason] = (metrics.actions.reflect.skippedByReason[reason] ?? 0) + 1;
240
- break;
241
- }
242
- case "reflect-guard-rejected":
243
- metrics.actions.reflect.guardRejected += 1;
244
- break;
245
- case "distill": {
246
- const r = action.result;
247
- const outcome = typeof r?.outcome === "string" ? r.outcome : "";
248
- switch (outcome) {
249
- case "queued":
250
- metrics.actions.distill.queued += 1;
251
- break;
252
- case "llm_failed":
253
- metrics.actions.distill.llmFailed += 1;
254
- break;
255
- case "quality_rejected":
256
- case "review_needed":
257
- metrics.actions.distill.qualityRejected += 1;
258
- metrics.actions.distill.judgeRejected += 1;
259
- break;
260
- case "validation_failed":
261
- metrics.actions.distill.qualityRejected += 1;
262
- metrics.actions.distill.validatorRejected += 1;
263
- break;
264
- case "config_disabled":
265
- metrics.actions.distill.configDisabled += 1;
266
- break;
267
- case "skipped": {
268
- // Previously dropped on the floor. The four sub-paths that emit
269
- // `outcome: "skipped"` (see distill.ts:893, 1024, 1120, 1576):
270
- // - recursive_lesson_input (type guard refused a lesson input)
271
- // - conflict_noop (LLM resolved destination conflict as NOOP)
272
- // - proposal-skipped cooldown / dedup at persistence
273
- // 465 events/7d in the user's live stack. The result message
274
- // typically encodes the reason; we also accept an explicit
275
- // `skipReason` field when downstream code sets it.
276
- metrics.actions.distill.deferred += 1;
277
- const explicitReason = typeof r?.skipReason === "string" ? r.skipReason : undefined;
278
- const msg = typeof r?.message === "string" ? r.message : "";
279
- let reason = explicitReason ?? "unknown";
280
- if (!explicitReason) {
281
- if (/lesson inputs/i.test(msg))
282
- reason = "recursive_lesson_input";
283
- else if (/NOOP/.test(msg))
284
- reason = "conflict_noop";
285
- else if (/cooldown/i.test(msg))
286
- reason = "proposal_cooldown";
287
- else if (/content[_ ]?hash/i.test(msg))
288
- reason = "content_hash_match";
289
- }
290
- metrics.actions.distill.deferredByReason[reason] =
291
- (metrics.actions.distill.deferredByReason[reason] ?? 0) + 1;
292
- break;
293
- }
294
- default:
295
- break;
296
- }
297
- break;
298
- }
299
- case "distill-skipped": {
300
- metrics.actions.distill.skipped += 1;
301
- const r = action.result;
302
- const reason = typeof r?.reason === "string" && r.reason.trim() ? r.reason : "unknown";
303
- metrics.actions.distill.skippedByReason[reason] = (metrics.actions.distill.skippedByReason[reason] ?? 0) + 1;
304
- break;
305
- }
306
- case "memory-prune":
307
- metrics.actions.memoryPrune += 1;
308
- break;
309
- case "memory-inference":
310
- metrics.actions.memoryInference += 1;
311
- break;
312
- case "graph-extraction":
313
- metrics.actions.graphExtraction += 1;
314
- break;
315
- case "error":
316
- metrics.actions.error += 1;
317
- break;
318
- }
319
- }
320
- }
321
- metrics.autoAccept.promoted += toFiniteNumber(result.gateAutoAcceptedCount);
322
- metrics.autoAccept.validationFailed += toFiniteNumber(result.gateAutoAcceptFailedCount);
323
- metrics.reflectsWithErrorContext += toFiniteNumber(result.reflectsWithErrorContext);
324
- if (Array.isArray(result.coverageGaps))
325
- metrics.coverageGapCount += result.coverageGaps.length;
326
- metrics.evalCasesWritten += toFiniteNumber(result.evalCasesWritten);
327
- if (Array.isArray(result.deadUrls))
328
- metrics.deadUrlCount += result.deadUrls.length;
329
- const memorySummary = result.memorySummary;
330
- if (memorySummary) {
331
- metrics.memorySummary.eligible += toFiniteNumber(memorySummary.eligible);
332
- metrics.memorySummary.derived += toFiniteNumber(memorySummary.derived);
333
- }
334
- const memoryCleanup = result.memoryCleanup;
335
- if (memoryCleanup) {
336
- if (Array.isArray(memoryCleanup.pruneCandidates))
337
- metrics.memoryCleanup.pruneCandidates += memoryCleanup.pruneCandidates.length;
338
- if (Array.isArray(memoryCleanup.contradictionCandidates))
339
- metrics.memoryCleanup.contradictionCandidates += memoryCleanup.contradictionCandidates.length;
340
- if (Array.isArray(memoryCleanup.beliefStateTransitions))
341
- metrics.memoryCleanup.beliefStateTransitions += memoryCleanup.beliefStateTransitions.length;
342
- if (Array.isArray(memoryCleanup.consolidationCandidates))
343
- metrics.memoryCleanup.consolidationCandidates += memoryCleanup.consolidationCandidates.length;
344
- if (Array.isArray(memoryCleanup.archived))
345
- metrics.memoryCleanup.archived += memoryCleanup.archived.length;
346
- if (Array.isArray(memoryCleanup.warnings))
347
- metrics.memoryCleanup.warnings += memoryCleanup.warnings.length;
348
- }
349
- const consolidation = result.consolidation;
350
- if (consolidation) {
351
- metrics.consolidation.processed += toFiniteNumber(consolidation.processed);
352
- metrics.consolidation.merged += toFiniteNumber(consolidation.merged);
353
- metrics.consolidation.deleted += toFiniteNumber(consolidation.deleted);
354
- metrics.consolidation.contradicted += toFiniteNumber(consolidation.contradicted);
355
- if (Array.isArray(consolidation.promoted))
356
- metrics.consolidation.promoted += consolidation.promoted.length;
357
- metrics.consolidation.failedChunks += toFiniteNumber(consolidation.failedChunks);
358
- metrics.consolidation.totalChunks += toFiniteNumber(consolidation.totalChunks);
359
- metrics.consolidation.durationMs += toFiniteNumber(consolidation.durationMs);
360
- metrics.consolidation.judgedNoAction += toFiniteNumber(consolidation.judgedNoAction);
361
- metrics.consolidation.mergedSecondaries += toFiniteNumber(consolidation.mergedSecondaries);
362
- metrics.consolidation.failedChunkMemories += toFiniteNumber(consolidation.failedChunkMemories);
363
- // Structured emitter (new on this branch): consolidate.ts now pushes
364
- // per-ref grouped `{ref, skips: [{op, reason}]}` entries to `skipReasons`
365
- // for every deterministic post-LLM rejection. Each ref appears once but
366
- // may carry multiple skips; aggregate every reason. Pre-fix envelopes have
367
- // neither field, so be defensive.
368
- const skipReasons = consolidation.skipReasons;
369
- if (Array.isArray(skipReasons)) {
370
- for (const entry of skipReasons) {
371
- if (!entry || typeof entry !== "object")
372
- continue;
373
- const skips = entry.skips;
374
- if (!Array.isArray(skips))
375
- continue;
376
- for (const skip of skips) {
377
- if (!skip || typeof skip !== "object")
378
- continue;
379
- const reason = skip.reason;
380
- if (typeof reason !== "string" || !reason.trim())
381
- continue;
382
- metrics.consolidation.skipReasons[reason] = (metrics.consolidation.skipReasons[reason] ?? 0) + 1;
383
- }
384
- }
385
- }
386
- // WS-5: extract perf telemetry from the consolidation envelope.
387
- // Pre-WS-5 envelopes lack `perfTelemetry`; be defensive.
388
- const perf = consolidation.perfTelemetry;
389
- if (perf) {
390
- metrics.perfTelemetry.runsWithTelemetry += 1;
391
- metrics.perfTelemetry.dedupPoolSize += toFiniteNumber(perf.dedupPoolSize);
392
- metrics.perfTelemetry.llmPoolSize += toFiniteNumber(perf.llmPoolSize);
393
- metrics.perfTelemetry.judgedCacheSkipped += toFiniteNumber(perf.judgedCacheSkipped);
394
- metrics.perfTelemetry.embedMs += toFiniteNumber(perf.embedMs);
395
- metrics.perfTelemetry.embedCacheHits += toFiniteNumber(perf.embedCacheHits);
396
- metrics.perfTelemetry.embedCacheMisses += toFiniteNumber(perf.embedCacheMisses);
397
- const budgetFrac = toFiniteNumber(perf.estimatedBudgetFractionUsed);
398
- if (budgetFrac > 1.0)
399
- metrics.perfTelemetry.overBudgetRuns += 1;
400
- }
401
- }
402
- const memoryInference = result.memoryInference;
403
- if (memoryInference) {
404
- const considered = toFiniteNumber(memoryInference.considered);
405
- const writtenFacts = toFiniteNumber(memoryInference.writtenFacts);
406
- metrics.memoryInference.considered += considered;
407
- metrics.memoryInference.cacheHits += toFiniteNumber(memoryInference.cacheHits);
408
- metrics.memoryInference.retryAttempts += toFiniteNumber(memoryInference.retryAttempts);
409
- metrics.memoryInference.splitParents += toFiniteNumber(memoryInference.splitParents);
410
- metrics.memoryInference.written += writtenFacts;
411
- metrics.memoryInference.skippedNoFacts += toFiniteNumber(memoryInference.skippedNoFacts);
412
- metrics.memoryInference.skippedChildExists += toFiniteNumber(memoryInference.skippedChildExists);
413
- metrics.memoryInference.skippedAborted += toFiniteNumber(memoryInference.skippedAborted);
414
- metrics.memoryInference.unaccounted += toFiniteNumber(memoryInference.unaccounted);
415
- metrics.memoryInference.htmlErrorCount += toFiniteNumber(memoryInference.htmlErrorCount);
416
- // Yield-rate gating: pre-cache-feature envelopes lack the `cacheHits`
417
- // field entirely. Treating their `considered` as freshAttempts (since
418
- // cacheHits=0) is mathematically tempting but operationally wrong —
419
- // historical runs with the legacy schema have no cache instrumentation
420
- // and the SUM dragged the reported rate to ~14% in local data. Only
421
- // contribute to the yield aggregate when the envelope actually carries
422
- // the field. See investigation 2026-05-26.
423
- if (Object.hasOwn(memoryInference, "cacheHits")) {
424
- metrics.memoryInference.yieldEligibleRuns += 1;
425
- metrics.memoryInference.yieldEligibleConsidered += considered;
426
- metrics.memoryInference.yieldEligibleWritten += writtenFacts;
427
- }
428
- }
429
- metrics.memoryInference.durationMs += toFiniteNumber(result.memoryInferenceDurationMs);
430
- const graphExtraction = result.graphExtraction;
431
- if (graphExtraction) {
432
- const quality = graphExtraction.quality;
433
- if (quality)
434
- metrics.graphExtraction.extractedFiles += toFiniteNumber(quality.extractedFiles);
435
- metrics.graphExtraction.entities += toFiniteNumber(graphExtraction.totalEntities);
436
- metrics.graphExtraction.relations += toFiniteNumber(graphExtraction.totalRelations);
437
- const telemetry = graphExtraction.telemetry;
438
- if (telemetry) {
439
- metrics.graphExtraction.cacheHits += toFiniteNumber(telemetry.cacheHits);
440
- metrics.graphExtraction.cacheMisses += toFiniteNumber(telemetry.cacheMisses);
441
- metrics.graphExtraction.truncations += toFiniteNumber(telemetry.truncationCount);
442
- metrics.graphExtraction.failures += toFiniteNumber(telemetry.failureCount);
443
- metrics.graphExtraction.htmlErrors += toFiniteNumber(telemetry.htmlErrorCount);
444
- metrics.graphExtraction.retryAttempts += toFiniteNumber(telemetry.retryAttempts);
445
- metrics.graphExtraction.nonArrayBatchFailures += toFiniteNumber(telemetry.nonArrayBatchFailures);
446
- }
447
- }
448
- metrics.graphExtraction.durationMs += toFiniteNumber(result.graphExtractionDurationMs);
449
- if (Array.isArray(result.extract)) {
450
- for (const e of result.extract) {
451
- metrics.sessionExtraction.sessionsScanned += toFiniteNumber(e.sessionsProcessed);
452
- metrics.sessionExtraction.sessionsSkipped += toFiniteNumber(e.sessionsSkipped);
453
- if (Array.isArray(e.sessions)) {
454
- metrics.sessionExtraction.sessionsExtracted += e.sessions.filter((s) => Array.isArray(s.proposalIds) && s.proposalIds.length > 0).length;
455
- }
456
- metrics.sessionExtraction.proposalsCreated += Array.isArray(e.proposals) ? e.proposals.length : 0;
457
- metrics.sessionExtraction.warnings += Array.isArray(e.warnings) ? e.warnings.length : 0;
458
- metrics.sessionExtraction.durationMs += toFiniteNumber(e.durationMs);
459
- }
460
- }
461
- return metrics;
462
- }
463
- /**
464
- * Finalize derived flags and rates on an accumulator. Used both for the
465
- * window-level aggregate and for each per-run row in --detail per-run mode
466
- * so the single-row metrics still expose `ran` / `yieldRate` / `cacheHitRate`.
467
- */
468
- function finalizeImproveMetrics(metrics) {
469
- metrics.consolidation.ran =
470
- metrics.consolidation.processed > 0 ||
471
- metrics.consolidation.durationMs > 0 ||
472
- metrics.consolidation.promoted > 0 ||
473
- metrics.consolidation.merged > 0 ||
474
- metrics.consolidation.deleted > 0 ||
475
- metrics.consolidation.contradicted > 0 ||
476
- metrics.consolidation.totalChunks > 0;
477
- metrics.memoryInference.ran =
478
- metrics.memoryInference.considered > 0 ||
479
- metrics.memoryInference.written > 0 ||
480
- metrics.memoryInference.durationMs > 0;
481
- metrics.memoryInference.writes = metrics.memoryInference.written;
482
- // Yield denominator excludes cache hits AND legacy (pre-cacheHits-field)
483
- // envelopes. Only runs whose envelope carries a `cacheHits` field
484
- // contribute to freshAttempts/yieldRate; legacy rows remain in
485
- // `considered`/`written` for totals but are excluded from the rate so
486
- // they cannot drag it down. See ImproveHealthMetrics.memoryInference
487
- // jsdoc for the rationale.
488
- metrics.memoryInference.freshAttempts = Math.max(0, metrics.memoryInference.yieldEligibleConsidered -
489
- metrics.memoryInference.cacheHits -
490
- metrics.memoryInference.skippedAborted);
491
- metrics.memoryInference.yieldRate =
492
- metrics.memoryInference.freshAttempts > 0
493
- ? roundRate(metrics.memoryInference.yieldEligibleWritten / metrics.memoryInference.freshAttempts)
494
- : 0;
495
- metrics.graphExtraction.ran =
496
- metrics.graphExtraction.extractedFiles > 0 ||
497
- metrics.graphExtraction.entities > 0 ||
498
- metrics.graphExtraction.durationMs > 0;
499
- const cacheTotal = metrics.graphExtraction.cacheHits + metrics.graphExtraction.cacheMisses;
500
- metrics.graphExtraction.cacheHitRate = cacheTotal > 0 ? roundRate(metrics.graphExtraction.cacheHits / cacheTotal) : 0;
501
- metrics.sessionExtraction.ran =
502
- metrics.sessionExtraction.sessionsScanned > 0 ||
503
- metrics.sessionExtraction.proposalsCreated > 0 ||
504
- metrics.sessionExtraction.durationMs > 0;
505
- }
506
- /**
507
- * Merge per-row metrics from `src` into accumulator `dst`. All numeric fields
508
- * are additive; cumulative rates are recomputed by finalizeImproveMetrics.
509
- */
510
function mergeImproveMetrics(dst, src) {
    // dst[key] += src[key] for every listed numeric counter.
    const sumInto = (target, source, keys) => {
        for (const key of keys) {
            target[key] += source[key];
        }
    };
    // Per-reason histograms: a reason not yet present on the target starts from 0.
    const sumHistogram = (target, source) => {
        for (const [reason, count] of Object.entries(source)) {
            target[reason] = (target[reason] ?? 0) + count;
        }
    };
    dst.plannedRefs += src.plannedRefs;
    // profileFilteredRefs is deliberately NOT summed. It is the count of refs the
    // planner drops up-front for the active profile, recomputed against the
    // (stable) stash on every run, i.e. a snapshot rather than a per-run
    // increment. Summing it re-counts the same refs each run (the ~2.4M bug);
    // summarizeImproveRuns sets it from the most recent run instead.
    sumInto(dst.actions.reflect, src.actions.reflect, ["ok", "failed", "cooldown", "skipped", "guardRejected"]);
    sumHistogram(dst.actions.reflect.skippedByReason, src.actions.reflect.skippedByReason);
    sumInto(dst.actions.distill, src.actions.distill, [
        "queued",
        "llmFailed",
        "qualityRejected",
        "judgeRejected",
        "validatorRejected",
        "configDisabled",
        "skipped",
    ]);
    sumHistogram(dst.actions.distill.skippedByReason, src.actions.distill.skippedByReason);
    dst.actions.distill.deferred += src.actions.distill.deferred;
    sumHistogram(dst.actions.distill.deferredByReason, src.actions.distill.deferredByReason);
    sumInto(dst.actions, src.actions, ["memoryPrune", "memoryInference", "graphExtraction", "error"]);
    sumInto(dst.autoAccept, src.autoAccept, ["promoted", "validationFailed"]);
    sumInto(dst, src, ["reflectsWithErrorContext", "coverageGapCount", "evalCasesWritten", "deadUrlCount"]);
    // memorySummary (derived/eligible) is likewise a WHOLE-STASH snapshot recorded
    // on every run; summing it across the window inflates it ~N× (the
    // 1.2M-eligible bug). It is intentionally not merged here and is taken from
    // the most recent run in summarizeImproveRuns.
    sumInto(dst.memoryCleanup, src.memoryCleanup, [
        "pruneCandidates",
        "contradictionCandidates",
        "beliefStateTransitions",
        "consolidationCandidates",
        "archived",
        "warnings",
    ]);
    sumInto(dst.consolidation, src.consolidation, [
        "processed",
        "promoted",
        "merged",
        "deleted",
        "contradicted",
        "failedChunks",
        "totalChunks",
        "durationMs",
        "judgedNoAction",
        "mergedSecondaries",
        "failedChunkMemories",
    ]);
    sumHistogram(dst.consolidation.skipReasons, src.consolidation.skipReasons);
    sumInto(dst.memoryInference, src.memoryInference, [
        "considered",
        "cacheHits",
        "splitParents",
        "written",
        "skippedNoFacts",
        "skippedChildExists",
        "skippedAborted",
        "unaccounted",
        "htmlErrorCount",
        "yieldEligibleRuns",
        "yieldEligibleConsidered",
        "yieldEligibleWritten",
        "durationMs",
    ]);
    sumInto(dst.graphExtraction, src.graphExtraction, [
        "extractedFiles",
        "entities",
        "relations",
        "cacheHits",
        "cacheMisses",
        "truncations",
        "failures",
        "htmlErrors",
        "nonArrayBatchFailures",
        "durationMs",
    ]);
    sumInto(dst.sessionExtraction, src.sessionExtraction, [
        "sessionsScanned",
        "sessionsExtracted",
        "sessionsSkipped",
        "proposalsCreated",
        "warnings",
        "durationMs",
    ]);
    // WS-5: perf telemetry counters are additive sums.
    sumInto(dst.perfTelemetry, src.perfTelemetry, [
        "dedupPoolSize",
        "llmPoolSize",
        "judgedCacheSkipped",
        "embedMs",
        "embedCacheHits",
        "embedCacheMisses",
        "overBudgetRuns",
        "runsWithTelemetry",
    ]);
    // coverage: acceptedProposals is additive; totalAssets is a snapshot (like
    // memorySummary) and is set from the most recent run in summarizeImproveRuns.
    dst.coverage.acceptedProposals += src.coverage.acceptedProposals;
}
614
/**
 * Aggregate every improve run returned by `queryImproveRuns(db, since, until)`
 * into a single metrics object.
 *
 * Additive counters are merged via `mergeImproveMetrics`; snapshot-style values
 * (`memorySummary`, `profileFilteredRefs`) are taken from the run with the
 * latest `started_at`. Rows whose `result_json` does not parse, or parses to
 * something that is not an object (e.g. `null` from a NULL column), are skipped
 * but still counted in `runCount`.
 *
 * @param db - Handle passed through to `queryImproveRuns`.
 * @param since - Window start.
 * @param until - Window end (may be undefined).
 * @returns `{ metrics, runCount }` where `runCount` is the number of rows queried.
 */
function summarizeImproveRuns(db, since, until) {
    const accum = createUnknownImproveMetrics();
    const rows = queryImproveRuns(db, since, until);
    // Per-phase wall-time samples. Each entry is one envelope's durationMs for
    // that phase. Phases that did not run on a given envelope are simply
    // omitted (NOT counted as 0) so the median/p95 reflect actual phase work.
    const phaseDurations = {
        consolidation: [],
        memoryInference: [],
        graphExtraction: [],
    };
    // memorySummary is a whole-stash snapshot per run, so the window value is the
    // MOST RECENT run's snapshot (current state), not a sum across runs.
    let latestStartMs = Number.NEGATIVE_INFINITY;
    let latestMemorySummary;
    let latestProfileFilteredRefs = 0;
    for (const row of rows) {
        let result;
        try {
            result = JSON.parse(row.result_json);
        }
        catch {
            continue;
        }
        // JSON.parse(null) and JSON.parse("null") both succeed and return null;
        // dereferencing that below would throw and abort the whole summary.
        // Treat any non-object payload like a malformed row.
        if (typeof result !== "object" || result === null) {
            continue;
        }
        const perRow = projectRunMetrics(result);
        mergeImproveMetrics(accum, perRow);
        const startMs = new Date(row.started_at).getTime();
        if (Number.isFinite(startMs) && startMs >= latestStartMs) {
            latestStartMs = startMs;
            latestMemorySummary = perRow.memorySummary;
            latestProfileFilteredRefs = perRow.profileFilteredRefs;
        }
        // Collect per-phase durations directly off the envelope. consolidation's
        // duration lives inside the sub-object; memoryInference and graphExtraction
        // expose top-level *DurationMs keys (`memoryInferenceDurationMs`,
        // `graphExtractionDurationMs`) when they actually ran on that envelope.
        const consolMs = toFiniteNumber(result.consolidation?.durationMs);
        if (consolMs > 0) {
            phaseDurations.consolidation.push(consolMs);
        }
        const memMs = toFiniteNumber(result.memoryInferenceDurationMs);
        if (memMs > 0) {
            phaseDurations.memoryInference.push(memMs);
        }
        const graphMs = toFiniteNumber(result.graphExtractionDurationMs);
        if (graphMs > 0) {
            phaseDurations.graphExtraction.push(graphMs);
        }
    }
    finalizeImproveMetrics(accum);
    // Snapshot values are applied after finalization so they overwrite whatever
    // the accumulator held.
    if (latestMemorySummary) {
        accum.memorySummary = latestMemorySummary;
    }
    accum.profileFilteredRefs = latestProfileFilteredRefs;
    accum.wallTime.byPhase = {
        consolidation: summarizePhaseDurations(phaseDurations.consolidation),
        memoryInference: summarizePhaseDurations(phaseDurations.memoryInference),
        graphExtraction: summarizePhaseDurations(phaseDurations.graphExtraction),
    };
    return { metrics: accum, runCount: rows.length };
}
672
/**
 * Roll a list of per-envelope phase durations up into the
 * `wallTime.byPhase.*` shape: `{ count, totalMs, medianMs, p95Ms }`.
 *
 * Quantiles are read from the ascending-sorted samples at index
 * `floor(q * count)` (clamped to the last element), the same picker
 * computeWallTimeStats uses, so the two sets of numbers are comparable.
 * An empty input yields all zeros. The input array is not mutated.
 *
 * @param samples - Duration samples in milliseconds.
 */
function summarizePhaseDurations(samples) {
    const count = samples.length;
    if (count === 0) {
        return { count: 0, totalMs: 0, medianMs: 0, p95Ms: 0 };
    }
    const ascending = samples.slice().sort((left, right) => left - right);
    const quantile = (q) => {
        const index = Math.min(count - 1, Math.floor(q * count));
        return ascending[index] ?? 0;
    };
    let totalMs = 0;
    for (const value of ascending) {
        totalMs += value;
    }
    return {
        count,
        totalMs,
        medianMs: quantile(0.5),
        p95Ms: quantile(0.95),
    };
}
691
/**
 * Project an improve_runs row + wall-time lookup into a single ImproveRunSummary.
 * Used by `akm health --detail per-run`.
 *
 * `row.result_json` is parsed best-effort: when it is malformed, or parses to a
 * non-object (a NULL column makes `JSON.parse` return `null`), an empty result
 * is used so the per-stage rollups come out as zeros instead of throwing.
 *
 * @param row - improve_runs row (`id`, `started_at`, `completed_at`, `ok`,
 *   `scope_mode`, `scope_value`, `result_json`).
 * @param wallTimeMs - Wall time already resolved by the caller.
 * @param taskId - Task attribution already resolved by the caller.
 */
function projectImproveRunSummary(row, wallTimeMs, taskId) {
    let result = {};
    try {
        const parsed = JSON.parse(row.result_json);
        // Only adopt object payloads; `null` would make the property reads below throw.
        if (typeof parsed === "object" && parsed !== null) {
            result = parsed;
        }
    }
    catch {
        // fall through with empty result so per-stage rollups are zeros
    }
    const perRow = projectRunMetrics(result);
    finalizeImproveMetrics(perRow);
    const orphansPurged = toFiniteNumber(result.orphansPurged);
    const lintSummary = result.lintSummary;
    const lintFixed = lintSummary ? toFiniteNumber(lintSummary.fixed) : 0;
    const lintFlagged = lintSummary ? toFiniteNumber(lintSummary.flagged) : 0;
    return {
        id: row.id,
        startedAt: row.started_at,
        completedAt: row.completed_at,
        wallTimeMs,
        // `ok` is compared against the integer 1 as read from the row.
        ok: row.ok === 1,
        scope: {
            mode: row.scope_mode,
            // `value` is only present when the row carries a truthy scope_value.
            ...(row.scope_value ? { value: row.scope_value } : {}),
        },
        taskId,
        actions: perRow.actions,
        memorySummary: perRow.memorySummary,
        memoryCleanup: perRow.memoryCleanup,
        consolidation: perRow.consolidation,
        memoryInference: perRow.memoryInference,
        graphExtraction: perRow.graphExtraction,
        reflectsWithErrorContext: perRow.reflectsWithErrorContext,
        evalCasesWritten: perRow.evalCasesWritten,
        orphansPurged,
        lintFixed,
        lintFlagged,
    };
}
733
/**
 * Load completed task intervals for the window via
 * `queryCompletedTaskIntervals` and convert them to
 * `{ startMs, endMs, durationMs }` records.
 *
 * The query window is widened by 5 minutes on each side because the cron
 * task wraps `akm improve`: the task `started_at` fires at e.g. :07:01 while
 * `recordImproveRun` writes the matching `improve_runs.started_at` later
 * (after config load, planning, etc.), so an improve_runs row can be inside
 * the window even when its enclosing task row started just before it opened.
 *
 * Rows with an unparseable timestamp, or whose end precedes their start, are
 * dropped. Intervals are returned in the order the query yields them.
 * NOTE(review): callers do a linear containment scan; any ascending-by-start
 * ordering would have to come from the query helper. Confirm there.
 */
function loadTaskIntervals(db, since, until) {
    const WIDEN_MS = 5 * 60 * 1000;
    const sinceMs = new Date(since).getTime();
    const untilMs = until ? new Date(until).getTime() : Number.POSITIVE_INFINITY;
    const widenedSince = new Date(sinceMs - WIDEN_MS).toISOString();
    // An absent (or unparseable) `until` leaves the upper bound open.
    let widenedUntil;
    if (Number.isFinite(untilMs)) {
        widenedUntil = new Date(untilMs + WIDEN_MS).toISOString();
    }
    const intervals = [];
    for (const { started_at: startedAt, completed_at: completedAt } of queryCompletedTaskIntervals(db, widenedSince, widenedUntil)) {
        const startMs = new Date(startedAt).getTime();
        const endMs = new Date(completedAt).getTime();
        const usable = Number.isFinite(startMs) && Number.isFinite(endMs) && !(endMs < startMs);
        if (usable) {
            intervals.push({ startMs, endMs, durationMs: endMs - startMs });
        }
    }
    return intervals;
}
761
/**
 * Return the first interval whose `[startMs, endMs + 1s]` range contains
 * `timestampMs`, or `undefined` when none does.
 *
 * The task wraps `akm improve`, so an `improve_runs.started_at` written by
 * `recordImproveRun` is expected to fall inside the enclosing task's
 * interval; a miss typically means the run was invoked manually rather than
 * by a scheduled task.
 *
 * The scan is linear because N is small. One second of slop is tolerated on
 * the upper bound to absorb skew between the wrapper's `completed_at` write
 * and recordImproveRun's `started_at` write.
 */
function findContainingTaskInterval(timestampMs, intervals) {
    const SLOP_MS = 1000;
    for (const candidate of intervals) {
        const startsInTime = timestampMs >= candidate.startMs;
        const endsLateEnough = timestampMs <= candidate.endMs + SLOP_MS;
        if (startsInTime && endsLateEnough) {
            return candidate;
        }
    }
    return undefined;
}
782
/**
 * Collect task-history rows whose `task_id` starts with `akm-improve` (the
 * scheduled improve tasks, e.g. `akm-improve-frequent`,
 * `akm-improve-proactive-weekly`) as `{ taskId, startMs, endMs }` records.
 *
 * The query window is widened ±5 min so a task that fired just before the
 * window opened still matches a run inside it. Rows with an unparseable
 * `started_at` are dropped; `endMs` is taken from `completed_at`, falling
 * back to `failed_at`, and is NaN when neither is set. The result is used to
 * attribute each improve run to the task that launched it.
 */
function loadImproveTaskRuns(db, since, until) {
    const WIDEN_MS = 5 * 60 * 1000;
    const sinceMs = new Date(since).getTime();
    const untilMs = until ? new Date(until).getTime() : undefined;
    const query = {
        since: new Date(sinceMs - WIDEN_MS).toISOString(),
        until: untilMs !== undefined ? new Date(untilMs + WIDEN_MS).toISOString() : undefined,
    };
    const runs = [];
    for (const row of queryTaskHistory(db, query)) {
        const isImproveTask = row.task_id.startsWith("akm-improve");
        if (!isImproveTask) {
            continue;
        }
        const startMs = new Date(row.started_at).getTime();
        if (Number.isFinite(startMs)) {
            const finishedAt = row.completed_at ?? row.failed_at;
            runs.push({
                taskId: row.task_id,
                startMs,
                endMs: finishedAt ? new Date(finishedAt).getTime() : Number.NaN,
            });
        }
    }
    return runs;
}
807
/**
 * Attribute an improve run to the scheduled task that launched it.
 *
 * Only tasks whose start lies within ±5 min of the run's start are
 * candidates. Each candidate is scored by the absolute start delta, plus the
 * absolute end delta when both end times are known; the lowest score wins
 * and ties keep the earlier candidate. Port of the health-report skill's
 * `match_task_id`.
 *
 * @returns The matching task id, or `"manual"` when `startedAt` is not
 *   parseable or no scheduled improve task matches.
 */
function matchImproveTaskId(startedAt, completedAt, taskRuns) {
    const MAX_START_DELTA_MS = 5 * 60 * 1000;
    const runStartMs = new Date(startedAt).getTime();
    if (!Number.isFinite(runStartMs)) {
        return "manual";
    }
    const runEndMs = completedAt ? new Date(completedAt).getTime() : Number.NaN;
    const runEndKnown = Number.isFinite(runEndMs);
    let winner;
    let lowestScore = Number.POSITIVE_INFINITY;
    for (const candidate of taskRuns) {
        const startDelta = Math.abs(candidate.startMs - runStartMs);
        if (startDelta > MAX_START_DELTA_MS) {
            continue;
        }
        const endDelta = runEndKnown && Number.isFinite(candidate.endMs) ? Math.abs(candidate.endMs - runEndMs) : 0;
        const score = startDelta + endDelta;
        if (score < lowestScore) {
            lowestScore = score;
            winner = candidate.taskId;
        }
    }
    return winner ?? "manual";
}
834
/**
 * Build one per-run summary for every improve run in the window.
 *
 * Wall time prefers the row's own `completed_at - started_at` delta:
 * recordImproveRun persists distinct start/end timestamps, so that delta is
 * the authoritative per-run wall time even for manually-invoked `akm improve`
 * runs with no enclosing task. Only when the row has no positive delta
 * (legacy/backfill rows where started_at == completed_at) does it fall back
 * to the duration of the task interval containing the start, or 0 when there
 * is none.
 */
function buildPerRunSummaries(db, since, until) {
    const rows = queryImproveRuns(db, since, until);
    const taskIntervals = loadTaskIntervals(db, since, until);
    const improveTaskRuns = loadImproveTaskRuns(db, since, until);
    // Resolve a single row's wall time using the preference order described above.
    const resolveWallTimeMs = (row) => {
        const startMs = new Date(row.started_at).getTime();
        const endMs = new Date(row.completed_at).getTime();
        const startKnown = Number.isFinite(startMs);
        if (startKnown && Number.isFinite(endMs) && endMs > startMs) {
            return endMs - startMs;
        }
        const enclosing = startKnown ? findContainingTaskInterval(startMs, taskIntervals) : undefined;
        return enclosing?.durationMs ?? 0;
    };
    const summaries = [];
    for (const row of rows) {
        const wallTimeMs = resolveWallTimeMs(row);
        const taskId = matchImproveTaskId(row.started_at, row.completed_at, improveTaskRuns);
        summaries.push(projectImproveRunSummary(row, wallTimeMs, taskId));
    }
    return summaries;
}
862
/**
 * Fresh all-zero `byPhase` stats for the three tracked improve phases. Every
 * call returns new objects, so callers may mutate the result freely.
 */
function emptyPhaseStats() {
    const zeroed = () => ({ count: 0, totalMs: 0, medianMs: 0, p95Ms: 0 });
    return {
        consolidation: zeroed(),
        memoryInference: zeroed(),
        graphExtraction: zeroed(),
    };
}
869
/**
 * Summarize run wall times as `{ count, medianMs, p95Ms, minMs, maxMs, byPhase }`.
 *
 * Quantiles are read from the ascending-sorted durations at index
 * `floor(q * count)`, clamped to the last element. With no durations every
 * numeric field is 0. `byPhase` is passed through untouched, defaulting to
 * `emptyPhaseStats()` when nullish. The input array is not mutated.
 */
function computeWallTimeStats(durationsMs, byPhase) {
    const phase = byPhase ?? emptyPhaseStats();
    const count = durationsMs.length;
    if (count === 0) {
        return { count: 0, medianMs: 0, p95Ms: 0, minMs: 0, maxMs: 0, byPhase: phase };
    }
    const ascending = durationsMs.slice().sort((left, right) => left - right);
    const quantile = (q) => {
        const index = Math.min(count - 1, Math.floor(q * count));
        return ascending[index] ?? 0;
    };
    const medianMs = quantile(0.5);
    const p95Ms = quantile(0.95);
    const minMs = ascending[0] ?? 0;
    const maxMs = ascending[count - 1] ?? 0;
    return { count, medianMs, p95Ms, minMs, maxMs, byPhase: phase };
}
884
/**
 * Summarize improve skip events into `{ skipped, skipReasons }`.
 *
 * Two kinds of skip events are handled differently:
 *  - Per-occurrence (no usable `metadata.count`): one event per skipped ref,
 *    so each event adds 1 to its reason.
 *  - Aggregated snapshot (positive finite `metadata.count`): a single per-run
 *    event whose count is the number of refs that hit a STABLE, whole-stash
 *    condition that run (`no_new_signal`, `profile_filtered_all_passes`).
 *    Each run re-counts the same stable set, so summing across the window
 *    re-counts it N times (the 2.7M / 3M inflation). For these the LAST
 *    count-bearing event per reason wins, matching how
 *    memorySummary/profileFilteredRefs are handled. This relies on events
 *    arriving in chronological (offset) order.
 *
 * A missing, non-string, or blank reason is bucketed under `"unknown"`.
 *
 * Counts are accumulated in Maps rather than plain objects: reasons come from
 * event metadata, and a reason such as `"constructor"` or `"__proto__"` would
 * otherwise resolve to an inherited Object.prototype member and corrupt the
 * tally.
 *
 * @param events - Iterable of events with optional `metadata.reason` / `metadata.count`.
 */
function buildImproveSkipSummary(events) {
    const summed = new Map();
    const latestSnapshot = new Map();
    for (const event of events) {
        const rawReason = event.metadata?.reason;
        const reason = typeof rawReason === "string" && rawReason.trim() ? rawReason : "unknown";
        const rawCount = event.metadata?.count;
        if (typeof rawCount === "number" && Number.isFinite(rawCount) && rawCount > 0) {
            latestSnapshot.set(reason, rawCount); // overwrite → keeps the latest run's snapshot
        }
        else {
            summed.set(reason, (summed.get(reason) ?? 0) + 1);
        }
    }
    // A reason can appear in both forms; its total is occurrences + latest snapshot.
    const totals = new Map(summed);
    for (const [reason, count] of latestSnapshot) {
        totals.set(reason, (totals.get(reason) ?? 0) + count);
    }
    let skipped = 0;
    for (const count of totals.values()) {
        skipped += count;
    }
    // Object.fromEntries defines own data properties, so every reason, whatever
    // its name, ends up as a regular key on the returned plain object.
    return { skipped, skipReasons: Object.fromEntries(totals) };
}
914
/**
 * Round-trip probe of the state database at `stateDbPath`: append one health
 * probe event, then read it back starting from the offset observed before
 * the append.
 *
 * @returns `{ ok: true, durationMs }` when the event is readable, otherwise
 *   `{ ok: false, durationMs, error }`. `durationMs` covers the append plus
 *   the read-back, not the initial offset lookup.
 */
function probeStateDbRoundTrip(stateDbPath) {
    const { nextOffset: before } = readEvents({}, { dbPath: stateDbPath });
    const started = Date.now();
    const probeEvent = { eventType: HEALTH_PROBE_EVENT, ref: "health:probe", metadata: { source: "akm health" } };
    appendEvent(probeEvent, { dbPath: stateDbPath });
    const readBackQuery = { sinceOffset: before, type: HEALTH_PROBE_EVENT, ref: "health:probe" };
    const after = readEvents(readBackQuery, { dbPath: stateDbPath });
    const durationMs = Date.now() - started;
    // The probe fails when nothing came back or the offset did not advance.
    const unreadable = after.events.length === 0 || after.nextOffset <= before;
    return unreadable
        ? { ok: false, durationMs, error: "probe event was not readable after append" }
        : { ok: true, durationMs };
}
925
/**
 * Parse a `--window-compare <duration>` shorthand into two adjacent windows
 * (current, prior). Duration syntax matches {@link parseHealthSince}:
 * a positive integer followed by `d`, `h`, or `m` (case-insensitive).
 *
 * The current window is `[now - duration, now)` and the prior window is
 * `[now - 2*duration, now - duration)`; the prior window's `until` is the
 * current window's `since`.
 *
 * @param duration - Raw flag value, e.g. `"24h"`.
 * @param now - Clock returning epoch milliseconds (injectable for tests).
 * @returns `[current, prior]` window descriptors with ISO-8601 bounds.
 * @throws UsageError (`INVALID_FLAG_VALUE`) when the value is malformed, not
 *   positive, or so large that the window bounds are not representable dates.
 */
function resolveWindowCompare(duration, now = () => Date.now()) {
    const trimmed = duration.trim();
    const durationMatch = trimmed.match(/^(\d+)([dhm])$/i);
    if (!durationMatch) {
        throw new UsageError("--window-compare must be a duration like '24h', '7d', or '30m'.", "INVALID_FLAG_VALUE");
    }
    const amount = Number.parseInt(durationMatch[1] ?? "0", 10);
    const unit = (durationMatch[2] ?? "h").toLowerCase();
    if (!Number.isFinite(amount) || amount <= 0) {
        throw new UsageError("--window-compare must be a positive duration.", "INVALID_FLAG_VALUE");
    }
    const multiplier = unit === "h" ? 60 * 60 * 1000 : unit === "m" ? 60 * 1000 : 24 * 60 * 60 * 1000;
    const ms = amount * multiplier;
    const nowMs = now();
    const currentSinceMs = nowMs - ms;
    const priorSinceMs = nowMs - 2 * ms;
    // Date#toISOString throws a bare RangeError for out-of-range instants, so an
    // absurdly large duration is reported as a usage error instead. The prior
    // window's start is the earliest bound; if it is valid, the others are too.
    if (Number.isNaN(new Date(priorSinceMs).getTime())) {
        throw new UsageError("--window-compare duration is too large.", "INVALID_FLAG_VALUE");
    }
    const currentSince = new Date(currentSinceMs).toISOString();
    const currentUntil = new Date(nowMs).toISOString();
    const priorSince = new Date(priorSinceMs).toISOString();
    const priorUntil = currentSince;
    return [
        { name: "current", since: currentSince, until: currentUntil },
        { name: "prior", since: priorSince, until: priorUntil },
    ];
}
952
/**
 * Parse one repeatable `--windows` value of the form
 * `name=...,since=...,until=...` into a window descriptor.
 *
 * Segments are comma-separated `key=value` pairs; whitespace around segments,
 * keys, and values is trimmed, empty segments are ignored, and a repeated key
 * keeps its last value. `name` and `since` are required; `until` is included
 * in the result only when non-empty. Keys other than these three are accepted
 * and ignored.
 *
 * @param raw - Raw flag value.
 * @returns `{ name, since }` plus `until` when provided.
 * @throws UsageError (`INVALID_FLAG_VALUE`) when a segment has no `=`, or
 *   when `name` or `since` is missing or empty.
 */
export function parseWindowSpec(raw) {
    const fields = {};
    const segments = raw
        .split(",")
        .map((segment) => segment.trim())
        .filter((segment) => segment.length > 0);
    for (const segment of segments) {
        const separatorAt = segment.indexOf("=");
        if (separatorAt < 0) {
            throw new UsageError(`--windows entry must be a comma-separated list of key=value pairs: ${raw}`, "INVALID_FLAG_VALUE");
        }
        const key = segment.slice(0, separatorAt).trim();
        fields[key] = segment.slice(separatorAt + 1).trim();
    }
    const { name, since, until } = fields;
    if (!name) {
        throw new UsageError(`--windows entry is missing required 'name': ${raw}`, "INVALID_FLAG_VALUE");
    }
    if (!since) {
        throw new UsageError(`--windows entry is missing required 'since': ${raw}`, "INVALID_FLAG_VALUE");
    }
    const spec = { name, since };
    if (until) {
        spec.until = until;
    }
    return spec;
}
982
/** Hard-coded list of "interesting" metric paths for window-compare deltas. */
const INTERESTING_DELTA_PATHS = [
    "improve.actions.reflect.failed",
    "improve.actions.reflect.guardRejected",
    "improve.actions.distill.llmFailed",
    "improve.actions.distill.queued",
    "improve.actions.distill.deferred",
    "improve.consolidation.promoted",
    "improve.memoryInference.written",
    "improve.memoryInference.yieldRate",
    "improve.memoryInference.skippedNoFacts",
    "improve.memoryInference.htmlErrorCount",
    "improve.graphExtraction.cacheHitRate",
    "improve.graphExtraction.failures",
    "improve.graphExtraction.htmlErrors",
    "improve.graphExtraction.nonArrayBatchFailures",
    "improve.sessionExtraction.sessionsScanned",
    "improve.sessionExtraction.proposalsCreated",
    "improve.autoAccept.promoted",
    "improve.autoAccept.validationFailed",
    "improve.wallTime.medianMs",
    "improve.wallTime.p95Ms",
];
/**
 * Read the number at a dot-separated `path` inside `obj`.
 *
 * @returns The value when it is a finite number; 0 when any segment is
 *   missing, an intermediate value is not an object, or the leaf is not a
 *   finite number (NaN and ±Infinity included).
 */
function readNumericPath(obj, path) {
    let cursor = obj;
    for (const part of path.split(".")) {
        if (typeof cursor !== "object" || cursor === null) {
            return 0;
        }
        cursor = cursor[part];
    }
    return typeof cursor === "number" && Number.isFinite(cursor) ? cursor : 0;
}
/**
 * Compute `{ from, to, pctChange }` for every path in
 * {@link INTERESTING_DELTA_PATHS}, comparing `first` against `last`.
 *
 * Paths that read 0 on both sides are omitted. `pctChange` is the percentage
 * change relative to `from`, rounded to two decimals. When `from` is 0 there
 * is no finite percentage, so the direction is reported as the string
 * `"+inf"` (or `"-inf"` when `to` is negative).
 *
 * @param first - Baseline window result.
 * @param last - Comparison window result.
 * @returns Map of metric path → delta record.
 */
function computeDeltas(first, last) {
    const out = {};
    for (const path of INTERESTING_DELTA_PATHS) {
        const from = readNumericPath(first, path);
        const to = readNumericPath(last, path);
        if (from === 0 && to === 0) {
            continue;
        }
        let pctChange;
        if (from === 0) {
            // `to` is non-zero here (the both-zero case was skipped above), so
            // only the sign of the unbounded change remains to be reported.
            pctChange = to > 0 ? "+inf" : "-inf";
        }
        else {
            pctChange = Number((((to - from) / from) * 100).toFixed(2));
        }
        out[path] = { from, to, pctChange };
    }
    return out;
}
1033
/**
 * Split task_history rows into `withLogs` (rows with a non-null `log_path`,
 * i.e. rows that should have a log) and `backed` (the subset whose log is
 * actually present).
 *
 * A row counts as backed when `getLoggedRunIds` reports its run id, built
 * with `buildTaskRunId(task_id, started_at)`, or, failing that, when the
 * file at `log_path` exists on disk. Per the original note (#579) logs.db is
 * the primary record and the file check is a transitional fallback for rows
 * written before it existed. `logsDb` may be undefined when logs.db could not
 * be opened; then only the file check runs.
 */
function partitionLogBackedRows(taskRows, logsDb) {
    const runIdOf = (row) => buildTaskRunId(row.task_id, row.started_at);
    const withLogs = taskRows.filter((row) => row.log_path !== null);
    let loggedRunIds;
    if (logsDb) {
        loggedRunIds = getLoggedRunIds(logsDb, withLogs.map(runIdOf));
    }
    else {
        loggedRunIds = new Set();
    }
    const backed = withLogs.filter((row) => {
        if (loggedRunIds.has(runIdOf(row))) {
            return true;
        }
        return row.log_path !== null && fs.existsSync(row.log_path);
    });
    return { withLogs, backed };
}
1049
/** Stage key used for `llm_usage` events recorded outside any stage scope. */
const UNATTRIBUTED_STAGE = "unattributed";
/** Fresh all-zero counters for a single stage (also the shape of the window totals). */
function emptyLlmUsageStageAggregate() {
    return {
        calls: 0,
        totalDurationMs: 0,
        promptTokens: 0,
        completionTokens: 0,
        totalTokens: 0,
        reasoningTokens: 0,
    };
}
/** Fresh all-zero window aggregate: the stage counters plus an empty `byStage` map. */
function emptyLlmUsageAggregate() {
    return { ...emptyLlmUsageStageAggregate(), byStage: {} };
}
/**
 * Aggregate `llm_usage` events (#576) into a window total plus a per-stage
 * breakdown of call count, wall-time, and token usage. Numeric metadata
 * fields are read through `toFiniteNumber`, so fields absent from a
 * best-effort record are expected to contribute 0. Calls with no (or an
 * empty / non-string) `stage` land under {@link UNATTRIBUTED_STAGE}.
 *
 * Stages are tracked in a Map while aggregating. Looking stage names up on a
 * plain object would resolve inherited members, so a stage called
 * `"constructor"` would return the global `Object` function and the counters
 * would be written onto it.
 *
 * @param events - Iterable of events with optional `metadata`.
 * @returns Window totals with `byStage` keyed by stage name.
 */
function summarizeLlmUsage(events) {
    const aggregate = emptyLlmUsageAggregate();
    const stages = new Map();
    for (const event of events) {
        const meta = event.metadata ?? {};
        const stageKey = typeof meta.stage === "string" && meta.stage ? meta.stage : UNATTRIBUTED_STAGE;
        let stage = stages.get(stageKey);
        if (!stage) {
            stage = emptyLlmUsageStageAggregate();
            stages.set(stageKey, stage);
        }
        const durationMs = toFiniteNumber(meta.durationMs);
        const promptTokens = toFiniteNumber(meta.promptTokens);
        const completionTokens = toFiniteNumber(meta.completionTokens);
        const totalTokens = toFiniteNumber(meta.totalTokens);
        const reasoningTokens = toFiniteNumber(meta.reasoningTokens);
        // Every call counts once toward the window total and once toward its stage.
        for (const target of [aggregate, stage]) {
            target.calls += 1;
            target.totalDurationMs += durationMs;
            target.promptTokens += promptTokens;
            target.completionTokens += completionTokens;
            target.totalTokens += totalTokens;
            target.reasoningTokens += reasoningTokens;
        }
    }
    // Object.fromEntries defines own data properties, so any stage name becomes
    // a regular key on the returned plain object.
    aggregate.byStage = Object.fromEntries(stages);
    return aggregate;
}
1096
/**
 * Read `llm_usage` events from the state database at `stateDbPath` starting
 * at `since` and summarize them with `summarizeLlmUsage`.
 *
 * When `until` is given, only events strictly before it are kept; an event
 * without a `ts` is compared using `since` as its timestamp. With no `until`
 * every event returned by the read is summarized.
 */
function readLlmUsageAggregate(stateDbPath, since, until) {
    const { events } = readEvents({ since, type: LLM_USAGE_EVENT }, { dbPath: stateDbPath });
    const isBeforeUntil = (event) => {
        if (until === undefined) {
            return true;
        }
        const eventMs = new Date(event.ts ?? since).getTime();
        return eventMs < new Date(until).getTime();
    };
    const inWindow = events.filter(isBeforeUntil);
    return summarizeLlmUsage(inWindow);
}
1104
/**
 * Read the auto-accept gate calibration summary (#612) over `[since, until)`.
 *
 * Lists every proposal gate decision from the open state.db, converts them
 * to calibration samples restricted to the window via
 * `gateDecisionsToSamples`, and aggregates the samples with
 * `summarizeCalibration`. The `until` bound is passed on only when defined.
 */
function readCalibration(db, since, until) {
    const windowBounds = { since };
    if (until !== undefined) {
        windowBounds.until = until;
    }
    const decisions = listProposalGateDecisions(db);
    const samples = gateDecisionsToSamples(decisions, windowBounds);
    return summarizeCalibration(samples);
}
1115
- // ── WS-5 Observability helpers ───────────────────────────────────────────────
1116
/**
 * Compute WS-5 denominator-fixed coverage metrics.
 *
 * `coverage = accepted_proposals / total_assets` (Part V §3). The denominator
 * is the TOTAL stash size, not the moving eligible set, so more-inclusive
 * WS-1 ranking cannot spuriously inflate coverage.
 * `eligibleFraction = eligible_assets / total_assets` is reported separately.
 *
 * Only accepted proposals whose `updatedAt` falls within `[since, until)` are
 * counted, so the rate is window-scoped. Timestamps are compared as strings;
 * a proposal without `updatedAt` compares as the empty string.
 * NOTE(review): string comparison assumes `updatedAt`, `since`, and `until`
 * share one sortable timestamp format. Confirm against the writers.
 *
 * Listing proposals is best-effort: if it throws (e.g. the table is missing
 * on an older install) the accepted count stays 0.
 *
 * @param db - Open state.db connection.
 * @param totalAssets - Total stash asset count (eligible + derived) from the
 *   most recent run's memorySummary. 0 = denominator unknown, returns NaN rates.
 * @param eligibleAssets - Eligible (non-derived) asset count from the most recent run.
 * @param since - Window start (ISO-8601). Proposals updated before this are excluded.
 * @param until - Window end (ISO-8601, exclusive). Absent = open-ended.
 * @param stashDir - Optional: scope accepted proposals to one stash. Absent = all stashes.
 * @returns `{ rate, eligibleFraction, acceptedProposals, totalAssets }`.
 */
function computeDenominatorFixedCoverage(db, totalAssets, eligibleAssets, since, until, stashDir) {
    const isInWindow = (proposal) => {
        const stamp = proposal.updatedAt ?? "";
        const tooEarly = stamp < since;
        const tooLate = until !== undefined && stamp >= until;
        return !(tooEarly || tooLate);
    };
    let acceptedProposals = 0;
    try {
        const query = { status: "accepted" };
        if (stashDir) {
            query.stashDir = stashDir;
        }
        acceptedProposals = listStateProposals(db, query).filter(isInWindow).length;
    }
    catch {
        // Fail open: table may not exist on older installs.
    }
    const denominatorKnown = totalAssets !== 0;
    if (!denominatorKnown) {
        return {
            rate: Number.NaN,
            eligibleFraction: Number.NaN,
            acceptedProposals,
            totalAssets: 0,
        };
    }
    const rate = roundRate(acceptedProposals / totalAssets);
    const eligibleFraction = roundRate(eligibleAssets / totalAssets);
    return { rate, eligibleFraction, acceptedProposals, totalAssets };
}
1169
- /**
1170
- * Compute WS-5 per-run degradation metrics (Part V §4).
1171
- *
1172
- * Health VIEWS only — reads from state.db tables populated by prior improve
1173
- * runs. Gracefully returns partial data when tables are absent (pre-WS-1/2).
1174
- *
1175
- * @param db - Open state.db connection.
1176
- * @param since - Window start (ISO-8601).
1177
- * @param until - Window end (ISO-8601).
1178
- */
1179
- function computeDegradationMetrics(db, since, until) {
1180
- // (a) Corpus diversity — salience rank distribution of the top-100 assets.
1181
- // We use the Gini coefficient of retrieval_salience scores as an intra-corpus
1182
- // diversity proxy. A Gini close to 1 = highly concentrated (entrenched top
1183
- // assets), Gini near 0 = flat/diverse. This is a single-snapshot metric;
1184
- // consecutive-run centroid distance requires cross-run history not yet stored.
1185
- let corpusCentroidDistance = Number.NaN;
1186
- let entrenchmentFlagged;
1187
- try {
1188
- const rows = db
1189
- .prepare(`SELECT retrieval_salience FROM asset_salience
1190
- ORDER BY rank_score DESC LIMIT 100`)
1191
- .all();
1192
- if (rows.length >= 5) {
1193
- const vals = rows.map((r) => r.retrieval_salience).sort((a, b) => a - b);
1194
- const n = vals.length;
1195
- const sumAbsDiff = vals.reduce((acc, xi, i) => {
1196
- return acc + vals.slice(i + 1).reduce((a, xj) => a + Math.abs(xi - xj), 0);
1197
- }, 0);
1198
- const mean = vals.reduce((a, b) => a + b, 0) / n;
1199
- // Gini = (sum |xi - xj|) / (2 n^2 mean); 0 = perfect equality, 1 = perfect inequality.
1200
- const gini = mean > 0 ? sumAbsDiff / (2 * n * n * mean) : 0;
1201
- // Re-express as a diversity proxy in [0,1]: high gini = low diversity.
1202
- // corpusCentroidDistance approximation: gini is "distance from uniform".
1203
- // Note: retrieval_salience values are in [0,1], so the max achievable Gini
1204
- // with this formula is ~0.5 (when one asset dominates and others are near 0).
1205
- // Threshold: >0.35 flags entrenchment (robustly above the ~0.1 uniform baseline).
1206
- corpusCentroidDistance = roundRate(gini);
1207
- entrenchmentFlagged = gini > 0.35;
1208
- }
1209
- }
1210
- catch {
1211
- // Table not present (pre-WS-1 install) — leave NaN.
1212
- }
1213
- // (b) Merge fidelity — fraction of consolidate accepted proposals in the window
1214
- // whose ref also has a consolidate skip-reason of "contradict_target_missing"
1215
- // or an event indicating contradiction. Uses the improve_runs result_json
1216
- // consolidation.contradicted count as a proxy.
1217
- // Simple implementation: contradictionRate = total_contradicted / max(1, total_processed)
1218
- // sourced from the window's consolidation envelope.
1219
- // (The full "merge proposal → later contradiction" correlation requires cross-run
1220
- // history; this is the available proxy.)
1221
- let mergeFidelityContradictionRate = 0;
1222
- try {
1223
- const runs = queryImproveRuns(db, since, until);
1224
- let totalContradicted = 0;
1225
- let totalProcessed = 0;
1226
- for (const row of runs) {
1227
- try {
1228
- const result = JSON.parse(row.result_json);
1229
- const cons = result.consolidation;
1230
- if (cons) {
1231
- totalContradicted += toFiniteNumber(cons.contradicted);
1232
- totalProcessed += toFiniteNumber(cons.processed);
1233
- }
1234
- }
1235
- catch {
1236
- // Skip malformed rows.
1237
- }
1238
- }
1239
- if (totalProcessed > 0) {
1240
- mergeFidelityContradictionRate = roundRate(totalContradicted / totalProcessed);
1241
- }
1242
- }
1243
- catch {
1244
- // Fail open.
1245
- }
1246
- // (c) Generation distribution — fraction of asset_salience rows with
1247
- // generation >= 2. Generation is NOT currently stored in asset_salience
1248
- // (it's in frontmatter). We approximate using consecutive_no_ops as a
1249
- // maturity proxy: assets that have never been no-op'd are "fresh".
1250
- // TODO(0.10+): store generation in asset_salience for proper tracking.
1251
- let highGenerationFraction = Number.NaN;
1252
- try {
1253
- const genRows = db.prepare("SELECT consecutive_no_ops FROM asset_salience").all();
1254
- if (genRows.length > 0) {
1255
- // Use consecutive_no_ops >= 2 as a proxy for "has been through merge cycles".
1256
- const highGen = genRows.filter((r) => r.consecutive_no_ops >= 2).length;
1257
- highGenerationFraction = roundRate(highGen / genRows.length);
1258
- }
1259
- }
1260
- catch {
1261
- // Table not present.
1262
- }
1263
- // (d) Oracle spot-check — up to 5 recently accepted proposals in the window.
1264
- const oracleSpotCheck = [];
1265
- try {
1266
- const accepted = listStateProposals(db, { status: "accepted" }).filter((p) => {
1267
- const updatedAt = p.updatedAt ?? "";
1268
- return updatedAt >= since && updatedAt < until;
1269
- });
1270
- // Sample up to 5: pick evenly spaced (not just the first 5).
1271
- const step = Math.max(1, Math.floor(accepted.length / 5));
1272
- for (let i = 0; i < accepted.length && oracleSpotCheck.length < 5; i += step) {
1273
- const p = accepted[i];
1274
- if (p) {
1275
- oracleSpotCheck.push({
1276
- proposalId: p.id,
1277
- ref: p.ref,
1278
- source: p.source ?? "unknown",
1279
- acceptedAt: p.updatedAt ?? p.createdAt ?? "",
1280
- });
1281
- }
1282
- }
1283
- }
1284
- catch {
1285
- // Fail open.
1286
- }
1287
- return {
1288
- corpusCentroidDistance,
1289
- entrenchmentFlagged,
1290
- mergeFidelityContradictionRate,
1291
- highGenerationFraction,
1292
- oracleSpotCheck,
1293
- };
1294
- }
1295
- function buildWindowMetrics(db, stateDbPath, since, until, now = () => Date.now(), logsDb) {
1296
- const taskRows = queryTaskHistory(db, { since }).filter((row) => {
1297
- const startMs = new Date(row.started_at).getTime();
1298
- const untilMs = new Date(until).getTime();
1299
- return !Number.isFinite(untilMs) || startMs < untilMs;
1300
- });
1301
- const { withLogs: taskRowsWithLogs, backed: existingLogRows } = partitionLogBackedRows(taskRows, logsDb);
1302
- const failedTaskRows = taskRows.filter((row) => row.status === "failed");
1303
- const activeRows = taskRows.filter((row) => row.status === "active");
1304
- const stuckActiveRuns = activeRows.filter((row) => now() - new Date(row.started_at).getTime() > ACTIVE_RUN_WARN_MS).length;
1305
- const promptRows = taskRows.filter((row) => row.target_kind === "prompt");
1306
- const promptFailures = promptRows.filter((row) => {
1307
- const detail = parseTaskMetadata(row).detail;
1308
- return typeof detail?.reason === "string" && detail.reason.length > 0;
1309
- });
1310
- const logBackingRate = taskRowsWithLogs.length === 0 ? 1 : existingLogRows.length / taskRowsWithLogs.length;
1311
- const taskFailRate = taskRows.length === 0 ? 0 : failedTaskRows.length / taskRows.length;
1312
- const agentFailureRate = promptRows.length === 0 ? 0 : promptFailures.length / promptRows.length;
1313
- const improveInvoked = readEvents({ since, type: "improve_invoked" }, { dbPath: stateDbPath }).events.filter((event) => new Date(event.ts ?? since).getTime() < new Date(until).getTime()).length;
1314
- const improveCompletedEvents = readEvents({ since, type: IMPROVE_COMPLETED_EVENT }, { dbPath: stateDbPath }).events.filter((event) => new Date(event.ts ?? since).getTime() < new Date(until).getTime());
1315
- const improveSkippedEvents = readEvents({ since, type: "improve_skipped" }, { dbPath: stateDbPath }).events.filter((event) => new Date(event.ts ?? since).getTime() < new Date(until).getTime());
1316
- const eventsMetrics = summarizeImproveCompleted(improveCompletedEvents);
1317
- const { metrics: improveSummary, runCount } = summarizeImproveRuns(db, since, until);
1318
- improveSummary.invoked = improveInvoked;
1319
- improveSummary.completed = eventsMetrics.completed;
1320
- const skipSummary = buildImproveSkipSummary(improveSkippedEvents);
1321
- improveSummary.skipped = skipSummary.skipped;
1322
- improveSummary.skipReasons = skipSummary.skipReasons;
1323
- // Preserve the per-phase aggregation computed by summarizeImproveRuns and
1324
- // derive top-level wall times from the same improve-runs window so counts
1325
- // and percentiles stay aligned with per-run reporting.
1326
- const perRunSummaries = buildPerRunSummaries(db, since, until);
1327
- const wallTimes = perRunSummaries.map((run) => run.wallTimeMs).filter((ms) => Number.isFinite(ms) && ms > 0);
1328
- improveSummary.wallTime = computeWallTimeStats(wallTimes, improveSummary.wallTime.byPhase);
1329
- improveSummary.calibration = readCalibration(db, since, until);
1330
- // WS-5: Compute denominator-fixed coverage from the most recent run's
1331
- // memorySummary (totalAssets = eligible + derived — the fixed denominator).
1332
- const totalAssets = improveSummary.memorySummary.eligible + improveSummary.memorySummary.derived;
1333
- improveSummary.coverage = computeDenominatorFixedCoverage(db, totalAssets, improveSummary.memorySummary.eligible, since, until);
1334
- // WS-5: Compute per-run degradation metrics (corpus diversity, merge fidelity,
1335
- // generation distribution, oracle spot-check). Health VIEWS only.
1336
- const degradation = computeDegradationMetrics(db, since, until);
1337
- if (degradation) {
1338
- improveSummary.degradation = degradation;
1339
- }
1340
- const metrics = {
1341
- taskFailRate: roundRate(taskFailRate),
1342
- agentFailureRate: roundRate(agentFailureRate),
1343
- stuckActiveRuns,
1344
- logBackingRate: roundRate(logBackingRate),
1345
- probeRoundTripMs: null,
1346
- llmUsage: readLlmUsageAggregate(stateDbPath, since, until),
1347
- };
1348
- return { improve: improveSummary, metrics, runs: runCount };
1349
- }
1350
40
  function validateAkmHealthOptions(options) {
1351
41
  if (options.groupBy !== undefined && options.groupBy !== "run") {
1352
42
  throw new UsageError(`Invalid value for --group-by: ${options.groupBy}. Expected: run`, "INVALID_FLAG_VALUE");
@@ -1447,77 +137,8 @@ export function akmHealth(options = {}) {
1447
137
  if (degradationMain) {
1448
138
  improveSummary.degradation = degradationMain;
1449
139
  }
1450
- // WS-2 proxy-adequacy tripwire: surface any outcome_proxy_inverted events
1451
- // in the health window as an advisory so operators know when the 0.10+
1452
- // rich in-session signal is no longer deferrable.
1453
- const proxyInvertedEvents = readEvents({ since, type: "outcome_proxy_inverted" }, { dbPath: stateDbPath }).events;
1454
- if (proxyInvertedEvents.length > 0) {
1455
- const lastEvent = proxyInvertedEvents[proxyInvertedEvents.length - 1];
1456
- const correlation = typeof lastEvent.metadata?.correlation === "number" ? lastEvent.metadata.correlation.toFixed(3) : "unknown";
1457
- advisories.push({
1458
- name: "outcome-proxy-adequacy",
1459
- status: "warn",
1460
- kind: "deterministic",
1461
- confidence: "high",
1462
- message: `WS-2 outcome proxy inverted (${proxyInvertedEvents.length} event(s) in window). ` +
1463
- `corr(outcome_score, accepted_change_rate) = ${correlation} < −0.3. ` +
1464
- "Popular assets are also the most-needing-improvement assets — " +
1465
- "the retrieval-based proxy is inverted. " +
1466
- "The 0.10+ rich in-session outcome signal is no longer deferrable. See plan §WS-2.",
1467
- });
1468
- }
1469
- // R5 collapse/churn detector: surface any collapse_detector_alert events
1470
- // in the health window, plus the latest cycle row's headline numbers so
1471
- // the operator can act without opening the DB. `unknown` when the detector
1472
- // has never produced a cycle row (no consolidate/recombine work yet).
1473
- try {
1474
- // Reuse the already-open state.db handle (readEvents supports a
1475
- // borrowed connection) — no extra open/migrate/close per health call.
1476
- const collapseAlertEvents = readEvents({ since, type: "collapse_detector_alert" }, { dbPath: stateDbPath, db }).events;
1477
- const latestCycle = getLatestCycleMetrics(db);
1478
- const cycleSummary = latestCycle
1479
- ? `Latest cycle (${latestCycle.ts}, ${latestCycle.pass}): mean canary recall ${latestCycle.mean_recall.toFixed(3)}, ` +
1480
- `distinct-content ratio ${latestCycle.distinct_content_ratio.toFixed(3)}, ` +
1481
- `${latestCycle.accepted_actions} accepted action(s).`
1482
- : "";
1483
- if (collapseAlertEvents.length > 0) {
1484
- const kinds = [...new Set(collapseAlertEvents.map((e) => String(e.metadata?.kind ?? "unknown")))];
1485
- const collapseKinds = kinds.filter((k) => k.startsWith("collapse"));
1486
- advisories.push({
1487
- name: "collapse-churn-detector",
1488
- status: "warn",
1489
- kind: "deterministic",
1490
- // Collapse kinds are measured, not inferred; churn/merge-floor
1491
- // volume thresholds are still being tuned (design doc §7).
1492
- confidence: collapseKinds.length > 0 ? "high" : "medium",
1493
- message: `R5 detector fired ${collapseAlertEvents.length} alert(s) in window (kinds: ${kinds.join(", ")}). ` +
1494
- `${cycleSummary} See docs/design/improve-collapse-churn-detector-design.md §6.3 runbook queries.`,
1495
- });
1496
- }
1497
- else if (latestCycle) {
1498
- advisories.push({
1499
- name: "collapse-churn-detector",
1500
- status: "pass",
1501
- kind: "deterministic",
1502
- confidence: "high",
1503
- message: `No collapse/churn alerts in window. ${cycleSummary}`,
1504
- });
1505
- }
1506
- else {
1507
- advisories.push({
1508
- name: "collapse-churn-detector",
1509
- status: "unknown",
1510
- kind: "deterministic",
1511
- confidence: "high",
1512
- message: "No detector cycle rows yet — the collapse/churn detector runs only on improve cycles " +
1513
- "where consolidate/recombine did work (synthesis lanes may be idle).",
1514
- });
1515
- }
1516
- }
1517
- catch {
1518
- // Table may predate migration 016 in odd mixed-version setups — advisory
1519
- // is best-effort and must never fail the health command.
1520
- }
140
+ improveSummary.enrichmentMinting = computeEnrichmentMintingRollup(db, since, until);
141
+ advisories.push(...collectImproveAdvisories(db, stateDbPath, since, improveSummary));
1521
142
  let sessionLogEntries = [];
1522
143
  try {
1523
144
  const sinceDays = Math.max(0, Math.ceil((now() - new Date(since).getTime()) / (24 * 60 * 60 * 1000)));
@@ -1648,103 +269,5 @@ export function akmHealth(options = {}) {
1648
269
  }
1649
270
  }
1650
271
  }
1651
- // ── Markdown renderers ───────────────────────────────────────────────────────
1652
- function padRight(s, width) {
1653
- return s.length >= width ? s : s + " ".repeat(width - s.length);
1654
- }
1655
- function renderTable(headers, rows) {
1656
- const widths = headers.map((h, i) => Math.max(h.length, ...rows.map((r) => (r[i] ?? "").length)));
1657
- const lines = [];
1658
- lines.push(headers.map((h, i) => padRight(h, widths[i] ?? 0)).join(" "));
1659
- for (const row of rows) {
1660
- lines.push(row.map((cell, i) => padRight(cell ?? "", widths[i] ?? 0)).join(" "));
1661
- }
1662
- return lines.join("\n");
1663
- }
1664
- /**
1665
- * Render `--detail per-run` rows as a TSV-ish aligned table. The column
1666
- * shape was originally inherited from the retired
1667
- * `scripts/improve-stats/runs-detail` bash helper; keep the same shape
1668
- * so operator muscle memory carries over.
1669
- *
1670
- * Columns: ts | ok | actions | refl_ok/fail/cd/skip |
1671
- * distill_q/llm-fail/qrej/cfg/skip | cons_proc/promo/merge/del |
1672
- * mem_cons/written/skip | graph_f/e/r | orphans | lint_f/fl
1673
- */
1674
- export function renderRunsDetailMd(runs) {
1675
- const headers = [
1676
- "ts",
1677
- "ok",
1678
- "actions",
1679
- "refl_ok/fail/cd/skip",
1680
- "distill_q/llm-fail/qrej/cfg/skip",
1681
- "cons_proc/promo/merge/del",
1682
- "mem_cons/written/skip",
1683
- "graph_f/e/r",
1684
- "orphans",
1685
- "lint_f/fl",
1686
- ];
1687
- const rows = runs.map((r) => {
1688
- const totalActions = r.actions.reflect.ok +
1689
- r.actions.reflect.failed +
1690
- r.actions.reflect.cooldown +
1691
- r.actions.reflect.skipped +
1692
- r.actions.distill.queued +
1693
- r.actions.distill.llmFailed +
1694
- r.actions.distill.qualityRejected +
1695
- r.actions.distill.configDisabled +
1696
- r.actions.distill.skipped +
1697
- r.actions.memoryPrune +
1698
- r.actions.memoryInference +
1699
- r.actions.graphExtraction +
1700
- r.actions.error;
1701
- return [
1702
- r.startedAt,
1703
- String(r.ok),
1704
- String(totalActions),
1705
- `${r.actions.reflect.ok}/${r.actions.reflect.failed}/${r.actions.reflect.cooldown}/${r.actions.reflect.skipped}`,
1706
- `${r.actions.distill.queued}/${r.actions.distill.llmFailed}/${r.actions.distill.qualityRejected}/${r.actions.distill.configDisabled}/${r.actions.distill.skipped}`,
1707
- `${r.consolidation.processed}/${r.consolidation.promoted}/${r.consolidation.merged}/${r.consolidation.deleted}`,
1708
- `${r.memoryInference.considered}/${r.memoryInference.written}/${r.memoryInference.skippedNoFacts}`,
1709
- `${r.graphExtraction.extractedFiles}/${r.graphExtraction.entities}/${r.graphExtraction.relations}`,
1710
- String(r.orphansPurged),
1711
- `${r.lintFixed}/${r.lintFlagged}`,
1712
- ];
1713
- });
1714
- return renderTable(headers, rows);
1715
- }
1716
- /**
1717
- * Render a window-compare comparison as a side-by-side metric table with a
1718
- * delta column. Bad-direction deltas (e.g. +pct on failed counts) get a `!`
1719
- * marker prefix.
1720
- */
1721
- export function renderWindowCompareMd(windows, deltas) {
1722
- if (windows.length === 0)
1723
- return "";
1724
- const headers = ["metric", ...windows.map((w) => w.name), "delta"];
1725
- const badIfPositive = new Set([
1726
- "improve.actions.reflect.failed",
1727
- "improve.actions.distill.llmFailed",
1728
- "improve.graphExtraction.failures",
1729
- "improve.graphExtraction.nonArrayBatchFailures",
1730
- "improve.wallTime.medianMs",
1731
- "improve.wallTime.p95Ms",
1732
- "improve.memoryInference.skippedNoFacts",
1733
- ]);
1734
- const rows = [];
1735
- for (const path of INTERESTING_DELTA_PATHS) {
1736
- const values = windows.map((w) => String(readNumericPath(w, path)));
1737
- const delta = deltas?.[path];
1738
- let deltaStr = "—";
1739
- if (delta) {
1740
- const pct = delta.pctChange;
1741
- const num = typeof pct === "number" ? pct : pct;
1742
- const sign = typeof num === "number" && num > 0 ? "+" : "";
1743
- const formatted = typeof num === "number" ? `${sign}${num}%` : String(num);
1744
- const marker = badIfPositive.has(path) && typeof num === "number" && num > 0 ? "!" : "";
1745
- deltaStr = marker + formatted;
1746
- }
1747
- rows.push([path, ...values, deltaStr]);
1748
- }
1749
- return renderTable(headers, rows);
1750
- }
272
+ // Markdown renderers (renderRunsDetailMd / renderWindowCompareMd) live in
273
+ // health/md-report.ts, mirroring the HTML extraction in health/html-report.ts.