akm-cli 0.9.0-beta.54 → 0.9.0-beta.55

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (101) hide show
  1. package/dist/cli.js +5 -3
  2. package/dist/commands/agent/contribute-cli.js +2 -3
  3. package/dist/commands/env/env-cli.js +187 -202
  4. package/dist/commands/env/secret-cli.js +109 -121
  5. package/dist/commands/feedback-cli.js +152 -155
  6. package/dist/commands/health/advisories.js +151 -0
  7. package/dist/commands/health/improve-metrics.js +754 -0
  8. package/dist/commands/health/llm-usage.js +65 -0
  9. package/dist/commands/health/md-report.js +103 -0
  10. package/dist/commands/health/metrics.js +278 -0
  11. package/dist/commands/health/task-runs.js +135 -0
  12. package/dist/commands/health/types.js +18 -0
  13. package/dist/commands/health/windows.js +196 -0
  14. package/dist/commands/health.js +14 -1624
  15. package/dist/commands/improve/anti-collapse.js +170 -0
  16. package/dist/commands/improve/collapse-detector.js +3 -2
  17. package/dist/commands/improve/consolidate.js +636 -633
  18. package/dist/commands/improve/dedup.js +1 -1
  19. package/dist/commands/improve/distill/content-repair.js +202 -0
  20. package/dist/commands/improve/distill/promote-memory.js +228 -0
  21. package/dist/commands/improve/distill/quality-gate.js +233 -0
  22. package/dist/commands/improve/distill-guards.js +127 -0
  23. package/dist/commands/improve/distill.js +49 -575
  24. package/dist/commands/improve/extract-cli.js +74 -76
  25. package/dist/commands/improve/extract.js +6 -4
  26. package/dist/commands/improve/hot-probation.js +45 -0
  27. package/dist/commands/improve/improve-auto-accept.js +3 -2
  28. package/dist/commands/improve/improve-cli.js +14 -13
  29. package/dist/commands/improve/improve-result-file.js +2 -1
  30. package/dist/commands/improve/improve.js +6 -5
  31. package/dist/commands/improve/loop-stages.js +19 -21
  32. package/dist/commands/improve/preparation.js +4 -2
  33. package/dist/commands/improve/procedural.js +10 -31
  34. package/dist/commands/improve/recombine.js +19 -43
  35. package/dist/commands/improve/reflect.js +1 -1
  36. package/dist/commands/improve/schema-similarity-gate.js +168 -0
  37. package/dist/commands/improve/shared.js +48 -0
  38. package/dist/commands/observability-cli.js +4 -4
  39. package/dist/commands/proposal/drain-policies.js +2 -2
  40. package/dist/commands/proposal/drain.js +1 -1
  41. package/dist/commands/proposal/legacy-import.js +115 -0
  42. package/dist/commands/proposal/proposal-cli.js +3 -3
  43. package/dist/commands/proposal/proposal.js +2 -1
  44. package/dist/commands/proposal/propose.js +1 -1
  45. package/dist/commands/proposal/repository.js +829 -0
  46. package/dist/commands/proposal/validators/proposals.js +5 -920
  47. package/dist/commands/read/remember-cli.js +132 -137
  48. package/dist/commands/read/search-cli.js +1 -1
  49. package/dist/commands/registry-cli.js +76 -87
  50. package/dist/commands/sources/add-cli.js +90 -94
  51. package/dist/commands/sources/history.js +1 -1
  52. package/dist/commands/sources/schema-repair.js +1 -1
  53. package/dist/commands/sources/sources-cli.js +3 -3
  54. package/dist/commands/sources/stash-cli.js +1 -1
  55. package/dist/commands/tasks/tasks-cli.js +1 -2
  56. package/dist/commands/wiki-cli.js +2 -3
  57. package/dist/core/common.js +3 -3
  58. package/dist/core/config/config-schema.js +6 -0
  59. package/dist/core/deep-merge.js +38 -0
  60. package/dist/core/events.js +2 -1
  61. package/dist/core/logs-db.js +8 -13
  62. package/dist/core/paths.js +14 -14
  63. package/dist/core/state-db.js +13 -1140
  64. package/dist/indexer/db/db.js +66 -709
  65. package/dist/indexer/db/entry-mapper.js +41 -0
  66. package/dist/indexer/db/schema.js +516 -0
  67. package/dist/indexer/feedback/utility-policy.js +85 -0
  68. package/dist/indexer/graph/graph-extraction.js +2 -1
  69. package/dist/indexer/index-writer-lock.js +9 -0
  70. package/dist/indexer/indexer.js +78 -23
  71. package/dist/indexer/search/fts-query.js +51 -0
  72. package/dist/integrations/agent/spawn.js +15 -66
  73. package/dist/output/text/helpers.js +13 -0
  74. package/dist/scripts/migrate-storage.js +6891 -7436
  75. package/dist/scripts/migrations/import-fs-improve-runs-to-db.js +44 -43
  76. package/dist/setup/legacy-config.js +106 -0
  77. package/dist/setup/prompt.js +57 -0
  78. package/dist/setup/providers.js +14 -0
  79. package/dist/setup/semantic-assets.js +124 -0
  80. package/dist/setup/setup.js +24 -1607
  81. package/dist/setup/steps/connection.js +734 -0
  82. package/dist/setup/steps/output.js +31 -0
  83. package/dist/setup/steps/platforms.js +124 -0
  84. package/dist/setup/steps/semantic.js +27 -0
  85. package/dist/setup/steps/sources.js +222 -0
  86. package/dist/setup/steps/stashdir.js +42 -0
  87. package/dist/setup/steps/tasks.js +152 -0
  88. package/dist/storage/repositories/canaries-repository.js +107 -0
  89. package/dist/storage/repositories/consolidation-repository.js +38 -0
  90. package/dist/storage/repositories/embeddings-repository.js +72 -0
  91. package/dist/storage/repositories/events-repository.js +187 -0
  92. package/dist/storage/repositories/extract-sessions-repository.js +96 -0
  93. package/dist/storage/repositories/improve-runs-repository.js +130 -0
  94. package/dist/storage/repositories/index-db.js +4 -7
  95. package/dist/storage/repositories/proposals-repository.js +220 -0
  96. package/dist/storage/repositories/recombine-repository.js +213 -0
  97. package/dist/storage/repositories/task-history-repository.js +93 -0
  98. package/dist/storage/sqlite-pragmas.js +3 -3
  99. package/dist/tasks/runner.js +2 -1
  100. package/package.json +1 -1
  101. package/dist/commands/improve/homeostatic.js +0 -497
@@ -0,0 +1,754 @@
1
+ // This Source Code Form is subject to the terms of the Mozilla Public
2
+ // License, v. 2.0. If a copy of the MPL was not distributed with this
3
+ // file, You can obtain one at https://mozilla.org/MPL/2.0/.
4
+ import { queryImproveRuns } from "../../storage/repositories/improve-runs-repository.js";
5
+ import { summarizeCalibration } from "../improve/calibration.js";
6
/**
 * Round a numeric rate to at most four decimal places.
 *
 * @param {number} value - Number to round; must expose `toFixed`.
 * @returns {number} `value` formatted with four decimals and converted back to a number.
 */
export function roundRate(value) {
    const fixedText = value.toFixed(4);
    return Number(fixedText);
}
9
/**
 * Parse the JSON metadata stored on a task row.
 *
 * Never throws: malformed JSON, a missing `metadata_json` value, or a missing
 * row all yield an empty object. Valid JSON that does not decode to an object
 * (`null`, numbers, strings, booleans) is also normalized to `{}` so callers
 * can read properties off the result without a null check. Arrays are objects
 * and are returned unchanged.
 *
 * @param {{ metadata_json?: unknown }} row - Row carrying the serialized metadata.
 * @returns {object} The decoded metadata, or `{}` when it is absent or unusable.
 */
export function parseTaskMetadata(row) {
    try {
        // Property access stays inside the try so a null/undefined row also
        // falls back to the empty object.
        const parsed = JSON.parse(row.metadata_json);
        // JSON.parse(null) and JSON.parse("null") both succeed and return null.
        return parsed !== null && typeof parsed === "object" ? parsed : {};
    }
    catch {
        return {};
    }
}
17
/**
 * Build a fresh improve-metrics accumulator with every field at its empty value.
 *
 * Counters start at 0, `ran` flags at `false`, per-reason maps as empty
 * objects, and the coverage ratios (`rate`, `eligibleFraction`, `churnRatio`)
 * as `NaN`. `calibration` is whatever `summarizeCalibration([])` returns.
 * Each call returns brand-new nested objects, so callers may mutate the result.
 *
 * @returns {object} A zero-valued metrics accumulator.
 */
function createUnknownImproveMetrics() {
    // Factory (not a shared constant) so each phase gets its own object.
    const emptyPhaseTiming = () => ({ count: 0, totalMs: 0, medianMs: 0, p95Ms: 0 });
    const actions = {
        reflect: { ok: 0, failed: 0, cooldown: 0, skipped: 0, guardRejected: 0, skippedByReason: {} },
        distill: {
            queued: 0,
            llmFailed: 0,
            qualityRejected: 0,
            judgeRejected: 0,
            validatorRejected: 0,
            configDisabled: 0,
            skipped: 0,
            skippedByReason: {},
            deferred: 0,
            deferredByReason: {},
        },
        memoryPrune: 0,
        memoryInference: 0,
        graphExtraction: 0,
        error: 0,
    };
    const memoryCleanup = {
        pruneCandidates: 0,
        contradictionCandidates: 0,
        beliefStateTransitions: 0,
        consolidationCandidates: 0,
        archived: 0,
        warnings: 0,
    };
    const consolidation = {
        ran: false,
        processed: 0,
        promoted: 0,
        merged: 0,
        deleted: 0,
        contradicted: 0,
        judgedNoAction: 0,
        mergedSecondaries: 0,
        failedChunkMemories: 0,
        skipReasons: {},
        failedChunks: 0,
        totalChunks: 0,
        durationMs: 0,
    };
    const memoryInference = {
        ran: false,
        considered: 0,
        cacheHits: 0,
        retryAttempts: 0,
        freshAttempts: 0,
        splitParents: 0,
        written: 0,
        skippedNoFacts: 0,
        skippedChildExists: 0,
        skippedAborted: 0,
        unaccounted: 0,
        htmlErrorCount: 0,
        yieldEligibleRuns: 0,
        yieldEligibleConsidered: 0,
        yieldEligibleWritten: 0,
        yieldRate: 0,
        durationMs: 0,
        writes: 0,
    };
    const graphExtraction = {
        ran: false,
        extractedFiles: 0,
        entities: 0,
        relations: 0,
        cacheHits: 0,
        cacheMisses: 0,
        cacheHitRate: 0,
        truncations: 0,
        failures: 0,
        htmlErrors: 0,
        retryAttempts: 0,
        nonArrayBatchFailures: 0,
        durationMs: 0,
    };
    const sessionExtraction = {
        ran: false,
        sessionsScanned: 0,
        sessionsExtracted: 0,
        sessionsSkipped: 0,
        proposalsCreated: 0,
        warnings: 0,
        durationMs: 0,
    };
    const wallTime = {
        count: 0,
        medianMs: 0,
        p95Ms: 0,
        minMs: 0,
        maxMs: 0,
        byPhase: {
            consolidation: emptyPhaseTiming(),
            memoryInference: emptyPhaseTiming(),
            graphExtraction: emptyPhaseTiming(),
        },
    };
    const perfTelemetry = {
        dedupPoolSize: 0,
        llmPoolSize: 0,
        judgedCacheSkipped: 0,
        embedMs: 0,
        embedCacheHits: 0,
        embedCacheMisses: 0,
        overBudgetRuns: 0,
        runsWithTelemetry: 0,
    };
    const coverage = {
        rate: Number.NaN,
        eligibleFraction: Number.NaN,
        acceptedProposals: 0,
        distinctRefs: 0,
        churnRatio: Number.NaN,
        totalAssets: 0,
    };
    // Key order below mirrors the serialized shape of the metrics object.
    return {
        invoked: 0,
        completed: 0,
        skipped: 0,
        skipReasons: {},
        plannedRefs: 0,
        profileFilteredRefs: 0,
        actions,
        autoAccept: { promoted: 0, validationFailed: 0 },
        calibration: summarizeCalibration([]),
        reflectsWithErrorContext: 0,
        coverageGapCount: 0,
        evalCasesWritten: 0,
        deadUrlCount: 0,
        memorySummary: { eligible: 0, derived: 0 },
        memoryCleanup,
        consolidation,
        memoryInference,
        graphExtraction,
        sessionExtraction,
        wallTime,
        perfTelemetry,
        coverage,
    };
}
150
/**
 * Coerce a loosely-typed value to a finite number, defaulting to 0.
 *
 * Finite numbers are returned as-is. Strings that are non-empty after
 * trimming are converted with `Number()`. Anything else, including `NaN`,
 * infinities and strings that do not convert to a finite number, yields 0.
 *
 * @param {unknown} value - Candidate value.
 * @returns {number} The finite numeric value, or 0.
 */
export function toFiniteNumber(value) {
    let candidate = Number.NaN;
    switch (typeof value) {
        case "number":
            candidate = value;
            break;
        case "string":
            // Blank strings would convert to 0 via Number(); keep them on the
            // explicit fallback path instead.
            if (value.trim()) {
                candidate = Number(value);
            }
            break;
        default:
            break;
    }
    return Number.isFinite(candidate) ? candidate : 0;
}
160
/**
 * Event-derived metrics for `improve_completed` events.
 *
 * Starts from an empty accumulator and sets `completed` to the number of
 * events received; every other field keeps its empty default. The richer
 * fields are expected to come from the improve-runs summary rather than from
 * events, which keeps the completed count tied to the canonical event stream.
 *
 * @param {Array<unknown>} events - The `improve_completed` events in the window.
 * @returns {object} Metrics accumulator whose `completed` equals `events.length`.
 */
export function summarizeImproveCompleted(events) {
    const completed = events.length;
    return Object.assign(createUnknownImproveMetrics(), { completed });
}
173
/**
 * Project a single `improve_runs.result_json` envelope into an accumulator-shaped
 * ImproveHealthMetrics. The aggregator merges these per-row metrics into one
 * window-level metric.
 *
 * Only raw counts and durations are filled in here. Derived fields (`ran`
 * flags, `yieldRate`, `cacheHitRate`, `freshAttempts`, `writes`) keep their
 * empty defaults.
 *
 * The envelope is parsed JSON of varying vintage, so every section is
 * optional. Entries of `actions`, `extract` and `extract[].sessions` that are
 * null or not objects are ignored instead of throwing.
 *
 * @param {object} result - Parsed result envelope of one improve run.
 * @returns {object} Fresh metrics accumulator populated from `result`.
 */
function projectRunMetrics(result) {
    const metrics = createUnknownImproveMetrics();
    // Length of an array-valued field, or 0 when the field is absent/not an array.
    const lengthOf = (value) => (Array.isArray(value) ? value.length : 0);
    // Increment a per-reason counter map in place.
    const bump = (counts, key) => {
        counts[key] = (counts[key] ?? 0) + 1;
    };
    // Non-blank `reason` string of an action result, or "unknown".
    const reasonOrUnknown = (payload) => typeof payload?.reason === "string" && payload.reason.trim() ? payload.reason : "unknown";

    // plannedRefs / profileFilteredRefs are arrays of {ref, reason}.
    // profileFilteredRefs is the pre-filter bucket from `collectEligibleRefs`:
    // work the planner dropped before signal-delta / per-pass dispatch.
    metrics.plannedRefs += lengthOf(result.plannedRefs);
    metrics.profileFilteredRefs += lengthOf(result.profileFilteredRefs);

    // actions: split reflect / distill by outcome, count others.
    const { reflect, distill } = metrics.actions;
    const actions = Array.isArray(result.actions) ? result.actions : [];
    for (const action of actions) {
        // Optional chaining: a null entry resolves to mode "" and is skipped.
        const mode = typeof action?.mode === "string" ? action.mode : "";
        switch (mode) {
            case "reflect":
                reflect.ok += 1;
                break;
            case "reflect-failed":
                reflect.failed += 1;
                break;
            case "reflect-cooldown":
                reflect.cooldown += 1;
                break;
            case "reflect-skipped":
                reflect.skipped += 1;
                bump(reflect.skippedByReason, reasonOrUnknown(action.result));
                break;
            case "reflect-guard-rejected":
                reflect.guardRejected += 1;
                break;
            case "distill": {
                const r = action.result;
                const outcome = typeof r?.outcome === "string" ? r.outcome : "";
                switch (outcome) {
                    case "queued":
                        distill.queued += 1;
                        break;
                    case "llm_failed":
                        distill.llmFailed += 1;
                        break;
                    case "quality_rejected":
                    case "review_needed":
                        distill.qualityRejected += 1;
                        distill.judgeRejected += 1;
                        break;
                    case "validation_failed":
                        distill.qualityRejected += 1;
                        distill.validatorRejected += 1;
                        break;
                    case "config_disabled":
                        distill.configDisabled += 1;
                        break;
                    case "skipped": {
                        // `outcome: "skipped"` covers recursive lesson inputs,
                        // destination conflicts resolved as NOOP, and
                        // cooldown / dedup at persistence. Prefer an explicit
                        // non-blank `skipReason`; otherwise infer the reason
                        // from the result message. A blank `skipReason` is
                        // treated as absent so it cannot create a "" bucket.
                        distill.deferred += 1;
                        const message = typeof r?.message === "string" ? r.message : "";
                        let reason = "unknown";
                        if (typeof r?.skipReason === "string" && r.skipReason.trim()) {
                            reason = r.skipReason;
                        }
                        else if (/lesson inputs/i.test(message)) {
                            reason = "recursive_lesson_input";
                        }
                        else if (/NOOP/.test(message)) {
                            reason = "conflict_noop";
                        }
                        else if (/cooldown/i.test(message)) {
                            reason = "proposal_cooldown";
                        }
                        else if (/content[_ ]?hash/i.test(message)) {
                            reason = "content_hash_match";
                        }
                        bump(distill.deferredByReason, reason);
                        break;
                    }
                    default:
                        break;
                }
                break;
            }
            case "distill-skipped":
                distill.skipped += 1;
                bump(distill.skippedByReason, reasonOrUnknown(action.result));
                break;
            case "memory-prune":
                metrics.actions.memoryPrune += 1;
                break;
            case "memory-inference":
                metrics.actions.memoryInference += 1;
                break;
            case "graph-extraction":
                metrics.actions.graphExtraction += 1;
                break;
            case "error":
                metrics.actions.error += 1;
                break;
            default:
                break;
        }
    }

    metrics.autoAccept.promoted += toFiniteNumber(result.gateAutoAcceptedCount);
    metrics.autoAccept.validationFailed += toFiniteNumber(result.gateAutoAcceptFailedCount);
    metrics.reflectsWithErrorContext += toFiniteNumber(result.reflectsWithErrorContext);
    metrics.coverageGapCount += lengthOf(result.coverageGaps);
    metrics.evalCasesWritten += toFiniteNumber(result.evalCasesWritten);
    metrics.deadUrlCount += lengthOf(result.deadUrls);

    const memorySummary = result.memorySummary;
    if (memorySummary) {
        metrics.memorySummary.eligible += toFiniteNumber(memorySummary.eligible);
        metrics.memorySummary.derived += toFiniteNumber(memorySummary.derived);
    }

    const memoryCleanup = result.memoryCleanup;
    if (memoryCleanup) {
        const cleanup = metrics.memoryCleanup;
        cleanup.pruneCandidates += lengthOf(memoryCleanup.pruneCandidates);
        cleanup.contradictionCandidates += lengthOf(memoryCleanup.contradictionCandidates);
        cleanup.beliefStateTransitions += lengthOf(memoryCleanup.beliefStateTransitions);
        cleanup.consolidationCandidates += lengthOf(memoryCleanup.consolidationCandidates);
        cleanup.archived += lengthOf(memoryCleanup.archived);
        cleanup.warnings += lengthOf(memoryCleanup.warnings);
    }

    const consolidation = result.consolidation;
    if (consolidation) {
        const target = metrics.consolidation;
        target.processed += toFiniteNumber(consolidation.processed);
        target.merged += toFiniteNumber(consolidation.merged);
        target.deleted += toFiniteNumber(consolidation.deleted);
        target.contradicted += toFiniteNumber(consolidation.contradicted);
        // `promoted` is an array in the envelope but a count in the metrics.
        target.promoted += lengthOf(consolidation.promoted);
        target.failedChunks += toFiniteNumber(consolidation.failedChunks);
        target.totalChunks += toFiniteNumber(consolidation.totalChunks);
        target.durationMs += toFiniteNumber(consolidation.durationMs);
        target.judgedNoAction += toFiniteNumber(consolidation.judgedNoAction);
        target.mergedSecondaries += toFiniteNumber(consolidation.mergedSecondaries);
        target.failedChunkMemories += toFiniteNumber(consolidation.failedChunkMemories);
        // `skipReasons` holds per-ref grouped `{ref, skips: [{op, reason}]}`
        // entries. Each ref appears once but may carry multiple skips, so
        // every reason is aggregated. Older envelopes lack the field.
        const skipReasons = consolidation.skipReasons;
        if (Array.isArray(skipReasons)) {
            for (const entry of skipReasons) {
                if (!entry || typeof entry !== "object") {
                    continue;
                }
                if (!Array.isArray(entry.skips)) {
                    continue;
                }
                for (const skip of entry.skips) {
                    if (!skip || typeof skip !== "object") {
                        continue;
                    }
                    const reason = skip.reason;
                    if (typeof reason !== "string" || !reason.trim()) {
                        continue;
                    }
                    bump(target.skipReasons, reason);
                }
            }
        }
        // Perf telemetry rides on the consolidation envelope; older
        // envelopes lack `perfTelemetry`.
        const perf = consolidation.perfTelemetry;
        if (perf) {
            const telemetry = metrics.perfTelemetry;
            telemetry.runsWithTelemetry += 1;
            telemetry.dedupPoolSize += toFiniteNumber(perf.dedupPoolSize);
            telemetry.llmPoolSize += toFiniteNumber(perf.llmPoolSize);
            telemetry.judgedCacheSkipped += toFiniteNumber(perf.judgedCacheSkipped);
            telemetry.embedMs += toFiniteNumber(perf.embedMs);
            telemetry.embedCacheHits += toFiniteNumber(perf.embedCacheHits);
            telemetry.embedCacheMisses += toFiniteNumber(perf.embedCacheMisses);
            if (toFiniteNumber(perf.estimatedBudgetFractionUsed) > 1.0) {
                telemetry.overBudgetRuns += 1;
            }
        }
    }

    const memoryInference = result.memoryInference;
    if (memoryInference) {
        const target = metrics.memoryInference;
        const considered = toFiniteNumber(memoryInference.considered);
        const writtenFacts = toFiniteNumber(memoryInference.writtenFacts);
        target.considered += considered;
        target.cacheHits += toFiniteNumber(memoryInference.cacheHits);
        target.retryAttempts += toFiniteNumber(memoryInference.retryAttempts);
        target.splitParents += toFiniteNumber(memoryInference.splitParents);
        target.written += writtenFacts;
        target.skippedNoFacts += toFiniteNumber(memoryInference.skippedNoFacts);
        target.skippedChildExists += toFiniteNumber(memoryInference.skippedChildExists);
        target.skippedAborted += toFiniteNumber(memoryInference.skippedAborted);
        target.unaccounted += toFiniteNumber(memoryInference.unaccounted);
        target.htmlErrorCount += toFiniteNumber(memoryInference.htmlErrorCount);
        // Yield-rate gating: envelopes written before cache instrumentation
        // lack the `cacheHits` field entirely. Counting their `considered`
        // as fresh attempts would drag the reported rate down, so only
        // envelopes that actually carry the field feed the yield aggregate.
        if (Object.hasOwn(memoryInference, "cacheHits")) {
            target.yieldEligibleRuns += 1;
            target.yieldEligibleConsidered += considered;
            target.yieldEligibleWritten += writtenFacts;
        }
    }
    metrics.memoryInference.durationMs += toFiniteNumber(result.memoryInferenceDurationMs);

    const graphExtraction = result.graphExtraction;
    if (graphExtraction) {
        const target = metrics.graphExtraction;
        const quality = graphExtraction.quality;
        if (quality) {
            target.extractedFiles += toFiniteNumber(quality.extractedFiles);
        }
        target.entities += toFiniteNumber(graphExtraction.totalEntities);
        target.relations += toFiniteNumber(graphExtraction.totalRelations);
        const telemetry = graphExtraction.telemetry;
        if (telemetry) {
            target.cacheHits += toFiniteNumber(telemetry.cacheHits);
            target.cacheMisses += toFiniteNumber(telemetry.cacheMisses);
            target.truncations += toFiniteNumber(telemetry.truncationCount);
            target.failures += toFiniteNumber(telemetry.failureCount);
            target.htmlErrors += toFiniteNumber(telemetry.htmlErrorCount);
            target.retryAttempts += toFiniteNumber(telemetry.retryAttempts);
            target.nonArrayBatchFailures += toFiniteNumber(telemetry.nonArrayBatchFailures);
        }
    }
    metrics.graphExtraction.durationMs += toFiniteNumber(result.graphExtractionDurationMs);

    if (Array.isArray(result.extract)) {
        const target = metrics.sessionExtraction;
        for (const e of result.extract) {
            // Null / primitive entries carry no counts; skip them rather
            // than dereferencing.
            if (!e || typeof e !== "object") {
                continue;
            }
            target.sessionsScanned += toFiniteNumber(e.sessionsProcessed);
            target.sessionsSkipped += toFiniteNumber(e.sessionsSkipped);
            if (Array.isArray(e.sessions)) {
                // A session counts as extracted when it produced at least one proposal.
                target.sessionsExtracted += e.sessions.filter((s) => Array.isArray(s?.proposalIds) && s.proposalIds.length > 0).length;
            }
            target.proposalsCreated += lengthOf(e.proposals);
            target.warnings += lengthOf(e.warnings);
            target.durationMs += toFiniteNumber(e.durationMs);
        }
    }
    return metrics;
}
433
/**
 * Finalize derived flags and rates on an accumulator, in place.
 *
 * Applies equally to a window-level aggregate and to a single per-run row, so
 * single-row metrics still expose `ran` / `yieldRate` / `cacheHitRate`.
 *
 * Sets, from the already-accumulated counters:
 * - `ran` on consolidation, memoryInference, graphExtraction and
 *   sessionExtraction (true when any of the phase's listed counters is > 0);
 * - `memoryInference.writes` (copy of `written`);
 * - `memoryInference.freshAttempts` and `memoryInference.yieldRate`;
 * - `graphExtraction.cacheHitRate`.
 *
 * @param {object} metrics - Accumulator to finalize; mutated.
 * @returns {void}
 */
function finalizeImproveMetrics(metrics) {
    const anyPositive = (...counters) => counters.some((counter) => counter > 0);
    const { consolidation, memoryInference, graphExtraction, sessionExtraction } = metrics;

    consolidation.ran = anyPositive(consolidation.processed, consolidation.durationMs, consolidation.promoted, consolidation.merged, consolidation.deleted, consolidation.contradicted, consolidation.totalChunks);

    memoryInference.ran = anyPositive(memoryInference.considered, memoryInference.written, memoryInference.durationMs);
    memoryInference.writes = memoryInference.written;
    // The yield denominator uses only the yield-eligible `considered` count,
    // minus cache hits and aborted items, clamped at zero. Totals in
    // `considered` / `written` are untouched by this.
    const freshAttempts = Math.max(0, memoryInference.yieldEligibleConsidered - memoryInference.cacheHits - memoryInference.skippedAborted);
    memoryInference.freshAttempts = freshAttempts;
    if (freshAttempts > 0) {
        memoryInference.yieldRate = roundRate(memoryInference.yieldEligibleWritten / freshAttempts);
    }
    else {
        memoryInference.yieldRate = 0;
    }

    graphExtraction.ran = anyPositive(graphExtraction.extractedFiles, graphExtraction.entities, graphExtraction.durationMs);
    const cacheLookups = graphExtraction.cacheHits + graphExtraction.cacheMisses;
    if (cacheLookups > 0) {
        graphExtraction.cacheHitRate = roundRate(graphExtraction.cacheHits / cacheLookups);
    }
    else {
        graphExtraction.cacheHitRate = 0;
    }

    sessionExtraction.ran = anyPositive(sessionExtraction.sessionsScanned, sessionExtraction.proposalsCreated, sessionExtraction.durationMs);
}
476
+ /**
477
+ * Merge per-row metrics from `src` into accumulator `dst`. All numeric fields
478
+ * are additive; cumulative rates are recomputed by finalizeImproveMetrics.
479
+ */
480
+ function mergeImproveMetrics(dst, src) {
481
+ dst.plannedRefs += src.plannedRefs;
482
+ // profileFilteredRefs is the count of refs the planner drops up-front for the
483
+ // active profile — recomputed against the (stable) stash every run, so it is a
484
+ // snapshot, NOT a per-run increment. Summing it re-counts the same refs each
485
+ // run (the ~2.4M bug). Set from the most recent run in summarizeImproveRuns.
486
+ dst.actions.reflect.ok += src.actions.reflect.ok;
487
+ dst.actions.reflect.failed += src.actions.reflect.failed;
488
+ dst.actions.reflect.cooldown += src.actions.reflect.cooldown;
489
+ dst.actions.reflect.skipped += src.actions.reflect.skipped;
490
+ dst.actions.reflect.guardRejected += src.actions.reflect.guardRejected;
491
+ for (const [reason, count] of Object.entries(src.actions.reflect.skippedByReason)) {
492
+ dst.actions.reflect.skippedByReason[reason] = (dst.actions.reflect.skippedByReason[reason] ?? 0) + count;
493
+ }
494
+ dst.actions.distill.queued += src.actions.distill.queued;
495
+ dst.actions.distill.llmFailed += src.actions.distill.llmFailed;
496
+ dst.actions.distill.qualityRejected += src.actions.distill.qualityRejected;
497
+ dst.actions.distill.judgeRejected += src.actions.distill.judgeRejected;
498
+ dst.actions.distill.validatorRejected += src.actions.distill.validatorRejected;
499
+ dst.actions.distill.configDisabled += src.actions.distill.configDisabled;
500
+ dst.actions.distill.skipped += src.actions.distill.skipped;
501
+ for (const [reason, count] of Object.entries(src.actions.distill.skippedByReason)) {
502
+ dst.actions.distill.skippedByReason[reason] = (dst.actions.distill.skippedByReason[reason] ?? 0) + count;
503
+ }
504
+ dst.actions.distill.deferred += src.actions.distill.deferred;
505
+ for (const [reason, count] of Object.entries(src.actions.distill.deferredByReason)) {
506
+ dst.actions.distill.deferredByReason[reason] = (dst.actions.distill.deferredByReason[reason] ?? 0) + count;
507
+ }
508
+ dst.actions.memoryPrune += src.actions.memoryPrune;
509
+ dst.actions.memoryInference += src.actions.memoryInference;
510
+ dst.actions.graphExtraction += src.actions.graphExtraction;
511
+ dst.actions.error += src.actions.error;
512
+ dst.autoAccept.promoted += src.autoAccept.promoted;
513
+ dst.autoAccept.validationFailed += src.autoAccept.validationFailed;
514
+ dst.reflectsWithErrorContext += src.reflectsWithErrorContext;
515
+ dst.coverageGapCount += src.coverageGapCount;
516
+ dst.evalCasesWritten += src.evalCasesWritten;
517
+ dst.deadUrlCount += src.deadUrlCount;
518
+ // NOTE: memorySummary (derived/eligible) is a WHOLE-STASH snapshot recorded on
519
+ // every run, NOT a per-run increment — summing it across the window inflates
520
+ // it ~N× (the 1.2M-eligible bug). It is set from the most recent run in
521
+ // summarizeImproveRuns instead, so it is intentionally not merged here.
522
+ dst.memoryCleanup.pruneCandidates += src.memoryCleanup.pruneCandidates;
523
+ dst.memoryCleanup.contradictionCandidates += src.memoryCleanup.contradictionCandidates;
524
+ dst.memoryCleanup.beliefStateTransitions += src.memoryCleanup.beliefStateTransitions;
525
+ dst.memoryCleanup.consolidationCandidates += src.memoryCleanup.consolidationCandidates;
526
+ dst.memoryCleanup.archived += src.memoryCleanup.archived;
527
+ dst.memoryCleanup.warnings += src.memoryCleanup.warnings;
528
+ dst.consolidation.processed += src.consolidation.processed;
529
+ dst.consolidation.promoted += src.consolidation.promoted;
530
+ dst.consolidation.merged += src.consolidation.merged;
531
+ dst.consolidation.deleted += src.consolidation.deleted;
532
+ dst.consolidation.contradicted += src.consolidation.contradicted;
533
+ dst.consolidation.failedChunks += src.consolidation.failedChunks;
534
+ dst.consolidation.totalChunks += src.consolidation.totalChunks;
535
+ dst.consolidation.durationMs += src.consolidation.durationMs;
536
+ dst.consolidation.judgedNoAction += src.consolidation.judgedNoAction;
537
+ dst.consolidation.mergedSecondaries += src.consolidation.mergedSecondaries;
538
+ dst.consolidation.failedChunkMemories += src.consolidation.failedChunkMemories;
539
+ for (const [reason, count] of Object.entries(src.consolidation.skipReasons)) {
540
+ dst.consolidation.skipReasons[reason] = (dst.consolidation.skipReasons[reason] ?? 0) + count;
541
+ }
542
+ dst.memoryInference.considered += src.memoryInference.considered;
543
+ dst.memoryInference.cacheHits += src.memoryInference.cacheHits;
544
+ dst.memoryInference.splitParents += src.memoryInference.splitParents;
545
+ dst.memoryInference.written += src.memoryInference.written;
546
+ dst.memoryInference.skippedNoFacts += src.memoryInference.skippedNoFacts;
547
+ dst.memoryInference.skippedChildExists += src.memoryInference.skippedChildExists;
548
+ dst.memoryInference.skippedAborted += src.memoryInference.skippedAborted;
549
+ dst.memoryInference.unaccounted += src.memoryInference.unaccounted;
550
+ dst.memoryInference.htmlErrorCount += src.memoryInference.htmlErrorCount;
551
+ dst.memoryInference.yieldEligibleRuns += src.memoryInference.yieldEligibleRuns;
552
+ dst.memoryInference.yieldEligibleConsidered += src.memoryInference.yieldEligibleConsidered;
553
+ dst.memoryInference.yieldEligibleWritten += src.memoryInference.yieldEligibleWritten;
554
+ dst.memoryInference.durationMs += src.memoryInference.durationMs;
555
+ dst.graphExtraction.extractedFiles += src.graphExtraction.extractedFiles;
556
+ dst.graphExtraction.entities += src.graphExtraction.entities;
557
+ dst.graphExtraction.relations += src.graphExtraction.relations;
558
+ dst.graphExtraction.cacheHits += src.graphExtraction.cacheHits;
559
+ dst.graphExtraction.cacheMisses += src.graphExtraction.cacheMisses;
560
+ dst.graphExtraction.truncations += src.graphExtraction.truncations;
561
+ dst.graphExtraction.failures += src.graphExtraction.failures;
562
+ dst.graphExtraction.htmlErrors += src.graphExtraction.htmlErrors;
563
+ dst.graphExtraction.nonArrayBatchFailures += src.graphExtraction.nonArrayBatchFailures;
564
+ dst.graphExtraction.durationMs += src.graphExtraction.durationMs;
565
+ dst.sessionExtraction.sessionsScanned += src.sessionExtraction.sessionsScanned;
566
+ dst.sessionExtraction.sessionsExtracted += src.sessionExtraction.sessionsExtracted;
567
+ dst.sessionExtraction.sessionsSkipped += src.sessionExtraction.sessionsSkipped;
568
+ dst.sessionExtraction.proposalsCreated += src.sessionExtraction.proposalsCreated;
569
+ dst.sessionExtraction.warnings += src.sessionExtraction.warnings;
570
+ dst.sessionExtraction.durationMs += src.sessionExtraction.durationMs;
571
+ // WS-5: merge perf telemetry (additive sums).
572
+ dst.perfTelemetry.dedupPoolSize += src.perfTelemetry.dedupPoolSize;
573
+ dst.perfTelemetry.llmPoolSize += src.perfTelemetry.llmPoolSize;
574
+ dst.perfTelemetry.judgedCacheSkipped += src.perfTelemetry.judgedCacheSkipped;
575
+ dst.perfTelemetry.embedMs += src.perfTelemetry.embedMs;
576
+ dst.perfTelemetry.embedCacheHits += src.perfTelemetry.embedCacheHits;
577
+ dst.perfTelemetry.embedCacheMisses += src.perfTelemetry.embedCacheMisses;
578
+ dst.perfTelemetry.overBudgetRuns += src.perfTelemetry.overBudgetRuns;
579
+ dst.perfTelemetry.runsWithTelemetry += src.perfTelemetry.runsWithTelemetry;
580
+ // coverage: acceptedProposals is additive; totalAssets is a snapshot (like memorySummary).
581
+ // totalAssets is intentionally NOT merged here — set from the most recent run in summarizeImproveRuns.
582
+ dst.coverage.acceptedProposals += src.coverage.acceptedProposals;
583
+ }
584
/**
 * Roll every improve run returned by `queryImproveRuns(db, since, until)` up
 * into a single metrics object.
 *
 * Additive counters are folded in through `mergeImproveMetrics`. Snapshot-style
 * values (`memorySummary`, `profileFilteredRefs`) are NOT summed: they are taken
 * from the run with the greatest finite `started_at` (ties go to the row seen
 * later in iteration order). Per-phase wall-time stats are built from the raw
 * envelopes.
 *
 * Rows whose `result_json` cannot be parsed, or parses to something that is not
 * an object (e.g. `null`, which is what `JSON.parse` yields for a NULL column
 * or the literal string "null"), are skipped entirely.
 *
 * @param db - handle forwarded untouched to `queryImproveRuns`.
 * @param since - window bound forwarded untouched to `queryImproveRuns`.
 * @param until - window bound forwarded untouched to `queryImproveRuns`.
 * @returns `{ metrics, runCount }`; `runCount` is the number of rows returned
 *   by the query, including rows that were skipped as malformed.
 */
export function summarizeImproveRuns(db, since, until) {
    const accum = createUnknownImproveMetrics();
    const rows = queryImproveRuns(db, since, until);
    // One sample per envelope per phase. A phase that did not run on an
    // envelope contributes nothing (rather than a 0) so that median/p95
    // describe real phase work only.
    const phaseDurations = {
        consolidation: [],
        memoryInference: [],
        graphExtraction: [],
    };
    // Snapshot values: keep the most recent run's value, never a sum.
    let latestStartMs = Number.NEGATIVE_INFINITY;
    let latestMemorySummary;
    let latestProfileFilteredRefs = 0;
    for (const row of rows) {
        let result;
        try {
            result = JSON.parse(row.result_json);
        }
        catch {
            continue;
        }
        // JSON.parse succeeds on "null" / numbers / strings; none of those is a
        // usable envelope, and `null` would throw on the property reads below.
        if (result === null || typeof result !== "object") {
            continue;
        }
        const perRow = projectRunMetrics(result);
        mergeImproveMetrics(accum, perRow);
        const startMs = new Date(row.started_at).getTime();
        if (Number.isFinite(startMs) && startMs >= latestStartMs) {
            latestStartMs = startMs;
            latestMemorySummary = perRow.memorySummary;
            latestProfileFilteredRefs = perRow.profileFilteredRefs;
        }
        // consolidation keeps its duration inside its sub-object; the other two
        // phases expose top-level `*DurationMs` keys on the envelope.
        const consolMs = toFiniteNumber(result.consolidation?.durationMs);
        if (consolMs > 0) {
            phaseDurations.consolidation.push(consolMs);
        }
        const memMs = toFiniteNumber(result.memoryInferenceDurationMs);
        if (memMs > 0) {
            phaseDurations.memoryInference.push(memMs);
        }
        const graphMs = toFiniteNumber(result.graphExtractionDurationMs);
        if (graphMs > 0) {
            phaseDurations.graphExtraction.push(graphMs);
        }
    }
    finalizeImproveMetrics(accum);
    if (latestMemorySummary) {
        accum.memorySummary = latestMemorySummary;
    }
    // Assigned unconditionally: stays 0 when no row had a finite start time.
    accum.profileFilteredRefs = latestProfileFilteredRefs;
    accum.wallTime.byPhase = {
        consolidation: summarizePhaseDurations(phaseDurations.consolidation),
        memoryInference: summarizePhaseDurations(phaseDurations.memoryInference),
        graphExtraction: summarizePhaseDurations(phaseDurations.graphExtraction),
    };
    return { metrics: accum, runCount: rows.length };
}
642
/**
 * Collapse a list of per-envelope phase durations into one
 * `wallTime.byPhase.*` entry: `{ count, totalMs, medianMs, p95Ms }`.
 *
 * Quantiles are read from the ascending-sorted samples at index
 * `min(n - 1, floor(q * n))` — the same picker `computeWallTimeStats` uses, so
 * per-phase and top-level figures stay comparable. With an even number of
 * samples this selects the upper of the two middle values as the median.
 * The caller's array is not mutated. An empty list yields all zeros.
 */
export function summarizePhaseDurations(samples) {
    const count = samples.length;
    if (count === 0) {
        return { count: 0, totalMs: 0, medianMs: 0, p95Ms: 0 };
    }
    // Sort a copy so the input order is left alone.
    const ascending = Array.from(samples).sort((lhs, rhs) => lhs - rhs);
    const quantile = (fraction) => {
        const index = Math.min(count - 1, Math.floor(fraction * count));
        return ascending[index] ?? 0;
    };
    let totalMs = 0;
    for (const ms of ascending) {
        totalMs += ms;
    }
    return {
        count,
        totalMs,
        medianMs: quantile(0.5),
        p95Ms: quantile(0.95),
    };
}
661
/**
 * Project one `improve_runs` row plus its wall-time lookup into a single
 * per-run summary object. Used by `akm health --detail per-run`.
 *
 * The row's `result_json` is parsed as the run envelope. If it cannot be
 * parsed, or parses to a non-object (`JSON.parse` returns `null` for a NULL
 * column or the string "null"), an empty envelope is used instead so the
 * summary is still produced from an empty result rather than throwing.
 *
 * @param row - row providing `id`, `started_at`, `completed_at`, `ok`,
 *   `scope_mode`, `scope_value` and `result_json`.
 * @param wallTimeMs - wall time for this run, passed through unchanged.
 * @param taskId - task identifier, passed through unchanged.
 * @returns the per-run summary; `scope.value` is present only when
 *   `row.scope_value` is truthy, and `ok` is true only when `row.ok === 1`.
 */
export function projectImproveRunSummary(row, wallTimeMs, taskId) {
    let result = {};
    try {
        const parsed = JSON.parse(row.result_json);
        // Only an object is a usable envelope; `null` would throw on the
        // property reads below, so keep the empty default for anything else.
        if (parsed !== null && typeof parsed === "object") {
            result = parsed;
        }
    }
    catch {
        // fall through with empty result so per-stage rollups are zeros
    }
    const perRow = projectRunMetrics(result);
    finalizeImproveMetrics(perRow);
    const orphansPurged = toFiniteNumber(result.orphansPurged);
    const lintSummary = result.lintSummary;
    const lintFixed = lintSummary ? toFiniteNumber(lintSummary.fixed) : 0;
    const lintFlagged = lintSummary ? toFiniteNumber(lintSummary.flagged) : 0;
    return {
        id: row.id,
        startedAt: row.started_at,
        completedAt: row.completed_at,
        wallTimeMs,
        ok: row.ok === 1,
        scope: {
            mode: row.scope_mode,
            ...(row.scope_value ? { value: row.scope_value } : {}),
        },
        taskId,
        actions: perRow.actions,
        memorySummary: perRow.memorySummary,
        memoryCleanup: perRow.memoryCleanup,
        consolidation: perRow.consolidation,
        memoryInference: perRow.memoryInference,
        graphExtraction: perRow.graphExtraction,
        reflectsWithErrorContext: perRow.reflectsWithErrorContext,
        evalCasesWritten: perRow.evalCasesWritten,
        orphansPurged,
        lintFixed,
        lintFlagged,
    };
}
703
/**
 * Build the all-zero `byPhase` record used when no per-phase stats were
 * supplied. Each phase gets its own fresh stats object, so callers may mutate
 * one phase without affecting the others.
 */
function emptyPhaseStats() {
    const zeroed = () => ({ count: 0, totalMs: 0, medianMs: 0, p95Ms: 0 });
    return {
        consolidation: zeroed(),
        memoryInference: zeroed(),
        graphExtraction: zeroed(),
    };
}
710
/**
 * Summarise a list of whole-run wall times as
 * `{ count, medianMs, p95Ms, minMs, maxMs, byPhase }`.
 *
 * Quantiles are read from the ascending-sorted durations at index
 * `min(n - 1, floor(q * n))`. `byPhase` is attached as given; when it is
 * null/undefined an all-zero record from `emptyPhaseStats()` is used. The
 * input array is not mutated, and an empty list yields zeros for every
 * numeric field.
 */
export function computeWallTimeStats(durationsMs, byPhase) {
    const phase = byPhase ?? emptyPhaseStats();
    const count = durationsMs.length;
    if (count === 0) {
        return { count: 0, medianMs: 0, p95Ms: 0, minMs: 0, maxMs: 0, byPhase: phase };
    }
    // Sort a copy so the caller's ordering is preserved.
    const ascending = Array.from(durationsMs).sort((lhs, rhs) => lhs - rhs);
    const lastIndex = count - 1;
    const quantile = (fraction) => {
        const index = Math.min(lastIndex, Math.floor(fraction * count));
        return ascending[index] ?? 0;
    };
    return {
        count,
        medianMs: quantile(0.5),
        p95Ms: quantile(0.95),
        minMs: ascending[0] ?? 0,
        maxMs: ascending[lastIndex] ?? 0,
        byPhase: phase,
    };
}
725
/**
 * Summarise improve skip events into `{ skipped, skipReasons }`.
 *
 * Two kinds of skip events are handled:
 *   - Per-occurrence (no usable `metadata.count`): one event per skipped ref,
 *     so occurrences are SUMMED per reason.
 *   - Aggregated snapshot (`metadata.count` is a finite number > 0): a single
 *     per-run event whose count covers a stable, whole-stash condition
 *     (`no_new_signal`, `profile_filtered_all_passes`). Every run re-counts the
 *     same set, so summing across the window would inflate the figure N times.
 *     Only the LAST count seen per reason is kept; events are expected in
 *     chronological order, which makes that the latest run's snapshot.
 *
 * A reason that appears in both forms contributes occurrences + snapshot.
 * A missing, non-string or whitespace-only `metadata.reason` is bucketed as
 * "unknown"; otherwise the reason string is used exactly as supplied.
 *
 * Tallies are kept in Maps rather than plain objects so that a reason named
 * like an `Object.prototype` member ("constructor", "toString", ...) starts
 * from 0 instead of picking up the inherited property.
 *
 * @param events - iterable of events with optional `metadata.reason` / `metadata.count`.
 * @returns `skipReasons` as a plain object of reason → count, and `skipped`
 *   as the sum of those counts.
 */
export function buildImproveSkipSummary(events) {
    const summed = new Map();
    const latestSnapshot = new Map();
    for (const event of events) {
        const rawReason = event.metadata?.reason;
        const reason = typeof rawReason === "string" && rawReason.trim() ? rawReason : "unknown";
        const rawCount = event.metadata?.count;
        if (typeof rawCount === "number" && Number.isFinite(rawCount) && rawCount > 0) {
            // Overwrite: the last count-bearing event per reason wins.
            latestSnapshot.set(reason, rawCount);
        }
        else {
            summed.set(reason, (summed.get(reason) ?? 0) + 1);
        }
    }
    const totals = new Map(summed);
    for (const [reason, count] of latestSnapshot) {
        totals.set(reason, (totals.get(reason) ?? 0) + count);
    }
    // Object.fromEntries defines own data properties, so every reason string
    // (even "__proto__") lands in the result as a regular key.
    const skipReasons = Object.fromEntries(totals);
    const skipped = Object.values(skipReasons).reduce((a, b) => a + b, 0);
    return { skipped, skipReasons };
}