akm-cli 0.9.0-beta.1 → 0.9.0-beta.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +128 -0
- package/dist/assets/templates/html/default.html +78 -0
- package/dist/assets/templates/html/health.html +560 -0
- package/dist/assets/templates/html/vendor/echarts.min.js +45 -0
- package/dist/cli/shared.js +21 -5
- package/dist/cli.js +36 -5
- package/dist/commands/config-cli.js +0 -10
- package/dist/commands/health/html-report.js +448 -0
- package/dist/commands/health.js +97 -6
- package/dist/commands/improve/extract.js +38 -2
- package/dist/commands/improve/improve-auto-accept.js +27 -1
- package/dist/commands/improve/improve-cli.js +7 -0
- package/dist/commands/improve/improve.js +201 -66
- package/dist/commands/improve/reflect-noise.js +0 -0
- package/dist/commands/improve/reflect.js +25 -0
- package/dist/commands/proposal/drain.js +73 -6
- package/dist/commands/proposal/proposal-cli.js +22 -10
- package/dist/commands/proposal/proposal.js +12 -1
- package/dist/commands/proposal/validators/proposals.js +361 -338
- package/dist/commands/remember.js +6 -2
- package/dist/commands/tasks/tasks.js +32 -8
- package/dist/core/config/config-schema.js +5 -0
- package/dist/core/logs-db.js +304 -0
- package/dist/core/state-db.js +107 -14
- package/dist/indexer/db/db.js +2 -2
- package/dist/indexer/passes/memory-inference.js +61 -22
- package/dist/integrations/harnesses/claude/session-log.js +16 -4
- package/dist/llm/client.js +15 -0
- package/dist/llm/usage-persist.js +77 -0
- package/dist/llm/usage-telemetry.js +103 -0
- package/dist/output/context.js +3 -2
- package/dist/output/html-render.js +73 -0
- package/dist/output/shapes/helpers.js +17 -1
- package/dist/output/text/helpers.js +69 -1
- package/dist/scripts/migrate-storage.js +65 -14
- package/dist/scripts/migrations/import-fs-improve-runs-to-db.js +14 -2
- package/dist/tasks/backends/cron.js +46 -9
- package/dist/tasks/runner.js +99 -16
- package/dist/workflows/db.js +4 -0
- package/package.json +1 -1
- package/dist/commands/config-edit.js +0 -344
|
@@ -119,6 +119,26 @@ export async function runMemoryInferencePass(ctx) {
|
|
|
119
119
|
// 2026-05-26).
|
|
120
120
|
if (signal?.aborted)
|
|
121
121
|
return { aborted: true };
|
|
122
|
+
// Pre-check (#588): when `<parent>.derived.md` is already on disk the
|
|
123
|
+
// inference is by definition complete — the parent only looks pending
|
|
124
|
+
// because `markParentProcessed` never ran (process killed between the
|
|
125
|
+
// child write and the mark) or the child was created externally (e.g.
|
|
126
|
+
// consolidation). Skip the LLM/cache call entirely and mark the parent
|
|
127
|
+
// so it never re-pends. Before this check, production measurements
|
|
128
|
+
// showed ~55% of the pass's LLM budget re-deriving such parents only to
|
|
129
|
+
// discover the existing child after the fact.
|
|
130
|
+
if (fs.existsSync(derivedChildPath(record))) {
|
|
131
|
+
markParentProcessed(record);
|
|
132
|
+
return {
|
|
133
|
+
skipped: false,
|
|
134
|
+
splitParent: false,
|
|
135
|
+
written: 0,
|
|
136
|
+
fromCache: false,
|
|
137
|
+
retryAttempts: 0,
|
|
138
|
+
childExists: true,
|
|
139
|
+
precheck: true,
|
|
140
|
+
};
|
|
141
|
+
}
|
|
122
142
|
// Incremental cache: skip LLM call when body hash is unchanged and
|
|
123
143
|
// --re-enrich was not requested. The cache ref is the absolute file path.
|
|
124
144
|
const validate = (raw) => {
|
|
@@ -171,23 +191,30 @@ export async function runMemoryInferencePass(ctx) {
|
|
|
171
191
|
return { skipped: false, splitParent: true, written: writeOutcome.written, fromCache, retryAttempts };
|
|
172
192
|
}
|
|
173
193
|
// LLM produced a valid derived draft but no file was written — either
|
|
174
|
-
// because `<parent>.derived.md`
|
|
175
|
-
//
|
|
176
|
-
//
|
|
177
|
-
// into the freshAttempts
|
|
194
|
+
// because `<parent>.derived.md` appeared on disk after the pre-check
|
|
195
|
+
// above (a rare mid-flight race) or `writeAssetToSource` threw.
|
|
196
|
+
// Categorise as `childExists` so the consumed attempt is accounted for
|
|
197
|
+
// in health metrics rather than vanishing into the freshAttempts
|
|
198
|
+
// denominator.
|
|
178
199
|
//
|
|
179
|
-
// When the child
|
|
180
|
-
//
|
|
181
|
-
//
|
|
182
|
-
// (
|
|
183
|
-
//
|
|
184
|
-
//
|
|
185
|
-
// should be retried next run — so we key off the explicit `childExists`
|
|
186
|
-
// outcome rather than the conflated `written === 0`.
|
|
200
|
+
// When the child exists the inference is, by definition, complete — so
|
|
201
|
+
// mark the parent processed here too (#550), otherwise
|
|
202
|
+
// `isPendingMemory()` re-queues the same parent every run. A genuine
|
|
203
|
+
// write *failure* (`writeAssetToSource` threw) must NOT mark the parent
|
|
204
|
+
// — it should be retried next run — so we key off the explicit
|
|
205
|
+
// `childExists` outcome rather than the conflated `written === 0`.
|
|
187
206
|
if (writeOutcome.childExists) {
|
|
188
207
|
markParentProcessed(record);
|
|
189
208
|
}
|
|
190
|
-
return {
|
|
209
|
+
return {
|
|
210
|
+
skipped: false,
|
|
211
|
+
splitParent: false,
|
|
212
|
+
written: 0,
|
|
213
|
+
fromCache,
|
|
214
|
+
retryAttempts,
|
|
215
|
+
childExists: true,
|
|
216
|
+
precheck: false,
|
|
217
|
+
};
|
|
191
218
|
},
|
|
192
219
|
// Default concurrency of 4 for cloud APIs. Set `llm.concurrency: 1`
|
|
193
220
|
// in config.json for local model servers (LM Studio, Ollama).
|
|
@@ -224,11 +251,16 @@ export async function runMemoryInferencePass(ctx) {
|
|
|
224
251
|
result.writtenFacts += res.written;
|
|
225
252
|
}
|
|
226
253
|
else if ("childExists" in res && res.childExists) {
|
|
227
|
-
//
|
|
228
|
-
//
|
|
229
|
-
//
|
|
254
|
+
// Derived child already on disk. Track separately so this category is
|
|
255
|
+
// observable in health output and stops bleeding into the
|
|
256
|
+
// freshAttempts denominator. Pre-check skips (#588) are the routine
|
|
257
|
+
// self-healing path — no LLM attempt was consumed and the parent has
|
|
258
|
+
// been marked processed — so only the rare post-LLM case (mid-flight
|
|
259
|
+
// race or write failure) warrants a per-ref warning.
|
|
230
260
|
result.skippedChildExists += 1;
|
|
231
|
-
|
|
261
|
+
if (!res.precheck) {
|
|
262
|
+
warn(`memory inference: derived child for ${pending[i]?.ref ?? "<unknown>"} already existed or write failed; counted as skippedChildExists`);
|
|
263
|
+
}
|
|
232
264
|
}
|
|
233
265
|
else {
|
|
234
266
|
// The per-record state machine should cover every outcome. A hit here
|
|
@@ -324,6 +356,14 @@ function toMemoryName(memoriesDir, filePath) {
|
|
|
324
356
|
// user has organised under memories/.
|
|
325
357
|
return rel.replace(/\\/g, "/").replace(/\.md$/i, "");
|
|
326
358
|
}
|
|
359
|
+
/**
 * Build the path of the derived child file that belongs to a parent memory.
 *
 * Keeps the `<parent>.derived.md` naming convention in one place; both the
 * pre-LLM existence check (#588) and `writeDerivedMemory` call this helper.
 *
 * @param {{ stashRoot: string, name: string }} parent - Parent memory record.
 * @returns {string} `<stashRoot>/memories/<name>.derived.md`.
 */
function derivedChildPath(parent) {
    const { stashRoot, name } = parent;
    const childFile = `${name}.derived.md`;
    return path.join(stashRoot, "memories", childFile);
}
|
|
327
367
|
async function writeDerivedMemory(parent, derived) {
|
|
328
368
|
const writeTarget = {
|
|
329
369
|
kind: "filesystem",
|
|
@@ -338,11 +378,10 @@ async function writeDerivedMemory(parent, derived) {
|
|
|
338
378
|
};
|
|
339
379
|
const childName = `${parent.name}.derived`;
|
|
340
380
|
const childRefStr = `memory:${childName}`;
|
|
341
|
-
|
|
342
|
-
|
|
343
|
-
//
|
|
344
|
-
//
|
|
345
|
-
// (#550) instead of re-queueing it forever.
|
|
381
|
+
if (fs.existsSync(derivedChildPath(parent))) {
|
|
382
|
+
// The derived child appeared on disk after the caller's pre-check (#588)
|
|
383
|
+
// — a rare mid-flight race. Report `childExists` so the caller marks the
|
|
384
|
+
// parent processed (#550) instead of re-queueing it forever.
|
|
346
385
|
return { written: 0, childExists: true };
|
|
347
386
|
}
|
|
348
387
|
try {
|
|
@@ -5,7 +5,19 @@ import fs from "node:fs";
|
|
|
5
5
|
import os from "node:os";
|
|
6
6
|
import path from "node:path";
|
|
7
7
|
import { extractInlineRefMentions } from "../../session-logs/inline-refs.js";
|
|
8
|
-
|
|
8
|
+
/**
 * Root directory holding Claude Code's per-project JSONL session logs.
 *
 * Resolved on every call (never memoized at module load) so the
 * `AKM_CLAUDE_PROJECTS_DIR` override can be set after import. The override
 * exists so tests — and the isolated-storage sandbox — can point the scan at an
 * empty fixture directory instead of the real `~/.claude/projects`, which on an
 * actively-used machine holds many large session files and would make
 * `akm health` (which scans it synchronously) slow and non-hermetic.
 *
 * An override that is unset, empty, or whitespace-only is ignored: an exported
 * but blank variable (`AKM_CLAUDE_PROJECTS_DIR=`) means "not configured", not
 * "scan the empty path".
 *
 * @returns {string} The override when it carries a value, otherwise
 *   `<home>/.claude/projects`.
 */
function claudeProjectsDir() {
    const override = process.env.AKM_CLAUDE_PROJECTS_DIR;
    if (typeof override === "string" && override.trim() !== "") {
        return override;
    }
    return path.join(os.homedir(), ".claude", "projects");
}
|
|
9
21
|
/**
|
|
10
22
|
* Parse a single Claude Code JSONL event into a normalized {@link SessionEvent}.
|
|
11
23
|
* Returns `undefined` for events that don't carry textual content (file
|
|
@@ -93,11 +105,11 @@ export class ClaudeCodeProvider {
|
|
|
93
105
|
// HARNESS_BY_ID.get("claude").runtimeId.
|
|
94
106
|
name = "claude-code";
|
|
95
107
|
isAvailable() {
|
|
96
|
-
return fs.existsSync(
|
|
108
|
+
return fs.existsSync(claudeProjectsDir());
|
|
97
109
|
}
|
|
98
110
|
*readEvents(input) {
|
|
99
111
|
try {
|
|
100
|
-
for (const jsonlPath of this.#walkJsonl(
|
|
112
|
+
for (const jsonlPath of this.#walkJsonl(claudeProjectsDir())) {
|
|
101
113
|
const stat = fs.statSync(jsonlPath);
|
|
102
114
|
if (stat.mtimeMs < input.sinceMs)
|
|
103
115
|
continue;
|
|
@@ -128,7 +140,7 @@ export class ClaudeCodeProvider {
|
|
|
128
140
|
}
|
|
129
141
|
}
|
|
130
142
|
listSessions(input = {}) {
|
|
131
|
-
const root = input.location ??
|
|
143
|
+
const root = input.location ?? claudeProjectsDir();
|
|
132
144
|
const sinceMs = input.sinceMs ?? 0;
|
|
133
145
|
const summaries = [];
|
|
134
146
|
try {
|
package/dist/llm/client.js
CHANGED
|
@@ -14,6 +14,7 @@ import { fetchWithTimeout } from "../core/common.js";
|
|
|
14
14
|
import { resolveSecret } from "../core/config/config.js";
|
|
15
15
|
import { escapeJsonStringControls, parseJsonResponse, stripCodeFences, stripThinkBlocks } from "../core/parse.js";
|
|
16
16
|
import { warnVerbose } from "../core/warn.js";
|
|
17
|
+
import { emitLlmUsage, extractUsageTokens } from "./usage-telemetry.js";
|
|
17
18
|
// Re-export shared parse utilities so existing importers of `client.ts` continue
|
|
18
19
|
// to resolve `parseJsonResponse` and `parseEmbeddedJsonResponse` from this module.
|
|
19
20
|
export { escapeJsonStringControls, parseEmbeddedJsonResponse, parseJsonResponse, stripCodeFences, stripThinkBlocks, } from "../core/parse.js";
|
|
@@ -179,6 +180,10 @@ async function chatCompletionAttempt(config, messages, options, timeoutMs) {
|
|
|
179
180
|
const responseFormat = options?.responseSchema && config.supportsJsonSchema
|
|
180
181
|
? { response_format: { type: "json_schema", json_schema: { schema: options.responseSchema, strict: true } } }
|
|
181
182
|
: {};
|
|
183
|
+
// Wall-clock start for per-call usage telemetry (#576). Captured here so the
|
|
184
|
+
// emitted duration covers the full request/response/parse cycle of a single
|
|
185
|
+
// attempt, not the retry-wrapping `chatCompletion`.
|
|
186
|
+
const requestStartedAt = Date.now();
|
|
182
187
|
let response;
|
|
183
188
|
try {
|
|
184
189
|
response = await fetchWithTimeout(config.endpoint, {
|
|
@@ -241,6 +246,16 @@ async function chatCompletionAttempt(config, messages, options, timeoutMs) {
|
|
|
241
246
|
catch {
|
|
242
247
|
throw new LlmCallError(`LLM response was not valid JSON ${config.endpoint}: ${redactErrorBody(rawOkBody)}`, "parse_error", response.status);
|
|
243
248
|
}
|
|
249
|
+
// Per-call usage telemetry (#576). Best-effort and fully isolated: a missing
|
|
250
|
+
// or garbled usage block still records duration + model, and a throwing sink
|
|
251
|
+
// can never fail the call (emitLlmUsage swallows its own errors). The stage
|
|
252
|
+
// is supplied ambiently by emitLlmUsage; no `stage` param is threaded here.
|
|
253
|
+
emitLlmUsage({
|
|
254
|
+
model: typeof json.model === "string" && json.model ? json.model : config.model,
|
|
255
|
+
durationMs: Date.now() - requestStartedAt,
|
|
256
|
+
finishReason: typeof json.choices?.[0]?.finish_reason === "string" ? json.choices[0].finish_reason : undefined,
|
|
257
|
+
...extractUsageTokens(json.usage),
|
|
258
|
+
});
|
|
244
259
|
const content = (json.choices?.[0]?.message?.content ?? "").trim();
|
|
245
260
|
const reasoning = (json.choices?.[0]?.message?.reasoning_content ?? "").trim();
|
|
246
261
|
return content || reasoning;
|
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
// This Source Code Form is subject to the terms of the Mozilla Public
|
|
2
|
+
// License, v. 2.0. If a copy of the MPL was not distributed with this
|
|
3
|
+
// file, You can obtain one at https://mozilla.org/MPL/2.0/.
|
|
4
|
+
/**
|
|
5
|
+
* Bridge per-call LLM usage telemetry (#576) to the events stream.
|
|
6
|
+
*
|
|
7
|
+
* `usage-telemetry.ts` stays dependency-free of the events/db layer so the
|
|
8
|
+
* low-level `client.ts` never imports persistence. This module is the wiring:
|
|
9
|
+
* it installs a {@link LlmUsageSink} that persists each {@link LlmUsageRecord}
|
|
10
|
+
* as one `llm_usage` event.
|
|
11
|
+
*
|
|
12
|
+
* Why reuse the events table (vs a dedicated table): volume is low (~100
|
|
13
|
+
* calls/day), the records are append-only and time-windowed exactly like every
|
|
14
|
+
* other event, and `akm health` already aggregates per-window event reads — a
|
|
15
|
+
* separate table would duplicate retention (`purgeOldEvents`), reads, and
|
|
16
|
+
* migration surface for no benefit. See the commit message for #576.
|
|
17
|
+
*
|
|
18
|
+
* Every record is written through `appendEvent`, which is itself best-effort
|
|
19
|
+
* (a write failure logs once and never throws). Combined with the sink-error
|
|
20
|
+
* swallowing in `emitLlmUsage`, telemetry can never break a real run.
|
|
21
|
+
*/
|
|
22
|
+
import { appendEvent } from "../core/events.js";
|
|
23
|
+
import { clearLlmUsageSink, hasLlmUsageSink, setLlmUsageSink } from "./usage-telemetry.js";
|
|
24
|
+
/** Event type for persisted per-call LLM usage telemetry. */
|
|
25
|
+
export const LLM_USAGE_EVENT = "llm_usage";
|
|
26
|
+
/**
 * Convert a usage record into the metadata object stored on an event.
 *
 * `durationMs` is always copied. Every other field is copied only when it is
 * not `undefined`, so a call without usage data records no token keys at all.
 *
 * @param {object} record - Usage record emitted for one LLM call.
 * @returns {object} Metadata containing `durationMs` plus the defined optional fields.
 */
function toEventMetadata(record) {
    // Listed in the order the keys are written to the metadata object.
    const optionalKeys = [
        "stage",
        "model",
        "finishReason",
        "promptTokens",
        "completionTokens",
        "totalTokens",
        "reasoningTokens",
    ];
    const metadata = { durationMs: record.durationMs };
    for (const key of optionalKeys) {
        const value = record[key];
        if (value !== undefined) {
            metadata[key] = value;
        }
    }
    return metadata;
}
|
|
48
|
+
/**
 * Install a usage sink that stores every LLM call as one `llm_usage` event
 * through `appendEvent`.
 *
 * The returned disposer clears the sink. Call it from a `finally` block so
 * per-run wiring does not leak into later runs or into other tests.
 *
 * @param {object} [ctx] - Forwarded unchanged as the second argument of
 *   `appendEvent`; expected to carry the caller's already-open `state.db`
 *   handle. When omitted, `appendEvent` uses its own default path.
 * @returns {() => void} Disposer that clears the installed sink.
 */
export function installLlmUsagePersistence(ctx) {
    const persistRecord = (record) => {
        appendEvent({ eventType: LLM_USAGE_EVENT, metadata: toEventMetadata(record) }, ctx);
    };
    setLlmUsageSink(persistRecord);
    return function disposeLlmUsagePersistence() {
        clearLlmUsageSink();
    };
}
|
|
66
|
+
/**
 * Variant of {@link installLlmUsagePersistence} that leaves an existing sink
 * untouched.
 *
 * Intended for standalone entry points (`akm consolidate`, `akm drain`) that
 * may also run as a sub-step of `akm improve`. If a sink is already installed
 * the enclosing run keeps ownership and the returned disposer does nothing, so
 * the enclosing run's `finally` is still the one that clears it.
 *
 * @param {object} [ctx] - Passed through to {@link installLlmUsagePersistence}.
 * @returns {() => void} Disposer: a no-op when a sink already existed,
 *   otherwise the disposer of the newly installed sink.
 */
export function installLlmUsagePersistenceIfAbsent(ctx) {
    const noop = () => { };
    return hasLlmUsageSink() ? noop : installLlmUsagePersistence(ctx);
}
|
|
@@ -0,0 +1,103 @@
|
|
|
1
|
+
// This Source Code Form is subject to the terms of the Mozilla Public
|
|
2
|
+
// License, v. 2.0. If a copy of the MPL was not distributed with this
|
|
3
|
+
// file, You can obtain one at https://mozilla.org/MPL/2.0/.
|
|
4
|
+
/**
|
|
5
|
+
* Per-call LLM usage telemetry (#576).
|
|
6
|
+
*
|
|
7
|
+
* `chatCompletion` captures usage + model + finish_reason + wall-time for
|
|
8
|
+
* EVERY OpenAI-compatible call and emits one {@link LlmUsageRecord} through a
|
|
9
|
+
* module-level sink. The sink indirection keeps `client.ts` free of any
|
|
10
|
+
* dependency on the events/db layer: the application wires the sink to
|
|
11
|
+
* persistence at startup / per improve run, and tests can inspect records in
|
|
12
|
+
* memory.
|
|
13
|
+
*
|
|
14
|
+
* The pipeline *stage* that made the call is ambient, not threaded through
|
|
15
|
+
* call sites. A param-threading prototype was deliberately discarded in 0.8.5
|
|
16
|
+
* (every call site would have to forward a `stage` argument it does not care
|
|
17
|
+
* about). Instead callers wrap a well-delimited phase once with
|
|
18
|
+
* {@link withLlmStage}; any `chatCompletion` invoked inside that async region —
|
|
19
|
+
* however deeply nested — is attributed to that stage via `AsyncLocalStorage`.
|
|
20
|
+
*
|
|
21
|
+
* EVERYTHING here is best-effort. Telemetry must NEVER break a real LLM call:
|
|
22
|
+
* a sink that throws, an unset stage, or a malformed usage block all degrade
|
|
23
|
+
* silently. `emitLlmUsage` swallows sink errors; `currentLlmStage` returns
|
|
24
|
+
* `undefined` outside any `withLlmStage` scope.
|
|
25
|
+
*/
|
|
26
|
+
import { AsyncLocalStorage } from "node:async_hooks";
|
|
27
|
+
const stageStorage = new AsyncLocalStorage();
|
|
28
|
+
let usageSink;
|
|
29
|
+
/**
 * Execute `fn` with `stage` installed as the ambient LLM stage.
 *
 * The stage is stored in the module's `AsyncLocalStorage`, so it is what
 * {@link currentLlmStage} reports for code running inside `fn`. With nested
 * calls the innermost stage wins.
 *
 * @param {string} stage - Stage label to attribute calls to.
 * @param {Function} fn - Callback to run inside the stage scope.
 * @returns {*} Whatever `fn` returns.
 */
export function withLlmStage(stage, fn) {
    const outcome = stageStorage.run(stage, fn);
    return outcome;
}
|
|
38
|
+
/**
 * Read the ambient LLM stage of the current async context.
 *
 * @returns {string | undefined} The stage set by the innermost enclosing
 *   {@link withLlmStage}, or `undefined` when called outside any such scope.
 */
export function currentLlmStage() {
    const activeStage = stageStorage.getStore();
    return activeStage;
}
|
|
42
|
+
/**
 * Register `sink` as the module-wide receiver of usage records, replacing any
 * sink installed earlier.
 *
 * The application points this at persistence; tests install an in-memory
 * collector. Pair every call with {@link clearLlmUsageSink} in a `finally`.
 *
 * @param {(record: object) => void} sink - Receiver called once per emitted record.
 * @returns {void}
 */
export function setLlmUsageSink(sink) {
    usageSink = sink;
}
|
|
50
|
+
/**
 * Uninstall the current usage sink; records emitted afterwards go nowhere.
 * Safe to call repeatedly or when no sink is installed.
 *
 * @returns {void}
 */
export function clearLlmUsageSink() {
    usageSink = undefined;
}
|
|
54
|
+
/**
 * Report whether a usage sink is installed right now.
 *
 * Standalone entry points check this before installing their own sink so they
 * do not overwrite one that an enclosing run (e.g. `akm improve`) set up.
 *
 * @returns {boolean} `true` unless the stored sink is `undefined`.
 */
export function hasLlmUsageSink() {
    const installed = !(usageSink === undefined);
    return installed;
}
|
|
62
|
+
/**
 * Hand one usage record to the installed sink, filling in the ambient stage
 * when the record does not carry its own `stage`.
 *
 * Best-effort by contract: telemetry must never fail the LLM call that
 * produced it.
 * - No sink installed: no-op.
 * - Sink throws synchronously: the error is swallowed.
 * - Sink returns a promise (async sink) that rejects: the rejection is
 *   swallowed too. Without this it would surface as an unhandled rejection,
 *   which by default terminates the Node.js process.
 *
 * @param {object} record - Usage record; `stage` is optional.
 * @returns {void}
 */
export function emitLlmUsage(record) {
    // Snapshot the sink so a concurrent clear/replace cannot change it mid-call.
    const sink = usageSink;
    if (!sink)
        return;
    try {
        const pending = sink({ ...record, stage: record.stage ?? currentLlmStage() });
        if (pending && typeof pending.then === "function") {
            // Fire-and-forget: detach the failure path of an async sink.
            Promise.resolve(pending).catch(() => { });
        }
    }
    catch {
        // Telemetry must never break a real run.
    }
}
|
|
78
|
+
/**
 * Accept `value` only when it is a finite number that is not negative.
 *
 * @param {*} value - Candidate value from a provider response.
 * @returns {number | undefined} `value` unchanged when valid, otherwise
 *   `undefined` (non-numbers, `NaN`, infinities, negatives).
 */
function asFiniteNonNegative(value) {
    if (typeof value !== "number") {
        return undefined;
    }
    if (!Number.isFinite(value) || value < 0) {
        return undefined;
    }
    return value;
}
|
|
81
|
+
/**
 * Map a provider `usage` block onto the token fields of a usage record.
 *
 * Each source value goes through `asFiniteNonNegative`; values it rejects are
 * left out of the result rather than set to zero, so "0 tokens" stays
 * distinguishable from "unknown".
 *
 * @param {*} usage - The `usage` object of a chat-completion response, if any.
 * @returns {object} Subset of `{ promptTokens, completionTokens, totalTokens,
 *   reasoningTokens }`; `{}` when `usage` is falsy or not an object.
 */
export function extractUsageTokens(usage) {
    if (!usage || typeof usage !== "object")
        return {};
    // [record field, raw provider value] — read in the same order as the output keys.
    const candidates = [
        ["promptTokens", usage.prompt_tokens],
        ["completionTokens", usage.completion_tokens],
        ["totalTokens", usage.total_tokens],
        ["reasoningTokens", usage.completion_tokens_details?.reasoning_tokens],
    ];
    const out = {};
    for (const [field, rawValue] of candidates) {
        const tokens = asFiniteNonNegative(rawValue);
        if (tokens !== undefined) {
            out[field] = tokens;
        }
    }
    return out;
}
|
package/dist/output/context.js
CHANGED
|
@@ -12,7 +12,7 @@
|
|
|
12
12
|
* Initialized from `cli.ts` before `runMain`.
|
|
13
13
|
*/
|
|
14
14
|
import { UsageError } from "../core/errors.js";
|
|
15
|
-
export const OUTPUT_FORMATS = ["json", "yaml", "text", "jsonl", "md"];
|
|
15
|
+
export const OUTPUT_FORMATS = ["json", "yaml", "text", "jsonl", "md", "html"];
|
|
16
16
|
export const DETAIL_LEVELS = ["brief", "normal", "full"];
|
|
17
17
|
export const SHAPE_MODES = ["human", "agent", "summary"];
|
|
18
18
|
export function parseOutputFormat(value) {
|
|
@@ -80,7 +80,8 @@ export function resolveOutputMode(argv, defaults = {}) {
|
|
|
80
80
|
// use `--shape`. Unknown `--detail` values fall through to the default.
|
|
81
81
|
const detail = parseDetailLevel(rawDetail) ?? defaults?.detail ?? "brief";
|
|
82
82
|
const shape = parseShapeMode(rawShape) ?? "human";
|
|
83
|
-
|
|
83
|
+
const outputPath = parseFlagValue(argv, "--output");
|
|
84
|
+
return { format, detail, shape, forAgent: shape === "agent", ...(outputPath ? { outputPath } : {}) };
|
|
84
85
|
}
|
|
85
86
|
let _mode;
|
|
86
87
|
/**
|
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
// This Source Code Form is subject to the terms of the Mozilla Public
|
|
2
|
+
// License, v. 2.0. If a copy of the MPL was not distributed with this
|
|
3
|
+
// file, You can obtain one at https://mozilla.org/MPL/2.0/.
|
|
4
|
+
/**
|
|
5
|
+
* `--format html` rendering primitives (#582).
|
|
6
|
+
*
|
|
7
|
+
* Templates live in `src/assets/templates/html/` (mirrored to
|
|
8
|
+
* `dist/assets/templates/html/` by `scripts/copy-assets.ts`). A command with a
|
|
9
|
+
* bespoke template ships `<command>.html`; every other command falls back to
|
|
10
|
+
* `default.html`, which renders the command's JSON envelope in a `<pre>`
|
|
11
|
+
* block. Substitution is plain `%%TOKEN%%` string replacement — no template
|
|
12
|
+
* engine, by design.
|
|
13
|
+
*/
|
|
14
|
+
import fs from "node:fs";
|
|
15
|
+
import path from "node:path";
|
|
16
|
+
import { getDirname } from "../runtime.js";
|
|
17
|
+
const TEMPLATES_DIR = path.join(getDirname(import.meta.url), "../assets/templates/html");
|
|
18
|
+
/** Template used by every command without a bespoke `<command>.html`. */
|
|
19
|
+
export const DEFAULT_TEMPLATE = "default";
|
|
20
|
+
/**
 * Pick the template file for a command.
 *
 * Returns `<command>.html` when that file exists in the templates directory,
 * otherwise `default.html`. The command is trimmed and reduced to its basename
 * first, so a command string containing directory segments cannot point
 * outside the templates directory.
 *
 * @param {string} command - Command name.
 * @returns {string} Path of the bespoke template, or of the default template.
 */
export function resolveTemplatePath(command) {
    const name = path.basename(command.trim());
    const fallback = path.join(TEMPLATES_DIR, `${DEFAULT_TEMPLATE}.html`);
    if (name === DEFAULT_TEMPLATE) {
        return fallback;
    }
    const bespoke = path.join(TEMPLATES_DIR, `${name}.html`);
    return fs.existsSync(bespoke) ? bespoke : fallback;
}
|
|
33
|
+
/** Matches a `%%TOKEN%%` placeholder (uppercase + underscore key). */
|
|
34
|
+
const TOKEN_RE = /%%[A-Z_]+%%/g;
|
|
35
|
+
/**
 * Load a template file and fill in its `%%TOKEN%%` placeholders.
 *
 * Substitution is a single pass over the original template text, so it is
 * order-independent: a replacement value that itself contains token-like text
 * is not processed again. Tokens with no entry in `replacements` stay in the
 * output untouched; entries in `replacements` that the template never uses are
 * ignored.
 *
 * @param {string} templatePath - Template file to read as UTF-8.
 * @param {object} replacements - Map keyed by the full token, e.g. `"%%TITLE%%"`.
 * @returns {string} The rendered document.
 */
export function renderHtml(templatePath, replacements) {
    const template = fs.readFileSync(templatePath, "utf8");
    const substitute = (token) => {
        if (token in replacements) {
            return replacements[token];
        }
        return token;
    };
    return template.replace(TOKEN_RE, substitute);
}
|
|
48
|
+
/**
 * Minimal HTML entity escaping for text interpolated into templates.
 *
 * Escapes `&`, `<`, `>`, `"` and `'`. The single quote is escaped alongside the
 * double quote so the result is safe in both `"…"` and `'…'` attribute
 * contexts, not only the double-quoted attributes the bundled templates use
 * today.
 *
 * @param {string} value - Raw text.
 * @returns {string} `value` with the five characters replaced by entities.
 */
export function escapeHtml(value) {
    return value
        // `&` must go first, or the entities produced below would be re-escaped.
        .replaceAll("&", "&amp;")
        .replaceAll("<", "&lt;")
        .replaceAll(">", "&gt;")
        .replaceAll('"', "&quot;")
        .replaceAll("'", "&#39;");
}
|
|
62
|
+
/**
 * Deliver a rendered document to its destination.
 *
 * Without `outputPath` the content is printed with `console.log`. With
 * `outputPath` (`--output`) the parent directory is created as needed and the
 * content is written to the file, gaining a trailing newline if it lacks one.
 *
 * @param {string} content - Rendered document.
 * @param {string} [outputPath] - Destination file; falsy means stdout.
 * @returns {void}
 */
export function deliverRendered(content, outputPath) {
    if (!outputPath) {
        console.log(content);
        return;
    }
    const parentDir = path.dirname(path.resolve(outputPath));
    fs.mkdirSync(parentDir, { recursive: true });
    const fileBody = content.endsWith("\n") ? content : `${content}\n`;
    fs.writeFileSync(outputPath, fileBody);
}
|
|
@@ -41,7 +41,21 @@ export function shapeProposalEntry(entry, detail) {
|
|
|
41
41
|
return pickFields(entry, ["id", "ref", "status", "source", "createdAt"]);
|
|
42
42
|
}
|
|
43
43
|
if (detail === "normal") {
|
|
44
|
-
|
|
44
|
+
// `confidence` and `gateDecision` (#577) explain why a proposal is pending,
|
|
45
|
+
// so they are projected at `normal` for `akm proposal list/show` — both are
|
|
46
|
+
// optional and absent on legacy proposals.
|
|
47
|
+
return pickFields(entry, [
|
|
48
|
+
"id",
|
|
49
|
+
"ref",
|
|
50
|
+
"status",
|
|
51
|
+
"source",
|
|
52
|
+
"sourceRun",
|
|
53
|
+
"createdAt",
|
|
54
|
+
"updatedAt",
|
|
55
|
+
"confidence",
|
|
56
|
+
"gateDecision",
|
|
57
|
+
"review",
|
|
58
|
+
]);
|
|
45
59
|
}
|
|
46
60
|
// full: project everything including the payload.
|
|
47
61
|
return pickFields(entry, [
|
|
@@ -52,6 +66,8 @@ export function shapeProposalEntry(entry, detail) {
|
|
|
52
66
|
"sourceRun",
|
|
53
67
|
"createdAt",
|
|
54
68
|
"updatedAt",
|
|
69
|
+
"confidence",
|
|
70
|
+
"gateDecision",
|
|
55
71
|
"payload",
|
|
56
72
|
"review",
|
|
57
73
|
]);
|
|
@@ -235,6 +235,50 @@ export function formatProposalProducerPlain(command, r) {
|
|
|
235
235
|
const status = String(proposal.status ?? "pending");
|
|
236
236
|
return `${command}: queued proposal ${id} (${ref}) [${status}]`;
|
|
237
237
|
}
|
|
238
|
+
/**
 * Produce the one-line gate summary shown on the proposal list / show
 * surfaces (#577), e.g. `gate=deferred:below-threshold (0.72 < 0.90)`.
 *
 * The line is `gate=<outcome>`, then `:<reason>` when a non-empty string
 * reason exists, then ` (<comparison>)` when
 * `formatGateThresholdComparison` yields one.
 *
 * @param {*} raw - Stored gate decision; may be absent on legacy proposals.
 * @returns {string} The summary, or `""` when `raw` is not an object or has no
 *   non-empty string `outcome`.
 */
export function formatGateDecisionSummary(raw) {
    if (raw === null || typeof raw !== "object")
        return "";
    const outcome = raw.outcome;
    if (typeof outcome !== "string" || outcome.length === 0)
        return "";
    const hasReason = typeof raw.reason === "string" && raw.reason.length > 0;
    const reasonPart = hasReason ? `:${raw.reason}` : "";
    const comparison = formatGateThresholdComparison(raw);
    const comparisonPart = comparison ? ` (${comparison})` : "";
    return `gate=${outcome}${reasonPart}${comparisonPart}`;
}
|
|
254
|
+
/**
 * Reconstruct the threshold comparison a gate applied, for display.
 *
 * Checked in order; the first match wins:
 * 1. `confidence` and `thresholds.autoAccept` both numeric: two-decimal
 *    comparison, e.g. "0.72 < 0.90".
 * 2. `thresholds.maxDiffLines` numeric: "210 > 200" or "150 <= 200" when
 *    `measured` is numeric, otherwise the bound alone ("maxDiffLines=200").
 * 3. `thresholds.minContentLines` numeric: "1 < 5" or "10 >= 5" when
 *    `measured` is numeric, otherwise the bound alone ("minContentLines=5").
 *
 * The operator is always derived from the actual values, so the rendered
 * comparison is a true statement even when the value lies inside the band.
 *
 * @param {object} d - Gate decision object (already checked to be an object).
 * @returns {string} The comparison text, or `""` when the operands are missing.
 */
function formatGateThresholdComparison(d) {
    const thresholds = typeof d.thresholds === "object" && d.thresholds !== null ? d.thresholds : {};
    const confidence = typeof d.confidence === "number" ? d.confidence : undefined;
    const autoAccept = typeof thresholds.autoAccept === "number" ? thresholds.autoAccept : undefined;
    if (confidence !== undefined && autoAccept !== undefined) {
        const op = confidence >= autoAccept ? ">=" : "<";
        return `${confidence.toFixed(2)} ${op} ${autoAccept.toFixed(2)}`;
    }
    // Drain bands (#577): full comparison when the measured value is present,
    // otherwise fall back to the bound alone.
    const measured = typeof d.measured === "number" ? d.measured : undefined;
    if (typeof thresholds.maxDiffLines === "number") {
        if (measured === undefined) {
            return `maxDiffLines=${thresholds.maxDiffLines}`;
        }
        const op = measured > thresholds.maxDiffLines ? ">" : "<=";
        return `${measured} ${op} ${thresholds.maxDiffLines}`;
    }
    if (typeof thresholds.minContentLines === "number") {
        if (measured === undefined) {
            return `minContentLines=${thresholds.minContentLines}`;
        }
        const op = measured < thresholds.minContentLines ? "<" : ">=";
        return `${measured} ${op} ${thresholds.minContentLines}`;
    }
    return "";
}
|
|
238
282
|
export function formatProposalListPlain(r) {
|
|
239
283
|
const proposals = Array.isArray(r.proposals) ? r.proposals : [];
|
|
240
284
|
const total = typeof r.totalCount === "number" ? r.totalCount : proposals.length;
|
|
@@ -248,7 +292,11 @@ export function formatProposalListPlain(r) {
|
|
|
248
292
|
const status = String(p.status ?? "?");
|
|
249
293
|
const source = String(p.source ?? "?");
|
|
250
294
|
const created = String(p.createdAt ?? "?");
|
|
251
|
-
|
|
295
|
+
// #577: surface the gate verdict inline so the queue explains itself
|
|
296
|
+
// ("deferred: below-threshold"). Legacy proposals carry no gateDecision.
|
|
297
|
+
const gate = formatGateDecisionSummary(p.gateDecision);
|
|
298
|
+
const gateSuffix = gate ? ` ${gate}` : "";
|
|
299
|
+
lines.push(`${id} [${status}] ${ref} source=${source} ${created}${gateSuffix}`);
|
|
252
300
|
}
|
|
253
301
|
return lines.join("\n").trimEnd();
|
|
254
302
|
}
|
|
@@ -265,6 +313,26 @@ export function formatProposalShowPlain(r) {
|
|
|
265
313
|
lines.push(`createdAt: ${String(p.createdAt)}`);
|
|
266
314
|
if (p.updatedAt)
|
|
267
315
|
lines.push(`updatedAt: ${String(p.updatedAt)}`);
|
|
316
|
+
if (typeof p.confidence === "number")
|
|
317
|
+
lines.push(`confidence: ${p.confidence.toFixed(2)}`);
|
|
318
|
+
// #577: gate decision (auto-accepted / deferred / auto-rejected + reason +
|
|
319
|
+
// thresholds). Absent on legacy proposals — render "unknown" so the field is
|
|
320
|
+
// always present and the operator never sees a silent gap.
|
|
321
|
+
const gate = p.gateDecision;
|
|
322
|
+
if (gate && typeof gate.outcome === "string") {
|
|
323
|
+
lines.push(`gate.decision: ${String(gate.outcome)}`);
|
|
324
|
+
lines.push(`gate.reason: ${gate.reason ? String(gate.reason) : "unknown"}`);
|
|
325
|
+
const cmp = formatGateThresholdComparison(gate);
|
|
326
|
+
if (cmp)
|
|
327
|
+
lines.push(`gate.thresholds: ${cmp}`);
|
|
328
|
+
if (gate.gate)
|
|
329
|
+
lines.push(`gate.by: ${String(gate.gate)}`);
|
|
330
|
+
if (gate.decidedAt)
|
|
331
|
+
lines.push(`gate.decidedAt: ${String(gate.decidedAt)}`);
|
|
332
|
+
}
|
|
333
|
+
else {
|
|
334
|
+
lines.push("gate.decision: unknown");
|
|
335
|
+
}
|
|
268
336
|
const review = p.review;
|
|
269
337
|
if (review) {
|
|
270
338
|
lines.push(`review.outcome: ${String(review.outcome ?? "?")}`);
|