clementine-agent 1.18.103 → 1.18.104
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli/dashboard.js +119 -10
- package/package.json +1 -1
package/dist/cli/dashboard.js
CHANGED
|
@@ -5916,6 +5916,90 @@ If the tool returns nothing or errors, return an empty array \`[]\`.`,
|
|
|
5916
5916
|
res.status(500).json({ ok: false, error: String(err) });
|
|
5917
5917
|
}
|
|
5918
5918
|
});
|
|
5919
|
+
// ── PRD §12 Phase 6.3 / 1.18.104: real latency split ─────────────
// GET /api/runs/latency-summary?windowHours=N
//
// Aggregates per-run tool durations from the event store so the
// Latency mini-card can show real numbers (model API time / tool
// execution time / framework overhead) instead of the heuristic
// placeholder. Only runs with path B PostToolUse hook events contribute
// to the averages; the dashboard falls back to the heuristic when
// coverage is too low.
//
// Success response:
//   { ok: true, windowHours, runsTotal, runsWithHooks, coverage,
//     avgDurationMs, avgToolMs, avgModelMs, avgOverheadMs, summaries }
//   - coverage is a 0..1 fraction (runsWithHooks / runsTotal), not a percent.
//   - avgToolMs + avgModelMs + avgOverheadMs always adds up to avgDurationMs
//     (modulo floating-point rounding), so a consumer can turn the three
//     values into bar widths without any segment exceeding 100%.
//   - summaries[] carries the raw (uncapped) per-run tool totals.
// Failure response: HTTP 500 { ok: false, error }.
//
// Implementation note: walking N event-log files is O(events) but
// the data is tiny (hundreds of KB per run, mostly text). For 7d of
// runs this is well under 100ms even with hundreds of runs. If it
// gets slow we'll add an in-memory cache keyed on the file mtime.
app.get('/api/runs/latency-summary', async (req, res) => {
    try {
        // Window is clamped to 1h..168h (7d). A missing, unparseable or zero
        // value falls back to the full 168h window.
        const requestedHours = Number.parseInt(String(req.query.windowHours ?? '168'), 10) || 168;
        const windowHours = Math.max(1, Math.min(168, requestedHours));
        const cutoffMs = Date.now() - windowHours * 60 * 60 * 1000;
        // A usable duration is a finite, non-negative number. A bare
        // `typeof x === 'number'` check would also admit NaN / Infinity, and a
        // single such value would poison every average below (NaN serialises
        // to null in the JSON response).
        const isDurationMs = (value) => Number.isFinite(value) && value >= 0;
        const log = new CronRunLog();
        // NOTE(review): the (500, 30) arguments presumably cap the number of
        // entries and the look-back in days; confirm against CronRunLog.
        const runs = log.readAllRecent(500, 30);
        const inWindow = runs.filter((r) => {
            // A missing startedAt maps to 0 and an unparseable one to NaN; both
            // fail the cutoff comparison and drop the run.
            const startedMs = r.startedAt ? new Date(r.startedAt).getTime() : 0;
            return startedMs >= cutoffMs && r.status === 'ok' && isDurationMs(r.durationMs);
        });
        const { EventLog } = await import('../gateway/event-log.js');
        const eventLog = new EventLog();
        const summaries = [];
        let withHooks = 0;
        let totalDurationMs = 0;
        let totalToolMs = 0;
        for (const r of inWindow) {
            const runId = r.id;
            // Runs without an id cannot be joined to the event log. They get no
            // summary row but still count in runsTotal / the coverage denominator.
            if (!runId) {
                continue;
            }
            let toolMs = 0;
            let calls = 0;
            for (const ev of eventLog.readByRun(runId)) {
                // Path B PostToolUse fires after every tool with duration_ms set
                // (see hook-event ingest endpoint in 1.18.101). The kind='hook'
                // + hookEventName='PostToolUse' combo is what we sum.
                if (ev.kind === 'hook' && ev.hookEventName === 'PostToolUse' && isDurationMs(ev.durationMs)) {
                    toolMs += ev.durationMs;
                    calls += 1;
                }
            }
            const hasHook = calls > 0;
            // The inWindow filter already guarantees a finite number here.
            const durationMs = r.durationMs;
            summaries.push({ runId, durationMs, toolDurationMs: toolMs, toolCalls: calls, hasHookData: hasHook });
            if (hasHook) {
                withHooks += 1;
                totalDurationMs += durationMs;
                totalToolMs += toolMs;
            }
        }
        // Coverage fraction (0..1): how many runs in the window contributed real data.
        const coverage = inWindow.length > 0 ? withHooks / inWindow.length : 0;
        // Average splits across runs that DID have hook data. The model
        // segment is what's left after tools + a small framework overhead
        // (we use 5% as a conservative estimate for SDK plumbing time —
        // real measurement of this needs a tighter timing pass that we'll
        // add when path B's SessionStart/Stop events get duration_ms too).
        const avgDurationMs = withHooks > 0 ? totalDurationMs / withHooks : 0;
        const overheadFraction = 0.05;
        const overheadMs = avgDurationMs * overheadFraction;
        // Summed tool durations can exceed the run's wall-clock duration (for
        // example when tool calls overlap). Cap the averaged tool segment at
        // what is left after overhead so the three segments never add up to
        // more than the average duration.
        const rawAvgToolMs = withHooks > 0 ? totalToolMs / withHooks : 0;
        const avgToolMs = Math.min(rawAvgToolMs, avgDurationMs - overheadMs);
        const modelMs = Math.max(0, avgDurationMs - avgToolMs - overheadMs);
        res.json({
            ok: true,
            windowHours,
            runsTotal: inWindow.length,
            runsWithHooks: withHooks,
            coverage,
            avgDurationMs,
            avgToolMs,
            avgModelMs: modelMs,
            avgOverheadMs: overheadMs,
            summaries,
        });
    }
    catch (err) {
        res.status(500).json({ ok: false, error: String(err) });
    }
});
|
|
5919
6003
|
// ── Recent runs across ALL cron jobs ───────────────────────────
|
|
5920
6004
|
// Powers the "Recent History" zone on the Tasks page. Returns the most
|
|
5921
6005
|
// recent N CronRunEntry rows merged from every per-job .jsonl, sorted
|
|
@@ -24615,20 +24699,38 @@ async function refreshMiniDashboards() {
|
|
|
24615
24699
|
var costFigure = totalCost7 < 0.01 ? '$' + totalCost7.toFixed(4) : '$' + totalCost7.toFixed(2);
|
|
24616
24700
|
|
|
24617
24701
|
// ── Latency split card ─────────────────────────────────────────────
|
|
24618
|
-
//
|
|
24619
|
-
//
|
|
24620
|
-
//
|
|
24621
|
-
// that get replaced with real values once PostToolUse durations are
|
|
24622
|
-
// summed from event logs (Phase 4d).
|
|
24702
|
+
// PRD §12 Phase 6.3 / 1.18.104: real latency split when path B hooks
|
|
24703
|
+
// are providing PostToolUse duration_ms data. Falls back to the
|
|
24704
|
+
// heuristic placeholder when coverage is too low.
|
|
24623
24705
|
var okRuns = last7.filter(function(rn) { return rn.status === 'ok' && typeof rn.durationMs === 'number'; });
|
|
24624
24706
|
var avgDur = okRuns.length > 0
|
|
24625
24707
|
? Math.round(okRuns.reduce(function(a, b) { return a + b.durationMs; }, 0) / okRuns.length)
|
|
24626
24708
|
: 0;
|
|
24709
|
+
// Default to heuristic split.
|
|
24627
24710
|
var latToolPct = 35, latModelPct = 55, latOverPct = 10;
|
|
24711
|
+
var latencyMode = 'heuristic'; // becomes 'real' if coverage >= 50%
|
|
24712
|
+
var coverageLabel = '';
|
|
24713
|
+
try {
|
|
24714
|
+
var lr = await apiFetch('/api/runs/latency-summary?windowHours=168');
|
|
24715
|
+
var ld = await lr.json();
|
|
24716
|
+
if (ld && ld.ok && ld.coverage >= 0.5 && ld.avgDurationMs > 0) {
|
|
24717
|
+
var totalMs = ld.avgDurationMs;
|
|
24718
|
+
latToolPct = Math.round((ld.avgToolMs / totalMs) * 100);
|
|
24719
|
+
latModelPct = Math.round((ld.avgModelMs / totalMs) * 100);
|
|
24720
|
+
latOverPct = Math.max(0, 100 - latToolPct - latModelPct);
|
|
24721
|
+
avgDur = Math.round(ld.avgDurationMs);
|
|
24722
|
+
latencyMode = 'real';
|
|
24723
|
+
coverageLabel = ld.runsWithHooks + '/' + ld.runsTotal + ' runs · path B';
|
|
24724
|
+
} else if (ld && ld.ok) {
|
|
24725
|
+
coverageLabel = ld.coverage > 0
|
|
24726
|
+
? Math.round(ld.coverage * 100) + '% coverage — need 50%+ for real split'
|
|
24727
|
+
: 'no path B data yet';
|
|
24728
|
+
}
|
|
24729
|
+
} catch (e) { /* fall through to heuristic */ }
|
|
24628
24730
|
var splitHtml = '<div class="mini-split">'
|
|
24629
|
-
+ '<div class="mini-split-seg" style="background:#3b82f6;width:' + latModelPct + '%" title="Model API time
|
|
24630
|
-
+ '<div class="mini-split-seg" style="background:#8b5cf6;width:' + latToolPct + '%" title="Tool execution time
|
|
24631
|
-
+ '<div class="mini-split-seg" style="background:#6b7280;width:' + latOverPct + '%" title="Framework overhead
|
|
24731
|
+
+ '<div class="mini-split-seg" style="background:#3b82f6;width:' + latModelPct + '%" title="Model API time">' + (latModelPct >= 12 ? 'model ' + latModelPct + '%' : '') + '</div>'
|
|
24732
|
+
+ '<div class="mini-split-seg" style="background:#8b5cf6;width:' + latToolPct + '%" title="Tool execution time">' + (latToolPct >= 12 ? 'tools ' + latToolPct + '%' : '') + '</div>'
|
|
24733
|
+
+ '<div class="mini-split-seg" style="background:#6b7280;width:' + latOverPct + '%" title="Framework overhead">' + (latOverPct >= 12 ? 'overhead ' + latOverPct + '%' : '') + '</div>'
|
|
24632
24734
|
+ '</div>'
|
|
24633
24735
|
+ '<div class="mini-split-legend">'
|
|
24634
24736
|
+ '<span><span class="mini-split-legend-dot" style="background:#3b82f6"></span>model</span>'
|
|
@@ -24636,7 +24738,14 @@ async function refreshMiniDashboards() {
|
|
|
24636
24738
|
+ '<span><span class="mini-split-legend-dot" style="background:#6b7280"></span>overhead</span>'
|
|
24637
24739
|
+ '</div>';
|
|
24638
24740
|
var latFigure = avgDur > 0 ? formatDurationMs(avgDur) : '—';
|
|
24639
|
-
var latSub
|
|
24741
|
+
var latSub;
|
|
24742
|
+
if (okRuns.length === 0) {
|
|
24743
|
+
latSub = 'no successful runs in 7d';
|
|
24744
|
+
} else if (latencyMode === 'real') {
|
|
24745
|
+
latSub = 'avg of ' + okRuns.length + ' successful runs · ' + coverageLabel;
|
|
24746
|
+
} else {
|
|
24747
|
+
latSub = 'avg of ' + okRuns.length + ' successful runs · split is heuristic (' + (coverageLabel || 'install hooks per task to see real numbers') + ')';
|
|
24748
|
+
}
|
|
24640
24749
|
|
|
24641
24750
|
// ── Reliability card ───────────────────────────────────────────────
|
|
24642
24751
|
// Per-day failure column, stacked by category. Categories use the same
|
|
@@ -24750,7 +24859,7 @@ async function refreshMiniDashboards() {
|
|
|
24750
24859
|
+ '<div class="mini-card">'
|
|
24751
24860
|
+ '<div class="mini-card-head"><span class="mini-card-title">Latency · avg</span><span class="mini-card-figure">' + esc(latFigure) + '</span></div>'
|
|
24752
24861
|
+ splitHtml
|
|
24753
|
-
+ '<div class="mini-card-sub">' + esc(latSub) + '
|
|
24862
|
+
+ '<div class="mini-card-sub">' + esc(latSub) + '</div>'
|
|
24754
24863
|
+ '</div>'
|
|
24755
24864
|
+ '<div class="mini-card">'
|
|
24756
24865
|
+ '<div class="mini-card-head"><span class="mini-card-title">Reliability · 7d</span><span class="mini-card-figure">' + totalFails7 + ' fail' + (totalFails7 === 1 ? '' : 's') + '</span></div>'
|