clementine-agent 1.18.102 → 1.18.104

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2) hide show
  1. package/dist/cli/dashboard.js +227 -14
  2. package/package.json +1 -1
@@ -5916,6 +5916,90 @@ If the tool returns nothing or errors, return an empty array \`[]\`.`,
5916
5916
  res.status(500).json({ ok: false, error: String(err) });
5917
5917
  }
5918
5918
  });
// ── PRD §12 Phase 6.3 / 1.18.104: real latency split ─────────────
// GET /api/runs/latency-summary?windowHours=<1..168, default 168>
//
// Aggregates per-run tool durations from the event store so the
// Latency mini-card can show real numbers (model API time / tool
// execution time / framework overhead) instead of the heuristic
// placeholder. Only runs with path B hook events contribute to the
// averages; the dashboard falls back to the heuristic when coverage
// is too low.
//
// Response shape: { ok, windowHours, runsTotal, runsWithHooks, coverage,
//   avgDurationMs, avgToolMs, avgModelMs, avgOverheadMs, summaries }.
// For non-negative durations the three segments (tool + model +
// overhead) add up to avgDurationMs, so a percentage bar built from
// them never exceeds 100%.
//
// Implementation note: walking N event-log files is O(events) but
// the data is tiny (hundreds of KB per run, mostly text). For 7d of
// runs this is well under 100ms even with hundreds of runs. If it
// gets slow we'll add an in-memory cache keyed on the file mtime.
app.get('/api/runs/latency-summary', async (req, res) => {
    try {
        // Non-numeric input falls back to the full 7d window; the result is
        // clamped to [1h, 168h].
        const requestedHours = parseInt(String(req.query.windowHours ?? '168'), 10) || 168;
        const windowHours = Math.max(1, Math.min(168, requestedHours));
        const cutoffMs = Date.now() - windowHours * 60 * 60 * 1000;
        const log = new CronRunLog();
        const runs = log.readAllRecent(500, 30);
        // Only successful runs with a numeric duration are considered. An
        // unparseable startedAt yields NaN, which fails the >= comparison and
        // drops the run.
        const inWindow = runs.filter((r) => {
            const t = r.startedAt ? new Date(r.startedAt).getTime() : 0;
            return t >= cutoffMs && r.status === 'ok' && typeof r.durationMs === 'number';
        });
        const { EventLog } = await import('../gateway/event-log.js');
        const eventLog = new EventLog();
        const summaries = [];
        let withHooks = 0;
        let totalDurationMs = 0;
        let totalToolMs = 0;
        for (const r of inWindow) {
            const runId = r.id;
            // Runs without an id cannot be joined to events. They still count
            // toward runsTotal (and therefore lower coverage).
            if (!runId)
                continue;
            let toolMs = 0;
            let calls = 0;
            for (const ev of eventLog.readByRun(runId)) {
                // Path B PostToolUse fires after every tool with duration_ms set
                // (see hook-event ingest endpoint in 1.18.101). The kind='hook'
                // + hookEventName='PostToolUse' combo is what we sum.
                if (ev.kind === 'hook' && ev.hookEventName === 'PostToolUse' && typeof ev.durationMs === 'number') {
                    toolMs += ev.durationMs;
                    calls += 1;
                }
            }
            const hasHook = calls > 0;
            const durationMs = r.durationMs ?? 0;
            // The per-run summary keeps the raw measured sum.
            summaries.push({ runId, durationMs, toolDurationMs: toolMs, toolCalls: calls, hasHookData: hasHook });
            if (hasHook) {
                withHooks += 1;
                totalDurationMs += durationMs;
                // Summed tool durations can exceed the run's wall-clock time
                // (presumably when tool calls overlap), so the aggregate
                // contribution is capped at the run duration. Without the cap
                // the split could report more than 100% tool time.
                totalToolMs += Math.max(0, Math.min(toolMs, durationMs));
            }
        }
        // Coverage: fraction of in-window runs that contributed real data.
        const coverage = inWindow.length > 0 ? withHooks / inWindow.length : 0;
        // Average splits across runs that DID have hook data. The model
        // segment is what's left after tools + a small framework overhead
        // (we use 5% as a conservative estimate for SDK plumbing time —
        // real measurement of this needs a tighter timing pass that we'll
        // add when path B's SessionStart/Stop events get duration_ms too).
        const avgDurationMs = withHooks > 0 ? totalDurationMs / withHooks : 0;
        const avgToolMs = withHooks > 0 ? totalToolMs / withHooks : 0;
        const overheadFraction = 0.05;
        // Overhead is carved out of the non-tool remainder so that a
        // tool-dominated run cannot push the three segments past the total.
        const remainingMs = Math.max(0, avgDurationMs - avgToolMs);
        const overheadMs = Math.max(0, Math.min(avgDurationMs * overheadFraction, remainingMs));
        const modelMs = remainingMs - overheadMs;
        res.json({
            ok: true,
            windowHours,
            runsTotal: inWindow.length,
            runsWithHooks: withHooks,
            coverage,
            avgDurationMs,
            avgToolMs,
            avgModelMs: modelMs,
            avgOverheadMs: overheadMs,
            summaries,
        });
    }
    catch (err) {
        res.status(500).json({ ok: false, error: String(err) });
    }
});
5919
6003
  // ── Recent runs across ALL cron jobs ───────────────────────────
5920
6004
  // Powers the "Recent History" zone on the Tasks page. Returns the most
5921
6005
  // recent N CronRunEntry rows merged from every per-job .jsonl, sorted
@@ -24615,20 +24699,38 @@ async function refreshMiniDashboards() {
24615
24699
  var costFigure = totalCost7 < 0.01 ? '$' + totalCost7.toFixed(4) : '$' + totalCost7.toFixed(2);
24616
24700
 
24617
24701
  // ── Latency split card ─────────────────────────────────────────────
24618
- // Sum durationMs across last7 OK runs only we don't yet have a clean
24619
- // signal for tool time per run. Until path B hooks land we approximate:
24620
- // tool ~ 35%, model ~ 55%, overhead ~ 10% — these are placeholders
24621
- // that get replaced with real values once PostToolUse durations are
24622
- // summed from event logs (Phase 4d).
24702
+ // PRD §12 Phase 6.3 / 1.18.104: real latency split when path B hooks
24703
+ // are providing PostToolUse duration_ms data. Falls back to the
24704
+ // heuristic placeholder when coverage is too low.
24623
24705
  var okRuns = last7.filter(function(rn) { return rn.status === 'ok' && typeof rn.durationMs === 'number'; });
24624
24706
  var avgDur = okRuns.length > 0
24625
24707
  ? Math.round(okRuns.reduce(function(a, b) { return a + b.durationMs; }, 0) / okRuns.length)
24626
24708
  : 0;
24709
+ // Default to heuristic split.
24627
24710
  var latToolPct = 35, latModelPct = 55, latOverPct = 10;
24711
+ var latencyMode = 'heuristic'; // becomes 'real' if coverage >= 50%
24712
+ var coverageLabel = '';
24713
+ try {
24714
+ var lr = await apiFetch('/api/runs/latency-summary?windowHours=168');
24715
+ var ld = await lr.json();
24716
+ if (ld && ld.ok && ld.coverage >= 0.5 && ld.avgDurationMs > 0) {
24717
+ var totalMs = ld.avgDurationMs;
24718
+ latToolPct = Math.round((ld.avgToolMs / totalMs) * 100);
24719
+ latModelPct = Math.round((ld.avgModelMs / totalMs) * 100);
24720
+ latOverPct = Math.max(0, 100 - latToolPct - latModelPct);
24721
+ avgDur = Math.round(ld.avgDurationMs);
24722
+ latencyMode = 'real';
24723
+ coverageLabel = ld.runsWithHooks + '/' + ld.runsTotal + ' runs · path B';
24724
+ } else if (ld && ld.ok) {
24725
+ coverageLabel = ld.coverage > 0
24726
+ ? Math.round(ld.coverage * 100) + '% coverage — need 50%+ for real split'
24727
+ : 'no path B data yet';
24728
+ }
24729
+ } catch (e) { /* fall through to heuristic */ }
24628
24730
  var splitHtml = '<div class="mini-split">'
24629
- + '<div class="mini-split-seg" style="background:#3b82f6;width:' + latModelPct + '%" title="Model API time (~' + latModelPct + '%)">' + (latModelPct >= 12 ? 'model' : '') + '</div>'
24630
- + '<div class="mini-split-seg" style="background:#8b5cf6;width:' + latToolPct + '%" title="Tool execution time (~' + latToolPct + '%)">' + (latToolPct >= 12 ? 'tools' : '') + '</div>'
24631
- + '<div class="mini-split-seg" style="background:#6b7280;width:' + latOverPct + '%" title="Framework overhead (~' + latOverPct + '%)">' + (latOverPct >= 12 ? 'overhead' : '') + '</div>'
24731
+ + '<div class="mini-split-seg" style="background:#3b82f6;width:' + latModelPct + '%" title="Model API time">' + (latModelPct >= 12 ? 'model ' + latModelPct + '%' : '') + '</div>'
24732
+ + '<div class="mini-split-seg" style="background:#8b5cf6;width:' + latToolPct + '%" title="Tool execution time">' + (latToolPct >= 12 ? 'tools ' + latToolPct + '%' : '') + '</div>'
24733
+ + '<div class="mini-split-seg" style="background:#6b7280;width:' + latOverPct + '%" title="Framework overhead">' + (latOverPct >= 12 ? 'overhead ' + latOverPct + '%' : '') + '</div>'
24632
24734
  + '</div>'
24633
24735
  + '<div class="mini-split-legend">'
24634
24736
  + '<span><span class="mini-split-legend-dot" style="background:#3b82f6"></span>model</span>'
@@ -24636,7 +24738,14 @@ async function refreshMiniDashboards() {
24636
24738
  + '<span><span class="mini-split-legend-dot" style="background:#6b7280"></span>overhead</span>'
24637
24739
  + '</div>';
24638
24740
  var latFigure = avgDur > 0 ? formatDurationMs(avgDur) : '—';
24639
- var latSub = okRuns.length > 0 ? 'avg of ' + okRuns.length + ' successful runs · 7d' : 'no successful runs in 7d';
24741
+ var latSub;
24742
+ if (okRuns.length === 0) {
24743
+ latSub = 'no successful runs in 7d';
24744
+ } else if (latencyMode === 'real') {
24745
+ latSub = 'avg of ' + okRuns.length + ' successful runs · ' + coverageLabel;
24746
+ } else {
24747
+ latSub = 'avg of ' + okRuns.length + ' successful runs · split is heuristic (' + (coverageLabel || 'install hooks per task to see real numbers') + ')';
24748
+ }
24640
24749
 
24641
24750
  // ── Reliability card ───────────────────────────────────────────────
24642
24751
  // Per-day failure column, stacked by category. Categories use the same
@@ -24750,7 +24859,7 @@ async function refreshMiniDashboards() {
24750
24859
  + '<div class="mini-card">'
24751
24860
  + '<div class="mini-card-head"><span class="mini-card-title">Latency · avg</span><span class="mini-card-figure">' + esc(latFigure) + '</span></div>'
24752
24861
  + splitHtml
24753
- + '<div class="mini-card-sub">' + esc(latSub) + ' (split is heuristic; per-tool timing lands with hooks)</div>'
24862
+ + '<div class="mini-card-sub">' + esc(latSub) + '</div>'
24754
24863
  + '</div>'
24755
24864
  + '<div class="mini-card">'
24756
24865
  + '<div class="mini-card-head"><span class="mini-card-title">Reliability · 7d</span><span class="mini-card-figure">' + totalFails7 + ' fail' + (totalFails7 === 1 ? '' : 's') + '</span></div>'
@@ -27341,11 +27450,115 @@ function renderCronLastRunPane(job) {
27341
27450
  return;
27342
27451
  }
27343
27452
  var lr = job && job.lastRun;
27344
- if (!lr) {
27345
- pane.innerHTML = '<div style="padding:36px 24px;color:var(--text-muted);text-align:center;font-size:13px">No runs yet. Click <strong>Run task once</strong> below to fire it now and watch the result here.</div>';
27346
- return;
27453
+ var topHtml = lr ? renderCronRunDetails(lr)
27454
+ : '<div style="padding:36px 24px;color:var(--text-muted);text-align:center;font-size:13px">No runs yet. Click <strong>Run task once</strong> below to fire it now and watch the result here.</div>';
27455
+ // PRD §6 Phase 4d / 1.18.103: Per-task observability section. Shows hook
27456
+ // installation status + a toggle. Appears under the run details so the
27457
+ // most important info (last result) stays primary. Renders a placeholder
27458
+ // immediately and async-loads status; if the dashboard daemon is older
27459
+ // than 1.18.101 the section quietly hides itself when the API 404s.
27460
+ topHtml += '<div id="cron-hooks-section" style="margin-top:14px;padding:14px 18px;background:var(--bg-secondary);border:1px solid var(--border);border-radius:8px">'
27461
+ + '<div style="font-size:11px;color:var(--text-muted);text-transform:uppercase;letter-spacing:0.04em;margin-bottom:8px">Per-task observability</div>'
27462
+ + '<div id="cron-hooks-body" style="font-size:12px;color:var(--text-muted)">Loading hook status…</div>'
27463
+ + '</div>';
27464
+ pane.innerHTML = topHtml;
27465
+ // Fire the async fetch separately. job.name is the key the endpoints take.
27466
+ if (job && job.name) loadCronHooksStatus(job.name);
27467
+ }
27468
+
// PRD §6 Phase 4d / 1.18.103: fetch hooks-status for the editing job and
// render the appropriate UI (install / installed / conflict). Best-effort —
// older daemons don't have the endpoint and we just hide the section.
//
// jobName: cron job name; it is URL-encoded into the endpoint path and
//   passed through jsStr() before being embedded in onclick handlers.
// Resolves with no value. Failures are rendered into the section body
// instead of being thrown.
//
// NOTE(review): the doubled-backslash x27 sequences in the strings below are
// kept exactly as found; presumably this script is emitted from a server-side
// template string where they collapse to a single-quote escape — confirm
// before touching them.
async function loadCronHooksStatus(jobName) {
  var body = document.getElementById('cron-hooks-body');
  if (!body) return;
  try {
    var r = await apiFetch('/api/cron/' + encodeURIComponent(jobName) + '/hooks-status');
    if (!r.ok) {
      // Resolve the wrapper from the element captured before the await.
      // Looking it up by id here could hit a section that was re-rendered
      // for a different job while this request was in flight, and hide it.
      var section = body.closest('#cron-hooks-section');
      if (section) section.style.display = 'none';
      return;
    }
    var d = await r.json();
    var st = d.status || {};
    if (!d.workDir) {
      // Without a work_dir there is nowhere to install hooks.
      body.innerHTML = '<div style="color:var(--text-muted);font-size:12px;line-height:1.5">'
        + 'This task has no <code>work_dir</code> set, so hooks can\\x27t be installed. '
        + 'Add a <code>work_dir</code> in the <strong>Scope</strong> tab pointing at a project directory and the hooks toggle will appear here.'
        + '</div>';
      return;
    }
    var safeName = jsStr(jobName);
    if (st.installed && st.managedByUs) {
      // Installed and managed by us: show status plus a disable button.
      var installedAt = st.installedAt ? new Date(st.installedAt).toLocaleString() : 'unknown';
      body.innerHTML = '<div style="display:flex;align-items:center;gap:10px;flex-wrap:wrap">'
        + '<span style="display:inline-flex;align-items:center;gap:6px;color:var(--green);font-size:13px;font-weight:500">'
        + '<span style="font-size:14px">🪝</span> Hooks installed'
        + '</span>'
        + '<span style="color:var(--text-muted);font-size:11px">since ' + esc(installedAt) + '</span>'
        + '<span style="flex:1"></span>'
        + '<button class="btn-sm btn-danger" onclick="disableCronHooks(\\x27' + safeName + '\\x27)" style="font-size:11px">Disable hooks</button>'
        + '</div>'
        + '<div style="margin-top:8px;font-size:11px;color:var(--text-muted);line-height:1.5">'
        + 'PreToolUse, PostToolUse, SubagentStart/Stop, Stop, Notification, UserPromptSubmit, SessionStart, and PreCompact events are forwarded to the dashboard\\x27s event store. '
        + 'This unlocks per-tool latency in the Latency mini-card and richer waterfall span detail in Run detail.'
        + '</div>';
    } else if (st.installed && st.conflictsWithUser) {
      // A settings file exists that we did not create: only offer a re-check.
      body.innerHTML = '<div style="color:var(--yellow);font-size:13px;font-weight:500;margin-bottom:6px">⚠ Hook config conflict</div>'
        + '<div style="color:var(--text-muted);font-size:12px;line-height:1.5">'
        + 'A <code>.claude/settings.local.json</code> exists in <code>' + esc(d.workDir) + '</code> but it wasn\\x27t created by Clementine. '
        + 'Move or delete that file and click below to install our hooks alongside.'
        + '</div>'
        + '<div style="margin-top:10px;display:flex;gap:8px">'
        + '<button class="btn-sm" onclick="loadCronHooksStatus(\\x27' + safeName + '\\x27)" style="font-size:11px">Re-check</button>'
        + '</div>';
    } else {
      // Every other state is presented as "not installed" with an enable button.
      body.innerHTML = '<div style="display:flex;align-items:center;gap:10px;flex-wrap:wrap">'
        + '<span style="color:var(--text-secondary);font-size:13px">'
        + '<span style="font-size:14px">🪝</span> Hooks not installed'
        + '</span>'
        + '<span style="flex:1"></span>'
        + '<button class="btn-sm btn-success" onclick="enableCronHooks(\\x27' + safeName + '\\x27)" style="font-size:11px">Enable hooks</button>'
        + '</div>'
        + '<div style="margin-top:8px;font-size:11px;color:var(--text-muted);line-height:1.5">'
        + 'Drops a <code>.claude/settings.local.json</code> into <code>' + esc(d.workDir) + '</code> registering command-type hooks for the SDK\\x27s 9 hook events. '
        + 'Hooks POST event JSON to the dashboard so per-tool durations land in the Run detail viewer + Latency mini-card. '
        + 'Per-event overhead is &lt;5 ms (curl with --max-time 2). The file is gitignored by convention so this stays per-machine.'
        + '</div>';
    }
  } catch (err) {
    // Network and JSON parse failures land here; show them in place.
    body.innerHTML = '<div style="color:var(--text-muted);font-size:12px">Could not load hook status: ' + esc(String(err)) + '</div>';
  }
}
27533
+
// Shared flow behind the enable/disable buttons: POST to the per-job
// endpoint, toast the outcome, then refresh the hooks section.
//
// jobName:  cron job name (URL-encoded into the path).
// endpoint: final path segment, 'enable-hooks' or 'disable-hooks'.
// text:     { failure, success, crash } fallback strings used when the
//           response carries no error/message of its own.
async function postCronHooksAction(jobName, endpoint, text) {
  try {
    var r = await apiFetch('/api/cron/' + encodeURIComponent(jobName) + '/' + endpoint, { method: 'POST' });
    // A missing or non-JSON body is tolerated; a body that parses to null is
    // normalised too, so reading d.error / d.message below cannot throw.
    var d = (await r.json().catch(function() { return {}; })) || {};
    if (r.ok) {
      toast(d.message || text.success, 'success');
    } else {
      toast(d.error || text.failure + ' (HTTP ' + r.status + ')', 'error');
    }
    // Refresh on failure as well so the conflict path renders.
    loadCronHooksStatus(jobName);
  } catch (err) { toast(text.crash + err, 'error'); }
}

// Installs hooks for the given job via POST /api/cron/<job>/enable-hooks.
async function enableCronHooks(jobName) {
  await postCronHooksAction(jobName, 'enable-hooks', {
    failure: 'Failed to enable hooks',
    success: 'Hooks installed.',
    crash: 'Enable hooks failed: '
  });
}

// Removes hooks for the given job via POST /api/cron/<job>/disable-hooks,
// after the user confirms. Does nothing when the prompt is dismissed.
async function disableCronHooks(jobName) {
  if (!confirm('Disable hooks for "' + jobName + '"? The next run will only use the in-process tap (path A).')) return;
  await postCronHooksAction(jobName, 'disable-hooks', {
    failure: 'Failed to disable hooks',
    success: 'Hooks disabled.',
    crash: 'Disable hooks failed: '
  });
}
27350
27563
 
27351
27564
  function renderCronRunningState(startedAtMs) {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "clementine-agent",
3
- "version": "1.18.102",
3
+ "version": "1.18.104",
4
4
  "description": "Clementine — Personal AI Assistant (TypeScript)",
5
5
  "type": "module",
6
6
  "main": "dist/index.js",