npm - clementine-agent - Versions diffs - 1.18.102 → 1.18.104 - Mend

clementine-agent 1.18.102 → 1.18.104

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (2) hide show

package/dist/cli/dashboard.js +227 -14
package/package.json +1 -1

package/dist/cli/dashboard.js CHANGED Viewed

@@ -5916,6 +5916,90 @@ If the tool returns nothing or errors, return an empty array \`[]\`.`,
             res.status(500).json({ ok: false, error: String(err) });
         }
     });
+    // ── PRD §12 Phase 6.3 / 1.18.104: real latency split ─────────────
+    // GET /api/runs/latency-summary?windowHours=N
+    //   windowHours is clamped to 1..168 and defaults to 168 (7 days).
+    //
+    // Aggregates per-run tool durations from the event store so the
+    // Latency mini-card can show real numbers (model API time / tool
+    // execution time / framework overhead) instead of the heuristic
+    // placeholder. Only runs with path B hook events contribute to the
+    // averages; the dashboard falls back to the heuristic when coverage
+    // is too low.
+    //
+    // Response: { ok, windowHours, runsTotal, runsWithHooks, coverage,
+    //   avgDurationMs, avgToolMs, avgModelMs, avgOverheadMs, summaries }.
+    // avgToolMs + avgModelMs + avgOverheadMs add up to avgDurationMs
+    // (within floating-point rounding) so a consumer can turn them into
+    // percentages that never exceed 100%. `summaries` keeps the raw,
+    // unclamped per-run tool totals. Any thrown error yields HTTP 500 with
+    // { ok: false, error }.
+    //
+    // Implementation note: walking N event-log files is O(events) but
+    // the data is tiny (hundreds of KB per run, mostly text). For 7d of
+    // runs this is well under 100ms even with hundreds of runs. If it
+    // gets slow we'll add an in-memory cache keyed on the file mtime.
+    app.get('/api/runs/latency-summary', async (req, res) => {
+        try {
+            const windowHours = Math.max(1, Math.min(168, parseInt(String(req.query.windowHours ?? '168'), 10) || 168));
+            const cutoffMs = Date.now() - windowHours * 60 * 60 * 1000;
+            const log = new CronRunLog();
+            const runs = log.readAllRecent(500, 30);
+            // Only successful runs with a numeric duration inside the window
+            // are eligible; runs without startedAt are treated as epoch 0.
+            const inWindow = runs.filter((r) => {
+                const t = r.startedAt ? new Date(r.startedAt).getTime() : 0;
+                return t >= cutoffMs && r.status === 'ok' && typeof r.durationMs === 'number';
+            });
+            const { EventLog } = await import('../gateway/event-log.js');
+            const eventLog = new EventLog();
+            const summaries = [];
+            let withHooks = 0;
+            let totalDurationMs = 0;
+            let totalToolMs = 0;
+            for (const r of inWindow) {
+                const runId = r.id;
+                // Runs without an id cannot be joined to the event store. They
+                // still count in runsTotal, i.e. as runs without hook data.
+                if (!runId)
+                    continue;
+                const events = eventLog.readByRun(runId);
+                let toolMs = 0;
+                let calls = 0;
+                let hasHook = false;
+                for (const ev of events) {
+                    // Path B PostToolUse fires after every tool with duration_ms set
+                    // (see hook-event ingest endpoint in 1.18.101). The kind='hook'
+                    // + hookEventName='PostToolUse' combo is what we sum. Negative
+                    // or NaN durations are ignored so one bad event cannot skew
+                    // or poison the totals.
+                    const e = ev;
+                    if (e.kind === 'hook' && e.hookEventName === 'PostToolUse' && typeof e.durationMs === 'number' && e.durationMs >= 0) {
+                        toolMs += e.durationMs;
+                        calls += 1;
+                        hasHook = true;
+                    }
+                }
+                const durationMs = r.durationMs ?? 0;
+                summaries.push({ runId, durationMs, toolDurationMs: toolMs, toolCalls: calls, hasHookData: hasHook });
+                if (hasHook) {
+                    withHooks += 1;
+                    totalDurationMs += durationMs;
+                    // Summed tool durations can exceed the run's wall-clock time
+                    // (overlapping tool calls, clock skew). Cap the contribution
+                    // at the run duration so the averaged split stays within 100%.
+                    totalToolMs += Math.min(toolMs, Math.max(0, durationMs));
+                }
+            }
+            // Coverage percentage: how many runs in the window contributed real data.
+            const coverage = inWindow.length > 0 ? withHooks / inWindow.length : 0;
+            // Average splits across runs that DID have hook data. The model
+            // segment is what's left after tools + a small framework overhead
+            // (we use 5% as a conservative estimate for SDK plumbing time —
+            // real measurement of this needs a tighter timing pass that we'll
+            // add when path B's SessionStart/Stop events get duration_ms too).
+            const avgDurationMs = withHooks > 0 ? totalDurationMs / withHooks : 0;
+            const avgToolMs = withHooks > 0 ? totalToolMs / withHooks : 0;
+            const overheadFraction = 0.05;
+            // Overhead never takes more than what remains after tool time, so
+            // tool + overhead + model cannot exceed the average duration.
+            const overheadMs = Math.max(0, Math.min(avgDurationMs * overheadFraction, avgDurationMs - avgToolMs));
+            const modelMs = Math.max(0, avgDurationMs - avgToolMs - overheadMs);
+            res.json({
+                ok: true,
+                windowHours,
+                runsTotal: inWindow.length,
+                runsWithHooks: withHooks,
+                coverage,
+                avgDurationMs,
+                avgToolMs,
+                avgModelMs: modelMs,
+                avgOverheadMs: overheadMs,
+                summaries,
+            });
+        }
+        catch (err) {
+            res.status(500).json({ ok: false, error: String(err) });
+        }
+    });
     // ── Recent runs across ALL cron jobs ───────────────────────────
     // Powers the "Recent History" zone on the Tasks page. Returns the most
     // recent N CronRunEntry rows merged from every per-job .jsonl, sorted
@@ -24615,20 +24699,38 @@ async function refreshMiniDashboards() {
   var costFigure = totalCost7 < 0.01 ? '$' + totalCost7.toFixed(4) : '$' + totalCost7.toFixed(2);
   // ── Latency split card ─────────────────────────────────────────────
-  // Sum durationMs across last7 OK runs only — we don't yet have a clean
-  // signal for tool time per run. Until path B hooks land we approximate:
-  //   tool ~ 35%, model ~ 55%, overhead ~ 10% — these are placeholders
-  //   that get replaced with real values once PostToolUse durations are
-  //   summed from event logs (Phase 4d).
+  // PRD §12 Phase 6.3 / 1.18.104: real latency split when path B hooks
+  // are providing PostToolUse duration_ms data. Falls back to the
+  // heuristic placeholder when coverage is too low.
   var okRuns = last7.filter(function(rn) { return rn.status === 'ok' && typeof rn.durationMs === 'number'; });
   var avgDur = okRuns.length > 0
     ? Math.round(okRuns.reduce(function(a, b) { return a + b.durationMs; }, 0) / okRuns.length)
     : 0;
+  // Default to heuristic split.
   var latToolPct = 35, latModelPct = 55, latOverPct = 10;
+  var latencyMode = 'heuristic'; // becomes 'real' if coverage >= 50%
+  var coverageLabel = '';
+  try {
+    var lr = await apiFetch('/api/runs/latency-summary?windowHours=168');
+    var ld = await lr.json();
+    if (ld && ld.ok && ld.coverage >= 0.5 && ld.avgDurationMs > 0) {
+      var totalMs = ld.avgDurationMs;
+      latToolPct = Math.round((ld.avgToolMs / totalMs) * 100);
+      latModelPct = Math.round((ld.avgModelMs / totalMs) * 100);
+      latOverPct = Math.max(0, 100 - latToolPct - latModelPct);
+      avgDur = Math.round(ld.avgDurationMs);
+      latencyMode = 'real';
+      coverageLabel = ld.runsWithHooks + '/' + ld.runsTotal + ' runs · path B';
+    } else if (ld && ld.ok) {
+      coverageLabel = ld.coverage > 0
+        ? Math.round(ld.coverage * 100) + '% coverage — need 50%+ for real split'
+        : 'no path B data yet';
+    }
+  } catch (e) { /* fall through to heuristic */ }
   var splitHtml = '<div class="mini-split">'
-    + '<div class="mini-split-seg" style="background:#3b82f6;width:' + latModelPct + '%" title="Model API time (~' + latModelPct + '%)">' + (latModelPct >= 12 ? 'model' : '') + '</div>'
-    + '<div class="mini-split-seg" style="background:#8b5cf6;width:' + latToolPct + '%" title="Tool execution time (~' + latToolPct + '%)">' + (latToolPct >= 12 ? 'tools' : '') + '</div>'
-    + '<div class="mini-split-seg" style="background:#6b7280;width:' + latOverPct + '%" title="Framework overhead (~' + latOverPct + '%)">' + (latOverPct >= 12 ? 'overhead' : '') + '</div>'
+    + '<div class="mini-split-seg" style="background:#3b82f6;width:' + latModelPct + '%" title="Model API time">' + (latModelPct >= 12 ? 'model ' + latModelPct + '%' : '') + '</div>'
+    + '<div class="mini-split-seg" style="background:#8b5cf6;width:' + latToolPct + '%" title="Tool execution time">' + (latToolPct >= 12 ? 'tools ' + latToolPct + '%' : '') + '</div>'
+    + '<div class="mini-split-seg" style="background:#6b7280;width:' + latOverPct + '%" title="Framework overhead">' + (latOverPct >= 12 ? 'overhead ' + latOverPct + '%' : '') + '</div>'
     + '</div>'
     + '<div class="mini-split-legend">'
     +   '<span><span class="mini-split-legend-dot" style="background:#3b82f6"></span>model</span>'
@@ -24636,7 +24738,14 @@ async function refreshMiniDashboards() {
     +   '<span><span class="mini-split-legend-dot" style="background:#6b7280"></span>overhead</span>'
     + '</div>';
   var latFigure = avgDur > 0 ? formatDurationMs(avgDur) : '—';
-  var latSub = okRuns.length > 0 ? 'avg of ' + okRuns.length + ' successful runs · 7d' : 'no successful runs in 7d';
+  var latSub;
+  if (okRuns.length === 0) {
+    latSub = 'no successful runs in 7d';
+  } else if (latencyMode === 'real') {
+    latSub = 'avg of ' + okRuns.length + ' successful runs · ' + coverageLabel;
+  } else {
+    latSub = 'avg of ' + okRuns.length + ' successful runs · split is heuristic (' + (coverageLabel || 'install hooks per task to see real numbers') + ')';
+  }
   // ── Reliability card ───────────────────────────────────────────────
   // Per-day failure column, stacked by category. Categories use the same
@@ -24750,7 +24859,7 @@ async function refreshMiniDashboards() {
     + '<div class="mini-card">'
     +   '<div class="mini-card-head"><span class="mini-card-title">Latency · avg</span><span class="mini-card-figure">' + esc(latFigure) + '</span></div>'
     +   splitHtml
-    +   '<div class="mini-card-sub">' + esc(latSub) + ' (split is heuristic; per-tool timing lands with hooks)</div>'
+    +   '<div class="mini-card-sub">' + esc(latSub) + '</div>'
     + '</div>'
     + '<div class="mini-card">'
     +   '<div class="mini-card-head"><span class="mini-card-title">Reliability · 7d</span><span class="mini-card-figure">' + totalFails7 + ' fail' + (totalFails7 === 1 ? '' : 's') + '</span></div>'
@@ -27341,11 +27450,115 @@ function renderCronLastRunPane(job) {
     return;
   }
   var lr = job && job.lastRun;
-  if (!lr) {
-    pane.innerHTML = '<div style="padding:36px 24px;color:var(--text-muted);text-align:center;font-size:13px">No runs yet. Click <strong>Run task once</strong> below to fire it now and watch the result here.</div>';
-    return;
+  var topHtml = lr ? renderCronRunDetails(lr)
+    : '<div style="padding:36px 24px;color:var(--text-muted);text-align:center;font-size:13px">No runs yet. Click <strong>Run task once</strong> below to fire it now and watch the result here.</div>';
+  // PRD §6 Phase 4d / 1.18.103: Per-task observability section. Shows hook
+  // installation status + a toggle. Appears under the run details so the
+  // most important info (last result) stays primary. Renders a placeholder
+  // immediately and async-loads status; if the dashboard daemon is older
+  // than 1.18.101 the section quietly hides itself when the API 404s.
+  topHtml += '<div id="cron-hooks-section" style="margin-top:14px;padding:14px 18px;background:var(--bg-secondary);border:1px solid var(--border);border-radius:8px">'
+    + '<div style="font-size:11px;color:var(--text-muted);text-transform:uppercase;letter-spacing:0.04em;margin-bottom:8px">Per-task observability</div>'
+    + '<div id="cron-hooks-body" style="font-size:12px;color:var(--text-muted)">Loading hook status…</div>'
+    + '</div>';
+  pane.innerHTML = topHtml;
+  // Fire the async fetch separately. job.name is the key the endpoints take.
+  if (job && job.name) loadCronHooksStatus(job.name);
+}
+// PRD §6 Phase 4d / 1.18.103: fetch hooks-status for the editing job and
+// render the appropriate UI (install / installed / conflict). Best-effort —
+// older daemons don't have the endpoint and we just hide the section.
+//
+// jobName: the cron job name; it is URL-encoded into the endpoint path and
+//   embedded (via jsStr) into the inline onclick handlers.
+// Returns a promise that resolves once the section has been rendered or
+// hidden. Failures are rendered into the section body instead of thrown.
+async function loadCronHooksStatus(jobName) {
+  var body = document.getElementById('cron-hooks-body');
+  if (!body) return;
+  try {
+    var r = await apiFetch('/api/cron/' + encodeURIComponent(jobName) + '/hooks-status');
+    if (!r.ok) {
+      // Resolve the section from the node captured before the await. The
+      // pane may have been re-rendered for another job in the meantime, and
+      // a fresh lookup by id would then hide that job's section instead.
+      var section = body.closest('#cron-hooks-section');
+      if (section) section.style.display = 'none';
+      return;
+    }
+    var d = await r.json();
+    var st = d.status || {};
+    // Without a work_dir there is nowhere to install hooks; explain that
+    // instead of offering a toggle.
+    if (!d.workDir) {
+      body.innerHTML = '<div style="color:var(--text-muted);font-size:12px;line-height:1.5">'
+        + 'This task has no <code>work_dir</code> set, so hooks can\\x27t be installed. '
+        + 'Add a <code>work_dir</code> in the <strong>Scope</strong> tab pointing at a project directory and the hooks toggle will appear here.'
+        + '</div>';
+      return;
+    }
+    var safeName = jsStr(jobName);
+    if (st.installed && st.managedByUs) {
+      // Installed by us: show the install time and a disable button.
+      var installedAt = st.installedAt ? new Date(st.installedAt).toLocaleString() : 'unknown';
+      body.innerHTML = '<div style="display:flex;align-items:center;gap:10px;flex-wrap:wrap">'
+        + '<span style="display:inline-flex;align-items:center;gap:6px;color:var(--green);font-size:13px;font-weight:500">'
+        +   '<span style="font-size:14px">🪝</span> Hooks installed'
+        + '</span>'
+        + '<span style="color:var(--text-muted);font-size:11px">since ' + esc(installedAt) + '</span>'
+        + '<span style="flex:1"></span>'
+        + '<button class="btn-sm btn-danger" onclick="disableCronHooks(\\x27' + safeName + '\\x27)" style="font-size:11px">Disable hooks</button>'
+        + '</div>'
+        + '<div style="margin-top:8px;font-size:11px;color:var(--text-muted);line-height:1.5">'
+        + 'PreToolUse, PostToolUse, SubagentStart/Stop, Stop, Notification, UserPromptSubmit, SessionStart, and PreCompact events are forwarded to the dashboard\\x27s event store. '
+        + 'This unlocks per-tool latency in the Latency mini-card and richer waterfall span detail in Run detail.'
+        + '</div>';
+    } else if (st.installed && st.conflictsWithUser) {
+      // A settings file exists that we did not create: offer a re-check only.
+      body.innerHTML = '<div style="color:var(--yellow);font-size:13px;font-weight:500;margin-bottom:6px">⚠ Hook config conflict</div>'
+        + '<div style="color:var(--text-muted);font-size:12px;line-height:1.5">'
+        + 'A <code>.claude/settings.local.json</code> exists in <code>' + esc(d.workDir) + '</code> but it wasn\\x27t created by Clementine. '
+        + 'Move or delete that file and click below to install our hooks alongside.'
+        + '</div>'
+        + '<div style="margin-top:10px;display:flex;gap:8px">'
+        + '<button class="btn-sm" onclick="loadCronHooksStatus(\\x27' + safeName + '\\x27)" style="font-size:11px">Re-check</button>'
+        + '</div>';
+    } else {
+      // Every other status is rendered as "not installed" with an enable button.
+      body.innerHTML = '<div style="display:flex;align-items:center;gap:10px;flex-wrap:wrap">'
+        + '<span style="color:var(--text-secondary);font-size:13px">'
+        +   '<span style="font-size:14px">🪝</span> Hooks not installed'
+        + '</span>'
+        + '<span style="flex:1"></span>'
+        + '<button class="btn-sm btn-success" onclick="enableCronHooks(\\x27' + safeName + '\\x27)" style="font-size:11px">Enable hooks</button>'
+        + '</div>'
+        + '<div style="margin-top:8px;font-size:11px;color:var(--text-muted);line-height:1.5">'
+        + 'Drops a <code>.claude/settings.local.json</code> into <code>' + esc(d.workDir) + '</code> registering command-type hooks for the SDK\\x27s 9 hook events. '
+        + 'Hooks POST event JSON to the dashboard so per-tool durations land in the Run detail viewer + Latency mini-card. '
+        + 'Per-event overhead is &lt;5 ms (curl with --max-time 2). The file is gitignored by convention so this stays per-machine.'
+        + '</div>';
+    }
+  } catch (err) {
+    body.innerHTML = '<div style="color:var(--text-muted);font-size:12px">Could not load hook status: ' + esc(String(err)) + '</div>';
   }
-  pane.innerHTML = renderCronRunDetails(lr);
+}
+// POSTs to the enable-hooks endpoint for the given job, reports the
+// outcome through a toast, and re-renders the hooks section.
+// jobName: the cron job name; it is URL-encoded into the endpoint path.
+async function enableCronHooks(jobName) {
+  try {
+    var endpoint = '/api/cron/' + encodeURIComponent(jobName) + '/enable-hooks';
+    var resp = await apiFetch(endpoint, { method: 'POST' });
+    // A body that is not JSON is treated as an empty payload.
+    var payload = await resp.json().catch(function() { return {}; });
+    if (resp.ok) {
+      toast(payload.message || 'Hooks installed.', 'success');
+    } else {
+      toast(payload.error || 'Failed to enable hooks (HTTP ' + resp.status + ')', 'error');
+    }
+    // Refresh on failure too, so a conflict state gets rendered.
+    loadCronHooksStatus(jobName);
+  } catch (err) {
+    toast('Enable hooks failed: ' + err, 'error');
+  }
+}
+// Asks for confirmation, then POSTs to the disable-hooks endpoint for the
+// given job, reports the outcome through a toast, and re-renders the
+// hooks section. Does nothing when the confirmation is declined.
+// jobName: the cron job name; it is URL-encoded into the endpoint path.
+async function disableCronHooks(jobName) {
+  var confirmed = confirm('Disable hooks for "' + jobName + '"? The next run will only use the in-process tap (path A).');
+  if (!confirmed) return;
+  try {
+    var endpoint = '/api/cron/' + encodeURIComponent(jobName) + '/disable-hooks';
+    var resp = await apiFetch(endpoint, { method: 'POST' });
+    // A body that is not JSON is treated as an empty payload.
+    var payload = await resp.json().catch(function() { return {}; });
+    if (resp.ok) {
+      toast(payload.message || 'Hooks disabled.', 'success');
+    } else {
+      toast(payload.error || 'Failed to disable hooks (HTTP ' + resp.status + ')', 'error');
+    }
+    // Re-render in both cases so the section reflects the current state.
+    loadCronHooksStatus(jobName);
+  } catch (err) {
+    toast('Disable hooks failed: ' + err, 'error');
+  }
 }
 function renderCronRunningState(startedAtMs) {

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "clementine-agent",
-  "version": "1.18.102",
+  "version": "1.18.104",
   "description": "Clementine — Personal AI Assistant (TypeScript)",
   "type": "module",
   "main": "dist/index.js",