clementine-agent 1.1.8 → 1.1.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1506,6 +1506,33 @@ You have a cost budget per message — not a hard turn limit. Work until the tas
1506
1506
  }
1507
1507
  catch { /* non-fatal */ }
1508
1508
  }
1509
+ // Conversational context — same signals the insight engine surfaces
1510
+ // proactively (Phase 10), but injected directly into the agent's prompt
1511
+ // so it can adjust its own approach. Scoped to chat sessions because
1512
+ // cron/heartbeat don't have a "user feeling frustrated" axis to react to,
1513
+ // and inflating their prompt doesn't help. Only injected when at least
1514
+ // one signal fires — keeps the prompt clean during normal sessions.
1515
+ if (!isAutonomous) {
1516
+ try {
1517
+ const { detectFrustrationSignals, detectRepeatedTopics } = require('./insight-engine.js');
1518
+ const since24h = new Date(Date.now() - 24 * 60 * 60 * 1000).toISOString();
1519
+ const since7d = new Date(Date.now() - 7 * 24 * 60 * 60 * 1000).toISOString();
1520
+ const recent = this.getRecentActivity(since24h, 50);
1521
+ const week = this.getRecentActivity(since7d, 200);
1522
+ const frustration = detectFrustrationSignals(recent);
1523
+ const topics = detectRepeatedTopics(week);
1524
+ const allSignals = [...frustration, ...topics];
1525
+ if (allSignals.length > 0) {
1526
+ const guidance = frustration.length > 0
1527
+ ? '\n\n**Adjust your approach:** When friction signals are present, lead with a clarifying question instead of assuming. Acknowledge the prior misunderstanding briefly without over-apologizing. Confirm understanding before acting.'
1528
+ : '\n\n**Use this context naturally:** Recurring topics may indicate an unresolved thread — if relevant, offer to close the loop or summarize current state. Do not force callbacks if not directly applicable.';
1529
+ volatileParts.push(`## Conversational Context\n\nSignals from recent sessions:\n` +
1530
+ allSignals.map(s => `- ${s}`).join('\n') +
1531
+ guidance);
1532
+ }
1533
+ }
1534
+ catch { /* non-fatal — insight-engine optional */ }
1535
+ }
1509
1536
  // Current context — date/time changes every minute, so it's volatile.
1510
1537
  const channel = deriveChannel({ sessionKey, isAutonomous, cronTier });
1511
1538
  const resolvedModel = resolveModel(model) ?? MODEL;
@@ -3786,6 +3813,12 @@ You have a cost budget per message — not a hard turn limit. Work until the tas
3786
3813
  }
3787
3814
  async runCronJob(jobName, jobPrompt, tier = 1, maxTurns, model, workDir, timeoutMs, successCriteria, agentSlug) {
3788
3815
  setInteractionSource('autonomous');
3816
+ // Tag every tool_use audit event with the cron job name + agent so
3817
+ // analytics tool-usage can show "Bash×893 driven by market-leader-followup"
3818
+ // instead of "driven by: unknown". Cleared on next setInteractionSource
3819
+ // (cron/heartbeat boundary or interactive chat takeover).
3820
+ const { setActiveQueryContext } = await import('./hooks.js');
3821
+ setActiveQueryContext({ job: jobName, source: 'cron', agentSlug });
3789
3822
  const cronProfile = agentSlug && agentSlug !== 'clementine'
3790
3823
  ? this.profileManager.get(agentSlug)
3791
3824
  : null;
@@ -4274,6 +4307,10 @@ You have a cost budget per message — not a hard turn limit. Work until the tas
4274
4307
  logger.info(`Unleashed task ${jobName}: starting phase ${phase}`);
4275
4308
  // Re-assert autonomous source — a chat message may have changed it between phases
4276
4309
  setInteractionSource('autonomous');
4310
+ // Tag tool_use audit events with the unleashed job name (Phase 11).
4311
+ // Re-asserted each phase since setInteractionSource clears the context.
4312
+ const { setActiveQueryContext: _setActiveQueryContext } = await import('./hooks.js');
4313
+ _setActiveQueryContext({ job: jobName, source: 'unleashed', agentSlug });
4277
4314
  // Unleashed phases run side-effect-heavy work; same logic as cron mode.
4278
4315
  const phaseGuard = new StallGuard('unleashed');
4279
4316
  const sdkOptions = this.buildOptions({
@@ -62,6 +62,12 @@ export declare function getInteractionSource(): 'owner-dm' | 'owner-channel' | '
62
62
  export declare function getProfileTier(): number | null;
63
63
  export declare function getAuditLog(): string[];
64
64
  export declare function clearAuditLog(): void;
65
+ export declare function setActiveQueryContext(ctx: { // sets the ambient attribution that later tool_use audit events carry
66
+ job?: string | null; // stored as the active job; undefined or null is stored as null
67
+ source?: string | null; // stored as the active source; undefined or null is stored as null
68
+ agentSlug?: string | null; // applied only when not undefined (an explicit null is applied)
69
+ }): void;
70
+ export declare function clearActiveQueryContext(): void; // resets the active job and source to null; the agent slug is left untouched
65
71
  export declare function logToolUse(toolName: string, toolInput: Record<string, unknown>): void;
66
72
  export declare function getHeartbeatDisallowedTools(): string[];
67
73
  export declare const PRIVATE_URL_PATTERNS: RegExp[];
@@ -141,6 +141,11 @@ export function setSendPolicyChecker(checker) {
141
141
  }
142
142
  export function setInteractionSource(source) {
143
143
  interactionSource = source;
144
+ // Clear any leftover query attribution context. Cron / unleashed paths
145
+ // immediately call setActiveQueryContext after this; interactive chat
146
+ // doesn't, so anything still set from a prior cron run gets reset.
147
+ activeJob = null;
148
+ activeSource = null;
144
149
  }
145
150
  export function getInteractionSource() {
146
151
  return interactionSource;
@@ -154,6 +159,25 @@ export function getAuditLog() {
154
159
  export function clearAuditLog() {
155
160
  auditLog.length = 0;
156
161
  }
162
// Ambient attribution for audit tool_use events. logToolUse copies these
// onto each event so the analytics view can name the job/source that drove
// a tool call instead of reporting "driven by: unknown".
// activeAgentSlug is not declared here: per the original note it already
// exists earlier in this module for the send-policy infrastructure, and
// this code only assigns to it.
let activeJob = null;
let activeSource = null;
/**
 * Set the attribution context for the query that is about to run.
 *
 * @param ctx  `job` and `source` are always stored (a missing value is
 *             stored as null). `agentSlug` is written to activeAgentSlug
 *             only when it is not undefined, so omitting it keeps whatever
 *             slug was already set.
 *             NOTE(review): an omitted agentSlug therefore leaves the previous
 *             slug in place — confirm it is reset elsewhere between jobs.
 */
export function setActiveQueryContext(ctx) {
    const { job, source, agentSlug } = ctx;
    activeJob = job ?? null;
    activeSource = source ?? null;
    if (agentSlug !== undefined) {
        activeAgentSlug = agentSlug;
    }
}
175
/**
 * Forget the current job/source attribution.
 *
 * activeAgentSlug is deliberately left alone: it belongs to the send-policy
 * path rather than to the attribution context.
 * NOTE(review): the original comment says setInteractionSource resets the
 * slug "in the relevant transitions" — confirm where that actually happens.
 */
export function clearActiveQueryContext() {
    activeJob = activeSource = null;
}
157
181
  export function logToolUse(toolName, toolInput) {
158
182
  const timestamp = new Date().toLocaleTimeString('en-US', { hour12: false });
159
183
  const summary = summarizeToolCall(toolName, toolInput);
@@ -164,6 +188,9 @@ export function logToolUse(toolName, toolInput) {
164
188
  event_type: 'tool_use',
165
189
  tool_name: toolName,
166
190
  summary,
191
+ ...(activeJob ? { job: activeJob } : {}),
192
+ ...(activeSource ? { source: activeSource } : {}),
193
+ ...(activeAgentSlug ? { agent_slug: activeAgentSlug } : {}),
167
194
  });
168
195
  }
169
196
  // ── Heartbeat tool restrictions ─────────────────────────────────────
@@ -0,0 +1,61 @@
1
+ /**
2
+ * Tool-usage analytics.
3
+ *
4
+ * Reads ~/.clementine/logs/audit.jsonl and aggregates tool_use events by
5
+ * family + name + source so a CLI report can answer:
6
+ *
7
+ * - "What is the agent spending its tool calls on?"
8
+ * - "Which integration (mcp__ family) is hottest?"
9
+ * - "Which job/source is the biggest tool consumer?"
10
+ *
11
+ * Pure file read + in-memory aggregation — no daemon access required.
12
+ * Sized for multi-MB audit logs: the file is read into memory in one call
13
+ * and then scanned line by line (it is not streamed from disk).
14
+ */
15
/** Aggregated usage figures for a single tool family. */
export interface ToolFamilyStats {
    /** Family label; every tool of one MCP server shares a single bucket. */
    family: string;
    /** Number of tool calls counted for this family. */
    totalCalls: number;
    /** Heuristic USD cost attributed to this family — see attributeCostsToToolUses. */
    estimatedCostUsd: number;
    /** Call counts per tool inside the family, highest count first. */
    byTool: { tool: string; count: number }[];
    /** Call counts per source, i.e. the job/context that drove the calls. */
    bySource: { source: string; count: number }[];
}
32
/** Result of aggregating one time window of the audit log. */
export interface ToolUsageReport {
    /** Window bounds, echoed back exactly as they were passed in. */
    windowStart: string;
    windowEnd: string;
    /** Number of query_complete events inside the window. */
    totalQueries: number;
    /** Number of tool_use events inside the window. */
    totalToolCalls: number;
    /** Summed cost of all query_complete events in the window; context for the tool counts. */
    totalCostUsd: number;
    /**
     * Portion of the cost that was attributed to tool calls (≤ totalCostUsd).
     * The remainder belongs to query_completes whose tool calls fell outside
     * the window or were not logged.
     */
    attributedCostUsd: number;
    /** One entry per tool family. */
    families: ToolFamilyStats[];
}
44
+ /**
45
+ * Family normalization. MCP tools are grouped by server
46
+ * (mcp__<server>__<tool> → "mcp:<server>"); known built-ins map to coarse families
47
+ * (e.g. Bash → "shell"); other names are their own family; an empty name → "other".
48
+ */
49
+ export declare function classifyToolFamily(toolName: string): string;
50
+ /**
51
+ * Aggregate tool_use + query_complete events from audit.jsonl over the
52
+ * given window (ISO strings, both bounds inclusive); entries outside are ignored.
53
+ *
54
+ * Returns an all-zero report when the file does not exist. Lines that fail
55
+ * JSON.parse or lack a parseable `ts` are skipped; a tool_use with neither
56
+ * `job` nor `source` is counted under 'unknown'. Events are used in file order.
57
+ */
58
+ export declare function buildToolUsageReport(auditLogPath: string, windowStart: string, windowEnd: string): ToolUsageReport;
59
+ /** Default audit log path (`<baseDir>/logs/audit.jsonl`) — passed-through for CLI default + tests. */
60
+ export declare function defaultAuditLogPath(baseDir: string): string;
61
+ //# sourceMappingURL=tool-usage.d.ts.map
@@ -0,0 +1,186 @@
1
+ /**
2
+ * Tool-usage analytics.
3
+ *
4
+ * Reads ~/.clementine/logs/audit.jsonl and aggregates tool_use events by
5
+ * family + name + source so a CLI report can answer:
6
+ *
7
+ * - "What is the agent spending its tool calls on?"
8
+ * - "Which integration (mcp__ family) is hottest?"
9
+ * - "Which job/source is the biggest tool consumer?"
10
+ *
11
+ * Pure file read + in-memory aggregation — no daemon access required.
12
+ * Sized for multi-MB audit logs: the file is read into memory in one call
13
+ * and then scanned line by line (it is not streamed from disk).
14
+ */
15
+ import { existsSync, readFileSync } from 'node:fs';
16
+ import path from 'node:path';
17
/**
 * Built-in SDK tool name → coarse family label.
 *
 * A Map rather than an object literal: a plain-object lookup would also
 * resolve inherited keys, so a tool called "constructor" or "toString"
 * would come back as a function instead of a family string.
 */
const BUILTIN_TOOL_FAMILIES = new Map([
    ['Bash', 'shell'],
    ['Read', 'fs-read'],
    ['Glob', 'fs-read'],
    ['Grep', 'fs-read'],
    ['Edit', 'fs-write'],
    ['Write', 'fs-write'],
    ['NotebookEdit', 'fs-write'],
    ['WebFetch', 'web'],
    ['WebSearch', 'web'],
    ['Agent', 'subagent'],
    ['Task', 'subagent'],
]);
/**
 * Captures <server> from mcp__<server>__<tool>. The server part is one or
 * more runs of non-underscore characters joined by single underscores
 * (hyphens are ordinary characters inside a run). Keeping "_" as the only
 * separator makes the pattern unambiguous, so a name with no closing "__"
 * fails in linear time instead of backtracking exponentially.
 */
const MCP_SERVER_PATTERN = /^mcp__([^_]+(?:_[^_]+)*)__/;
/**
 * Family normalization for a tool name.
 *
 *  - empty / missing name        → "other"
 *  - mcp__<server>__<tool>       → "mcp:<server>" (one bucket per server)
 *  - known built-in SDK tool     → its coarse family ("shell", "fs-read",
 *                                  "fs-write", "web", "subagent")
 *  - anything else               → the tool name itself, as its own family
 *
 * @param toolName  Tool name as logged in the audit event.
 * @returns The family label, always a string for string input.
 */
export function classifyToolFamily(toolName) {
    if (!toolName)
        return 'other';
    // mcp__server-name__tool_name → mcp:server-name
    const mcpMatch = MCP_SERVER_PATTERN.exec(toolName);
    if (mcpMatch)
        return `mcp:${mcpMatch[1]}`;
    return BUILTIN_TOOL_FAMILIES.get(toolName) ?? toolName;
}
45
/**
 * Time-proximity cost attribution.
 *
 * Audit events carry no query_id tying a tool_use to its query_complete,
 * so the events are walked in the order given: every tool_use seen since
 * the previous query_complete (or since the start of the list) is charged
 * to the next query_complete, whose cost is split evenly between them.
 *
 * Known limits of the heuristic:
 *   - Concurrent queries (e.g. cron + chat at the same time) get mixed
 *     together; this is best-effort, not exact accounting.
 *   - Tool calls with no closing query_complete receive nothing; that
 *     shows up as the gap between totalCostUsd and attributedCostUsd.
 *   - A query_complete whose cost is not a finite number distributes
 *     nothing but still closes the group of pending tool calls.
 *   - Even splitting ignores per-call variance (one 50k-token Bash vs. a
 *     200-token Read). Good enough for an aggregate "where is my budget
 *     going?" view — actionable to within ~15% per family.
 *
 * @param events  Sequence entries ({ isQueryComplete, toolEntryIndex?, cost_usd? }).
 * @returns Map from tool entry index to the cost attributed to it.
 */
function attributeCostsToToolUses(events) {
    const costByToolIndex = new Map();
    const openToolIndices = [];
    for (const event of events) {
        if (event.isQueryComplete) {
            const queryCost = event.cost_usd;
            const canSplit = openToolIndices.length > 0
                && typeof queryCost === 'number'
                && Number.isFinite(queryCost);
            if (canSplit) {
                const share = queryCost / openToolIndices.length;
                for (const toolIndex of openToolIndices) {
                    const alreadyCharged = costByToolIndex.get(toolIndex) ?? 0;
                    costByToolIndex.set(toolIndex, alreadyCharged + share);
                }
            }
            // The query is closed either way; start collecting for the next one.
            openToolIndices.length = 0;
        }
        else if (event.toolEntryIndex !== undefined) {
            openToolIndices.push(event.toolEntryIndex);
        }
    }
    return costByToolIndex;
}
84
/**
 * Aggregate tool_use + query_complete events from audit.jsonl over the
 * given window.
 *
 * The function is forgiving: a line is skipped when it is empty, is not
 * valid JSON, does not parse to an object (JSON.parse also accepts `null`
 * and bare scalars), or has no parseable `ts`. tool_use events whose
 * `tool_name` is not a non-empty string are skipped as well. A tool_use
 * with neither `job` nor `source` is counted under 'unknown'.
 *
 * Events are consumed in file order; the audit log is append-only, so no
 * re-sorting is done before cost attribution.
 *
 * @param auditLogPath  Path of the JSONL audit log. A missing file yields
 *                      an all-zero report.
 * @param windowStart   ISO timestamp, inclusive lower bound.
 * @param windowEnd     ISO timestamp, inclusive upper bound. A bound that
 *                      Date.parse cannot read becomes NaN, every comparison
 *                      against it is false, and that side of the window is
 *                      therefore left open.
 * @returns The report; the window bounds are echoed back unchanged and all
 *          USD figures are rounded to 4 decimals.
 */
export function buildToolUsageReport(auditLogPath, windowStart, windowEnd) {
    const startMs = Date.parse(windowStart);
    const endMs = Date.parse(windowEnd);
    if (!existsSync(auditLogPath)) {
        return {
            windowStart, windowEnd, totalToolCalls: 0, totalQueries: 0,
            families: [], totalCostUsd: 0, attributedCostUsd: 0,
        };
    }
    // family → { totalCalls, totalCost, perTool, perSource }
    const families = new Map();
    let totalToolCalls = 0;
    let totalQueries = 0;
    let totalCost = 0;
    const raw = readFileSync(auditLogPath, 'utf-8');
    // toolEntries holds one record per counted tool_use; sequence interleaves
    // tool_use and query_complete markers in file order for cost attribution.
    const toolEntries = [];
    const sequence = [];
    // First pass: parse, filter to the window, and collect events.
    for (const line of raw.split('\n')) {
        if (!line)
            continue;
        let entry;
        try {
            entry = JSON.parse(line);
        }
        catch {
            continue;
        }
        // `null`, numbers, strings and booleans are valid JSON but not audit
        // events; reading `.ts` on null would throw and abort the whole report.
        if (entry === null || typeof entry !== 'object')
            continue;
        if (!entry.ts)
            continue;
        const tsMs = Date.parse(entry.ts);
        if (Number.isNaN(tsMs))
            continue;
        if (tsMs < startMs || tsMs > endMs)
            continue;
        if (entry.event_type === 'tool_use') {
            // Only string names can be classified; anything else is a malformed event.
            if (typeof entry.tool_name !== 'string' || !entry.tool_name)
                continue;
            const family = classifyToolFamily(entry.tool_name);
            const source = entry.job || entry.source || 'unknown';
            toolEntries.push({ family, source, toolName: entry.tool_name });
            sequence.push({ ts: tsMs, isQueryComplete: false, toolEntryIndex: toolEntries.length - 1 });
            totalToolCalls++;
        }
        else if (entry.event_type === 'query_complete') {
            totalQueries++;
            // A missing or non-finite cost counts as 0 rather than poisoning the totals.
            const cost = typeof entry.cost_usd === 'number' && Number.isFinite(entry.cost_usd) ? entry.cost_usd : 0;
            totalCost += cost;
            sequence.push({ ts: tsMs, isQueryComplete: true, cost_usd: cost });
        }
    }
    // Second pass: attribute each query's cost across its preceding tool_use events.
    const perToolCost = attributeCostsToToolUses(sequence);
    let attributedCost = 0;
    // Third pass: bucket toolEntries into family stats, summing attributed cost.
    for (let i = 0; i < toolEntries.length; i++) {
        const t = toolEntries[i];
        const cost = perToolCost.get(i) ?? 0;
        attributedCost += cost;
        let bucket = families.get(t.family);
        if (!bucket) {
            bucket = { totalCalls: 0, totalCost: 0, perTool: new Map(), perSource: new Map() };
            families.set(t.family, bucket);
        }
        bucket.totalCalls++;
        bucket.totalCost += cost;
        bucket.perTool.set(t.toolName, (bucket.perTool.get(t.toolName) ?? 0) + 1);
        bucket.perSource.set(t.source, (bucket.perSource.get(t.source) ?? 0) + 1);
    }
    const familyStats = [...families.entries()]
        .map(([family, b]) => ({
        family,
        totalCalls: b.totalCalls,
        estimatedCostUsd: Number(b.totalCost.toFixed(4)),
        byTool: [...b.perTool.entries()]
            .map(([tool, count]) => ({ tool, count }))
            .sort((a, c) => c.count - a.count),
        bySource: [...b.perSource.entries()]
            .map(([source, count]) => ({ source, count }))
            .sort((a, c) => c.count - a.count),
    }))
        // Sort by cost first (the actionable signal); fall back to call count.
        .sort((a, b) => b.estimatedCostUsd - a.estimatedCostUsd || b.totalCalls - a.totalCalls);
    return {
        windowStart,
        windowEnd,
        totalToolCalls,
        totalQueries,
        families: familyStats,
        totalCostUsd: Number(totalCost.toFixed(4)),
        attributedCostUsd: Number(attributedCost.toFixed(4)),
    };
}
182
/**
 * Resolve the default audit log location, `<baseDir>/logs/audit.jsonl`.
 * Passed through as the CLI default and by tests.
 */
export function defaultAuditLogPath(baseDir) {
    const relativeSegments = ['logs', 'audit.jsonl'];
    return path.join(baseDir, ...relativeSegments);
}
186
+ //# sourceMappingURL=tool-usage.js.map
package/dist/cli/index.js CHANGED
@@ -1270,6 +1270,59 @@ async function cmdConfigKeychainFixAcl(opts) {
1270
1270
  }
1271
1271
  console.log();
1272
1272
  }
1273
+ // ── Analytics ────────────────────────────────────────────────────────
1274
/**
 * Handler for `analytics tool-usage`.
 *
 * Builds a tool-usage report for the last `opts.hours` hours from the
 * default audit log under BASE_DIR and prints it either as JSON
 * (`opts.json`) or as a colored table of the top `opts.limit` families.
 *
 * @param opts  Parsed CLI options: `hours` and `limit` (strings; each falls
 *              back to its default when missing, non-numeric or 0, and is
 *              clamped to at least 1) and the `json` flag.
 */
async function cmdAnalyticsToolUsage(opts) {
    const analytics = await import('../analytics/tool-usage.js');
    // Integer ≥ 1 from a raw option value, or `fallback` when it is absent / not a number / 0.
    const atLeastOne = (rawValue, fallback) => Math.max(1, parseInt(rawValue ?? String(fallback), 10) || fallback);
    const hours = atLeastOne(opts.hours, 24);
    const limit = atLeastOne(opts.limit, 10);
    const end = new Date();
    const start = new Date(end.getTime() - hours * 60 * 60 * 1000);
    const startIso = start.toISOString();
    const endIso = end.toISOString();
    const report = analytics.buildToolUsageReport(analytics.defaultAuditLogPath(BASE_DIR), startIso, endIso);
    if (opts.json) {
        console.log(JSON.stringify(report, null, 2));
        return;
    }
    // ANSI escape sequences used by the human-readable output.
    const ansi = {
        dim: '\x1b[0;90m',
        bold: '\x1b[1m',
        cyan: '\x1b[0;36m',
        green: '\x1b[0;32m',
        yellow: '\x1b[0;33m',
        reset: '\x1b[0m',
    };
    const BAR_WIDTH = 24;
    // Summary header.
    console.log();
    console.log(` ${ansi.bold}Window:${ansi.reset} last ${hours}h ${ansi.dim}(${startIso} → ${endIso})${ansi.reset}`);
    console.log(` ${ansi.bold}Total tool calls:${ansi.reset} ${report.totalToolCalls.toLocaleString()}`);
    console.log(` ${ansi.bold}Total queries:${ansi.reset} ${report.totalQueries.toLocaleString()}`);
    console.log(` ${ansi.bold}Total cost:${ansi.reset} ${ansi.green}$${report.totalCostUsd.toFixed(4)}${ansi.reset} ${ansi.dim}(attributed to tools: $${report.attributedCostUsd.toFixed(4)})${ansi.reset}`);
    console.log();
    if (report.families.length === 0) {
        console.log(` ${ansi.dim}No tool_use events in window.${ansi.reset}`);
        console.log();
        return;
    }
    const shown = report.families.slice(0, limit);
    // The 0.0001 floor keeps the bar scale finite when every cost is zero.
    const costScale = Math.max(...shown.map(fam => fam.estimatedCostUsd), 0.0001);
    const nameColumn = Math.max(...shown.map(fam => fam.family.length), 12);
    console.log(` ${ansi.bold}Top ${shown.length} tool families ${ansi.dim}(ranked by attributed cost)${ansi.reset}`);
    shown.forEach((fam) => {
        let share = '0.0';
        if (report.attributedCostUsd > 0) {
            share = ((fam.estimatedCostUsd / report.attributedCostUsd) * 100).toFixed(1);
        }
        const filled = Math.round((fam.estimatedCostUsd / costScale) * BAR_WIDTH);
        const bar = '█'.repeat(filled).padEnd(BAR_WIDTH);
        const columns = [
            `${ansi.cyan}${fam.family.padEnd(nameColumn)}${ansi.reset}`,
            `${ansi.green}$${fam.estimatedCostUsd.toFixed(2).padStart(7)}${ansi.reset}`,
            `${share.padStart(5)}%`,
            `${ansi.dim}${String(fam.totalCalls).padStart(5)} calls${ansi.reset}`,
            `${ansi.yellow}${bar}${ansi.reset}`,
        ];
        console.log(' ' + columns.join(' '));
        const busiestTools = fam.byTool
            .slice(0, 2)
            .map(entry => `${entry.tool}×${entry.count}`)
            .join(', ');
        console.log(` ${ansi.dim}top tools: ${busiestTools}${ansi.reset}`);
        // The "driven by" line is only useful when the top source is actually attributed.
        const leadSource = fam.bySource[0];
        if (leadSource && leadSource.source !== 'unknown') {
            console.log(` ${ansi.dim}driven by: ${leadSource.source} (${leadSource.count} calls)${ansi.reset}`);
        }
    });
    console.log();
}
1273
1326
  // ── Advisor commands ────────────────────────────────────────────────
1274
1327
  const ADVISOR_MODES = ['off', 'shadow', 'primary'];
1275
1328
  function readAdvisorMode() {
@@ -1817,6 +1870,18 @@ advisorCmd
1817
1870
  .command('rules')
1818
1871
  .description('List loaded advisor rules')
1819
1872
  .action(cmdAdvisorRules);
1873
+ const analyticsCmd = program // parent `analytics` command group; `tool-usage` is registered on it below
1874
+ .command('analytics')
1875
+ .description('Production telemetry: tool usage, cost breakdowns');
1876
+ analyticsCmd
1877
+ .command('tool-usage')
1878
+ .description('Show which tool families are firing most over a time window')
1879
+ .option('-h, --hours <n>', 'Window size in hours (default 24)', '24') // NOTE(review): `-h` is conventionally the help short flag (Commander's default is `-h, --help`) — confirm `tool-usage -h` is meant to take hours
1880
+ .option('--json', 'Emit machine-readable JSON')
1881
+ .option('-l, --limit <n>', 'Show top N families (default 10)', '10')
1882
+ .action(async (opts) => {
1883
+ await cmdAnalyticsToolUsage(opts);
1884
+ });
1820
1885
  const dashCmd = program
1821
1886
  .command('dashboard')
1822
1887
  .description('Launch local command center')
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "clementine-agent",
3
- "version": "1.1.8",
3
+ "version": "1.1.10",
4
4
  "description": "Clementine — Personal AI Assistant (TypeScript)",
5
5
  "type": "module",
6
6
  "main": "dist/index.js",