clementine-agent 1.1.7 → 1.1.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/agent/assistant.d.ts +1 -1
- package/dist/agent/assistant.js +39 -2
- package/dist/agent/hooks.d.ts +6 -0
- package/dist/agent/hooks.js +27 -0
- package/dist/agent/insight-engine.d.ts +13 -1
- package/dist/agent/insight-engine.js +110 -1
- package/dist/analytics/tool-usage.d.ts +56 -0
- package/dist/analytics/tool-usage.js +129 -0
- package/dist/cli/index.js +64 -0
- package/dist/gateway/router.d.ts +1 -1
- package/dist/gateway/router.js +2 -2
- package/package.json +1 -1
|
@@ -271,7 +271,7 @@ export declare class PersonalAssistant {
|
|
|
271
271
|
* so follow-up conversation has context.
|
|
272
272
|
*/
|
|
273
273
|
injectContext(sessionKey: string, userText: string, assistantText: string): void;
|
|
274
|
-
getRecentActivity(sinceIso: string): Array<{
|
|
274
|
+
getRecentActivity(sinceIso: string, maxEntries?: number): Array<{
|
|
275
275
|
sessionKey: string;
|
|
276
276
|
role: string;
|
|
277
277
|
content: string;
|
package/dist/agent/assistant.js
CHANGED
|
@@ -1506,6 +1506,33 @@ You have a cost budget per message — not a hard turn limit. Work until the tas
|
|
|
1506
1506
|
}
|
|
1507
1507
|
catch { /* non-fatal */ }
|
|
1508
1508
|
}
|
|
1509
|
+
// Conversational context — same signals the insight engine surfaces
|
|
1510
|
+
// proactively (Phase 10), but injected directly into the agent's prompt
|
|
1511
|
+
// so it can adjust its own approach. Scoped to chat sessions because
|
|
1512
|
+
// cron/heartbeat don't have a "user feeling frustrated" axis to react to,
|
|
1513
|
+
// and inflating their prompt doesn't help. Only injected when at least
|
|
1514
|
+
// one signal fires — keeps the prompt clean during normal sessions.
|
|
1515
|
+
if (!isAutonomous) {
|
|
1516
|
+
try {
|
|
1517
|
+
const { detectFrustrationSignals, detectRepeatedTopics } = require('./insight-engine.js');
|
|
1518
|
+
const since24h = new Date(Date.now() - 24 * 60 * 60 * 1000).toISOString();
|
|
1519
|
+
const since7d = new Date(Date.now() - 7 * 24 * 60 * 60 * 1000).toISOString();
|
|
1520
|
+
const recent = this.getRecentActivity(since24h, 50);
|
|
1521
|
+
const week = this.getRecentActivity(since7d, 200);
|
|
1522
|
+
const frustration = detectFrustrationSignals(recent);
|
|
1523
|
+
const topics = detectRepeatedTopics(week);
|
|
1524
|
+
const allSignals = [...frustration, ...topics];
|
|
1525
|
+
if (allSignals.length > 0) {
|
|
1526
|
+
const guidance = frustration.length > 0
|
|
1527
|
+
? '\n\n**Adjust your approach:** When friction signals are present, lead with a clarifying question instead of assuming. Acknowledge the prior misunderstanding briefly without over-apologizing. Confirm understanding before acting.'
|
|
1528
|
+
: '\n\n**Use this context naturally:** Recurring topics may indicate an unresolved thread — if relevant, offer to close the loop or summarize current state. Do not force callbacks if not directly applicable.';
|
|
1529
|
+
volatileParts.push(`## Conversational Context\n\nSignals from recent sessions:\n` +
|
|
1530
|
+
allSignals.map(s => `- ${s}`).join('\n') +
|
|
1531
|
+
guidance);
|
|
1532
|
+
}
|
|
1533
|
+
}
|
|
1534
|
+
catch { /* non-fatal — insight-engine optional */ }
|
|
1535
|
+
}
|
|
1509
1536
|
// Current context — date/time changes every minute, so it's volatile.
|
|
1510
1537
|
const channel = deriveChannel({ sessionKey, isAutonomous, cronTier });
|
|
1511
1538
|
const resolvedModel = resolveModel(model) ?? MODEL;
|
|
@@ -3786,6 +3813,12 @@ You have a cost budget per message — not a hard turn limit. Work until the tas
|
|
|
3786
3813
|
}
|
|
3787
3814
|
async runCronJob(jobName, jobPrompt, tier = 1, maxTurns, model, workDir, timeoutMs, successCriteria, agentSlug) {
|
|
3788
3815
|
setInteractionSource('autonomous');
|
|
3816
|
+
// Tag every tool_use audit event with the cron job name + agent so
|
|
3817
|
+
// analytics tool-usage can show "Bash×893 driven by market-leader-followup"
|
|
3818
|
+
// instead of "driven by: unknown". Cleared on next setInteractionSource
|
|
3819
|
+
// (cron/heartbeat boundary or interactive chat takeover).
|
|
3820
|
+
const { setActiveQueryContext } = await import('./hooks.js');
|
|
3821
|
+
setActiveQueryContext({ job: jobName, source: 'cron', agentSlug });
|
|
3789
3822
|
const cronProfile = agentSlug && agentSlug !== 'clementine'
|
|
3790
3823
|
? this.profileManager.get(agentSlug)
|
|
3791
3824
|
: null;
|
|
@@ -4274,6 +4307,10 @@ You have a cost budget per message — not a hard turn limit. Work until the tas
|
|
|
4274
4307
|
logger.info(`Unleashed task ${jobName}: starting phase ${phase}`);
|
|
4275
4308
|
// Re-assert autonomous source — a chat message may have changed it between phases
|
|
4276
4309
|
setInteractionSource('autonomous');
|
|
4310
|
+
// Tag tool_use audit events with the unleashed job name (Phase 11).
|
|
4311
|
+
// Re-asserted each phase since setInteractionSource clears the context.
|
|
4312
|
+
const { setActiveQueryContext: _setActiveQueryContext } = await import('./hooks.js');
|
|
4313
|
+
_setActiveQueryContext({ job: jobName, source: 'unleashed', agentSlug });
|
|
4277
4314
|
// Unleashed phases run side-effect-heavy work; same logic as cron mode.
|
|
4278
4315
|
const phaseGuard = new StallGuard('unleashed');
|
|
4279
4316
|
const sdkOptions = this.buildOptions({
|
|
@@ -4828,11 +4865,11 @@ You have a cost budget per message — not a hard turn limit. Work until the tas
|
|
|
4828
4865
|
}
|
|
4829
4866
|
}
|
|
4830
4867
|
}
|
|
4831
|
-
getRecentActivity(sinceIso) {
|
|
4868
|
+
getRecentActivity(sinceIso, maxEntries) {
|
|
4832
4869
|
if (!this.memoryStore)
|
|
4833
4870
|
return [];
|
|
4834
4871
|
try {
|
|
4835
|
-
return this.memoryStore.getRecentActivity(sinceIso);
|
|
4872
|
+
return this.memoryStore.getRecentActivity(sinceIso, maxEntries);
|
|
4836
4873
|
}
|
|
4837
4874
|
catch {
|
|
4838
4875
|
return [];
|
package/dist/agent/hooks.d.ts
CHANGED
|
@@ -62,6 +62,12 @@ export declare function getInteractionSource(): 'owner-dm' | 'owner-channel' | '
|
|
|
62
62
|
export declare function getProfileTier(): number | null;
|
|
63
63
|
export declare function getAuditLog(): string[];
|
|
64
64
|
export declare function clearAuditLog(): void;
|
|
65
|
+
export declare function setActiveQueryContext(ctx: {
|
|
66
|
+
job?: string | null;
|
|
67
|
+
source?: string | null;
|
|
68
|
+
agentSlug?: string | null;
|
|
69
|
+
}): void;
|
|
70
|
+
export declare function clearActiveQueryContext(): void;
|
|
65
71
|
export declare function logToolUse(toolName: string, toolInput: Record<string, unknown>): void;
|
|
66
72
|
export declare function getHeartbeatDisallowedTools(): string[];
|
|
67
73
|
export declare const PRIVATE_URL_PATTERNS: RegExp[];
|
package/dist/agent/hooks.js
CHANGED
|
@@ -141,6 +141,11 @@ export function setSendPolicyChecker(checker) {
|
|
|
141
141
|
}
|
|
142
142
|
export function setInteractionSource(source) {
|
|
143
143
|
interactionSource = source;
|
|
144
|
+
// Clear any leftover query attribution context. Cron / unleashed paths
|
|
145
|
+
// immediately call setActiveQueryContext after this; interactive chat
|
|
146
|
+
// doesn't, so anything still set from a prior cron run gets reset.
|
|
147
|
+
activeJob = null;
|
|
148
|
+
activeSource = null;
|
|
144
149
|
}
|
|
145
150
|
export function getInteractionSource() {
|
|
146
151
|
return interactionSource;
|
|
@@ -154,6 +159,25 @@ export function getAuditLog() {
|
|
|
154
159
|
export function clearAuditLog() {
|
|
155
160
|
auditLog.length = 0;
|
|
156
161
|
}
|
|
162
|
+
// Attribution context for audit tool_use events: which job and which
// source kind triggered the query that is currently running. The assistant
// sets it before a cron / unleashed run so the analytics report can name a
// driver instead of "unknown". activeAgentSlug is declared elsewhere in
// this module and is only assigned here, never redeclared.
let activeJob = null;
let activeSource = null;
/**
 * Record the job / source (and optionally the agent slug) of the running query.
 *
 * @param ctx `{ job, source, agentSlug }`. A missing or null `job` / `source`
 *   resets that field to null. `agentSlug` is only applied when the key is
 *   present (not `undefined`), so callers that omit it leave the current
 *   slug untouched.
 */
export function setActiveQueryContext(ctx) {
    const { job, source, agentSlug } = ctx;
    activeJob = job ?? null;
    activeSource = source ?? null;
    if (agentSlug === undefined)
        return;
    activeAgentSlug = agentSlug;
}
/**
 * Drop the job / source attribution. activeAgentSlug is deliberately left
 * alone: this pair of functions does not own it.
 */
export function clearActiveQueryContext() {
    activeSource = null;
    activeJob = null;
}
|
|
157
181
|
export function logToolUse(toolName, toolInput) {
|
|
158
182
|
const timestamp = new Date().toLocaleTimeString('en-US', { hour12: false });
|
|
159
183
|
const summary = summarizeToolCall(toolName, toolInput);
|
|
@@ -164,6 +188,9 @@ export function logToolUse(toolName, toolInput) {
|
|
|
164
188
|
event_type: 'tool_use',
|
|
165
189
|
tool_name: toolName,
|
|
166
190
|
summary,
|
|
191
|
+
...(activeJob ? { job: activeJob } : {}),
|
|
192
|
+
...(activeSource ? { source: activeSource } : {}),
|
|
193
|
+
...(activeAgentSlug ? { agent_slug: activeAgentSlug } : {}),
|
|
167
194
|
});
|
|
168
195
|
}
|
|
169
196
|
// ── Heartbeat tool restrictions ─────────────────────────────────────
|
|
@@ -47,13 +47,25 @@ export declare function maybeIncreaseCooldown(state: InsightState): void;
|
|
|
47
47
|
* Returns structured event summaries that can be passed to an LLM for urgency rating.
|
|
48
48
|
*/
|
|
49
49
|
export declare function gatherInsightSignals(gateway: {
|
|
50
|
-
getRecentActivity: (since: string) => Array<{
|
|
50
|
+
getRecentActivity: (since: string, maxEntries?: number) => Array<{
|
|
51
51
|
sessionKey: string;
|
|
52
52
|
role: string;
|
|
53
53
|
content: string;
|
|
54
54
|
createdAt: string;
|
|
55
55
|
}>;
|
|
56
56
|
}): string[];
|
|
57
|
+
export declare function detectFrustrationSignals(activity: Array<{
|
|
58
|
+
sessionKey: string;
|
|
59
|
+
role: string;
|
|
60
|
+
content: string;
|
|
61
|
+
createdAt: string;
|
|
62
|
+
}>): string[];
|
|
63
|
+
export declare function detectRepeatedTopics(activity: Array<{
|
|
64
|
+
sessionKey: string;
|
|
65
|
+
role: string;
|
|
66
|
+
content: string;
|
|
67
|
+
createdAt: string;
|
|
68
|
+
}>): string[];
|
|
57
69
|
/**
|
|
58
70
|
* Build a prompt for urgency rating (to be sent to a lightweight LLM).
|
|
59
71
|
* Returns null if there are no signals worth evaluating.
|
|
@@ -189,7 +189,27 @@ export function gatherInsightSignals(gateway) {
|
|
|
189
189
|
catch (err) {
|
|
190
190
|
logger.debug({ err }, 'Failed to pull broken-jobs signals');
|
|
191
191
|
}
|
|
192
|
-
// 6.
|
|
192
|
+
// 6. Conversational signals derived from recent transcripts.
|
|
193
|
+
// Surfaces patterns IN the conversation itself, not just system events:
|
|
194
|
+
// user frustration markers, repeating topics, etc. These are early
|
|
195
|
+
// warning signs that the agent's responses may be off-track.
|
|
196
|
+
try {
|
|
197
|
+
const since24h = new Date(Date.now() - 24 * 60 * 60 * 1000).toISOString();
|
|
198
|
+
const since7d = new Date(Date.now() - 7 * 24 * 60 * 60 * 1000).toISOString();
|
|
199
|
+
// 24h frustration scan — 50 entries plenty to count corrections in a day.
|
|
200
|
+
const recent = gateway.getRecentActivity(since24h, 50);
|
|
201
|
+
for (const s of detectFrustrationSignals(recent))
|
|
202
|
+
signals.push(s);
|
|
203
|
+
// 7d repeat-topic scan — pull more entries since topics span sessions.
|
|
204
|
+
// Cap at 200 to keep keyword extraction cheap.
|
|
205
|
+
const week = gateway.getRecentActivity(since7d, 200);
|
|
206
|
+
for (const s of detectRepeatedTopics(week))
|
|
207
|
+
signals.push(s);
|
|
208
|
+
}
|
|
209
|
+
catch (err) {
|
|
210
|
+
logger.debug({ err }, 'Failed to pull conversational signals');
|
|
211
|
+
}
|
|
212
|
+
// 7. Claim tracker — failed claims in the last N hours erode trust.
|
|
193
213
|
// Surface them so the owner sees "Clementine said she'd do X; she
|
|
194
214
|
// didn't" instead of silently swallowing the miss.
|
|
195
215
|
try {
|
|
@@ -214,6 +234,95 @@ export function gatherInsightSignals(gateway) {
|
|
|
214
234
|
}
|
|
215
235
|
return signals;
|
|
216
236
|
}
|
|
237
|
+
// ── Conversational signal detectors ─────────────────────────────────
|
|
238
|
+
//
|
|
239
|
+
// Pure functions over recent transcript activity. Exported so the insight
|
|
240
|
+
// dashboard / debug commands can run them independently of the full
|
|
241
|
+
// gatherInsightSignals path.
|
|
242
|
+
/**
 * Markers that suggest the user is correcting or frustrated with the
 * agent's last response. Every pattern is anchored to the start of the
 * trimmed message, since mid-message "no" or "actually" is often just
 * normal narrative. Every keyword group ends on a word boundary so that
 * "no" does not fire on "now", "note" or "nothing".
 */
const CORRECTION_PATTERNS = [
    /^(no|nope|not)\b/i,
    /^(actually|wait)\b/i,
    /^(that['’]?s|that is) (wrong|not|incorrect|backwards|opposite)\b/i,
    /^I (meant|said|wanted|asked)\b/i,
    /^you (didn['’]?t|misunderstood|got it wrong|missed)\b/i,
    /^(stop|cancel|undo|nevermind|never mind)\b/i,
];
/**
 * Count user messages that open with a correction marker and report
 * friction once there are enough of them.
 *
 * @param activity Transcript entries (`{ sessionKey, role, content, createdAt }`).
 *   Only entries with `role === 'user'` and string content are inspected.
 * @returns An empty array, or a single "Conversation friction" line when at
 *   least three user messages matched. The line reports the match count and
 *   the number of distinct session keys involved.
 */
export function detectFrustrationSignals(activity) {
    const sessionsAffected = new Set();
    let count = 0;
    for (const entry of activity) {
        if (entry.role !== 'user' || typeof entry.content !== 'string')
            continue;
        const trimmed = entry.content.trim();
        // A message counts once, no matter how many patterns it matches.
        if (CORRECTION_PATTERNS.some((re) => re.test(trimmed))) {
            count++;
            sessionsAffected.add(entry.sessionKey);
        }
    }
    // Fewer than three corrections is treated as normal back-and-forth.
    if (count < 3)
        return [];
    return [
        `Conversation friction: ${count} user correction(s) across ${sessionsAffected.size} session(s) in the last 24h — recent agent responses may be off-track`,
    ];
}
|
|
276
|
+
/**
 * Words too generic to count as a topic — would otherwise dominate the
 * "recurring topic" signal with noise like "thanks", "okay", "please".
 * Contractions are not listed: the tokenizer in detectRepeatedTopics
 * rejects any token that still contains an apostrophe.
 */
const TOPIC_STOPWORDS = new Set([
    'about', 'after', 'again', 'against', 'because', 'before', 'being', 'between',
    'could', 'doing', 'down', 'during', 'each', 'from', 'further',
    'going', 'gonna', 'have', 'having', 'here', 'into', 'just', 'know', 'like',
    'maybe', 'might', 'more', 'most', 'much', 'need', 'okay', 'only', 'other',
    'over', 'please', 'really', 'said', 'same', 'some', 'still', 'such', 'than',
    'that', 'them', 'then', 'there', 'these', 'they', 'thing', 'think', 'this',
    'those', 'through', 'thanks', 'time', 'told', 'under', 'until', 'using', 'very',
    'want', 'wanted', 'wants', 'were', 'what', 'when', 'where', 'which', 'while',
    'will', 'with', 'would', 'your', 'yours', 'yeah', 'yes',
    'tonight', 'today', 'tomorrow', 'morning', 'evening', 'session', 'work',
    'made', 'make', 'making', 'sure', 'right', 'wrong', 'good', 'bad',
    'many', 'lots',
]);
/**
 * Find keywords that show up in user messages of three or more distinct
 * sessions — a possible unresolved thread or ongoing project.
 *
 * Tokenization works on whole words. Each message is lower-cased and split
 * into runs of letters, digits, underscores and apostrophes. Surrounding
 * quotes and a trailing possessive ('s) are stripped. A token is kept only
 * if what remains is 5–32 ASCII letters, so contractions ("doesn't"),
 * alphanumeric IDs and fragments of longer words never become topics.
 *
 * @param activity Transcript entries (`{ sessionKey, role, content, createdAt }`).
 *   Only entries with `role === 'user'` and string content are inspected.
 * @returns At most two "Recurring topic" lines, widest session spread first.
 */
export function detectRepeatedTopics(activity) {
    // keyword → set of session keys the keyword appeared in
    const sessionsForKeyword = new Map();
    for (const entry of activity) {
        if (entry.role !== 'user' || typeof entry.content !== 'string')
            continue;
        const rawTokens = entry.content.toLowerCase().match(/[a-z0-9_'’]+/g) ?? [];
        for (const raw of rawTokens) {
            const word = raw
                .replace(/^['’]+|['’]+$/g, '') // surrounding quote marks
                .replace(/['’]s$/, ''); // possessive
            if (!/^[a-z]{5,32}$/.test(word) || TOPIC_STOPWORDS.has(word))
                continue;
            let sessions = sessionsForKeyword.get(word);
            if (!sessions) {
                sessions = new Set();
                sessionsForKeyword.set(word, sessions);
            }
            // Set semantics already dedupe repeats within a message or session.
            sessions.add(entry.sessionKey);
        }
    }
    // Rank by session spread; surface only the top 2 so insight
    // notifications are not flooded with topic mentions.
    return [...sessionsForKeyword.entries()]
        .filter(([, sessions]) => sessions.size >= 3)
        .sort((a, b) => b[1].size - a[1].size)
        .slice(0, 2)
        .map(([keyword, sessions]) => `Recurring topic "${keyword}" came up across ${sessions.size} sessions this week — possible ongoing thread`);
}
|
|
217
326
|
/**
|
|
218
327
|
* Build a prompt for urgency rating (to be sent to a lightweight LLM).
|
|
219
328
|
* Returns null if there are no signals worth evaluating.
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Tool-usage analytics.
|
|
3
|
+
*
|
|
4
|
+
* Reads ~/.clementine/logs/audit.jsonl and aggregates tool_use events by
|
|
5
|
+
* family + name + source so a CLI report can answer:
|
|
6
|
+
*
|
|
7
|
+
* - "What is the agent spending its tool calls on?"
|
|
8
|
+
* - "Which integration (mcp__ family) is hottest?"
|
|
9
|
+
* - "Which job/source is the biggest tool consumer?"
|
|
10
|
+
*
|
|
11
|
+
* Pure file read + in-memory aggregation — no daemon access required.
|
|
12
|
+
* The whole log is read into memory in one call, which is fine for the
|
|
13
|
+
* multi-MB audit logs this targets; lines are then parsed one at a time.
|
|
14
|
+
*/
|
|
15
|
+
export interface ToolFamilyStats {
|
|
16
|
+
/** Family label — collapses mcp__ subnames into one bucket per server. */
|
|
17
|
+
family: string;
|
|
18
|
+
totalCalls: number;
|
|
19
|
+
/** Per-tool breakdown within the family, sorted by count desc. */
|
|
20
|
+
byTool: Array<{
|
|
21
|
+
tool: string;
|
|
22
|
+
count: number;
|
|
23
|
+
}>;
|
|
24
|
+
/** Per-source breakdown — which job/context drives this family. */
|
|
25
|
+
bySource: Array<{
|
|
26
|
+
source: string;
|
|
27
|
+
count: number;
|
|
28
|
+
}>;
|
|
29
|
+
}
|
|
30
|
+
export interface ToolUsageReport {
|
|
31
|
+
windowStart: string;
|
|
32
|
+
windowEnd: string;
|
|
33
|
+
totalToolCalls: number;
|
|
34
|
+
totalQueries: number;
|
|
35
|
+
families: ToolFamilyStats[];
|
|
36
|
+
/** Total cost (sum of query_complete events) over the window — context for tool counts. */
|
|
37
|
+
totalCostUsd: number;
|
|
38
|
+
}
|
|
39
|
+
/**
|
|
40
|
+
* Family normalization. Built-in SDK tools keep their name; MCP tools are
|
|
41
|
+
* grouped by server (mcp__<server>__<tool> → "mcp:<server>"). Any other
|
|
42
|
+
* non-empty name is its own family; an empty name maps to "other".
|
|
43
|
+
*/
|
|
44
|
+
export declare function classifyToolFamily(toolName: string): string;
|
|
45
|
+
/**
|
|
46
|
+
* Aggregate tool_use + query_complete events from audit.jsonl over the
|
|
47
|
+
* given window. Window bounds are ISO strings; entries outside are ignored.
|
|
48
|
+
*
|
|
49
|
+
* The function is forgiving: malformed lines are skipped, missing fields
|
|
50
|
+
* default to 'unknown'. Audit logs are append-only so we never need to
|
|
51
|
+
* worry about ordering.
|
|
52
|
+
*/
|
|
53
|
+
export declare function buildToolUsageReport(auditLogPath: string, windowStart: string, windowEnd: string): ToolUsageReport;
|
|
54
|
+
/** Default audit log path — passed-through for CLI default + tests. */
|
|
55
|
+
export declare function defaultAuditLogPath(baseDir: string): string;
|
|
56
|
+
//# sourceMappingURL=tool-usage.d.ts.map
|
|
@@ -0,0 +1,129 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Tool-usage analytics.
|
|
3
|
+
*
|
|
4
|
+
* Reads ~/.clementine/logs/audit.jsonl and aggregates tool_use events by
|
|
5
|
+
* family + name + source so a CLI report can answer:
|
|
6
|
+
*
|
|
7
|
+
* - "What is the agent spending its tool calls on?"
|
|
8
|
+
* - "Which integration (mcp__ family) is hottest?"
|
|
9
|
+
* - "Which job/source is the biggest tool consumer?"
|
|
10
|
+
*
|
|
11
|
+
* Pure file read + in-memory aggregation — no daemon access required.
|
|
12
|
+
* The whole log is read into memory in one call, which is fine for the
|
|
13
|
+
* multi-MB audit logs this targets; lines are then parsed one at a time.
|
|
14
|
+
*/
|
|
15
|
+
import { existsSync, readFileSync } from 'node:fs';
|
|
16
|
+
import path from 'node:path';
|
|
17
|
+
/**
 * Built-in SDK tool name → family label. A Map is used rather than an
 * object literal so that tool names such as "constructor" or "toString"
 * cannot resolve to inherited Object.prototype members.
 */
const BUILTIN_FAMILIES = new Map([
    ['Bash', 'shell'],
    ['Read', 'fs-read'],
    ['Glob', 'fs-read'],
    ['Grep', 'fs-read'],
    ['Edit', 'fs-write'],
    ['Write', 'fs-write'],
    ['NotebookEdit', 'fs-write'],
    ['WebFetch', 'web'],
    ['WebSearch', 'web'],
    ['Agent', 'subagent'],
    ['Task', 'subagent'],
]);
/**
 * Family normalization.
 *
 * - MCP tools are grouped by server: mcp__<server>__<tool> → "mcp:<server>".
 * - Known built-in SDK tools map to a shared family (e.g. Read/Glob/Grep → "fs-read").
 * - Any other non-empty name is its own family.
 * - An empty or non-string name maps to "other".
 *
 * @param toolName Raw tool name as recorded in the audit log.
 * @returns The family label; always a string.
 */
export function classifyToolFamily(toolName) {
    if (typeof toolName !== 'string' || toolName === '')
        return 'other';
    // mcp__server-name__tool_name → mcp:server-name. The server part is a
    // sequence of non-underscore runs joined by single underscores. Written
    // this way the match is unambiguous, so a long hyphenated name with no
    // closing "__" cannot trigger exponential backtracking.
    const mcpMatch = toolName.match(/^mcp__([^_]+(?:_[^_]+)*)__/);
    if (mcpMatch)
        return `mcp:${mcpMatch[1]}`;
    return BUILTIN_FAMILIES.get(toolName) ?? toolName;
}
/**
 * Aggregate tool_use + query_complete events from audit.jsonl over the
 * given window.
 *
 * The function is forgiving about log content: blank lines, lines that are
 * not JSON, JSON values that are not objects, entries without a parseable
 * `ts`, and tool_use entries without a string `tool_name` are skipped. A
 * tool_use entry with neither a string `job` nor a string `source` is
 * attributed to 'unknown'. Entries are counted in whatever order they
 * appear in the file.
 *
 * @param auditLogPath Path of the JSONL audit log. A missing file yields an
 *   all-zero report.
 * @param windowStart Inclusive lower bound (ISO timestamp).
 * @param windowEnd Inclusive upper bound (ISO timestamp). A bound that
 *   Date.parse cannot read leaves that side of the window open.
 * @returns Totals plus per-family stats, families and their breakdowns
 *   sorted by call count descending; cost is rounded to 4 decimals.
 * @throws Whatever readFileSync throws if the file exists but cannot be read.
 */
export function buildToolUsageReport(auditLogPath, windowStart, windowEnd) {
    const startMs = Date.parse(windowStart);
    const endMs = Date.parse(windowEnd);
    // family → { totalCalls, perTool: Map<string,count>, perSource: Map<string,count> }
    const families = new Map();
    let totalToolCalls = 0;
    let totalQueries = 0;
    let totalCost = 0;
    if (!existsSync(auditLogPath)) {
        return { windowStart, windowEnd, totalToolCalls: 0, totalQueries: 0, families: [], totalCostUsd: 0 };
    }
    const bump = (counts, key) => counts.set(key, (counts.get(key) ?? 0) + 1);
    // Each line is independent JSON. Audit logs are typically a few MB;
    // readFileSync is fine at that scale.
    const raw = readFileSync(auditLogPath, 'utf-8');
    for (const line of raw.split('\n')) {
        if (!line)
            continue;
        let entry;
        try {
            entry = JSON.parse(line);
        }
        catch {
            continue;
        }
        // JSON.parse happily returns null / numbers / strings for valid
        // non-object lines; property access on null would throw.
        if (entry === null || typeof entry !== 'object' || !entry.ts)
            continue;
        const tsMs = Date.parse(entry.ts);
        if (Number.isNaN(tsMs))
            continue;
        if (tsMs < startMs || tsMs > endMs)
            continue;
        if (entry.event_type === 'tool_use') {
            if (typeof entry.tool_name !== 'string' || entry.tool_name === '')
                continue;
            const family = classifyToolFamily(entry.tool_name);
            // The job name is the more specific attribution; fall back to the source kind.
            const source = [entry.job, entry.source].find((v) => typeof v === 'string' && v !== '') ?? 'unknown';
            let bucket = families.get(family);
            if (!bucket) {
                bucket = { totalCalls: 0, perTool: new Map(), perSource: new Map() };
                families.set(family, bucket);
            }
            bucket.totalCalls++;
            bump(bucket.perTool, entry.tool_name);
            bump(bucket.perSource, source);
            totalToolCalls++;
        }
        else if (entry.event_type === 'query_complete') {
            totalQueries++;
            if (typeof entry.cost_usd === 'number' && Number.isFinite(entry.cost_usd)) {
                totalCost += entry.cost_usd;
            }
        }
    }
    const byCountDesc = (a, b) => b.count - a.count;
    const familyStats = [...families.entries()]
        .map(([family, b]) => ({
        family,
        totalCalls: b.totalCalls,
        byTool: [...b.perTool.entries()]
            .map(([tool, count]) => ({ tool, count }))
            .sort(byCountDesc),
        bySource: [...b.perSource.entries()]
            .map(([source, count]) => ({ source, count }))
            .sort(byCountDesc),
    }))
        .sort((a, b) => b.totalCalls - a.totalCalls);
    return {
        windowStart,
        windowEnd,
        totalToolCalls,
        totalQueries,
        families: familyStats,
        totalCostUsd: Number(totalCost.toFixed(4)),
    };
}
/**
 * Default audit log path — passed-through for CLI default + tests.
 *
 * @param baseDir Base data directory.
 * @returns `<baseDir>/logs/audit.jsonl`.
 */
export function defaultAuditLogPath(baseDir) {
    return path.join(baseDir, 'logs', 'audit.jsonl');
}
|
|
129
|
+
//# sourceMappingURL=tool-usage.js.map
|
package/dist/cli/index.js
CHANGED
|
@@ -1270,6 +1270,58 @@ async function cmdConfigKeychainFixAcl(opts) {
|
|
|
1270
1270
|
}
|
|
1271
1271
|
console.log();
|
|
1272
1272
|
}
|
|
1273
|
+
// ── Analytics ────────────────────────────────────────────────────────
|
|
1274
|
+
/**
 * `analytics tool-usage` — print which tool families fired most over the
 * trailing window, either as JSON or as a colored bar chart.
 *
 * @param opts Parsed CLI options:
 *   - `hours`: window size in hours (string; default 24, minimum 1, capped
 *     at roughly ten years so the window start stays a valid Date).
 *   - `limit`: number of families to list (string; default 10, minimum 1).
 *   - `json`: when truthy, emit the raw report as JSON and nothing else.
 */
async function cmdAnalyticsToolUsage(opts) {
    const { buildToolUsageReport, defaultAuditLogPath } = await import('../analytics/tool-usage.js');
    // Without an upper bound a huge --hours value pushes `start` outside the
    // representable Date range and toISOString() throws RangeError.
    const MAX_WINDOW_HOURS = 24 * 366 * 10;
    const hours = Math.min(MAX_WINDOW_HOURS, Math.max(1, parseInt(opts.hours ?? '24', 10) || 24));
    const limit = Math.max(1, parseInt(opts.limit ?? '10', 10) || 10);
    const end = new Date();
    const start = new Date(end.getTime() - hours * 60 * 60 * 1000);
    const report = buildToolUsageReport(defaultAuditLogPath(BASE_DIR), start.toISOString(), end.toISOString());
    if (opts.json) {
        console.log(JSON.stringify(report, null, 2));
        return;
    }
    // ANSI escape sequences used for the human-readable report.
    const DIM = '\x1b[0;90m';
    const BOLD = '\x1b[1m';
    const CYAN = '\x1b[0;36m';
    const GREEN = '\x1b[0;32m';
    const YELLOW = '\x1b[0;33m';
    const RESET = '\x1b[0m';
    console.log();
    console.log(` ${BOLD}Window:${RESET} last ${hours}h ${DIM}(${start.toISOString()} → ${end.toISOString()})${RESET}`);
    console.log(` ${BOLD}Total tool calls:${RESET} ${report.totalToolCalls.toLocaleString()}`);
    console.log(` ${BOLD}Total queries:${RESET} ${report.totalQueries.toLocaleString()}`);
    console.log(` ${BOLD}Total cost:${RESET} ${GREEN}$${report.totalCostUsd.toFixed(4)}${RESET}`);
    console.log();
    if (report.families.length === 0) {
        console.log(` ${DIM}No tool_use events in window.${RESET}`);
        console.log();
        return;
    }
    const top = report.families.slice(0, limit);
    // `top` is non-empty here, so the spreads below never hit Math.max().
    const maxCalls = Math.max(...top.map(f => f.totalCalls));
    const familyWidth = Math.max(...top.map(f => f.family.length), 12);
    console.log(` ${BOLD}Top ${top.length} tool families${RESET}`);
    for (const f of top) {
        const pct = report.totalToolCalls > 0
            ? ((f.totalCalls / report.totalToolCalls) * 100).toFixed(1)
            : '0.0';
        // Bars are scaled against the busiest listed family (28 cells wide).
        const barLen = Math.round((f.totalCalls / maxCalls) * 28);
        const bar = '█'.repeat(barLen).padEnd(28);
        console.log(` ${CYAN}${f.family.padEnd(familyWidth)}${RESET} ` +
            `${String(f.totalCalls).padStart(5)} ${DIM}calls${RESET} ` +
            `${pct.padStart(5)}% ${YELLOW}${bar}${RESET}`);
        // Top 2 individual tools within each family + top source
        const topTools = f.byTool.slice(0, 2).map(t => `${t.tool}×${t.count}`).join(', ');
        const topSource = f.bySource[0];
        console.log(` ${DIM}top tools: ${topTools}${RESET}`);
        if (topSource) {
            console.log(` ${DIM}driven by: ${topSource.source} (${topSource.count} calls)${RESET}`);
        }
    }
    console.log();
}
|
|
1273
1325
|
// ── Advisor commands ────────────────────────────────────────────────
|
|
1274
1326
|
const ADVISOR_MODES = ['off', 'shadow', 'primary'];
|
|
1275
1327
|
function readAdvisorMode() {
|
|
@@ -1817,6 +1869,18 @@ advisorCmd
|
|
|
1817
1869
|
.command('rules')
|
|
1818
1870
|
.description('List loaded advisor rules')
|
|
1819
1871
|
.action(cmdAdvisorRules);
|
|
1872
|
+
// `analytics` command group and its `tool-usage` subcommand.
const analyticsCmd = program
    .command('analytics')
    .description('Production telemetry: tool usage, cost breakdowns');
analyticsCmd
    .command('tool-usage')
    .description('Show which tool families are firing most over a time window')
    // The short flag -h is claimed by --hours below, which collides with the
    // conventional -h/--help pair. Help for this subcommand is therefore
    // exposed as --help only, so -h has exactly one meaning.
    // NOTE(review): assumes `program` is a Commander instance — confirm.
    .helpOption('--help', 'display help for command')
    .option('-h, --hours <n>', 'Window size in hours (default 24)', '24')
    .option('--json', 'Emit machine-readable JSON')
    .option('-l, --limit <n>', 'Show top N families (default 10)', '10')
    .action(async (opts) => {
    await cmdAnalyticsToolUsage(opts);
});
|
|
1820
1884
|
const dashCmd = program
|
|
1821
1885
|
.command('dashboard')
|
|
1822
1886
|
.description('Launch local command center')
|
package/dist/gateway/router.d.ts
CHANGED
|
@@ -170,7 +170,7 @@ export declare class Gateway {
|
|
|
170
170
|
* Get recent transcript activity across all sessions.
|
|
171
171
|
* Used by heartbeat to know what happened since the last check.
|
|
172
172
|
*/
|
|
173
|
-
getRecentActivity(sinceIso: string): Array<{
|
|
173
|
+
getRecentActivity(sinceIso: string, maxEntries?: number): Array<{
|
|
174
174
|
sessionKey: string;
|
|
175
175
|
role: string;
|
|
176
176
|
content: string;
|
package/dist/gateway/router.js
CHANGED
|
@@ -1447,8 +1447,8 @@ export class Gateway {
|
|
|
1447
1447
|
* Get recent transcript activity across all sessions.
|
|
1448
1448
|
* Used by heartbeat to know what happened since the last check.
|
|
1449
1449
|
*/
|
|
1450
|
-
getRecentActivity(sinceIso) {
|
|
1451
|
-
return this.assistant.getRecentActivity(sinceIso);
|
|
1450
|
+
getRecentActivity(sinceIso, maxEntries) {
|
|
1451
|
+
return this.assistant.getRecentActivity(sinceIso, maxEntries);
|
|
1452
1452
|
}
|
|
1453
1453
|
/**
|
|
1454
1454
|
* Search memory (FTS5) for context relevant to a query.
|