trickle-cli 0.1.185 → 0.1.187
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/commands/cost-report.js +51 -0
- package/dist/commands/monitor.js +110 -0
- package/package.json +1 -1
- package/src/commands/cost-report.ts +49 -0
- package/src/commands/monitor.ts +117 -0
|
@@ -107,10 +107,51 @@ function costReportCommand(opts) {
|
|
|
107
107
|
const monthlyProjection = timeSpanMs > 60000
|
|
108
108
|
? (totalCost / timeSpanMs) * 30 * 24 * 60 * 60 * 1000
|
|
109
109
|
: null;
|
|
110
|
+
// Per-agent cost roll-up — read agents.jsonl and attribute LLM costs to agents
|
|
111
|
+
const agentsFile = path.join(dir, 'agents.jsonl');
|
|
112
|
+
const byAgent = {};
|
|
113
|
+
if (fs.existsSync(agentsFile)) {
|
|
114
|
+
const agentEvents = fs.readFileSync(agentsFile, 'utf-8').split('\n').filter(Boolean)
|
|
115
|
+
.map(l => { try {
|
|
116
|
+
return JSON.parse(l);
|
|
117
|
+
}
|
|
118
|
+
catch {
|
|
119
|
+
return null;
|
|
120
|
+
} }).filter(Boolean);
|
|
121
|
+
// Build agent activity windows: agent_start → agent_end with timestamps
|
|
122
|
+
const activeAgents = [];
|
|
123
|
+
const startTimes = {};
|
|
124
|
+
for (const ev of agentEvents) {
|
|
125
|
+
const name = ev.chain || ev.tool || 'unknown';
|
|
126
|
+
const fw = ev.framework || 'unknown';
|
|
127
|
+
if (ev.event === 'agent_start' || ev.event === 'crew_start') {
|
|
128
|
+
startTimes[name] = { name, framework: fw, ts: ev.timestamp || 0 };
|
|
129
|
+
}
|
|
130
|
+
else if ((ev.event === 'agent_end' || ev.event === 'crew_end') && startTimes[name]) {
|
|
131
|
+
activeAgents.push({ name, framework: fw, start: startTimes[name].ts, end: ev.timestamp || Date.now() });
|
|
132
|
+
delete startTimes[name];
|
|
133
|
+
}
|
|
134
|
+
}
|
|
135
|
+
// Attribute each LLM call to the most-recently-started agent active at that time
|
|
136
|
+
for (const call of calls) {
|
|
137
|
+
const ts = call.timestamp || 0;
|
|
138
|
+
const matching = activeAgents.filter(a => ts >= a.start && ts <= a.end);
|
|
139
|
+
const agent = matching.length > 0 ? matching[matching.length - 1] : null;
|
|
140
|
+
if (agent) {
|
|
141
|
+
const key = `${agent.framework}/${agent.name}`;
|
|
142
|
+
if (!byAgent[key])
|
|
143
|
+
byAgent[key] = { calls: 0, tokens: 0, cost: 0, framework: agent.framework };
|
|
144
|
+
byAgent[key].calls++;
|
|
145
|
+
byAgent[key].tokens += call.totalTokens || 0;
|
|
146
|
+
byAgent[key].cost += call.estimatedCostUsd || 0;
|
|
147
|
+
}
|
|
148
|
+
}
|
|
149
|
+
}
|
|
110
150
|
if (opts.json) {
|
|
111
151
|
console.log(JSON.stringify({
|
|
112
152
|
summary: { totalCost, totalTokens, totalInputTokens, totalOutputTokens, totalCalls: calls.length, totalDurationMs: totalDuration, errors: errorCount, monthlyProjection },
|
|
113
153
|
byProvider, byModel,
|
|
154
|
+
...(Object.keys(byAgent).length > 0 ? { byAgent } : {}),
|
|
114
155
|
}, null, 2));
|
|
115
156
|
return;
|
|
116
157
|
}
|
|
@@ -154,6 +195,16 @@ function costReportCommand(opts) {
|
|
|
154
195
|
}
|
|
155
196
|
// Top costly calls
|
|
156
197
|
const costlyCalls = calls.filter(c => c.estimatedCostUsd > 0).sort((a, b) => b.estimatedCostUsd - a.estimatedCostUsd).slice(0, 5);
|
|
198
|
+
// By agent (if agent data exists)
|
|
199
|
+
if (Object.keys(byAgent).length > 0) {
|
|
200
|
+
console.log(chalk_1.default.gray('\n ' + '─'.repeat(60)));
|
|
201
|
+
console.log(chalk_1.default.bold(' By Agent/Workflow'));
|
|
202
|
+
const sortedAgents = Object.entries(byAgent).sort((a, b) => b[1].cost - a[1].cost);
|
|
203
|
+
for (const [name, data] of sortedAgents) {
|
|
204
|
+
const pct = totalCost > 0 ? ((data.cost / totalCost) * 100).toFixed(0) : '0';
|
|
205
|
+
console.log(` ${chalk_1.default.cyan(name.padEnd(30))} $${data.cost.toFixed(4).padEnd(10)} ${chalk_1.default.gray(pct + '%')} ${data.calls} calls ${formatTokens(data.tokens)} tokens`);
|
|
206
|
+
}
|
|
207
|
+
}
|
|
157
208
|
if (costlyCalls.length > 0) {
|
|
158
209
|
console.log(chalk_1.default.gray('\n ' + '─'.repeat(60)));
|
|
159
210
|
console.log(chalk_1.default.bold(' Most Expensive Calls'));
|
package/dist/commands/monitor.js
CHANGED
|
@@ -412,6 +412,114 @@ function analyzeCallTrace(trickleDir, rules) {
|
|
|
412
412
|
}
|
|
413
413
|
return alerts;
|
|
414
414
|
}
|
|
415
|
+
/**
 * Inspect llm.jsonl in the trickle directory and produce health alerts:
 *   1. critical alert when >30% of recorded calls carry an `error` field,
 *   2. warning when one call accounts for >50% of total estimated cost,
 *   3. warning when any call used more than 10K tokens.
 * Returns an empty array when the file is missing or holds no calls.
 */
function analyzeLlmCalls(trickleDir) {
    const found = [];
    const llmFile = path.join(trickleDir, 'llm.jsonl');
    if (!fs.existsSync(llmFile)) {
        return [];
    }
    const calls = readJsonl(llmFile);
    if (calls.length === 0) {
        return [];
    }
    // 1. High error rate.
    const failed = calls.filter((c) => c.error);
    const failRatio = failed.length / calls.length;
    if (failed.length > 0 && failRatio > 0.3) {
        // Sample up to three distinct error prefixes for the suggestion text.
        const samples = [...new Set(failed.slice(0, 3).map((c) => (c.error || '').substring(0, 50)))];
        found.push({
            kind: 'alert', severity: 'critical', category: 'llm_errors',
            message: `High LLM error rate: ${failed.length}/${calls.length} calls failed (${Math.round(failRatio * 100)}%)`,
            details: { errorCount: failed.length, totalCalls: calls.length },
            timestamp: Date.now(),
            suggestion: `Check API keys, rate limits, and model availability. Common errors: ${samples.join('; ')}`,
        });
    }
    // 2. Cost spike — a single call consuming more than half the total spend.
    let totalCost = 0;
    for (const c of calls) {
        totalCost += c.estimatedCostUsd || 0;
    }
    if (totalCost > 0) {
        // First call wins ties, matching a left-to-right reduce with strict `>`.
        let priciest = calls[0];
        for (const c of calls) {
            if ((c.estimatedCostUsd || 0) > (priciest.estimatedCostUsd || 0)) {
                priciest = c;
            }
        }
        if (priciest.estimatedCostUsd > totalCost * 0.5 && calls.length > 2) {
            found.push({
                kind: 'alert', severity: 'warning', category: 'llm_cost_spike',
                message: `Single LLM call consumed ${Math.round(priciest.estimatedCostUsd / totalCost * 100)}% of total cost ($${priciest.estimatedCostUsd.toFixed(4)} of $${totalCost.toFixed(4)})`,
                details: { model: priciest.model, tokens: priciest.totalTokens, cost: priciest.estimatedCostUsd, input: (priciest.inputPreview || '').substring(0, 80) },
                timestamp: Date.now(),
                suggestion: `Review this prompt for unnecessary length. Consider using a cheaper model (e.g., gpt-4o-mini instead of gpt-4o).`,
            });
        }
    }
    // 3. Excessive token usage per call (> 10K tokens).
    const heavy = calls.filter((c) => (c.totalTokens || 0) > 10000);
    if (heavy.length > 0) {
        found.push({
            kind: 'alert', severity: 'warning', category: 'llm_high_tokens',
            message: `${heavy.length} LLM call(s) used >10K tokens`,
            details: { calls: heavy.map((c) => ({ model: c.model, tokens: c.totalTokens, input: (c.inputPreview || '').substring(0, 50) })) },
            timestamp: Date.now(),
            suggestion: `Large prompts increase cost and latency. Consider chunking input, using summarization, or reducing context window.`,
        });
    }
    return found;
}
|
|
461
|
+
/**
 * Inspect agents.jsonl in the trickle directory and produce agent/workflow
 * health alerts:
 *   1. warning when the same tool is started 3+ times in a row (retry loop),
 *   2. warning/critical for tool_error events (critical at 3 or more),
 *   3. one critical alert per crew/chain/agent error (capped at 3),
 *   4. warning for crew runs longer than 30 seconds.
 * Returns an empty array when the file is missing or holds no events.
 * Assumes events are appended in chronological order — TODO confirm the
 * writer guarantees this.
 */
function analyzeAgentEvents(trickleDir) {
    const alerts = [];
    const agentsFile = path.join(trickleDir, 'agents.jsonl');
    if (!fs.existsSync(agentsFile))
        return [];
    const events = readJsonl(agentsFile);
    if (events.length === 0)
        return [];
    // 1. Repeated tool retries — same tool started 3+ times in a row.
    // Only tool_start events matter here; the previous version also gathered
    // tool_end events into an intermediate array and immediately discarded them.
    const toolNames = events.filter(e => e.event === 'tool_start').map(e => e.tool || '');
    for (let i = 0; i < toolNames.length - 2; i++) {
        // The trailing `toolNames[i]` guard skips runs of empty/unnamed tools.
        if (toolNames[i] === toolNames[i + 1] && toolNames[i] === toolNames[i + 2] && toolNames[i]) {
            alerts.push({
                kind: 'alert', severity: 'warning', category: 'agent_tool_retry',
                message: `Tool "${toolNames[i]}" called 3+ times in a row — possible retry loop`,
                details: { tool: toolNames[i], consecutiveCalls: 3 },
                timestamp: Date.now(),
                suggestion: `The agent may be retrying a failing tool. Check if the tool input is correct or if the agent misunderstands the tool's capabilities.`,
            });
            break; // Only report once per run.
        }
    }
    // 2. Tool errors — escalate to critical at 3 or more failures.
    const toolErrors = events.filter(e => e.event === 'tool_error');
    if (toolErrors.length > 0) {
        alerts.push({
            kind: 'alert', severity: toolErrors.length >= 3 ? 'critical' : 'warning', category: 'agent_tool_errors',
            message: `${toolErrors.length} tool execution error(s) during agent run`,
            // Cap at 5 samples of 100 chars each to keep alert payloads small.
            details: { errors: toolErrors.slice(0, 5).map((e) => ({ tool: e.tool, error: (e.error || '').substring(0, 100) })) },
            timestamp: Date.now(),
            suggestion: `Agent tools are failing. Check tool implementations and ensure inputs match expected schemas.`,
        });
    }
    // 3. Agent errors / crew failures — one alert each, capped at 3.
    const agentErrors = events.filter(e => e.event === 'crew_error' || e.event === 'chain_error' || e.event === 'agent_error');
    for (const err of agentErrors.slice(0, 3)) {
        alerts.push({
            kind: 'alert', severity: 'critical', category: 'agent_failure',
            message: `Agent workflow failed: ${(err.error || err.chain || 'unknown error').substring(0, 100)}`,
            details: { event: err.event, framework: err.framework, error: err.error },
            timestamp: Date.now(),
            suggestion: `Use \`trickle why\` to trace the causal chain leading to this failure.`,
        });
    }
    // 4. Long agent runs (> 30s). The truthiness check on durationMs also
    // drops records where it is missing or zero.
    const crewEnds = events.filter(e => e.event === 'crew_end' && e.durationMs);
    for (const run of crewEnds) {
        if (run.durationMs > 30000) {
            alerts.push({
                kind: 'alert', severity: 'warning', category: 'agent_slow',
                message: `Agent run took ${(run.durationMs / 1000).toFixed(1)}s — consider optimizing`,
                details: { framework: run.framework, durationMs: run.durationMs },
                timestamp: Date.now(),
                suggestion: `Long agent runs increase cost and user wait time. Check for unnecessary tool calls or verbose prompts.`,
            });
        }
    }
    return alerts;
}
|
|
415
523
|
function runMonitor(opts) {
|
|
416
524
|
const trickleDir = findTrickleDir(opts.dir);
|
|
417
525
|
if (!fs.existsSync(trickleDir)) {
|
|
@@ -428,6 +536,8 @@ function runMonitor(opts) {
|
|
|
428
536
|
...analyzeMemory(trickleDir, memoryThresholdMb, rules),
|
|
429
537
|
...analyzeFunctions(trickleDir, slowFunctionMs, rules),
|
|
430
538
|
...analyzeCallTrace(trickleDir, rules),
|
|
539
|
+
...analyzeLlmCalls(trickleDir),
|
|
540
|
+
...analyzeAgentEvents(trickleDir),
|
|
431
541
|
];
|
|
432
542
|
// Write alerts to file for agent consumption
|
|
433
543
|
const alertsFile = path.join(trickleDir, 'alerts.jsonl');
|
package/package.json
CHANGED
|
@@ -85,10 +85,48 @@ export function costReportCommand(opts: { json?: boolean; budget?: string }): vo
|
|
|
85
85
|
? (totalCost / timeSpanMs) * 30 * 24 * 60 * 60 * 1000
|
|
86
86
|
: null;
|
|
87
87
|
|
|
88
|
+
// Per-agent cost roll-up — read agents.jsonl and attribute LLM costs to agents
|
|
89
|
+
const agentsFile = path.join(dir, 'agents.jsonl');
|
|
90
|
+
const byAgent: Record<string, { calls: number; tokens: number; cost: number; framework: string }> = {};
|
|
91
|
+
if (fs.existsSync(agentsFile)) {
|
|
92
|
+
const agentEvents = fs.readFileSync(agentsFile, 'utf-8').split('\n').filter(Boolean)
|
|
93
|
+
.map(l => { try { return JSON.parse(l); } catch { return null; } }).filter(Boolean);
|
|
94
|
+
|
|
95
|
+
// Build agent activity windows: agent_start → agent_end with timestamps
|
|
96
|
+
const activeAgents: { name: string; framework: string; start: number; end: number }[] = [];
|
|
97
|
+
const startTimes: Record<string, { name: string; framework: string; ts: number }> = {};
|
|
98
|
+
|
|
99
|
+
for (const ev of agentEvents) {
|
|
100
|
+
const name = ev.chain || ev.tool || 'unknown';
|
|
101
|
+
const fw = ev.framework || 'unknown';
|
|
102
|
+
if (ev.event === 'agent_start' || ev.event === 'crew_start') {
|
|
103
|
+
startTimes[name] = { name, framework: fw, ts: ev.timestamp || 0 };
|
|
104
|
+
} else if ((ev.event === 'agent_end' || ev.event === 'crew_end') && startTimes[name]) {
|
|
105
|
+
activeAgents.push({ name, framework: fw, start: startTimes[name].ts, end: ev.timestamp || Date.now() });
|
|
106
|
+
delete startTimes[name];
|
|
107
|
+
}
|
|
108
|
+
}
|
|
109
|
+
|
|
110
|
+
// Attribute each LLM call to the most-recently-started agent active at that time
|
|
111
|
+
for (const call of calls) {
|
|
112
|
+
const ts = call.timestamp || 0;
|
|
113
|
+
const matching = activeAgents.filter(a => ts >= a.start && ts <= a.end);
|
|
114
|
+
const agent = matching.length > 0 ? matching[matching.length - 1] : null;
|
|
115
|
+
if (agent) {
|
|
116
|
+
const key = `${agent.framework}/${agent.name}`;
|
|
117
|
+
if (!byAgent[key]) byAgent[key] = { calls: 0, tokens: 0, cost: 0, framework: agent.framework };
|
|
118
|
+
byAgent[key].calls++;
|
|
119
|
+
byAgent[key].tokens += call.totalTokens || 0;
|
|
120
|
+
byAgent[key].cost += call.estimatedCostUsd || 0;
|
|
121
|
+
}
|
|
122
|
+
}
|
|
123
|
+
}
|
|
124
|
+
|
|
88
125
|
if (opts.json) {
|
|
89
126
|
console.log(JSON.stringify({
|
|
90
127
|
summary: { totalCost, totalTokens, totalInputTokens, totalOutputTokens, totalCalls: calls.length, totalDurationMs: totalDuration, errors: errorCount, monthlyProjection },
|
|
91
128
|
byProvider, byModel,
|
|
129
|
+
...(Object.keys(byAgent).length > 0 ? { byAgent } : {}),
|
|
92
130
|
}, null, 2));
|
|
93
131
|
return;
|
|
94
132
|
}
|
|
@@ -137,6 +175,17 @@ export function costReportCommand(opts: { json?: boolean; budget?: string }): vo
|
|
|
137
175
|
|
|
138
176
|
// Top costly calls
|
|
139
177
|
const costlyCalls = calls.filter(c => c.estimatedCostUsd > 0).sort((a, b) => b.estimatedCostUsd - a.estimatedCostUsd).slice(0, 5);
|
|
178
|
+
// By agent (if agent data exists)
|
|
179
|
+
if (Object.keys(byAgent).length > 0) {
|
|
180
|
+
console.log(chalk.gray('\n ' + '─'.repeat(60)));
|
|
181
|
+
console.log(chalk.bold(' By Agent/Workflow'));
|
|
182
|
+
const sortedAgents = Object.entries(byAgent).sort((a, b) => b[1].cost - a[1].cost);
|
|
183
|
+
for (const [name, data] of sortedAgents) {
|
|
184
|
+
const pct = totalCost > 0 ? ((data.cost / totalCost) * 100).toFixed(0) : '0';
|
|
185
|
+
console.log(` ${chalk.cyan(name.padEnd(30))} $${data.cost.toFixed(4).padEnd(10)} ${chalk.gray(pct + '%')} ${data.calls} calls ${formatTokens(data.tokens)} tokens`);
|
|
186
|
+
}
|
|
187
|
+
}
|
|
188
|
+
|
|
140
189
|
if (costlyCalls.length > 0) {
|
|
141
190
|
console.log(chalk.gray('\n ' + '─'.repeat(60)));
|
|
142
191
|
console.log(chalk.bold(' Most Expensive Calls'));
|
package/src/commands/monitor.ts
CHANGED
|
@@ -435,6 +435,121 @@ function analyzeCallTrace(trickleDir: string, rules: RulesConfig): Alert[] {
|
|
|
435
435
|
return alerts;
|
|
436
436
|
}
|
|
437
437
|
|
|
438
|
+
function analyzeLlmCalls(trickleDir: string): Alert[] {
|
|
439
|
+
const alerts: Alert[] = [];
|
|
440
|
+
const llmFile = path.join(trickleDir, 'llm.jsonl');
|
|
441
|
+
if (!fs.existsSync(llmFile)) return [];
|
|
442
|
+
const calls = readJsonl(llmFile) as any[];
|
|
443
|
+
if (calls.length === 0) return [];
|
|
444
|
+
|
|
445
|
+
// 1. High error rate
|
|
446
|
+
const errors = calls.filter(c => c.error);
|
|
447
|
+
if (errors.length > 0 && errors.length / calls.length > 0.3) {
|
|
448
|
+
alerts.push({
|
|
449
|
+
kind: 'alert', severity: 'critical', category: 'llm_errors',
|
|
450
|
+
message: `High LLM error rate: ${errors.length}/${calls.length} calls failed (${Math.round(errors.length / calls.length * 100)}%)`,
|
|
451
|
+
details: { errorCount: errors.length, totalCalls: calls.length },
|
|
452
|
+
timestamp: Date.now(),
|
|
453
|
+
suggestion: `Check API keys, rate limits, and model availability. Common errors: ${[...new Set(errors.slice(0, 3).map(e => (e.error || '').substring(0, 50)))].join('; ')}`,
|
|
454
|
+
});
|
|
455
|
+
}
|
|
456
|
+
|
|
457
|
+
// 2. Cost spike — single call > 50% of total
|
|
458
|
+
const totalCost = calls.reduce((s: number, c: any) => s + (c.estimatedCostUsd || 0), 0);
|
|
459
|
+
if (totalCost > 0) {
|
|
460
|
+
const maxCall = calls.reduce((max: any, c: any) => (c.estimatedCostUsd || 0) > (max.estimatedCostUsd || 0) ? c : max, calls[0]);
|
|
461
|
+
if (maxCall.estimatedCostUsd > totalCost * 0.5 && calls.length > 2) {
|
|
462
|
+
alerts.push({
|
|
463
|
+
kind: 'alert', severity: 'warning', category: 'llm_cost_spike',
|
|
464
|
+
message: `Single LLM call consumed ${Math.round(maxCall.estimatedCostUsd / totalCost * 100)}% of total cost ($${maxCall.estimatedCostUsd.toFixed(4)} of $${totalCost.toFixed(4)})`,
|
|
465
|
+
details: { model: maxCall.model, tokens: maxCall.totalTokens, cost: maxCall.estimatedCostUsd, input: (maxCall.inputPreview || '').substring(0, 80) },
|
|
466
|
+
timestamp: Date.now(),
|
|
467
|
+
suggestion: `Review this prompt for unnecessary length. Consider using a cheaper model (e.g., gpt-4o-mini instead of gpt-4o).`,
|
|
468
|
+
});
|
|
469
|
+
}
|
|
470
|
+
}
|
|
471
|
+
|
|
472
|
+
// 3. Excessive token usage per call (> 10K tokens)
|
|
473
|
+
const highTokenCalls = calls.filter(c => (c.totalTokens || 0) > 10000);
|
|
474
|
+
if (highTokenCalls.length > 0) {
|
|
475
|
+
alerts.push({
|
|
476
|
+
kind: 'alert', severity: 'warning', category: 'llm_high_tokens',
|
|
477
|
+
message: `${highTokenCalls.length} LLM call(s) used >10K tokens`,
|
|
478
|
+
details: { calls: highTokenCalls.map((c: any) => ({ model: c.model, tokens: c.totalTokens, input: (c.inputPreview || '').substring(0, 50) })) },
|
|
479
|
+
timestamp: Date.now(),
|
|
480
|
+
suggestion: `Large prompts increase cost and latency. Consider chunking input, using summarization, or reducing context window.`,
|
|
481
|
+
});
|
|
482
|
+
}
|
|
483
|
+
|
|
484
|
+
return alerts;
|
|
485
|
+
}
|
|
486
|
+
|
|
487
|
+
function analyzeAgentEvents(trickleDir: string): Alert[] {
|
|
488
|
+
const alerts: Alert[] = [];
|
|
489
|
+
const agentsFile = path.join(trickleDir, 'agents.jsonl');
|
|
490
|
+
if (!fs.existsSync(agentsFile)) return [];
|
|
491
|
+
const events = readJsonl(agentsFile) as any[];
|
|
492
|
+
if (events.length === 0) return [];
|
|
493
|
+
|
|
494
|
+
// 1. Repeated tool retries — same tool called 3+ times in a row
|
|
495
|
+
const toolCalls = events.filter(e => e.event === 'tool_start' || e.event === 'tool_end');
|
|
496
|
+
const toolNames = toolCalls.filter(e => e.event === 'tool_start').map(e => e.tool || '');
|
|
497
|
+
for (let i = 0; i < toolNames.length - 2; i++) {
|
|
498
|
+
if (toolNames[i] === toolNames[i + 1] && toolNames[i] === toolNames[i + 2] && toolNames[i]) {
|
|
499
|
+
alerts.push({
|
|
500
|
+
kind: 'alert', severity: 'warning', category: 'agent_tool_retry',
|
|
501
|
+
message: `Tool "${toolNames[i]}" called 3+ times in a row — possible retry loop`,
|
|
502
|
+
details: { tool: toolNames[i], consecutiveCalls: 3 },
|
|
503
|
+
timestamp: Date.now(),
|
|
504
|
+
suggestion: `The agent may be retrying a failing tool. Check if the tool input is correct or if the agent misunderstands the tool's capabilities.`,
|
|
505
|
+
});
|
|
506
|
+
break; // Only report once
|
|
507
|
+
}
|
|
508
|
+
}
|
|
509
|
+
|
|
510
|
+
// 2. Tool errors
|
|
511
|
+
const toolErrors = events.filter(e => e.event === 'tool_error');
|
|
512
|
+
if (toolErrors.length > 0) {
|
|
513
|
+
alerts.push({
|
|
514
|
+
kind: 'alert', severity: toolErrors.length >= 3 ? 'critical' : 'warning', category: 'agent_tool_errors',
|
|
515
|
+
message: `${toolErrors.length} tool execution error(s) during agent run`,
|
|
516
|
+
details: { errors: toolErrors.slice(0, 5).map((e: any) => ({ tool: e.tool, error: (e.error || '').substring(0, 100) })) },
|
|
517
|
+
timestamp: Date.now(),
|
|
518
|
+
suggestion: `Agent tools are failing. Check tool implementations and ensure inputs match expected schemas.`,
|
|
519
|
+
});
|
|
520
|
+
}
|
|
521
|
+
|
|
522
|
+
// 3. Agent errors / crew failures
|
|
523
|
+
const agentErrors = events.filter(e => e.event === 'crew_error' || e.event === 'chain_error' || e.event === 'agent_error');
|
|
524
|
+
if (agentErrors.length > 0) {
|
|
525
|
+
for (const err of agentErrors.slice(0, 3)) {
|
|
526
|
+
alerts.push({
|
|
527
|
+
kind: 'alert', severity: 'critical', category: 'agent_failure',
|
|
528
|
+
message: `Agent workflow failed: ${(err.error || err.chain || 'unknown error').substring(0, 100)}`,
|
|
529
|
+
details: { event: err.event, framework: err.framework, error: err.error },
|
|
530
|
+
timestamp: Date.now(),
|
|
531
|
+
suggestion: `Use \`trickle why\` to trace the causal chain leading to this failure.`,
|
|
532
|
+
});
|
|
533
|
+
}
|
|
534
|
+
}
|
|
535
|
+
|
|
536
|
+
// 4. Long agent runs (> 30s)
|
|
537
|
+
const crewEnds = events.filter(e => e.event === 'crew_end' && e.durationMs);
|
|
538
|
+
for (const run of crewEnds) {
|
|
539
|
+
if (run.durationMs > 30000) {
|
|
540
|
+
alerts.push({
|
|
541
|
+
kind: 'alert', severity: 'warning', category: 'agent_slow',
|
|
542
|
+
message: `Agent run took ${(run.durationMs / 1000).toFixed(1)}s — consider optimizing`,
|
|
543
|
+
details: { framework: run.framework, durationMs: run.durationMs },
|
|
544
|
+
timestamp: Date.now(),
|
|
545
|
+
suggestion: `Long agent runs increase cost and user wait time. Check for unnecessary tool calls or verbose prompts.`,
|
|
546
|
+
});
|
|
547
|
+
}
|
|
548
|
+
}
|
|
549
|
+
|
|
550
|
+
return alerts;
|
|
551
|
+
}
|
|
552
|
+
|
|
438
553
|
export function runMonitor(opts: MonitorOptions): Alert[] {
|
|
439
554
|
const trickleDir = findTrickleDir(opts.dir);
|
|
440
555
|
if (!fs.existsSync(trickleDir)) {
|
|
@@ -453,6 +568,8 @@ export function runMonitor(opts: MonitorOptions): Alert[] {
|
|
|
453
568
|
...analyzeMemory(trickleDir, memoryThresholdMb, rules),
|
|
454
569
|
...analyzeFunctions(trickleDir, slowFunctionMs, rules),
|
|
455
570
|
...analyzeCallTrace(trickleDir, rules),
|
|
571
|
+
...analyzeLlmCalls(trickleDir),
|
|
572
|
+
...analyzeAgentEvents(trickleDir),
|
|
456
573
|
];
|
|
457
574
|
|
|
458
575
|
// Write alerts to file for agent consumption
|