trickle-cli 0.1.185 → 0.1.187

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -107,10 +107,51 @@ function costReportCommand(opts) {
107
107
  const monthlyProjection = timeSpanMs > 60000
108
108
  ? (totalCost / timeSpanMs) * 30 * 24 * 60 * 60 * 1000
109
109
  : null;
110
+ // Per-agent cost roll-up — read agents.jsonl and attribute LLM costs to agents
111
+ const agentsFile = path.join(dir, 'agents.jsonl');
112
+ const byAgent = {};
113
+ if (fs.existsSync(agentsFile)) {
114
+ const agentEvents = fs.readFileSync(agentsFile, 'utf-8').split('\n').filter(Boolean)
115
+ .map(l => { try {
116
+ return JSON.parse(l);
117
+ }
118
+ catch {
119
+ return null;
120
+ } }).filter(Boolean);
121
+ // Build agent activity windows: agent_start → agent_end with timestamps
122
+ const activeAgents = [];
123
+ const startTimes = {};
124
+ for (const ev of agentEvents) {
125
+ const name = ev.chain || ev.tool || 'unknown';
126
+ const fw = ev.framework || 'unknown';
127
+ if (ev.event === 'agent_start' || ev.event === 'crew_start') {
128
+ startTimes[name] = { name, framework: fw, ts: ev.timestamp || 0 };
129
+ }
130
+ else if ((ev.event === 'agent_end' || ev.event === 'crew_end') && startTimes[name]) {
131
+ activeAgents.push({ name, framework: fw, start: startTimes[name].ts, end: ev.timestamp || Date.now() });
132
+ delete startTimes[name];
133
+ }
134
+ }
135
+ // Attribute each LLM call to the most-recently-started agent active at that time
136
+ for (const call of calls) {
137
+ const ts = call.timestamp || 0;
138
+ const matching = activeAgents.filter(a => ts >= a.start && ts <= a.end);
139
+ const agent = matching.length > 0 ? matching[matching.length - 1] : null;
140
+ if (agent) {
141
+ const key = `${agent.framework}/${agent.name}`;
142
+ if (!byAgent[key])
143
+ byAgent[key] = { calls: 0, tokens: 0, cost: 0, framework: agent.framework };
144
+ byAgent[key].calls++;
145
+ byAgent[key].tokens += call.totalTokens || 0;
146
+ byAgent[key].cost += call.estimatedCostUsd || 0;
147
+ }
148
+ }
149
+ }
110
150
  if (opts.json) {
111
151
  console.log(JSON.stringify({
112
152
  summary: { totalCost, totalTokens, totalInputTokens, totalOutputTokens, totalCalls: calls.length, totalDurationMs: totalDuration, errors: errorCount, monthlyProjection },
113
153
  byProvider, byModel,
154
+ ...(Object.keys(byAgent).length > 0 ? { byAgent } : {}),
114
155
  }, null, 2));
115
156
  return;
116
157
  }
@@ -154,6 +195,16 @@ function costReportCommand(opts) {
154
195
  }
155
196
  // Top costly calls
156
197
  const costlyCalls = calls.filter(c => c.estimatedCostUsd > 0).sort((a, b) => b.estimatedCostUsd - a.estimatedCostUsd).slice(0, 5);
198
+ // By agent (if agent data exists)
199
+ if (Object.keys(byAgent).length > 0) {
200
+ console.log(chalk_1.default.gray('\n ' + '─'.repeat(60)));
201
+ console.log(chalk_1.default.bold(' By Agent/Workflow'));
202
+ const sortedAgents = Object.entries(byAgent).sort((a, b) => b[1].cost - a[1].cost);
203
+ for (const [name, data] of sortedAgents) {
204
+ const pct = totalCost > 0 ? ((data.cost / totalCost) * 100).toFixed(0) : '0';
205
+ console.log(` ${chalk_1.default.cyan(name.padEnd(30))} $${data.cost.toFixed(4).padEnd(10)} ${chalk_1.default.gray(pct + '%')} ${data.calls} calls ${formatTokens(data.tokens)} tokens`);
206
+ }
207
+ }
157
208
  if (costlyCalls.length > 0) {
158
209
  console.log(chalk_1.default.gray('\n ' + '─'.repeat(60)));
159
210
  console.log(chalk_1.default.bold(' Most Expensive Calls'));
@@ -412,6 +412,114 @@ function analyzeCallTrace(trickleDir, rules) {
412
412
  }
413
413
  return alerts;
414
414
  }
415
/**
 * Scans `llm.jsonl` in the trickle directory and raises alerts for unhealthy LLM usage.
 *
 * Checks, in order:
 *  1. Error rate   - `critical` when more than 30% of calls carry an `error` field.
 *  2. Cost spike   - `warning` when a single call accounts for more than 50% of the
 *                    total estimated cost (only evaluated with more than two calls).
 *  3. Token usage  - `warning` listing every call that used more than 10K tokens.
 *
 * @param {string} trickleDir Directory expected to contain `llm.jsonl`.
 * @returns {Array<object>} Alerts in the order above; empty when the file is
 *   missing or contains no records.
 */
function analyzeLlmCalls(trickleDir) {
    const llmFile = path.join(trickleDir, 'llm.jsonl');
    if (!fs.existsSync(llmFile))
        return [];
    const calls = readJsonl(llmFile);
    if (calls.length === 0)
        return [];
    // Records are written by instrumented user code, so text fields such as
    // `error` or `inputPreview` may hold objects; coerce before slicing so a
    // structured error cannot crash the monitor with a TypeError.
    const asText = (value) => {
        if (!value)
            return '';
        if (typeof value === 'string')
            return value;
        if (typeof value === 'object') {
            try {
                return JSON.stringify(value) || String(value);
            }
            catch {
                return String(value); // e.g. circular structure
            }
        }
        return String(value);
    };
    // Only finite numbers count as cost; anything else would turn `+` into
    // string concatenation and later break `.toFixed`.
    const costOf = (call) => {
        const cost = call.estimatedCostUsd;
        return typeof cost === 'number' && Number.isFinite(cost) ? cost : 0;
    };
    const alerts = [];
    // 1. High error rate
    const errors = calls.filter(c => c.error);
    if (errors.length > 0 && errors.length / calls.length > 0.3) {
        const samples = [...new Set(errors.slice(0, 3).map(e => asText(e.error).substring(0, 50)))];
        alerts.push({
            kind: 'alert', severity: 'critical', category: 'llm_errors',
            message: `High LLM error rate: ${errors.length}/${calls.length} calls failed (${Math.round(errors.length / calls.length * 100)}%)`,
            details: { errorCount: errors.length, totalCalls: calls.length },
            timestamp: Date.now(),
            suggestion: `Check API keys, rate limits, and model availability. Common errors: ${samples.join('; ')}`,
        });
    }
    // 2. Cost spike — single call > 50% of total
    const totalCost = calls.reduce((sum, c) => sum + costOf(c), 0);
    if (totalCost > 0) {
        // Strict `>` keeps the earliest call when several share the maximum cost.
        let maxCall = calls[0];
        let maxCost = costOf(maxCall);
        for (const c of calls) {
            const cost = costOf(c);
            if (cost > maxCost) {
                maxCall = c;
                maxCost = cost;
            }
        }
        if (maxCost > totalCost * 0.5 && calls.length > 2) {
            alerts.push({
                kind: 'alert', severity: 'warning', category: 'llm_cost_spike',
                message: `Single LLM call consumed ${Math.round(maxCost / totalCost * 100)}% of total cost ($${maxCost.toFixed(4)} of $${totalCost.toFixed(4)})`,
                details: { model: maxCall.model, tokens: maxCall.totalTokens, cost: maxCost, input: asText(maxCall.inputPreview).substring(0, 80) },
                timestamp: Date.now(),
                suggestion: `Review this prompt for unnecessary length. Consider using a cheaper model (e.g., gpt-4o-mini instead of gpt-4o).`,
            });
        }
    }
    // 3. Excessive token usage per call (> 10K tokens)
    const highTokenCalls = calls.filter(c => (c.totalTokens || 0) > 10000);
    if (highTokenCalls.length > 0) {
        alerts.push({
            kind: 'alert', severity: 'warning', category: 'llm_high_tokens',
            message: `${highTokenCalls.length} LLM call(s) used >10K tokens`,
            details: { calls: highTokenCalls.map((c) => ({ model: c.model, tokens: c.totalTokens, input: asText(c.inputPreview).substring(0, 50) })) },
            timestamp: Date.now(),
            suggestion: `Large prompts increase cost and latency. Consider chunking input, using summarization, or reducing context window.`,
        });
    }
    return alerts;
}
461
/**
 * Scans `agents.jsonl` in the trickle directory and raises alerts about agent runs.
 *
 * Checks, in order:
 *  1. Retry loop     - `warning` the first time the same tool is started three or
 *                      more times in a row (reported once).
 *  2. Tool errors    - one alert summarising all `tool_error` events; `critical`
 *                      when there are three or more, otherwise `warning`.
 *  3. Agent failures - one `critical` alert for each of the first three
 *                      `crew_error` / `chain_error` / `agent_error` events.
 *  4. Slow runs      - `warning` for every `crew_end` whose `durationMs` exceeds 30s.
 *
 * @param {string} trickleDir Directory expected to contain `agents.jsonl`.
 * @returns {Array<object>} Alerts in the order above; empty when the file is
 *   missing or contains no records.
 */
function analyzeAgentEvents(trickleDir) {
    const agentsFile = path.join(trickleDir, 'agents.jsonl');
    if (!fs.existsSync(agentsFile))
        return [];
    const events = readJsonl(agentsFile);
    if (events.length === 0)
        return [];
    // `error` / `chain` are written by instrumented frameworks and may be
    // objects; coerce before slicing so a structured error cannot crash the
    // monitor with a TypeError.
    const asText = (value) => {
        if (!value)
            return '';
        if (typeof value === 'string')
            return value;
        if (typeof value === 'object') {
            try {
                return JSON.stringify(value) || String(value);
            }
            catch {
                return String(value); // e.g. circular structure
            }
        }
        return String(value);
    };
    const alerts = [];
    // 1. Repeated tool retries — same tool called 3+ times in a row
    const toolNames = events.filter(e => e.event === 'tool_start').map(e => e.tool || '');
    for (let i = 0; i + 2 < toolNames.length; i++) {
        const tool = toolNames[i];
        if (!tool || tool !== toolNames[i + 1] || tool !== toolNames[i + 2])
            continue;
        // Measure the whole streak so the reported count matches the "3+" message.
        let runLength = 3;
        while (toolNames[i + runLength] === tool)
            runLength++;
        alerts.push({
            kind: 'alert', severity: 'warning', category: 'agent_tool_retry',
            message: `Tool "${tool}" called 3+ times in a row — possible retry loop`,
            details: { tool, consecutiveCalls: runLength },
            timestamp: Date.now(),
            suggestion: `The agent may be retrying a failing tool. Check if the tool input is correct or if the agent misunderstands the tool's capabilities.`,
        });
        break; // Only report once
    }
    // 2. Tool errors
    const toolErrors = events.filter(e => e.event === 'tool_error');
    if (toolErrors.length > 0) {
        alerts.push({
            kind: 'alert', severity: toolErrors.length >= 3 ? 'critical' : 'warning', category: 'agent_tool_errors',
            message: `${toolErrors.length} tool execution error(s) during agent run`,
            details: { errors: toolErrors.slice(0, 5).map((e) => ({ tool: e.tool, error: asText(e.error).substring(0, 100) })) },
            timestamp: Date.now(),
            suggestion: `Agent tools are failing. Check tool implementations and ensure inputs match expected schemas.`,
        });
    }
    // 3. Agent errors / crew failures (capped at three alerts)
    const failureEvents = ['crew_error', 'chain_error', 'agent_error'];
    const agentErrors = events.filter(e => failureEvents.includes(e.event));
    for (const err of agentErrors.slice(0, 3)) {
        alerts.push({
            kind: 'alert', severity: 'critical', category: 'agent_failure',
            message: `Agent workflow failed: ${asText(err.error || err.chain || 'unknown error').substring(0, 100)}`,
            details: { event: err.event, framework: err.framework, error: err.error },
            timestamp: Date.now(),
            suggestion: `Use \`trickle why\` to trace the causal chain leading to this failure.`,
        });
    }
    // 4. Long agent runs (> 30s)
    for (const run of events) {
        if (run.event !== 'crew_end' || !run.durationMs || !(run.durationMs > 30000))
            continue;
        alerts.push({
            kind: 'alert', severity: 'warning', category: 'agent_slow',
            message: `Agent run took ${(run.durationMs / 1000).toFixed(1)}s — consider optimizing`,
            details: { framework: run.framework, durationMs: run.durationMs },
            timestamp: Date.now(),
            suggestion: `Long agent runs increase cost and user wait time. Check for unnecessary tool calls or verbose prompts.`,
        });
    }
    return alerts;
}
415
523
  function runMonitor(opts) {
416
524
  const trickleDir = findTrickleDir(opts.dir);
417
525
  if (!fs.existsSync(trickleDir)) {
@@ -428,6 +536,8 @@ function runMonitor(opts) {
428
536
  ...analyzeMemory(trickleDir, memoryThresholdMb, rules),
429
537
  ...analyzeFunctions(trickleDir, slowFunctionMs, rules),
430
538
  ...analyzeCallTrace(trickleDir, rules),
539
+ ...analyzeLlmCalls(trickleDir),
540
+ ...analyzeAgentEvents(trickleDir),
431
541
  ];
432
542
  // Write alerts to file for agent consumption
433
543
  const alertsFile = path.join(trickleDir, 'alerts.jsonl');
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "trickle-cli",
3
- "version": "0.1.185",
3
+ "version": "0.1.187",
4
4
  "description": "CLI for trickle runtime type observability",
5
5
  "bin": {
6
6
  "trickle": "dist/index.js"
@@ -85,10 +85,48 @@ export function costReportCommand(opts: { json?: boolean; budget?: string }): vo
85
85
  ? (totalCost / timeSpanMs) * 30 * 24 * 60 * 60 * 1000
86
86
  : null;
87
87
 
88
+ // Per-agent cost roll-up — read agents.jsonl and attribute LLM costs to agents
89
+ const agentsFile = path.join(dir, 'agents.jsonl');
90
+ const byAgent: Record<string, { calls: number; tokens: number; cost: number; framework: string }> = {};
91
+ if (fs.existsSync(agentsFile)) {
92
+ const agentEvents = fs.readFileSync(agentsFile, 'utf-8').split('\n').filter(Boolean)
93
+ .map(l => { try { return JSON.parse(l); } catch { return null; } }).filter(Boolean);
94
+
95
+ // Build agent activity windows: agent_start → agent_end with timestamps
96
+ const activeAgents: { name: string; framework: string; start: number; end: number }[] = [];
97
+ const startTimes: Record<string, { name: string; framework: string; ts: number }> = {};
98
+
99
+ for (const ev of agentEvents) {
100
+ const name = ev.chain || ev.tool || 'unknown';
101
+ const fw = ev.framework || 'unknown';
102
+ if (ev.event === 'agent_start' || ev.event === 'crew_start') {
103
+ startTimes[name] = { name, framework: fw, ts: ev.timestamp || 0 };
104
+ } else if ((ev.event === 'agent_end' || ev.event === 'crew_end') && startTimes[name]) {
105
+ activeAgents.push({ name, framework: fw, start: startTimes[name].ts, end: ev.timestamp || Date.now() });
106
+ delete startTimes[name];
107
+ }
108
+ }
109
+
110
+ // Attribute each LLM call to the most-recently-started agent active at that time
111
+ for (const call of calls) {
112
+ const ts = call.timestamp || 0;
113
+ const matching = activeAgents.filter(a => ts >= a.start && ts <= a.end);
114
+ const agent = matching.length > 0 ? matching[matching.length - 1] : null;
115
+ if (agent) {
116
+ const key = `${agent.framework}/${agent.name}`;
117
+ if (!byAgent[key]) byAgent[key] = { calls: 0, tokens: 0, cost: 0, framework: agent.framework };
118
+ byAgent[key].calls++;
119
+ byAgent[key].tokens += call.totalTokens || 0;
120
+ byAgent[key].cost += call.estimatedCostUsd || 0;
121
+ }
122
+ }
123
+ }
124
+
88
125
  if (opts.json) {
89
126
  console.log(JSON.stringify({
90
127
  summary: { totalCost, totalTokens, totalInputTokens, totalOutputTokens, totalCalls: calls.length, totalDurationMs: totalDuration, errors: errorCount, monthlyProjection },
91
128
  byProvider, byModel,
129
+ ...(Object.keys(byAgent).length > 0 ? { byAgent } : {}),
92
130
  }, null, 2));
93
131
  return;
94
132
  }
@@ -137,6 +175,17 @@ export function costReportCommand(opts: { json?: boolean; budget?: string }): vo
137
175
 
138
176
  // Top costly calls
139
177
  const costlyCalls = calls.filter(c => c.estimatedCostUsd > 0).sort((a, b) => b.estimatedCostUsd - a.estimatedCostUsd).slice(0, 5);
178
+ // By agent (if agent data exists)
179
+ if (Object.keys(byAgent).length > 0) {
180
+ console.log(chalk.gray('\n ' + '─'.repeat(60)));
181
+ console.log(chalk.bold(' By Agent/Workflow'));
182
+ const sortedAgents = Object.entries(byAgent).sort((a, b) => b[1].cost - a[1].cost);
183
+ for (const [name, data] of sortedAgents) {
184
+ const pct = totalCost > 0 ? ((data.cost / totalCost) * 100).toFixed(0) : '0';
185
+ console.log(` ${chalk.cyan(name.padEnd(30))} $${data.cost.toFixed(4).padEnd(10)} ${chalk.gray(pct + '%')} ${data.calls} calls ${formatTokens(data.tokens)} tokens`);
186
+ }
187
+ }
188
+
140
189
  if (costlyCalls.length > 0) {
141
190
  console.log(chalk.gray('\n ' + '─'.repeat(60)));
142
191
  console.log(chalk.bold(' Most Expensive Calls'));
@@ -435,6 +435,121 @@ function analyzeCallTrace(trickleDir: string, rules: RulesConfig): Alert[] {
435
435
  return alerts;
436
436
  }
437
437
 
438
/**
 * Scans `llm.jsonl` in the trickle directory and raises alerts for unhealthy LLM usage.
 *
 * Checks, in order:
 *  1. Error rate   - `critical` when more than 30% of calls carry an `error` field.
 *  2. Cost spike   - `warning` when a single call accounts for more than 50% of the
 *                    total estimated cost (only evaluated with more than two calls).
 *  3. Token usage  - `warning` listing every call that used more than 10K tokens.
 *
 * @param trickleDir Directory expected to contain `llm.jsonl`.
 * @returns Alerts in the order above; empty when the file is missing or
 *   contains no records.
 */
function analyzeLlmCalls(trickleDir: string): Alert[] {
  const llmFile = path.join(trickleDir, 'llm.jsonl');
  if (!fs.existsSync(llmFile)) return [];
  const calls = readJsonl(llmFile) as any[];
  if (calls.length === 0) return [];

  // Records are written by instrumented user code, so text fields such as
  // `error` or `inputPreview` may hold objects; coerce before slicing so a
  // structured error cannot crash the monitor with a TypeError.
  const asText = (value: unknown): string => {
    if (!value) return '';
    if (typeof value === 'string') return value;
    if (typeof value === 'object') {
      try {
        return JSON.stringify(value) || String(value);
      } catch {
        return String(value); // e.g. circular structure
      }
    }
    return String(value);
  };

  // Only finite numbers count as cost; anything else would turn `+` into
  // string concatenation and later break `.toFixed`.
  const costOf = (call: any): number => {
    const cost: unknown = call.estimatedCostUsd;
    return typeof cost === 'number' && Number.isFinite(cost) ? cost : 0;
  };

  const alerts: Alert[] = [];

  // 1. High error rate
  const errors = calls.filter(c => c.error);
  if (errors.length > 0 && errors.length / calls.length > 0.3) {
    const samples = [...new Set(errors.slice(0, 3).map(e => asText(e.error).substring(0, 50)))];
    alerts.push({
      kind: 'alert', severity: 'critical', category: 'llm_errors',
      message: `High LLM error rate: ${errors.length}/${calls.length} calls failed (${Math.round(errors.length / calls.length * 100)}%)`,
      details: { errorCount: errors.length, totalCalls: calls.length },
      timestamp: Date.now(),
      suggestion: `Check API keys, rate limits, and model availability. Common errors: ${samples.join('; ')}`,
    });
  }

  // 2. Cost spike — single call > 50% of total
  const totalCost = calls.reduce((sum: number, c: any) => sum + costOf(c), 0);
  if (totalCost > 0) {
    // Strict `>` keeps the earliest call when several share the maximum cost.
    let maxCall: any = calls[0];
    let maxCost = costOf(maxCall);
    for (const c of calls) {
      const cost = costOf(c);
      if (cost > maxCost) {
        maxCall = c;
        maxCost = cost;
      }
    }
    if (maxCost > totalCost * 0.5 && calls.length > 2) {
      alerts.push({
        kind: 'alert', severity: 'warning', category: 'llm_cost_spike',
        message: `Single LLM call consumed ${Math.round(maxCost / totalCost * 100)}% of total cost ($${maxCost.toFixed(4)} of $${totalCost.toFixed(4)})`,
        details: { model: maxCall.model, tokens: maxCall.totalTokens, cost: maxCost, input: asText(maxCall.inputPreview).substring(0, 80) },
        timestamp: Date.now(),
        suggestion: `Review this prompt for unnecessary length. Consider using a cheaper model (e.g., gpt-4o-mini instead of gpt-4o).`,
      });
    }
  }

  // 3. Excessive token usage per call (> 10K tokens)
  const highTokenCalls = calls.filter(c => (c.totalTokens || 0) > 10000);
  if (highTokenCalls.length > 0) {
    alerts.push({
      kind: 'alert', severity: 'warning', category: 'llm_high_tokens',
      message: `${highTokenCalls.length} LLM call(s) used >10K tokens`,
      details: { calls: highTokenCalls.map((c: any) => ({ model: c.model, tokens: c.totalTokens, input: asText(c.inputPreview).substring(0, 50) })) },
      timestamp: Date.now(),
      suggestion: `Large prompts increase cost and latency. Consider chunking input, using summarization, or reducing context window.`,
    });
  }

  return alerts;
}
486
+
487
/**
 * Scans `agents.jsonl` in the trickle directory and raises alerts about agent runs.
 *
 * Checks, in order:
 *  1. Retry loop     - `warning` the first time the same tool is started three or
 *                      more times in a row (reported once).
 *  2. Tool errors    - one alert summarising all `tool_error` events; `critical`
 *                      when there are three or more, otherwise `warning`.
 *  3. Agent failures - one `critical` alert for each of the first three
 *                      `crew_error` / `chain_error` / `agent_error` events.
 *  4. Slow runs      - `warning` for every `crew_end` whose `durationMs` exceeds 30s.
 *
 * @param trickleDir Directory expected to contain `agents.jsonl`.
 * @returns Alerts in the order above; empty when the file is missing or
 *   contains no records.
 */
function analyzeAgentEvents(trickleDir: string): Alert[] {
  const agentsFile = path.join(trickleDir, 'agents.jsonl');
  if (!fs.existsSync(agentsFile)) return [];
  const events = readJsonl(agentsFile) as any[];
  if (events.length === 0) return [];

  // `error` / `chain` are written by instrumented frameworks and may be
  // objects; coerce before slicing so a structured error cannot crash the
  // monitor with a TypeError.
  const asText = (value: unknown): string => {
    if (!value) return '';
    if (typeof value === 'string') return value;
    if (typeof value === 'object') {
      try {
        return JSON.stringify(value) || String(value);
      } catch {
        return String(value); // e.g. circular structure
      }
    }
    return String(value);
  };

  const alerts: Alert[] = [];

  // 1. Repeated tool retries — same tool called 3+ times in a row
  const toolNames: any[] = events.filter(e => e.event === 'tool_start').map(e => e.tool || '');
  for (let i = 0; i + 2 < toolNames.length; i++) {
    const tool = toolNames[i];
    if (!tool || tool !== toolNames[i + 1] || tool !== toolNames[i + 2]) continue;
    // Measure the whole streak so the reported count matches the "3+" message.
    let runLength = 3;
    while (toolNames[i + runLength] === tool) runLength++;
    alerts.push({
      kind: 'alert', severity: 'warning', category: 'agent_tool_retry',
      message: `Tool "${tool}" called 3+ times in a row — possible retry loop`,
      details: { tool, consecutiveCalls: runLength },
      timestamp: Date.now(),
      suggestion: `The agent may be retrying a failing tool. Check if the tool input is correct or if the agent misunderstands the tool's capabilities.`,
    });
    break; // Only report once
  }

  // 2. Tool errors
  const toolErrors = events.filter(e => e.event === 'tool_error');
  if (toolErrors.length > 0) {
    alerts.push({
      kind: 'alert', severity: toolErrors.length >= 3 ? 'critical' : 'warning', category: 'agent_tool_errors',
      message: `${toolErrors.length} tool execution error(s) during agent run`,
      details: { errors: toolErrors.slice(0, 5).map((e: any) => ({ tool: e.tool, error: asText(e.error).substring(0, 100) })) },
      timestamp: Date.now(),
      suggestion: `Agent tools are failing. Check tool implementations and ensure inputs match expected schemas.`,
    });
  }

  // 3. Agent errors / crew failures (capped at three alerts)
  const failureEvents = ['crew_error', 'chain_error', 'agent_error'];
  const agentErrors = events.filter(e => failureEvents.includes(e.event));
  for (const err of agentErrors.slice(0, 3)) {
    alerts.push({
      kind: 'alert', severity: 'critical', category: 'agent_failure',
      message: `Agent workflow failed: ${asText(err.error || err.chain || 'unknown error').substring(0, 100)}`,
      details: { event: err.event, framework: err.framework, error: err.error },
      timestamp: Date.now(),
      suggestion: `Use \`trickle why\` to trace the causal chain leading to this failure.`,
    });
  }

  // 4. Long agent runs (> 30s)
  for (const run of events) {
    if (run.event !== 'crew_end' || !run.durationMs || !(run.durationMs > 30000)) continue;
    alerts.push({
      kind: 'alert', severity: 'warning', category: 'agent_slow',
      message: `Agent run took ${(run.durationMs / 1000).toFixed(1)}s — consider optimizing`,
      details: { framework: run.framework, durationMs: run.durationMs },
      timestamp: Date.now(),
      suggestion: `Long agent runs increase cost and user wait time. Check for unnecessary tool calls or verbose prompts.`,
    });
  }

  return alerts;
}
552
+
438
553
  export function runMonitor(opts: MonitorOptions): Alert[] {
439
554
  const trickleDir = findTrickleDir(opts.dir);
440
555
  if (!fs.existsSync(trickleDir)) {
@@ -453,6 +568,8 @@ export function runMonitor(opts: MonitorOptions): Alert[] {
453
568
  ...analyzeMemory(trickleDir, memoryThresholdMb, rules),
454
569
  ...analyzeFunctions(trickleDir, slowFunctionMs, rules),
455
570
  ...analyzeCallTrace(trickleDir, rules),
571
+ ...analyzeLlmCalls(trickleDir),
572
+ ...analyzeAgentEvents(trickleDir),
456
573
  ];
457
574
 
458
575
  // Write alerts to file for agent consumption