trickle-cli 0.1.205 → 0.1.206

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -301,12 +301,33 @@ function costReportCommand(opts) {
301
301
  }
302
302
  }
303
303
  }
304
- if (cacheDetected) {
304
+ // Provider-reported cache tokens (Anthropic cache_read/cache_creation)
305
+ const cacheReadTotal = calls.reduce((s, c) => s + (c.cacheReadTokens || 0), 0);
306
+ const cacheWriteTotal = calls.reduce((s, c) => s + (c.cacheWriteTokens || 0), 0);
307
+ const callsWithCache = calls.filter((c) => c.cacheReadTokens > 0 || c.cacheWriteTokens > 0);
308
+ if (callsWithCache.length > 0 || cacheDetected) {
305
309
  console.log(chalk_1.default.gray('\n ' + '─'.repeat(60)));
306
- console.log(chalk_1.default.bold(' Cache Analysis') + chalk_1.default.gray(' (detected from latency bimodality)'));
307
- for (const ca of cacheAnalysis) {
308
- const speedup = (ca.slowAvg / Math.max(1, ca.fastAvg)).toFixed(0);
309
- console.log(` ${chalk_1.default.cyan(ca.model.padEnd(25))} hit rate: ${chalk_1.default.green(ca.hitRate + '%')} (${ca.fastCalls} fast, ${ca.slowCalls} slow) ${speedup}x speedup fast=${ca.fastAvg}ms slow=${ca.slowAvg}ms`);
310
+ console.log(chalk_1.default.bold(' Cache Analysis'));
311
+ if (callsWithCache.length > 0) {
312
+ const cacheHitCalls = calls.filter((c) => c.cacheReadTokens > 0);
313
+ const hitRate = calls.length > 0 ? Math.round((cacheHitCalls.length / calls.length) * 100) : 0;
314
+ // Estimate savings: cached tokens cost ~90% less
315
+ const savedTokens = cacheReadTotal;
316
+ const avgInputPrice = totalCost > 0 && totalTokens > 0 ? (totalCost / totalTokens) : 0.000003;
317
+ const estimatedSavings = savedTokens * avgInputPrice * 0.9;
318
+ console.log(chalk_1.default.gray(' Provider-reported cache tokens:'));
319
+ console.log(` Hit rate: ${chalk_1.default.green(hitRate + '%')} (${cacheHitCalls.length}/${calls.length} calls used cache)`);
320
+ console.log(` Cache read: ${formatTokens(cacheReadTotal)} tokens | Cache write: ${formatTokens(cacheWriteTotal)} tokens`);
321
+ if (estimatedSavings > 0) {
322
+ console.log(` Estimated savings: ${chalk_1.default.green('~$' + estimatedSavings.toFixed(4))} from cached tokens`);
323
+ }
324
+ }
325
+ if (cacheDetected) {
326
+ console.log(chalk_1.default.gray(' Latency-based detection:'));
327
+ for (const ca of cacheAnalysis) {
328
+ const speedup = (ca.slowAvg / Math.max(1, ca.fastAvg)).toFixed(0);
329
+ console.log(` ${chalk_1.default.cyan(ca.model.padEnd(25))} hit rate: ${chalk_1.default.green(ca.hitRate + '%')} (${ca.fastCalls} fast, ${ca.slowCalls} slow) ${speedup}x speedup`);
330
+ }
310
331
  }
311
332
  }
312
333
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "trickle-cli",
3
- "version": "0.1.205",
3
+ "version": "0.1.206",
4
4
  "description": "Zero-code runtime observability for JS/Python + AI agent debugging. Traces LangChain, CrewAI, OpenAI, Anthropic, Gemini. Eval, security, compliance, cost tracking. Free, local-first.",
5
5
  "keywords": ["observability", "tracing", "llm", "openai", "anthropic", "langchain", "crewai", "agent", "mcp", "debugging", "typescript", "python", "security", "eval", "compliance"],
6
6
  "bin": {
@@ -281,12 +281,36 @@ export function costReportCommand(opts: { json?: boolean; budget?: string }): vo
281
281
  }
282
282
  }
283
283
 
284
- if (cacheDetected) {
284
+ // Provider-reported cache tokens (Anthropic cache_read/cache_creation)
285
+ const cacheReadTotal = calls.reduce((s: number, c: any) => s + (c.cacheReadTokens || 0), 0);
286
+ const cacheWriteTotal = calls.reduce((s: number, c: any) => s + (c.cacheWriteTokens || 0), 0);
287
+ const callsWithCache = calls.filter((c: any) => c.cacheReadTokens > 0 || c.cacheWriteTokens > 0);
288
+
289
+ if (callsWithCache.length > 0 || cacheDetected) {
285
290
  console.log(chalk.gray('\n ' + '─'.repeat(60)));
286
- console.log(chalk.bold(' Cache Analysis') + chalk.gray(' (detected from latency bimodality)'));
287
- for (const ca of cacheAnalysis) {
288
- const speedup = (ca.slowAvg / Math.max(1, ca.fastAvg)).toFixed(0);
289
- console.log(` ${chalk.cyan(ca.model.padEnd(25))} hit rate: ${chalk.green(ca.hitRate + '%')} (${ca.fastCalls} fast, ${ca.slowCalls} slow) ${speedup}x speedup fast=${ca.fastAvg}ms slow=${ca.slowAvg}ms`);
291
+ console.log(chalk.bold(' Cache Analysis'));
292
+
293
+ if (callsWithCache.length > 0) {
294
+ const cacheHitCalls = calls.filter((c: any) => c.cacheReadTokens > 0);
295
+ const hitRate = calls.length > 0 ? Math.round((cacheHitCalls.length / calls.length) * 100) : 0;
296
+ // Estimate savings: cached tokens cost ~90% less
297
+ const savedTokens = cacheReadTotal;
298
+ const avgInputPrice = totalCost > 0 && totalTokens > 0 ? (totalCost / totalTokens) : 0.000003;
299
+ const estimatedSavings = savedTokens * avgInputPrice * 0.9;
300
+ console.log(chalk.gray(' Provider-reported cache tokens:'));
301
+ console.log(` Hit rate: ${chalk.green(hitRate + '%')} (${cacheHitCalls.length}/${calls.length} calls used cache)`);
302
+ console.log(` Cache read: ${formatTokens(cacheReadTotal)} tokens | Cache write: ${formatTokens(cacheWriteTotal)} tokens`);
303
+ if (estimatedSavings > 0) {
304
+ console.log(` Estimated savings: ${chalk.green('~$' + estimatedSavings.toFixed(4))} from cached tokens`);
305
+ }
306
+ }
307
+
308
+ if (cacheDetected) {
309
+ console.log(chalk.gray(' Latency-based detection:'));
310
+ for (const ca of cacheAnalysis) {
311
+ const speedup = (ca.slowAvg / Math.max(1, ca.fastAvg)).toFixed(0);
312
+ console.log(` ${chalk.cyan(ca.model.padEnd(25))} hit rate: ${chalk.green(ca.hitRate + '%')} (${ca.fastCalls} fast, ${ca.slowCalls} slow) ${speedup}x speedup`);
313
+ }
290
314
  }
291
315
  }
292
316
  }