npm - trickle-cli - Versions diffs - 0.1.205 → 0.1.206 - Mend

trickle-cli 0.1.205 → 0.1.206

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Files changed (3) hide show

package/dist/commands/cost-report.js +26 -5
package/package.json +1 -1
package/src/commands/cost-report.ts +29 -5

package/dist/commands/cost-report.js CHANGED Viewed

@@ -301,12 +301,33 @@ function costReportCommand(opts) {
                 }
             }
         }
-        if (cacheDetected) {
+        // Provider-reported cache tokens (Anthropic cache_read/cache_creation)
+        const cacheReadTotal = calls.reduce((s, c) => s + (c.cacheReadTokens || 0), 0);
+        const cacheWriteTotal = calls.reduce((s, c) => s + (c.cacheWriteTokens || 0), 0);
+        const callsWithCache = calls.filter((c) => c.cacheReadTokens > 0 || c.cacheWriteTokens > 0);
+        if (callsWithCache.length > 0 || cacheDetected) {
             console.log(chalk_1.default.gray('\n  ' + '─'.repeat(60)));
-            console.log(chalk_1.default.bold('  Cache Analysis') + chalk_1.default.gray(' (detected from latency bimodality)'));
-            for (const ca of cacheAnalysis) {
-                const speedup = (ca.slowAvg / Math.max(1, ca.fastAvg)).toFixed(0);
-                console.log(`  ${chalk_1.default.cyan(ca.model.padEnd(25))} hit rate: ${chalk_1.default.green(ca.hitRate + '%')} (${ca.fastCalls} fast, ${ca.slowCalls} slow)  ${speedup}x speedup  fast=${ca.fastAvg}ms slow=${ca.slowAvg}ms`);
+            console.log(chalk_1.default.bold('  Cache Analysis'));
+            if (callsWithCache.length > 0) {
+                const cacheHitCalls = calls.filter((c) => c.cacheReadTokens > 0);
+                const hitRate = calls.length > 0 ? Math.round((cacheHitCalls.length / calls.length) * 100) : 0;
+                // Estimate savings: cached tokens cost ~90% less
+                const savedTokens = cacheReadTotal;
+                const avgInputPrice = totalCost > 0 && totalTokens > 0 ? (totalCost / totalTokens) : 0.000003;
+                const estimatedSavings = savedTokens * avgInputPrice * 0.9;
+                console.log(chalk_1.default.gray('  Provider-reported cache tokens:'));
+                console.log(`  Hit rate: ${chalk_1.default.green(hitRate + '%')} (${cacheHitCalls.length}/${calls.length} calls used cache)`);
+                console.log(`  Cache read: ${formatTokens(cacheReadTotal)} tokens | Cache write: ${formatTokens(cacheWriteTotal)} tokens`);
+                if (estimatedSavings > 0) {
+                    console.log(`  Estimated savings: ${chalk_1.default.green('~$' + estimatedSavings.toFixed(4))} from cached tokens`);
+                }
+            }
+            if (cacheDetected) {
+                console.log(chalk_1.default.gray('  Latency-based detection:'));
+                for (const ca of cacheAnalysis) {
+                    const speedup = (ca.slowAvg / Math.max(1, ca.fastAvg)).toFixed(0);
+                    console.log(`  ${chalk_1.default.cyan(ca.model.padEnd(25))} hit rate: ${chalk_1.default.green(ca.hitRate + '%')} (${ca.fastCalls} fast, ${ca.slowCalls} slow)  ${speedup}x speedup`);
+                }
             }
         }
     }

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "trickle-cli",
-  "version": "0.1.205",
+  "version": "0.1.206",
   "description": "Zero-code runtime observability for JS/Python + AI agent debugging. Traces LangChain, CrewAI, OpenAI, Anthropic, Gemini. Eval, security, compliance, cost tracking. Free, local-first.",
   "keywords": ["observability", "tracing", "llm", "openai", "anthropic", "langchain", "crewai", "agent", "mcp", "debugging", "typescript", "python", "security", "eval", "compliance"],
   "bin": {

package/src/commands/cost-report.ts CHANGED Viewed

@@ -281,12 +281,36 @@ export function costReportCommand(opts: { json?: boolean; budget?: string }): vo
       }
     }
-    if (cacheDetected) {
+    // Provider-reported cache tokens (Anthropic cache_read/cache_creation)
+    const cacheReadTotal = calls.reduce((s: number, c: any) => s + (c.cacheReadTokens || 0), 0);
+    const cacheWriteTotal = calls.reduce((s: number, c: any) => s + (c.cacheWriteTokens || 0), 0);
+    const callsWithCache = calls.filter((c: any) => c.cacheReadTokens > 0 || c.cacheWriteTokens > 0);
+    if (callsWithCache.length > 0 || cacheDetected) {
       console.log(chalk.gray('\n  ' + '─'.repeat(60)));
-      console.log(chalk.bold('  Cache Analysis') + chalk.gray(' (detected from latency bimodality)'));
-      for (const ca of cacheAnalysis) {
-        const speedup = (ca.slowAvg / Math.max(1, ca.fastAvg)).toFixed(0);
-        console.log(`  ${chalk.cyan(ca.model.padEnd(25))} hit rate: ${chalk.green(ca.hitRate + '%')} (${ca.fastCalls} fast, ${ca.slowCalls} slow)  ${speedup}x speedup  fast=${ca.fastAvg}ms slow=${ca.slowAvg}ms`);
+      console.log(chalk.bold('  Cache Analysis'));
+      if (callsWithCache.length > 0) {
+        const cacheHitCalls = calls.filter((c: any) => c.cacheReadTokens > 0);
+        const hitRate = calls.length > 0 ? Math.round((cacheHitCalls.length / calls.length) * 100) : 0;
+        // Estimate savings: cached tokens cost ~90% less
+        const savedTokens = cacheReadTotal;
+        const avgInputPrice = totalCost > 0 && totalTokens > 0 ? (totalCost / totalTokens) : 0.000003;
+        const estimatedSavings = savedTokens * avgInputPrice * 0.9;
+        console.log(chalk.gray('  Provider-reported cache tokens:'));
+        console.log(`  Hit rate: ${chalk.green(hitRate + '%')} (${cacheHitCalls.length}/${calls.length} calls used cache)`);
+        console.log(`  Cache read: ${formatTokens(cacheReadTotal)} tokens | Cache write: ${formatTokens(cacheWriteTotal)} tokens`);
+        if (estimatedSavings > 0) {
+          console.log(`  Estimated savings: ${chalk.green('~$' + estimatedSavings.toFixed(4))} from cached tokens`);
+        }
+      }
+      if (cacheDetected) {
+        console.log(chalk.gray('  Latency-based detection:'));
+        for (const ca of cacheAnalysis) {
+          const speedup = (ca.slowAvg / Math.max(1, ca.fastAvg)).toFixed(0);
+          console.log(`  ${chalk.cyan(ca.model.padEnd(25))} hit rate: ${chalk.green(ca.hitRate + '%')} (${ca.fastCalls} fast, ${ca.slowCalls} slow)  ${speedup}x speedup`);
+        }
       }
     }
   }