npm - trickle-cli - Versions diffs - 0.1.196 → 0.1.197 - Mend

trickle-cli 0.1.196 → 0.1.197

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (3) hide show

package/dist/commands/cost-report.js +46 -0
package/package.json +1 -1
package/src/commands/cost-report.ts +48 -0

package/dist/commands/cost-report.js CHANGED Viewed

@@ -262,6 +262,52 @@ function costReportCommand(opts) {
             console.log(`  ${chalk_1.default.cyan(name.padEnd(30))} $${data.cost.toFixed(4).padEnd(10)} ${chalk_1.default.gray(pct + '%')}  ${data.calls} calls  ${formatTokens(data.tokens)} tokens`);
         }
     }
+    // Cache hit/miss analysis — detect from latency bimodality
+    if (calls.length >= 4) {
+        // Group by model, find bimodal latency distribution
+        const modelLatencies = {};
+        for (const c of calls) {
+            if (!c.durationMs || c.error)
+                continue;
+            const key = c.model || 'unknown';
+            if (!modelLatencies[key])
+                modelLatencies[key] = [];
+            modelLatencies[key].push(c.durationMs);
+        }
+        let cacheDetected = false;
+        const cacheAnalysis = [];
+        for (const [model, latencies] of Object.entries(modelLatencies)) {
+            if (latencies.length < 3)
+                continue;
+            latencies.sort((a, b) => a - b);
+            const median = latencies[Math.floor(latencies.length / 2)];
+            // Split into fast (< 30% of median) and slow (>= 30% of median)
+            const threshold = median * 0.3;
+            const fast = latencies.filter(l => l < threshold);
+            const slow = latencies.filter(l => l >= threshold);
+            if (fast.length >= 1 && slow.length >= 1 && fast.length / latencies.length >= 0.1) {
+                const fastAvg = fast.reduce((s, l) => s + l, 0) / fast.length;
+                const slowAvg = slow.reduce((s, l) => s + l, 0) / slow.length;
+                // Only report if there's a significant speed difference (5x+)
+                if (slowAvg / Math.max(1, fastAvg) >= 5) {
+                    cacheDetected = true;
+                    cacheAnalysis.push({
+                        model, fastCalls: fast.length, slowCalls: slow.length,
+                        fastAvg: Math.round(fastAvg), slowAvg: Math.round(slowAvg),
+                        hitRate: Math.round((fast.length / latencies.length) * 100),
+                    });
+                }
+            }
+        }
+        if (cacheDetected) {
+            console.log(chalk_1.default.gray('\n  ' + '─'.repeat(60)));
+            console.log(chalk_1.default.bold('  Cache Analysis') + chalk_1.default.gray(' (detected from latency bimodality)'));
+            for (const ca of cacheAnalysis) {
+                const speedup = (ca.slowAvg / Math.max(1, ca.fastAvg)).toFixed(0);
+                console.log(`  ${chalk_1.default.cyan(ca.model.padEnd(25))} hit rate: ${chalk_1.default.green(ca.hitRate + '%')} (${ca.fastCalls} fast, ${ca.slowCalls} slow)  ${speedup}x speedup  fast=${ca.fastAvg}ms slow=${ca.slowAvg}ms`);
+            }
+        }
+    }
     if (costlyCalls.length > 0) {
         console.log(chalk_1.default.gray('\n  ' + '─'.repeat(60)));
         console.log(chalk_1.default.bold('  Most Expensive Calls'));

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "trickle-cli",
-  "version": "0.1.196",
+  "version": "0.1.197",
   "description": "CLI for trickle runtime type observability",
   "bin": {
     "trickle": "dist/index.js"

package/src/commands/cost-report.ts CHANGED Viewed

@@ -241,6 +241,54 @@ export function costReportCommand(opts: { json?: boolean; budget?: string }): vo
     }
   }
+  // Cache hit/miss analysis — detect from latency bimodality
+  if (calls.length >= 4) {
+    // Group by model, find bimodal latency distribution
+    const modelLatencies: Record<string, number[]> = {};
+    for (const c of calls) {
+      if (!c.durationMs || c.error) continue;
+      const key = c.model || 'unknown';
+      if (!modelLatencies[key]) modelLatencies[key] = [];
+      modelLatencies[key].push(c.durationMs);
+    }
+    let cacheDetected = false;
+    const cacheAnalysis: Array<{ model: string; fastCalls: number; slowCalls: number; fastAvg: number; slowAvg: number; hitRate: number }> = [];
+    for (const [model, latencies] of Object.entries(modelLatencies)) {
+      if (latencies.length < 3) continue;
+      latencies.sort((a, b) => a - b);
+      const median = latencies[Math.floor(latencies.length / 2)];
+      // Split into fast (< 30% of median) and slow (>= 30% of median)
+      const threshold = median * 0.3;
+      const fast = latencies.filter(l => l < threshold);
+      const slow = latencies.filter(l => l >= threshold);
+      if (fast.length >= 1 && slow.length >= 1 && fast.length / latencies.length >= 0.1) {
+        const fastAvg = fast.reduce((s, l) => s + l, 0) / fast.length;
+        const slowAvg = slow.reduce((s, l) => s + l, 0) / slow.length;
+        // Only report if there's a significant speed difference (5x+)
+        if (slowAvg / Math.max(1, fastAvg) >= 5) {
+          cacheDetected = true;
+          cacheAnalysis.push({
+            model, fastCalls: fast.length, slowCalls: slow.length,
+            fastAvg: Math.round(fastAvg), slowAvg: Math.round(slowAvg),
+            hitRate: Math.round((fast.length / latencies.length) * 100),
+          });
+        }
+      }
+    }
+    if (cacheDetected) {
+      console.log(chalk.gray('\n  ' + '─'.repeat(60)));
+      console.log(chalk.bold('  Cache Analysis') + chalk.gray(' (detected from latency bimodality)'));
+      for (const ca of cacheAnalysis) {
+        const speedup = (ca.slowAvg / Math.max(1, ca.fastAvg)).toFixed(0);
+        console.log(`  ${chalk.cyan(ca.model.padEnd(25))} hit rate: ${chalk.green(ca.hitRate + '%')} (${ca.fastCalls} fast, ${ca.slowCalls} slow)  ${speedup}x speedup  fast=${ca.fastAvg}ms slow=${ca.slowAvg}ms`);
+      }
+    }
+  }
   if (costlyCalls.length > 0) {
     console.log(chalk.gray('\n  ' + '─'.repeat(60)));
     console.log(chalk.bold('  Most Expensive Calls'));