npm - trickle-cli - Version diff - 0.1.195 → 0.1.197 - Mend

trickle-cli 0.1.195 → 0.1.197

This diff shows the changes between two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in their respective public registries.

Files changed (3)

package/dist/commands/cost-report.js +104 -1
package/package.json +1 -1
package/src/commands/cost-report.ts +104 -1

package/dist/commands/cost-report.js CHANGED

@@ -147,10 +147,46 @@ function costReportCommand(opts) {
             }
         }
     }
+    // Model tier analysis — classify models into frontier/standard/mini tiers
+    // Ordered longest-first to avoid substring matches (gpt-4o-mini before gpt-4o)
+    const TIER_RULES = [
+        ['gpt-4o-mini', 'mini'], ['gpt-4-turbo', 'frontier'], ['gpt-4o', 'standard'], ['gpt-4', 'frontier'],
+        ['gpt-3.5-turbo', 'mini'], ['o1-mini', 'standard'], ['o1-pro', 'frontier'], ['o1', 'frontier'],
+        ['o3-mini', 'standard'], ['o3', 'frontier'], ['o4-mini', 'standard'],
+        ['claude-opus', 'frontier'], ['claude-sonnet', 'standard'], ['claude-haiku', 'mini'],
+        ['gemini-2.5-flash-lite', 'mini'], ['gemini-2.5-flash', 'standard'], ['gemini-2.5-pro', 'frontier'],
+        ['gemini-2.0-flash', 'mini'], ['gemini-1.5-pro', 'frontier'], ['gemini-1.5-flash', 'mini'],
+    ];
+    function classifyTier(model) {
+        for (const [pattern, tier] of TIER_RULES) {
+            if (model.includes(pattern))
+                return tier;
+        }
+        if (model.includes('mini') || model.includes('lite') || model.includes('haiku') || model.includes('flash'))
+            return 'mini';
+        if (model.includes('pro') || model.includes('opus') || model.includes('turbo'))
+            return 'frontier';
+        return 'standard';
+    }
+    const byTier = {};
+    for (const c of calls) {
+        const tier = classifyTier(c.model || '');
+        if (!byTier[tier])
+            byTier[tier] = { calls: 0, tokens: 0, cost: 0, avgLatency: 0, errors: 0 };
+        byTier[tier].calls++;
+        byTier[tier].tokens += c.totalTokens || 0;
+        byTier[tier].cost += c.estimatedCostUsd || 0;
+        byTier[tier].avgLatency += c.durationMs || 0;
+        if (c.error)
+            byTier[tier].errors++;
+    }
+    for (const t of Object.values(byTier)) {
+        t.avgLatency = t.calls > 0 ? t.avgLatency / t.calls : 0;
+    }
     if (opts.json) {
         console.log(JSON.stringify({
             summary: { totalCost, totalTokens, totalInputTokens, totalOutputTokens, totalCalls: calls.length, totalDurationMs: totalDuration, errors: errorCount, monthlyProjection },
-            byProvider, byModel,
+            byProvider, byModel, byTier,
             ...(Object.keys(byAgent).length > 0 ? { byAgent } : {}),
         }, null, 2));
         return;
@@ -195,6 +231,27 @@ function costReportCommand(opts) {
     }
     // Top costly calls
     const costlyCalls = calls.filter(c => c.estimatedCostUsd > 0).sort((a, b) => b.estimatedCostUsd - a.estimatedCostUsd).slice(0, 5);
+    // By tier
+    if (Object.keys(byTier).length > 1) {
+        console.log(chalk_1.default.gray('\n  ' + '─'.repeat(60)));
+        console.log(chalk_1.default.bold('  Model Tier Analysis'));
+        const tierOrder = ['frontier', 'standard', 'mini'];
+        const tierLabels = { frontier: '🔴 Frontier', standard: '🟡 Standard', mini: '🟢 Mini' };
+        for (const tier of tierOrder) {
+            const data = byTier[tier];
+            if (!data)
+                continue;
+            const pct = totalCost > 0 ? ((data.cost / totalCost) * 100).toFixed(0) : '0';
+            const callPct = calls.length > 0 ? ((data.calls / calls.length) * 100).toFixed(0) : '0';
+            const errRate = data.calls > 0 ? ((data.errors / data.calls) * 100).toFixed(0) : '0';
+            console.log(`  ${(tierLabels[tier] || tier).padEnd(16)} $${data.cost.toFixed(4).padEnd(10)} ${chalk_1.default.gray(pct + '% cost')}  ${data.calls} calls (${callPct}%)  avg ${data.avgLatency.toFixed(0)}ms  ${data.errors > 0 ? chalk_1.default.red(errRate + '% err') : chalk_1.default.green('0% err')}`);
+        }
+        // Tier optimization suggestion
+        const frontierPct = byTier.frontier ? (byTier.frontier.calls / calls.length) * 100 : 0;
+        if (frontierPct > 50) {
+            console.log(chalk_1.default.yellow(`  💡 ${frontierPct.toFixed(0)}% of calls use frontier models. Consider routing simple tasks to mini tier for ~75% savings.`));
+        }
+    }
     // By agent (if agent data exists)
     if (Object.keys(byAgent).length > 0) {
         console.log(chalk_1.default.gray('\n  ' + '─'.repeat(60)));
@@ -205,6 +262,52 @@ function costReportCommand(opts) {
             console.log(`  ${chalk_1.default.cyan(name.padEnd(30))} $${data.cost.toFixed(4).padEnd(10)} ${chalk_1.default.gray(pct + '%')}  ${data.calls} calls  ${formatTokens(data.tokens)} tokens`);
         }
     }
+    // Cache hit/miss analysis — detect from latency bimodality
+    if (calls.length >= 4) {
+        // Group by model, find bimodal latency distribution
+        const modelLatencies = {};
+        for (const c of calls) {
+            if (!c.durationMs || c.error)
+                continue;
+            const key = c.model || 'unknown';
+            if (!modelLatencies[key])
+                modelLatencies[key] = [];
+            modelLatencies[key].push(c.durationMs);
+        }
+        let cacheDetected = false;
+        const cacheAnalysis = [];
+        for (const [model, latencies] of Object.entries(modelLatencies)) {
+            if (latencies.length < 3)
+                continue;
+            latencies.sort((a, b) => a - b);
+            const median = latencies[Math.floor(latencies.length / 2)];
+            // Split into fast (< 30% of median) and slow (>= 30% of median)
+            const threshold = median * 0.3;
+            const fast = latencies.filter(l => l < threshold);
+            const slow = latencies.filter(l => l >= threshold);
+            if (fast.length >= 1 && slow.length >= 1 && fast.length / latencies.length >= 0.1) {
+                const fastAvg = fast.reduce((s, l) => s + l, 0) / fast.length;
+                const slowAvg = slow.reduce((s, l) => s + l, 0) / slow.length;
+                // Only report if there's a significant speed difference (5x+)
+                if (slowAvg / Math.max(1, fastAvg) >= 5) {
+                    cacheDetected = true;
+                    cacheAnalysis.push({
+                        model, fastCalls: fast.length, slowCalls: slow.length,
+                        fastAvg: Math.round(fastAvg), slowAvg: Math.round(slowAvg),
+                        hitRate: Math.round((fast.length / latencies.length) * 100),
+                    });
+                }
+            }
+        }
+        if (cacheDetected) {
+            console.log(chalk_1.default.gray('\n  ' + '─'.repeat(60)));
+            console.log(chalk_1.default.bold('  Cache Analysis') + chalk_1.default.gray(' (detected from latency bimodality)'));
+            for (const ca of cacheAnalysis) {
+                const speedup = (ca.slowAvg / Math.max(1, ca.fastAvg)).toFixed(0);
+                console.log(`  ${chalk_1.default.cyan(ca.model.padEnd(25))} hit rate: ${chalk_1.default.green(ca.hitRate + '%')} (${ca.fastCalls} fast, ${ca.slowCalls} slow)  ${speedup}x speedup  fast=${ca.fastAvg}ms slow=${ca.slowAvg}ms`);
+            }
+        }
+    }
     if (costlyCalls.length > 0) {
         console.log(chalk_1.default.gray('\n  ' + '─'.repeat(60)));
         console.log(chalk_1.default.bold('  Most Expensive Calls'));

package/package.json CHANGED

@@ -1,6 +1,6 @@
 {
   "name": "trickle-cli",
-  "version": "0.1.195",
+  "version": "0.1.197",
   "description": "CLI for trickle runtime type observability",
   "bin": {
     "trickle": "dist/index.js"

package/src/commands/cost-report.ts CHANGED

@@ -122,10 +122,44 @@ export function costReportCommand(opts: { json?: boolean; budget?: string }): vo
     }
   }
+  // Model tier analysis — classify models into frontier/standard/mini tiers
+  // Ordered longest-first to avoid substring matches (gpt-4o-mini before gpt-4o)
+  const TIER_RULES: Array<[string, string]> = [
+    ['gpt-4o-mini', 'mini'], ['gpt-4-turbo', 'frontier'], ['gpt-4o', 'standard'], ['gpt-4', 'frontier'],
+    ['gpt-3.5-turbo', 'mini'], ['o1-mini', 'standard'], ['o1-pro', 'frontier'], ['o1', 'frontier'],
+    ['o3-mini', 'standard'], ['o3', 'frontier'], ['o4-mini', 'standard'],
+    ['claude-opus', 'frontier'], ['claude-sonnet', 'standard'], ['claude-haiku', 'mini'],
+    ['gemini-2.5-flash-lite', 'mini'], ['gemini-2.5-flash', 'standard'], ['gemini-2.5-pro', 'frontier'],
+    ['gemini-2.0-flash', 'mini'], ['gemini-1.5-pro', 'frontier'], ['gemini-1.5-flash', 'mini'],
+  ];
+  function classifyTier(model: string): string {
+    for (const [pattern, tier] of TIER_RULES) {
+      if (model.includes(pattern)) return tier;
+    }
+    if (model.includes('mini') || model.includes('lite') || model.includes('haiku') || model.includes('flash')) return 'mini';
+    if (model.includes('pro') || model.includes('opus') || model.includes('turbo')) return 'frontier';
+    return 'standard';
+  }
+  const byTier: Record<string, { calls: number; tokens: number; cost: number; avgLatency: number; errors: number }> = {};
+  for (const c of calls) {
+    const tier = classifyTier(c.model || '');
+    if (!byTier[tier]) byTier[tier] = { calls: 0, tokens: 0, cost: 0, avgLatency: 0, errors: 0 };
+    byTier[tier].calls++;
+    byTier[tier].tokens += c.totalTokens || 0;
+    byTier[tier].cost += c.estimatedCostUsd || 0;
+    byTier[tier].avgLatency += c.durationMs || 0;
+    if (c.error) byTier[tier].errors++;
+  }
+  for (const t of Object.values(byTier)) {
+    t.avgLatency = t.calls > 0 ? t.avgLatency / t.calls : 0;
+  }
   if (opts.json) {
     console.log(JSON.stringify({
       summary: { totalCost, totalTokens, totalInputTokens, totalOutputTokens, totalCalls: calls.length, totalDurationMs: totalDuration, errors: errorCount, monthlyProjection },
-      byProvider, byModel,
+      byProvider, byModel, byTier,
       ...(Object.keys(byAgent).length > 0 ? { byAgent } : {}),
     }, null, 2));
     return;
@@ -175,6 +209,27 @@ export function costReportCommand(opts: { json?: boolean; budget?: string }): vo
   // Top costly calls
   const costlyCalls = calls.filter(c => c.estimatedCostUsd > 0).sort((a, b) => b.estimatedCostUsd - a.estimatedCostUsd).slice(0, 5);
+  // By tier
+  if (Object.keys(byTier).length > 1) {
+    console.log(chalk.gray('\n  ' + '─'.repeat(60)));
+    console.log(chalk.bold('  Model Tier Analysis'));
+    const tierOrder = ['frontier', 'standard', 'mini'];
+    const tierLabels: Record<string, string> = { frontier: '🔴 Frontier', standard: '🟡 Standard', mini: '🟢 Mini' };
+    for (const tier of tierOrder) {
+      const data = byTier[tier];
+      if (!data) continue;
+      const pct = totalCost > 0 ? ((data.cost / totalCost) * 100).toFixed(0) : '0';
+      const callPct = calls.length > 0 ? ((data.calls / calls.length) * 100).toFixed(0) : '0';
+      const errRate = data.calls > 0 ? ((data.errors / data.calls) * 100).toFixed(0) : '0';
+      console.log(`  ${(tierLabels[tier] || tier).padEnd(16)} $${data.cost.toFixed(4).padEnd(10)} ${chalk.gray(pct + '% cost')}  ${data.calls} calls (${callPct}%)  avg ${data.avgLatency.toFixed(0)}ms  ${data.errors > 0 ? chalk.red(errRate + '% err') : chalk.green('0% err')}`);
+    }
+    // Tier optimization suggestion
+    const frontierPct = byTier.frontier ? (byTier.frontier.calls / calls.length) * 100 : 0;
+    if (frontierPct > 50) {
+      console.log(chalk.yellow(`  💡 ${frontierPct.toFixed(0)}% of calls use frontier models. Consider routing simple tasks to mini tier for ~75% savings.`));
+    }
+  }
   // By agent (if agent data exists)
   if (Object.keys(byAgent).length > 0) {
     console.log(chalk.gray('\n  ' + '─'.repeat(60)));
@@ -186,6 +241,54 @@ export function costReportCommand(opts: { json?: boolean; budget?: string }): vo
     }
   }
+  // Cache hit/miss analysis — detect from latency bimodality
+  if (calls.length >= 4) {
+    // Group by model, find bimodal latency distribution
+    const modelLatencies: Record<string, number[]> = {};
+    for (const c of calls) {
+      if (!c.durationMs || c.error) continue;
+      const key = c.model || 'unknown';
+      if (!modelLatencies[key]) modelLatencies[key] = [];
+      modelLatencies[key].push(c.durationMs);
+    }
+    let cacheDetected = false;
+    const cacheAnalysis: Array<{ model: string; fastCalls: number; slowCalls: number; fastAvg: number; slowAvg: number; hitRate: number }> = [];
+    for (const [model, latencies] of Object.entries(modelLatencies)) {
+      if (latencies.length < 3) continue;
+      latencies.sort((a, b) => a - b);
+      const median = latencies[Math.floor(latencies.length / 2)];
+      // Split into fast (< 30% of median) and slow (>= 30% of median)
+      const threshold = median * 0.3;
+      const fast = latencies.filter(l => l < threshold);
+      const slow = latencies.filter(l => l >= threshold);
+      if (fast.length >= 1 && slow.length >= 1 && fast.length / latencies.length >= 0.1) {
+        const fastAvg = fast.reduce((s, l) => s + l, 0) / fast.length;
+        const slowAvg = slow.reduce((s, l) => s + l, 0) / slow.length;
+        // Only report if there's a significant speed difference (5x+)
+        if (slowAvg / Math.max(1, fastAvg) >= 5) {
+          cacheDetected = true;
+          cacheAnalysis.push({
+            model, fastCalls: fast.length, slowCalls: slow.length,
+            fastAvg: Math.round(fastAvg), slowAvg: Math.round(slowAvg),
+            hitRate: Math.round((fast.length / latencies.length) * 100),
+          });
+        }
+      }
+    }
+    if (cacheDetected) {
+      console.log(chalk.gray('\n  ' + '─'.repeat(60)));
+      console.log(chalk.bold('  Cache Analysis') + chalk.gray(' (detected from latency bimodality)'));
+      for (const ca of cacheAnalysis) {
+        const speedup = (ca.slowAvg / Math.max(1, ca.fastAvg)).toFixed(0);
+        console.log(`  ${chalk.cyan(ca.model.padEnd(25))} hit rate: ${chalk.green(ca.hitRate + '%')} (${ca.fastCalls} fast, ${ca.slowCalls} slow)  ${speedup}x speedup  fast=${ca.fastAvg}ms slow=${ca.slowAvg}ms`);
+      }
+    }
+  }
   if (costlyCalls.length > 0) {
     console.log(chalk.gray('\n  ' + '─'.repeat(60)));
     console.log(chalk.bold('  Most Expensive Calls'));