trickle-cli 0.1.196 → 0.1.197

This diff shows the changes between two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
@@ -262,6 +262,52 @@ function costReportCommand(opts) {
262
262
  console.log(` ${chalk_1.default.cyan(name.padEnd(30))} $${data.cost.toFixed(4).padEnd(10)} ${chalk_1.default.gray(pct + '%')} ${data.calls} calls ${formatTokens(data.tokens)} tokens`);
263
263
  }
264
264
  }
265
+ // Cache hit/miss analysis — detect from latency bimodality
266
+ if (calls.length >= 4) {
267
+ // Group by model, find bimodal latency distribution
268
+ const modelLatencies = {};
269
+ for (const c of calls) {
270
+ if (!c.durationMs || c.error)
271
+ continue;
272
+ const key = c.model || 'unknown';
273
+ if (!modelLatencies[key])
274
+ modelLatencies[key] = [];
275
+ modelLatencies[key].push(c.durationMs);
276
+ }
277
+ let cacheDetected = false;
278
+ const cacheAnalysis = [];
279
+ for (const [model, latencies] of Object.entries(modelLatencies)) {
280
+ if (latencies.length < 3)
281
+ continue;
282
+ latencies.sort((a, b) => a - b);
283
+ const median = latencies[Math.floor(latencies.length / 2)];
284
+ // Split into fast (< 30% of median) and slow (>= 30% of median)
285
+ const threshold = median * 0.3;
286
+ const fast = latencies.filter(l => l < threshold);
287
+ const slow = latencies.filter(l => l >= threshold);
288
+ if (fast.length >= 1 && slow.length >= 1 && fast.length / latencies.length >= 0.1) {
289
+ const fastAvg = fast.reduce((s, l) => s + l, 0) / fast.length;
290
+ const slowAvg = slow.reduce((s, l) => s + l, 0) / slow.length;
291
+ // Only report if there's a significant speed difference (5x+)
292
+ if (slowAvg / Math.max(1, fastAvg) >= 5) {
293
+ cacheDetected = true;
294
+ cacheAnalysis.push({
295
+ model, fastCalls: fast.length, slowCalls: slow.length,
296
+ fastAvg: Math.round(fastAvg), slowAvg: Math.round(slowAvg),
297
+ hitRate: Math.round((fast.length / latencies.length) * 100),
298
+ });
299
+ }
300
+ }
301
+ }
302
+ if (cacheDetected) {
303
+ console.log(chalk_1.default.gray('\n ' + '─'.repeat(60)));
304
+ console.log(chalk_1.default.bold(' Cache Analysis') + chalk_1.default.gray(' (detected from latency bimodality)'));
305
+ for (const ca of cacheAnalysis) {
306
+ const speedup = (ca.slowAvg / Math.max(1, ca.fastAvg)).toFixed(0);
307
+ console.log(` ${chalk_1.default.cyan(ca.model.padEnd(25))} hit rate: ${chalk_1.default.green(ca.hitRate + '%')} (${ca.fastCalls} fast, ${ca.slowCalls} slow) ${speedup}x speedup fast=${ca.fastAvg}ms slow=${ca.slowAvg}ms`);
308
+ }
309
+ }
310
+ }
265
311
  if (costlyCalls.length > 0) {
266
312
  console.log(chalk_1.default.gray('\n ' + '─'.repeat(60)));
267
313
  console.log(chalk_1.default.bold(' Most Expensive Calls'));
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "trickle-cli",
3
- "version": "0.1.196",
3
+ "version": "0.1.197",
4
4
  "description": "CLI for trickle runtime type observability",
5
5
  "bin": {
6
6
  "trickle": "dist/index.js"
@@ -241,6 +241,54 @@ export function costReportCommand(opts: { json?: boolean; budget?: string }): vo
241
241
  }
242
242
  }
243
243
 
244
+ // Cache hit/miss analysis — detect from latency bimodality
245
+ if (calls.length >= 4) {
246
+ // Group by model, find bimodal latency distribution
247
+ const modelLatencies: Record<string, number[]> = {};
248
+ for (const c of calls) {
249
+ if (!c.durationMs || c.error) continue;
250
+ const key = c.model || 'unknown';
251
+ if (!modelLatencies[key]) modelLatencies[key] = [];
252
+ modelLatencies[key].push(c.durationMs);
253
+ }
254
+
255
+ let cacheDetected = false;
256
+ const cacheAnalysis: Array<{ model: string; fastCalls: number; slowCalls: number; fastAvg: number; slowAvg: number; hitRate: number }> = [];
257
+
258
+ for (const [model, latencies] of Object.entries(modelLatencies)) {
259
+ if (latencies.length < 3) continue;
260
+ latencies.sort((a, b) => a - b);
261
+ const median = latencies[Math.floor(latencies.length / 2)];
262
+ // Split into fast (< 30% of median) and slow (>= 30% of median)
263
+ const threshold = median * 0.3;
264
+ const fast = latencies.filter(l => l < threshold);
265
+ const slow = latencies.filter(l => l >= threshold);
266
+
267
+ if (fast.length >= 1 && slow.length >= 1 && fast.length / latencies.length >= 0.1) {
268
+ const fastAvg = fast.reduce((s, l) => s + l, 0) / fast.length;
269
+ const slowAvg = slow.reduce((s, l) => s + l, 0) / slow.length;
270
+ // Only report if there's a significant speed difference (5x+)
271
+ if (slowAvg / Math.max(1, fastAvg) >= 5) {
272
+ cacheDetected = true;
273
+ cacheAnalysis.push({
274
+ model, fastCalls: fast.length, slowCalls: slow.length,
275
+ fastAvg: Math.round(fastAvg), slowAvg: Math.round(slowAvg),
276
+ hitRate: Math.round((fast.length / latencies.length) * 100),
277
+ });
278
+ }
279
+ }
280
+ }
281
+
282
+ if (cacheDetected) {
283
+ console.log(chalk.gray('\n ' + '─'.repeat(60)));
284
+ console.log(chalk.bold(' Cache Analysis') + chalk.gray(' (detected from latency bimodality)'));
285
+ for (const ca of cacheAnalysis) {
286
+ const speedup = (ca.slowAvg / Math.max(1, ca.fastAvg)).toFixed(0);
287
+ console.log(` ${chalk.cyan(ca.model.padEnd(25))} hit rate: ${chalk.green(ca.hitRate + '%')} (${ca.fastCalls} fast, ${ca.slowCalls} slow) ${speedup}x speedup fast=${ca.fastAvg}ms slow=${ca.slowAvg}ms`);
288
+ }
289
+ }
290
+ }
291
+
244
292
  if (costlyCalls.length > 0) {
245
293
  console.log(chalk.gray('\n ' + '─'.repeat(60)));
246
294
  console.log(chalk.bold(' Most Expensive Calls'));