trickle-cli 0.1.196 → 0.1.197
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
|
@@ -262,6 +262,52 @@ function costReportCommand(opts) {
|
|
|
262
262
|
console.log(` ${chalk_1.default.cyan(name.padEnd(30))} $${data.cost.toFixed(4).padEnd(10)} ${chalk_1.default.gray(pct + '%')} ${data.calls} calls ${formatTokens(data.tokens)} tokens`);
|
|
263
263
|
}
|
|
264
264
|
}
|
|
265
|
+
// Cache hit/miss analysis — detect from latency bimodality
|
|
266
|
+
if (calls.length >= 4) {
|
|
267
|
+
// Group by model, find bimodal latency distribution
|
|
268
|
+
const modelLatencies = {};
|
|
269
|
+
for (const c of calls) {
|
|
270
|
+
if (!c.durationMs || c.error)
|
|
271
|
+
continue;
|
|
272
|
+
const key = c.model || 'unknown';
|
|
273
|
+
if (!modelLatencies[key])
|
|
274
|
+
modelLatencies[key] = [];
|
|
275
|
+
modelLatencies[key].push(c.durationMs);
|
|
276
|
+
}
|
|
277
|
+
let cacheDetected = false;
|
|
278
|
+
const cacheAnalysis = [];
|
|
279
|
+
for (const [model, latencies] of Object.entries(modelLatencies)) {
|
|
280
|
+
if (latencies.length < 3)
|
|
281
|
+
continue;
|
|
282
|
+
latencies.sort((a, b) => a - b);
|
|
283
|
+
const median = latencies[Math.floor(latencies.length / 2)];
|
|
284
|
+
// Split into fast (< 30% of median) and slow (>= 30% of median)
|
|
285
|
+
const threshold = median * 0.3;
|
|
286
|
+
const fast = latencies.filter(l => l < threshold);
|
|
287
|
+
const slow = latencies.filter(l => l >= threshold);
|
|
288
|
+
if (fast.length >= 1 && slow.length >= 1 && fast.length / latencies.length >= 0.1) {
|
|
289
|
+
const fastAvg = fast.reduce((s, l) => s + l, 0) / fast.length;
|
|
290
|
+
const slowAvg = slow.reduce((s, l) => s + l, 0) / slow.length;
|
|
291
|
+
// Only report if there's a significant speed difference (5x+)
|
|
292
|
+
if (slowAvg / Math.max(1, fastAvg) >= 5) {
|
|
293
|
+
cacheDetected = true;
|
|
294
|
+
cacheAnalysis.push({
|
|
295
|
+
model, fastCalls: fast.length, slowCalls: slow.length,
|
|
296
|
+
fastAvg: Math.round(fastAvg), slowAvg: Math.round(slowAvg),
|
|
297
|
+
hitRate: Math.round((fast.length / latencies.length) * 100),
|
|
298
|
+
});
|
|
299
|
+
}
|
|
300
|
+
}
|
|
301
|
+
}
|
|
302
|
+
if (cacheDetected) {
|
|
303
|
+
console.log(chalk_1.default.gray('\n ' + '─'.repeat(60)));
|
|
304
|
+
console.log(chalk_1.default.bold(' Cache Analysis') + chalk_1.default.gray(' (detected from latency bimodality)'));
|
|
305
|
+
for (const ca of cacheAnalysis) {
|
|
306
|
+
const speedup = (ca.slowAvg / Math.max(1, ca.fastAvg)).toFixed(0);
|
|
307
|
+
console.log(` ${chalk_1.default.cyan(ca.model.padEnd(25))} hit rate: ${chalk_1.default.green(ca.hitRate + '%')} (${ca.fastCalls} fast, ${ca.slowCalls} slow) ${speedup}x speedup fast=${ca.fastAvg}ms slow=${ca.slowAvg}ms`);
|
|
308
|
+
}
|
|
309
|
+
}
|
|
310
|
+
}
|
|
265
311
|
if (costlyCalls.length > 0) {
|
|
266
312
|
console.log(chalk_1.default.gray('\n ' + '─'.repeat(60)));
|
|
267
313
|
console.log(chalk_1.default.bold(' Most Expensive Calls'));
|
package/package.json
CHANGED
|
@@ -241,6 +241,54 @@ export function costReportCommand(opts: { json?: boolean; budget?: string }): vo
|
|
|
241
241
|
}
|
|
242
242
|
}
|
|
243
243
|
|
|
244
|
+
// Cache hit/miss analysis — detect from latency bimodality
|
|
245
|
+
if (calls.length >= 4) {
|
|
246
|
+
// Group by model, find bimodal latency distribution
|
|
247
|
+
const modelLatencies: Record<string, number[]> = {};
|
|
248
|
+
for (const c of calls) {
|
|
249
|
+
if (!c.durationMs || c.error) continue;
|
|
250
|
+
const key = c.model || 'unknown';
|
|
251
|
+
if (!modelLatencies[key]) modelLatencies[key] = [];
|
|
252
|
+
modelLatencies[key].push(c.durationMs);
|
|
253
|
+
}
|
|
254
|
+
|
|
255
|
+
let cacheDetected = false;
|
|
256
|
+
const cacheAnalysis: Array<{ model: string; fastCalls: number; slowCalls: number; fastAvg: number; slowAvg: number; hitRate: number }> = [];
|
|
257
|
+
|
|
258
|
+
for (const [model, latencies] of Object.entries(modelLatencies)) {
|
|
259
|
+
if (latencies.length < 3) continue;
|
|
260
|
+
latencies.sort((a, b) => a - b);
|
|
261
|
+
const median = latencies[Math.floor(latencies.length / 2)];
|
|
262
|
+
// Split into fast (< 30% of median) and slow (>= 30% of median)
|
|
263
|
+
const threshold = median * 0.3;
|
|
264
|
+
const fast = latencies.filter(l => l < threshold);
|
|
265
|
+
const slow = latencies.filter(l => l >= threshold);
|
|
266
|
+
|
|
267
|
+
if (fast.length >= 1 && slow.length >= 1 && fast.length / latencies.length >= 0.1) {
|
|
268
|
+
const fastAvg = fast.reduce((s, l) => s + l, 0) / fast.length;
|
|
269
|
+
const slowAvg = slow.reduce((s, l) => s + l, 0) / slow.length;
|
|
270
|
+
// Only report if there's a significant speed difference (5x+)
|
|
271
|
+
if (slowAvg / Math.max(1, fastAvg) >= 5) {
|
|
272
|
+
cacheDetected = true;
|
|
273
|
+
cacheAnalysis.push({
|
|
274
|
+
model, fastCalls: fast.length, slowCalls: slow.length,
|
|
275
|
+
fastAvg: Math.round(fastAvg), slowAvg: Math.round(slowAvg),
|
|
276
|
+
hitRate: Math.round((fast.length / latencies.length) * 100),
|
|
277
|
+
});
|
|
278
|
+
}
|
|
279
|
+
}
|
|
280
|
+
}
|
|
281
|
+
|
|
282
|
+
if (cacheDetected) {
|
|
283
|
+
console.log(chalk.gray('\n ' + '─'.repeat(60)));
|
|
284
|
+
console.log(chalk.bold(' Cache Analysis') + chalk.gray(' (detected from latency bimodality)'));
|
|
285
|
+
for (const ca of cacheAnalysis) {
|
|
286
|
+
const speedup = (ca.slowAvg / Math.max(1, ca.fastAvg)).toFixed(0);
|
|
287
|
+
console.log(` ${chalk.cyan(ca.model.padEnd(25))} hit rate: ${chalk.green(ca.hitRate + '%')} (${ca.fastCalls} fast, ${ca.slowCalls} slow) ${speedup}x speedup fast=${ca.fastAvg}ms slow=${ca.slowAvg}ms`);
|
|
288
|
+
}
|
|
289
|
+
}
|
|
290
|
+
}
|
|
291
|
+
|
|
244
292
|
if (costlyCalls.length > 0) {
|
|
245
293
|
console.log(chalk.gray('\n ' + '─'.repeat(60)));
|
|
246
294
|
console.log(chalk.bold(' Most Expensive Calls'));
|