trickle-cli 0.1.195 → 0.1.197

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -147,10 +147,46 @@ function costReportCommand(opts) {
147
147
  }
148
148
  }
149
149
  }
150
// Model tier analysis — classify models into frontier/standard/mini tiers.
// Rules are checked in order and matched as substrings of the lower-cased model
// id, so a more specific id must be listed before any id it contains
// (gpt-4o-mini before gpt-4o, gpt-4.1-mini before gpt-4).
const TIER_RULES = [
    ['gpt-4o-mini', 'mini'], ['gpt-4.1-mini', 'mini'], ['gpt-4.1-nano', 'mini'],
    ['gpt-4-turbo', 'frontier'], ['gpt-4o', 'standard'], ['gpt-4', 'frontier'],
    ['gpt-3.5-turbo', 'mini'], ['o1-mini', 'standard'], ['o1-pro', 'frontier'], ['o1', 'frontier'],
    ['o3-mini', 'standard'], ['o3', 'frontier'], ['o4-mini', 'standard'],
    ['claude-opus', 'frontier'], ['claude-sonnet', 'standard'], ['claude-haiku', 'mini'],
    ['gemini-2.5-flash-lite', 'mini'], ['gemini-2.5-flash', 'standard'], ['gemini-2.5-pro', 'frontier'],
    ['gemini-2.0-flash', 'mini'], ['gemini-1.5-pro', 'frontier'], ['gemini-1.5-flash', 'mini'],
];
/**
 * Map a model id to a pricing tier: 'frontier', 'standard' or 'mini'.
 *
 * The id is lower-cased, then tested against TIER_RULES in order; the first
 * rule whose pattern occurs in the id wins. Ids that match no rule fall back
 * to keyword detection, and finally to 'standard'.
 *
 * @param model Model id as recorded on the call (may be an empty string).
 * @returns The tier name.
 */
function classifyTier(model) {
    const id = model.toLowerCase();
    for (const [pattern, tier] of TIER_RULES) {
        if (id.includes(pattern))
            return tier;
    }
    // Keywords are compared against whole delimiter-separated tokens rather than
    // raw substrings: "gemini" contains "mini", so a substring test would put
    // every unlisted Gemini model (e.g. "gemini-pro") in the mini tier.
    const tokens = id.split(/[^a-z0-9]+/).filter(Boolean);
    const hasToken = (keywords) => keywords.some((k) => tokens.includes(k));
    if (hasToken(['mini', 'lite', 'haiku', 'flash']))
        return 'mini';
    if (hasToken(['pro', 'opus', 'turbo']))
        return 'frontier';
    return 'standard';
}
171
+ const byTier = {};
172
+ for (const c of calls) {
173
+ const tier = classifyTier(c.model || '');
174
+ if (!byTier[tier])
175
+ byTier[tier] = { calls: 0, tokens: 0, cost: 0, avgLatency: 0, errors: 0 };
176
+ byTier[tier].calls++;
177
+ byTier[tier].tokens += c.totalTokens || 0;
178
+ byTier[tier].cost += c.estimatedCostUsd || 0;
179
+ byTier[tier].avgLatency += c.durationMs || 0;
180
+ if (c.error)
181
+ byTier[tier].errors++;
182
+ }
183
+ for (const t of Object.values(byTier)) {
184
+ t.avgLatency = t.calls > 0 ? t.avgLatency / t.calls : 0;
185
+ }
150
186
  if (opts.json) {
151
187
  console.log(JSON.stringify({
152
188
  summary: { totalCost, totalTokens, totalInputTokens, totalOutputTokens, totalCalls: calls.length, totalDurationMs: totalDuration, errors: errorCount, monthlyProjection },
153
- byProvider, byModel,
189
+ byProvider, byModel, byTier,
154
190
  ...(Object.keys(byAgent).length > 0 ? { byAgent } : {}),
155
191
  }, null, 2));
156
192
  return;
@@ -195,6 +231,27 @@ function costReportCommand(opts) {
195
231
  }
196
232
  // Top costly calls
197
233
  const costlyCalls = calls.filter(c => c.estimatedCostUsd > 0).sort((a, b) => b.estimatedCostUsd - a.estimatedCostUsd).slice(0, 5);
234
+ // By tier
235
+ if (Object.keys(byTier).length > 1) {
236
+ console.log(chalk_1.default.gray('\n ' + '─'.repeat(60)));
237
+ console.log(chalk_1.default.bold(' Model Tier Analysis'));
238
+ const tierOrder = ['frontier', 'standard', 'mini'];
239
+ const tierLabels = { frontier: '🔴 Frontier', standard: '🟡 Standard', mini: '🟢 Mini' };
240
+ for (const tier of tierOrder) {
241
+ const data = byTier[tier];
242
+ if (!data)
243
+ continue;
244
+ const pct = totalCost > 0 ? ((data.cost / totalCost) * 100).toFixed(0) : '0';
245
+ const callPct = calls.length > 0 ? ((data.calls / calls.length) * 100).toFixed(0) : '0';
246
+ const errRate = data.calls > 0 ? ((data.errors / data.calls) * 100).toFixed(0) : '0';
247
+ console.log(` ${(tierLabels[tier] || tier).padEnd(16)} $${data.cost.toFixed(4).padEnd(10)} ${chalk_1.default.gray(pct + '% cost')} ${data.calls} calls (${callPct}%) avg ${data.avgLatency.toFixed(0)}ms ${data.errors > 0 ? chalk_1.default.red(errRate + '% err') : chalk_1.default.green('0% err')}`);
248
+ }
249
+ // Tier optimization suggestion
250
+ const frontierPct = byTier.frontier ? (byTier.frontier.calls / calls.length) * 100 : 0;
251
+ if (frontierPct > 50) {
252
+ console.log(chalk_1.default.yellow(` 💡 ${frontierPct.toFixed(0)}% of calls use frontier models. Consider routing simple tasks to mini tier for ~75% savings.`));
253
+ }
254
+ }
198
255
  // By agent (if agent data exists)
199
256
  if (Object.keys(byAgent).length > 0) {
200
257
  console.log(chalk_1.default.gray('\n ' + '─'.repeat(60)));
@@ -205,6 +262,52 @@ function costReportCommand(opts) {
205
262
  console.log(` ${chalk_1.default.cyan(name.padEnd(30))} $${data.cost.toFixed(4).padEnd(10)} ${chalk_1.default.gray(pct + '%')} ${data.calls} calls ${formatTokens(data.tokens)} tokens`);
206
263
  }
207
264
  }
265
+ // Cache hit/miss analysis — detect from latency bimodality
266
+ if (calls.length >= 4) {
267
+ // Group by model, find bimodal latency distribution
268
+ const modelLatencies = {};
269
+ for (const c of calls) {
270
+ if (!c.durationMs || c.error)
271
+ continue;
272
+ const key = c.model || 'unknown';
273
+ if (!modelLatencies[key])
274
+ modelLatencies[key] = [];
275
+ modelLatencies[key].push(c.durationMs);
276
+ }
277
+ let cacheDetected = false;
278
+ const cacheAnalysis = [];
279
+ for (const [model, latencies] of Object.entries(modelLatencies)) {
280
+ if (latencies.length < 3)
281
+ continue;
282
+ latencies.sort((a, b) => a - b);
283
+ const median = latencies[Math.floor(latencies.length / 2)];
284
+ // Split into fast (< 30% of median) and slow (>= 30% of median)
285
+ const threshold = median * 0.3;
286
+ const fast = latencies.filter(l => l < threshold);
287
+ const slow = latencies.filter(l => l >= threshold);
288
+ if (fast.length >= 1 && slow.length >= 1 && fast.length / latencies.length >= 0.1) {
289
+ const fastAvg = fast.reduce((s, l) => s + l, 0) / fast.length;
290
+ const slowAvg = slow.reduce((s, l) => s + l, 0) / slow.length;
291
+ // Only report if there's a significant speed difference (5x+)
292
+ if (slowAvg / Math.max(1, fastAvg) >= 5) {
293
+ cacheDetected = true;
294
+ cacheAnalysis.push({
295
+ model, fastCalls: fast.length, slowCalls: slow.length,
296
+ fastAvg: Math.round(fastAvg), slowAvg: Math.round(slowAvg),
297
+ hitRate: Math.round((fast.length / latencies.length) * 100),
298
+ });
299
+ }
300
+ }
301
+ }
302
+ if (cacheDetected) {
303
+ console.log(chalk_1.default.gray('\n ' + '─'.repeat(60)));
304
+ console.log(chalk_1.default.bold(' Cache Analysis') + chalk_1.default.gray(' (detected from latency bimodality)'));
305
+ for (const ca of cacheAnalysis) {
306
+ const speedup = (ca.slowAvg / Math.max(1, ca.fastAvg)).toFixed(0);
307
+ console.log(` ${chalk_1.default.cyan(ca.model.padEnd(25))} hit rate: ${chalk_1.default.green(ca.hitRate + '%')} (${ca.fastCalls} fast, ${ca.slowCalls} slow) ${speedup}x speedup fast=${ca.fastAvg}ms slow=${ca.slowAvg}ms`);
308
+ }
309
+ }
310
+ }
208
311
  if (costlyCalls.length > 0) {
209
312
  console.log(chalk_1.default.gray('\n ' + '─'.repeat(60)));
210
313
  console.log(chalk_1.default.bold(' Most Expensive Calls'));
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "trickle-cli",
3
- "version": "0.1.195",
3
+ "version": "0.1.197",
4
4
  "description": "CLI for trickle runtime type observability",
5
5
  "bin": {
6
6
  "trickle": "dist/index.js"
@@ -122,10 +122,44 @@ export function costReportCommand(opts: { json?: boolean; budget?: string }): vo
122
122
  }
123
123
  }
124
124
 
125
// Model tier analysis — classify models into frontier/standard/mini tiers.
// Rules are checked in order and matched as substrings of the lower-cased model
// id, so a more specific id must be listed before any id it contains
// (gpt-4o-mini before gpt-4o, gpt-4.1-mini before gpt-4).
const TIER_RULES: Array<[string, string]> = [
  ['gpt-4o-mini', 'mini'], ['gpt-4.1-mini', 'mini'], ['gpt-4.1-nano', 'mini'],
  ['gpt-4-turbo', 'frontier'], ['gpt-4o', 'standard'], ['gpt-4', 'frontier'],
  ['gpt-3.5-turbo', 'mini'], ['o1-mini', 'standard'], ['o1-pro', 'frontier'], ['o1', 'frontier'],
  ['o3-mini', 'standard'], ['o3', 'frontier'], ['o4-mini', 'standard'],
  ['claude-opus', 'frontier'], ['claude-sonnet', 'standard'], ['claude-haiku', 'mini'],
  ['gemini-2.5-flash-lite', 'mini'], ['gemini-2.5-flash', 'standard'], ['gemini-2.5-pro', 'frontier'],
  ['gemini-2.0-flash', 'mini'], ['gemini-1.5-pro', 'frontier'], ['gemini-1.5-flash', 'mini'],
];

/**
 * Map a model id to a pricing tier: 'frontier', 'standard' or 'mini'.
 *
 * The id is lower-cased, then tested against TIER_RULES in order; the first
 * rule whose pattern occurs in the id wins. Ids that match no rule fall back
 * to keyword detection, and finally to 'standard'.
 *
 * @param model Model id as recorded on the call (may be an empty string).
 * @returns The tier name.
 */
function classifyTier(model: string): string {
  const id = model.toLowerCase();
  for (const [pattern, tier] of TIER_RULES) {
    if (id.includes(pattern)) return tier;
  }
  // Keywords are compared against whole delimiter-separated tokens rather than
  // raw substrings: "gemini" contains "mini", so a substring test would put
  // every unlisted Gemini model (e.g. "gemini-pro") in the mini tier.
  const tokens = id.split(/[^a-z0-9]+/).filter(Boolean);
  const hasToken = (keywords: readonly string[]): boolean => keywords.some((k) => tokens.includes(k));
  if (hasToken(['mini', 'lite', 'haiku', 'flash'])) return 'mini';
  if (hasToken(['pro', 'opus', 'turbo'])) return 'frontier';
  return 'standard';
}
144
+
145
+ const byTier: Record<string, { calls: number; tokens: number; cost: number; avgLatency: number; errors: number }> = {};
146
+ for (const c of calls) {
147
+ const tier = classifyTier(c.model || '');
148
+ if (!byTier[tier]) byTier[tier] = { calls: 0, tokens: 0, cost: 0, avgLatency: 0, errors: 0 };
149
+ byTier[tier].calls++;
150
+ byTier[tier].tokens += c.totalTokens || 0;
151
+ byTier[tier].cost += c.estimatedCostUsd || 0;
152
+ byTier[tier].avgLatency += c.durationMs || 0;
153
+ if (c.error) byTier[tier].errors++;
154
+ }
155
+ for (const t of Object.values(byTier)) {
156
+ t.avgLatency = t.calls > 0 ? t.avgLatency / t.calls : 0;
157
+ }
158
+
125
159
  if (opts.json) {
126
160
  console.log(JSON.stringify({
127
161
  summary: { totalCost, totalTokens, totalInputTokens, totalOutputTokens, totalCalls: calls.length, totalDurationMs: totalDuration, errors: errorCount, monthlyProjection },
128
- byProvider, byModel,
162
+ byProvider, byModel, byTier,
129
163
  ...(Object.keys(byAgent).length > 0 ? { byAgent } : {}),
130
164
  }, null, 2));
131
165
  return;
@@ -175,6 +209,27 @@ export function costReportCommand(opts: { json?: boolean; budget?: string }): vo
175
209
 
176
210
  // Top costly calls
177
211
  const costlyCalls = calls.filter(c => c.estimatedCostUsd > 0).sort((a, b) => b.estimatedCostUsd - a.estimatedCostUsd).slice(0, 5);
212
+ // By tier
213
+ if (Object.keys(byTier).length > 1) {
214
+ console.log(chalk.gray('\n ' + '─'.repeat(60)));
215
+ console.log(chalk.bold(' Model Tier Analysis'));
216
+ const tierOrder = ['frontier', 'standard', 'mini'];
217
+ const tierLabels: Record<string, string> = { frontier: '🔴 Frontier', standard: '🟡 Standard', mini: '🟢 Mini' };
218
+ for (const tier of tierOrder) {
219
+ const data = byTier[tier];
220
+ if (!data) continue;
221
+ const pct = totalCost > 0 ? ((data.cost / totalCost) * 100).toFixed(0) : '0';
222
+ const callPct = calls.length > 0 ? ((data.calls / calls.length) * 100).toFixed(0) : '0';
223
+ const errRate = data.calls > 0 ? ((data.errors / data.calls) * 100).toFixed(0) : '0';
224
+ console.log(` ${(tierLabels[tier] || tier).padEnd(16)} $${data.cost.toFixed(4).padEnd(10)} ${chalk.gray(pct + '% cost')} ${data.calls} calls (${callPct}%) avg ${data.avgLatency.toFixed(0)}ms ${data.errors > 0 ? chalk.red(errRate + '% err') : chalk.green('0% err')}`);
225
+ }
226
+ // Tier optimization suggestion
227
+ const frontierPct = byTier.frontier ? (byTier.frontier.calls / calls.length) * 100 : 0;
228
+ if (frontierPct > 50) {
229
+ console.log(chalk.yellow(` 💡 ${frontierPct.toFixed(0)}% of calls use frontier models. Consider routing simple tasks to mini tier for ~75% savings.`));
230
+ }
231
+ }
232
+
178
233
  // By agent (if agent data exists)
179
234
  if (Object.keys(byAgent).length > 0) {
180
235
  console.log(chalk.gray('\n ' + '─'.repeat(60)));
@@ -186,6 +241,54 @@ export function costReportCommand(opts: { json?: boolean; budget?: string }): vo
186
241
  }
187
242
  }
188
243
 
244
+ // Cache hit/miss analysis — detect from latency bimodality
245
+ if (calls.length >= 4) {
246
+ // Group by model, find bimodal latency distribution
247
+ const modelLatencies: Record<string, number[]> = {};
248
+ for (const c of calls) {
249
+ if (!c.durationMs || c.error) continue;
250
+ const key = c.model || 'unknown';
251
+ if (!modelLatencies[key]) modelLatencies[key] = [];
252
+ modelLatencies[key].push(c.durationMs);
253
+ }
254
+
255
+ let cacheDetected = false;
256
+ const cacheAnalysis: Array<{ model: string; fastCalls: number; slowCalls: number; fastAvg: number; slowAvg: number; hitRate: number }> = [];
257
+
258
+ for (const [model, latencies] of Object.entries(modelLatencies)) {
259
+ if (latencies.length < 3) continue;
260
+ latencies.sort((a, b) => a - b);
261
+ const median = latencies[Math.floor(latencies.length / 2)];
262
+ // Split into fast (< 30% of median) and slow (>= 30% of median)
263
+ const threshold = median * 0.3;
264
+ const fast = latencies.filter(l => l < threshold);
265
+ const slow = latencies.filter(l => l >= threshold);
266
+
267
+ if (fast.length >= 1 && slow.length >= 1 && fast.length / latencies.length >= 0.1) {
268
+ const fastAvg = fast.reduce((s, l) => s + l, 0) / fast.length;
269
+ const slowAvg = slow.reduce((s, l) => s + l, 0) / slow.length;
270
+ // Only report if there's a significant speed difference (5x+)
271
+ if (slowAvg / Math.max(1, fastAvg) >= 5) {
272
+ cacheDetected = true;
273
+ cacheAnalysis.push({
274
+ model, fastCalls: fast.length, slowCalls: slow.length,
275
+ fastAvg: Math.round(fastAvg), slowAvg: Math.round(slowAvg),
276
+ hitRate: Math.round((fast.length / latencies.length) * 100),
277
+ });
278
+ }
279
+ }
280
+ }
281
+
282
+ if (cacheDetected) {
283
+ console.log(chalk.gray('\n ' + '─'.repeat(60)));
284
+ console.log(chalk.bold(' Cache Analysis') + chalk.gray(' (detected from latency bimodality)'));
285
+ for (const ca of cacheAnalysis) {
286
+ const speedup = (ca.slowAvg / Math.max(1, ca.fastAvg)).toFixed(0);
287
+ console.log(` ${chalk.cyan(ca.model.padEnd(25))} hit rate: ${chalk.green(ca.hitRate + '%')} (${ca.fastCalls} fast, ${ca.slowCalls} slow) ${speedup}x speedup fast=${ca.fastAvg}ms slow=${ca.slowAvg}ms`);
288
+ }
289
+ }
290
+ }
291
+
189
292
  if (costlyCalls.length > 0) {
190
293
  console.log(chalk.gray('\n ' + '─'.repeat(60)));
191
294
  console.log(chalk.bold(' Most Expensive Calls'));