trickle-cli 0.1.195 → 0.1.196
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
|
@@ -147,10 +147,46 @@ function costReportCommand(opts) {
|
|
|
147
147
|
}
|
|
148
148
|
}
|
|
149
149
|
}
|
|
150
|
+
// Model tier analysis — classify models into frontier/standard/mini tiers.
// Rules are tried in order using substring matching, so a more specific pattern
// must appear before any shorter pattern it contains (gpt-4o-mini before gpt-4o,
// gpt-4.1-mini before gpt-4).
const TIER_RULES = [
    ['gpt-4o-mini', 'mini'], ['gpt-4-turbo', 'frontier'], ['gpt-4o', 'standard'],
    // Small gpt-4.1 variants must be listed before the broad 'gpt-4' rule,
    // otherwise they would be reported as frontier models.
    ['gpt-4.1-mini', 'mini'], ['gpt-4.1-nano', 'mini'], ['gpt-4', 'frontier'],
    ['gpt-3.5-turbo', 'mini'], ['o1-mini', 'standard'], ['o1-pro', 'frontier'], ['o1', 'frontier'],
    ['o3-mini', 'standard'], ['o3', 'frontier'], ['o4-mini', 'standard'],
    ['claude-opus', 'frontier'], ['claude-sonnet', 'standard'], ['claude-haiku', 'mini'],
    ['gemini-2.5-flash-lite', 'mini'], ['gemini-2.5-flash', 'standard'], ['gemini-2.5-pro', 'frontier'],
    ['gemini-2.0-flash', 'mini'], ['gemini-1.5-pro', 'frontier'], ['gemini-1.5-flash', 'mini'],
];
/**
 * Classify a model identifier into a cost tier.
 *
 * Explicit TIER_RULES are checked first (case-insensitive substring match, in
 * list order). If none matches, the name is split on non-alphanumeric
 * delimiters and classified by keyword tokens; mini keywords are checked
 * before frontier keywords.
 *
 * @param {string} model Model identifier, e.g. "gpt-4o-mini" or "gemini-2.5-pro".
 * @returns {string} 'frontier', 'standard' or 'mini' ('standard' when nothing matches).
 */
function classifyTier(model) {
    const name = model.toLowerCase();
    for (const [pattern, tier] of TIER_RULES) {
        if (name.includes(pattern))
            return tier;
    }
    // Compare whole tokens rather than raw substrings: a plain includes('mini')
    // also matches the "mini" inside "gemini" (and "minimax"), which filed every
    // Gemini model without an explicit rule under the mini tier.
    const tokens = name.split(/[^a-z0-9]+/);
    const hasToken = (keywords) => keywords.some((keyword) => tokens.includes(keyword));
    if (hasToken(['mini', 'nano', 'lite', 'haiku', 'flash']))
        return 'mini';
    if (hasToken(['pro', 'opus', 'turbo']))
        return 'frontier';
    return 'standard';
}
|
|
171
|
+
const byTier = {};
|
|
172
|
+
for (const c of calls) {
|
|
173
|
+
const tier = classifyTier(c.model || '');
|
|
174
|
+
if (!byTier[tier])
|
|
175
|
+
byTier[tier] = { calls: 0, tokens: 0, cost: 0, avgLatency: 0, errors: 0 };
|
|
176
|
+
byTier[tier].calls++;
|
|
177
|
+
byTier[tier].tokens += c.totalTokens || 0;
|
|
178
|
+
byTier[tier].cost += c.estimatedCostUsd || 0;
|
|
179
|
+
byTier[tier].avgLatency += c.durationMs || 0;
|
|
180
|
+
if (c.error)
|
|
181
|
+
byTier[tier].errors++;
|
|
182
|
+
}
|
|
183
|
+
for (const t of Object.values(byTier)) {
|
|
184
|
+
t.avgLatency = t.calls > 0 ? t.avgLatency / t.calls : 0;
|
|
185
|
+
}
|
|
150
186
|
if (opts.json) {
|
|
151
187
|
console.log(JSON.stringify({
|
|
152
188
|
summary: { totalCost, totalTokens, totalInputTokens, totalOutputTokens, totalCalls: calls.length, totalDurationMs: totalDuration, errors: errorCount, monthlyProjection },
|
|
153
|
-
byProvider, byModel,
|
|
189
|
+
byProvider, byModel, byTier,
|
|
154
190
|
...(Object.keys(byAgent).length > 0 ? { byAgent } : {}),
|
|
155
191
|
}, null, 2));
|
|
156
192
|
return;
|
|
@@ -195,6 +231,27 @@ function costReportCommand(opts) {
|
|
|
195
231
|
}
|
|
196
232
|
// Top costly calls
|
|
197
233
|
const costlyCalls = calls.filter(c => c.estimatedCostUsd > 0).sort((a, b) => b.estimatedCostUsd - a.estimatedCostUsd).slice(0, 5);
|
|
234
|
+
// By tier
|
|
235
|
+
if (Object.keys(byTier).length > 1) {
|
|
236
|
+
console.log(chalk_1.default.gray('\n ' + '─'.repeat(60)));
|
|
237
|
+
console.log(chalk_1.default.bold(' Model Tier Analysis'));
|
|
238
|
+
const tierOrder = ['frontier', 'standard', 'mini'];
|
|
239
|
+
const tierLabels = { frontier: '🔴 Frontier', standard: '🟡 Standard', mini: '🟢 Mini' };
|
|
240
|
+
for (const tier of tierOrder) {
|
|
241
|
+
const data = byTier[tier];
|
|
242
|
+
if (!data)
|
|
243
|
+
continue;
|
|
244
|
+
const pct = totalCost > 0 ? ((data.cost / totalCost) * 100).toFixed(0) : '0';
|
|
245
|
+
const callPct = calls.length > 0 ? ((data.calls / calls.length) * 100).toFixed(0) : '0';
|
|
246
|
+
const errRate = data.calls > 0 ? ((data.errors / data.calls) * 100).toFixed(0) : '0';
|
|
247
|
+
console.log(` ${(tierLabels[tier] || tier).padEnd(16)} $${data.cost.toFixed(4).padEnd(10)} ${chalk_1.default.gray(pct + '% cost')} ${data.calls} calls (${callPct}%) avg ${data.avgLatency.toFixed(0)}ms ${data.errors > 0 ? chalk_1.default.red(errRate + '% err') : chalk_1.default.green('0% err')}`);
|
|
248
|
+
}
|
|
249
|
+
// Tier optimization suggestion
|
|
250
|
+
const frontierPct = byTier.frontier ? (byTier.frontier.calls / calls.length) * 100 : 0;
|
|
251
|
+
if (frontierPct > 50) {
|
|
252
|
+
console.log(chalk_1.default.yellow(` 💡 ${frontierPct.toFixed(0)}% of calls use frontier models. Consider routing simple tasks to mini tier for ~75% savings.`));
|
|
253
|
+
}
|
|
254
|
+
}
|
|
198
255
|
// By agent (if agent data exists)
|
|
199
256
|
if (Object.keys(byAgent).length > 0) {
|
|
200
257
|
console.log(chalk_1.default.gray('\n ' + '─'.repeat(60)));
|
package/package.json
CHANGED
|
@@ -122,10 +122,44 @@ export function costReportCommand(opts: { json?: boolean; budget?: string }): vo
|
|
|
122
122
|
}
|
|
123
123
|
}
|
|
124
124
|
|
|
125
|
+
// Model tier analysis — classify models into frontier/standard/mini tiers.
// Rules are tried in order using substring matching, so a more specific pattern
// must appear before any shorter pattern it contains (gpt-4o-mini before gpt-4o,
// gpt-4.1-mini before gpt-4).
const TIER_RULES: Array<[string, string]> = [
  ['gpt-4o-mini', 'mini'], ['gpt-4-turbo', 'frontier'], ['gpt-4o', 'standard'],
  // Small gpt-4.1 variants must be listed before the broad 'gpt-4' rule,
  // otherwise they would be reported as frontier models.
  ['gpt-4.1-mini', 'mini'], ['gpt-4.1-nano', 'mini'], ['gpt-4', 'frontier'],
  ['gpt-3.5-turbo', 'mini'], ['o1-mini', 'standard'], ['o1-pro', 'frontier'], ['o1', 'frontier'],
  ['o3-mini', 'standard'], ['o3', 'frontier'], ['o4-mini', 'standard'],
  ['claude-opus', 'frontier'], ['claude-sonnet', 'standard'], ['claude-haiku', 'mini'],
  ['gemini-2.5-flash-lite', 'mini'], ['gemini-2.5-flash', 'standard'], ['gemini-2.5-pro', 'frontier'],
  ['gemini-2.0-flash', 'mini'], ['gemini-1.5-pro', 'frontier'], ['gemini-1.5-flash', 'mini'],
];

/**
 * Classify a model identifier into a cost tier.
 *
 * Explicit TIER_RULES are checked first (case-insensitive substring match, in
 * list order). If none matches, the name is split on non-alphanumeric
 * delimiters and classified by keyword tokens; mini keywords are checked
 * before frontier keywords.
 *
 * @param model Model identifier, e.g. "gpt-4o-mini" or "gemini-2.5-pro".
 * @returns 'frontier', 'standard' or 'mini' ('standard' when nothing matches).
 */
function classifyTier(model: string): string {
  const name = model.toLowerCase();
  for (const [pattern, tier] of TIER_RULES) {
    if (name.includes(pattern)) return tier;
  }
  // Compare whole tokens rather than raw substrings: a plain includes('mini')
  // also matches the "mini" inside "gemini" (and "minimax"), which filed every
  // Gemini model without an explicit rule under the mini tier.
  const tokens = name.split(/[^a-z0-9]+/);
  const hasToken = (keywords: readonly string[]): boolean =>
    keywords.some((keyword) => tokens.includes(keyword));
  if (hasToken(['mini', 'nano', 'lite', 'haiku', 'flash'])) return 'mini';
  if (hasToken(['pro', 'opus', 'turbo'])) return 'frontier';
  return 'standard';
}
|
|
144
|
+
|
|
145
|
+
const byTier: Record<string, { calls: number; tokens: number; cost: number; avgLatency: number; errors: number }> = {};
|
|
146
|
+
for (const c of calls) {
|
|
147
|
+
const tier = classifyTier(c.model || '');
|
|
148
|
+
if (!byTier[tier]) byTier[tier] = { calls: 0, tokens: 0, cost: 0, avgLatency: 0, errors: 0 };
|
|
149
|
+
byTier[tier].calls++;
|
|
150
|
+
byTier[tier].tokens += c.totalTokens || 0;
|
|
151
|
+
byTier[tier].cost += c.estimatedCostUsd || 0;
|
|
152
|
+
byTier[tier].avgLatency += c.durationMs || 0;
|
|
153
|
+
if (c.error) byTier[tier].errors++;
|
|
154
|
+
}
|
|
155
|
+
for (const t of Object.values(byTier)) {
|
|
156
|
+
t.avgLatency = t.calls > 0 ? t.avgLatency / t.calls : 0;
|
|
157
|
+
}
|
|
158
|
+
|
|
125
159
|
if (opts.json) {
|
|
126
160
|
console.log(JSON.stringify({
|
|
127
161
|
summary: { totalCost, totalTokens, totalInputTokens, totalOutputTokens, totalCalls: calls.length, totalDurationMs: totalDuration, errors: errorCount, monthlyProjection },
|
|
128
|
-
byProvider, byModel,
|
|
162
|
+
byProvider, byModel, byTier,
|
|
129
163
|
...(Object.keys(byAgent).length > 0 ? { byAgent } : {}),
|
|
130
164
|
}, null, 2));
|
|
131
165
|
return;
|
|
@@ -175,6 +209,27 @@ export function costReportCommand(opts: { json?: boolean; budget?: string }): vo
|
|
|
175
209
|
|
|
176
210
|
// Top costly calls
|
|
177
211
|
const costlyCalls = calls.filter(c => c.estimatedCostUsd > 0).sort((a, b) => b.estimatedCostUsd - a.estimatedCostUsd).slice(0, 5);
|
|
212
|
+
// By tier
|
|
213
|
+
if (Object.keys(byTier).length > 1) {
|
|
214
|
+
console.log(chalk.gray('\n ' + '─'.repeat(60)));
|
|
215
|
+
console.log(chalk.bold(' Model Tier Analysis'));
|
|
216
|
+
const tierOrder = ['frontier', 'standard', 'mini'];
|
|
217
|
+
const tierLabels: Record<string, string> = { frontier: '🔴 Frontier', standard: '🟡 Standard', mini: '🟢 Mini' };
|
|
218
|
+
for (const tier of tierOrder) {
|
|
219
|
+
const data = byTier[tier];
|
|
220
|
+
if (!data) continue;
|
|
221
|
+
const pct = totalCost > 0 ? ((data.cost / totalCost) * 100).toFixed(0) : '0';
|
|
222
|
+
const callPct = calls.length > 0 ? ((data.calls / calls.length) * 100).toFixed(0) : '0';
|
|
223
|
+
const errRate = data.calls > 0 ? ((data.errors / data.calls) * 100).toFixed(0) : '0';
|
|
224
|
+
console.log(` ${(tierLabels[tier] || tier).padEnd(16)} $${data.cost.toFixed(4).padEnd(10)} ${chalk.gray(pct + '% cost')} ${data.calls} calls (${callPct}%) avg ${data.avgLatency.toFixed(0)}ms ${data.errors > 0 ? chalk.red(errRate + '% err') : chalk.green('0% err')}`);
|
|
225
|
+
}
|
|
226
|
+
// Tier optimization suggestion
|
|
227
|
+
const frontierPct = byTier.frontier ? (byTier.frontier.calls / calls.length) * 100 : 0;
|
|
228
|
+
if (frontierPct > 50) {
|
|
229
|
+
console.log(chalk.yellow(` 💡 ${frontierPct.toFixed(0)}% of calls use frontier models. Consider routing simple tasks to mini tier for ~75% savings.`));
|
|
230
|
+
}
|
|
231
|
+
}
|
|
232
|
+
|
|
178
233
|
// By agent (if agent data exists)
|
|
179
234
|
if (Object.keys(byAgent).length > 0) {
|
|
180
235
|
console.log(chalk.gray('\n ' + '─'.repeat(60)));
|