trickle-cli 0.1.187 → 0.1.188
This diff shows the changes between two publicly available versions of this package, as released to one of the supported registries and as they appear in their respective public registries. The information in this diff is provided for informational purposes only.
- package/dist/commands/run-diff.d.ts +20 -0
- package/dist/commands/run-diff.js +72 -4
- package/package.json +1 -1
- package/src/commands/run-diff.ts +91 -4
package/dist/commands/run-diff.d.ts
CHANGED
|
@@ -46,6 +46,26 @@ export interface RunDiff {
|
|
|
46
46
|
newAlerts: string[];
|
|
47
47
|
resolvedAlerts: string[];
|
|
48
48
|
};
|
|
49
|
+
llm: {
|
|
50
|
+
beforeCalls: number;
|
|
51
|
+
afterCalls: number;
|
|
52
|
+
beforeCost: number;
|
|
53
|
+
afterCost: number;
|
|
54
|
+
costDelta: number;
|
|
55
|
+
beforeTokens: number;
|
|
56
|
+
afterTokens: number;
|
|
57
|
+
modelChanges: string[];
|
|
58
|
+
};
|
|
59
|
+
agents: {
|
|
60
|
+
beforeSteps: number;
|
|
61
|
+
afterSteps: number;
|
|
62
|
+
beforeTools: string[];
|
|
63
|
+
afterTools: string[];
|
|
64
|
+
newTools: string[];
|
|
65
|
+
removedTools: string[];
|
|
66
|
+
beforeErrors: number;
|
|
67
|
+
afterErrors: number;
|
|
68
|
+
};
|
|
49
69
|
verdict: 'improved' | 'regressed' | 'unchanged' | 'mixed';
|
|
50
70
|
}
|
|
51
71
|
export declare function diffRuns(beforeDir: string, afterDir: string): RunDiff;
|
package/dist/commands/run-diff.js
CHANGED
|
@@ -81,7 +81,21 @@ function collectRunData(dir) {
|
|
|
81
81
|
}
|
|
82
82
|
const errorMessages = new Set(errors.map((e) => (e.message || '').substring(0, 100)));
|
|
83
83
|
const alertMessages = new Set(alerts.map((a) => (a.message || '').substring(0, 100)));
|
|
84
|
-
|
|
84
|
+
// LLM data
|
|
85
|
+
const llmCalls = readJsonl(path.join(dir, 'llm.jsonl'));
|
|
86
|
+
const llmCost = llmCalls.reduce((s, c) => s + (c.estimatedCostUsd || 0), 0);
|
|
87
|
+
const llmTokens = llmCalls.reduce((s, c) => s + (c.totalTokens || 0), 0);
|
|
88
|
+
const llmModels = new Set(llmCalls.map((c) => `${c.provider}/${c.model}`));
|
|
89
|
+
// Agent data
|
|
90
|
+
const agentEvents = readJsonl(path.join(dir, 'agents.jsonl'));
|
|
91
|
+
const agentTools = new Set(agentEvents.filter((e) => e.event === 'tool_start' || e.event === 'tool_end').map((e) => e.tool || ''));
|
|
92
|
+
const agentErrors = agentEvents.filter((e) => e.event?.includes('error'));
|
|
93
|
+
return {
|
|
94
|
+
funcMap, queryPatterns, errorMessages, alertMessages,
|
|
95
|
+
queryCount: queries.length, errorCount: errors.length, alertCount: alerts.length,
|
|
96
|
+
llmCalls: llmCalls.length, llmCost, llmTokens, llmModels,
|
|
97
|
+
agentEvents: agentEvents.length, agentTools, agentErrors: agentErrors.length,
|
|
98
|
+
};
|
|
85
99
|
}
|
|
86
100
|
function diffRuns(beforeDir, afterDir) {
|
|
87
101
|
const before = collectRunData(beforeDir);
|
|
@@ -116,9 +130,29 @@ function diffRuns(beforeDir, afterDir) {
|
|
|
116
130
|
// Alerts
|
|
117
131
|
const newAlerts = [...after.alertMessages].filter(a => !before.alertMessages.has(a));
|
|
118
132
|
const resolvedAlerts = [...before.alertMessages].filter(a => !after.alertMessages.has(a));
|
|
133
|
+
// LLM comparison
|
|
134
|
+
const costDelta = after.llmCost - before.llmCost;
|
|
135
|
+
const afterModels = [...after.llmModels];
|
|
136
|
+
const beforeModels = [...before.llmModels];
|
|
137
|
+
const modelChanges = [];
|
|
138
|
+
for (const m of afterModels)
|
|
139
|
+
if (!before.llmModels.has(m))
|
|
140
|
+
modelChanges.push(`+ ${m}`);
|
|
141
|
+
for (const m of beforeModels)
|
|
142
|
+
if (!after.llmModels.has(m))
|
|
143
|
+
modelChanges.push(`- ${m}`);
|
|
144
|
+
// Agent comparison
|
|
145
|
+
const afterTools = [...after.agentTools];
|
|
146
|
+
const beforeTools = [...before.agentTools];
|
|
147
|
+
const newAgentTools = afterTools.filter(t => !before.agentTools.has(t));
|
|
148
|
+
const removedAgentTools = beforeTools.filter(t => !after.agentTools.has(t));
|
|
119
149
|
// Verdict
|
|
120
|
-
const improvements = resolvedErrors.length + resolvedAlerts.length + fasterBy.length +
|
|
121
|
-
|
|
150
|
+
const improvements = resolvedErrors.length + resolvedAlerts.length + fasterBy.length +
|
|
151
|
+
(nPlusOneAfter < nPlusOneBefore ? 1 : 0) + (costDelta < -0.001 ? 1 : 0) +
|
|
152
|
+
(after.agentErrors < before.agentErrors ? 1 : 0);
|
|
153
|
+
const regressions = newErrors.length + newAlerts.length + slowerBy.length +
|
|
154
|
+
(nPlusOneAfter > nPlusOneBefore ? 1 : 0) + (costDelta > before.llmCost * 0.2 ? 1 : 0) +
|
|
155
|
+
(after.agentErrors > before.agentErrors ? 1 : 0);
|
|
122
156
|
const verdict = improvements > 0 && regressions === 0 ? 'improved' :
|
|
123
157
|
regressions > 0 && improvements === 0 ? 'regressed' :
|
|
124
158
|
improvements > 0 && regressions > 0 ? 'mixed' : 'unchanged';
|
|
@@ -127,6 +161,19 @@ function diffRuns(beforeDir, afterDir) {
|
|
|
127
161
|
queries: { beforeTotal: before.queryCount, afterTotal: after.queryCount, newPatterns: newPatterns.slice(0, 5), removedPatterns: removedPatterns.slice(0, 5), nPlusOneBefore, nPlusOneAfter },
|
|
128
162
|
errors: { beforeCount: before.errorCount, afterCount: after.errorCount, newErrors, resolvedErrors },
|
|
129
163
|
alerts: { beforeCount: before.alertCount, afterCount: after.alertCount, newAlerts, resolvedAlerts },
|
|
164
|
+
llm: {
|
|
165
|
+
beforeCalls: before.llmCalls, afterCalls: after.llmCalls,
|
|
166
|
+
beforeCost: Math.round(before.llmCost * 10000) / 10000, afterCost: Math.round(after.llmCost * 10000) / 10000,
|
|
167
|
+
costDelta: Math.round(costDelta * 10000) / 10000,
|
|
168
|
+
beforeTokens: before.llmTokens, afterTokens: after.llmTokens,
|
|
169
|
+
modelChanges,
|
|
170
|
+
},
|
|
171
|
+
agents: {
|
|
172
|
+
beforeSteps: before.agentEvents, afterSteps: after.agentEvents,
|
|
173
|
+
beforeTools, afterTools,
|
|
174
|
+
newTools: newAgentTools, removedTools: removedAgentTools,
|
|
175
|
+
beforeErrors: before.agentErrors, afterErrors: after.agentErrors,
|
|
176
|
+
},
|
|
130
177
|
verdict,
|
|
131
178
|
};
|
|
132
179
|
}
|
|
@@ -141,7 +188,7 @@ function runDiffCommand(opts) {
|
|
|
141
188
|
}
|
|
142
189
|
if (!fs.existsSync(snapshotDir))
|
|
143
190
|
fs.mkdirSync(snapshotDir, { recursive: true });
|
|
144
|
-
for (const f of ['observations.jsonl', 'queries.jsonl', 'errors.jsonl', 'alerts.jsonl', 'calltrace.jsonl']) {
|
|
191
|
+
for (const f of ['observations.jsonl', 'queries.jsonl', 'errors.jsonl', 'alerts.jsonl', 'calltrace.jsonl', 'llm.jsonl', 'agents.jsonl', 'mcp.jsonl']) {
|
|
145
192
|
const src = path.join(trickleDir, f);
|
|
146
193
|
if (fs.existsSync(src))
|
|
147
194
|
fs.copyFileSync(src, path.join(snapshotDir, f));
|
|
@@ -188,6 +235,27 @@ function runDiffCommand(opts) {
|
|
|
188
235
|
console.log(chalk_1.default.red(` New errors: ${diff.errors.newErrors.join(', ').substring(0, 80)}`));
|
|
189
236
|
if (diff.errors.resolvedErrors.length > 0)
|
|
190
237
|
console.log(chalk_1.default.green(` Resolved: ${diff.errors.resolvedErrors.join(', ').substring(0, 80)}`));
|
|
238
|
+
// LLM diff
|
|
239
|
+
if (diff.llm.beforeCalls > 0 || diff.llm.afterCalls > 0) {
|
|
240
|
+
console.log(` LLM calls: ${diff.llm.beforeCalls} → ${diff.llm.afterCalls}`);
|
|
241
|
+
const costColor = diff.llm.costDelta > 0 ? chalk_1.default.red : diff.llm.costDelta < 0 ? chalk_1.default.green : chalk_1.default.gray;
|
|
242
|
+
const costSign = diff.llm.costDelta > 0 ? '+' : '';
|
|
243
|
+
console.log(` LLM cost: $${diff.llm.beforeCost} → $${diff.llm.afterCost} (${costColor(costSign + '$' + diff.llm.costDelta.toFixed(4))})`);
|
|
244
|
+
if (diff.llm.modelChanges.length > 0)
|
|
245
|
+
console.log(chalk_1.default.cyan(` Model changes: ${diff.llm.modelChanges.join(', ')}`));
|
|
246
|
+
}
|
|
247
|
+
// Agent diff
|
|
248
|
+
if (diff.agents.beforeSteps > 0 || diff.agents.afterSteps > 0) {
|
|
249
|
+
console.log(` Agent steps: ${diff.agents.beforeSteps} → ${diff.agents.afterSteps}`);
|
|
250
|
+
if (diff.agents.newTools.length > 0)
|
|
251
|
+
console.log(chalk_1.default.green(` + New tools: ${diff.agents.newTools.join(', ')}`));
|
|
252
|
+
if (diff.agents.removedTools.length > 0)
|
|
253
|
+
console.log(chalk_1.default.red(` - Removed tools: ${diff.agents.removedTools.join(', ')}`));
|
|
254
|
+
if (diff.agents.beforeErrors !== diff.agents.afterErrors) {
|
|
255
|
+
const errColor = diff.agents.afterErrors > diff.agents.beforeErrors ? chalk_1.default.red : chalk_1.default.green;
|
|
256
|
+
console.log(errColor(` Agent errors: ${diff.agents.beforeErrors} → ${diff.agents.afterErrors}`));
|
|
257
|
+
}
|
|
258
|
+
}
|
|
191
259
|
console.log(chalk_1.default.gray(' ' + '─'.repeat(50)));
|
|
192
260
|
console.log('');
|
|
193
261
|
}
|
package/package.json
CHANGED
package/src/commands/run-diff.ts
CHANGED
|
@@ -48,6 +48,26 @@ export interface RunDiff {
|
|
|
48
48
|
newAlerts: string[];
|
|
49
49
|
resolvedAlerts: string[];
|
|
50
50
|
};
|
|
51
|
+
llm: {
|
|
52
|
+
beforeCalls: number;
|
|
53
|
+
afterCalls: number;
|
|
54
|
+
beforeCost: number;
|
|
55
|
+
afterCost: number;
|
|
56
|
+
costDelta: number;
|
|
57
|
+
beforeTokens: number;
|
|
58
|
+
afterTokens: number;
|
|
59
|
+
modelChanges: string[];
|
|
60
|
+
};
|
|
61
|
+
agents: {
|
|
62
|
+
beforeSteps: number;
|
|
63
|
+
afterSteps: number;
|
|
64
|
+
beforeTools: string[];
|
|
65
|
+
afterTools: string[];
|
|
66
|
+
newTools: string[];
|
|
67
|
+
removedTools: string[];
|
|
68
|
+
beforeErrors: number;
|
|
69
|
+
afterErrors: number;
|
|
70
|
+
};
|
|
51
71
|
verdict: 'improved' | 'regressed' | 'unchanged' | 'mixed';
|
|
52
72
|
}
|
|
53
73
|
|
|
@@ -72,7 +92,23 @@ function collectRunData(dir: string) {
|
|
|
72
92
|
const errorMessages = new Set(errors.map((e: any) => (e.message || '').substring(0, 100)));
|
|
73
93
|
const alertMessages = new Set(alerts.map((a: any) => (a.message || '').substring(0, 100)));
|
|
74
94
|
|
|
75
|
-
|
|
95
|
+
// LLM data
|
|
96
|
+
const llmCalls = readJsonl(path.join(dir, 'llm.jsonl'));
|
|
97
|
+
const llmCost = llmCalls.reduce((s: number, c: any) => s + (c.estimatedCostUsd || 0), 0);
|
|
98
|
+
const llmTokens = llmCalls.reduce((s: number, c: any) => s + (c.totalTokens || 0), 0);
|
|
99
|
+
const llmModels = new Set(llmCalls.map((c: any) => `${c.provider}/${c.model}`));
|
|
100
|
+
|
|
101
|
+
// Agent data
|
|
102
|
+
const agentEvents = readJsonl(path.join(dir, 'agents.jsonl'));
|
|
103
|
+
const agentTools = new Set(agentEvents.filter((e: any) => e.event === 'tool_start' || e.event === 'tool_end').map((e: any) => e.tool || ''));
|
|
104
|
+
const agentErrors = agentEvents.filter((e: any) => e.event?.includes('error'));
|
|
105
|
+
|
|
106
|
+
return {
|
|
107
|
+
funcMap, queryPatterns, errorMessages, alertMessages,
|
|
108
|
+
queryCount: queries.length, errorCount: errors.length, alertCount: alerts.length,
|
|
109
|
+
llmCalls: llmCalls.length, llmCost, llmTokens, llmModels,
|
|
110
|
+
agentEvents: agentEvents.length, agentTools, agentErrors: agentErrors.length,
|
|
111
|
+
};
|
|
76
112
|
}
|
|
77
113
|
|
|
78
114
|
export function diffRuns(beforeDir: string, afterDir: string): RunDiff {
|
|
@@ -112,9 +148,27 @@ export function diffRuns(beforeDir: string, afterDir: string): RunDiff {
|
|
|
112
148
|
const newAlerts = [...after.alertMessages].filter(a => !before.alertMessages.has(a));
|
|
113
149
|
const resolvedAlerts = [...before.alertMessages].filter(a => !after.alertMessages.has(a));
|
|
114
150
|
|
|
151
|
+
// LLM comparison
|
|
152
|
+
const costDelta = after.llmCost - before.llmCost;
|
|
153
|
+
const afterModels = [...after.llmModels];
|
|
154
|
+
const beforeModels = [...before.llmModels];
|
|
155
|
+
const modelChanges: string[] = [];
|
|
156
|
+
for (const m of afterModels) if (!before.llmModels.has(m)) modelChanges.push(`+ ${m}`);
|
|
157
|
+
for (const m of beforeModels) if (!after.llmModels.has(m)) modelChanges.push(`- ${m}`);
|
|
158
|
+
|
|
159
|
+
// Agent comparison
|
|
160
|
+
const afterTools = [...after.agentTools];
|
|
161
|
+
const beforeTools = [...before.agentTools];
|
|
162
|
+
const newAgentTools = afterTools.filter(t => !before.agentTools.has(t));
|
|
163
|
+
const removedAgentTools = beforeTools.filter(t => !after.agentTools.has(t));
|
|
164
|
+
|
|
115
165
|
// Verdict
|
|
116
|
-
const improvements = resolvedErrors.length + resolvedAlerts.length + fasterBy.length +
|
|
117
|
-
|
|
166
|
+
const improvements = resolvedErrors.length + resolvedAlerts.length + fasterBy.length +
|
|
167
|
+
(nPlusOneAfter < nPlusOneBefore ? 1 : 0) + (costDelta < -0.001 ? 1 : 0) +
|
|
168
|
+
(after.agentErrors < before.agentErrors ? 1 : 0);
|
|
169
|
+
const regressions = newErrors.length + newAlerts.length + slowerBy.length +
|
|
170
|
+
(nPlusOneAfter > nPlusOneBefore ? 1 : 0) + (costDelta > before.llmCost * 0.2 ? 1 : 0) +
|
|
171
|
+
(after.agentErrors > before.agentErrors ? 1 : 0);
|
|
118
172
|
const verdict: RunDiff['verdict'] = improvements > 0 && regressions === 0 ? 'improved' :
|
|
119
173
|
regressions > 0 && improvements === 0 ? 'regressed' :
|
|
120
174
|
improvements > 0 && regressions > 0 ? 'mixed' : 'unchanged';
|
|
@@ -124,6 +178,19 @@ export function diffRuns(beforeDir: string, afterDir: string): RunDiff {
|
|
|
124
178
|
queries: { beforeTotal: before.queryCount, afterTotal: after.queryCount, newPatterns: newPatterns.slice(0, 5), removedPatterns: removedPatterns.slice(0, 5), nPlusOneBefore, nPlusOneAfter },
|
|
125
179
|
errors: { beforeCount: before.errorCount, afterCount: after.errorCount, newErrors, resolvedErrors },
|
|
126
180
|
alerts: { beforeCount: before.alertCount, afterCount: after.alertCount, newAlerts, resolvedAlerts },
|
|
181
|
+
llm: {
|
|
182
|
+
beforeCalls: before.llmCalls, afterCalls: after.llmCalls,
|
|
183
|
+
beforeCost: Math.round(before.llmCost * 10000) / 10000, afterCost: Math.round(after.llmCost * 10000) / 10000,
|
|
184
|
+
costDelta: Math.round(costDelta * 10000) / 10000,
|
|
185
|
+
beforeTokens: before.llmTokens, afterTokens: after.llmTokens,
|
|
186
|
+
modelChanges,
|
|
187
|
+
},
|
|
188
|
+
agents: {
|
|
189
|
+
beforeSteps: before.agentEvents, afterSteps: after.agentEvents,
|
|
190
|
+
beforeTools, afterTools,
|
|
191
|
+
newTools: newAgentTools, removedTools: removedAgentTools,
|
|
192
|
+
beforeErrors: before.agentErrors, afterErrors: after.agentErrors,
|
|
193
|
+
},
|
|
127
194
|
verdict,
|
|
128
195
|
};
|
|
129
196
|
}
|
|
@@ -146,7 +213,7 @@ export function runDiffCommand(opts: DiffOptions): void {
|
|
|
146
213
|
return;
|
|
147
214
|
}
|
|
148
215
|
if (!fs.existsSync(snapshotDir)) fs.mkdirSync(snapshotDir, { recursive: true });
|
|
149
|
-
for (const f of ['observations.jsonl', 'queries.jsonl', 'errors.jsonl', 'alerts.jsonl', 'calltrace.jsonl']) {
|
|
216
|
+
for (const f of ['observations.jsonl', 'queries.jsonl', 'errors.jsonl', 'alerts.jsonl', 'calltrace.jsonl', 'llm.jsonl', 'agents.jsonl', 'mcp.jsonl']) {
|
|
150
217
|
const src = path.join(trickleDir, f);
|
|
151
218
|
if (fs.existsSync(src)) fs.copyFileSync(src, path.join(snapshotDir, f));
|
|
152
219
|
}
|
|
@@ -199,6 +266,26 @@ export function runDiffCommand(opts: DiffOptions): void {
|
|
|
199
266
|
if (diff.errors.newErrors.length > 0) console.log(chalk.red(` New errors: ${diff.errors.newErrors.join(', ').substring(0, 80)}`));
|
|
200
267
|
if (diff.errors.resolvedErrors.length > 0) console.log(chalk.green(` Resolved: ${diff.errors.resolvedErrors.join(', ').substring(0, 80)}`));
|
|
201
268
|
|
|
269
|
+
// LLM diff
|
|
270
|
+
if (diff.llm.beforeCalls > 0 || diff.llm.afterCalls > 0) {
|
|
271
|
+
console.log(` LLM calls: ${diff.llm.beforeCalls} → ${diff.llm.afterCalls}`);
|
|
272
|
+
const costColor = diff.llm.costDelta > 0 ? chalk.red : diff.llm.costDelta < 0 ? chalk.green : chalk.gray;
|
|
273
|
+
const costSign = diff.llm.costDelta > 0 ? '+' : '';
|
|
274
|
+
console.log(` LLM cost: $${diff.llm.beforeCost} → $${diff.llm.afterCost} (${costColor(costSign + '$' + diff.llm.costDelta.toFixed(4))})`);
|
|
275
|
+
if (diff.llm.modelChanges.length > 0) console.log(chalk.cyan(` Model changes: ${diff.llm.modelChanges.join(', ')}`));
|
|
276
|
+
}
|
|
277
|
+
|
|
278
|
+
// Agent diff
|
|
279
|
+
if (diff.agents.beforeSteps > 0 || diff.agents.afterSteps > 0) {
|
|
280
|
+
console.log(` Agent steps: ${diff.agents.beforeSteps} → ${diff.agents.afterSteps}`);
|
|
281
|
+
if (diff.agents.newTools.length > 0) console.log(chalk.green(` + New tools: ${diff.agents.newTools.join(', ')}`));
|
|
282
|
+
if (diff.agents.removedTools.length > 0) console.log(chalk.red(` - Removed tools: ${diff.agents.removedTools.join(', ')}`));
|
|
283
|
+
if (diff.agents.beforeErrors !== diff.agents.afterErrors) {
|
|
284
|
+
const errColor = diff.agents.afterErrors > diff.agents.beforeErrors ? chalk.red : chalk.green;
|
|
285
|
+
console.log(errColor(` Agent errors: ${diff.agents.beforeErrors} → ${diff.agents.afterErrors}`));
|
|
286
|
+
}
|
|
287
|
+
}
|
|
288
|
+
|
|
202
289
|
console.log(chalk.gray(' ' + '─'.repeat(50)));
|
|
203
290
|
console.log('');
|
|
204
291
|
}
|