@townco/debugger 0.1.31 → 0.1.33
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +14 -14
- package/src/App.tsx +1 -0
- package/src/analysis/analyzer.ts +1 -2
- package/src/analysis/comparison-analyzer.ts +528 -0
- package/src/analysis/comparison-schema.ts +151 -0
- package/src/analysis/comparison-types.ts +194 -0
- package/src/analysis-db.ts +13 -6
- package/src/comparison-db.ts +75 -3
- package/src/components/AnalyzeAllButton.tsx +6 -2
- package/src/components/ComparisonAnalysisDialog.tsx +591 -0
- package/src/components/DebuggerHeader.tsx +0 -1
- package/src/components/LogList.tsx +9 -0
- package/src/components/SessionTraceList.tsx +9 -0
- package/src/components/SpanDetailsPanel.tsx +20 -1
- package/src/components/SpanTimeline.tsx +31 -4
- package/src/components/SpanTree.tsx +10 -1
- package/src/components/TurnMetadataPanel.tsx +0 -1
- package/src/components/UnifiedTimeline.tsx +25 -35
- package/src/components/ui/button.tsx +1 -1
- package/src/components/ui/card.tsx +1 -1
- package/src/components/ui/checkbox.tsx +43 -0
- package/src/components/ui/input.tsx +1 -1
- package/src/components/ui/label.tsx +1 -1
- package/src/components/ui/select.tsx +1 -1
- package/src/components/ui/textarea.tsx +1 -1
- package/src/frontend.tsx +2 -0
- package/src/lib/metrics.test.ts +2 -0
- package/src/lib/turnExtractor.ts +28 -0
- package/src/pages/ComparisonView.tsx +1310 -322
- package/src/pages/FindSessions.tsx +3 -1
- package/src/pages/TownHall.tsx +30 -14
- package/src/server.ts +177 -7
- package/src/types.ts +4 -0
- package/styles/globals.css +120 -0
- package/tsconfig.json +2 -2
|
@@ -0,0 +1,591 @@
|
|
|
1
|
+
import { useState } from "react";
|
|
2
|
+
import type { SessionComparisonAnalysis } from "../analysis/comparison-types";
|
|
3
|
+
import { Dialog, DialogContent, DialogHeader, DialogTitle } from "./ui/dialog";
|
|
4
|
+
|
|
5
|
+
/** Props accepted by `ComparisonAnalysisDialog`. */
type Props = {
  /** Controls whether the dialog is shown. */
  open: boolean;
  /** Callback used to dismiss the dialog. */
  onClose: () => void;
  /** Comparison analysis whose reports are rendered inside the dialog. */
  analysis: SessionComparisonAnalysis;
};
|
|
10
|
+
|
|
11
|
+
/**
 * Titled content group: an underlined heading followed by vertically spaced
 * children.
 *
 * @param props.title - Heading text shown above the content.
 * @param props.children - Content rendered below the heading.
 */
function Section(props: { title: string; children: React.ReactNode }) {
  const { title, children } = props;

  const heading = (
    <h3 className="text-sm font-semibold text-foreground border-b pb-2">
      {title}
    </h3>
  );

  return (
    <div className="space-y-3">
      {heading}
      <div className="space-y-3">{children}</div>
    </div>
  );
}
|
|
27
|
+
|
|
28
|
+
/**
 * Label/value pair: a small muted label above the value text. The value keeps
 * its line breaks and wraps long words (`whitespace-pre-wrap break-words`).
 *
 * @param props.label - Caption shown above the value.
 * @param props.value - Text shown as the field content.
 */
function Field(props: { label: string; value: string }) {
  const caption = (
    <div className="text-xs font-medium text-muted-foreground">
      {props.label}
    </div>
  );

  return (
    <div className="space-y-1">
      {caption}
      <div className="text-sm text-foreground whitespace-pre-wrap break-words">
        {props.value}
      </div>
    </div>
  );
}
|
|
38
|
+
|
|
39
|
+
/** Pill classes for every verdict value this badge knows how to colour. */
const VERDICT_BADGE_CLASSES: Record<string, string> = {
  // Reproducibility verdicts
  STABLE: "bg-green-100 text-green-800 dark:bg-green-900 dark:text-green-200",
  UNSTABLE: "bg-red-100 text-red-800 dark:bg-red-900 dark:text-red-200",
  PARTIALLY_STABLE:
    "bg-yellow-100 text-yellow-800 dark:bg-yellow-900 dark:text-yellow-200",
  // Impact verdicts
  IMPROVED: "bg-green-100 text-green-800 dark:bg-green-900 dark:text-green-200",
  DEGRADED: "bg-red-100 text-red-800 dark:bg-red-900 dark:text-red-200",
  NEUTRAL: "bg-gray-100 text-gray-800 dark:bg-gray-700 dark:text-gray-200",
  MIXED:
    "bg-orange-100 text-orange-800 dark:bg-orange-900 dark:text-orange-200",
};

/**
 * Rounded pill showing a report verdict, with underscores replaced by spaces.
 * Verdicts that are not in the class map fall back to a plain gray pill.
 *
 * @param props.verdict - Verdict identifier such as `PARTIALLY_STABLE`.
 */
function VerdictBadge({ verdict }: { verdict: string }) {
  const tone = VERDICT_BADGE_CLASSES[verdict] || "bg-gray-100 text-gray-800";
  const text = verdict.split("_").join(" ");

  return (
    <span
      className={`inline-flex items-center px-3 py-1 rounded-full text-sm font-semibold ${tone}`}
    >
      {text}
    </span>
  );
}
|
|
63
|
+
|
|
64
|
+
/** Text colour per confidence level. */
const CONFIDENCE_TEXT_CLASSES: Record<string, string> = {
  HIGH: "text-green-600 dark:text-green-400",
  MEDIUM: "text-yellow-600 dark:text-yellow-400",
  LOW: "text-red-600 dark:text-red-400",
};

/**
 * Inline "<level> confidence" label coloured by confidence level. Levels that
 * are not in the class map are rendered in gray.
 *
 * @param props.confidence - Confidence level, e.g. `HIGH`, `MEDIUM` or `LOW`.
 */
function ConfidenceBadge({ confidence }: { confidence: string }) {
  const tone = CONFIDENCE_TEXT_CLASSES[confidence] || "text-gray-600";

  return (
    <span className={`text-xs font-medium ${tone}`}>
      {confidence} confidence
    </span>
  );
}
|
|
79
|
+
|
|
80
|
+
/** Chip classes per recommendation priority. */
const PRIORITY_BADGE_CLASSES: Record<string, string> = {
  HIGH: "bg-red-100 text-red-700 dark:bg-red-900 dark:text-red-300",
  MEDIUM:
    "bg-yellow-100 text-yellow-700 dark:bg-yellow-900 dark:text-yellow-300",
  LOW: "bg-blue-100 text-blue-700 dark:bg-blue-900 dark:text-blue-300",
};

/**
 * Small chip showing a recommendation priority verbatim. Priorities that are
 * not in the class map get a gray background only.
 *
 * @param props.priority - Priority identifier, e.g. `HIGH`, `MEDIUM` or `LOW`.
 */
function PriorityBadge({ priority }: { priority: string }) {
  const tone = PRIORITY_BADGE_CLASSES[priority] || "bg-gray-100";

  return (
    <span
      className={`inline-flex items-center px-2 py-0.5 rounded text-xs font-medium ${tone}`}
    >
      {priority}
    </span>
  );
}
|
|
96
|
+
|
|
97
|
+
/** Chip classes per intended-effect assessment. */
const ASSESSMENT_BADGE_CLASSES: Record<string, string> = {
  ACHIEVED: "bg-green-100 text-green-700 dark:bg-green-900 dark:text-green-300",
  PARTIALLY_ACHIEVED:
    "bg-yellow-100 text-yellow-700 dark:bg-yellow-900 dark:text-yellow-300",
  NOT_ACHIEVED: "bg-red-100 text-red-700 dark:bg-red-900 dark:text-red-300",
  OPPOSITE_EFFECT:
    "bg-purple-100 text-purple-700 dark:bg-purple-900 dark:text-purple-300",
};

/**
 * Small chip showing an assessment with underscores replaced by spaces.
 * Assessments that are not in the class map get a gray background only.
 *
 * @param props.assessment - Assessment identifier such as `NOT_ACHIEVED`.
 */
function AssessmentBadge({ assessment }: { assessment: string }) {
  const tone = ASSESSMENT_BADGE_CLASSES[assessment] || "bg-gray-100";
  const text = assessment.split("_").join(" ");

  return (
    <span
      className={`inline-flex items-center px-2 py-0.5 rounded text-xs font-medium ${tone}`}
    >
      {text}
    </span>
  );
}
|
|
116
|
+
|
|
117
|
+
/**
 * Inline label showing a significance (or severity) level verbatim, coloured
 * by level.
 *
 * Levels that are not in the colour map fall back to the muted gray used for
 * `MINOR`, so an unexpected value never puts the literal string `undefined`
 * into the class list.
 *
 * @param props.significance - Level identifier, e.g. `CRITICAL`, `NOTABLE` or
 *   `MINOR`.
 */
function SignificanceBadge({ significance }: { significance: string }) {
  const colors: Record<string, string> = {
    CRITICAL: "text-red-600 dark:text-red-400",
    NOTABLE: "text-yellow-600 dark:text-yellow-400",
    MINOR: "text-gray-500 dark:text-gray-400",
  };

  // Same `||` fallback pattern as the other badge components in this file.
  const tone = colors[significance] || "text-gray-500 dark:text-gray-400";

  return (
    <span className={`text-xs font-medium ${tone}`}>{significance}</span>
  );
}
|
|
130
|
+
|
|
131
|
+
/**
 * Centered metric tile showing a percentage delta with one decimal place and
 * a caption below it.
 *
 * Increases are shown in red with a leading "+", decreases in green, and no
 * change in gray. The sign and colour are taken from the rounded value that
 * is actually displayed, so a delta that rounds to zero renders as a neutral
 * "0.0%" rather than a coloured "+0.0%" or "-0.0%". Non-finite values (NaN or
 * Infinity) render as a neutral "N/A".
 *
 * @param props.value - Delta in percent.
 * @param props.label - Caption shown under the number.
 */
function MetricDelta({ value, label }: { value: number; label: string }) {
  const neutralColor = "text-gray-600 dark:text-gray-400";

  let text = "N/A";
  let color = neutralColor;

  if (Number.isFinite(value)) {
    const fixed = value.toFixed(1);
    // Re-parse the formatted text so the sign matches what the user sees.
    const shown = Number(fixed);

    if (shown > 0) {
      text = `+${fixed}%`;
      color = "text-red-600 dark:text-red-400";
    } else if (shown < 0) {
      text = `${fixed}%`;
      color = "text-green-600 dark:text-green-400";
    } else {
      // Covers both 0 and -0 (toFixed of a tiny negative yields "-0.0").
      text = "0.0%";
    }
  }

  return (
    <div className="text-center">
      <div className={`text-lg font-semibold ${color}`}>{text}</div>
      <div className="text-xs text-muted-foreground">{label}</div>
    </div>
  );
}
|
|
150
|
+
|
|
151
|
+
/**
 * Button used in the dialog's tab strip. The active tab gets the background
 * colour and a primary-coloured bottom border; inactive tabs are muted and
 * highlight on hover.
 *
 * @param props.active - Whether this tab is the currently selected one.
 * @param props.onClick - Called when the button is clicked.
 * @param props.children - Button label.
 */
function TabButton(props: {
  active: boolean;
  onClick: () => void;
  children: React.ReactNode;
}) {
  const { active, onClick, children } = props;

  let stateClasses: string;
  if (active) {
    stateClasses = "bg-background text-foreground border-b-2 border-primary";
  } else {
    stateClasses =
      "text-muted-foreground hover:text-foreground hover:bg-muted/50";
  }

  return (
    <button
      type="button"
      onClick={onClick}
      className={`px-4 py-2 text-sm font-medium rounded-t-lg transition-colors ${stateClasses}`}
    >
      {children}
    </button>
  );
}
|
|
174
|
+
|
|
175
|
+
/**
 * Modal dialog presenting a session comparison analysis in three tabs:
 * the reproducibility report, the change impact report, and suggested next
 * experiments. The reproducibility tab is selected initially.
 *
 * @param props.open - Whether the dialog is shown.
 * @param props.onClose - Called when the dialog asks to be closed. It is not
 *   called for transitions to the open state.
 * @param props.analysis - Analysis to render. Its `hypothesis`, when present,
 *   is shown in a banner above the tabs.
 */
export function ComparisonAnalysisDialog({ open, onClose, analysis }: Props) {
  const [activeTab, setActiveTab] = useState<
    "reproducibility" | "impact" | "experiments"
  >("reproducibility");

  // Only add the ellipsis when the id is actually truncated.
  const runId = analysis.comparison_run_id;
  const shortRunId = runId.length > 8 ? `${runId.slice(0, 8)}...` : runId;

  // `onOpenChange` reports the next open state; forward only close requests.
  const handleOpenChange = (nextOpen: boolean) => {
    if (!nextOpen) {
      onClose();
    }
  };

  return (
    <Dialog open={open} onOpenChange={handleOpenChange}>
      <DialogContent className="max-w-4xl max-h-[90vh] overflow-y-auto">
        <DialogHeader>
          <DialogTitle className="flex items-center gap-3">
            Comparison Analysis
            <span className="text-sm font-normal text-muted-foreground">
              Run: {shortRunId}
            </span>
          </DialogTitle>
        </DialogHeader>

        {/* Hypothesis Banner */}
        {analysis.hypothesis && (
          <div className="bg-blue-50 dark:bg-blue-950 border border-blue-200 dark:border-blue-800 rounded-lg p-3">
            <div className="text-xs font-medium text-blue-600 dark:text-blue-400 mb-1">
              Hypothesis
            </div>
            <div className="text-sm text-blue-900 dark:text-blue-100">
              {analysis.hypothesis}
            </div>
          </div>
        )}

        {/* Tab Navigation */}
        <div className="flex gap-1 border-b">
          <TabButton
            active={activeTab === "reproducibility"}
            onClick={() => setActiveTab("reproducibility")}
          >
            Reproducibility Report
          </TabButton>
          <TabButton
            active={activeTab === "impact"}
            onClick={() => setActiveTab("impact")}
          >
            Change Impact Report
          </TabButton>
          <TabButton
            active={activeTab === "experiments"}
            onClick={() => setActiveTab("experiments")}
          >
            Next Experiments
          </TabButton>
        </div>

        {/* Tab Content */}
        <div className="space-y-6 pt-2">
          {activeTab === "reproducibility" && (
            <ReproducibilityTab report={analysis.reproducibility} />
          )}
          {activeTab === "impact" && (
            <ChangeImpactTab
              report={analysis.change_impact}
              configSummary={analysis.config_summary}
            />
          )}
          {activeTab === "experiments" && (
            <ExperimentsTab experiments={analysis.next_experiments} />
          )}
        </div>
      </DialogContent>
    </Dialog>
  );
}
|
|
245
|
+
|
|
246
|
+
/**
 * Body of the "Reproducibility Report" tab: verdict and confidence, summary,
 * behavioral differences, metric deltas (original vs control) and
 * recommendations. The differences and recommendations sections are omitted
 * when their lists are empty.
 *
 * List keys include the item index: several differences can share the same
 * category and significance, and recommendation text is free-form, so keys
 * built from content alone are not guaranteed to be unique among siblings.
 *
 * @param props.report - The `reproducibility` part of the analysis.
 */
function ReproducibilityTab({
  report,
}: {
  report: SessionComparisonAnalysis["reproducibility"];
}) {
  return (
    <div className="space-y-6">
      {/* Header with verdict */}
      <div className="flex items-center justify-between">
        <div className="flex items-center gap-3">
          <VerdictBadge verdict={report.verdict} />
          <ConfidenceBadge confidence={report.confidence} />
        </div>
      </div>

      {/* Summary */}
      <Section title="Summary">
        <p className="text-sm text-foreground">{report.summary}</p>
      </Section>

      {/* Behavioral Differences */}
      {report.behavioral_differences.length > 0 && (
        <Section title="Behavioral Differences">
          <div className="space-y-3">
            {report.behavioral_differences.map((diff, idx) => (
              <div
                key={`diff-${idx}-${diff.category}-${diff.significance}`}
                className="border rounded-lg p-3 bg-muted/30"
              >
                <div className="flex items-center justify-between mb-2">
                  <span className="text-xs font-medium px-2 py-0.5 bg-secondary rounded">
                    {diff.category.replace(/_/g, " ")}
                  </span>
                  <SignificanceBadge significance={diff.significance} />
                </div>
                <p className="text-sm mb-2">{diff.observation}</p>
                <div className="text-xs text-muted-foreground bg-muted/50 rounded p-2 font-mono">
                  {diff.evidence}
                </div>
              </div>
            ))}
          </div>
        </Section>
      )}

      {/* Metrics Comparison */}
      <Section title="Metrics (Original vs Control)">
        <div className="grid grid-cols-4 gap-4 mb-3">
          <MetricDelta
            value={report.metric_comparison.duration_delta_pct}
            label="Duration"
          />
          <MetricDelta
            value={report.metric_comparison.token_delta_pct}
            label="Tokens"
          />
          <MetricDelta
            value={report.metric_comparison.cost_delta_pct}
            label="Cost"
          />
          <div className="text-center">
            <div className="text-lg font-semibold">
              {report.metric_comparison.tool_call_delta > 0 ? "+" : ""}
              {report.metric_comparison.tool_call_delta}
            </div>
            <div className="text-xs text-muted-foreground">Tool Calls</div>
          </div>
        </div>
        <p className="text-sm text-muted-foreground italic">
          {report.metric_comparison.interpretation}
        </p>
      </Section>

      {/* Recommendations */}
      {report.recommendations.length > 0 && (
        <Section title="Recommendations">
          <div className="space-y-2">
            {report.recommendations.map((rec, idx) => (
              <div key={`rec-${idx}`} className="border rounded-lg p-3">
                <div className="flex items-start gap-2 mb-2">
                  <PriorityBadge priority={rec.priority} />
                  <span className="text-sm font-medium">{rec.action}</span>
                </div>
                <p className="text-xs text-muted-foreground">{rec.rationale}</p>
              </div>
            ))}
          </div>
        </Section>
      )}
    </div>
  );
}
|
|
338
|
+
|
|
339
|
+
/**
 * Body of the "Change Impact Report" tab: verdict and confidence, a chip row
 * summarising the applied configuration changes, the hypothesis assessment,
 * summary, intended and unintended effects, a per-tool usage table, metric
 * deltas (control vs variant) and recommendations. Sections backed by a list
 * are omitted when that list is empty.
 *
 * List keys include the item index because the underlying fields are
 * free-form text (or tool names that may repeat), so content alone is not
 * guaranteed to be unique among siblings. The tool chips use the singular
 * "tool" when exactly one tool was added or removed.
 *
 * @param props.report - The `change_impact` part of the analysis.
 * @param props.configSummary - The `config_summary` part of the analysis,
 *   used for the "Changes Applied" chips.
 */
function ChangeImpactTab({
  report,
  configSummary,
}: {
  report: SessionComparisonAnalysis["change_impact"];
  configSummary: SessionComparisonAnalysis["config_summary"];
}) {
  const addedCount = configSummary.tools_added.length;
  const removedCount = configSummary.tools_removed.length;

  return (
    <div className="space-y-6">
      {/* Header with verdict */}
      <div className="flex items-center justify-between">
        <div className="flex items-center gap-3">
          <VerdictBadge verdict={report.verdict} />
          <ConfidenceBadge confidence={report.confidence} />
        </div>
      </div>

      {/* Config Changes Summary */}
      <div className="bg-muted/30 rounded-lg p-3">
        <div className="text-xs font-medium text-muted-foreground mb-2">
          Changes Applied
        </div>
        <div className="flex flex-wrap gap-2">
          {configSummary.model_change && (
            <span className="text-xs px-2 py-1 bg-purple-100 dark:bg-purple-900 text-purple-700 dark:text-purple-300 rounded">
              Model: {configSummary.model_change.from} →{" "}
              {configSummary.model_change.to}
            </span>
          )}
          {configSummary.system_prompt_changed && (
            <span className="text-xs px-2 py-1 bg-blue-100 dark:bg-blue-900 text-blue-700 dark:text-blue-300 rounded">
              System Prompt Modified
            </span>
          )}
          {addedCount > 0 && (
            <span className="text-xs px-2 py-1 bg-green-100 dark:bg-green-900 text-green-700 dark:text-green-300 rounded">
              +{addedCount} {addedCount === 1 ? "tool" : "tools"}
            </span>
          )}
          {removedCount > 0 && (
            <span className="text-xs px-2 py-1 bg-red-100 dark:bg-red-900 text-red-700 dark:text-red-300 rounded">
              -{removedCount} {removedCount === 1 ? "tool" : "tools"}
            </span>
          )}
        </div>
      </div>

      {/* Hypothesis Assessment */}
      <Section title="Hypothesis Assessment">
        <p className="text-sm text-foreground">
          {report.hypothesis_assessment}
        </p>
      </Section>

      {/* Summary */}
      <Section title="Summary">
        <p className="text-sm text-foreground">{report.summary}</p>
      </Section>

      {/* Intended Effects */}
      {report.intended_effects.length > 0 && (
        <Section title="Intended Effects">
          <div className="space-y-3">
            {report.intended_effects.map((effect, idx) => (
              <div
                key={`effect-${idx}`}
                className="border rounded-lg p-3 bg-muted/30"
              >
                <div className="flex items-center justify-between mb-2">
                  <span className="text-sm font-medium">
                    {effect.expected_change}
                  </span>
                  <AssessmentBadge assessment={effect.assessment} />
                </div>
                <p className="text-sm mb-2">{effect.observed_outcome}</p>
                <div className="text-xs text-muted-foreground bg-muted/50 rounded p-2 font-mono">
                  {effect.evidence}
                </div>
              </div>
            ))}
          </div>
        </Section>
      )}

      {/* Unintended Effects */}
      {report.unintended_effects.length > 0 && (
        <Section title="Unintended Effects">
          <div className="space-y-3">
            {report.unintended_effects.map((effect, idx) => (
              <div
                key={`unintended-${idx}`}
                className={`border rounded-lg p-3 ${
                  effect.impact === "NEGATIVE"
                    ? "border-red-200 dark:border-red-800 bg-red-50 dark:bg-red-950"
                    : effect.impact === "POSITIVE"
                      ? "border-green-200 dark:border-green-800 bg-green-50 dark:bg-green-950"
                      : "bg-muted/30"
                }`}
              >
                <div className="flex items-center justify-between mb-2">
                  <span className="text-xs font-medium px-2 py-0.5 bg-secondary rounded">
                    {effect.impact}
                  </span>
                  <SignificanceBadge significance={effect.severity} />
                </div>
                <p className="text-sm mb-2">{effect.observation}</p>
                <div className="text-xs text-muted-foreground bg-muted/50 rounded p-2 font-mono">
                  {effect.evidence}
                </div>
              </div>
            ))}
          </div>
        </Section>
      )}

      {/* Tool Usage Changes */}
      {report.tool_usage_changes.length > 0 && (
        <Section title="Tool Usage Changes">
          <div className="overflow-x-auto">
            <table className="w-full text-sm">
              <thead>
                <tr className="border-b">
                  <th className="text-left py-2 font-medium">Tool</th>
                  <th className="text-center py-2 font-medium">Control</th>
                  <th className="text-center py-2 font-medium">Variant</th>
                  <th className="text-left py-2 font-medium">Change</th>
                </tr>
              </thead>
              <tbody>
                {report.tool_usage_changes.map((change, idx) => (
                  <tr
                    key={`tool-${idx}-${change.tool_name}`}
                    className="border-b border-muted"
                  >
                    <td className="py-2 font-mono text-xs">
                      {change.tool_name}
                    </td>
                    <td className="py-2 text-center">{change.control_calls}</td>
                    <td className="py-2 text-center">{change.variant_calls}</td>
                    <td className="py-2 text-xs text-muted-foreground">
                      {change.pattern_change}
                    </td>
                  </tr>
                ))}
              </tbody>
            </table>
          </div>
        </Section>
      )}

      {/* Metrics Comparison */}
      <Section title="Metrics (Control vs Variant)">
        <div className="grid grid-cols-4 gap-4 mb-3">
          <MetricDelta
            value={report.metric_comparison.duration_delta_pct}
            label="Duration"
          />
          <MetricDelta
            value={report.metric_comparison.token_delta_pct}
            label="Tokens"
          />
          <MetricDelta
            value={report.metric_comparison.cost_delta_pct}
            label="Cost"
          />
          <div className="text-center">
            <div className="text-lg font-semibold">
              {report.metric_comparison.tool_call_delta > 0 ? "+" : ""}
              {report.metric_comparison.tool_call_delta}
            </div>
            <div className="text-xs text-muted-foreground">Tool Calls</div>
          </div>
        </div>
        <p className="text-sm text-muted-foreground italic">
          {report.metric_comparison.interpretation}
        </p>
      </Section>

      {/* Recommendations */}
      {report.recommendations.length > 0 && (
        <Section title="Recommendations">
          <div className="space-y-2">
            {report.recommendations.map((rec, idx) => (
              <div key={`rec-${idx}`} className="border rounded-lg p-3">
                <div className="flex items-start gap-2 mb-2">
                  <PriorityBadge priority={rec.priority} />
                  <span className="text-sm font-medium">{rec.action}</span>
                </div>
                <p className="text-xs text-muted-foreground mb-1">
                  {rec.rationale}
                </p>
                {rec.expected_impact && (
                  <p className="text-xs text-green-600 dark:text-green-400">
                    Expected: {rec.expected_impact}
                  </p>
                )}
              </div>
            ))}
          </div>
        </Section>
      )}
    </div>
  );
}
|
|
540
|
+
|
|
541
|
+
/**
 * Body of the "Next Experiments" tab: a numbered card per suggested
 * experiment showing the change dimension, hypothesis, suggested change,
 * optional example and expected outcome. Renders a "No suggested experiments"
 * placeholder when the list is empty.
 *
 * Cards are keyed by list position: the hypothesis text is free-form and two
 * experiments may share it, so it is not a reliable unique key.
 *
 * @param props.experiments - The `next_experiments` part of the analysis.
 */
function ExperimentsTab({
  experiments,
}: {
  experiments: SessionComparisonAnalysis["next_experiments"];
}) {
  if (experiments.length === 0) {
    return (
      <div className="text-center py-8 text-muted-foreground">
        No suggested experiments
      </div>
    );
  }

  return (
    <div className="space-y-4">
      <p className="text-sm text-muted-foreground">
        Based on the analysis, here are suggested next experiments to try:
      </p>
      {experiments.map((exp, idx) => (
        <div key={`exp-${idx}`} className="border rounded-lg p-4">
          <div className="flex items-center gap-2 mb-3">
            <span className="text-lg font-semibold text-primary">
              #{idx + 1}
            </span>
            <span className="text-xs px-2 py-0.5 bg-secondary rounded">
              {exp.suggested_change.dimension.replace(/_/g, " ")}
            </span>
          </div>
          <div className="space-y-3">
            <Field label="Hypothesis" value={exp.hypothesis} />
            <Field
              label="Suggested Change"
              value={exp.suggested_change.description}
            />
            {exp.suggested_change.example && (
              <div className="space-y-1">
                <div className="text-xs font-medium text-muted-foreground">
                  Example
                </div>
                <pre className="text-xs bg-muted rounded p-2 whitespace-pre-wrap font-mono">
                  {exp.suggested_change.example}
                </pre>
              </div>
            )}
            <Field label="Expected Outcome" value={exp.expected_outcome} />
          </div>
        </div>
      ))}
    </div>
  );
}
|
|
@@ -47,9 +47,18 @@ function LogRow({ log }: { log: Log }) {
|
|
|
47
47
|
|
|
48
48
|
return (
|
|
49
49
|
<div>
|
|
50
|
+
{/* biome-ignore lint/a11y/useSemanticElements: log row with expandable content */}
|
|
50
51
|
<div
|
|
52
|
+
role="button"
|
|
53
|
+
tabIndex={hasDetails ? 0 : undefined}
|
|
51
54
|
className={`flex items-start gap-2 py-1.5 px-2 hover:bg-muted rounded ${hasDetails ? "cursor-pointer" : ""}`}
|
|
52
55
|
onClick={() => hasDetails && setExpanded(!expanded)}
|
|
56
|
+
onKeyDown={(e) => {
|
|
57
|
+
if (hasDetails && (e.key === "Enter" || e.key === " ")) {
|
|
58
|
+
e.preventDefault();
|
|
59
|
+
setExpanded(!expanded);
|
|
60
|
+
}
|
|
61
|
+
}}
|
|
53
62
|
>
|
|
54
63
|
<span
|
|
55
64
|
className={`${getSeverityColor(log.severity_number)} font-medium text-xs w-12 shrink-0`}
|
|
@@ -89,13 +89,22 @@ export function SessionTraceList({
|
|
|
89
89
|
{traces.map((trace) => {
|
|
90
90
|
const isSelected = trace.trace_id === selectedTraceId;
|
|
91
91
|
return (
|
|
92
|
+
// biome-ignore lint/a11y/useSemanticElements: trace item with complex children
|
|
92
93
|
<div
|
|
93
94
|
key={trace.trace_id}
|
|
95
|
+
role="button"
|
|
96
|
+
tabIndex={0}
|
|
94
97
|
className={cn(
|
|
95
98
|
"space-y-2 cursor-pointer p-3 rounded-lg transition-all",
|
|
96
99
|
isSelected && "bg-blue-500/10 border-2 border-blue-500/30",
|
|
97
100
|
)}
|
|
98
101
|
onClick={() => onSelectTrace(trace.trace_id)}
|
|
102
|
+
onKeyDown={(e) => {
|
|
103
|
+
if (e.key === "Enter" || e.key === " ") {
|
|
104
|
+
e.preventDefault();
|
|
105
|
+
onSelectTrace(trace.trace_id);
|
|
106
|
+
}
|
|
107
|
+
}}
|
|
99
108
|
>
|
|
100
109
|
{/* User message - left aligned */}
|
|
101
110
|
<div className="flex justify-start">
|