@townco/debugger 0.1.28 → 0.1.29

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -112,6 +112,14 @@ export function ComparisonView({ runId }: ComparisonViewProps) {
112
112
  });
113
113
  }, [runId]);
114
114
 
115
+ const generateRequestId = (prefix: string, sessionId?: string) => {
116
+ const randomPart =
117
+ typeof crypto !== "undefined" && "randomUUID" in crypto
118
+ ? crypto.randomUUID()
119
+ : `${Math.random().toString(16).slice(2)}-${Date.now().toString(16)}`;
120
+ return `${prefix}-${sessionId ? `${sessionId}-` : ""}${randomPart}`;
121
+ };
122
+
115
123
  // Create a new session with the agent server
116
124
  const createSession = async (
117
125
  configOverrides?: Record<string, unknown>,
@@ -121,7 +129,7 @@ export function ComparisonView({ runId }: ComparisonViewProps) {
121
129
  headers: { "Content-Type": "application/json" },
122
130
  body: JSON.stringify({
123
131
  jsonrpc: "2.0",
124
- id: `init-${Date.now()}`,
132
+ id: generateRequestId("init"),
125
133
  method: "initialize",
126
134
  params: {
127
135
  protocolVersion: 1,
@@ -136,7 +144,7 @@ export function ComparisonView({ runId }: ComparisonViewProps) {
136
144
  headers: { "Content-Type": "application/json" },
137
145
  body: JSON.stringify({
138
146
  jsonrpc: "2.0",
139
- id: `session-${Date.now()}`,
147
+ id: generateRequestId("session"),
140
148
  method: "session/new",
141
149
  params: {
142
150
  cwd: "/",
@@ -154,7 +162,7 @@ export function ComparisonView({ runId }: ComparisonViewProps) {
154
162
  sessionId: string,
155
163
  message: string,
156
164
  onUpdate: (content: string) => void,
157
- ): Promise<void> => {
165
+ ): Promise<string> => {
158
166
  let accumulatedContent = "";
159
167
  let abortController: AbortController | null = new AbortController();
160
168
 
@@ -228,7 +236,7 @@ export function ComparisonView({ runId }: ComparisonViewProps) {
228
236
  headers: { "Content-Type": "application/json" },
229
237
  body: JSON.stringify({
230
238
  jsonrpc: "2.0",
231
- id: `prompt-${Date.now()}`,
239
+ id: generateRequestId("prompt", sessionId),
232
240
  method: "session/prompt",
233
241
  params: {
234
242
  sessionId,
@@ -243,6 +251,9 @@ export function ComparisonView({ runId }: ComparisonViewProps) {
243
251
  // Abort the SSE connection since we're done
244
252
  abortController.abort();
245
253
  abortController = null;
254
+
255
+ // Return the accumulated content
256
+ return accumulatedContent;
246
257
  };
247
258
 
248
259
  // Run the comparison
@@ -271,15 +282,16 @@ export function ComparisonView({ runId }: ComparisonViewProps) {
271
282
  });
272
283
 
273
284
  try {
274
- // Build config overrides based on dimension
285
+ // Build config overrides based on all selected dimensions
275
286
  const variantOverrides: Record<string, unknown> = {};
276
- if (config.dimension === "model" && config.variantModel) {
287
+ const dimensions = config.dimensions || [];
288
+ if (dimensions.includes("model") && config.variantModel) {
277
289
  variantOverrides.model = config.variantModel;
278
290
  }
279
- if (config.dimension === "system_prompt" && config.variantSystemPrompt) {
291
+ if (dimensions.includes("system_prompt") && config.variantSystemPrompt) {
280
292
  variantOverrides.systemPrompt = config.variantSystemPrompt;
281
293
  }
282
- if (config.dimension === "tools" && config.variantTools) {
294
+ if (dimensions.includes("tools") && config.variantTools) {
283
295
  variantOverrides.tools = config.variantTools;
284
296
  }
285
297
 
@@ -307,123 +319,157 @@ export function ComparisonView({ runId }: ComparisonViewProps) {
307
319
  const startTime = Date.now();
308
320
 
309
321
  // Track final responses and metrics
310
- let finalControlResponse = "";
311
- let finalVariantResponse = "";
312
- let finalControlMetrics: SessionMetrics | null = null;
313
- let finalVariantMetrics: SessionMetrics | null = null;
314
-
315
- await Promise.all([
316
- // Control session
317
- sendMessageAndCollect(controlSessionId, firstMessage, (content) => {
318
- finalControlResponse = content;
319
- setControlState((prev) => ({
320
- ...prev,
321
- messages: [
322
- { role: "user", content: firstMessage },
323
- { role: "assistant", content },
324
- ],
325
- }));
326
- })
327
- .then(async () => {
328
- const duration = Date.now() - startTime;
329
- // Wait for telemetry data to be written to the database
330
- await new Promise((r) => setTimeout(r, 2000));
331
- // Fetch metrics - use control model for cost calculation
332
- const controlModel =
333
- config.controlModel || "claude-sonnet-4-5-20250929";
334
- try {
335
- const metricsRes = await fetch(
336
- `/api/session-metrics/${controlSessionId}?model=${encodeURIComponent(controlModel)}`,
337
- );
338
- const metrics = await metricsRes.json();
339
- metrics.durationMs = duration;
340
- finalControlMetrics = metrics;
341
- setControlState((prev) => ({
342
- ...prev,
343
- isStreaming: false,
344
- metrics,
345
- }));
346
- } catch {
347
- finalControlMetrics = {
322
+ let finalControlMetrics: SessionMetrics = {
323
+ durationMs: 0,
324
+ inputTokens: 0,
325
+ outputTokens: 0,
326
+ totalTokens: 0,
327
+ estimatedCost: 0,
328
+ toolCallCount: 0,
329
+ };
330
+ let finalVariantMetrics: SessionMetrics = {
331
+ durationMs: 0,
332
+ inputTokens: 0,
333
+ outputTokens: 0,
334
+ totalTokens: 0,
335
+ estimatedCost: 0,
336
+ toolCallCount: 0,
337
+ };
338
+
339
+ // Helper to run a session and fetch metrics
340
+ const runSession = async (
341
+ sessionId: string,
342
+ model: string,
343
+ setState: typeof setControlState,
344
+ onContentUpdate: (content: string) => void,
345
+ ): Promise<{ response: string; metrics: SessionMetrics }> => {
346
+ try {
347
+ const response = await sendMessageAndCollect(
348
+ sessionId,
349
+ firstMessage,
350
+ onContentUpdate,
351
+ );
352
+
353
+ const duration = Date.now() - startTime;
354
+
355
+ // Poll metrics until they stabilize or we hit a max wait window.
356
+ const fetchMetricsWithRetry = async (): Promise<SessionMetrics> => {
357
+ const maxWaitMs = 60_000;
358
+ const pollIntervalMs = 2_000;
359
+ let elapsed = 0;
360
+ let previousTokens = -1;
361
+ let previousTools = -1;
362
+ let lastMetrics: SessionMetrics | null = null;
363
+
364
+ while (elapsed <= maxWaitMs) {
365
+ try {
366
+ const metricsRes = await fetch(
367
+ `/api/session-metrics/${sessionId}?model=${encodeURIComponent(model)}`,
368
+ );
369
+ const metrics = await metricsRes.json();
370
+ lastMetrics = { ...metrics, durationMs: duration };
371
+
372
+ // If tokens/tool calls stopped changing and we have data, treat as final.
373
+ if (
374
+ metrics.totalTokens > 0 &&
375
+ metrics.totalTokens === previousTokens &&
376
+ metrics.toolCallCount === previousTools
377
+ ) {
378
+ return lastMetrics!;
379
+ }
380
+
381
+ previousTokens = metrics.totalTokens ?? 0;
382
+ previousTools = metrics.toolCallCount ?? 0;
383
+ } catch {
384
+ // swallow and retry
385
+ }
386
+
387
+ await new Promise((r) => setTimeout(r, pollIntervalMs));
388
+ elapsed += pollIntervalMs;
389
+ }
390
+
391
+ // Return whatever we last saw (or zeros if nothing ever arrived)
392
+ return (
393
+ lastMetrics ?? {
348
394
  durationMs: duration,
349
395
  inputTokens: 0,
350
396
  outputTokens: 0,
351
397
  totalTokens: 0,
352
398
  estimatedCost: 0,
353
399
  toolCallCount: 0,
354
- };
355
- setControlState((prev) => ({
356
- ...prev,
357
- isStreaming: false,
358
- metrics: finalControlMetrics,
359
- }));
360
- }
361
- })
362
- .catch((err) => {
363
- setControlState((prev) => ({
364
- ...prev,
365
- isStreaming: false,
366
- error: err.message,
367
- }));
368
- }),
400
+ }
401
+ );
402
+ };
403
+
404
+ const metrics = await fetchMetricsWithRetry();
369
405
 
370
- // Variant session
371
- sendMessageAndCollect(variantSessionId, firstMessage, (content) => {
372
- finalVariantResponse = content;
373
- setVariantState((prev) => ({
406
+ setState((prev) => ({
374
407
  ...prev,
375
- messages: [
376
- { role: "user", content: firstMessage },
377
- { role: "assistant", content },
378
- ],
408
+ isStreaming: false,
409
+ metrics,
379
410
  }));
380
- })
381
- .then(async () => {
382
- const duration = Date.now() - startTime;
383
- // Wait for telemetry data to be written to the database
384
- await new Promise((r) => setTimeout(r, 2000));
385
- // Fetch metrics - use variant model for cost calculation
386
- const variantModel =
387
- config.variantModel ||
388
- config.controlModel ||
389
- "claude-sonnet-4-5-20250929";
390
- try {
391
- const metricsRes = await fetch(
392
- `/api/session-metrics/${variantSessionId}?model=${encodeURIComponent(variantModel)}`,
393
- );
394
- const metrics = await metricsRes.json();
395
- metrics.durationMs = duration;
396
- finalVariantMetrics = metrics;
397
- setVariantState((prev) => ({
398
- ...prev,
399
- isStreaming: false,
400
- metrics,
401
- }));
402
- } catch {
403
- finalVariantMetrics = {
404
- durationMs: duration,
405
- inputTokens: 0,
406
- outputTokens: 0,
407
- totalTokens: 0,
408
- estimatedCost: 0,
409
- toolCallCount: 0,
410
- };
411
- setVariantState((prev) => ({
412
- ...prev,
413
- isStreaming: false,
414
- metrics: finalVariantMetrics,
415
- }));
416
- }
417
- })
418
- .catch((err) => {
411
+
412
+ return { response, metrics };
413
+ } catch (err) {
414
+ setState((prev) => ({
415
+ ...prev,
416
+ isStreaming: false,
417
+ error: err instanceof Error ? err.message : "Unknown error",
418
+ }));
419
+ return {
420
+ response: "",
421
+ metrics: {
422
+ durationMs: 0,
423
+ inputTokens: 0,
424
+ outputTokens: 0,
425
+ totalTokens: 0,
426
+ estimatedCost: 0,
427
+ toolCallCount: 0,
428
+ },
429
+ };
430
+ }
431
+ };
432
+
433
+ const controlModel = config.controlModel || "claude-sonnet-4-5-20250929";
434
+ const variantModel =
435
+ config.variantModel ||
436
+ config.controlModel ||
437
+ "claude-sonnet-4-5-20250929";
438
+
439
+ const [controlResult, variantResult] = await Promise.all([
440
+ runSession(
441
+ controlSessionId,
442
+ controlModel,
443
+ setControlState,
444
+ (content) => {
445
+ setControlState((prev) => ({
446
+ ...prev,
447
+ messages: [
448
+ { role: "user", content: firstMessage },
449
+ { role: "assistant", content },
450
+ ],
451
+ }));
452
+ },
453
+ ),
454
+ runSession(
455
+ variantSessionId,
456
+ variantModel,
457
+ setVariantState,
458
+ (content) => {
419
459
  setVariantState((prev) => ({
420
460
  ...prev,
421
- isStreaming: false,
422
- error: err.message,
461
+ messages: [
462
+ { role: "user", content: firstMessage },
463
+ { role: "assistant", content },
464
+ ],
423
465
  }));
424
- }),
466
+ },
467
+ ),
425
468
  ]);
426
469
 
470
+ finalControlMetrics = controlResult.metrics;
471
+ finalVariantMetrics = variantResult.metrics;
472
+
427
473
  // Update run status with responses and metrics
428
474
  await fetch(`/api/comparison-run/${runId}/update`, {
429
475
  method: "POST",
@@ -432,8 +478,8 @@ export function ComparisonView({ runId }: ComparisonViewProps) {
432
478
  status: "completed",
433
479
  controlMetrics: finalControlMetrics,
434
480
  variantMetrics: finalVariantMetrics,
435
- controlResponse: finalControlResponse,
436
- variantResponse: finalVariantResponse,
481
+ controlResponse: controlResult.response,
482
+ variantResponse: variantResult.response,
437
483
  }),
438
484
  });
439
485
  } catch (err) {
@@ -464,31 +510,97 @@ export function ComparisonView({ runId }: ComparisonViewProps) {
464
510
  }
465
511
 
466
512
  const getControlDimensionLabel = () => {
467
- if (!config) return "";
468
- switch (config.dimension) {
469
- case "model":
470
- return `Model: ${config.controlModel || "unknown"}`;
471
- case "system_prompt":
472
- return "System Prompt (original)";
473
- case "tools":
474
- return "Tools (original)";
475
- default:
476
- return "";
513
+ if (!config || !config.dimensions || config.dimensions.length === 0)
514
+ return "";
515
+ const labels: string[] = [];
516
+ for (const dim of config.dimensions) {
517
+ switch (dim) {
518
+ case "model":
519
+ labels.push(`Model: ${config.controlModel || "original"}`);
520
+ break;
521
+ case "system_prompt":
522
+ labels.push("System Prompt: original");
523
+ break;
524
+ case "tools":
525
+ labels.push("Tools: original");
526
+ break;
527
+ }
477
528
  }
529
+ return labels.join(" | ");
478
530
  };
479
531
 
480
532
  const getDimensionLabel = () => {
481
- if (!config) return "";
482
- switch (config.dimension) {
483
- case "model":
484
- return `Model: ${config.variantModel}`;
485
- case "system_prompt":
486
- return "System Prompt (modified)";
487
- case "tools":
488
- return `Tools: ${config.variantTools?.join(", ")}`;
489
- default:
490
- return "";
533
+ if (!config || !config.dimensions || config.dimensions.length === 0)
534
+ return "";
535
+ const labels: string[] = [];
536
+ for (const dim of config.dimensions) {
537
+ switch (dim) {
538
+ case "model":
539
+ labels.push(`Model: ${config.variantModel}`);
540
+ break;
541
+ case "system_prompt":
542
+ labels.push("System Prompt: modified");
543
+ break;
544
+ case "tools":
545
+ labels.push(`Tools: ${config.variantTools?.join(", ")}`);
546
+ break;
547
+ }
491
548
  }
549
+ return labels.join(" | ");
550
+ };
551
+
552
+ const getDimensionsSummary = () => {
553
+ if (!config || !config.dimensions || config.dimensions.length === 0)
554
+ return "";
555
+ return config.dimensions.map((d) => d.replace("_", " ")).join(", ");
556
+ };
557
+
558
+ const formatToolTime = (ns?: number) => {
559
+ if (!ns) return "";
560
+ return new Date(ns / 1_000_000).toLocaleTimeString();
561
+ };
562
+
563
+ const renderToolCalls = (toolCalls?: SessionMetrics["toolCalls"]) => {
564
+ if (!toolCalls || toolCalls.length === 0) {
565
+ return <div className="text-xs text-muted-foreground">No tool calls</div>;
566
+ }
567
+
568
+ return (
569
+ <div className="space-y-2">
570
+ {toolCalls.map((call, idx) => (
571
+ <details
572
+ key={`${call.name}-${call.startTimeUnixNano ?? idx}`}
573
+ className="rounded-md border px-3 py-2 bg-muted/50"
574
+ >
575
+ <summary className="text-xs font-medium cursor-pointer flex items-center justify-between">
576
+ <span>
577
+ {call.name}{" "}
578
+ {call.startTimeUnixNano ? (
579
+ <span className="text-muted-foreground">
580
+ @ {formatToolTime(call.startTimeUnixNano)}
581
+ </span>
582
+ ) : null}
583
+ </span>
584
+ <span className="text-muted-foreground text-[11px]">view</span>
585
+ </summary>
586
+ <div className="mt-2 text-[11px] space-y-1 break-words">
587
+ <div>
588
+ <span className="font-semibold">Args:</span>{" "}
589
+ <code className="break-words">
590
+ {JSON.stringify(call.input, null, 2)}
591
+ </code>
592
+ </div>
593
+ <div>
594
+ <span className="font-semibold">Result:</span>{" "}
595
+ <code className="break-words">
596
+ {JSON.stringify(call.output, null, 2)}
597
+ </code>
598
+ </div>
599
+ </div>
600
+ </details>
601
+ ))}
602
+ </div>
603
+ );
492
604
  };
493
605
 
494
606
  return (
@@ -499,8 +611,7 @@ export function ComparisonView({ runId }: ComparisonViewProps) {
499
611
  <div>
500
612
  <h2 className="text-lg font-semibold">A/B Comparison</h2>
501
613
  <p className="text-sm text-muted-foreground">
502
- Comparing: {config?.dimension?.replace("_", " ")} -{" "}
503
- {getDimensionLabel()}
614
+ Comparing: {getDimensionsSummary()}
504
615
  </p>
505
616
  </div>
506
617
  {!hasRun && (
@@ -613,6 +724,12 @@ export function ComparisonView({ runId }: ComparisonViewProps) {
613
724
  {controlState.metrics.toolCallCount}
614
725
  </div>
615
726
  </div>
727
+ <div className="mt-3">
728
+ <div className="text-[11px] font-semibold mb-1">
729
+ Tool calls
730
+ </div>
731
+ {renderToolCalls(controlState.metrics.toolCalls)}
732
+ </div>
616
733
  </div>
617
734
  )}
618
735
  </Card>
@@ -674,6 +791,12 @@ export function ComparisonView({ runId }: ComparisonViewProps) {
674
791
  {variantState.metrics.toolCallCount}
675
792
  </div>
676
793
  </div>
794
+ <div className="mt-3">
795
+ <div className="text-[11px] font-semibold mb-1">
796
+ Tool calls
797
+ </div>
798
+ {renderToolCalls(variantState.metrics.toolCalls)}
799
+ </div>
677
800
  </div>
678
801
  )}
679
802
  </Card>