@braintrust/pi-extension 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,409 @@
1
+ import { beforeEach, describe, expect, it, vi } from "vitest";
2
+
3
/**
 * Mutable state shared between the module mocks in this file and the tests.
 *
 * It is created through `vi.hoisted` so the value is available to the
 * `vi.mock` factories, which reference it when they record calls.
 */
const mockState = vi.hoisted(() => {
  type SpanRecord = Record<string, unknown>;
  type ConfigIssue = { path: string; message: string; severity: "error" | "warning" };
  type StatusUpdate = { key: string; text: string | undefined };
  type WidgetUpdate = { key: string; content: string[] | undefined };

  // Arguments captured from the mocked client, one entry per call.
  const startSpans: SpanRecord[] = [];
  const logSpans: SpanRecord[] = [];
  const endSpans: SpanRecord[] = [];
  const updateSpans: SpanRecord[] = [];

  // UI updates captured from the fake `ctx.ui` used by the tests.
  const statuses: StatusUpdate[] = [];
  const widgets: WidgetUpdate[] = [];

  const configIssues: ConfigIssue[] = [];

  // Configuration handed out by the mocked `loadConfig`.
  const config = {
    enabled: true,
    apiKey: "test-key",
    apiUrl: undefined,
    appUrl: "https://www.braintrust.dev",
    orgName: undefined,
    projectName: "pi",
    debug: false,
    logFile: undefined,
    stateDir: "/tmp/braintrust-pi-extension-test",
    additionalMetadata: {},
    parentSpanId: undefined,
    rootSpanId: undefined,
    configIssues,
  };

  return {
    startSpans,
    logSpans,
    endSpans,
    updateSpans,
    statuses,
    widgets,
    initializeCalls: 0,
    flushCalls: 0,
    config,
  };
});
28
+
29
// Swap the real client module for a recorder: every call is appended to
// `mockState` so tests can assert on what the extension asked the client to do.
vi.mock("./client.ts", () => {
  type SpanLike = Record<string, unknown>;

  // Fixed link returned for any defined span.
  const TRACE_URL = "https://www.braintrust.dev/app/test-org/p/pi/logs?oid=trace-row-1";

  class MockBraintrustClient {
    /** Counts initialization attempts. */
    async initialize(): Promise<void> {
      mockState.initializeCalls++;
    }

    /** Records the start arguments and returns a span record derived from them. */
    startSpan(args: SpanLike): SpanLike {
      mockState.startSpans.push(args);
      const { spanId, rootSpanId } = args;
      return { id: `record-${String(spanId)}`, spanId, rootSpanId };
    }

    /** Records a log event together with the span it was logged against. */
    logSpan(span: SpanLike | undefined, event: SpanLike): void {
      mockState.logSpans.push({ span, event });
    }

    /** Records that a span was ended, with the optional end timestamp. */
    endSpan(span: SpanLike | undefined, endedAt?: number): void {
      mockState.endSpans.push({ span, endedAt });
    }

    /** Returns the fixed trace URL for a defined span, otherwise `undefined`. */
    getSpanLink(span: SpanLike | undefined): string | undefined {
      return span ? TRACE_URL : undefined;
    }

    /** Async variant that resolves to the same value as `getSpanLink`. */
    async getSpanPermalink(span: SpanLike | undefined): Promise<string | undefined> {
      return this.getSpanLink(span);
    }

    /** Records the arguments of a span update. */
    updateSpan(args: SpanLike): void {
      mockState.updateSpans.push(args);
    }

    /** Counts flush requests. */
    async flush(): Promise<void> {
      mockState.flushCalls++;
    }
  }

  return { BraintrustClient: MockBraintrustClient };
});
74
+
75
// Stub the config module: `loadConfig` hands out a shallow copy of the current
// `mockState.config`, and `createLogger` returns a logger that does nothing.
vi.mock("./config.ts", () => {
  const noop = (): void => {};

  function loadConfig() {
    return { ...mockState.config };
  }

  function createLogger() {
    return {
      filePath: "/tmp/braintrust-pi-extension-test.log",
      debug: noop,
      info: noop,
      warn: noop,
      error: noop,
      flush: async () => {},
    };
  }

  return { loadConfig, createLogger };
});
86
+
87
// Stub the state store: every operation is a no-op and `get` never finds a
// value, so nothing is read from or persisted to disk during tests.
vi.mock("./state.ts", () => {
  const returnsUndefined = () => undefined;

  function createStateStore() {
    return {
      get: returnsUndefined,
      set: returnsUndefined,
      patch: returnsUndefined,
      delete: returnsUndefined,
      schedulePersist: returnsUndefined,
      flush: async () => {},
    };
  }

  return { createStateStore };
});
97
+
98
// Restore `mockState` to its pristine values and reset the module registry
// before every test.
beforeEach(() => {
  // Truncate in place so existing references to the arrays stay valid.
  const recorders = [
    mockState.startSpans,
    mockState.logSpans,
    mockState.endSpans,
    mockState.updateSpans,
    mockState.statuses,
    mockState.widgets,
  ];
  for (const recorder of recorders) {
    recorder.length = 0;
  }

  mockState.initializeCalls = 0;
  mockState.flushCalls = 0;

  // A fresh object, so per-test mutations of `config` never leak forward.
  mockState.config = {
    enabled: true,
    apiKey: "test-key",
    apiUrl: undefined,
    appUrl: "https://www.braintrust.dev",
    orgName: undefined,
    projectName: "pi",
    debug: false,
    logFile: undefined,
    stateDir: "/tmp/braintrust-pi-extension-test",
    additionalMetadata: {},
    parentSpanId: undefined,
    rootSpanId: undefined,
    configIssues: [],
  };

  vi.resetModules();
});
124
+
125
/**
 * Imports the extension, registers its event handlers against a fake host,
 * and returns an `emit` helper for driving those handlers.
 *
 * @param ctxOverrides Properties shallow-merged over the default fake context
 *   (for example `{ cwd: process.cwd() }`). Defaults to no overrides.
 * @returns An object whose `emit(eventName, event)` invokes the handler
 *   registered for `eventName` with `(event, ctx)` and awaits its result.
 */
async function createHarness(ctxOverrides: Record<string, unknown> = {}) {
  const handlers = new Map<string, (...args: unknown[]) => unknown>();
  const { default: braintrustPiExtension } = await import("./index.ts");

  // The extension only needs `on` from the host here; capture each handler by
  // event name (a later registration for the same name replaces the earlier).
  braintrustPiExtension({
    on(eventName: string, handler: (...args: unknown[]) => unknown) {
      handlers.set(eventName, handler);
    },
  } as never);

  const ctx = {
    cwd: "/tmp/workspace",
    hasUI: true,
    model: "anthropic/claude-sonnet-4",
    ui: {
      theme: {
        // Styling helpers are identity functions so assertions see plain text.
        fg: (_color: string, text: string) => text,
        underline: (text: string) => text,
      },
      setStatus: (key: string, text: string | undefined) => {
        mockState.statuses.push({ key, text });
      },
      setWidget: (key: string, content: string[] | undefined, _options?: unknown) => {
        mockState.widgets.push({ key, content });
      },
    },
    sessionManager: {
      getSessionFile: () => "/tmp/session.json",
      getSessionId: () => "session-1",
    },
    ...ctxOverrides,
  };

  /**
   * Invokes the handler registered for `eventName`.
   *
   * @throws Error when the extension registered no handler for `eventName`.
   */
  async function emit(eventName: string, event: Record<string, unknown> = {}): Promise<void> {
    const handler = handlers.get(eventName);
    if (!handler) throw new Error(`No handler registered for ${eventName}`);
    await handler(event, ctx);
  }

  return { emit };
}

describe("braintrustPiExtension", () => {
  it("shows a trace url only after the session produces a turn", async () => {
    const { emit } = await createHarness();

    await emit("session_start");

    // Session start alone: status is shown, no span exists, no link widget.
    expect(mockState.statuses[0]).toEqual({
      key: "braintrust-tracing",
      text: "Braintrust tracing pi",
    });
    expect(mockState.startSpans).toEqual([]);
    expect(mockState.widgets.at(-1)).toEqual({
      key: "braintrust-trace-link",
      content: undefined,
    });

    await emit("before_agent_start", {
      prompt: "Inspect the package",
      images: [],
    });

    // Once a turn starts, the widget carries the trace link.
    expect(mockState.widgets.at(-1)?.key).toBe("braintrust-trace-link");
    expect(mockState.widgets.at(-1)?.content?.[0]).toContain("Braintrust trace ↗");
    expect(mockState.widgets.at(-1)?.content?.[1]).toBe(
      "braintrust.dev/app/test-org/p/pi/logs?oid=trace-row-1",
    );

    await emit("session_shutdown");

    // Shutdown clears both the status and the widget.
    expect(mockState.statuses.at(-1)).toEqual({
      key: "braintrust-tracing",
      text: undefined,
    });
    expect(mockState.widgets.at(-1)).toEqual({
      key: "braintrust-trace-link",
      content: undefined,
    });
  });

  it("surfaces malformed Braintrust config in the UI", async () => {
    mockState.config.configIssues = [
      {
        path: "/Users/test/.pi/agent/braintrust.json",
        message: "Expected double-quoted property name in JSON at position 42",
        severity: "error",
      },
    ];

    const { emit } = await createHarness();

    await emit("session_start");

    expect(mockState.statuses[0]).toEqual({
      key: "braintrust-tracing",
      text: "Braintrust tracing pi (config warning)",
    });
    expect(mockState.widgets.at(-1)?.key).toBe("braintrust-trace-link");
    expect(mockState.widgets.at(-1)?.content).toContain("Braintrust config error");
    expect(mockState.widgets.at(-1)?.content?.[1]).toContain(".pi/agent/braintrust.json");
  });

  it("does not create a root span for an idle session", async () => {
    const { emit } = await createHarness();

    await emit("session_start");
    await emit("session_shutdown");

    expect(mockState.startSpans).toEqual([]);
    expect(mockState.endSpans).toEqual([]);
    expect(mockState.updateSpans).toEqual([]);
  });

  it("parents tool spans under the llm span that emitted the matching tool call", async () => {
    const { emit } = await createHarness();

    await emit("session_start");
    await emit("before_agent_start", {
      prompt: "Inspect the package",
      images: [],
    });
    await emit("message_end", {
      message: {
        role: "assistant",
        provider: "anthropic",
        model: "claude-sonnet-4",
        timestamp: 1_700_000_000_000,
        content: [
          { type: "text", text: "I'll inspect package.json." },
          {
            type: "toolCall",
            id: "tool-1",
            name: "read",
            arguments: { path: "package.json" },
          },
        ],
      },
    });
    await emit("tool_execution_start", {
      toolCallId: "tool-1",
      toolName: "read",
      args: { path: "package.json" },
    });
    await emit("tool_execution_end", {
      toolCallId: "tool-1",
      toolName: "read",
      isError: false,
      result: {
        content: [{ type: "text", text: '{"name":"@braintrust/pi-extension"}' }],
      },
    });

    const llmSpan = mockState.startSpans.find((span) => span.type === "llm");
    const toolSpan = mockState.startSpans.find((span) => span.type === "tool");

    expect(llmSpan).toBeDefined();
    expect(toolSpan).toBeDefined();
    expect(toolSpan?.parentSpanId).toBe(llmSpan?.spanId);
    expect(toolSpan?.metadata).toMatchObject({
      tool_name: "read",
      tool_call_id: "tool-1",
      parent_llm_span_id: llmSpan?.spanId,
    });
  });

  it("preserves fork metadata when the root span is created lazily", async () => {
    const { emit } = await createHarness();

    await emit("session_start", {
      reason: "fork",
      previousSessionFile: "/tmp/parent-session.json",
    });
    await emit("before_agent_start", {
      prompt: "Continue from the fork",
      images: [],
    });

    expect(mockState.startSpans[0]).toMatchObject({
      type: "task",
      metadata: {
        opened_via: "session_fork",
        parent_session_file: "/tmp/parent-session.json",
      },
    });
  });

  it("adds the git repo slug to root span metadata when available", async () => {
    // Run against the real working directory instead of the default fake cwd.
    // NOTE(review): the expected slug presumably comes from the git remote of
    // the checkout at process.cwd(); this may fail in forks or non-git
    // checkouts — confirm against index.ts.
    const { emit } = await createHarness({ cwd: process.cwd() });

    // No `session_start` is emitted here; the turn alone must create the root span.
    await emit("before_agent_start", {
      prompt: "Inspect the package",
      images: [],
    });

    expect(mockState.startSpans[0]).toMatchObject({
      type: "task",
      metadata: {
        repo: "braintrustdata/braintrust-pi-extension",
      },
    });
  });

  it("falls back to the turn span when no matching tool call was emitted by the llm", async () => {
    const { emit } = await createHarness();

    await emit("session_start");
    await emit("before_agent_start", {
      prompt: "Run the test suite",
      images: [],
    });
    await emit("message_end", {
      message: {
        role: "assistant",
        provider: "anthropic",
        model: "claude-sonnet-4",
        timestamp: 1_700_000_000_001,
        content: [{ type: "text", text: "Running the test suite." }],
      },
    });
    // The assistant message above contains no tool call with this id.
    await emit("tool_execution_start", {
      toolCallId: "tool-missing",
      toolName: "bash",
      args: { command: "npm test" },
    });
    await emit("tool_execution_end", {
      toolCallId: "tool-missing",
      toolName: "bash",
      isError: false,
      result: {
        content: [{ type: "text", text: "tests passed" }],
      },
    });

    const turnSpan = mockState.startSpans.find(
      (span) => span.type === "task" && span.name === "Turn 1",
    );
    const toolSpan = mockState.startSpans.find((span) => span.type === "tool");

    expect(turnSpan).toBeDefined();
    expect(toolSpan).toBeDefined();
    expect(toolSpan?.parentSpanId).toBe(turnSpan?.spanId);
    expect(toolSpan?.metadata).toMatchObject({
      tool_name: "bash",
      tool_call_id: "tool-missing",
    });
    expect(
      (toolSpan?.metadata as Record<string, unknown> | undefined)?.parent_llm_span_id,
    ).toBeUndefined();
  });
});