@oh-my-pi/pi-ai 6.9.0 → 6.9.69

This diff shows the contents of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@oh-my-pi/pi-ai",
3
- "version": "6.9.0",
3
+ "version": "6.9.69",
4
4
  "description": "Unified LLM API with automatic model discovery and provider configuration",
5
5
  "type": "module",
6
6
  "main": "./src/index.ts",
@@ -17,7 +17,7 @@
17
17
  "test": "bun test"
18
18
  },
19
19
  "dependencies": {
20
- "@oh-my-pi/pi-utils": "6.9.0",
20
+ "@oh-my-pi/pi-utils": "6.9.69",
21
21
  "@anthropic-ai/sdk": "0.71.2",
22
22
  "@aws-sdk/client-bedrock-runtime": "^3.968.0",
23
23
  "@bufbuild/protobuf": "^2.10.2",
@@ -67,6 +67,9 @@ export const streamBedrock: StreamFunction<"bedrock-converse-stream"> = (
67
67
  const stream = new AssistantMessageEventStream();
68
68
 
69
69
  (async () => {
70
+ const startTime = Date.now();
71
+ let firstTokenTime: number | undefined;
72
+
70
73
  const output: AssistantMessage = {
71
74
  role: "assistant",
72
75
  content: [],
@@ -113,8 +116,10 @@ export const streamBedrock: StreamFunction<"bedrock-converse-stream"> = (
113
116
  }
114
117
  stream.push({ type: "start", partial: output });
115
118
  } else if (item.contentBlockStart) {
119
+ if (!firstTokenTime) firstTokenTime = Date.now();
116
120
  handleContentBlockStart(item.contentBlockStart, blocks, output, stream);
117
121
  } else if (item.contentBlockDelta) {
122
+ if (!firstTokenTime) firstTokenTime = Date.now();
118
123
  handleContentBlockDelta(item.contentBlockDelta, blocks, output, stream);
119
124
  } else if (item.contentBlockStop) {
120
125
  handleContentBlockStop(item.contentBlockStop, blocks, output, stream);
@@ -143,6 +148,8 @@ export const streamBedrock: StreamFunction<"bedrock-converse-stream"> = (
143
148
  throw new Error("An unknown error occurred");
144
149
  }
145
150
 
151
+ output.duration = Date.now() - startTime;
152
+ if (firstTokenTime) output.ttft = firstTokenTime - startTime;
146
153
  stream.push({ type: "done", reason: output.stopReason, message: output });
147
154
  stream.end();
148
155
  } catch (error) {
@@ -152,6 +159,8 @@ export const streamBedrock: StreamFunction<"bedrock-converse-stream"> = (
152
159
  }
153
160
  output.stopReason = options.signal?.aborted ? "aborted" : "error";
154
161
  output.errorMessage = error instanceof Error ? error.message : JSON.stringify(error);
162
+ output.duration = Date.now() - startTime;
163
+ if (firstTokenTime) output.ttft = firstTokenTime - startTime;
155
164
  stream.push({ type: "error", reason: output.stopReason, error: output });
156
165
  stream.end();
157
166
  }
@@ -140,6 +140,9 @@ export const streamAnthropic: StreamFunction<"anthropic-messages"> = (
140
140
  const stream = new AssistantMessageEventStream();
141
141
 
142
142
  (async () => {
143
+ const startTime = Date.now();
144
+ let firstTokenTime: number | undefined;
145
+
143
146
  const output: AssistantMessage = {
144
147
  role: "assistant",
145
148
  content: [],
@@ -183,6 +186,7 @@ export const streamAnthropic: StreamFunction<"anthropic-messages"> = (
183
186
  output.usage.input + output.usage.output + output.usage.cacheRead + output.usage.cacheWrite;
184
187
  calculateCost(model, output.usage);
185
188
  } else if (event.type === "content_block_start") {
189
+ if (!firstTokenTime) firstTokenTime = Date.now();
186
190
  if (event.content_block.type === "text") {
187
191
  const block: Block = {
188
192
  type: "text",
@@ -321,12 +325,16 @@ export const streamAnthropic: StreamFunction<"anthropic-messages"> = (
321
325
  throw new Error("An unkown error ocurred");
322
326
  }
323
327
 
328
+ output.duration = Date.now() - startTime;
329
+ if (firstTokenTime) output.ttft = firstTokenTime - startTime;
324
330
  stream.push({ type: "done", reason: output.stopReason, message: output });
325
331
  stream.end();
326
332
  } catch (error) {
327
333
  for (const block of output.content) delete (block as any).index;
328
334
  output.stopReason = options?.signal?.aborted ? "aborted" : "error";
329
335
  output.errorMessage = formatErrorMessageWithRetryAfter(error);
336
+ output.duration = Date.now() - startTime;
337
+ if (firstTokenTime) output.ttft = firstTokenTime - startTime;
330
338
  stream.push({ type: "error", reason: output.stopReason, error: output });
331
339
  stream.end();
332
340
  }
@@ -294,6 +294,9 @@ export const streamCursor: StreamFunction<"cursor-agent"> = (
294
294
  const stream = new AssistantMessageEventStream();
295
295
 
296
296
  (async () => {
297
+ const startTime = Date.now();
298
+ let firstTokenTime: number | undefined;
299
+
297
300
  const output: AssistantMessage = {
298
301
  role: "assistant",
299
302
  content: [],
@@ -369,6 +372,9 @@ export const streamCursor: StreamFunction<"cursor-agent"> = (
369
372
  get currentToolCall() {
370
373
  return currentToolCall;
371
374
  },
375
+ get firstTokenTime() {
376
+ return firstTokenTime;
377
+ },
372
378
  setTextBlock: (b) => {
373
379
  currentTextBlock = b;
374
380
  },
@@ -378,6 +384,9 @@ export const streamCursor: StreamFunction<"cursor-agent"> = (
378
384
  setToolCall: (t) => {
379
385
  currentToolCall = t;
380
386
  },
387
+ setFirstTokenTime: () => {
388
+ if (!firstTokenTime) firstTokenTime = Date.now();
389
+ },
381
390
  };
382
391
 
383
392
  const onConversationCheckpoint = (checkpoint: ConversationStateStructure) => {
@@ -502,6 +511,8 @@ export const streamCursor: StreamFunction<"cursor-agent"> = (
502
511
 
503
512
  calculateCost(model, output.usage);
504
513
 
514
+ output.duration = Date.now() - startTime;
515
+ if (firstTokenTime) output.ttft = firstTokenTime - startTime;
505
516
  stream.push({
506
517
  type: "done",
507
518
  reason: output.stopReason as "stop" | "length" | "toolUse",
@@ -511,6 +522,8 @@ export const streamCursor: StreamFunction<"cursor-agent"> = (
511
522
  } catch (error) {
512
523
  output.stopReason = options?.signal?.aborted ? "aborted" : "error";
513
524
  output.errorMessage = formatErrorMessageWithRetryAfter(error);
525
+ output.duration = Date.now() - startTime;
526
+ if (firstTokenTime) output.ttft = firstTokenTime - startTime;
514
527
  stream.push({ type: "error", reason: output.stopReason, error: output });
515
528
  stream.end();
516
529
  } finally {
@@ -532,9 +545,11 @@ interface BlockState {
532
545
  currentTextBlock: (TextContent & { index: number }) | null;
533
546
  currentThinkingBlock: (ThinkingContent & { index: number }) | null;
534
547
  currentToolCall: ToolCallState | null;
548
+ firstTokenTime: number | undefined;
535
549
  setTextBlock: (b: (TextContent & { index: number }) | null) => void;
536
550
  setThinkingBlock: (b: (ThinkingContent & { index: number }) | null) => void;
537
551
  setToolCall: (t: ToolCallState | null) => void;
552
+ setFirstTokenTime: () => void;
538
553
  }
539
554
 
540
555
  interface UsageState {
@@ -1645,6 +1660,7 @@ function processInteractionUpdate(
1645
1660
  log("interactionUpdate", updateCase, update.message?.value);
1646
1661
 
1647
1662
  if (updateCase === "textDelta") {
1663
+ state.setFirstTokenTime();
1648
1664
  const delta = update.message.value.text || "";
1649
1665
  if (!state.currentTextBlock) {
1650
1666
  const block: TextContent & { index: number } = {
@@ -1660,6 +1676,7 @@ function processInteractionUpdate(
1660
1676
  const idx = output.content.indexOf(state.currentTextBlock!);
1661
1677
  stream.push({ type: "text_delta", contentIndex: idx, delta, partial: output });
1662
1678
  } else if (updateCase === "thinkingDelta") {
1679
+ state.setFirstTokenTime();
1663
1680
  const delta = update.message.value.text || "";
1664
1681
  if (!state.currentThinkingBlock) {
1665
1682
  const block: ThinkingContent & { index: number } = {
@@ -365,6 +365,9 @@ export const streamGoogleGeminiCli: StreamFunction<"google-gemini-cli"> = (
365
365
  const stream = new AssistantMessageEventStream();
366
366
 
367
367
  (async () => {
368
+ const startTime = Date.now();
369
+ let firstTokenTime: number | undefined;
370
+
368
371
  const output: AssistantMessage = {
369
372
  role: "assistant",
370
373
  content: [],
@@ -489,6 +492,7 @@ export const streamGoogleGeminiCli: StreamFunction<"google-gemini-cli"> = (
489
492
  let started = false;
490
493
  const ensureStarted = () => {
491
494
  if (!started) {
495
+ if (!firstTokenTime) firstTokenTime = Date.now();
492
496
  stream.push({ type: "start", partial: output });
493
497
  started = true;
494
498
  }
@@ -802,6 +806,8 @@ export const streamGoogleGeminiCli: StreamFunction<"google-gemini-cli"> = (
802
806
  throw new Error("An unknown error occurred");
803
807
  }
804
808
 
809
+ output.duration = Date.now() - startTime;
810
+ if (firstTokenTime) output.ttft = firstTokenTime - startTime;
805
811
  stream.push({ type: "done", reason: output.stopReason, message: output });
806
812
  stream.end();
807
813
  } catch (error) {
@@ -812,6 +818,8 @@ export const streamGoogleGeminiCli: StreamFunction<"google-gemini-cli"> = (
812
818
  }
813
819
  output.stopReason = options?.signal?.aborted ? "aborted" : "error";
814
820
  output.errorMessage = error instanceof Error ? error.message : JSON.stringify(error);
821
+ output.duration = Date.now() - startTime;
822
+ if (firstTokenTime) output.ttft = firstTokenTime - startTime;
815
823
  stream.push({ type: "error", reason: output.stopReason, error: output });
816
824
  stream.end();
817
825
  }
@@ -62,6 +62,9 @@ export const streamGoogleVertex: StreamFunction<"google-vertex"> = (
62
62
  const stream = new AssistantMessageEventStream();
63
63
 
64
64
  (async () => {
65
+ const startTime = Date.now();
66
+ let firstTokenTime: number | undefined;
67
+
65
68
  const output: AssistantMessage = {
66
69
  role: "assistant",
67
70
  content: [],
@@ -97,6 +100,7 @@ export const streamGoogleVertex: StreamFunction<"google-vertex"> = (
97
100
  if (candidate?.content?.parts) {
98
101
  for (const part of candidate.content.parts) {
99
102
  if (part.text !== undefined) {
103
+ if (!firstTokenTime) firstTokenTime = Date.now();
100
104
  const isThinking = isThinkingPart(part);
101
105
  if (
102
106
  !currentBlock ||
@@ -258,6 +262,8 @@ export const streamGoogleVertex: StreamFunction<"google-vertex"> = (
258
262
  throw new Error("An unknown error occurred");
259
263
  }
260
264
 
265
+ output.duration = Date.now() - startTime;
266
+ if (firstTokenTime) output.ttft = firstTokenTime - startTime;
261
267
  stream.push({ type: "done", reason: output.stopReason, message: output });
262
268
  stream.end();
263
269
  } catch (error) {
@@ -269,6 +275,8 @@ export const streamGoogleVertex: StreamFunction<"google-vertex"> = (
269
275
  }
270
276
  output.stopReason = options?.signal?.aborted ? "aborted" : "error";
271
277
  output.errorMessage = formatErrorMessageWithRetryAfter(error);
278
+ output.duration = Date.now() - startTime;
279
+ if (firstTokenTime) output.ttft = firstTokenTime - startTime;
272
280
  stream.push({ type: "error", reason: output.stopReason, error: output });
273
281
  stream.end();
274
282
  }
@@ -53,6 +53,9 @@ export const streamGoogle: StreamFunction<"google-generative-ai"> = (
53
53
  const stream = new AssistantMessageEventStream();
54
54
 
55
55
  (async () => {
56
+ const startTime = Date.now();
57
+ let firstTokenTime: number | undefined;
58
+
56
59
  const output: AssistantMessage = {
57
60
  role: "assistant",
58
61
  content: [],
@@ -88,6 +91,7 @@ export const streamGoogle: StreamFunction<"google-generative-ai"> = (
88
91
  for (const part of candidate.content.parts) {
89
92
  if (part.text !== undefined) {
90
93
  const isThinking = isThinkingPart(part);
94
+ if (!firstTokenTime) firstTokenTime = Date.now();
91
95
  if (
92
96
  !currentBlock ||
93
97
  (isThinking && currentBlock.type !== "thinking") ||
@@ -245,6 +249,8 @@ export const streamGoogle: StreamFunction<"google-generative-ai"> = (
245
249
  throw new Error("An unkown error ocurred");
246
250
  }
247
251
 
252
+ output.duration = Date.now() - startTime;
253
+ if (firstTokenTime) output.ttft = firstTokenTime - startTime;
248
254
  stream.push({ type: "done", reason: output.stopReason, message: output });
249
255
  stream.end();
250
256
  } catch (error) {
@@ -256,6 +262,8 @@ export const streamGoogle: StreamFunction<"google-generative-ai"> = (
256
262
  }
257
263
  output.stopReason = options?.signal?.aborted ? "aborted" : "error";
258
264
  output.errorMessage = formatErrorMessageWithRetryAfter(error);
265
+ output.duration = Date.now() - startTime;
266
+ if (firstTokenTime) output.ttft = firstTokenTime - startTime;
259
267
  stream.push({ type: "error", reason: output.stopReason, error: output });
260
268
  stream.end();
261
269
  }
@@ -105,6 +105,9 @@ export const streamOpenAICodexResponses: StreamFunction<"openai-codex-responses"
105
105
  const stream = new AssistantMessageEventStream();
106
106
 
107
107
  (async () => {
108
+ const startTime = Date.now();
109
+ let firstTokenTime: number | undefined;
110
+
108
111
  const output: AssistantMessage = {
109
112
  role: "assistant",
110
113
  content: [],
@@ -225,6 +228,7 @@ export const streamOpenAICodexResponses: StreamFunction<"openai-codex-responses"
225
228
  if (!eventType) continue;
226
229
 
227
230
  if (eventType === "response.output_item.added") {
231
+ if (!firstTokenTime) firstTokenTime = Date.now();
228
232
  const item = rawEvent.item as ResponseReasoningItem | ResponseOutputMessage | ResponseFunctionToolCall;
229
233
  if (item.type === "reasoning") {
230
234
  currentItem = item;
@@ -412,12 +416,16 @@ export const streamOpenAICodexResponses: StreamFunction<"openai-codex-responses"
412
416
  throw new Error("Codex response failed");
413
417
  }
414
418
 
419
+ output.duration = Date.now() - startTime;
420
+ if (firstTokenTime) output.ttft = firstTokenTime - startTime;
415
421
  stream.push({ type: "done", reason: output.stopReason, message: output });
416
422
  stream.end();
417
423
  } catch (error) {
418
424
  for (const block of output.content) delete (block as { index?: number }).index;
419
425
  output.stopReason = options?.signal?.aborted ? "aborted" : "error";
420
426
  output.errorMessage = formatErrorMessageWithRetryAfter(error);
427
+ output.duration = Date.now() - startTime;
428
+ if (firstTokenTime) output.ttft = firstTokenTime - startTime;
421
429
  stream.push({ type: "error", reason: output.stopReason, error: output });
422
430
  stream.end();
423
431
  }
@@ -81,6 +81,9 @@ export const streamOpenAICompletions: StreamFunction<"openai-completions"> = (
81
81
  const stream = new AssistantMessageEventStream();
82
82
 
83
83
  (async () => {
84
+ const startTime = Date.now();
85
+ let firstTokenTime: number | undefined;
86
+
84
87
  const output: AssistantMessage = {
85
88
  role: "assistant",
86
89
  content: [],
@@ -178,6 +181,7 @@ export const streamOpenAICompletions: StreamFunction<"openai-completions"> = (
178
181
  choice.delta.content !== undefined &&
179
182
  choice.delta.content.length > 0
180
183
  ) {
184
+ if (!firstTokenTime) firstTokenTime = Date.now();
181
185
  if (!currentBlock || currentBlock.type !== "text") {
182
186
  finishCurrentBlock(currentBlock);
183
187
  currentBlock = { type: "text", text: "" };
@@ -303,6 +307,8 @@ export const streamOpenAICompletions: StreamFunction<"openai-completions"> = (
303
307
  throw new Error("An unkown error ocurred");
304
308
  }
305
309
 
310
+ output.duration = Date.now() - startTime;
311
+ if (firstTokenTime) output.ttft = firstTokenTime - startTime;
306
312
  stream.push({ type: "done", reason: output.stopReason, message: output });
307
313
  stream.end();
308
314
  } catch (error) {
@@ -312,6 +318,8 @@ export const streamOpenAICompletions: StreamFunction<"openai-completions"> = (
312
318
  // Some providers via OpenRouter include extra details here.
313
319
  const rawMetadata = (error as { error?: { metadata?: { raw?: string } } })?.error?.metadata?.raw;
314
320
  if (rawMetadata) output.errorMessage += `\n${rawMetadata}`;
321
+ output.duration = Date.now() - startTime;
322
+ if (firstTokenTime) output.ttft = firstTokenTime - startTime;
315
323
  stream.push({ type: "error", reason: output.stopReason, error: output });
316
324
  stream.end();
317
325
  }
@@ -69,6 +69,9 @@ export const streamOpenAIResponses: StreamFunction<"openai-responses"> = (
69
69
 
70
70
  // Start async processing
71
71
  (async () => {
72
+ const startTime = Date.now();
73
+ let firstTokenTime: number | undefined;
74
+
72
75
  const output: AssistantMessage = {
73
76
  role: "assistant",
74
77
  content: [],
@@ -107,6 +110,7 @@ export const streamOpenAIResponses: StreamFunction<"openai-responses"> = (
107
110
  for await (const event of openaiStream) {
108
111
  // Handle output item start
109
112
  if (event.type === "response.output_item.added") {
113
+ if (!firstTokenTime) firstTokenTime = Date.now();
110
114
  const item = event.item;
111
115
  if (item.type === "reasoning") {
112
116
  currentItem = item;
@@ -309,12 +313,16 @@ export const streamOpenAIResponses: StreamFunction<"openai-responses"> = (
309
313
  throw new Error("An unkown error ocurred");
310
314
  }
311
315
 
316
+ output.duration = Date.now() - startTime;
317
+ if (firstTokenTime) output.ttft = firstTokenTime - startTime;
312
318
  stream.push({ type: "done", reason: output.stopReason, message: output });
313
319
  stream.end();
314
320
  } catch (error) {
315
321
  for (const block of output.content) delete (block as any).index;
316
322
  output.stopReason = options?.signal?.aborted ? "aborted" : "error";
317
323
  output.errorMessage = formatErrorMessageWithRetryAfter(error);
324
+ output.duration = Date.now() - startTime;
325
+ if (firstTokenTime) output.ttft = firstTokenTime - startTime;
318
326
  stream.push({ type: "error", reason: output.stopReason, error: output });
319
327
  stream.end();
320
328
  }
package/src/types.ts CHANGED
@@ -193,6 +193,8 @@ export interface AssistantMessage {
193
193
  stopReason: StopReason;
194
194
  errorMessage?: string;
195
195
  timestamp: number; // Unix timestamp in milliseconds
196
+ duration?: number; // Request duration in milliseconds
197
+ ttft?: number; // Time to first token in milliseconds
196
198
  }
197
199
 
198
200
  export interface ToolResultMessage<TDetails = any, TInput = unknown> {