@posthog/ai 7.7.0 → 7.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
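In summary: 7.8.0 adds time-to-first-token (TTFT) tracking to the streaming code paths in the SDK: OpenAI chat completions, responses, and transcriptions; their Azure OpenAI counterparts; the Vercel AI SDK wrapper; Anthropic messages; and Gemini. Each wrapper records a firstTokenTime timestamp on the first content-bearing chunk, converts it to seconds relative to the request start, and sendEventToPosthog forwards it as $ai_time_to_first_token alongside $ai_latency. A minimal sketch of the shared pattern, with streamChunks and hasContent as illustrative stand-ins for the provider-specific stream and per-chunk checks:

    // Sketch only: the real wrappers inline this logic per provider.
    async function measureStream(streamChunks, hasContent) {
      const startTime = Date.now();
      let firstTokenTime;
      for await (const chunk of streamChunks) {
        if (firstTokenTime === undefined && hasContent(chunk)) {
          firstTokenTime = Date.now(); // set once, on the first token
        }
      }
      const latency = (Date.now() - startTime) / 1000; // seconds
      const timeToFirstToken =
        firstTokenTime !== undefined ? (firstTokenTime - startTime) / 1000 : undefined;
      return { latency, timeToFirstToken };
    }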
package/dist/index.cjs CHANGED
@@ -30,7 +30,7 @@ function _interopNamespace(e) {
  var uuid__namespace = /*#__PURE__*/_interopNamespace(uuid);
  var AnthropicOriginal__default = /*#__PURE__*/_interopDefault(AnthropicOriginal);

- var version = "7.7.0";
+ var version = "7.8.0";

  // Type guards for safer type checking
  const isString = value => {
@@ -746,6 +746,7 @@ const sendEventToPosthog = async ({
  input,
  output,
  latency,
+ timeToFirstToken,
  baseURL,
  params,
  httpStatus = 200,
@@ -812,6 +813,9 @@ const sendEventToPosthog = async ({
  } : {}),
  ...additionalTokenValues,
  $ai_latency: latency,
+ ...(timeToFirstToken !== undefined ? {
+   $ai_time_to_first_token: timeToFirstToken
+ } : {}),
  $ai_trace_id: traceId,
  $ai_base_url: baseURL,
  ...params.posthogProperties,
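The conditional spread above is what keeps the event schema clean: when a stream never produced a token (so timeToFirstToken stays undefined), the $ai_time_to_first_token key is omitted from the event entirely rather than sent as null. The guard in isolation, as a minimal sketch (ttft is a hypothetical local standing in for timeToFirstToken):

    const properties = {
      $ai_latency: 1.2,
      ...(ttft !== undefined ? { $ai_time_to_first_token: ttft } : {}),
    };
    // When ttft is undefined, `properties` has no $ai_time_to_first_token key at all.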
@@ -883,6 +887,14 @@ function formatOpenAIResponsesInput(input, instructions) {
  return messages;
  }

+ /**
+  * Checks if a ResponseStreamEvent chunk represents the first token/content from the model.
+  * This includes various content types like text, reasoning, audio, and refusals.
+  */
+ function isResponseTokenChunk(chunk) {
+   return chunk.type === 'response.output_item.added' || chunk.type === 'response.content_part.added' || chunk.type === 'response.output_text.delta' || chunk.type === 'response.reasoning_text.delta' || chunk.type === 'response.reasoning_summary_text.delta' || chunk.type === 'response.audio.delta' || chunk.type === 'response.audio.transcript.delta' || chunk.type === 'response.refusal.delta';
+ }
+
  const Chat = openai.OpenAI.Chat;
  const Completions = Chat.Completions;
  const Responses = openai.OpenAI.Responses;
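The new isResponseTokenChunk helper centralizes which Responses API stream events count as first output. Note that it matches response.output_item.added and response.content_part.added as well as the delta events, so TTFT is recorded as soon as any output item starts, not only on the first text delta. Both the OpenAI and Azure OpenAI response wrappers below apply it with the same one-shot guard:

    // Sketch of the guard as it appears in the streaming loops below.
    if (firstTokenTime === undefined && isResponseTokenChunk(chunk)) {
      firstTokenTime = Date.now();
    }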
@@ -932,6 +944,7 @@ let WrappedCompletions$1 = class WrappedCompletions extends Completions {
  const contentBlocks = [];
  let accumulatedContent = '';
  let modelFromResponse;
+ let firstTokenTime;
  let usage = {
  inputTokens: 0,
  outputTokens: 0,
@@ -953,11 +966,17 @@ let WrappedCompletions$1 = class WrappedCompletions extends Completions {
  // Handle text content
  const deltaContent = choice?.delta?.content;
  if (deltaContent) {
+ if (firstTokenTime === undefined) {
+   firstTokenTime = Date.now();
+ }
  accumulatedContent += deltaContent;
  }
  // Handle tool calls
  const deltaToolCalls = choice?.delta?.tool_calls;
  if (deltaToolCalls && Array.isArray(deltaToolCalls)) {
+ if (firstTokenTime === undefined) {
+   firstTokenTime = Date.now();
+ }
  for (const toolCall of deltaToolCalls) {
  const index = toolCall.index;
  if (index !== undefined) {
@@ -1029,6 +1048,7 @@ let WrappedCompletions$1 = class WrappedCompletions extends Completions {
  }]
  }];
  const latency = (Date.now() - startTime) / 1000;
+ const timeToFirstToken = firstTokenTime !== undefined ? (firstTokenTime - startTime) / 1000 : undefined;
  const availableTools = extractAvailableToolCalls('openai', openAIParams);
  await sendEventToPosthog({
  client: this.phClient,
@@ -1038,6 +1058,7 @@ let WrappedCompletions$1 = class WrappedCompletions extends Completions {
  input: sanitizeOpenAI(openAIParams.messages),
  output: formattedOutput,
  latency,
+ timeToFirstToken,
  baseURL: this.baseURL,
  params: body,
  httpStatus: 200,
@@ -1152,6 +1173,7 @@ let WrappedResponses$1 = class WrappedResponses extends Responses {
  try {
  let finalContent = [];
  let modelFromResponse;
+ let firstTokenTime;
  let usage = {
  inputTokens: 0,
  outputTokens: 0,
@@ -1159,6 +1181,10 @@ let WrappedResponses$1 = class WrappedResponses extends Responses {
  };
  let rawUsageData;
  for await (const chunk of stream1) {
+ // Track first token time on content delta events
+ if (firstTokenTime === undefined && isResponseTokenChunk(chunk)) {
+   firstTokenTime = Date.now();
+ }
  if ('response' in chunk && chunk.response) {
  // Extract model from response object in chunk (for stored prompts)
  if (!modelFromResponse && chunk.response.model) {
@@ -1184,6 +1210,7 @@ let WrappedResponses$1 = class WrappedResponses extends Responses {
  }
  }
  const latency = (Date.now() - startTime) / 1000;
+ const timeToFirstToken = firstTokenTime !== undefined ? (firstTokenTime - startTime) / 1000 : undefined;
  const availableTools = extractAvailableToolCalls('openai', openAIParams);
  await sendEventToPosthog({
  client: this.phClient,
@@ -1193,6 +1220,7 @@ let WrappedResponses$1 = class WrappedResponses extends Responses {
  input: formatOpenAIResponsesInput(sanitizeOpenAIResponse(openAIParams.input), openAIParams.instructions),
  output: finalContent,
  latency,
+ timeToFirstToken,
  baseURL: this.baseURL,
  params: body,
  httpStatus: 200,
@@ -1431,12 +1459,17 @@ class WrappedTranscriptions extends Transcriptions {
  (async () => {
  try {
  let finalContent = '';
+ let firstTokenTime;
  let usage = {
  inputTokens: 0,
  outputTokens: 0
  };
  const doneEvent = 'transcript.text.done';
  for await (const chunk of stream1) {
+ // Track first token on text delta events
+ if (firstTokenTime === undefined && chunk.type === 'transcript.text.delta') {
+   firstTokenTime = Date.now();
+ }
  if (chunk.type === doneEvent && 'text' in chunk && chunk.text && chunk.text.length > 0) {
  finalContent = chunk.text;
  }
@@ -1449,6 +1482,7 @@ class WrappedTranscriptions extends Transcriptions {
  }
  }
  const latency = (Date.now() - startTime) / 1000;
+ const timeToFirstToken = firstTokenTime !== undefined ? (firstTokenTime - startTime) / 1000 : undefined;
  const availableTools = extractAvailableToolCalls('openai', openAIParams);
  await sendEventToPosthog({
  client: this.phClient,
@@ -1458,6 +1492,7 @@ class WrappedTranscriptions extends Transcriptions {
  input: openAIParams.prompt,
  output: finalContent,
  latency,
+ timeToFirstToken,
  baseURL: this.baseURL,
  params: body,
  httpStatus: 200,
@@ -1576,6 +1611,7 @@ class WrappedCompletions extends openai.AzureOpenAI.Chat.Completions {
  const contentBlocks = [];
  let accumulatedContent = '';
  let modelFromResponse;
+ let firstTokenTime;
  let usage = {
  inputTokens: 0,
  outputTokens: 0
@@ -1591,11 +1627,17 @@ class WrappedCompletions extends openai.AzureOpenAI.Chat.Completions {
  // Handle text content
  const deltaContent = choice?.delta?.content;
  if (deltaContent) {
+ if (firstTokenTime === undefined) {
+   firstTokenTime = Date.now();
+ }
  accumulatedContent += deltaContent;
  }
  // Handle tool calls
  const deltaToolCalls = choice?.delta?.tool_calls;
  if (deltaToolCalls && Array.isArray(deltaToolCalls)) {
+ if (firstTokenTime === undefined) {
+   firstTokenTime = Date.now();
+ }
  for (const toolCall of deltaToolCalls) {
  const index = toolCall.index;
  if (index !== undefined) {
@@ -1665,6 +1707,7 @@ class WrappedCompletions extends openai.AzureOpenAI.Chat.Completions {
  }]
  }];
  const latency = (Date.now() - startTime) / 1000;
+ const timeToFirstToken = firstTokenTime !== undefined ? (firstTokenTime - startTime) / 1000 : undefined;
  await sendEventToPosthog({
  client: this.phClient,
  ...posthogParams,
@@ -1673,6 +1716,7 @@ class WrappedCompletions extends openai.AzureOpenAI.Chat.Completions {
  input: sanitizeOpenAI(openAIParams.messages),
  output: formattedOutput,
  latency,
+ timeToFirstToken,
  baseURL: this.baseURL,
  params: body,
  httpStatus: 200,
@@ -1774,11 +1818,16 @@ class WrappedResponses extends openai.AzureOpenAI.Responses {
  try {
  let finalContent = [];
  let modelFromResponse;
+ let firstTokenTime;
  let usage = {
  inputTokens: 0,
  outputTokens: 0
  };
  for await (const chunk of stream1) {
+ // Track first token time on content delta events
+ if (firstTokenTime === undefined && isResponseTokenChunk(chunk)) {
+   firstTokenTime = Date.now();
+ }
  if ('response' in chunk && chunk.response) {
  // Extract model from response if not in params (for stored prompts)
  if (!modelFromResponse && chunk.response.model) {
@@ -1798,6 +1847,7 @@ class WrappedResponses extends openai.AzureOpenAI.Responses {
  }
  }
  const latency = (Date.now() - startTime) / 1000;
+ const timeToFirstToken = firstTokenTime !== undefined ? (firstTokenTime - startTime) / 1000 : undefined;
  await sendEventToPosthog({
  client: this.phClient,
  ...posthogParams,
@@ -1806,6 +1856,7 @@ class WrappedResponses extends openai.AzureOpenAI.Responses {
  input: formatOpenAIResponsesInput(openAIParams.input, openAIParams.instructions),
  output: finalContent,
  latency,
+ timeToFirstToken,
  baseURL: this.baseURL,
  params: body,
  httpStatus: 200,
@@ -2375,6 +2426,7 @@ const wrapVercelLanguageModel = (model, phClient, options) => {
  doStream: {
  value: async params => {
  const startTime = Date.now();
+ let firstTokenTime;
  let generatedText = '';
  let reasoningText = '';
  let usage = {};
@@ -2398,13 +2450,22 @@ const wrapVercelLanguageModel = (model, phClient, options) => {
  transform(chunk, controller) {
  // Handle streaming patterns - compatible with both V2 and V3
  if (chunk.type === 'text-delta') {
+ if (firstTokenTime === undefined) {
+   firstTokenTime = Date.now();
+ }
  generatedText += chunk.delta;
  }
  if (chunk.type === 'reasoning-delta') {
+ if (firstTokenTime === undefined) {
+   firstTokenTime = Date.now();
+ }
  reasoningText += chunk.delta;
  }
  // Handle tool call chunks
  if (chunk.type === 'tool-input-start') {
+ if (firstTokenTime === undefined) {
+   firstTokenTime = Date.now();
+ }
  // Initialize a new tool call
  toolCallsInProgress.set(chunk.id, {
  toolCallId: chunk.id,
@@ -2423,6 +2484,9 @@ const wrapVercelLanguageModel = (model, phClient, options) => {
  // Tool call is complete, keep it in the map for final processing
  }
  if (chunk.type === 'tool-call') {
+ if (firstTokenTime === undefined) {
+   firstTokenTime = Date.now();
+ }
  // Direct tool call chunk (complete tool call)
  toolCallsInProgress.set(chunk.toolCallId, {
  toolCallId: chunk.toolCallId,
@@ -2446,6 +2510,7 @@ const wrapVercelLanguageModel = (model, phClient, options) => {
  },
  flush: async () => {
  const latency = (Date.now() - startTime) / 1000;
+ const timeToFirstToken = firstTokenTime !== undefined ? (firstTokenTime - startTime) / 1000 : undefined;
  // Build content array similar to mapVercelOutput structure
  const content = [];
  if (reasoningText) {
@@ -2498,6 +2563,7 @@ const wrapVercelLanguageModel = (model, phClient, options) => {
  input: mergedOptions.posthogPrivacyMode ? '' : mapVercelPrompt(params.prompt),
  output: output,
  latency,
+ timeToFirstToken,
  baseURL,
  params: mergedParams,
  httpStatus: 200,
@@ -2572,6 +2638,7 @@ class WrappedMessages extends AnthropicOriginal__default.default.Messages {
  const contentBlocks = [];
  const toolsInProgress = new Map();
  let currentTextBlock = null;
+ let firstTokenTime;
  const usage = {
  inputTokens: 0,
  outputTokens: 0,
@@ -2594,6 +2661,9 @@ class WrappedMessages extends AnthropicOriginal__default.default.Messages {
  };
  contentBlocks.push(currentTextBlock);
  } else if (chunk.content_block?.type === 'tool_use') {
+ if (firstTokenTime === undefined) {
+   firstTokenTime = Date.now();
+ }
  const toolBlock = {
  type: 'function',
  id: chunk.content_block.id,
@@ -2614,6 +2684,9 @@ class WrappedMessages extends AnthropicOriginal__default.default.Messages {
  if ('delta' in chunk) {
  if ('text' in chunk.delta) {
  const delta = chunk.delta.text;
+ if (firstTokenTime === undefined) {
+   firstTokenTime = Date.now();
+ }
  accumulatedContent += delta;
  if (currentTextBlock) {
  currentTextBlock.text += delta;
@@ -2669,6 +2742,7 @@ class WrappedMessages extends AnthropicOriginal__default.default.Messages {
  }
  usage.rawUsage = lastRawUsage;
  const latency = (Date.now() - startTime) / 1000;
+ const timeToFirstToken = firstTokenTime !== undefined ? (firstTokenTime - startTime) / 1000 : undefined;
  const availableTools = extractAvailableToolCalls('anthropic', anthropicParams);
  // Format output to match non-streaming version
  const formattedOutput = contentBlocks.length > 0 ? [{
@@ -2689,6 +2763,7 @@ class WrappedMessages extends AnthropicOriginal__default.default.Messages {
  input: sanitizeAnthropic(mergeSystemPrompt(anthropicParams, 'anthropic')),
  output: formattedOutput,
  latency,
+ timeToFirstToken,
  baseURL: this.baseURL,
  params: body,
  httpStatus: 200,
@@ -2850,6 +2925,7 @@ class WrappedModels {
  } = extractPosthogParams(params);
  const startTime = Date.now();
  const accumulatedContent = [];
+ let firstTokenTime;
  let usage = {
  inputTokens: 0,
  outputTokens: 0,
@@ -2859,6 +2935,10 @@ class WrappedModels {
  try {
  const stream = await this.client.models.generateContentStream(geminiParams);
  for await (const chunk of stream) {
+ // Track first token time when we get text content
+ if (firstTokenTime === undefined && chunk.text) {
+   firstTokenTime = Date.now();
+ }
  const chunkWebSearchCount = calculateGoogleWebSearchCount(chunk);
  if (chunkWebSearchCount > 0 && chunkWebSearchCount > (usage.webSearchCount ?? 0)) {
  usage.webSearchCount = chunkWebSearchCount;
@@ -2889,6 +2969,9 @@ class WrappedModels {
  for (const part of candidate.content.parts) {
  // Type-safe check for functionCall
  if ('functionCall' in part) {
+ if (firstTokenTime === undefined) {
+   firstTokenTime = Date.now();
+ }
  const funcCall = part.functionCall;
  if (funcCall?.name) {
  accumulatedContent.push({
@@ -2919,6 +3002,7 @@ class WrappedModels {
  yield chunk;
  }
  const latency = (Date.now() - startTime) / 1000;
+ const timeToFirstToken = firstTokenTime !== undefined ? (firstTokenTime - startTime) / 1000 : undefined;
  const availableTools = extractAvailableToolCalls('gemini', geminiParams);
  // Format output similar to formatResponseGemini
  const output = accumulatedContent.length > 0 ? [{
@@ -2933,6 +3017,7 @@ class WrappedModels {
  input: this.formatInputForPostHog(geminiParams),
  output,
  latency,
+ timeToFirstToken,
  baseURL: 'https://generativelanguage.googleapis.com',
  params: params,
  httpStatus: 200,
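Taken together, streaming generations captured by 7.8.0 carry the new timing next to the total latency. A hypothetical captured payload, for illustration only: the $ai_* property names come from this diff, while the event name and the values shown are assumptions, and the $ai_time_to_first_token key is simply absent when no token was ever received:

    {
      "event": "$ai_generation",
      "properties": {
        "$ai_latency": 2.41,
        "$ai_time_to_first_token": 0.37,
        "$ai_trace_id": "some-trace-id",
        "$ai_base_url": "https://api.openai.com/v1"
      }
    }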