@posthog/ai 7.7.0 → 7.8.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -3,7 +3,7 @@ import { Buffer } from 'buffer';
3
3
  import { v4 } from 'uuid';
4
4
  import { uuidv7 } from '@posthog/core';
5
5
 
6
- var version = "7.7.0";
6
+ var version = "7.8.1";
7
7
 
8
8
  // Type guards for safer type checking
9
9
 
@@ -496,6 +496,7 @@ const sendEventToPosthog = async ({
496
496
  input,
497
497
  output,
498
498
  latency,
499
+ timeToFirstToken,
499
500
  baseURL,
500
501
  params,
501
502
  httpStatus = 200,
@@ -562,6 +563,9 @@ const sendEventToPosthog = async ({
562
563
  } : {}),
563
564
  ...additionalTokenValues,
564
565
  $ai_latency: latency,
566
+ ...(timeToFirstToken !== undefined ? {
567
+ $ai_time_to_first_token: timeToFirstToken
568
+ } : {}),
565
569
  $ai_trace_id: traceId,
566
570
  $ai_base_url: baseURL,
567
571
  ...params.posthogProperties,
@@ -634,6 +638,14 @@ function formatOpenAIResponsesInput(input, instructions) {
634
638
  return messages;
635
639
  }
636
640
 
641
/**
 * Stream event types that count as the first sign of model output when
 * measuring time to first token on a Responses stream.
 *
 * Besides the text, reasoning, audio and refusal delta events, this also
 * contains the structural `response.output_item.added` and
 * `response.content_part.added` events, so the timer may stop on one of
 * those before any delta has been received.
 */
const RESPONSE_TOKEN_EVENT_TYPES = new Set([
  'response.output_item.added',
  'response.content_part.added',
  'response.output_text.delta',
  'response.reasoning_text.delta',
  'response.reasoning_summary_text.delta',
  'response.audio.delta',
  'response.audio.transcript.delta',
  'response.refusal.delta',
]);

/**
 * Checks if a ResponseStreamEvent chunk represents the first token/content from the model.
 * This includes various content types like text, reasoning, audio, and refusals,
 * as well as the events announcing a newly added output item or content part.
 *
 * @param {{ type?: string } | null | undefined} chunk - Stream event to classify.
 * @returns {boolean} `true` when `chunk.type` is one of the recognised event
 *   types; `false` otherwise, including when `chunk` is null/undefined or has
 *   no `type`.
 */
function isResponseTokenChunk(chunk) {
  // Optional chaining keeps a missing chunk from throwing inside the stream loop.
  return RESPONSE_TOKEN_EVENT_TYPES.has(chunk?.type);
}
648
+
637
649
  const Chat = OpenAI.Chat;
638
650
  const Completions = Chat.Completions;
639
651
  const Responses = OpenAI.Responses;
@@ -690,6 +702,7 @@ class WrappedCompletions extends Completions {
690
702
  const contentBlocks = [];
691
703
  let accumulatedContent = '';
692
704
  let modelFromResponse;
705
+ let firstTokenTime;
693
706
  let usage = {
694
707
  inputTokens: 0,
695
708
  outputTokens: 0,
@@ -713,12 +726,18 @@ class WrappedCompletions extends Completions {
713
726
  // Handle text content
714
727
  const deltaContent = choice?.delta?.content;
715
728
  if (deltaContent) {
729
+ if (firstTokenTime === undefined) {
730
+ firstTokenTime = Date.now();
731
+ }
716
732
  accumulatedContent += deltaContent;
717
733
  }
718
734
 
719
735
  // Handle tool calls
720
736
  const deltaToolCalls = choice?.delta?.tool_calls;
721
737
  if (deltaToolCalls && Array.isArray(deltaToolCalls)) {
738
+ if (firstTokenTime === undefined) {
739
+ firstTokenTime = Date.now();
740
+ }
722
741
  for (const toolCall of deltaToolCalls) {
723
742
  const index = toolCall.index;
724
743
  if (index !== undefined) {
@@ -794,6 +813,7 @@ class WrappedCompletions extends Completions {
794
813
  }]
795
814
  }];
796
815
  const latency = (Date.now() - startTime) / 1000;
816
+ const timeToFirstToken = firstTokenTime !== undefined ? (firstTokenTime - startTime) / 1000 : undefined;
797
817
  const availableTools = extractAvailableToolCalls('openai', openAIParams);
798
818
  await sendEventToPosthog({
799
819
  client: this.phClient,
@@ -803,6 +823,7 @@ class WrappedCompletions extends Completions {
803
823
  input: sanitizeOpenAI(openAIParams.messages),
804
824
  output: formattedOutput,
805
825
  latency,
826
+ timeToFirstToken,
806
827
  baseURL: this.baseURL,
807
828
  params: body,
808
829
  httpStatus: 200,
@@ -925,6 +946,7 @@ class WrappedResponses extends Responses {
925
946
  try {
926
947
  let finalContent = [];
927
948
  let modelFromResponse;
949
+ let firstTokenTime;
928
950
  let usage = {
929
951
  inputTokens: 0,
930
952
  outputTokens: 0,
@@ -932,6 +954,10 @@ class WrappedResponses extends Responses {
932
954
  };
933
955
  let rawUsageData;
934
956
  for await (const chunk of stream1) {
957
+ // Track first token time on the first event that isResponseTokenChunk accepts (content deltas as well as output-item / content-part "added" events)
958
+ if (firstTokenTime === undefined && isResponseTokenChunk(chunk)) {
959
+ firstTokenTime = Date.now();
960
+ }
935
961
  if ('response' in chunk && chunk.response) {
936
962
  // Extract model from response object in chunk (for stored prompts)
937
963
  if (!modelFromResponse && chunk.response.model) {
@@ -957,6 +983,7 @@ class WrappedResponses extends Responses {
957
983
  }
958
984
  }
959
985
  const latency = (Date.now() - startTime) / 1000;
986
+ const timeToFirstToken = firstTokenTime !== undefined ? (firstTokenTime - startTime) / 1000 : undefined;
960
987
  const availableTools = extractAvailableToolCalls('openai', openAIParams);
961
988
  await sendEventToPosthog({
962
989
  client: this.phClient,
@@ -966,6 +993,7 @@ class WrappedResponses extends Responses {
966
993
  input: formatOpenAIResponsesInput(sanitizeOpenAIResponse(openAIParams.input), openAIParams.instructions),
967
994
  output: finalContent,
968
995
  latency,
996
+ timeToFirstToken,
969
997
  baseURL: this.baseURL,
970
998
  params: body,
971
999
  httpStatus: 200,
@@ -1219,12 +1247,17 @@ class WrappedTranscriptions extends Transcriptions {
1219
1247
  (async () => {
1220
1248
  try {
1221
1249
  let finalContent = '';
1250
+ let firstTokenTime;
1222
1251
  let usage = {
1223
1252
  inputTokens: 0,
1224
1253
  outputTokens: 0
1225
1254
  };
1226
1255
  const doneEvent = 'transcript.text.done';
1227
1256
  for await (const chunk of stream1) {
1257
+ // Track first token on text delta events
1258
+ if (firstTokenTime === undefined && chunk.type === 'transcript.text.delta') {
1259
+ firstTokenTime = Date.now();
1260
+ }
1228
1261
  if (chunk.type === doneEvent && 'text' in chunk && chunk.text && chunk.text.length > 0) {
1229
1262
  finalContent = chunk.text;
1230
1263
  }
@@ -1237,6 +1270,7 @@ class WrappedTranscriptions extends Transcriptions {
1237
1270
  }
1238
1271
  }
1239
1272
  const latency = (Date.now() - startTime) / 1000;
1273
+ const timeToFirstToken = firstTokenTime !== undefined ? (firstTokenTime - startTime) / 1000 : undefined;
1240
1274
  const availableTools = extractAvailableToolCalls('openai', openAIParams);
1241
1275
  await sendEventToPosthog({
1242
1276
  client: this.phClient,
@@ -1246,6 +1280,7 @@ class WrappedTranscriptions extends Transcriptions {
1246
1280
  input: openAIParams.prompt,
1247
1281
  output: finalContent,
1248
1282
  latency,
1283
+ timeToFirstToken,
1249
1284
  baseURL: this.baseURL,
1250
1285
  params: body,
1251
1286
  httpStatus: 200,