@posthog/ai 7.7.0 → 7.8.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -7,7 +7,7 @@ var buffer = require('buffer');
7
7
  var uuid = require('uuid');
8
8
  var core = require('@posthog/core');
9
9
 
10
- var version = "7.7.0";
10
+ var version = "7.8.1";
11
11
 
12
12
  // Type guards for safer type checking
13
13
 
@@ -500,6 +500,7 @@ const sendEventToPosthog = async ({
500
500
  input,
501
501
  output,
502
502
  latency,
503
+ timeToFirstToken,
503
504
  baseURL,
504
505
  params,
505
506
  httpStatus = 200,
@@ -566,6 +567,9 @@ const sendEventToPosthog = async ({
566
567
  } : {}),
567
568
  ...additionalTokenValues,
568
569
  $ai_latency: latency,
570
+ ...(timeToFirstToken !== undefined ? {
571
+ $ai_time_to_first_token: timeToFirstToken
572
+ } : {}),
569
573
  $ai_trace_id: traceId,
570
574
  $ai_base_url: baseURL,
571
575
  ...params.posthogProperties,
@@ -638,6 +642,14 @@ function formatOpenAIResponsesInput(input, instructions) {
638
642
  return messages;
639
643
  }
640
644
 
645
/**
 * Stream event types that are treated as carrying model output: structural
 * "added" events plus text, reasoning, audio, and refusal deltas.
 */
const RESPONSE_TOKEN_CHUNK_TYPES = new Set([
  'response.output_item.added',
  'response.content_part.added',
  'response.output_text.delta',
  'response.reasoning_text.delta',
  'response.reasoning_summary_text.delta',
  'response.audio.delta',
  'response.audio.transcript.delta',
  'response.refusal.delta',
]);

/**
 * Checks whether a ResponseStreamEvent chunk is one of the event types that
 * count as model output for time-to-first-token tracking.
 *
 * The check is stateless: it does not know whether the chunk is the *first*
 * such event. Callers are expected to record the timestamp only on the first
 * match.
 *
 * @param {{ type?: string } | null | undefined} chunk - A streamed event.
 * @returns {boolean} `true` when `chunk.type` is a recognized output event
 *   type; `false` otherwise, including for nullish chunks.
 */
function isResponseTokenChunk(chunk) {
  // Optional chaining keeps this telemetry-only guard from throwing on a
  // nullish chunk; an undefined type is simply not in the set.
  return RESPONSE_TOKEN_CHUNK_TYPES.has(chunk?.type);
}

652
+
641
653
  const Chat = openai.OpenAI.Chat;
642
654
  const Completions = Chat.Completions;
643
655
  const Responses = openai.OpenAI.Responses;
@@ -694,6 +706,7 @@ class WrappedCompletions extends Completions {
694
706
  const contentBlocks = [];
695
707
  let accumulatedContent = '';
696
708
  let modelFromResponse;
709
+ let firstTokenTime;
697
710
  let usage = {
698
711
  inputTokens: 0,
699
712
  outputTokens: 0,
@@ -717,12 +730,18 @@ class WrappedCompletions extends Completions {
717
730
  // Handle text content
718
731
  const deltaContent = choice?.delta?.content;
719
732
  if (deltaContent) {
733
+ if (firstTokenTime === undefined) {
734
+ firstTokenTime = Date.now();
735
+ }
720
736
  accumulatedContent += deltaContent;
721
737
  }
722
738
 
723
739
  // Handle tool calls
724
740
  const deltaToolCalls = choice?.delta?.tool_calls;
725
741
  if (deltaToolCalls && Array.isArray(deltaToolCalls)) {
742
+ if (firstTokenTime === undefined) {
743
+ firstTokenTime = Date.now();
744
+ }
726
745
  for (const toolCall of deltaToolCalls) {
727
746
  const index = toolCall.index;
728
747
  if (index !== undefined) {
@@ -798,6 +817,7 @@ class WrappedCompletions extends Completions {
798
817
  }]
799
818
  }];
800
819
  const latency = (Date.now() - startTime) / 1000;
820
+ const timeToFirstToken = firstTokenTime !== undefined ? (firstTokenTime - startTime) / 1000 : undefined;
801
821
  const availableTools = extractAvailableToolCalls('openai', openAIParams);
802
822
  await sendEventToPosthog({
803
823
  client: this.phClient,
@@ -807,6 +827,7 @@ class WrappedCompletions extends Completions {
807
827
  input: sanitizeOpenAI(openAIParams.messages),
808
828
  output: formattedOutput,
809
829
  latency,
830
+ timeToFirstToken,
810
831
  baseURL: this.baseURL,
811
832
  params: body,
812
833
  httpStatus: 200,
@@ -929,6 +950,7 @@ class WrappedResponses extends Responses {
929
950
  try {
930
951
  let finalContent = [];
931
952
  let modelFromResponse;
953
+ let firstTokenTime;
932
954
  let usage = {
933
955
  inputTokens: 0,
934
956
  outputTokens: 0,
@@ -936,6 +958,10 @@ class WrappedResponses extends Responses {
936
958
  };
937
959
  let rawUsageData;
938
960
  for await (const chunk of stream1) {
961
+ // Track first token time on content delta events
962
+ if (firstTokenTime === undefined && isResponseTokenChunk(chunk)) {
963
+ firstTokenTime = Date.now();
964
+ }
939
965
  if ('response' in chunk && chunk.response) {
940
966
  // Extract model from response object in chunk (for stored prompts)
941
967
  if (!modelFromResponse && chunk.response.model) {
@@ -961,6 +987,7 @@ class WrappedResponses extends Responses {
961
987
  }
962
988
  }
963
989
  const latency = (Date.now() - startTime) / 1000;
990
+ const timeToFirstToken = firstTokenTime !== undefined ? (firstTokenTime - startTime) / 1000 : undefined;
964
991
  const availableTools = extractAvailableToolCalls('openai', openAIParams);
965
992
  await sendEventToPosthog({
966
993
  client: this.phClient,
@@ -970,6 +997,7 @@ class WrappedResponses extends Responses {
970
997
  input: formatOpenAIResponsesInput(sanitizeOpenAIResponse(openAIParams.input), openAIParams.instructions),
971
998
  output: finalContent,
972
999
  latency,
1000
+ timeToFirstToken,
973
1001
  baseURL: this.baseURL,
974
1002
  params: body,
975
1003
  httpStatus: 200,
@@ -1223,12 +1251,17 @@ class WrappedTranscriptions extends Transcriptions {
1223
1251
  (async () => {
1224
1252
  try {
1225
1253
  let finalContent = '';
1254
+ let firstTokenTime;
1226
1255
  let usage = {
1227
1256
  inputTokens: 0,
1228
1257
  outputTokens: 0
1229
1258
  };
1230
1259
  const doneEvent = 'transcript.text.done';
1231
1260
  for await (const chunk of stream1) {
1261
+ // Track first token on text delta events
1262
+ if (firstTokenTime === undefined && chunk.type === 'transcript.text.delta') {
1263
+ firstTokenTime = Date.now();
1264
+ }
1232
1265
  if (chunk.type === doneEvent && 'text' in chunk && chunk.text && chunk.text.length > 0) {
1233
1266
  finalContent = chunk.text;
1234
1267
  }
@@ -1241,6 +1274,7 @@ class WrappedTranscriptions extends Transcriptions {
1241
1274
  }
1242
1275
  }
1243
1276
  const latency = (Date.now() - startTime) / 1000;
1277
+ const timeToFirstToken = firstTokenTime !== undefined ? (firstTokenTime - startTime) / 1000 : undefined;
1244
1278
  const availableTools = extractAvailableToolCalls('openai', openAIParams);
1245
1279
  await sendEventToPosthog({
1246
1280
  client: this.phClient,
@@ -1250,6 +1284,7 @@ class WrappedTranscriptions extends Transcriptions {
1250
1284
  input: openAIParams.prompt,
1251
1285
  output: finalContent,
1252
1286
  latency,
1287
+ timeToFirstToken,
1253
1288
  baseURL: this.baseURL,
1254
1289
  params: body,
1255
1290
  httpStatus: 200,