@posthog/ai 7.7.0 → 7.8.0
This diff compares the contents of publicly released package versions as published to a supported public registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in that registry.
- package/dist/anthropic/index.cjs +14 -1
- package/dist/anthropic/index.cjs.map +1 -1
- package/dist/anthropic/index.mjs +14 -1
- package/dist/anthropic/index.mjs.map +1 -1
- package/dist/gemini/index.cjs +15 -1
- package/dist/gemini/index.cjs.map +1 -1
- package/dist/gemini/index.mjs +15 -1
- package/dist/gemini/index.mjs.map +1 -1
- package/dist/index.cjs +86 -1
- package/dist/index.cjs.map +1 -1
- package/dist/index.mjs +86 -1
- package/dist/index.mjs.map +1 -1
- package/dist/langchain/index.cjs +1 -1
- package/dist/langchain/index.cjs.map +1 -1
- package/dist/langchain/index.mjs +1 -1
- package/dist/langchain/index.mjs.map +1 -1
- package/dist/openai/index.cjs +36 -1
- package/dist/openai/index.cjs.map +1 -1
- package/dist/openai/index.mjs +36 -1
- package/dist/openai/index.mjs.map +1 -1
- package/dist/vercel/index.cjs +20 -1
- package/dist/vercel/index.cjs.map +1 -1
- package/dist/vercel/index.mjs +20 -1
- package/dist/vercel/index.mjs.map +1 -1
- package/package.json +3 -3
package/dist/index.mjs
CHANGED
@@ -6,7 +6,7 @@ import { uuidv7 } from '@posthog/core';
 import AnthropicOriginal from '@anthropic-ai/sdk';
 import { GoogleGenAI } from '@google/genai';
 
-var version = "7.7.0";
+var version = "7.8.0";
 
 // Type guards for safer type checking
 const isString = value => {
@@ -722,6 +722,7 @@ const sendEventToPosthog = async ({
   input,
   output,
   latency,
+  timeToFirstToken,
   baseURL,
   params,
   httpStatus = 200,
@@ -788,6 +789,9 @@ const sendEventToPosthog = async ({
   } : {}),
   ...additionalTokenValues,
   $ai_latency: latency,
+  ...(timeToFirstToken !== undefined ? {
+    $ai_time_to_first_token: timeToFirstToken
+  } : {}),
   $ai_trace_id: traceId,
   $ai_base_url: baseURL,
   ...params.posthogProperties,
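
The new property is only attached when a first token was actually observed, so non-streaming calls keep emitting the same event shape as before. A minimal sketch of how the conditional spread behaves (plain object literals with hypothetical values, not SDK code):

// Hypothetical values, for illustration only.
const timeToFirstToken = 0.31;
const props = {
  $ai_latency: 1.42,
  ...(timeToFirstToken !== undefined ? { $ai_time_to_first_token: timeToFirstToken } : {}),
};
// With a measured first token: { $ai_latency: 1.42, $ai_time_to_first_token: 0.31 }
// With timeToFirstToken undefined, the key is omitted entirely.
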
@@ -859,6 +863,14 @@ function formatOpenAIResponsesInput(input, instructions) {
   return messages;
 }
 
+/**
+ * Checks if a ResponseStreamEvent chunk represents the first token/content from the model.
+ * This includes various content types like text, reasoning, audio, and refusals.
+ */
+function isResponseTokenChunk(chunk) {
+  return chunk.type === 'response.output_item.added' || chunk.type === 'response.content_part.added' || chunk.type === 'response.output_text.delta' || chunk.type === 'response.reasoning_text.delta' || chunk.type === 'response.reasoning_summary_text.delta' || chunk.type === 'response.audio.delta' || chunk.type === 'response.audio.transcript.delta' || chunk.type === 'response.refusal.delta';
+}
+
 const Chat = OpenAI.Chat;
 const Completions = Chat.Completions;
 const Responses = OpenAI.Responses;
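
Since isResponseTokenChunk only inspects chunk.type, its behavior is easy to sketch. The event objects below are illustrative stand-ins for OpenAI Responses API stream events, not real SDK values:

isResponseTokenChunk({ type: 'response.output_text.delta', delta: 'Hel' });    // true
isResponseTokenChunk({ type: 'response.reasoning_text.delta', delta: '...' }); // true
isResponseTokenChunk({ type: 'response.completed', response: {} });            // false: lifecycle event, not token content
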
@@ -908,6 +920,7 @@ let WrappedCompletions$1 = class WrappedCompletions extends Completions {
       const contentBlocks = [];
       let accumulatedContent = '';
       let modelFromResponse;
+      let firstTokenTime;
       let usage = {
         inputTokens: 0,
         outputTokens: 0,
@@ -929,11 +942,17 @@ let WrappedCompletions$1 = class WrappedCompletions extends Completions {
       // Handle text content
       const deltaContent = choice?.delta?.content;
       if (deltaContent) {
+        if (firstTokenTime === undefined) {
+          firstTokenTime = Date.now();
+        }
         accumulatedContent += deltaContent;
       }
       // Handle tool calls
       const deltaToolCalls = choice?.delta?.tool_calls;
       if (deltaToolCalls && Array.isArray(deltaToolCalls)) {
+        if (firstTokenTime === undefined) {
+          firstTokenTime = Date.now();
+        }
         for (const toolCall of deltaToolCalls) {
           const index = toolCall.index;
           if (index !== undefined) {
@@ -1005,6 +1024,7 @@ let WrappedCompletions$1 = class WrappedCompletions extends Completions {
         }]
       }];
       const latency = (Date.now() - startTime) / 1000;
+      const timeToFirstToken = firstTokenTime !== undefined ? (firstTokenTime - startTime) / 1000 : undefined;
       const availableTools = extractAvailableToolCalls('openai', openAIParams);
       await sendEventToPosthog({
         client: this.phClient,
@@ -1014,6 +1034,7 @@ let WrappedCompletions$1 = class WrappedCompletions extends Completions {
         input: sanitizeOpenAI(openAIParams.messages),
         output: formattedOutput,
         latency,
+        timeToFirstToken,
         baseURL: this.baseURL,
         params: body,
         httpStatus: 200,
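
The same measurement pattern recurs in every wrapper below: record Date.now() once at the first content-bearing chunk, then convert both latency and time-to-first-token to seconds after the stream ends. A condensed, self-contained sketch of that pattern (measureStream and isContentChunk are illustrative helpers, not SDK exports):

async function measureStream(stream, isContentChunk) {
  const startTime = Date.now();
  let firstTokenTime;
  for await (const chunk of stream) {
    // Record the timestamp only once, at the first content-bearing chunk.
    if (firstTokenTime === undefined && isContentChunk(chunk)) {
      firstTokenTime = Date.now();
    }
    // ...content accumulation happens here, unchanged by this release...
  }
  const latency = (Date.now() - startTime) / 1000;
  // Stays undefined when the stream produced no content at all.
  const timeToFirstToken = firstTokenTime !== undefined ? (firstTokenTime - startTime) / 1000 : undefined;
  return { latency, timeToFirstToken };
}
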
@@ -1128,6 +1149,7 @@ let WrappedResponses$1 = class WrappedResponses extends Responses {
    try {
      let finalContent = [];
      let modelFromResponse;
+     let firstTokenTime;
      let usage = {
        inputTokens: 0,
        outputTokens: 0,
@@ -1135,6 +1157,10 @@ let WrappedResponses$1 = class WrappedResponses extends Responses {
      };
      let rawUsageData;
      for await (const chunk of stream1) {
+       // Track first token time on content delta events
+       if (firstTokenTime === undefined && isResponseTokenChunk(chunk)) {
+         firstTokenTime = Date.now();
+       }
        if ('response' in chunk && chunk.response) {
          // Extract model from response object in chunk (for stored prompts)
          if (!modelFromResponse && chunk.response.model) {
@@ -1160,6 +1186,7 @@ let WrappedResponses$1 = class WrappedResponses extends Responses {
        }
      }
      const latency = (Date.now() - startTime) / 1000;
+     const timeToFirstToken = firstTokenTime !== undefined ? (firstTokenTime - startTime) / 1000 : undefined;
      const availableTools = extractAvailableToolCalls('openai', openAIParams);
      await sendEventToPosthog({
        client: this.phClient,
@@ -1169,6 +1196,7 @@ let WrappedResponses$1 = class WrappedResponses extends Responses {
        input: formatOpenAIResponsesInput(sanitizeOpenAIResponse(openAIParams.input), openAIParams.instructions),
        output: finalContent,
        latency,
+       timeToFirstToken,
        baseURL: this.baseURL,
        params: body,
        httpStatus: 200,
@@ -1407,12 +1435,17 @@ class WrappedTranscriptions extends Transcriptions {
    (async () => {
      try {
        let finalContent = '';
+       let firstTokenTime;
        let usage = {
          inputTokens: 0,
          outputTokens: 0
        };
        const doneEvent = 'transcript.text.done';
        for await (const chunk of stream1) {
+         // Track first token on text delta events
+         if (firstTokenTime === undefined && chunk.type === 'transcript.text.delta') {
+           firstTokenTime = Date.now();
+         }
          if (chunk.type === doneEvent && 'text' in chunk && chunk.text && chunk.text.length > 0) {
            finalContent = chunk.text;
          }
@@ -1425,6 +1458,7 @@ class WrappedTranscriptions extends Transcriptions {
          }
        }
        const latency = (Date.now() - startTime) / 1000;
+       const timeToFirstToken = firstTokenTime !== undefined ? (firstTokenTime - startTime) / 1000 : undefined;
        const availableTools = extractAvailableToolCalls('openai', openAIParams);
        await sendEventToPosthog({
          client: this.phClient,
@@ -1434,6 +1468,7 @@ class WrappedTranscriptions extends Transcriptions {
          input: openAIParams.prompt,
          output: finalContent,
          latency,
+         timeToFirstToken,
          baseURL: this.baseURL,
          params: body,
          httpStatus: 200,
@@ -1552,6 +1587,7 @@ class WrappedCompletions extends AzureOpenAI.Chat.Completions {
      const contentBlocks = [];
      let accumulatedContent = '';
      let modelFromResponse;
+     let firstTokenTime;
      let usage = {
        inputTokens: 0,
        outputTokens: 0
@@ -1567,11 +1603,17 @@ class WrappedCompletions extends AzureOpenAI.Chat.Completions {
      // Handle text content
      const deltaContent = choice?.delta?.content;
      if (deltaContent) {
+       if (firstTokenTime === undefined) {
+         firstTokenTime = Date.now();
+       }
        accumulatedContent += deltaContent;
      }
      // Handle tool calls
      const deltaToolCalls = choice?.delta?.tool_calls;
      if (deltaToolCalls && Array.isArray(deltaToolCalls)) {
+       if (firstTokenTime === undefined) {
+         firstTokenTime = Date.now();
+       }
        for (const toolCall of deltaToolCalls) {
          const index = toolCall.index;
          if (index !== undefined) {
@@ -1641,6 +1683,7 @@ class WrappedCompletions extends AzureOpenAI.Chat.Completions {
        }]
      }];
      const latency = (Date.now() - startTime) / 1000;
+     const timeToFirstToken = firstTokenTime !== undefined ? (firstTokenTime - startTime) / 1000 : undefined;
      await sendEventToPosthog({
        client: this.phClient,
        ...posthogParams,
@@ -1649,6 +1692,7 @@ class WrappedCompletions extends AzureOpenAI.Chat.Completions {
        input: sanitizeOpenAI(openAIParams.messages),
        output: formattedOutput,
        latency,
+       timeToFirstToken,
        baseURL: this.baseURL,
        params: body,
        httpStatus: 200,
@@ -1750,11 +1794,16 @@ class WrappedResponses extends AzureOpenAI.Responses {
    try {
      let finalContent = [];
      let modelFromResponse;
+     let firstTokenTime;
      let usage = {
        inputTokens: 0,
        outputTokens: 0
      };
      for await (const chunk of stream1) {
+       // Track first token time on content delta events
+       if (firstTokenTime === undefined && isResponseTokenChunk(chunk)) {
+         firstTokenTime = Date.now();
+       }
        if ('response' in chunk && chunk.response) {
          // Extract model from response if not in params (for stored prompts)
          if (!modelFromResponse && chunk.response.model) {
@@ -1774,6 +1823,7 @@ class WrappedResponses extends AzureOpenAI.Responses {
        }
      }
      const latency = (Date.now() - startTime) / 1000;
+     const timeToFirstToken = firstTokenTime !== undefined ? (firstTokenTime - startTime) / 1000 : undefined;
      await sendEventToPosthog({
        client: this.phClient,
        ...posthogParams,
@@ -1782,6 +1832,7 @@ class WrappedResponses extends AzureOpenAI.Responses {
        input: formatOpenAIResponsesInput(openAIParams.input, openAIParams.instructions),
        output: finalContent,
        latency,
+       timeToFirstToken,
        baseURL: this.baseURL,
        params: body,
        httpStatus: 200,
@@ -2351,6 +2402,7 @@ const wrapVercelLanguageModel = (model, phClient, options) => {
    doStream: {
      value: async params => {
        const startTime = Date.now();
+       let firstTokenTime;
        let generatedText = '';
        let reasoningText = '';
        let usage = {};
@@ -2374,13 +2426,22 @@ const wrapVercelLanguageModel = (model, phClient, options) => {
          transform(chunk, controller) {
            // Handle streaming patterns - compatible with both V2 and V3
            if (chunk.type === 'text-delta') {
+             if (firstTokenTime === undefined) {
+               firstTokenTime = Date.now();
+             }
              generatedText += chunk.delta;
            }
            if (chunk.type === 'reasoning-delta') {
+             if (firstTokenTime === undefined) {
+               firstTokenTime = Date.now();
+             }
              reasoningText += chunk.delta;
            }
            // Handle tool call chunks
            if (chunk.type === 'tool-input-start') {
+             if (firstTokenTime === undefined) {
+               firstTokenTime = Date.now();
+             }
              // Initialize a new tool call
              toolCallsInProgress.set(chunk.id, {
                toolCallId: chunk.id,
@@ -2399,6 +2460,9 @@ const wrapVercelLanguageModel = (model, phClient, options) => {
              // Tool call is complete, keep it in the map for final processing
            }
            if (chunk.type === 'tool-call') {
+             if (firstTokenTime === undefined) {
+               firstTokenTime = Date.now();
+             }
              // Direct tool call chunk (complete tool call)
              toolCallsInProgress.set(chunk.toolCallId, {
                toolCallId: chunk.toolCallId,
@@ -2422,6 +2486,7 @@ const wrapVercelLanguageModel = (model, phClient, options) => {
          },
          flush: async () => {
            const latency = (Date.now() - startTime) / 1000;
+           const timeToFirstToken = firstTokenTime !== undefined ? (firstTokenTime - startTime) / 1000 : undefined;
            // Build content array similar to mapVercelOutput structure
            const content = [];
            if (reasoningText) {
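
In the Vercel wrapper the stream is observed through a TransformStream rather than a for-await loop, so the first-token timestamp is taken in transform() and the derived metric in flush(). A standalone sketch of that shape, assuming the chunk types named in the diff ('text-delta', 'reasoning-delta', 'tool-input-start', 'tool-call'); the stream being piped through is a stand-in:

const startTime = Date.now();
let firstTokenTime;
const tokenTypes = ['text-delta', 'reasoning-delta', 'tool-input-start', 'tool-call'];
const tap = new TransformStream({
  transform(chunk, controller) {
    // First content-bearing chunk of any kind sets the timestamp once.
    if (firstTokenTime === undefined && tokenTypes.includes(chunk.type)) {
      firstTokenTime = Date.now();
    }
    controller.enqueue(chunk); // pass every chunk through unchanged
  },
  flush() {
    const timeToFirstToken = firstTokenTime !== undefined ? (firstTokenTime - startTime) / 1000 : undefined;
    // ...this is where the wrapper reports to sendEventToPosthog...
  },
});
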
@@ -2474,6 +2539,7 @@ const wrapVercelLanguageModel = (model, phClient, options) => {
        input: mergedOptions.posthogPrivacyMode ? '' : mapVercelPrompt(params.prompt),
        output: output,
        latency,
+       timeToFirstToken,
        baseURL,
        params: mergedParams,
        httpStatus: 200,
@@ -2548,6 +2614,7 @@ class WrappedMessages extends AnthropicOriginal.Messages {
      const contentBlocks = [];
      const toolsInProgress = new Map();
      let currentTextBlock = null;
+     let firstTokenTime;
      const usage = {
        inputTokens: 0,
        outputTokens: 0,
@@ -2570,6 +2637,9 @@ class WrappedMessages extends AnthropicOriginal.Messages {
          };
          contentBlocks.push(currentTextBlock);
        } else if (chunk.content_block?.type === 'tool_use') {
+         if (firstTokenTime === undefined) {
+           firstTokenTime = Date.now();
+         }
          const toolBlock = {
            type: 'function',
            id: chunk.content_block.id,
@@ -2590,6 +2660,9 @@ class WrappedMessages extends AnthropicOriginal.Messages {
        if ('delta' in chunk) {
          if ('text' in chunk.delta) {
            const delta = chunk.delta.text;
+           if (firstTokenTime === undefined) {
+             firstTokenTime = Date.now();
+           }
            accumulatedContent += delta;
            if (currentTextBlock) {
              currentTextBlock.text += delta;
@@ -2645,6 +2718,7 @@ class WrappedMessages extends AnthropicOriginal.Messages {
      }
      usage.rawUsage = lastRawUsage;
      const latency = (Date.now() - startTime) / 1000;
+     const timeToFirstToken = firstTokenTime !== undefined ? (firstTokenTime - startTime) / 1000 : undefined;
      const availableTools = extractAvailableToolCalls('anthropic', anthropicParams);
      // Format output to match non-streaming version
      const formattedOutput = contentBlocks.length > 0 ? [{
@@ -2665,6 +2739,7 @@ class WrappedMessages extends AnthropicOriginal.Messages {
        input: sanitizeAnthropic(mergeSystemPrompt(anthropicParams, 'anthropic')),
        output: formattedOutput,
        latency,
+       timeToFirstToken,
        baseURL: this.baseURL,
        params: body,
        httpStatus: 200,
@@ -2826,6 +2901,7 @@ class WrappedModels {
    } = extractPosthogParams(params);
    const startTime = Date.now();
    const accumulatedContent = [];
+   let firstTokenTime;
    let usage = {
      inputTokens: 0,
      outputTokens: 0,
@@ -2835,6 +2911,10 @@ class WrappedModels {
    try {
      const stream = await this.client.models.generateContentStream(geminiParams);
      for await (const chunk of stream) {
+       // Track first token time when we get text content
+       if (firstTokenTime === undefined && chunk.text) {
+         firstTokenTime = Date.now();
+       }
        const chunkWebSearchCount = calculateGoogleWebSearchCount(chunk);
        if (chunkWebSearchCount > 0 && chunkWebSearchCount > (usage.webSearchCount ?? 0)) {
          usage.webSearchCount = chunkWebSearchCount;
@@ -2865,6 +2945,9 @@ class WrappedModels {
        for (const part of candidate.content.parts) {
          // Type-safe check for functionCall
          if ('functionCall' in part) {
+           if (firstTokenTime === undefined) {
+             firstTokenTime = Date.now();
+           }
            const funcCall = part.functionCall;
            if (funcCall?.name) {
              accumulatedContent.push({
@@ -2895,6 +2978,7 @@ class WrappedModels {
        yield chunk;
      }
      const latency = (Date.now() - startTime) / 1000;
+     const timeToFirstToken = firstTokenTime !== undefined ? (firstTokenTime - startTime) / 1000 : undefined;
      const availableTools = extractAvailableToolCalls('gemini', geminiParams);
      // Format output similar to formatResponseGemini
      const output = accumulatedContent.length > 0 ? [{
@@ -2909,6 +2993,7 @@ class WrappedModels {
        input: this.formatInputForPostHog(geminiParams),
        output,
        latency,
+       timeToFirstToken,
        baseURL: 'https://generativelanguage.googleapis.com',
        params: params,
        httpStatus: 200,
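
Taken together: after upgrading, any streamed call through one of the wrapped clients should emit $ai_time_to_first_token alongside $ai_latency on its generation event. A usage sketch based on the package's documented wrapper pattern; the API keys and model name are placeholders:

import { PostHog } from 'posthog-node';
import { OpenAI } from '@posthog/ai';

const phClient = new PostHog('<ph_project_api_key>', { host: 'https://us.i.posthog.com' });
const openai = new OpenAI({ apiKey: process.env.OPENAI_API_KEY, posthog: phClient });

const stream = await openai.chat.completions.create({
  model: 'gpt-4.1-mini', // placeholder model name
  messages: [{ role: 'user', content: 'Hello!' }],
  stream: true,
});
for await (const chunk of stream) {
  // Consume the stream; the wrapper records the first-token timestamp as deltas arrive.
}
await phClient.shutdown();
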