posthoganalytics 6.7.0__py3-none-any.whl → 6.7.2__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package, as published to a supported registry, and is provided for informational purposes only.
@@ -0,0 +1,393 @@
+ """
+ Anthropic-specific conversion utilities.
+
+ This module handles the conversion of Anthropic API responses and inputs
+ into standardized formats for PostHog tracking.
+ """
+
+ import json
+ from typing import Any, Dict, List, Optional, Tuple
+
+ from posthoganalytics.ai.types import (
+     FormattedContentItem,
+     FormattedFunctionCall,
+     FormattedMessage,
+     FormattedTextContent,
+     StreamingContentBlock,
+     StreamingUsageStats,
+     TokenUsage,
+     ToolInProgress,
+ )
+
+
+ def format_anthropic_response(response: Any) -> List[FormattedMessage]:
+     """
+     Format an Anthropic response into standardized message format.
+
+     Args:
+         response: The response object from the Anthropic API
+
+     Returns:
+         List of formatted messages with role and content
+     """
+
+     output: List[FormattedMessage] = []
+
+     if response is None:
+         return output
+
+     content: List[FormattedContentItem] = []
+
+     # Process content blocks from the response
+     if hasattr(response, "content"):
+         for choice in response.content:
+             if (
+                 hasattr(choice, "type")
+                 and choice.type == "text"
+                 and hasattr(choice, "text")
+                 and choice.text
+             ):
+                 text_content: FormattedTextContent = {
+                     "type": "text",
+                     "text": choice.text,
+                 }
+                 content.append(text_content)
+
+             elif (
+                 hasattr(choice, "type")
+                 and choice.type == "tool_use"
+                 and hasattr(choice, "name")
+                 and hasattr(choice, "id")
+             ):
+                 function_call: FormattedFunctionCall = {
+                     "type": "function",
+                     "id": choice.id,
+                     "function": {
+                         "name": choice.name,
+                         "arguments": getattr(choice, "input", {}),
+                     },
+                 }
+                 content.append(function_call)
+
+     if content:
+         message: FormattedMessage = {
+             "role": "assistant",
+             "content": content,
+         }
+         output.append(message)
+
+     return output
+
+
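
A minimal usage sketch (editorial illustration, not part of the package; the SimpleNamespace mocks stand in for Anthropic response objects):

    from types import SimpleNamespace

    resp = SimpleNamespace(
        content=[
            SimpleNamespace(type="text", text="Hello!"),
            SimpleNamespace(
                type="tool_use", id="toolu_1", name="get_weather",
                input={"city": "Paris"},
            ),
        ]
    )
    format_anthropic_response(resp)
    # [{"role": "assistant", "content": [
    #     {"type": "text", "text": "Hello!"},
    #     {"type": "function", "id": "toolu_1",
    #      "function": {"name": "get_weather", "arguments": {"city": "Paris"}}}]}]
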
+ def format_anthropic_input(
+     messages: List[Dict[str, Any]], system: Optional[str] = None
+ ) -> List[FormattedMessage]:
+     """
+     Format Anthropic input messages with optional system prompt.
+
+     Args:
+         messages: List of message dictionaries
+         system: Optional system prompt to prepend
+
+     Returns:
+         List of formatted messages
+     """
+
+     formatted_messages: List[FormattedMessage] = []
+
+     # Add system message if provided
+     if system is not None:
+         formatted_messages.append({"role": "system", "content": system})
+
+     # Add user messages
+     if messages:
+         for msg in messages:
+             # Messages are already in the correct format, just ensure type safety
+             formatted_msg: FormattedMessage = {
+                 "role": msg.get("role", "user"),
+                 "content": msg.get("content", ""),
+             }
+             formatted_messages.append(formatted_msg)
+
+     return formatted_messages
+
+
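
Sketch of the input formatter (illustrative only):

    format_anthropic_input(
        [{"role": "user", "content": "What's 2 + 2?"}],
        system="Answer concisely.",
    )
    # [{"role": "system", "content": "Answer concisely."},
    #  {"role": "user", "content": "What's 2 + 2?"}]
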
+ def extract_anthropic_tools(kwargs: Dict[str, Any]) -> Optional[Any]:
+     """
+     Extract tool definitions from Anthropic API kwargs.
+
+     Args:
+         kwargs: Keyword arguments passed to Anthropic API
+
+     Returns:
+         Tool definitions if present, None otherwise
+     """
+
+     return kwargs.get("tools", None)
+
+
+ def format_anthropic_streaming_content(
+     content_blocks: List[StreamingContentBlock],
+ ) -> List[FormattedContentItem]:
+     """
+     Format content blocks from Anthropic streaming response.
+
+     Used by streaming handlers to format accumulated content blocks.
+
+     Args:
+         content_blocks: List of content block dictionaries from streaming
+
+     Returns:
+         List of formatted content items
+     """
+
+     formatted: List[FormattedContentItem] = []
+
+     for block in content_blocks:
+         if block.get("type") == "text":
+             formatted.append(
+                 {
+                     "type": "text",
+                     "text": block.get("text") or "",
+                 }
+             )
+
+         elif block.get("type") == "function":
+             formatted.append(
+                 {
+                     "type": "function",
+                     "id": block.get("id"),
+                     "function": block.get("function") or {},
+                 }
+             )
+
+     return formatted
+
+
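
Accumulated blocks pass through mostly unchanged, with None fields defaulted (illustrative):

    format_anthropic_streaming_content([{"type": "text", "text": None}])
    # [{"type": "text", "text": ""}]
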
+ def extract_anthropic_usage_from_event(event: Any) -> StreamingUsageStats:
+     """
+     Extract usage statistics from an Anthropic streaming event.
+
+     Args:
+         event: Streaming event from Anthropic API
+
+     Returns:
+         Dictionary of usage statistics
+     """
+
+     usage: StreamingUsageStats = {}
+
+     # Handle usage stats from message_start event
+     if hasattr(event, "type") and event.type == "message_start":
+         if hasattr(event, "message") and hasattr(event.message, "usage"):
+             usage["input_tokens"] = getattr(event.message.usage, "input_tokens", 0)
+             usage["cache_creation_input_tokens"] = getattr(
+                 event.message.usage, "cache_creation_input_tokens", 0
+             )
+             usage["cache_read_input_tokens"] = getattr(
+                 event.message.usage, "cache_read_input_tokens", 0
+             )
+
+     # Handle usage stats from message_delta event
+     if hasattr(event, "usage") and event.usage:
+         usage["output_tokens"] = getattr(event.usage, "output_tokens", 0)
+
+     return usage
+
+
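
Illustration with a mocked message_start event (the shape mirrors the attribute checks above; not part of the package):

    from types import SimpleNamespace

    start = SimpleNamespace(
        type="message_start",
        message=SimpleNamespace(
            usage=SimpleNamespace(
                input_tokens=12,
                cache_creation_input_tokens=0,
                cache_read_input_tokens=0,
            )
        ),
    )
    extract_anthropic_usage_from_event(start)
    # {"input_tokens": 12, "cache_creation_input_tokens": 0,
    #  "cache_read_input_tokens": 0}
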
+ def handle_anthropic_content_block_start(
+     event: Any,
+ ) -> Tuple[Optional[StreamingContentBlock], Optional[ToolInProgress]]:
+     """
+     Handle content block start event from Anthropic streaming.
+
+     Args:
+         event: Content block start event
+
+     Returns:
+         Tuple of (content_block, tool_in_progress)
+     """
+
+     if not (hasattr(event, "type") and event.type == "content_block_start"):
+         return None, None
+
+     if not hasattr(event, "content_block"):
+         return None, None
+
+     block = event.content_block
+
+     if not hasattr(block, "type"):
+         return None, None
+
+     if block.type == "text":
+         content_block: StreamingContentBlock = {"type": "text", "text": ""}
+         return content_block, None
+
+     elif block.type == "tool_use":
+         tool_block: StreamingContentBlock = {
+             "type": "function",
+             "id": getattr(block, "id", ""),
+             "function": {"name": getattr(block, "name", ""), "arguments": {}},
+         }
+         tool_in_progress: ToolInProgress = {"block": tool_block, "input_string": ""}
+         return tool_block, tool_in_progress
+
+     return None, None
+
+
+ def handle_anthropic_text_delta(
+     event: Any, current_block: Optional[StreamingContentBlock]
+ ) -> Optional[str]:
+     """
+     Handle text delta event from Anthropic streaming.
+
+     Args:
+         event: Delta event
+         current_block: Current text block being accumulated
+
+     Returns:
+         Text delta if present
+     """
+
+     if hasattr(event, "delta") and hasattr(event.delta, "text"):
+         delta_text = event.delta.text or ""
+
+         if current_block is not None and current_block.get("type") == "text":
+             text_val = current_block.get("text")
+             if text_val is not None:
+                 current_block["text"] = text_val + delta_text
+             else:
+                 current_block["text"] = delta_text
+
+         return delta_text
+
+     return None
+
+
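
Text deltas are appended to the current block in place (illustrative mock):

    from types import SimpleNamespace

    block = {"type": "text", "text": "Hel"}
    handle_anthropic_text_delta(SimpleNamespace(delta=SimpleNamespace(text="lo")), block)
    # returns "lo"; block is now {"type": "text", "text": "Hello"}
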
+ def handle_anthropic_tool_delta(
+     event: Any,
+     content_blocks: List[StreamingContentBlock],
+     tools_in_progress: Dict[str, ToolInProgress],
+ ) -> None:
+     """
+     Handle tool input delta event from Anthropic streaming.
+
+     Args:
+         event: Tool delta event
+         content_blocks: List of content blocks
+         tools_in_progress: Dictionary tracking tools being accumulated
+     """
+
+     if not (hasattr(event, "type") and event.type == "content_block_delta"):
+         return
+
+     if not (
+         hasattr(event, "delta")
+         and hasattr(event.delta, "type")
+         and event.delta.type == "input_json_delta"
+     ):
+         return
+
+     if hasattr(event, "index") and event.index < len(content_blocks):
+         block = content_blocks[event.index]
+
+         if block.get("type") == "function" and block.get("id") in tools_in_progress:
+             tool = tools_in_progress[block["id"]]
+             partial_json = getattr(event.delta, "partial_json", "")
+             tool["input_string"] += partial_json
+
+
+ def finalize_anthropic_tool_input(
+     event: Any,
+     content_blocks: List[StreamingContentBlock],
+     tools_in_progress: Dict[str, ToolInProgress],
+ ) -> None:
+     """
+     Finalize tool input when content block stops.
+
+     Args:
+         event: Content block stop event
+         content_blocks: List of content blocks
+         tools_in_progress: Dictionary tracking tools being accumulated
+     """
+
+     if not (hasattr(event, "type") and event.type == "content_block_stop"):
+         return
+
+     if hasattr(event, "index") and event.index < len(content_blocks):
+         block = content_blocks[event.index]
+
+         if block.get("type") == "function" and block.get("id") in tools_in_progress:
+             tool = tools_in_progress[block["id"]]
+
+             try:
+                 block["function"]["arguments"] = json.loads(tool["input_string"])
+             except Exception:
+                 # Keep empty dict if parsing fails
+                 pass
+
+             del tools_in_progress[block["id"]]
+
+
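
The three streaming handlers above cooperate over a tool call's lifetime. A sketch with mocked events (shapes assumed from the attribute checks above; not part of the package):

    from types import SimpleNamespace

    start = SimpleNamespace(
        type="content_block_start",
        content_block=SimpleNamespace(type="tool_use", id="toolu_1", name="get_weather"),
    )
    block, in_progress = handle_anthropic_content_block_start(start)
    content_blocks, tools = [block], {"toolu_1": in_progress}

    delta = SimpleNamespace(
        type="content_block_delta",
        index=0,
        delta=SimpleNamespace(type="input_json_delta", partial_json='{"city": "Paris"}'),
    )
    handle_anthropic_tool_delta(delta, content_blocks, tools)

    stop = SimpleNamespace(type="content_block_stop", index=0)
    finalize_anthropic_tool_input(stop, content_blocks, tools)
    # block["function"]["arguments"] == {"city": "Paris"}; tools is empty again
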
+ def standardize_anthropic_usage(usage: Dict[str, Any]) -> TokenUsage:
+     """
+     Standardize Anthropic usage statistics to common TokenUsage format.
+
+     Anthropic already uses standard field names, so this mainly structures the data.
+
+     Args:
+         usage: Raw usage statistics from Anthropic
+
+     Returns:
+         Standardized TokenUsage dict
+     """
+     return TokenUsage(
+         input_tokens=usage.get("input_tokens", 0),
+         output_tokens=usage.get("output_tokens", 0),
+         cache_read_input_tokens=usage.get("cache_read_input_tokens"),
+         cache_creation_input_tokens=usage.get("cache_creation_input_tokens"),
+     )
+
+
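
Missing cache counters pass through as None (illustrative; assumes TokenUsage is a TypedDict, so construction yields a plain dict):

    standardize_anthropic_usage({"input_tokens": 12, "output_tokens": 34})
    # {"input_tokens": 12, "output_tokens": 34,
    #  "cache_read_input_tokens": None, "cache_creation_input_tokens": None}
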
+ def format_anthropic_streaming_input(kwargs: Dict[str, Any]) -> Any:
+     """
+     Format Anthropic streaming input using system prompt merging.
+
+     Args:
+         kwargs: Keyword arguments passed to Anthropic API
+
+     Returns:
+         Formatted input ready for PostHog tracking
+     """
+     from posthoganalytics.ai.utils import merge_system_prompt
+
+     return merge_system_prompt(kwargs, "anthropic")
+
+
+ def format_anthropic_streaming_output_complete(
+     content_blocks: List[StreamingContentBlock], accumulated_content: str
+ ) -> List[FormattedMessage]:
+     """
+     Format complete Anthropic streaming output.
+
+     Combines existing logic for formatting content blocks with fallback to accumulated content.
+
+     Args:
+         content_blocks: List of content blocks accumulated during streaming
+         accumulated_content: Raw accumulated text content as fallback
+
+     Returns:
+         Formatted messages ready for PostHog tracking
+     """
+     formatted_content = format_anthropic_streaming_content(content_blocks)
+
+     if formatted_content:
+         return [{"role": "assistant", "content": formatted_content}]
+     else:
+         # Fallback to accumulated content if no blocks
+         return [
+             {
+                 "role": "assistant",
+                 "content": [{"type": "text", "text": accumulated_content}],
+             }
+         ]
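
End-of-stream formatting falls back to the raw accumulated text when no blocks were collected (illustrative):

    format_anthropic_streaming_output_complete([], "Hello!")
    # [{"role": "assistant",
    #   "content": [{"type": "text", "text": "Hello!"}]}]
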
@@ -1,4 +1,9 @@
  from .gemini import Client
+ from .gemini_converter import (
+     format_gemini_input,
+     format_gemini_response,
+     extract_gemini_tools,
+ )


  # Create a genai-like module for perfect drop-in replacement
@@ -8,4 +13,10 @@ class _GenAI:

  genai = _GenAI()

- __all__ = ["Client", "genai"]
+ __all__ = [
+     "Client",
+     "genai",
+     "format_gemini_input",
+     "format_gemini_response",
+     "extract_gemini_tools",
+ ]
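
With these exports in place, the converter helpers are importable straight from the subpackage (illustrative):

    from posthoganalytics.ai.gemini import (
        format_gemini_input,
        format_gemini_response,
        extract_gemini_tools,
    )
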
@@ -13,9 +13,16 @@ except ImportError:
  from posthoganalytics import setup
  from posthoganalytics.ai.utils import (
      call_llm_and_track_usage,
-     get_model_params,
-     with_privacy_mode,
+     capture_streaming_event,
+     merge_usage_stats,
  )
+ from posthoganalytics.ai.gemini.gemini_converter import (
+     format_gemini_input,
+     extract_gemini_usage_from_chunk,
+     extract_gemini_content_from_chunk,
+     format_gemini_streaming_output,
+ )
+ from posthoganalytics.ai.sanitization import sanitize_gemini
  from posthoganalytics.client import Client as PostHogClient


@@ -71,6 +78,7 @@ class Client:
              posthog_groups: Default groups for all calls (can be overridden per call)
              **kwargs: Additional arguments (for future compatibility)
          """
+
          self._ph_client = posthog_client or setup()

          if self._ph_client is None:
@@ -132,6 +140,7 @@ class Models:
              posthog_groups: Default groups for all calls
              **kwargs: Additional arguments (for future compatibility)
          """
+
          self._ph_client = posthog_client or setup()

          if self._ph_client is None:
@@ -149,14 +158,19 @@ class Models:
          # Add Vertex AI parameters if provided
          if vertexai is not None:
              client_args["vertexai"] = vertexai
+
          if credentials is not None:
              client_args["credentials"] = credentials
+
          if project is not None:
              client_args["project"] = project
+
          if location is not None:
              client_args["location"] = location
+
          if debug_config is not None:
              client_args["debug_config"] = debug_config
+
          if http_options is not None:
              client_args["http_options"] = http_options

@@ -174,6 +188,7 @@ class Models:
              raise ValueError(
                  "API key must be provided either as parameter or via GOOGLE_API_KEY/API_KEY environment variable"
              )
+
          client_args["api_key"] = api_key

          self._client = genai.Client(**client_args)
@@ -188,6 +203,7 @@ class Models:
          call_groups: Optional[Dict[str, Any]],
      ):
          """Merge call-level PostHog parameters with client defaults."""
+
          # Use call-level values if provided, otherwise fall back to defaults
          distinct_id = (
              call_distinct_id
@@ -203,6 +219,7 @@ class Models:

          # Merge properties: default properties + call properties (call properties override)
          properties = dict(self._default_properties)
+
          if call_properties:
              properties.update(call_properties)

@@ -238,6 +255,7 @@ class Models:
              posthog_groups: Group analytics properties (overrides client default)
              **kwargs: Arguments passed to Gemini's generate_content
          """
+
          # Merge PostHog parameters
          distinct_id, trace_id, properties, privacy_mode, groups = (
              self._merge_posthog_params(
@@ -287,25 +305,24 @@ class Models:
              nonlocal accumulated_content  # noqa: F824
              try:
                  for chunk in response:
-                     if hasattr(chunk, "usage_metadata") and chunk.usage_metadata:
-                         usage_stats = {
-                             "input_tokens": getattr(
-                                 chunk.usage_metadata, "prompt_token_count", 0
-                             ),
-                             "output_tokens": getattr(
-                                 chunk.usage_metadata, "candidates_token_count", 0
-                             ),
-                         }
-
-                     if hasattr(chunk, "text") and chunk.text:
-                         accumulated_content.append(chunk.text)
+                     # Extract usage stats from chunk
+                     chunk_usage = extract_gemini_usage_from_chunk(chunk)
+
+                     if chunk_usage:
+                         # Gemini reports cumulative totals, not incremental values
+                         merge_usage_stats(usage_stats, chunk_usage, mode="cumulative")
+
+                     # Extract content from chunk (now returns content blocks)
+                     content_block = extract_gemini_content_from_chunk(chunk)
+
+                     if content_block is not None:
+                         accumulated_content.append(content_block)

                      yield chunk

              finally:
                  end_time = time.time()
                  latency = end_time - start_time
-                 output = "".join(accumulated_content)

                  self._capture_streaming_event(
                      model,
@@ -318,7 +335,7 @@ class Models:
                      kwargs,
                      usage_stats,
                      latency,
-                     output,
+                     accumulated_content,
                  )

          return generator()
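
The "cumulative" mode matters because Gemini chunks carry running totals rather than per-chunk increments. A sketch of the assumed merge semantics (merge_usage_stats itself is not shown in this diff):

    def merge_cumulative(totals: dict, chunk: dict) -> None:
        # Later chunks report the running total, so replace rather than add;
        # summing would double-count tokens across chunks.
        for key, value in chunk.items():
            totals[key] = value
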
@@ -335,61 +352,38 @@ class Models:
          kwargs: Dict[str, Any],
          usage_stats: Dict[str, int],
          latency: float,
-         output: str,
+         output: Any,
      ):
-         if trace_id is None:
-             trace_id = str(uuid.uuid4())
-
-         event_properties = {
-             "$ai_provider": "gemini",
-             "$ai_model": model,
-             "$ai_model_parameters": get_model_params(kwargs),
-             "$ai_input": with_privacy_mode(
-                 self._ph_client,
-                 privacy_mode,
-                 self._format_input(contents),
-             ),
-             "$ai_output_choices": with_privacy_mode(
-                 self._ph_client,
-                 privacy_mode,
-                 [{"content": output, "role": "assistant"}],
-             ),
-             "$ai_http_status": 200,
-             "$ai_input_tokens": usage_stats.get("input_tokens", 0),
-             "$ai_output_tokens": usage_stats.get("output_tokens", 0),
-             "$ai_latency": latency,
-             "$ai_trace_id": trace_id,
-             "$ai_base_url": self._base_url,
-             **(properties or {}),
-         }
-
-         if distinct_id is None:
-             event_properties["$process_person_profile"] = False
-
-         if hasattr(self._ph_client, "capture"):
-             self._ph_client.capture(
-                 distinct_id=distinct_id,
-                 event="$ai_generation",
-                 properties=event_properties,
-                 groups=groups,
-             )
+         from posthoganalytics.ai.types import StreamingEventData
+         from posthoganalytics.ai.gemini.gemini_converter import standardize_gemini_usage
+
+         # Prepare standardized event data
+         formatted_input = self._format_input(contents)
+         sanitized_input = sanitize_gemini(formatted_input)
+
+         event_data = StreamingEventData(
+             provider="gemini",
+             model=model,
+             base_url=self._base_url,
+             kwargs=kwargs,
+             formatted_input=sanitized_input,
+             formatted_output=format_gemini_streaming_output(output),
+             usage_stats=standardize_gemini_usage(usage_stats),
+             latency=latency,
+             distinct_id=distinct_id,
+             trace_id=trace_id,
+             properties=properties,
+             privacy_mode=privacy_mode,
+             groups=groups,
+         )
+
+         # Use the common capture function
+         capture_streaming_event(self._ph_client, event_data)

      def _format_input(self, contents):
          """Format input contents for PostHog tracking"""
-         if isinstance(contents, str):
-             return [{"role": "user", "content": contents}]
-         elif isinstance(contents, list):
-             formatted = []
-             for item in contents:
-                 if isinstance(item, str):
-                     formatted.append({"role": "user", "content": item})
-                 elif hasattr(item, "text"):
-                     formatted.append({"role": "user", "content": item.text})
-                 else:
-                     formatted.append({"role": "user", "content": str(item)})
-             return formatted
-         else:
-             return [{"role": "user", "content": str(contents)}]
+
+         return format_gemini_input(contents)

      def generate_content_stream(
          self,