posthog 6.7.0__py3-none-any.whl → 6.7.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,226 @@
+ import re
+ from typing import Any
+ from urllib.parse import urlparse
+
+ REDACTED_IMAGE_PLACEHOLDER = "[base64 image redacted]"
+
+
+ def is_base64_data_url(text: str) -> bool:
+     return re.match(r"^data:([^;]+);base64,", text) is not None
+
+
+ def is_valid_url(text: str) -> bool:
+     try:
+         result = urlparse(text)
+         return bool(result.scheme and result.netloc)
+     except Exception:
+         pass
+
+     return text.startswith(("/", "./", "../"))
+
+
+ def is_raw_base64(text: str) -> bool:
+     if is_valid_url(text):
+         return False
+
+     return len(text) > 20 and re.match(r"^[A-Za-z0-9+/]+=*$", text) is not None
+
+
+ def redact_base64_data_url(value: Any) -> Any:
+     if not isinstance(value, str):
+         return value
+
+     if is_base64_data_url(value):
+         return REDACTED_IMAGE_PLACEHOLDER
+
+     if is_raw_base64(value):
+         return REDACTED_IMAGE_PLACEHOLDER
+
+     return value
+
+
+ def process_messages(messages: Any, transform_content_func) -> Any:
+     if not messages:
+         return messages
+
+     def process_content(content: Any) -> Any:
+         if isinstance(content, str):
+             return content
+
+         if not content:
+             return content
+
+         if isinstance(content, list):
+             return [transform_content_func(item) for item in content]
+
+         return transform_content_func(content)
+
+     def process_message(msg: Any) -> Any:
+         if not isinstance(msg, dict) or "content" not in msg:
+             return msg
+         return {**msg, "content": process_content(msg["content"])}
+
+     if isinstance(messages, list):
+         return [process_message(msg) for msg in messages]
+
+     return process_message(messages)
+
+
+ def sanitize_openai_image(item: Any) -> Any:
+     if not isinstance(item, dict):
+         return item
+
+     if (
+         item.get("type") == "image_url"
+         and isinstance(item.get("image_url"), dict)
+         and "url" in item["image_url"]
+     ):
+         return {
+             **item,
+             "image_url": {
+                 **item["image_url"],
+                 "url": redact_base64_data_url(item["image_url"]["url"]),
+             },
+         }
+
+     return item
+
+
+ def sanitize_openai_response_image(item: Any) -> Any:
+     if not isinstance(item, dict):
+         return item
+
+     if item.get("type") == "input_image" and "image_url" in item:
+         return {
+             **item,
+             "image_url": redact_base64_data_url(item["image_url"]),
+         }
+
+     return item
+
+
+ def sanitize_anthropic_image(item: Any) -> Any:
+     if not isinstance(item, dict):
+         return item
+
+     if (
+         item.get("type") == "image"
+         and isinstance(item.get("source"), dict)
+         and item["source"].get("type") == "base64"
+         and "data" in item["source"]
+     ):
+         # For Anthropic, if the source type is "base64", we should always redact the data
+         # The provider is explicitly telling us this is base64 data
+         return {
+             **item,
+             "source": {
+                 **item["source"],
+                 "data": REDACTED_IMAGE_PLACEHOLDER,
+             },
+         }
+
+     return item
+
+
+ def sanitize_gemini_part(part: Any) -> Any:
+     if not isinstance(part, dict):
+         return part
+
+     if (
+         "inline_data" in part
+         and isinstance(part["inline_data"], dict)
+         and "data" in part["inline_data"]
+     ):
+         # For Gemini, the inline_data structure indicates base64 data
+         # We should redact any string data in this context
+         return {
+             **part,
+             "inline_data": {
+                 **part["inline_data"],
+                 "data": REDACTED_IMAGE_PLACEHOLDER,
+             },
+         }
+
+     return part
+
+
+ def process_gemini_item(item: Any) -> Any:
+     if not isinstance(item, dict):
+         return item
+
+     if "parts" in item and item["parts"]:
+         parts = item["parts"]
+         if isinstance(parts, list):
+             parts = [sanitize_gemini_part(part) for part in parts]
+         else:
+             parts = sanitize_gemini_part(parts)
+
+         return {**item, "parts": parts}
+
+     return item
+
+
+ def sanitize_langchain_image(item: Any) -> Any:
+     if not isinstance(item, dict):
+         return item
+
+     if (
+         item.get("type") == "image_url"
+         and isinstance(item.get("image_url"), dict)
+         and "url" in item["image_url"]
+     ):
+         return {
+             **item,
+             "image_url": {
+                 **item["image_url"],
+                 "url": redact_base64_data_url(item["image_url"]["url"]),
+             },
+         }
+
+     if item.get("type") == "image" and "data" in item:
+         return {**item, "data": redact_base64_data_url(item["data"])}
+
+     if (
+         item.get("type") == "image"
+         and isinstance(item.get("source"), dict)
+         and "data" in item["source"]
+     ):
+         # Anthropic style - raw base64 in structured format, always redact
+         return {
+             **item,
+             "source": {
+                 **item["source"],
+                 "data": REDACTED_IMAGE_PLACEHOLDER,
+             },
+         }
+
+     if item.get("type") == "media" and "data" in item:
+         return {**item, "data": redact_base64_data_url(item["data"])}
+
+     return item
+
+
+ def sanitize_openai(data: Any) -> Any:
+     return process_messages(data, sanitize_openai_image)
+
+
+ def sanitize_openai_response(data: Any) -> Any:
+     return process_messages(data, sanitize_openai_response_image)
+
+
+ def sanitize_anthropic(data: Any) -> Any:
+     return process_messages(data, sanitize_anthropic_image)
+
+
+ def sanitize_gemini(data: Any) -> Any:
+     if not data:
+         return data
+
+     if isinstance(data, list):
+         return [process_gemini_item(item) for item in data]
+
+     return process_gemini_item(data)
+
+
+ def sanitize_langchain(data: Any) -> Any:
+     return process_messages(data, sanitize_langchain_image)
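
A minimal usage sketch of the sanitization helpers added above, assuming they are importable (this diff does not show the file's path, so the import below is a hypothetical placeholder): sanitize_openai() replaces a base64 data URL with REDACTED_IMAGE_PLACEHOLDER while leaving an ordinary https URL untouched.

# Hypothetical import path; the diff does not name the module that defines these helpers.
from posthog.ai.sanitization import REDACTED_IMAGE_PLACEHOLDER, sanitize_openai

messages = [
    {
        "role": "user",
        "content": [
            {"type": "text", "text": "What is in this image?"},
            {
                "type": "image_url",
                "image_url": {"url": "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAA"},
            },
            {
                "type": "image_url",
                "image_url": {"url": "https://example.com/cat.png"},
            },
        ],
    }
]

sanitized = sanitize_openai(messages)

# The data URL matches is_base64_data_url() and is redacted; the https URL
# passes is_valid_url(), so is_raw_base64() returns False and it is kept as-is.
assert sanitized[0]["content"][1]["image_url"]["url"] == REDACTED_IMAGE_PLACEHOLDER
assert sanitized[0]["content"][2]["image_url"]["url"] == "https://example.com/cat.png"
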
posthog/ai/types.py ADDED
@@ -0,0 +1,142 @@
+ """
+ Common type definitions for PostHog AI SDK.
+
+ These types are used for formatting messages and responses across different AI providers
+ (Anthropic, OpenAI, Gemini, etc.) to ensure consistency in tracking and data structure.
+ """
+
+ from typing import Any, Dict, List, Optional, TypedDict, Union
+
+
+ class FormattedTextContent(TypedDict):
+     """Formatted text content item."""
+
+     type: str  # Literal["text"]
+     text: str
+
+
+ class FormattedFunctionCall(TypedDict, total=False):
+     """Formatted function/tool call content item."""
+
+     type: str  # Literal["function"]
+     id: Optional[str]
+     function: Dict[str, Any]  # Contains 'name' and 'arguments'
+
+
+ class FormattedImageContent(TypedDict):
+     """Formatted image content item."""
+
+     type: str  # Literal["image"]
+     image: str
+
+
+ # Union type for all formatted content items
+ FormattedContentItem = Union[
+     FormattedTextContent,
+     FormattedFunctionCall,
+     FormattedImageContent,
+     Dict[str, Any],  # Fallback for unknown content types
+ ]
+
+
+ class FormattedMessage(TypedDict):
+     """
+     Standardized message format for PostHog tracking.
+
+     Used across all providers to ensure consistent message structure
+     when sending events to PostHog.
+     """
+
+     role: str
+     content: Union[str, List[FormattedContentItem], Any]
+
+
+ class TokenUsage(TypedDict, total=False):
+     """
+     Token usage information for AI model responses.
+
+     Different providers may populate different fields.
+     """
+
+     input_tokens: int
+     output_tokens: int
+     cache_read_input_tokens: Optional[int]
+     cache_creation_input_tokens: Optional[int]
+     reasoning_tokens: Optional[int]
+
+
+ class ProviderResponse(TypedDict, total=False):
+     """
+     Standardized provider response format.
+
+     Used for consistent response formatting across all providers.
+     """
+
+     messages: List[FormattedMessage]
+     usage: TokenUsage
+     error: Optional[str]
+
+
+ class StreamingUsageStats(TypedDict, total=False):
+     """
+     Usage statistics collected during streaming.
+
+     Different providers populate different fields during streaming.
+     """
+
+     input_tokens: int
+     output_tokens: int
+     cache_read_input_tokens: Optional[int]
+     cache_creation_input_tokens: Optional[int]
+     reasoning_tokens: Optional[int]
+     # OpenAI-specific names
+     prompt_tokens: Optional[int]
+     completion_tokens: Optional[int]
+     total_tokens: Optional[int]
+
+
+ class StreamingContentBlock(TypedDict, total=False):
+     """
+     Content block used during streaming to accumulate content.
+
+     Used for tracking text and function calls as they stream in.
+     """
+
+     type: str  # "text" or "function"
+     text: Optional[str]
+     id: Optional[str]
+     function: Optional[Dict[str, Any]]
+
+
+ class ToolInProgress(TypedDict):
+     """
+     Tracks a tool/function call being accumulated during streaming.
+
+     Used by Anthropic to accumulate JSON input for tools.
+     """
+
+     block: StreamingContentBlock
+     input_string: str
+
+
+ class StreamingEventData(TypedDict):
+     """
+     Standardized data for streaming events across all providers.
+
+     This type ensures consistent data structure when capturing streaming events,
+     with all provider-specific formatting already completed.
+     """
+
+     provider: str  # "openai", "anthropic", "gemini"
+     model: str
+     base_url: str
+     kwargs: Dict[str, Any]  # Original kwargs for tool extraction and special handling
+     formatted_input: Any  # Provider-formatted input ready for tracking
+     formatted_output: Any  # Provider-formatted output ready for tracking
+     usage_stats: TokenUsage  # Standardized token counts
+     latency: float
+     distinct_id: Optional[str]
+     trace_id: Optional[str]
+     properties: Optional[Dict[str, Any]]
+     privacy_mode: bool
+     groups: Optional[Dict[str, Any]]
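
A minimal sketch of how the new TypedDicts compose, using only names defined in posthog/ai/types.py above. TypedDicts are plain dicts at runtime, so the annotations matter only to a static type checker such as mypy; the field values here are made-up illustration data.

from posthog.ai.types import (
    FormattedMessage,
    FormattedTextContent,
    ProviderResponse,
    TokenUsage,
)

text_item: FormattedTextContent = {"type": "text", "text": "Hello!"}

message: FormattedMessage = {
    "role": "assistant",
    "content": [text_item],
}

# TokenUsage is declared with total=False, so every field is optional.
usage: TokenUsage = {"input_tokens": 12, "output_tokens": 34}

response: ProviderResponse = {
    "messages": [message],
    "usage": usage,
}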