posthoganalytics 6.7.0__py3-none-any.whl → 7.4.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40) hide show
  1. posthoganalytics/__init__.py +84 -7
  2. posthoganalytics/ai/anthropic/__init__.py +10 -0
  3. posthoganalytics/ai/anthropic/anthropic.py +95 -65
  4. posthoganalytics/ai/anthropic/anthropic_async.py +95 -65
  5. posthoganalytics/ai/anthropic/anthropic_converter.py +443 -0
  6. posthoganalytics/ai/gemini/__init__.py +15 -1
  7. posthoganalytics/ai/gemini/gemini.py +66 -71
  8. posthoganalytics/ai/gemini/gemini_async.py +423 -0
  9. posthoganalytics/ai/gemini/gemini_converter.py +652 -0
  10. posthoganalytics/ai/langchain/callbacks.py +58 -13
  11. posthoganalytics/ai/openai/__init__.py +16 -1
  12. posthoganalytics/ai/openai/openai.py +140 -149
  13. posthoganalytics/ai/openai/openai_async.py +127 -82
  14. posthoganalytics/ai/openai/openai_converter.py +741 -0
  15. posthoganalytics/ai/sanitization.py +248 -0
  16. posthoganalytics/ai/types.py +125 -0
  17. posthoganalytics/ai/utils.py +339 -356
  18. posthoganalytics/client.py +345 -97
  19. posthoganalytics/contexts.py +81 -0
  20. posthoganalytics/exception_utils.py +250 -2
  21. posthoganalytics/feature_flags.py +26 -10
  22. posthoganalytics/flag_definition_cache.py +127 -0
  23. posthoganalytics/integrations/django.py +157 -19
  24. posthoganalytics/request.py +203 -23
  25. posthoganalytics/test/test_client.py +250 -22
  26. posthoganalytics/test/test_exception_capture.py +418 -0
  27. posthoganalytics/test/test_feature_flag_result.py +441 -2
  28. posthoganalytics/test/test_feature_flags.py +308 -104
  29. posthoganalytics/test/test_flag_definition_cache.py +612 -0
  30. posthoganalytics/test/test_module.py +0 -8
  31. posthoganalytics/test/test_request.py +536 -0
  32. posthoganalytics/test/test_utils.py +4 -1
  33. posthoganalytics/types.py +40 -0
  34. posthoganalytics/version.py +1 -1
  35. {posthoganalytics-6.7.0.dist-info → posthoganalytics-7.4.3.dist-info}/METADATA +12 -12
  36. posthoganalytics-7.4.3.dist-info/RECORD +57 -0
  37. posthoganalytics-6.7.0.dist-info/RECORD +0 -49
  38. {posthoganalytics-6.7.0.dist-info → posthoganalytics-7.4.3.dist-info}/WHEEL +0 -0
  39. {posthoganalytics-6.7.0.dist-info → posthoganalytics-7.4.3.dist-info}/licenses/LICENSE +0 -0
  40. {posthoganalytics-6.7.0.dist-info → posthoganalytics-7.4.3.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,248 @@
1
+ import os
2
+ import re
3
+ from typing import Any
4
+ from urllib.parse import urlparse
5
+
6
+ REDACTED_IMAGE_PLACEHOLDER = "[base64 image redacted]"
7
+
8
+
9
def _is_multimodal_enabled() -> bool:
    """Report whether the ``_INTERNAL_LLMA_MULTIMODAL`` environment flag is on.

    The flag counts as enabled when its value, compared case-insensitively,
    is ``"true"``, ``"1"`` or ``"yes"``. An unset variable is treated as off.
    """
    flag = os.environ.get("_INTERNAL_LLMA_MULTIMODAL", "")
    return flag.lower() in ("true", "1", "yes")
16
+
17
+
18
def is_base64_data_url(text: str) -> bool:
    """Return True when *text* starts with a base64-encoded ``data:`` URL header.

    The header is everything before the first comma. It may hold an optional
    media type plus any number of ``;attribute=value`` parameters, and must end
    with the ``;base64`` marker. Examples that match::

        data:image/png;base64,....
        data:text/plain;charset=utf-8;base64,....
        data:;base64,....

    Non-base64 data URLs (``data:text/plain,hello``) do not match.

    Args:
        text: The string to inspect. Callers must pass a ``str``.

    Returns:
        ``True`` if the string begins with a base64 data URL header.
    """
    # ``[^,]*`` keeps the match inside the header: the payload begins at the
    # first comma, so a ";base64," occurring later in the data is ignored.
    return re.match(r"^data:[^,]*;base64,", text) is not None
20
+
21
+
22
def is_valid_url(text: str) -> bool:
    """Return True when *text* parses as a URL with both a scheme and a host.

    The path-prefix check (``/``, ``./``, ``../``) is only consulted when
    ``urlparse`` itself raises. A plain path such as ``"/images/a.png"``
    parses without error, has no scheme or netloc, and so yields ``False``.
    """
    try:
        parsed = urlparse(text)
    except Exception:
        # Fallback used only for inputs urlparse cannot handle.
        return text.startswith(("/", "./", "../"))

    return bool(parsed.scheme and parsed.netloc)
30
+
31
+
32
def is_raw_base64(text: str) -> bool:
    """Heuristically decide whether *text* is a bare base64 payload.

    A string qualifies when it is not recognised by ``is_valid_url``, is
    longer than 20 characters, and consists solely of the standard base64
    alphabet (``A-Z a-z 0-9 + /``) followed by optional ``=`` padding.
    """
    if is_valid_url(text) or len(text) <= 20:
        return False

    return re.match(r"^[A-Za-z0-9+/]+=*$", text) is not None
37
+
38
+
39
def redact_base64_data_url(value: Any) -> Any:
    """Swap base64 content for ``REDACTED_IMAGE_PLACEHOLDER``.

    The value is returned untouched when multimodal capture is enabled, when
    it is not a string, or when it is neither a base64 data URL nor a raw
    base64 string.
    """
    if _is_multimodal_enabled() or not isinstance(value, str):
        return value

    looks_like_base64 = is_base64_data_url(value) or is_raw_base64(value)
    return REDACTED_IMAGE_PLACEHOLDER if looks_like_base64 else value
53
+
54
+
55
def process_messages(messages: Any, transform_content_func) -> Any:
    """Apply *transform_content_func* to the content of one or many messages.

    Args:
        messages: A single message or a list of messages. Falsy input is
            returned as-is.
        transform_content_func: Callable applied to each content item (when a
            message's ``content`` is a list) or to the content as a whole.

    Returns:
        Messages of the same shape as the input. Dict messages that have a
        ``"content"`` key are shallow-copied with the transformed content.
        Anything else is passed through unchanged.
    """

    def _transform(content: Any) -> Any:
        # Plain strings and empty values carry nothing to transform.
        if isinstance(content, str) or not content:
            return content
        if isinstance(content, list):
            return list(map(transform_content_func, content))
        return transform_content_func(content)

    def _rewrite(message: Any) -> Any:
        if isinstance(message, dict) and "content" in message:
            updated = dict(message)
            updated["content"] = _transform(message["content"])
            return updated
        return message

    if not messages:
        return messages

    if isinstance(messages, list):
        return [_rewrite(message) for message in messages]

    return _rewrite(messages)
80
+
81
+
82
def sanitize_openai_image(item: Any) -> Any:
    """Redact base64 payloads in a content item shaped like OpenAI chat input.

    Handles ``{"type": "image_url", "image_url": {"url": ...}}`` items by
    passing the URL through ``redact_base64_data_url``. Handles
    ``{"type": "audio", "data": ...}`` items by replacing ``data`` with the
    placeholder unless multimodal capture is enabled. Other items, and
    non-dict values, are returned unchanged. The input is never mutated.
    """
    if not isinstance(item, dict):
        return item

    kind = item.get("type")
    image_url = item.get("image_url")

    if kind == "image_url" and isinstance(image_url, dict) and "url" in image_url:
        cleaned_url = dict(image_url)
        cleaned_url["url"] = redact_base64_data_url(image_url["url"])
        return {**item, "image_url": cleaned_url}

    if kind == "audio" and "data" in item:
        if _is_multimodal_enabled():
            return item
        return {**item, "data": REDACTED_IMAGE_PLACEHOLDER}

    return item
105
+
106
+
107
def sanitize_openai_response_image(item: Any) -> Any:
    """Redact the ``image_url`` of an ``input_image`` content item.

    Only dicts with ``type == "input_image"`` and an ``image_url`` key are
    rewritten (as a shallow copy). Everything else is returned unchanged.
    """
    is_input_image = (
        isinstance(item, dict)
        and item.get("type") == "input_image"
        and "image_url" in item
    )
    if not is_input_image:
        return item

    return {**item, "image_url": redact_base64_data_url(item["image_url"])}
118
+
119
+
120
def sanitize_anthropic_image(item: Any) -> Any:
    """Replace the base64 data of an Anthropic-style image block.

    An item is rewritten when it is a dict with ``type == "image"`` whose
    ``source`` is a dict with ``type == "base64"`` and a ``data`` key. The
    returned copy carries ``REDACTED_IMAGE_PLACEHOLDER`` in ``source["data"]``.
    Nothing is changed when multimodal capture is enabled.
    """
    if _is_multimodal_enabled() or not isinstance(item, dict):
        return item

    source = item.get("source")
    if item.get("type") != "image" or not isinstance(source, dict):
        return item
    if source.get("type") != "base64" or "data" not in source:
        return item

    return {**item, "source": {**source, "data": REDACTED_IMAGE_PLACEHOLDER}}
142
+
143
+
144
def sanitize_gemini_part(part: Any) -> Any:
    """Replace ``inline_data["data"]`` of a Gemini-style part with the placeholder.

    Parts that are not dicts, or lack a dict-valued ``inline_data`` containing
    ``data``, are returned unchanged. So is every part when multimodal capture
    is enabled. The input dict is not mutated.
    """
    if _is_multimodal_enabled() or not isinstance(part, dict):
        return part

    inline = part.get("inline_data")
    if not isinstance(inline, dict) or "data" not in inline:
        return part

    return {**part, "inline_data": {**inline, "data": REDACTED_IMAGE_PLACEHOLDER}}
165
+
166
+
167
def process_gemini_item(item: Any) -> Any:
    """Sanitize the ``parts`` of a single Gemini-style content item.

    When *item* is a dict with a truthy ``parts`` value, each part (or the
    lone part, if ``parts`` is not a list) goes through
    ``sanitize_gemini_part`` and a shallow copy of the item is returned.
    Otherwise the item is returned unchanged.
    """
    if not isinstance(item, dict):
        return item

    parts = item.get("parts")
    if not parts:
        return item

    if isinstance(parts, list):
        cleaned = [sanitize_gemini_part(entry) for entry in parts]
    else:
        cleaned = sanitize_gemini_part(parts)

    return {**item, "parts": cleaned}
181
+
182
+
183
def sanitize_langchain_image(item: Any) -> Any:
    """Redact base64 payloads in the content-item shapes handled for LangChain.

    Recognised shapes, checked in this order:

    * ``type == "image_url"`` with ``image_url["url"]``: URL passed through
      ``redact_base64_data_url``.
    * ``type == "image"`` with top-level ``data``: ``data`` passed through
      ``redact_base64_data_url``.
    * ``type == "image"`` with ``source["data"]``: replaced by the
      placeholder unless multimodal capture is enabled.
    * ``type == "media"`` with top-level ``data``: ``data`` passed through
      ``redact_base64_data_url``.

    Anything else, including non-dict values, is returned unchanged. Matches
    return a shallow copy and the input is not mutated.
    """
    if not isinstance(item, dict):
        return item

    kind = item.get("type")

    if kind == "image_url":
        image_url = item.get("image_url")
        if isinstance(image_url, dict) and "url" in image_url:
            cleaned_url = {
                **image_url,
                "url": redact_base64_data_url(image_url["url"]),
            }
            return {**item, "image_url": cleaned_url}

    if kind == "image":
        # Top-level "data" takes precedence over a nested "source".
        if "data" in item:
            return {**item, "data": redact_base64_data_url(item["data"])}

        source = item.get("source")
        if isinstance(source, dict) and "data" in source:
            if _is_multimodal_enabled():
                return item
            return {
                **item,
                "source": {**source, "data": REDACTED_IMAGE_PLACEHOLDER},
            }

    if kind == "media" and "data" in item:
        return {**item, "data": redact_base64_data_url(item["data"])}

    return item
223
+
224
+
225
def sanitize_openai(data: Any) -> Any:
    """Run ``sanitize_openai_image`` over message content via ``process_messages``."""
    sanitized = process_messages(data, sanitize_openai_image)
    return sanitized
227
+
228
+
229
def sanitize_openai_response(data: Any) -> Any:
    """Run ``sanitize_openai_response_image`` over message content via ``process_messages``."""
    sanitized = process_messages(data, sanitize_openai_response_image)
    return sanitized
231
+
232
+
233
def sanitize_anthropic(data: Any) -> Any:
    """Run ``sanitize_anthropic_image`` over message content via ``process_messages``."""
    sanitized = process_messages(data, sanitize_anthropic_image)
    return sanitized
235
+
236
+
237
def sanitize_gemini(data: Any) -> Any:
    """Sanitize one Gemini-style content item or a list of them.

    Falsy input is returned as-is. A list is processed element by element
    with ``process_gemini_item``. Any other value is processed as a single
    item.
    """
    if not data:
        return data

    if not isinstance(data, list):
        return process_gemini_item(data)

    return list(map(process_gemini_item, data))
245
+
246
+
247
def sanitize_langchain(data: Any) -> Any:
    """Run ``sanitize_langchain_image`` over message content via ``process_messages``."""
    sanitized = process_messages(data, sanitize_langchain_image)
    return sanitized
@@ -0,0 +1,125 @@
1
+ """
2
+ Common type definitions for PostHog AI SDK.
3
+
4
+ These types are used for formatting messages and responses across different AI providers
5
+ (Anthropic, OpenAI, Gemini, etc.) to ensure consistency in tracking and data structure.
6
+ """
7
+
8
+ from typing import Any, Dict, List, Optional, TypedDict, Union
9
+
10
+
11
class FormattedTextContent(TypedDict):
    """A text content item in the formatted message structure."""

    # Discriminator; expected to be the literal "text".
    type: str
    # The text itself.
    text: str
16
+
17
+
18
class FormattedFunctionCall(TypedDict, total=False):
    """A function/tool call content item; every key is optional."""

    # Discriminator; expected to be the literal "function".
    type: str
    # Identifier of the call, when one is available.
    id: Optional[str]
    # Call details; contains 'name' and 'arguments'.
    function: Dict[str, Any]
24
+
25
+
26
class FormattedImageContent(TypedDict):
    """An image content item in the formatted message structure."""

    # Discriminator; expected to be the literal "image".
    type: str
    # String representation of the image.
    image: str
31
+
32
+
33
# Any single formatted content item: one of the typed shapes above, or a
# plain dict as the fallback for content types without a dedicated shape.
FormattedContentItem = Union[
    FormattedTextContent,
    FormattedFunctionCall,
    FormattedImageContent,
    Dict[str, Any],
]
40
+
41
+
42
class FormattedMessage(TypedDict):
    """
    Provider-independent message shape used for PostHog tracking.

    Every provider's messages are expressed in this structure so that
    events sent to PostHog look the same regardless of their origin.
    """

    # Role of the message author.
    role: str
    # Either plain text, a list of formatted content items, or any other value.
    content: Union[str, List[FormattedContentItem], Any]
52
+
53
+
54
class TokenUsage(TypedDict, total=False):
    """
    Token accounting reported for an AI model response.

    All keys are optional because providers differ in which of them
    they populate.
    """

    input_tokens: int
    output_tokens: int
    cache_read_input_tokens: Optional[int]
    cache_creation_input_tokens: Optional[int]
    reasoning_tokens: Optional[int]
    web_search_count: Optional[int]
67
+
68
+
69
class ProviderResponse(TypedDict, total=False):
    """
    Provider-independent response shape.

    Gives responses from every provider the same structure. All keys are
    optional.
    """

    # Formatted messages contained in the response.
    messages: List[FormattedMessage]
    # Token usage associated with the response.
    usage: TokenUsage
    # Error description, if any.
    error: Optional[str]
79
+
80
+
81
class StreamingContentBlock(TypedDict, total=False):
    """
    Accumulator for a piece of content while a response is streaming.

    Holds either text or a function call as it arrives. All keys are
    optional.
    """

    # Block discriminator: "text" or "function".
    type: str
    text: Optional[str]
    id: Optional[str]
    function: Optional[Dict[str, Any]]
92
+
93
+
94
class ToolInProgress(TypedDict):
    """
    State of a tool/function call that is still being streamed.

    Used by the Anthropic integration to build up a tool's JSON input.
    """

    # The content block the tool call belongs to.
    block: StreamingContentBlock
    # Input accumulated so far, kept as a string.
    input_string: str
103
+
104
+
105
class StreamingEventData(TypedDict):
    """
    Provider-independent payload describing a completed streaming call.

    Gives streaming events the same structure for every provider. Any
    provider-specific formatting has been applied before this is built.
    """

    # Provider name: "openai", "anthropic", "gemini".
    provider: str
    model: str
    base_url: str
    # Original kwargs, kept for tool extraction and special handling.
    kwargs: Dict[str, Any]
    # Provider-formatted input, ready for tracking.
    formatted_input: Any
    # Provider-formatted output, ready for tracking.
    formatted_output: Any
    usage_stats: TokenUsage
    latency: float
    distinct_id: Optional[str]
    trace_id: Optional[str]
    properties: Optional[Dict[str, Any]]
    privacy_mode: bool
    groups: Optional[Dict[str, Any]]