posthoganalytics 6.7.0__py3-none-any.whl → 7.4.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- posthoganalytics/__init__.py +84 -7
- posthoganalytics/ai/anthropic/__init__.py +10 -0
- posthoganalytics/ai/anthropic/anthropic.py +95 -65
- posthoganalytics/ai/anthropic/anthropic_async.py +95 -65
- posthoganalytics/ai/anthropic/anthropic_converter.py +443 -0
- posthoganalytics/ai/gemini/__init__.py +15 -1
- posthoganalytics/ai/gemini/gemini.py +66 -71
- posthoganalytics/ai/gemini/gemini_async.py +423 -0
- posthoganalytics/ai/gemini/gemini_converter.py +652 -0
- posthoganalytics/ai/langchain/callbacks.py +58 -13
- posthoganalytics/ai/openai/__init__.py +16 -1
- posthoganalytics/ai/openai/openai.py +140 -149
- posthoganalytics/ai/openai/openai_async.py +127 -82
- posthoganalytics/ai/openai/openai_converter.py +741 -0
- posthoganalytics/ai/sanitization.py +248 -0
- posthoganalytics/ai/types.py +125 -0
- posthoganalytics/ai/utils.py +339 -356
- posthoganalytics/client.py +345 -97
- posthoganalytics/contexts.py +81 -0
- posthoganalytics/exception_utils.py +250 -2
- posthoganalytics/feature_flags.py +26 -10
- posthoganalytics/flag_definition_cache.py +127 -0
- posthoganalytics/integrations/django.py +157 -19
- posthoganalytics/request.py +203 -23
- posthoganalytics/test/test_client.py +250 -22
- posthoganalytics/test/test_exception_capture.py +418 -0
- posthoganalytics/test/test_feature_flag_result.py +441 -2
- posthoganalytics/test/test_feature_flags.py +308 -104
- posthoganalytics/test/test_flag_definition_cache.py +612 -0
- posthoganalytics/test/test_module.py +0 -8
- posthoganalytics/test/test_request.py +536 -0
- posthoganalytics/test/test_utils.py +4 -1
- posthoganalytics/types.py +40 -0
- posthoganalytics/version.py +1 -1
- {posthoganalytics-6.7.0.dist-info → posthoganalytics-7.4.3.dist-info}/METADATA +12 -12
- posthoganalytics-7.4.3.dist-info/RECORD +57 -0
- posthoganalytics-6.7.0.dist-info/RECORD +0 -49
- {posthoganalytics-6.7.0.dist-info → posthoganalytics-7.4.3.dist-info}/WHEEL +0 -0
- {posthoganalytics-6.7.0.dist-info → posthoganalytics-7.4.3.dist-info}/licenses/LICENSE +0 -0
- {posthoganalytics-6.7.0.dist-info → posthoganalytics-7.4.3.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,248 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import re
|
|
3
|
+
from typing import Any
|
|
4
|
+
from urllib.parse import urlparse
|
|
5
|
+
|
|
6
|
+
REDACTED_IMAGE_PLACEHOLDER = "[base64 image redacted]"
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def _is_multimodal_enabled() -> bool:
    """Report whether the ``_INTERNAL_LLMA_MULTIMODAL`` environment flag is on.

    The variable is read on every call and compared case-insensitively
    against the accepted truthy spellings; an unset variable counts as off.
    """
    flag = os.environ.get("_INTERNAL_LLMA_MULTIMODAL", "")
    return flag.lower() in {"true", "1", "yes"}
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def is_base64_data_url(text: str) -> bool:
    """Return True when *text* begins like a base64-encoded ``data:`` URL.

    A ``data:`` URL may carry an empty media type (``data:;base64,...``) or
    extra media-type parameters before the ``;base64`` marker
    (``data:text/plain;charset=utf-8;base64,...``).  Both forms are
    recognised so that such payloads are not missed by callers that redact
    inline data.

    Args:
        text: Candidate string to inspect.

    Returns:
        True if the string starts with ``data:``, an optional media type,
        any number of ``;parameter`` segments, and then ``;base64,``.
    """
    # First segment: media type (may be empty).  Then zero or more
    # ";param" segments that contain neither ";" nor ",", then the marker.
    return re.match(r"^data:[^;]*(?:;[^;,]*)*;base64,", text) is not None
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def is_valid_url(text: str) -> bool:
    """Tell whether *text* parses as a URL with both a scheme and a host.

    If ``urlparse`` raises, the function falls back to checking whether the
    text starts with ``/``, ``./`` or ``../``.

    NOTE(review): the prefix fallback is only reached when parsing raises;
    a path such as ``/foo`` that parses cleanly is reported as not valid.
    Confirm this is intended before relying on relative-path detection.
    """
    try:
        parsed = urlparse(text)
        has_scheme_and_host = bool(parsed.scheme and parsed.netloc)
    except Exception:
        # Parsing failed: fall back to a simple path-prefix check.
        return text.startswith(("/", "./", "../"))
    return has_scheme_and_host
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def is_raw_base64(text: str) -> bool:
    """Heuristically decide whether *text* is a bare base64 payload.

    Strings accepted by ``is_valid_url`` and strings of 20 characters or
    fewer are rejected; anything else must consist solely of base64
    alphabet characters followed by optional ``=`` padding.
    """
    if is_valid_url(text) or len(text) <= 20:
        return False
    return bool(re.match(r"^[A-Za-z0-9+/]+=*$", text))
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def redact_base64_data_url(value: Any) -> Any:
    """Swap base64-looking strings for the redaction placeholder.

    The value is returned untouched when multimodal capture is enabled or
    when it is not a string.  Otherwise, a base64 ``data:`` URL or a raw
    base64 string is replaced by ``REDACTED_IMAGE_PLACEHOLDER``; any other
    string passes through unchanged.
    """
    if _is_multimodal_enabled() or not isinstance(value, str):
        return value

    looks_like_base64 = is_base64_data_url(value) or is_raw_base64(value)
    return REDACTED_IMAGE_PLACEHOLDER if looks_like_base64 else value
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def process_messages(messages: Any, transform_content_func) -> Any:
    """Run *transform_content_func* over the ``content`` of each message.

    Args:
        messages: A single message or a list of messages.  Falsy input is
            returned as-is.  Entries that are not dicts, or dicts without a
            ``"content"`` key, are passed through untouched.
        transform_content_func: Callable applied to each item of a list
            content, or to the content itself when it is a non-empty,
            non-string, non-list value.

    Returns:
        A new list (or a new dict for a single message) in which each
        affected message is a shallow copy with its ``"content"`` replaced.
        String content and falsy content are left unchanged.
    """
    if not messages:
        return messages

    def _convert_content(content: Any) -> Any:
        # Plain strings and empty/None content have nothing to transform.
        if isinstance(content, str) or not content:
            return content
        if isinstance(content, list):
            return list(map(transform_content_func, content))
        return transform_content_func(content)

    def _convert_message(message: Any) -> Any:
        if isinstance(message, dict) and "content" in message:
            updated = dict(message)
            updated["content"] = _convert_content(message["content"])
            return updated
        return message

    if isinstance(messages, list):
        return list(map(_convert_message, messages))
    return _convert_message(messages)
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
def sanitize_openai_image(item: Any) -> Any:
    """Redact inline media in one OpenAI chat content item.

    * ``image_url`` items with a dict ``image_url`` containing ``url`` get
      that URL passed through ``redact_base64_data_url``.
    * ``audio`` items with a ``data`` key have the data replaced by the
      placeholder unless multimodal capture is enabled.

    Non-dict items and unrecognised shapes are returned unchanged.  The
    input is never mutated; a shallow copy is returned when redacting.
    """
    if not isinstance(item, dict):
        return item

    kind = item.get("type")
    image_ref = item.get("image_url")

    if kind == "image_url" and isinstance(image_ref, dict) and "url" in image_ref:
        cleaned_ref = {**image_ref, "url": redact_base64_data_url(image_ref["url"])}
        return {**item, "image_url": cleaned_ref}

    if kind == "audio" and "data" in item:
        if _is_multimodal_enabled():
            return item
        return {**item, "data": REDACTED_IMAGE_PLACEHOLDER}

    return item
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
def sanitize_openai_response_image(item: Any) -> Any:
    """Redact the ``image_url`` of an ``input_image`` content item.

    Only dicts whose ``type`` is ``"input_image"`` and that carry an
    ``image_url`` key are rewritten (as a shallow copy); everything else is
    returned as-is.
    """
    is_input_image = (
        isinstance(item, dict)
        and item.get("type") == "input_image"
        and "image_url" in item
    )
    if not is_input_image:
        return item

    return {**item, "image_url": redact_base64_data_url(item["image_url"])}
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
def sanitize_anthropic_image(item: Any) -> Any:
    """Replace base64 image data in one Anthropic content item.

    Nothing is changed when multimodal capture is enabled, when *item* is
    not a dict, or when it is not an ``image`` item whose ``source`` is a
    dict of type ``base64`` containing ``data``.  Otherwise a shallow copy
    is returned with ``source["data"]`` set to the placeholder.
    """
    if _is_multimodal_enabled() or not isinstance(item, dict):
        return item

    source = item.get("source")
    has_inline_image = (
        item.get("type") == "image"
        and isinstance(source, dict)
        and source.get("type") == "base64"
        and "data" in source
    )
    if not has_inline_image:
        return item

    redacted_source = {**source, "data": REDACTED_IMAGE_PLACEHOLDER}
    return {**item, "source": redacted_source}
|
|
142
|
+
|
|
143
|
+
|
|
144
|
+
def sanitize_gemini_part(part: Any) -> Any:
    """Replace ``inline_data.data`` in one Gemini part with the placeholder.

    The part is returned unchanged when multimodal capture is enabled, when
    it is not a dict, or when it has no dict ``inline_data`` holding a
    ``data`` key.  Redaction returns a shallow copy of the part.
    """
    if _is_multimodal_enabled() or not isinstance(part, dict):
        return part

    inline = part.get("inline_data")
    if not (isinstance(inline, dict) and "data" in inline):
        return part

    redacted_inline = {**inline, "data": REDACTED_IMAGE_PLACEHOLDER}
    return {**part, "inline_data": redacted_inline}
|
|
165
|
+
|
|
166
|
+
|
|
167
|
+
def process_gemini_item(item: Any) -> Any:
    """Sanitize the ``parts`` of one Gemini content item.

    Non-dict items, and dicts whose ``parts`` entry is missing or falsy,
    are returned unchanged.  Otherwise ``sanitize_gemini_part`` is applied
    to each element of a list ``parts`` (or to ``parts`` itself when it is
    not a list) and a shallow copy with the new ``parts`` is returned.
    """
    if not isinstance(item, dict):
        return item

    raw_parts = item.get("parts")
    if not raw_parts:
        return item

    if isinstance(raw_parts, list):
        cleaned = list(map(sanitize_gemini_part, raw_parts))
    else:
        cleaned = sanitize_gemini_part(raw_parts)

    return {**item, "parts": cleaned}
|
|
181
|
+
|
|
182
|
+
|
|
183
|
+
def sanitize_langchain_image(item: Any) -> Any:
    """Redact inline media in one LangChain content item.

    Shapes handled, checked in this order:

    * ``image_url`` with a dict ``image_url`` containing ``url``: the URL
      goes through ``redact_base64_data_url``.
    * ``image`` or ``media`` with a top-level ``data`` key: the data goes
      through ``redact_base64_data_url``.
    * ``image`` with a dict ``source`` containing ``data``: the data is
      replaced by the placeholder unless multimodal capture is enabled.

    Anything else, including non-dict items, is returned unchanged.
    Redaction always returns a shallow copy.
    """
    if not isinstance(item, dict):
        return item

    kind = item.get("type")

    if kind == "image_url":
        image_ref = item.get("image_url")
        if isinstance(image_ref, dict) and "url" in image_ref:
            cleaned_ref = {**image_ref, "url": redact_base64_data_url(image_ref["url"])}
            return {**item, "image_url": cleaned_ref}

    # Top-level "data" takes precedence over a nested "source" for images.
    if kind in ("image", "media") and "data" in item:
        return {**item, "data": redact_base64_data_url(item["data"])}

    if kind == "image":
        source = item.get("source")
        if isinstance(source, dict) and "data" in source:
            if _is_multimodal_enabled():
                return item
            redacted_source = {**source, "data": REDACTED_IMAGE_PLACEHOLDER}
            return {**item, "source": redacted_source}

    return item
|
|
223
|
+
|
|
224
|
+
|
|
225
|
+
def sanitize_openai(data: Any) -> Any:
    """Apply ``sanitize_openai_image`` to message content via ``process_messages``."""
    return process_messages(
        messages=data,
        transform_content_func=sanitize_openai_image,
    )
|
|
227
|
+
|
|
228
|
+
|
|
229
|
+
def sanitize_openai_response(data: Any) -> Any:
    """Apply ``sanitize_openai_response_image`` to message content via ``process_messages``."""
    return process_messages(
        messages=data,
        transform_content_func=sanitize_openai_response_image,
    )
|
|
231
|
+
|
|
232
|
+
|
|
233
|
+
def sanitize_anthropic(data: Any) -> Any:
    """Apply ``sanitize_anthropic_image`` to message content via ``process_messages``."""
    return process_messages(
        messages=data,
        transform_content_func=sanitize_anthropic_image,
    )
|
|
235
|
+
|
|
236
|
+
|
|
237
|
+
def sanitize_gemini(data: Any) -> Any:
    """Sanitize Gemini input, given as one item or a list of items.

    Falsy input is returned unchanged.  A list yields a new list with
    ``process_gemini_item`` applied to each element; any other value is
    handed to ``process_gemini_item`` directly.
    """
    if not data:
        return data

    if not isinstance(data, list):
        return process_gemini_item(data)

    return list(map(process_gemini_item, data))
|
|
245
|
+
|
|
246
|
+
|
|
247
|
+
def sanitize_langchain(data: Any) -> Any:
    """Apply ``sanitize_langchain_image`` to message content via ``process_messages``."""
    return process_messages(
        messages=data,
        transform_content_func=sanitize_langchain_image,
    )
|
|
@@ -0,0 +1,125 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Common type definitions for PostHog AI SDK.
|
|
3
|
+
|
|
4
|
+
These types are used for formatting messages and responses across different AI providers
|
|
5
|
+
(Anthropic, OpenAI, Gemini, etc.) to ensure consistency in tracking and data structure.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from typing import Any, Dict, List, Optional, TypedDict, Union
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class FormattedTextContent(TypedDict):
    """A text content item; both keys are required."""

    # Content-kind discriminator; expected to be the literal "text".
    type: str
    # The text payload itself.
    text: str
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class FormattedFunctionCall(TypedDict, total=False):
    """A function/tool call content item; every key is optional."""

    # Content-kind discriminator; expected to be the literal "function".
    type: str
    # Call identifier, which may be None.
    id: Optional[str]
    # Mapping that contains the call's 'name' and 'arguments'.
    function: Dict[str, Any]
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class FormattedImageContent(TypedDict):
    """An image content item; both keys are required."""

    # Content-kind discriminator; expected to be the literal "image".
    type: str
    # The image value, carried as a string.
    image: str
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
# Union of every formatted content item shape: the typed text, function-call
# and image items, plus a plain dict for content that fits none of them.
FormattedContentItem = Union[
    FormattedTextContent,
    FormattedFunctionCall,
    FormattedImageContent,
    Dict[str, Any],  # Fallback for unknown content types
]
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
class FormattedMessage(TypedDict):
    """
    Provider-independent message shape used when sending events to PostHog.

    Every provider's messages are converted to this structure so tracked
    messages look the same regardless of where they came from.
    """

    # Role of the message author.
    role: str
    # Either a plain string, a list of formatted content items, or any
    # other value a provider produced.
    content: Union[str, List[FormattedContentItem], Any]
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
class TokenUsage(TypedDict, total=False):
    """
    Token accounting for a model response.

    All keys are optional because providers do not all report the same
    fields.
    """

    # Counts reported for the request and the response.
    input_tokens: int
    output_tokens: int
    # Cache-related input token counts; may be absent or None.
    cache_read_input_tokens: Optional[int]
    cache_creation_input_tokens: Optional[int]
    # Additional counters; may be absent or None.
    reasoning_tokens: Optional[int]
    web_search_count: Optional[int]
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
class ProviderResponse(TypedDict, total=False):
    """
    Provider-independent response shape.

    Gives every provider's response the same structure; all keys are
    optional.
    """

    # Messages in the standardized message format.
    messages: List[FormattedMessage]
    # Token accounting for the response.
    usage: TokenUsage
    # Error text, or None.
    error: Optional[str]
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
class StreamingContentBlock(TypedDict, total=False):
    """
    Accumulator for a piece of content while a response is streaming.

    Holds either text or a function call as it arrives; all keys are
    optional.
    """

    # Block kind: "text" or "function".
    type: str
    # Text of the block, or None.
    text: Optional[str]
    # Identifier of the block, or None.
    id: Optional[str]
    # Function-call mapping, or None.
    function: Optional[Dict[str, Any]]
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
class ToolInProgress(TypedDict):
    """
    State for a tool/function call that is still being streamed.

    Anthropic uses it to build up a tool's JSON input piece by piece.
    """

    # The content block this tool call belongs to.
    block: StreamingContentBlock
    # Input text accumulated so far.
    input_string: str
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
class StreamingEventData(TypedDict):
    """
    Provider-independent payload for a streaming event.

    By the time one of these is built, all provider-specific formatting has
    been done, so streaming events are captured with a uniform structure.
    """

    # Provider name: "openai", "anthropic", "gemini".
    provider: str
    model: str
    base_url: str
    # Original call kwargs, kept for tool extraction and special handling.
    kwargs: Dict[str, Any]
    # Input and output, already formatted by the provider and ready to track.
    formatted_input: Any
    formatted_output: Any
    usage_stats: TokenUsage
    latency: float
    # Values below may be None.
    distinct_id: Optional[str]
    trace_id: Optional[str]
    properties: Optional[Dict[str, Any]]
    privacy_mode: bool
    groups: Optional[Dict[str, Any]]
|