posthoganalytics 6.7.0__py3-none-any.whl → 7.4.3__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package, as published to a supported public registry. It is provided for informational purposes only and reflects the changes between the versions exactly as they appear in that registry.
Files changed (40)
  1. posthoganalytics/__init__.py +84 -7
  2. posthoganalytics/ai/anthropic/__init__.py +10 -0
  3. posthoganalytics/ai/anthropic/anthropic.py +95 -65
  4. posthoganalytics/ai/anthropic/anthropic_async.py +95 -65
  5. posthoganalytics/ai/anthropic/anthropic_converter.py +443 -0
  6. posthoganalytics/ai/gemini/__init__.py +15 -1
  7. posthoganalytics/ai/gemini/gemini.py +66 -71
  8. posthoganalytics/ai/gemini/gemini_async.py +423 -0
  9. posthoganalytics/ai/gemini/gemini_converter.py +652 -0
  10. posthoganalytics/ai/langchain/callbacks.py +58 -13
  11. posthoganalytics/ai/openai/__init__.py +16 -1
  12. posthoganalytics/ai/openai/openai.py +140 -149
  13. posthoganalytics/ai/openai/openai_async.py +127 -82
  14. posthoganalytics/ai/openai/openai_converter.py +741 -0
  15. posthoganalytics/ai/sanitization.py +248 -0
  16. posthoganalytics/ai/types.py +125 -0
  17. posthoganalytics/ai/utils.py +339 -356
  18. posthoganalytics/client.py +345 -97
  19. posthoganalytics/contexts.py +81 -0
  20. posthoganalytics/exception_utils.py +250 -2
  21. posthoganalytics/feature_flags.py +26 -10
  22. posthoganalytics/flag_definition_cache.py +127 -0
  23. posthoganalytics/integrations/django.py +157 -19
  24. posthoganalytics/request.py +203 -23
  25. posthoganalytics/test/test_client.py +250 -22
  26. posthoganalytics/test/test_exception_capture.py +418 -0
  27. posthoganalytics/test/test_feature_flag_result.py +441 -2
  28. posthoganalytics/test/test_feature_flags.py +308 -104
  29. posthoganalytics/test/test_flag_definition_cache.py +612 -0
  30. posthoganalytics/test/test_module.py +0 -8
  31. posthoganalytics/test/test_request.py +536 -0
  32. posthoganalytics/test/test_utils.py +4 -1
  33. posthoganalytics/types.py +40 -0
  34. posthoganalytics/version.py +1 -1
  35. {posthoganalytics-6.7.0.dist-info → posthoganalytics-7.4.3.dist-info}/METADATA +12 -12
  36. posthoganalytics-7.4.3.dist-info/RECORD +57 -0
  37. posthoganalytics-6.7.0.dist-info/RECORD +0 -49
  38. {posthoganalytics-6.7.0.dist-info → posthoganalytics-7.4.3.dist-info}/WHEEL +0 -0
  39. {posthoganalytics-6.7.0.dist-info → posthoganalytics-7.4.3.dist-info}/licenses/LICENSE +0 -0
  40. {posthoganalytics-6.7.0.dist-info → posthoganalytics-7.4.3.dist-info}/top_level.txt +0 -0
@@ -1,10 +1,83 @@
  import time
  import uuid
- from typing import Any, Callable, Dict, List, Optional
-
- from httpx import URL
+ from typing import Any, Callable, Dict, List, Optional, cast

  from posthoganalytics.client import Client as PostHogClient
+ from posthoganalytics.ai.types import FormattedMessage, StreamingEventData, TokenUsage
+ from posthoganalytics.ai.sanitization import (
+     sanitize_openai,
+     sanitize_anthropic,
+     sanitize_gemini,
+     sanitize_langchain,
+ )
+
+
+ def merge_usage_stats(
+     target: TokenUsage, source: TokenUsage, mode: str = "incremental"
+ ) -> None:
+     """
+     Merge streaming usage statistics into target dict, handling None values.
+
+     Supports two modes:
+     - "incremental": Add source values to target (for APIs that report new tokens)
+     - "cumulative": Replace target with source values (for APIs that report totals)
+
+     Args:
+         target: Dictionary to update with usage stats
+         source: TokenUsage that may contain None values
+         mode: Either "incremental" or "cumulative"
+     """
+     if mode == "incremental":
+         # Add new values to existing totals
+         source_input = source.get("input_tokens")
+         if source_input is not None:
+             current = target.get("input_tokens") or 0
+             target["input_tokens"] = current + source_input
+
+         source_output = source.get("output_tokens")
+         if source_output is not None:
+             current = target.get("output_tokens") or 0
+             target["output_tokens"] = current + source_output
+
+         source_cache_read = source.get("cache_read_input_tokens")
+         if source_cache_read is not None:
+             current = target.get("cache_read_input_tokens") or 0
+             target["cache_read_input_tokens"] = current + source_cache_read
+
+         source_cache_creation = source.get("cache_creation_input_tokens")
+         if source_cache_creation is not None:
+             current = target.get("cache_creation_input_tokens") or 0
+             target["cache_creation_input_tokens"] = current + source_cache_creation
+
+         source_reasoning = source.get("reasoning_tokens")
+         if source_reasoning is not None:
+             current = target.get("reasoning_tokens") or 0
+             target["reasoning_tokens"] = current + source_reasoning
+
+         source_web_search = source.get("web_search_count")
+         if source_web_search is not None:
+             current = target.get("web_search_count") or 0
+             target["web_search_count"] = max(current, source_web_search)
+
+     elif mode == "cumulative":
+         # Replace with latest values (already cumulative)
+         if source.get("input_tokens") is not None:
+             target["input_tokens"] = source["input_tokens"]
+         if source.get("output_tokens") is not None:
+             target["output_tokens"] = source["output_tokens"]
+         if source.get("cache_read_input_tokens") is not None:
+             target["cache_read_input_tokens"] = source["cache_read_input_tokens"]
+         if source.get("cache_creation_input_tokens") is not None:
+             target["cache_creation_input_tokens"] = source[
+                 "cache_creation_input_tokens"
+             ]
+         if source.get("reasoning_tokens") is not None:
+             target["reasoning_tokens"] = source["reasoning_tokens"]
+         if source.get("web_search_count") is not None:
+             target["web_search_count"] = source["web_search_count"]
+
+     else:
+         raise ValueError(f"Invalid mode: {mode}. Must be 'incremental' or 'cumulative'")


  def get_model_params(kwargs: Dict[str, Any]) -> Dict[str, Any]:
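
The new `merge_usage_stats` helper above is the accumulation primitive the streaming wrappers use. A minimal sketch of the two modes, assuming `TokenUsage` behaves as a plain dict (it is constructed with keyword arguments elsewhere in this diff):

```python
from posthoganalytics.ai.types import TokenUsage
from posthoganalytics.ai.utils import merge_usage_stats

totals = TokenUsage(input_tokens=10, output_tokens=5)

# "incremental": for APIs whose chunks report only the new tokens.
merge_usage_stats(totals, TokenUsage(output_tokens=3), mode="incremental")
assert totals["output_tokens"] == 8

# "cumulative": for APIs whose chunks report running totals; latest values win.
merge_usage_stats(
    totals, TokenUsage(input_tokens=12, output_tokens=20), mode="cumulative"
)
assert totals == {"input_tokens": 12, "output_tokens": 20}
```
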
@@ -29,349 +102,135 @@ def get_model_params(kwargs: Dict[str, Any]) -> Dict[str, Any]:
      return model_params


- def get_usage(response, provider: str) -> Dict[str, Any]:
+ def get_usage(response, provider: str) -> TokenUsage:
+     """
+     Extract usage statistics from response based on provider.
+     Delegates to provider-specific converter functions.
+     """
      if provider == "anthropic":
-         return {
-             "input_tokens": response.usage.input_tokens,
-             "output_tokens": response.usage.output_tokens,
-             "cache_read_input_tokens": response.usage.cache_read_input_tokens,
-             "cache_creation_input_tokens": response.usage.cache_creation_input_tokens,
-         }
+         from posthoganalytics.ai.anthropic.anthropic_converter import (
+             extract_anthropic_usage_from_response,
+         )
+
+         return extract_anthropic_usage_from_response(response)
      elif provider == "openai":
-         cached_tokens = 0
-         input_tokens = 0
-         output_tokens = 0
-         reasoning_tokens = 0
-
-         # responses api
-         if hasattr(response.usage, "input_tokens"):
-             input_tokens = response.usage.input_tokens
-         if hasattr(response.usage, "output_tokens"):
-             output_tokens = response.usage.output_tokens
-         if hasattr(response.usage, "input_tokens_details") and hasattr(
-             response.usage.input_tokens_details, "cached_tokens"
-         ):
-             cached_tokens = response.usage.input_tokens_details.cached_tokens
-         if hasattr(response.usage, "output_tokens_details") and hasattr(
-             response.usage.output_tokens_details, "reasoning_tokens"
-         ):
-             reasoning_tokens = response.usage.output_tokens_details.reasoning_tokens
-
-         # chat completions
-         if hasattr(response.usage, "prompt_tokens"):
-             input_tokens = response.usage.prompt_tokens
-         if hasattr(response.usage, "completion_tokens"):
-             output_tokens = response.usage.completion_tokens
-         if hasattr(response.usage, "prompt_tokens_details") and hasattr(
-             response.usage.prompt_tokens_details, "cached_tokens"
-         ):
-             cached_tokens = response.usage.prompt_tokens_details.cached_tokens
+         from posthoganalytics.ai.openai.openai_converter import (
+             extract_openai_usage_from_response,
+         )

-         return {
-             "input_tokens": input_tokens,
-             "output_tokens": output_tokens,
-             "cache_read_input_tokens": cached_tokens,
-             "reasoning_tokens": reasoning_tokens,
-         }
+         return extract_openai_usage_from_response(response)
      elif provider == "gemini":
-         input_tokens = 0
-         output_tokens = 0
+         from posthoganalytics.ai.gemini.gemini_converter import (
+             extract_gemini_usage_from_response,
+         )

-         if hasattr(response, "usage_metadata") and response.usage_metadata:
-             input_tokens = getattr(response.usage_metadata, "prompt_token_count", 0)
-             output_tokens = getattr(
-                 response.usage_metadata, "candidates_token_count", 0
-             )
+         return extract_gemini_usage_from_response(response)

-         return {
-             "input_tokens": input_tokens,
-             "output_tokens": output_tokens,
-             "cache_read_input_tokens": 0,
-             "cache_creation_input_tokens": 0,
-             "reasoning_tokens": 0,
-         }
-     return {
-         "input_tokens": 0,
-         "output_tokens": 0,
-         "cache_read_input_tokens": 0,
-         "cache_creation_input_tokens": 0,
-         "reasoning_tokens": 0,
-     }
+     return TokenUsage(input_tokens=0, output_tokens=0)


  def format_response(response, provider: str):
      """
      Format a regular (non-streaming) response.
      """
-     output = []
-     if response is None:
-         return output
      if provider == "anthropic":
-         return format_response_anthropic(response)
-     elif provider == "openai":
-         return format_response_openai(response)
-     elif provider == "gemini":
-         return format_response_gemini(response)
-     return output
-
-
- def format_response_anthropic(response):
-     output = []
-     content = []
+         from posthoganalytics.ai.anthropic.anthropic_converter import format_anthropic_response

-     for choice in response.content:
-         if (
-             hasattr(choice, "type")
-             and choice.type == "text"
-             and hasattr(choice, "text")
-             and choice.text
-         ):
-             content.append({"type": "text", "text": choice.text})
-         elif (
-             hasattr(choice, "type")
-             and choice.type == "tool_use"
-             and hasattr(choice, "name")
-             and hasattr(choice, "id")
-         ):
-             tool_call = {
-                 "type": "function",
-                 "id": choice.id,
-                 "function": {
-                     "name": choice.name,
-                     "arguments": getattr(choice, "input", {}),
-                 },
-             }
-             content.append(tool_call)
-
-     if content:
-         message = {
-             "role": "assistant",
-             "content": content,
-         }
-         output.append(message)
-
-     return output
-
-
- def format_response_openai(response):
-     output = []
-
-     if hasattr(response, "choices"):
-         content = []
-         role = "assistant"
-
-         for choice in response.choices:
-             # Handle Chat Completions response format
-             if hasattr(choice, "message") and choice.message:
-                 if choice.message.role:
-                     role = choice.message.role
-
-                 if choice.message.content:
-                     content.append({"type": "text", "text": choice.message.content})
-
-                 if hasattr(choice.message, "tool_calls") and choice.message.tool_calls:
-                     for tool_call in choice.message.tool_calls:
-                         content.append(
-                             {
-                                 "type": "function",
-                                 "id": tool_call.id,
-                                 "function": {
-                                     "name": tool_call.function.name,
-                                     "arguments": tool_call.function.arguments,
-                                 },
-                             }
-                         )
-
-         if content:
-             message = {
-                 "role": role,
-                 "content": content,
-             }
-             output.append(message)
-
-     # Handle Responses API format
-     if hasattr(response, "output"):
-         content = []
-         role = "assistant"
-
-         for item in response.output:
-             if item.type == "message":
-                 role = item.role
-
-                 if hasattr(item, "content") and isinstance(item.content, list):
-                     for content_item in item.content:
-                         if (
-                             hasattr(content_item, "type")
-                             and content_item.type == "output_text"
-                             and hasattr(content_item, "text")
-                         ):
-                             content.append({"type": "text", "text": content_item.text})
-                         elif hasattr(content_item, "text"):
-                             content.append({"type": "text", "text": content_item.text})
-                         elif (
-                             hasattr(content_item, "type")
-                             and content_item.type == "input_image"
-                             and hasattr(content_item, "image_url")
-                         ):
-                             content.append(
-                                 {
-                                     "type": "image",
-                                     "image": content_item.image_url,
-                                 }
-                             )
-                 elif hasattr(item, "content"):
-                     content.append({"type": "text", "text": str(item.content)})
-
-             elif hasattr(item, "type") and item.type == "function_call":
-                 content.append(
-                     {
-                         "type": "function",
-                         "id": getattr(item, "call_id", getattr(item, "id", "")),
-                         "function": {
-                             "name": item.name,
-                             "arguments": getattr(item, "arguments", {}),
-                         },
-                     }
-                 )
+         return format_anthropic_response(response)
+     elif provider == "openai":
+         from posthoganalytics.ai.openai.openai_converter import format_openai_response

-         if content:
-             message = {
-                 "role": role,
-                 "content": content,
-             }
-             output.append(message)
-
-     return output
-
-
- def format_response_gemini(response):
-     output = []
-
-     if hasattr(response, "candidates") and response.candidates:
-         for candidate in response.candidates:
-             if hasattr(candidate, "content") and candidate.content:
-                 content = []
-
-                 if hasattr(candidate.content, "parts") and candidate.content.parts:
-                     for part in candidate.content.parts:
-                         if hasattr(part, "text") and part.text:
-                             content.append({"type": "text", "text": part.text})
-                         elif hasattr(part, "function_call") and part.function_call:
-                             function_call = part.function_call
-                             content.append(
-                                 {
-                                     "type": "function",
-                                     "function": {
-                                         "name": function_call.name,
-                                         "arguments": function_call.args,
-                                     },
-                                 }
-                             )
-
-                 if content:
-                     message = {
-                         "role": "assistant",
-                         "content": content,
-                     }
-                     output.append(message)
-
-             elif hasattr(candidate, "text") and candidate.text:
-                 output.append(
-                     {
-                         "role": "assistant",
-                         "content": [{"type": "text", "text": candidate.text}],
-                     }
-                 )
-     elif hasattr(response, "text") and response.text:
-         output.append(
-             {
-                 "role": "assistant",
-                 "content": [{"type": "text", "text": response.text}],
-             }
-         )
+         return format_openai_response(response)
+     elif provider == "gemini":
+         from posthoganalytics.ai.gemini.gemini_converter import format_gemini_response

-     return output
+         return format_gemini_response(response)
+     return []


  def extract_available_tool_calls(provider: str, kwargs: Dict[str, Any]):
+     """
+     Extract available tool calls for the given provider.
+     """
      if provider == "anthropic":
-         if "tools" in kwargs:
-             return kwargs["tools"]
+         from posthoganalytics.ai.anthropic.anthropic_converter import extract_anthropic_tools

-         return None
+         return extract_anthropic_tools(kwargs)
      elif provider == "gemini":
-         if "config" in kwargs and hasattr(kwargs["config"], "tools"):
-             return kwargs["config"].tools
+         from posthoganalytics.ai.gemini.gemini_converter import extract_gemini_tools

-         return None
+         return extract_gemini_tools(kwargs)
      elif provider == "openai":
-         if "tools" in kwargs:
-             return kwargs["tools"]
+         from posthoganalytics.ai.openai.openai_converter import extract_openai_tools

-         return None
+         return extract_openai_tools(kwargs)
+     return None


- def merge_system_prompt(kwargs: Dict[str, Any], provider: str):
-     messages: List[Dict[str, Any]] = []
+ def merge_system_prompt(
+     kwargs: Dict[str, Any], provider: str
+ ) -> List[FormattedMessage]:
+     """
+     Merge system prompts and format messages for the given provider.
+     """
      if provider == "anthropic":
+         from posthoganalytics.ai.anthropic.anthropic_converter import format_anthropic_input
+
          messages = kwargs.get("messages") or []
-         if kwargs.get("system") is None:
-             return messages
-         return [{"role": "system", "content": kwargs.get("system")}] + messages
+         system = kwargs.get("system")
+         return format_anthropic_input(messages, system)
      elif provider == "gemini":
-         contents = kwargs.get("contents", [])
-         if isinstance(contents, str):
-             return [{"role": "user", "content": contents}]
-         elif isinstance(contents, list):
-             formatted = []
-             for item in contents:
-                 if isinstance(item, str):
-                     formatted.append({"role": "user", "content": item})
-                 elif hasattr(item, "text"):
-                     formatted.append({"role": "user", "content": item.text})
-                 else:
-                     formatted.append({"role": "user", "content": str(item)})
-             return formatted
-         else:
-             return [{"role": "user", "content": str(contents)}]
-
-     # For OpenAI, handle both Chat Completions and Responses API
-     if kwargs.get("messages") is not None:
-         messages = list(kwargs.get("messages", []))
-
-     if kwargs.get("input") is not None:
-         input_data = kwargs.get("input")
-         if isinstance(input_data, list):
-             messages.extend(input_data)
-         else:
-             messages.append({"role": "user", "content": input_data})
-
-     # Check if system prompt is provided as a separate parameter
-     if kwargs.get("system") is not None:
-         has_system = any(msg.get("role") == "system" for msg in messages)
-         if not has_system:
-             messages = [{"role": "system", "content": kwargs.get("system")}] + messages
-
-     # For Responses API, add instructions to the system prompt if provided
-     if kwargs.get("instructions") is not None:
-         # Find the system message if it exists
-         system_idx = next(
-             (i for i, msg in enumerate(messages) if msg.get("role") == "system"), None
-         )
+         from posthoganalytics.ai.gemini.gemini_converter import format_gemini_input_with_system

-         if system_idx is not None:
-             # Append instructions to existing system message
-             system_content = messages[system_idx].get("content", "")
-             messages[system_idx]["content"] = (
-                 f"{system_content}\n\n{kwargs.get('instructions')}"
+         contents = kwargs.get("contents", [])
+         config = kwargs.get("config")
+         return format_gemini_input_with_system(contents, config)
+     elif provider == "openai":
+         from posthoganalytics.ai.openai.openai_converter import format_openai_input
+
+         # For OpenAI, handle both Chat Completions and Responses API
+         messages_param = kwargs.get("messages")
+         input_param = kwargs.get("input")
+
+         # Get base formatted messages
+         messages = format_openai_input(messages_param, input_param)
+
+         # Check if system prompt is provided as a separate parameter
+         if kwargs.get("system") is not None:
+             has_system = any(msg.get("role") == "system" for msg in messages)
+             if not has_system:
+                 system_msg = cast(
+                     FormattedMessage,
+                     {"role": "system", "content": kwargs.get("system")},
+                 )
+                 messages = [system_msg] + messages
+
+         # For Responses API, add instructions to the system prompt if provided
+         if kwargs.get("instructions") is not None:
+             # Find the system message if it exists
+             system_idx = next(
+                 (i for i, msg in enumerate(messages) if msg.get("role") == "system"),
+                 None,
              )
-         else:
-             # Create a new system message with instructions
-             messages = [
-                 {"role": "system", "content": kwargs.get("instructions")}
-             ] + messages

-     return messages
+             if system_idx is not None:
+                 # Append instructions to existing system message
+                 system_content = messages[system_idx].get("content", "")
+                 messages[system_idx]["content"] = (
+                     f"{system_content}\n\n{kwargs.get('instructions')}"
+                 )
+             else:
+                 # Create a new system message with instructions
+                 instruction_msg = cast(
+                     FormattedMessage,
+                     {"role": "system", "content": kwargs.get("instructions")},
+                 )
+                 messages = [instruction_msg] + messages
+
+         return messages
+
+     # Default case - return empty list
+     return []


  def call_llm_and_track_usage(
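
`merge_system_prompt` now delegates provider-specific formatting to the converter modules while keeping the OpenAI system/instructions merging inline. A hedged sketch of the Responses API path; the exact output shape depends on `format_openai_input` (defined in `openai_converter.py`, not shown in this hunk), which the removed pre-7.x code suggests wraps a bare string input as a user message:

```python
from posthoganalytics.ai.utils import merge_system_prompt

# Illustrative Responses API-style kwargs.
kwargs = {
    "model": "gpt-4.1",
    "input": "Bonjour!",
    "instructions": "Reply in English.",
}
messages = merge_system_prompt(kwargs, "openai")
# Expected shape, assuming format_openai_input wraps the string as a user message:
# [{'role': 'system', 'content': 'Reply in English.'},
#  {'role': 'user', 'content': 'Bonjour!'}]
```
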
@@ -382,7 +241,7 @@ def call_llm_and_track_usage(
      posthog_properties: Optional[Dict[str, Any]],
      posthog_privacy_mode: bool,
      posthog_groups: Optional[Dict[str, Any]],
-     base_url: URL,
+     base_url: str,
      call_method: Callable[..., Any],
      **kwargs: Any,
  ) -> Any:
@@ -394,8 +253,8 @@ def call_llm_and_track_usage(
      response = None
      error = None
      http_status = 200
-     usage: Dict[str, Any] = {}
-     error_params: Dict[str, any] = {}
+     usage: TokenUsage = TokenUsage()
+     error_params: Dict[str, Any] = {}

      try:
          response = call_method(**kwargs)
@@ -422,12 +281,15 @@ def call_llm_and_track_usage(
              usage = get_usage(response, provider)

          messages = merge_system_prompt(kwargs, provider)
+         sanitized_messages = sanitize_messages(messages, provider)

          event_properties = {
              "$ai_provider": provider,
-             "$ai_model": kwargs.get("model"),
+             "$ai_model": kwargs.get("model") or getattr(response, "model", None),
              "$ai_model_parameters": get_model_params(kwargs),
-             "$ai_input": with_privacy_mode(ph_client, posthog_privacy_mode, messages),
+             "$ai_input": with_privacy_mode(
+                 ph_client, posthog_privacy_mode, sanitized_messages
+             ),
              "$ai_output_choices": with_privacy_mode(
                  ph_client, posthog_privacy_mode, format_response(response, provider)
              ),
@@ -446,27 +308,21 @@ def call_llm_and_track_usage(
          if available_tool_calls:
              event_properties["$ai_tools"] = available_tool_calls

-         if (
-             usage.get("cache_read_input_tokens") is not None
-             and usage.get("cache_read_input_tokens", 0) > 0
-         ):
-             event_properties["$ai_cache_read_input_tokens"] = usage.get(
-                 "cache_read_input_tokens", 0
-             )
+         cache_read = usage.get("cache_read_input_tokens")
+         if cache_read is not None and cache_read > 0:
+             event_properties["$ai_cache_read_input_tokens"] = cache_read

-         if (
-             usage.get("cache_creation_input_tokens") is not None
-             and usage.get("cache_creation_input_tokens", 0) > 0
-         ):
-             event_properties["$ai_cache_creation_input_tokens"] = usage.get(
-                 "cache_creation_input_tokens", 0
-             )
+         cache_creation = usage.get("cache_creation_input_tokens")
+         if cache_creation is not None and cache_creation > 0:
+             event_properties["$ai_cache_creation_input_tokens"] = cache_creation

-         if (
-             usage.get("reasoning_tokens") is not None
-             and usage.get("reasoning_tokens", 0) > 0
-         ):
-             event_properties["$ai_reasoning_tokens"] = usage.get("reasoning_tokens", 0)
+         reasoning = usage.get("reasoning_tokens")
+         if reasoning is not None and reasoning > 0:
+             event_properties["$ai_reasoning_tokens"] = reasoning
+
+         web_search_count = usage.get("web_search_count")
+         if web_search_count is not None and web_search_count > 0:
+             event_properties["$ai_web_search_count"] = web_search_count

          if posthog_distinct_id is None:
              event_properties["$process_person_profile"] = False
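
The rewritten blocks above read each optional counter once and attach the corresponding `$ai_*` property only when the value is positive, so `None` and `0` both fall through. An equivalent standalone sketch with illustrative numbers:

```python
# Illustrative usage dict; only positive optional counters become properties.
usage = {
    "input_tokens": 100,
    "output_tokens": 20,
    "cache_read_input_tokens": 0,   # zero: skipped
    "reasoning_tokens": 64,         # positive: kept
    "web_search_count": None,       # absent: skipped
}

event_properties = {}
for key in (
    "cache_read_input_tokens",
    "cache_creation_input_tokens",
    "reasoning_tokens",
    "web_search_count",
):
    value = usage.get(key)
    if value is not None and value > 0:
        event_properties[f"$ai_{key}"] = value

assert event_properties == {"$ai_reasoning_tokens": 64}
```
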
@@ -500,7 +356,7 @@ async def call_llm_and_track_usage_async(
      posthog_properties: Optional[Dict[str, Any]],
      posthog_privacy_mode: bool,
      posthog_groups: Optional[Dict[str, Any]],
-     base_url: URL,
+     base_url: str,
      call_async_method: Callable[..., Any],
      **kwargs: Any,
  ) -> Any:
@@ -508,8 +364,8 @@ async def call_llm_and_track_usage_async(
      response = None
      error = None
      http_status = 200
-     usage: Dict[str, Any] = {}
-     error_params: Dict[str, any] = {}
+     usage: TokenUsage = TokenUsage()
+     error_params: Dict[str, Any] = {}

      try:
          response = await call_async_method(**kwargs)
@@ -536,12 +392,15 @@ async def call_llm_and_track_usage_async(
              usage = get_usage(response, provider)

          messages = merge_system_prompt(kwargs, provider)
+         sanitized_messages = sanitize_messages(messages, provider)

          event_properties = {
              "$ai_provider": provider,
-             "$ai_model": kwargs.get("model"),
+             "$ai_model": kwargs.get("model") or getattr(response, "model", None),
              "$ai_model_parameters": get_model_params(kwargs),
-             "$ai_input": with_privacy_mode(ph_client, posthog_privacy_mode, messages),
+             "$ai_input": with_privacy_mode(
+                 ph_client, posthog_privacy_mode, sanitized_messages
+             ),
              "$ai_output_choices": with_privacy_mode(
                  ph_client, posthog_privacy_mode, format_response(response, provider)
              ),
@@ -560,21 +419,21 @@ async def call_llm_and_track_usage_async(
          if available_tool_calls:
              event_properties["$ai_tools"] = available_tool_calls

-         if (
-             usage.get("cache_read_input_tokens") is not None
-             and usage.get("cache_read_input_tokens", 0) > 0
-         ):
-             event_properties["$ai_cache_read_input_tokens"] = usage.get(
-                 "cache_read_input_tokens", 0
-             )
+         cache_read = usage.get("cache_read_input_tokens")
+         if cache_read is not None and cache_read > 0:
+             event_properties["$ai_cache_read_input_tokens"] = cache_read

-         if (
-             usage.get("cache_creation_input_tokens") is not None
-             and usage.get("cache_creation_input_tokens", 0) > 0
-         ):
-             event_properties["$ai_cache_creation_input_tokens"] = usage.get(
-                 "cache_creation_input_tokens", 0
-             )
+         cache_creation = usage.get("cache_creation_input_tokens")
+         if cache_creation is not None and cache_creation > 0:
+             event_properties["$ai_cache_creation_input_tokens"] = cache_creation
+
+         reasoning = usage.get("reasoning_tokens")
+         if reasoning is not None and reasoning > 0:
+             event_properties["$ai_reasoning_tokens"] = reasoning
+
+         web_search_count = usage.get("web_search_count")
+         if web_search_count is not None and web_search_count > 0:
+             event_properties["$ai_web_search_count"] = web_search_count

          if posthog_distinct_id is None:
              event_properties["$process_person_profile"] = False
@@ -600,7 +459,131 @@ async def call_llm_and_track_usage_async(
          return response


+ def sanitize_messages(data: Any, provider: str) -> Any:
+     """Sanitize messages using provider-specific sanitization functions."""
+     if provider == "anthropic":
+         return sanitize_anthropic(data)
+     elif provider == "openai":
+         return sanitize_openai(data)
+     elif provider == "gemini":
+         return sanitize_gemini(data)
+     elif provider == "langchain":
+         return sanitize_langchain(data)
+     return data
+
+
  def with_privacy_mode(ph_client: PostHogClient, privacy_mode: bool, value: Any):
      if ph_client.privacy_mode or privacy_mode:
          return None
      return value
+
+
+ def capture_streaming_event(
+     ph_client: PostHogClient,
+     event_data: StreamingEventData,
+ ):
+     """
+     Unified streaming event capture for all LLM providers.
+
+     This function handles the common logic for capturing streaming events across all providers.
+     All provider-specific formatting should be done BEFORE calling this function.
+
+     The function handles:
+     - Building PostHog event properties
+     - Extracting and adding tools based on provider
+     - Applying privacy mode
+     - Adding special token fields (cache, reasoning)
+     - Provider-specific fields (e.g., OpenAI instructions)
+     - Sending the event to PostHog
+
+     Args:
+         ph_client: PostHog client instance
+         event_data: Standardized streaming event data containing all necessary information
+     """
+     trace_id = event_data.get("trace_id") or str(uuid.uuid4())
+
+     # Build base event properties
+     event_properties = {
+         "$ai_provider": event_data["provider"],
+         "$ai_model": event_data["model"],
+         "$ai_model_parameters": get_model_params(event_data["kwargs"]),
+         "$ai_input": with_privacy_mode(
+             ph_client,
+             event_data["privacy_mode"],
+             event_data["formatted_input"],
+         ),
+         "$ai_output_choices": with_privacy_mode(
+             ph_client,
+             event_data["privacy_mode"],
+             event_data["formatted_output"],
+         ),
+         "$ai_http_status": 200,
+         "$ai_input_tokens": event_data["usage_stats"].get("input_tokens", 0),
+         "$ai_output_tokens": event_data["usage_stats"].get("output_tokens", 0),
+         "$ai_latency": event_data["latency"],
+         "$ai_trace_id": trace_id,
+         "$ai_base_url": str(event_data["base_url"]),
+         **(event_data.get("properties") or {}),
+     }
+
+     # Extract and add tools based on provider
+     available_tools = extract_available_tool_calls(
+         event_data["provider"],
+         event_data["kwargs"],
+     )
+     if available_tools:
+         event_properties["$ai_tools"] = available_tools
+
+     # Add optional token fields
+     # For Anthropic, always include cache fields even if 0 (backward compatibility)
+     # For others, only include if present and non-zero
+     if event_data["provider"] == "anthropic":
+         # Anthropic always includes cache fields
+         cache_read = event_data["usage_stats"].get("cache_read_input_tokens", 0)
+         cache_creation = event_data["usage_stats"].get("cache_creation_input_tokens", 0)
+         event_properties["$ai_cache_read_input_tokens"] = cache_read
+         event_properties["$ai_cache_creation_input_tokens"] = cache_creation
+     else:
+         # Other providers only include if non-zero
+         optional_token_fields = [
+             "cache_read_input_tokens",
+             "cache_creation_input_tokens",
+             "reasoning_tokens",
+         ]
+
+         for field in optional_token_fields:
+             value = event_data["usage_stats"].get(field)
+             if value is not None and isinstance(value, int) and value > 0:
+                 event_properties[f"$ai_{field}"] = value
+
+     # Add web search count if present (all providers)
+     web_search_count = event_data["usage_stats"].get("web_search_count")
+     if (
+         web_search_count is not None
+         and isinstance(web_search_count, int)
+         and web_search_count > 0
+     ):
+         event_properties["$ai_web_search_count"] = web_search_count
+
+     # Handle provider-specific fields
+     if (
+         event_data["provider"] == "openai"
+         and event_data["kwargs"].get("instructions") is not None
+     ):
+         event_properties["$ai_instructions"] = with_privacy_mode(
+             ph_client,
+             event_data["privacy_mode"],
+             event_data["kwargs"]["instructions"],
+         )
+
+     if event_data.get("distinct_id") is None:
+         event_properties["$process_person_profile"] = False
+
+     # Send event to PostHog
+     if hasattr(ph_client, "capture"):
+         ph_client.capture(
+             distinct_id=event_data.get("distinct_id") or trace_id,
+             event="$ai_generation",
+             properties=event_properties,
+             groups=event_data.get("groups"),
+         )
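
Together with the new `sanitize_messages` dispatcher, `capture_streaming_event` gives every provider's streaming wrapper one exit point. A hedged sketch of a call; the dict keys mirror the fields the function reads above, but the values and the client construction (assuming the rebranded package exposes the same `Posthog` client class as posthog-python) are illustrative:

```python
import posthoganalytics
from posthoganalytics.ai.utils import capture_streaming_event

ph_client = posthoganalytics.Posthog(
    "<project-api-key>", host="https://us.i.posthog.com"
)

capture_streaming_event(
    ph_client,
    {
        "provider": "openai",
        "model": "gpt-4.1",
        "kwargs": {"model": "gpt-4.1", "temperature": 0.2},
        "formatted_input": [{"role": "user", "content": "Hi"}],
        "formatted_output": [
            {"role": "assistant", "content": [{"type": "text", "text": "Hello!"}]}
        ],
        "usage_stats": {"input_tokens": 8, "output_tokens": 12},
        "latency": 0.42,
        "base_url": "https://api.openai.com/v1",
        "trace_id": None,  # a UUID is generated when absent
        "distinct_id": "user-123",
        "privacy_mode": False,
        "properties": None,
        "groups": None,
    },
)
```
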