posthoganalytics 6.7.1__py3-none-any.whl → 6.7.3__py3-none-any.whl

This diff compares the contents of two publicly released versions of this package, as published to one of the supported registries. It is provided for informational purposes only and reflects the changes between the package versions exactly as they appear in their respective public registries.
@@ -1,10 +1,9 @@
  import time
  import uuid
- from typing import Any, Callable, Dict, List, Optional
-
- from httpx import URL
+ from typing import Any, Callable, Dict, Optional

  from posthoganalytics.client import Client as PostHogClient
+ from posthoganalytics.ai.types import StreamingEventData, TokenUsage
  from posthoganalytics.ai.sanitization import (
      sanitize_openai,
      sanitize_anthropic,
@@ -13,6 +12,65 @@ from posthoganalytics.ai.sanitization import (
  )


+ def merge_usage_stats(
+     target: TokenUsage, source: TokenUsage, mode: str = "incremental"
+ ) -> None:
+     """
+     Merge streaming usage statistics into target dict, handling None values.
+
+     Supports two modes:
+     - "incremental": Add source values to target (for APIs that report new tokens)
+     - "cumulative": Replace target with source values (for APIs that report totals)
+
+     Args:
+         target: Dictionary to update with usage stats
+         source: TokenUsage that may contain None values
+         mode: Either "incremental" or "cumulative"
+     """
+     if mode == "incremental":
+         # Add new values to existing totals
+         source_input = source.get("input_tokens")
+         if source_input is not None:
+             current = target.get("input_tokens") or 0
+             target["input_tokens"] = current + source_input
+
+         source_output = source.get("output_tokens")
+         if source_output is not None:
+             current = target.get("output_tokens") or 0
+             target["output_tokens"] = current + source_output
+
+         source_cache_read = source.get("cache_read_input_tokens")
+         if source_cache_read is not None:
+             current = target.get("cache_read_input_tokens") or 0
+             target["cache_read_input_tokens"] = current + source_cache_read
+
+         source_cache_creation = source.get("cache_creation_input_tokens")
+         if source_cache_creation is not None:
+             current = target.get("cache_creation_input_tokens") or 0
+             target["cache_creation_input_tokens"] = current + source_cache_creation
+
+         source_reasoning = source.get("reasoning_tokens")
+         if source_reasoning is not None:
+             current = target.get("reasoning_tokens") or 0
+             target["reasoning_tokens"] = current + source_reasoning
+     elif mode == "cumulative":
+         # Replace with latest values (already cumulative)
+         if source.get("input_tokens") is not None:
+             target["input_tokens"] = source["input_tokens"]
+         if source.get("output_tokens") is not None:
+             target["output_tokens"] = source["output_tokens"]
+         if source.get("cache_read_input_tokens") is not None:
+             target["cache_read_input_tokens"] = source["cache_read_input_tokens"]
+         if source.get("cache_creation_input_tokens") is not None:
+             target["cache_creation_input_tokens"] = source[
+                 "cache_creation_input_tokens"
+             ]
+         if source.get("reasoning_tokens") is not None:
+             target["reasoning_tokens"] = source["reasoning_tokens"]
+     else:
+         raise ValueError(f"Invalid mode: {mode}. Must be 'incremental' or 'cumulative'")
+
+
  def get_model_params(kwargs: Dict[str, Any]) -> Dict[str, Any]:
      """
      Extracts model parameters from the kwargs dictionary.
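
Note: a quick sketch of how the two modes of the new merge_usage_stats helper behave. This is illustrative only; it assumes the file above is importable as posthoganalytics.ai.utils and that TokenUsage behaves like a plain dict:

    from posthoganalytics.ai.types import TokenUsage
    from posthoganalytics.ai.utils import merge_usage_stats  # assumed module path

    totals = TokenUsage(input_tokens=10, output_tokens=5)

    # "incremental": each source reports only new tokens, so values are summed.
    merge_usage_stats(totals, TokenUsage(input_tokens=3, output_tokens=2), mode="incremental")
    # totals == {"input_tokens": 13, "output_tokens": 7}

    # "cumulative": each source reports running totals, so values are replaced.
    merge_usage_stats(totals, TokenUsage(input_tokens=20, output_tokens=9), mode="cumulative")
    # totals == {"input_tokens": 20, "output_tokens": 9}
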
@@ -35,349 +93,128 @@ def get_model_params(kwargs: Dict[str, Any]) -> Dict[str, Any]:
      return model_params


- def get_usage(response, provider: str) -> Dict[str, Any]:
+ def get_usage(response, provider: str) -> TokenUsage:
+     """
+     Extract usage statistics from response based on provider.
+     Delegates to provider-specific converter functions.
+     """
      if provider == "anthropic":
-         return {
-             "input_tokens": response.usage.input_tokens,
-             "output_tokens": response.usage.output_tokens,
-             "cache_read_input_tokens": response.usage.cache_read_input_tokens,
-             "cache_creation_input_tokens": response.usage.cache_creation_input_tokens,
-         }
+         from posthoganalytics.ai.anthropic.anthropic_converter import (
+             extract_anthropic_usage_from_response,
+         )
+
+         return extract_anthropic_usage_from_response(response)
      elif provider == "openai":
-         cached_tokens = 0
-         input_tokens = 0
-         output_tokens = 0
-         reasoning_tokens = 0
-
-         # responses api
-         if hasattr(response.usage, "input_tokens"):
-             input_tokens = response.usage.input_tokens
-         if hasattr(response.usage, "output_tokens"):
-             output_tokens = response.usage.output_tokens
-         if hasattr(response.usage, "input_tokens_details") and hasattr(
-             response.usage.input_tokens_details, "cached_tokens"
-         ):
-             cached_tokens = response.usage.input_tokens_details.cached_tokens
-         if hasattr(response.usage, "output_tokens_details") and hasattr(
-             response.usage.output_tokens_details, "reasoning_tokens"
-         ):
-             reasoning_tokens = response.usage.output_tokens_details.reasoning_tokens
-
-         # chat completions
-         if hasattr(response.usage, "prompt_tokens"):
-             input_tokens = response.usage.prompt_tokens
-         if hasattr(response.usage, "completion_tokens"):
-             output_tokens = response.usage.completion_tokens
-         if hasattr(response.usage, "prompt_tokens_details") and hasattr(
-             response.usage.prompt_tokens_details, "cached_tokens"
-         ):
-             cached_tokens = response.usage.prompt_tokens_details.cached_tokens
+         from posthoganalytics.ai.openai.openai_converter import (
+             extract_openai_usage_from_response,
+         )

-         return {
-             "input_tokens": input_tokens,
-             "output_tokens": output_tokens,
-             "cache_read_input_tokens": cached_tokens,
-             "reasoning_tokens": reasoning_tokens,
-         }
+         return extract_openai_usage_from_response(response)
      elif provider == "gemini":
-         input_tokens = 0
-         output_tokens = 0
+         from posthoganalytics.ai.gemini.gemini_converter import (
+             extract_gemini_usage_from_response,
+         )

-         if hasattr(response, "usage_metadata") and response.usage_metadata:
-             input_tokens = getattr(response.usage_metadata, "prompt_token_count", 0)
-             output_tokens = getattr(
-                 response.usage_metadata, "candidates_token_count", 0
-             )
+         return extract_gemini_usage_from_response(response)

-         return {
-             "input_tokens": input_tokens,
-             "output_tokens": output_tokens,
-             "cache_read_input_tokens": 0,
-             "cache_creation_input_tokens": 0,
-             "reasoning_tokens": 0,
-         }
-     return {
-         "input_tokens": 0,
-         "output_tokens": 0,
-         "cache_read_input_tokens": 0,
-         "cache_creation_input_tokens": 0,
-         "reasoning_tokens": 0,
-     }
+     return TokenUsage(input_tokens=0, output_tokens=0)


  def format_response(response, provider: str):
      """
      Format a regular (non-streaming) response.
      """
-     output = []
-     if response is None:
-         return output
      if provider == "anthropic":
-         return format_response_anthropic(response)
-     elif provider == "openai":
-         return format_response_openai(response)
-     elif provider == "gemini":
-         return format_response_gemini(response)
-     return output
-
+         from posthoganalytics.ai.anthropic.anthropic_converter import format_anthropic_response

- def format_response_anthropic(response):
-     output = []
-     content = []
-
-     for choice in response.content:
-         if (
-             hasattr(choice, "type")
-             and choice.type == "text"
-             and hasattr(choice, "text")
-             and choice.text
-         ):
-             content.append({"type": "text", "text": choice.text})
-         elif (
-             hasattr(choice, "type")
-             and choice.type == "tool_use"
-             and hasattr(choice, "name")
-             and hasattr(choice, "id")
-         ):
-             tool_call = {
-                 "type": "function",
-                 "id": choice.id,
-                 "function": {
-                     "name": choice.name,
-                     "arguments": getattr(choice, "input", {}),
-                 },
-             }
-             content.append(tool_call)
-
-     if content:
-         message = {
-             "role": "assistant",
-             "content": content,
-         }
-         output.append(message)
-
-     return output
-
-
- def format_response_openai(response):
-     output = []
-
-     if hasattr(response, "choices"):
-         content = []
-         role = "assistant"
-
-         for choice in response.choices:
-             # Handle Chat Completions response format
-             if hasattr(choice, "message") and choice.message:
-                 if choice.message.role:
-                     role = choice.message.role
-
-                 if choice.message.content:
-                     content.append({"type": "text", "text": choice.message.content})
-
-                 if hasattr(choice.message, "tool_calls") and choice.message.tool_calls:
-                     for tool_call in choice.message.tool_calls:
-                         content.append(
-                             {
-                                 "type": "function",
-                                 "id": tool_call.id,
-                                 "function": {
-                                     "name": tool_call.function.name,
-                                     "arguments": tool_call.function.arguments,
-                                 },
-                             }
-                         )
-
-         if content:
-             message = {
-                 "role": role,
-                 "content": content,
-             }
-             output.append(message)
-
-     # Handle Responses API format
-     if hasattr(response, "output"):
-         content = []
-         role = "assistant"
-
-         for item in response.output:
-             if item.type == "message":
-                 role = item.role
-
-                 if hasattr(item, "content") and isinstance(item.content, list):
-                     for content_item in item.content:
-                         if (
-                             hasattr(content_item, "type")
-                             and content_item.type == "output_text"
-                             and hasattr(content_item, "text")
-                         ):
-                             content.append({"type": "text", "text": content_item.text})
-                         elif hasattr(content_item, "text"):
-                             content.append({"type": "text", "text": content_item.text})
-                         elif (
-                             hasattr(content_item, "type")
-                             and content_item.type == "input_image"
-                             and hasattr(content_item, "image_url")
-                         ):
-                             content.append(
-                                 {
-                                     "type": "image",
-                                     "image": content_item.image_url,
-                                 }
-                             )
-                 elif hasattr(item, "content"):
-                     content.append({"type": "text", "text": str(item.content)})
-
-             elif hasattr(item, "type") and item.type == "function_call":
-                 content.append(
-                     {
-                         "type": "function",
-                         "id": getattr(item, "call_id", getattr(item, "id", "")),
-                         "function": {
-                             "name": item.name,
-                             "arguments": getattr(item, "arguments", {}),
-                         },
-                     }
-                 )
+         return format_anthropic_response(response)
+     elif provider == "openai":
+         from posthoganalytics.ai.openai.openai_converter import format_openai_response

-         if content:
-             message = {
-                 "role": role,
-                 "content": content,
-             }
-             output.append(message)
-
-     return output
-
-
- def format_response_gemini(response):
-     output = []
-
-     if hasattr(response, "candidates") and response.candidates:
-         for candidate in response.candidates:
-             if hasattr(candidate, "content") and candidate.content:
-                 content = []
-
-                 if hasattr(candidate.content, "parts") and candidate.content.parts:
-                     for part in candidate.content.parts:
-                         if hasattr(part, "text") and part.text:
-                             content.append({"type": "text", "text": part.text})
-                         elif hasattr(part, "function_call") and part.function_call:
-                             function_call = part.function_call
-                             content.append(
-                                 {
-                                     "type": "function",
-                                     "function": {
-                                         "name": function_call.name,
-                                         "arguments": function_call.args,
-                                     },
-                                 }
-                             )
-
-                 if content:
-                     message = {
-                         "role": "assistant",
-                         "content": content,
-                     }
-                     output.append(message)
-
-             elif hasattr(candidate, "text") and candidate.text:
-                 output.append(
-                     {
-                         "role": "assistant",
-                         "content": [{"type": "text", "text": candidate.text}],
-                     }
-                 )
-     elif hasattr(response, "text") and response.text:
-         output.append(
-             {
-                 "role": "assistant",
-                 "content": [{"type": "text", "text": response.text}],
-             }
-         )
+         return format_openai_response(response)
+     elif provider == "gemini":
+         from posthoganalytics.ai.gemini.gemini_converter import format_gemini_response

-     return output
+         return format_gemini_response(response)
+     return []


  def extract_available_tool_calls(provider: str, kwargs: Dict[str, Any]):
+     """
+     Extract available tool calls for the given provider.
+     """
      if provider == "anthropic":
-         if "tools" in kwargs:
-             return kwargs["tools"]
+         from posthoganalytics.ai.anthropic.anthropic_converter import extract_anthropic_tools

-         return None
+         return extract_anthropic_tools(kwargs)
      elif provider == "gemini":
-         if "config" in kwargs and hasattr(kwargs["config"], "tools"):
-             return kwargs["config"].tools
+         from posthoganalytics.ai.gemini.gemini_converter import extract_gemini_tools

-         return None
+         return extract_gemini_tools(kwargs)
      elif provider == "openai":
-         if "tools" in kwargs:
-             return kwargs["tools"]
+         from posthoganalytics.ai.openai.openai_converter import extract_openai_tools

-         return None
+         return extract_openai_tools(kwargs)
+     return None


  def merge_system_prompt(kwargs: Dict[str, Any], provider: str):
-     messages: List[Dict[str, Any]] = []
+     """
+     Merge system prompts and format messages for the given provider.
+     """
      if provider == "anthropic":
+         from posthoganalytics.ai.anthropic.anthropic_converter import format_anthropic_input
+
          messages = kwargs.get("messages") or []
-         if kwargs.get("system") is None:
-             return messages
-         return [{"role": "system", "content": kwargs.get("system")}] + messages
+         system = kwargs.get("system")
+         return format_anthropic_input(messages, system)
      elif provider == "gemini":
-         contents = kwargs.get("contents", [])
-         if isinstance(contents, str):
-             return [{"role": "user", "content": contents}]
-         elif isinstance(contents, list):
-             formatted = []
-             for item in contents:
-                 if isinstance(item, str):
-                     formatted.append({"role": "user", "content": item})
-                 elif hasattr(item, "text"):
-                     formatted.append({"role": "user", "content": item.text})
-                 else:
-                     formatted.append({"role": "user", "content": str(item)})
-             return formatted
-         else:
-             return [{"role": "user", "content": str(contents)}]
-
-     # For OpenAI, handle both Chat Completions and Responses API
-     if kwargs.get("messages") is not None:
-         messages = list(kwargs.get("messages", []))
-
-     if kwargs.get("input") is not None:
-         input_data = kwargs.get("input")
-         if isinstance(input_data, list):
-             messages.extend(input_data)
-         else:
-             messages.append({"role": "user", "content": input_data})
-
-     # Check if system prompt is provided as a separate parameter
-     if kwargs.get("system") is not None:
-         has_system = any(msg.get("role") == "system" for msg in messages)
-         if not has_system:
-             messages = [{"role": "system", "content": kwargs.get("system")}] + messages
-
-     # For Responses API, add instructions to the system prompt if provided
-     if kwargs.get("instructions") is not None:
-         # Find the system message if it exists
-         system_idx = next(
-             (i for i, msg in enumerate(messages) if msg.get("role") == "system"), None
-         )
+         from posthoganalytics.ai.gemini.gemini_converter import format_gemini_input

-         if system_idx is not None:
-             # Append instructions to existing system message
-             system_content = messages[system_idx].get("content", "")
-             messages[system_idx]["content"] = (
-                 f"{system_content}\n\n{kwargs.get('instructions')}"
+         contents = kwargs.get("contents", [])
+         return format_gemini_input(contents)
+     elif provider == "openai":
+         from posthoganalytics.ai.openai.openai_converter import format_openai_input
+
+         # For OpenAI, handle both Chat Completions and Responses API
+         messages_param = kwargs.get("messages")
+         input_param = kwargs.get("input")
+
+         # Get base formatted messages
+         messages = format_openai_input(messages_param, input_param)
+
+         # Check if system prompt is provided as a separate parameter
+         if kwargs.get("system") is not None:
+             has_system = any(msg.get("role") == "system" for msg in messages)
+             if not has_system:
+                 messages = [
+                     {"role": "system", "content": kwargs.get("system")}
+                 ] + messages
+
+         # For Responses API, add instructions to the system prompt if provided
+         if kwargs.get("instructions") is not None:
+             # Find the system message if it exists
+             system_idx = next(
+                 (i for i, msg in enumerate(messages) if msg.get("role") == "system"),
+                 None,
              )
-         else:
-             # Create a new system message with instructions
-             messages = [
-                 {"role": "system", "content": kwargs.get("instructions")}
-             ] + messages

-     return messages
+             if system_idx is not None:
+                 # Append instructions to existing system message
+                 system_content = messages[system_idx].get("content", "")
+                 messages[system_idx]["content"] = (
+                     f"{system_content}\n\n{kwargs.get('instructions')}"
+                 )
+             else:
+                 # Create a new system message with instructions
+                 messages = [
+                     {"role": "system", "content": kwargs.get("instructions")}
+                 ] + messages
+
+         return messages
+
+     # Default case - return empty list
+     return []


  def call_llm_and_track_usage(
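
Note: the OpenAI branch of merge_system_prompt still folds the Responses API `instructions` parameter into a system message. A hedged sketch of the expected behavior, assuming the new format_openai_input keeps the old behavior of wrapping a string `input` as a user message:

    from posthoganalytics.ai.utils import merge_system_prompt  # assumed module path

    kwargs = {
        "input": "What is the capital of France?",  # Responses API style input
        "instructions": "Answer in one word.",
    }
    messages = merge_system_prompt(kwargs, "openai")
    # Expected result, per the code above:
    # [{"role": "system", "content": "Answer in one word."},
    #  {"role": "user", "content": "What is the capital of France?"}]
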
@@ -388,7 +225,7 @@ def call_llm_and_track_usage(
      posthog_properties: Optional[Dict[str, Any]],
      posthog_privacy_mode: bool,
      posthog_groups: Optional[Dict[str, Any]],
-     base_url: URL,
+     base_url: str,
      call_method: Callable[..., Any],
      **kwargs: Any,
  ) -> Any:
@@ -400,8 +237,8 @@ def call_llm_and_track_usage(
      response = None
      error = None
      http_status = 200
-     usage: Dict[str, Any] = {}
-     error_params: Dict[str, any] = {}
+     usage: TokenUsage = TokenUsage()
+     error_params: Dict[str, Any] = {}

      try:
          response = call_method(**kwargs)
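
Note: `usage` is now a typed TokenUsage rather than a bare Dict[str, Any]. The diff does not include posthoganalytics/ai/types.py, but the usage pattern in this file (constructed empty, read with .get(), fields possibly None) suggests a TypedDict with total=False along these lines. This is an inference, not the package's actual definition:

    from typing import TypedDict

    class TokenUsage(TypedDict, total=False):
        # Assumed shape, inferred from how this file reads and writes usage;
        # the real definition lives in posthoganalytics/ai/types.py and may differ.
        input_tokens: int
        output_tokens: int
        cache_read_input_tokens: int
        cache_creation_input_tokens: int
        reasoning_tokens: int
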
@@ -455,27 +292,17 @@ def call_llm_and_track_usage(
      if available_tool_calls:
          event_properties["$ai_tools"] = available_tool_calls

-     if (
-         usage.get("cache_read_input_tokens") is not None
-         and usage.get("cache_read_input_tokens", 0) > 0
-     ):
-         event_properties["$ai_cache_read_input_tokens"] = usage.get(
-             "cache_read_input_tokens", 0
-         )
+     cache_read = usage.get("cache_read_input_tokens")
+     if cache_read is not None and cache_read > 0:
+         event_properties["$ai_cache_read_input_tokens"] = cache_read

-     if (
-         usage.get("cache_creation_input_tokens") is not None
-         and usage.get("cache_creation_input_tokens", 0) > 0
-     ):
-         event_properties["$ai_cache_creation_input_tokens"] = usage.get(
-             "cache_creation_input_tokens", 0
-         )
+     cache_creation = usage.get("cache_creation_input_tokens")
+     if cache_creation is not None and cache_creation > 0:
+         event_properties["$ai_cache_creation_input_tokens"] = cache_creation

-     if (
-         usage.get("reasoning_tokens") is not None
-         and usage.get("reasoning_tokens", 0) > 0
-     ):
-         event_properties["$ai_reasoning_tokens"] = usage.get("reasoning_tokens", 0)
+     reasoning = usage.get("reasoning_tokens")
+     if reasoning is not None and reasoning > 0:
+         event_properties["$ai_reasoning_tokens"] = reasoning

      if posthog_distinct_id is None:
          event_properties["$process_person_profile"] = False
@@ -509,7 +336,7 @@ async def call_llm_and_track_usage_async(
      posthog_properties: Optional[Dict[str, Any]],
      posthog_privacy_mode: bool,
      posthog_groups: Optional[Dict[str, Any]],
-     base_url: URL,
+     base_url: str,
      call_async_method: Callable[..., Any],
      **kwargs: Any,
  ) -> Any:
@@ -517,8 +344,8 @@ async def call_llm_and_track_usage_async(
      response = None
      error = None
      http_status = 200
-     usage: Dict[str, Any] = {}
-     error_params: Dict[str, any] = {}
+     usage: TokenUsage = TokenUsage()
+     error_params: Dict[str, Any] = {}

      try:
          response = await call_async_method(**kwargs)
@@ -572,21 +399,13 @@ async def call_llm_and_track_usage_async(
      if available_tool_calls:
          event_properties["$ai_tools"] = available_tool_calls

-     if (
-         usage.get("cache_read_input_tokens") is not None
-         and usage.get("cache_read_input_tokens", 0) > 0
-     ):
-         event_properties["$ai_cache_read_input_tokens"] = usage.get(
-             "cache_read_input_tokens", 0
-         )
+     cache_read = usage.get("cache_read_input_tokens")
+     if cache_read is not None and cache_read > 0:
+         event_properties["$ai_cache_read_input_tokens"] = cache_read

-     if (
-         usage.get("cache_creation_input_tokens") is not None
-         and usage.get("cache_creation_input_tokens", 0) > 0
-     ):
-         event_properties["$ai_cache_creation_input_tokens"] = usage.get(
-             "cache_creation_input_tokens", 0
-         )
+     cache_creation = usage.get("cache_creation_input_tokens")
+     if cache_creation is not None and cache_creation > 0:
+         event_properties["$ai_cache_creation_input_tokens"] = cache_creation

      if posthog_distinct_id is None:
          event_properties["$process_person_profile"] = False
@@ -629,3 +448,105 @@ def with_privacy_mode(ph_client: PostHogClient, privacy_mode: bool, value: Any):
      if ph_client.privacy_mode or privacy_mode:
          return None
      return value
+
+
+ def capture_streaming_event(
+     ph_client: PostHogClient,
+     event_data: StreamingEventData,
+ ):
+     """
+     Unified streaming event capture for all LLM providers.
+
+     This function handles the common logic for capturing streaming events across all providers.
+     All provider-specific formatting should be done BEFORE calling this function.
+
+     The function handles:
+     - Building PostHog event properties
+     - Extracting and adding tools based on provider
+     - Applying privacy mode
+     - Adding special token fields (cache, reasoning)
+     - Provider-specific fields (e.g., OpenAI instructions)
+     - Sending the event to PostHog
+
+     Args:
+         ph_client: PostHog client instance
+         event_data: Standardized streaming event data containing all necessary information
+     """
+     trace_id = event_data.get("trace_id") or str(uuid.uuid4())
+
+     # Build base event properties
+     event_properties = {
+         "$ai_provider": event_data["provider"],
+         "$ai_model": event_data["model"],
+         "$ai_model_parameters": get_model_params(event_data["kwargs"]),
+         "$ai_input": with_privacy_mode(
+             ph_client,
+             event_data["privacy_mode"],
+             event_data["formatted_input"],
+         ),
+         "$ai_output_choices": with_privacy_mode(
+             ph_client,
+             event_data["privacy_mode"],
+             event_data["formatted_output"],
+         ),
+         "$ai_http_status": 200,
+         "$ai_input_tokens": event_data["usage_stats"].get("input_tokens", 0),
+         "$ai_output_tokens": event_data["usage_stats"].get("output_tokens", 0),
+         "$ai_latency": event_data["latency"],
+         "$ai_trace_id": trace_id,
+         "$ai_base_url": str(event_data["base_url"]),
+         **(event_data.get("properties") or {}),
+     }
+
+     # Extract and add tools based on provider
+     available_tools = extract_available_tool_calls(
+         event_data["provider"],
+         event_data["kwargs"],
+     )
+     if available_tools:
+         event_properties["$ai_tools"] = available_tools
+
+     # Add optional token fields
+     # For Anthropic, always include cache fields even if 0 (backward compatibility)
+     # For others, only include if present and non-zero
+     if event_data["provider"] == "anthropic":
+         # Anthropic always includes cache fields
+         cache_read = event_data["usage_stats"].get("cache_read_input_tokens", 0)
+         cache_creation = event_data["usage_stats"].get("cache_creation_input_tokens", 0)
+         event_properties["$ai_cache_read_input_tokens"] = cache_read
+         event_properties["$ai_cache_creation_input_tokens"] = cache_creation
+     else:
+         # Other providers only include if non-zero
+         optional_token_fields = [
+             "cache_read_input_tokens",
+             "cache_creation_input_tokens",
+             "reasoning_tokens",
+         ]
+
+         for field in optional_token_fields:
+             value = event_data["usage_stats"].get(field)
+             if value is not None and isinstance(value, int) and value > 0:
+                 event_properties[f"$ai_{field}"] = value
+
+     # Handle provider-specific fields
+     if (
+         event_data["provider"] == "openai"
+         and event_data["kwargs"].get("instructions") is not None
+     ):
+         event_properties["$ai_instructions"] = with_privacy_mode(
+             ph_client,
+             event_data["privacy_mode"],
+             event_data["kwargs"]["instructions"],
+         )
+
+     if event_data.get("distinct_id") is None:
+         event_properties["$process_person_profile"] = False
+
+     # Send event to PostHog
+     if hasattr(ph_client, "capture"):
+         ph_client.capture(
+             distinct_id=event_data.get("distinct_id") or trace_id,
+             event="$ai_generation",
+             properties=event_properties,
+             groups=event_data.get("groups"),
+         )
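
Note: a minimal end-to-end sketch of how a provider wrapper might call the new capture_streaming_event once a stream finishes. The StreamingEventData keys are taken from the reads in the function above; the construction itself is illustrative, not the package's actual call site:

    import time
    from posthoganalytics import Posthog  # assumes the package exposes the standard Posthog client
    from posthoganalytics.ai.utils import capture_streaming_event  # assumed module path

    ph_client = Posthog("phc_example_key", host="https://us.i.posthog.com")

    start = time.time()
    # ... consume the provider stream here, accumulating tokens via merge_usage_stats ...
    event_data = {
        "provider": "openai",
        "model": "gpt-4.1",
        "kwargs": {"temperature": 0.2},
        "formatted_input": [{"role": "user", "content": "Hi"}],
        "formatted_output": [
            {"role": "assistant", "content": [{"type": "text", "text": "Hello!"}]}
        ],
        "usage_stats": {"input_tokens": 3, "output_tokens": 5},
        "latency": time.time() - start,
        "base_url": "https://api.openai.com/v1",
        "trace_id": None,  # a UUID is generated when missing
        "distinct_id": "user-123",
        "privacy_mode": False,
        "properties": None,
        "groups": None,
    }
    capture_streaming_event(ph_client, event_data)  # emits a $ai_generation event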