posthog 6.7.2__py3-none-any.whl → 6.9.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -20,8 +20,14 @@ from typing import (
 )
 from uuid import UUID
 
-from langchain.callbacks.base import BaseCallbackHandler
-from langchain.schema.agent import AgentAction, AgentFinish
+try:
+    # LangChain 1.0+ and modern 0.x with langchain-core
+    from langchain_core.callbacks.base import BaseCallbackHandler
+    from langchain_core.agents import AgentAction, AgentFinish
+except (ImportError, ModuleNotFoundError):
+    # Fallback for older LangChain versions
+    from langchain.callbacks.base import BaseCallbackHandler
+    from langchain.schema.agent import AgentAction, AgentFinish
 from langchain_core.documents import Document
 from langchain_core.messages import (
     AIMessage,
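
With the fallback above, the callback handler imports whether these symbols resolve from langchain-core (modern 0.x and 1.0+) or from the legacy langchain package. A hedged usage sketch follows; the import path and constructor arguments mirror PostHog's documented pattern but are assumptions for this exact release, and the key and host values are placeholders:

    from posthog import Posthog
    from posthog.ai.langchain import CallbackHandler  # assumed public import path

    posthog_client = Posthog("<project_api_key>", host="https://us.i.posthog.com")
    handler = CallbackHandler(client=posthog_client)  # constructor kwargs assumed

    # The handler is passed through LangChain's standard callbacks config, e.g.:
    # chain.invoke({"topic": "observability"}, config={"callbacks": [handler]})
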
@@ -486,6 +492,7 @@ class CallbackHandler(BaseCallbackHandler):
             "$ai_latency": run.latency,
             "$ai_span_name": run.name,
             "$ai_span_id": run_id,
+            "$ai_framework": "langchain",
         }
         if parent_run_id is not None:
             event_properties["$ai_parent_id"] = parent_run_id
@@ -556,6 +563,7 @@ class CallbackHandler(BaseCallbackHandler):
             "$ai_http_status": 200,
             "$ai_latency": run.latency,
             "$ai_base_url": run.base_url,
+            "$ai_framework": "langchain",
         }
 
         if run.tools:
@@ -750,12 +758,19 @@ def _parse_usage_model(
         "cache_read": "cache_read_tokens",
         "reasoning": "reasoning_tokens",
     }
-    return ModelUsage(
+    normalized_usage = ModelUsage(
         **{
             dataclass_key: parsed_usage.get(mapped_key) or 0
             for mapped_key, dataclass_key in field_mapping.items()
         },
     )
+    # In LangChain, input_tokens is the sum of input and cache read tokens.
+    # Our cost calculation expects them to be separate, for Anthropic.
+    if normalized_usage.input_tokens and normalized_usage.cache_read_tokens:
+        normalized_usage.input_tokens = max(
+            normalized_usage.input_tokens - normalized_usage.cache_read_tokens, 0
+        )
+    return normalized_usage
 
 
 def _parse_usage(response: LLMResult) -> ModelUsage:
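
A quick worked example of the cache-read normalization added above (standalone sketch; the ModelUsage dataclass below is a stand-in with only the fields this hunk touches, not the package's real class):

    from dataclasses import dataclass

    @dataclass
    class ModelUsage:  # stand-in for illustration only
        input_tokens: int = 0
        output_tokens: int = 0
        cache_read_tokens: int = 0

    # LangChain reports input_tokens inclusive of cache reads (e.g. Anthropic prompt
    # caching): 1,200 reported input tokens with 1,000 cache-read tokens means only
    # 200 uncached input tokens should be costed at the full input rate.
    usage = ModelUsage(input_tokens=1200, output_tokens=50, cache_read_tokens=1000)
    if usage.input_tokens and usage.cache_read_tokens:
        usage.input_tokens = max(usage.input_tokens - usage.cache_read_tokens, 0)
    assert usage.input_tokens == 200
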
@@ -2,6 +2,8 @@ import time
 import uuid
 from typing import Any, Dict, List, Optional
 
+from posthog.ai.types import TokenUsage
+
 try:
     import openai
 except ImportError:
@@ -120,7 +122,7 @@ class WrappedResponses:
         **kwargs: Any,
     ):
         start_time = time.time()
-        usage_stats: Dict[str, int] = {}
+        usage_stats: TokenUsage = TokenUsage()
         final_content = []
         response = self._original.create(**kwargs)
 
@@ -171,14 +173,13 @@ class WrappedResponses:
         posthog_privacy_mode: bool,
         posthog_groups: Optional[Dict[str, Any]],
         kwargs: Dict[str, Any],
-        usage_stats: Dict[str, int],
+        usage_stats: TokenUsage,
         latency: float,
         output: Any,
         available_tool_calls: Optional[List[Dict[str, Any]]] = None,
     ):
         from posthog.ai.types import StreamingEventData
         from posthog.ai.openai.openai_converter import (
-            standardize_openai_usage,
            format_openai_streaming_input,
            format_openai_streaming_output,
         )
@@ -195,7 +196,7 @@ class WrappedResponses:
             kwargs=kwargs,
             formatted_input=sanitized_input,
             formatted_output=format_openai_streaming_output(output, "responses"),
-            usage_stats=standardize_openai_usage(usage_stats, "responses"),
+            usage_stats=usage_stats,
             latency=latency,
             distinct_id=posthog_distinct_id,
             trace_id=posthog_trace_id,
@@ -316,7 +317,7 @@ class WrappedCompletions:
         **kwargs: Any,
     ):
         start_time = time.time()
-        usage_stats: Dict[str, int] = {}
+        usage_stats: TokenUsage = TokenUsage()
         accumulated_content = []
         accumulated_tool_calls: Dict[int, Dict[str, Any]] = {}
         if "stream_options" not in kwargs:
@@ -387,7 +388,7 @@ class WrappedCompletions:
         posthog_privacy_mode: bool,
         posthog_groups: Optional[Dict[str, Any]],
         kwargs: Dict[str, Any],
-        usage_stats: Dict[str, int],
+        usage_stats: TokenUsage,
         latency: float,
         output: Any,
         tool_calls: Optional[List[Dict[str, Any]]] = None,
@@ -395,7 +396,6 @@ class WrappedCompletions:
     ):
         from posthog.ai.types import StreamingEventData
         from posthog.ai.openai.openai_converter import (
-            standardize_openai_usage,
             format_openai_streaming_input,
             format_openai_streaming_output,
         )
@@ -412,7 +412,7 @@ class WrappedCompletions:
             kwargs=kwargs,
             formatted_input=sanitized_input,
             formatted_output=format_openai_streaming_output(output, "chat", tool_calls),
-            usage_stats=standardize_openai_usage(usage_stats, "chat"),
+            usage_stats=usage_stats,
             latency=latency,
             distinct_id=posthog_distinct_id,
             trace_id=posthog_trace_id,
@@ -2,6 +2,8 @@ import time
 import uuid
 from typing import Any, Dict, List, Optional
 
+from posthog.ai.types import TokenUsage
+
 try:
     import openai
 except ImportError:
@@ -124,9 +126,9 @@ class WrappedResponses:
         **kwargs: Any,
     ):
         start_time = time.time()
-        usage_stats: Dict[str, int] = {}
+        usage_stats: TokenUsage = TokenUsage()
         final_content = []
-        response = self._original.create(**kwargs)
+        response = await self._original.create(**kwargs)
 
         async def async_generator():
             nonlocal usage_stats
@@ -176,7 +178,7 @@ class WrappedResponses:
         posthog_privacy_mode: bool,
         posthog_groups: Optional[Dict[str, Any]],
         kwargs: Dict[str, Any],
-        usage_stats: Dict[str, int],
+        usage_stats: TokenUsage,
         latency: float,
         output: Any,
         available_tool_calls: Optional[List[Dict[str, Any]]] = None,
@@ -211,6 +213,15 @@ class WrappedResponses:
             **(posthog_properties or {}),
         }
 
+        # Add web search count if present
+        web_search_count = usage_stats.get("web_search_count")
+        if (
+            web_search_count is not None
+            and isinstance(web_search_count, int)
+            and web_search_count > 0
+        ):
+            event_properties["$ai_web_search_count"] = web_search_count
+
         if available_tool_calls:
             event_properties["$ai_tools"] = available_tool_calls
 
@@ -336,14 +347,14 @@ class WrappedCompletions:
         **kwargs: Any,
     ):
         start_time = time.time()
-        usage_stats: Dict[str, int] = {}
+        usage_stats: TokenUsage = TokenUsage()
         accumulated_content = []
         accumulated_tool_calls: Dict[int, Dict[str, Any]] = {}
 
         if "stream_options" not in kwargs:
             kwargs["stream_options"] = {}
         kwargs["stream_options"]["include_usage"] = True
-        response = self._original.create(**kwargs)
+        response = await self._original.create(**kwargs)
 
         async def async_generator():
             nonlocal usage_stats
@@ -406,7 +417,7 @@ class WrappedCompletions:
         posthog_privacy_mode: bool,
         posthog_groups: Optional[Dict[str, Any]],
         kwargs: Dict[str, Any],
-        usage_stats: Dict[str, int],
+        usage_stats: TokenUsage,
         latency: float,
         output: Any,
         tool_calls: Optional[List[Dict[str, Any]]] = None,
@@ -430,8 +441,8 @@ class WrappedCompletions:
                 format_openai_streaming_output(output, "chat", tool_calls),
             ),
             "$ai_http_status": 200,
-            "$ai_input_tokens": usage_stats.get("prompt_tokens", 0),
-            "$ai_output_tokens": usage_stats.get("completion_tokens", 0),
+            "$ai_input_tokens": usage_stats.get("input_tokens", 0),
+            "$ai_output_tokens": usage_stats.get("output_tokens", 0),
             "$ai_cache_read_input_tokens": usage_stats.get(
                 "cache_read_input_tokens", 0
             ),
@@ -442,6 +453,16 @@ class WrappedCompletions:
             **(posthog_properties or {}),
         }
 
+        # Add web search count if present
+        web_search_count = usage_stats.get("web_search_count")
+
+        if (
+            web_search_count is not None
+            and isinstance(web_search_count, int)
+            and web_search_count > 0
+        ):
+            event_properties["$ai_web_search_count"] = web_search_count
+
         if available_tool_calls:
             event_properties["$ai_tools"] = available_tool_calls
 
@@ -497,17 +518,17 @@ class WrappedEmbeddings:
             posthog_trace_id = str(uuid.uuid4())
 
         start_time = time.time()
-        response = self._original.create(**kwargs)
+        response = await self._original.create(**kwargs)
         end_time = time.time()
 
         # Extract usage statistics if available
-        usage_stats = {}
+        usage_stats: TokenUsage = TokenUsage()
 
         if hasattr(response, "usage") and response.usage:
-            usage_stats = {
-                "prompt_tokens": getattr(response.usage, "prompt_tokens", 0),
-                "total_tokens": getattr(response.usage, "total_tokens", 0),
-            }
+            usage_stats = TokenUsage(
+                input_tokens=getattr(response.usage, "prompt_tokens", 0),
+                output_tokens=getattr(response.usage, "completion_tokens", 0),
+            )
 
         latency = end_time - start_time
 
@@ -521,7 +542,7 @@ class WrappedEmbeddings:
                 sanitize_openai_response(kwargs.get("input")),
             ),
             "$ai_http_status": 200,
-            "$ai_input_tokens": usage_stats.get("prompt_tokens", 0),
+            "$ai_input_tokens": usage_stats.get("input_tokens", 0),
             "$ai_latency": latency,
             "$ai_trace_id": posthog_trace_id,
             "$ai_base_url": str(self._client.base_url),
@@ -14,7 +14,6 @@ from posthog.ai.types import (
     FormattedImageContent,
     FormattedMessage,
     FormattedTextContent,
-    StreamingUsageStats,
     TokenUsage,
 )
 
@@ -256,9 +255,180 @@ def format_openai_streaming_content(
     return formatted
 
 
+def extract_openai_web_search_count(response: Any) -> int:
+    """
+    Extract web search count from OpenAI response.
+
+    Uses a two-tier detection strategy:
+    1. Priority 1 (exact count): Check for output[].type == "web_search_call" (Responses API)
+    2. Priority 2 (binary detection): Check for various web search indicators:
+       - Root-level citations, search_results, or usage.search_context_size (Perplexity)
+       - Annotations with type "url_citation" in choices/output (including delta for streaming)
+
+    Args:
+        response: The response from OpenAI API
+
+    Returns:
+        Number of web search requests (exact count or binary 1/0)
+    """
+
+    # Priority 1: Check for exact count in Responses API output
+    if hasattr(response, "output"):
+        web_search_count = 0
+
+        for item in response.output:
+            if hasattr(item, "type") and item.type == "web_search_call":
+                web_search_count += 1
+
+        web_search_count = max(0, web_search_count)
+
+        if web_search_count > 0:
+            return web_search_count
+
+    # Priority 2: Binary detection (returns 1 or 0)
+
+    # Check root-level indicators (Perplexity)
+    if hasattr(response, "citations"):
+        citations = getattr(response, "citations")
+
+        if citations and len(citations) > 0:
+            return 1
+
+    if hasattr(response, "search_results"):
+        search_results = getattr(response, "search_results")
+
+        if search_results and len(search_results) > 0:
+            return 1
+
+    if hasattr(response, "usage") and hasattr(response.usage, "search_context_size"):
+        if response.usage.search_context_size:
+            return 1
+
+    # Check for url_citation annotations in choices (Chat Completions)
+    if hasattr(response, "choices"):
+        for choice in response.choices:
+            # Check message.annotations (non-streaming or final chunk)
+            if hasattr(choice, "message") and hasattr(choice.message, "annotations"):
+                annotations = choice.message.annotations
+
+                if annotations:
+                    for annotation in annotations:
+                        # Support both dict and object formats
+                        annotation_type = (
+                            annotation.get("type")
+                            if isinstance(annotation, dict)
+                            else getattr(annotation, "type", None)
+                        )
+
+                        if annotation_type == "url_citation":
+                            return 1
+
+            # Check delta.annotations (streaming chunks)
+            if hasattr(choice, "delta") and hasattr(choice.delta, "annotations"):
+                annotations = choice.delta.annotations
+
+                if annotations:
+                    for annotation in annotations:
+                        # Support both dict and object formats
+                        annotation_type = (
+                            annotation.get("type")
+                            if isinstance(annotation, dict)
+                            else getattr(annotation, "type", None)
+                        )
+
+                        if annotation_type == "url_citation":
+                            return 1
+
+    # Check for url_citation annotations in output (Responses API)
+    if hasattr(response, "output"):
+        for item in response.output:
+            if hasattr(item, "content") and isinstance(item.content, list):
+                for content_item in item.content:
+                    if hasattr(content_item, "annotations"):
+                        annotations = content_item.annotations
+
+                        if annotations:
+                            for annotation in annotations:
+                                # Support both dict and object formats
+                                annotation_type = (
+                                    annotation.get("type")
+                                    if isinstance(annotation, dict)
+                                    else getattr(annotation, "type", None)
+                                )
+
+                                if annotation_type == "url_citation":
+                                    return 1
+
+    return 0
+
+
+def extract_openai_usage_from_response(response: Any) -> TokenUsage:
+    """
+    Extract usage statistics from a full OpenAI response (non-streaming).
+    Handles both Chat Completions and Responses API.
+
+    Args:
+        response: The complete response from OpenAI API
+
+    Returns:
+        TokenUsage with standardized usage statistics
+    """
+    if not hasattr(response, "usage"):
+        return TokenUsage(input_tokens=0, output_tokens=0)
+
+    cached_tokens = 0
+    input_tokens = 0
+    output_tokens = 0
+    reasoning_tokens = 0
+
+    # Responses API format
+    if hasattr(response.usage, "input_tokens"):
+        input_tokens = response.usage.input_tokens
+    if hasattr(response.usage, "output_tokens"):
+        output_tokens = response.usage.output_tokens
+    if hasattr(response.usage, "input_tokens_details") and hasattr(
+        response.usage.input_tokens_details, "cached_tokens"
+    ):
+        cached_tokens = response.usage.input_tokens_details.cached_tokens
+    if hasattr(response.usage, "output_tokens_details") and hasattr(
+        response.usage.output_tokens_details, "reasoning_tokens"
+    ):
+        reasoning_tokens = response.usage.output_tokens_details.reasoning_tokens
+
+    # Chat Completions format
+    if hasattr(response.usage, "prompt_tokens"):
+        input_tokens = response.usage.prompt_tokens
+    if hasattr(response.usage, "completion_tokens"):
+        output_tokens = response.usage.completion_tokens
+    if hasattr(response.usage, "prompt_tokens_details") and hasattr(
+        response.usage.prompt_tokens_details, "cached_tokens"
+    ):
+        cached_tokens = response.usage.prompt_tokens_details.cached_tokens
+    if hasattr(response.usage, "completion_tokens_details") and hasattr(
+        response.usage.completion_tokens_details, "reasoning_tokens"
+    ):
+        reasoning_tokens = response.usage.completion_tokens_details.reasoning_tokens
+
+    result = TokenUsage(
+        input_tokens=input_tokens,
+        output_tokens=output_tokens,
+    )
+
+    if cached_tokens > 0:
+        result["cache_read_input_tokens"] = cached_tokens
+    if reasoning_tokens > 0:
+        result["reasoning_tokens"] = reasoning_tokens
+
+    web_search_count = extract_openai_web_search_count(response)
+    if web_search_count > 0:
+        result["web_search_count"] = web_search_count
+
+    return result
+
+
 def extract_openai_usage_from_chunk(
     chunk: Any, provider_type: str = "chat"
-) -> StreamingUsageStats:
+) -> TokenUsage:
     """
     Extract usage statistics from an OpenAI streaming chunk.
 
@@ -272,16 +442,23 @@ def extract_openai_usage_from_chunk(
         Dictionary of usage statistics
     """
 
-    usage: StreamingUsageStats = {}
+    usage: TokenUsage = TokenUsage()
 
     if provider_type == "chat":
+        # Extract web search count from the chunk before checking for usage
+        # Web search indicators (citations, annotations) can appear on any chunk,
+        # not just those with usage data
+        web_search_count = extract_openai_web_search_count(chunk)
+        if web_search_count > 0:
+            usage["web_search_count"] = web_search_count
+
         if not hasattr(chunk, "usage") or not chunk.usage:
             return usage
 
         # Chat Completions API uses prompt_tokens and completion_tokens
-        usage["prompt_tokens"] = getattr(chunk.usage, "prompt_tokens", 0)
-        usage["completion_tokens"] = getattr(chunk.usage, "completion_tokens", 0)
-        usage["total_tokens"] = getattr(chunk.usage, "total_tokens", 0)
+        # Standardize to input_tokens and output_tokens
+        usage["input_tokens"] = getattr(chunk.usage, "prompt_tokens", 0)
+        usage["output_tokens"] = getattr(chunk.usage, "completion_tokens", 0)
 
         # Handle cached tokens
         if hasattr(chunk.usage, "prompt_tokens_details") and hasattr(
@@ -310,7 +487,6 @@ def extract_openai_usage_from_chunk(
             response_usage = chunk.response.usage
             usage["input_tokens"] = getattr(response_usage, "input_tokens", 0)
             usage["output_tokens"] = getattr(response_usage, "output_tokens", 0)
-            usage["total_tokens"] = getattr(response_usage, "total_tokens", 0)
 
             # Handle cached tokens
             if hasattr(response_usage, "input_tokens_details") and hasattr(
@@ -328,6 +504,12 @@ def extract_openai_usage_from_chunk(
                     response_usage.output_tokens_details.reasoning_tokens
                 )
 
+        # Extract web search count from the complete response
+        if hasattr(chunk, "response"):
+            web_search_count = extract_openai_web_search_count(chunk.response)
+            if web_search_count > 0:
+                usage["web_search_count"] = web_search_count
+
     return usage
 
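To illustrate the new extract_openai_web_search_count helper added above, a minimal sketch using stand-in objects (SimpleNamespace fakes in place of real OpenAI SDK responses; expected results are noted in comments rather than executed against the package):

    from types import SimpleNamespace

    # Responses API shape: output items typed "web_search_call" yield an exact count.
    responses_like = SimpleNamespace(
        output=[
            SimpleNamespace(type="web_search_call"),
            SimpleNamespace(type="message", content=[]),
        ]
    )
    # extract_openai_web_search_count(responses_like) would return 1 via the exact-count path.

    # Chat Completions shape: a url_citation annotation only proves a search happened,
    # so detection falls back to a binary 1.
    chat_like = SimpleNamespace(
        choices=[
            SimpleNamespace(message=SimpleNamespace(annotations=[{"type": "url_citation"}]))
        ]
    )
    # extract_openai_web_search_count(chat_like) would return 1 via the binary path.
    # Either way the count lands in TokenUsage["web_search_count"] and is then emitted
    # as the $ai_web_search_count event property by the wrappers shown earlier.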
 
@@ -535,37 +717,6 @@ def format_openai_streaming_output(
     ]
 
 
-def standardize_openai_usage(
-    usage: Dict[str, Any], api_type: str = "chat"
-) -> TokenUsage:
-    """
-    Standardize OpenAI usage statistics to common TokenUsage format.
-
-    Args:
-        usage: Raw usage statistics from OpenAI
-        api_type: Either "chat" or "responses" to handle different field names
-
-    Returns:
-        Standardized TokenUsage dict
-    """
-    if api_type == "chat":
-        # Chat API uses prompt_tokens/completion_tokens
-        return TokenUsage(
-            input_tokens=usage.get("prompt_tokens", 0),
-            output_tokens=usage.get("completion_tokens", 0),
-            cache_read_input_tokens=usage.get("cache_read_input_tokens"),
-            reasoning_tokens=usage.get("reasoning_tokens"),
-        )
-    else:  # responses API
-        # Responses API uses input_tokens/output_tokens
-        return TokenUsage(
-            input_tokens=usage.get("input_tokens", 0),
-            output_tokens=usage.get("output_tokens", 0),
-            cache_read_input_tokens=usage.get("cache_read_input_tokens"),
-            reasoning_tokens=usage.get("reasoning_tokens"),
-        )
-
-
 def format_openai_streaming_input(
     kwargs: Dict[str, Any], api_type: str = "chat"
 ) -> Any:
@@ -579,7 +730,6 @@ def format_openai_streaming_input(
     Returns:
         Formatted input ready for PostHog tracking
     """
-    if api_type == "chat":
-        return kwargs.get("messages")
-    else:  # responses API
-        return kwargs.get("input")
+    from posthog.ai.utils import merge_system_prompt
+
+    return merge_system_prompt(kwargs, "openai")
posthog/ai/types.py CHANGED
@@ -63,6 +63,7 @@ class TokenUsage(TypedDict, total=False):
     cache_read_input_tokens: Optional[int]
     cache_creation_input_tokens: Optional[int]
     reasoning_tokens: Optional[int]
+    web_search_count: Optional[int]
 
 
 class ProviderResponse(TypedDict, total=False):
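
For reference, a hedged sketch of the widened TokenUsage shape and its total=False semantics; only the fields visible in this hunk are certain, and the input/output token fields are inferred from their use elsewhere in the diff:

    from typing import Optional, TypedDict

    class TokenUsage(TypedDict, total=False):  # sketch; real definition is in posthog/ai/types.py
        input_tokens: Optional[int]            # inferred from usage elsewhere in this diff
        output_tokens: Optional[int]           # inferred from usage elsewhere in this diff
        cache_read_input_tokens: Optional[int]
        cache_creation_input_tokens: Optional[int]
        reasoning_tokens: Optional[int]
        web_search_count: Optional[int]        # new in 6.9.0

    usage = TokenUsage(input_tokens=120, output_tokens=30)   # keys may be omitted (total=False)
    usage["web_search_count"] = 2
    print(usage.get("reasoning_tokens", 0))                  # absent keys read back via .get()
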
@@ -77,24 +78,6 @@ class ProviderResponse(TypedDict, total=False):
     error: Optional[str]
 
 
-class StreamingUsageStats(TypedDict, total=False):
-    """
-    Usage statistics collected during streaming.
-
-    Different providers populate different fields during streaming.
-    """
-
-    input_tokens: int
-    output_tokens: int
-    cache_read_input_tokens: Optional[int]
-    cache_creation_input_tokens: Optional[int]
-    reasoning_tokens: Optional[int]
-    # OpenAI-specific names
-    prompt_tokens: Optional[int]
-    completion_tokens: Optional[int]
-    total_tokens: Optional[int]
-
-
 class StreamingContentBlock(TypedDict, total=False):
     """
     Content block used during streaming to accumulate content.
@@ -133,7 +116,7 @@ class StreamingEventData(TypedDict):
     kwargs: Dict[str, Any]  # Original kwargs for tool extraction and special handling
     formatted_input: Any  # Provider-formatted input ready for tracking
     formatted_output: Any  # Provider-formatted output ready for tracking
-    usage_stats: TokenUsage  # Standardized token counts
+    usage_stats: TokenUsage
     latency: float
     distinct_id: Optional[str]
     trace_id: Optional[str]