posthoganalytics 6.7.5__py3-none-any.whl → 7.4.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37)
  1. posthoganalytics/__init__.py +84 -7
  2. posthoganalytics/ai/anthropic/anthropic_async.py +30 -67
  3. posthoganalytics/ai/anthropic/anthropic_converter.py +40 -0
  4. posthoganalytics/ai/gemini/__init__.py +3 -0
  5. posthoganalytics/ai/gemini/gemini.py +1 -1
  6. posthoganalytics/ai/gemini/gemini_async.py +423 -0
  7. posthoganalytics/ai/gemini/gemini_converter.py +160 -24
  8. posthoganalytics/ai/langchain/callbacks.py +55 -11
  9. posthoganalytics/ai/openai/openai.py +27 -2
  10. posthoganalytics/ai/openai/openai_async.py +49 -5
  11. posthoganalytics/ai/openai/openai_converter.py +130 -0
  12. posthoganalytics/ai/sanitization.py +27 -5
  13. posthoganalytics/ai/types.py +1 -0
  14. posthoganalytics/ai/utils.py +32 -2
  15. posthoganalytics/client.py +338 -90
  16. posthoganalytics/contexts.py +81 -0
  17. posthoganalytics/exception_utils.py +250 -2
  18. posthoganalytics/feature_flags.py +26 -10
  19. posthoganalytics/flag_definition_cache.py +127 -0
  20. posthoganalytics/integrations/django.py +149 -50
  21. posthoganalytics/request.py +203 -23
  22. posthoganalytics/test/test_client.py +250 -22
  23. posthoganalytics/test/test_exception_capture.py +418 -0
  24. posthoganalytics/test/test_feature_flag_result.py +441 -2
  25. posthoganalytics/test/test_feature_flags.py +306 -102
  26. posthoganalytics/test/test_flag_definition_cache.py +612 -0
  27. posthoganalytics/test/test_module.py +0 -8
  28. posthoganalytics/test/test_request.py +536 -0
  29. posthoganalytics/test/test_utils.py +4 -1
  30. posthoganalytics/types.py +40 -0
  31. posthoganalytics/version.py +1 -1
  32. {posthoganalytics-6.7.5.dist-info → posthoganalytics-7.4.3.dist-info}/METADATA +12 -12
  33. posthoganalytics-7.4.3.dist-info/RECORD +57 -0
  34. posthoganalytics-6.7.5.dist-info/RECORD +0 -54
  35. {posthoganalytics-6.7.5.dist-info → posthoganalytics-7.4.3.dist-info}/WHEEL +0 -0
  36. {posthoganalytics-6.7.5.dist-info → posthoganalytics-7.4.3.dist-info}/licenses/LICENSE +0 -0
  37. {posthoganalytics-6.7.5.dist-info → posthoganalytics-7.4.3.dist-info}/top_level.txt +0 -0
@@ -1,8 +1,8 @@
  try:
- import langchain # noqa: F401
+ import langchain_core # noqa: F401
  except ImportError:
  raise ModuleNotFoundError(
- "Please install LangChain to use this feature: 'pip install langchain'"
+ "Please install LangChain to use this feature: 'pip install langchain-core'"
  )
 
  import json
@@ -20,8 +20,14 @@ from typing import (
  )
  from uuid import UUID
 
- from langchain.callbacks.base import BaseCallbackHandler
- from langchain.schema.agent import AgentAction, AgentFinish
+ try:
+ # LangChain 1.0+ and modern 0.x with langchain-core
+ from langchain_core.callbacks.base import BaseCallbackHandler
+ from langchain_core.agents import AgentAction, AgentFinish
+ except (ImportError, ModuleNotFoundError):
+ # Fallback for older LangChain versions
+ from langchain.callbacks.base import BaseCallbackHandler
+ from langchain.schema.agent import AgentAction, AgentFinish
  from langchain_core.documents import Document
  from langchain_core.messages import (
  AIMessage,
@@ -73,6 +79,8 @@ class GenerationMetadata(SpanMetadata):
  """Base URL of the provider's API used in the run."""
  tools: Optional[List[Dict[str, Any]]] = None
  """Tools provided to the model."""
+ posthog_properties: Optional[Dict[str, Any]] = None
+ """PostHog properties of the run."""
 
 
  RunMetadata = Union[SpanMetadata, GenerationMetadata]
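The hunk above adds a `posthog_properties` field to `GenerationMetadata` in the LangChain callback handler (posthoganalytics/ai/langchain/callbacks.py per the file list); the hunks that follow read that key from the run metadata and merge it into the captured event. A minimal sketch of how a caller could supply those properties through LangChain's standard `config` metadata — the project key, host, constructor arguments, and property values are illustrative assumptions, not taken from this diff:

```python
# Illustrative sketch: pass posthog_properties via LangChain run metadata so the
# CallbackHandler can merge them into the generation event it captures.
from posthoganalytics import Posthog
from posthoganalytics.ai.langchain.callbacks import CallbackHandler

posthog = Posthog("phc_example_key", host="https://us.i.posthog.com")  # placeholder key/host
handler = CallbackHandler(posthog)  # assumption: the PostHog client is the first argument

config = {
    "callbacks": [handler],
    # RunnableConfig metadata is forwarded to callbacks, where the handler
    # looks up the "posthog_properties" key shown in the hunks below.
    "metadata": {"posthog_properties": {"conversation_id": "conv_42", "plan": "free"}},
}

# chain.invoke({"question": "..."}, config=config)  # chain construction omitted
```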
@@ -414,6 +422,8 @@ class CallbackHandler(BaseCallbackHandler):
  generation.model = model
  if provider := metadata.get("ls_provider"):
  generation.provider = provider
+
+ generation.posthog_properties = metadata.get("posthog_properties")
  try:
  base_url = serialized["kwargs"]["openai_api_base"]
  if base_url is not None:
@@ -486,6 +496,7 @@ class CallbackHandler(BaseCallbackHandler):
  "$ai_latency": run.latency,
  "$ai_span_name": run.name,
  "$ai_span_id": run_id,
+ "$ai_framework": "langchain",
  }
  if parent_run_id is not None:
  event_properties["$ai_parent_id"] = parent_run_id
@@ -556,8 +567,12 @@ class CallbackHandler(BaseCallbackHandler):
  "$ai_http_status": 200,
  "$ai_latency": run.latency,
  "$ai_base_url": run.base_url,
+ "$ai_framework": "langchain",
  }
 
+ if isinstance(run.posthog_properties, dict):
+ event_properties.update(run.posthog_properties)
+
  if run.tools:
  event_properties["$ai_tools"] = run.tools
 
@@ -567,7 +582,7 @@ class CallbackHandler(BaseCallbackHandler):
  event_properties["$ai_is_error"] = True
  else:
  # Add usage
- usage = _parse_usage(output)
+ usage = _parse_usage(output, run.provider, run.model)
  event_properties["$ai_input_tokens"] = usage.input_tokens
  event_properties["$ai_output_tokens"] = usage.output_tokens
  event_properties["$ai_cache_creation_input_tokens"] = (
@@ -688,6 +703,8 @@ class ModelUsage:
 
  def _parse_usage_model(
  usage: Union[BaseModel, dict],
+ provider: Optional[str] = None,
+ model: Optional[str] = None,
  ) -> ModelUsage:
  if isinstance(usage, BaseModel):
  usage = usage.__dict__
@@ -750,15 +767,38 @@ def _parse_usage_model(
  "cache_read": "cache_read_tokens",
  "reasoning": "reasoning_tokens",
  }
- return ModelUsage(
+ normalized_usage = ModelUsage(
  **{
  dataclass_key: parsed_usage.get(mapped_key) or 0
  for mapped_key, dataclass_key in field_mapping.items()
  },
  )
+ # For Anthropic providers, LangChain reports input_tokens as the sum of all input tokens.
+ # Our cost calculation expects them to be separate for Anthropic, so we subtract cache tokens.
+ # Both cache_read and cache_write tokens should be subtracted since Anthropic's raw API
+ # reports input_tokens as tokens NOT read from or used to create a cache.
+ # For other providers (OpenAI, etc.), input_tokens already excludes cache tokens as expected.
+ # Match logic consistent with plugin-server: exact match on provider OR substring match on model
+ is_anthropic = False
+ if provider and provider.lower() == "anthropic":
+ is_anthropic = True
+ elif model and "anthropic" in model.lower():
+ is_anthropic = True
+
+ if is_anthropic and normalized_usage.input_tokens:
+ cache_tokens = (normalized_usage.cache_read_tokens or 0) + (
+ normalized_usage.cache_write_tokens or 0
+ )
+ if cache_tokens > 0:
+ normalized_usage.input_tokens = max(
+ normalized_usage.input_tokens - cache_tokens, 0
+ )
+ return normalized_usage
 
 
- def _parse_usage(response: LLMResult) -> ModelUsage:
+ def _parse_usage(
+ response: LLMResult, provider: Optional[str] = None, model: Optional[str] = None
+ ) -> ModelUsage:
  # langchain-anthropic uses the usage field
  llm_usage_keys = ["token_usage", "usage"]
  llm_usage: ModelUsage = ModelUsage(
@@ -772,13 +812,15 @@ def _parse_usage(response: LLMResult) -> ModelUsage:
  if response.llm_output is not None:
  for key in llm_usage_keys:
  if response.llm_output.get(key):
- llm_usage = _parse_usage_model(response.llm_output[key])
+ llm_usage = _parse_usage_model(
+ response.llm_output[key], provider, model
+ )
  break
 
  if hasattr(response, "generations"):
  for generation in response.generations:
  if "usage" in generation:
- llm_usage = _parse_usage_model(generation["usage"])
+ llm_usage = _parse_usage_model(generation["usage"], provider, model)
  break
 
  for generation_chunk in generation:
@@ -786,7 +828,9 @@ def _parse_usage(response: LLMResult) -> ModelUsage:
  "usage_metadata" in generation_chunk.generation_info
  ):
  llm_usage = _parse_usage_model(
- generation_chunk.generation_info["usage_metadata"]
+ generation_chunk.generation_info["usage_metadata"],
+ provider,
+ model,
  )
  break
 
@@ -813,7 +857,7 @@ def _parse_usage(response: LLMResult) -> ModelUsage:
  bedrock_anthropic_usage or bedrock_titan_usage or ollama_usage
  )
  if chunk_usage:
- llm_usage = _parse_usage_model(chunk_usage, provider, model)
  break
 
  return llm_usage
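The `_parse_usage_model` change above normalizes Anthropic usage reported through LangChain: there `input_tokens` is a total that already includes cache read and cache write tokens, while the cost calculation expects uncached input tokens. A standalone sketch of that arithmetic with made-up numbers (the helper name is hypothetical):

```python
# Hypothetical helper mirroring the Anthropic normalization in the hunk above.
def normalize_anthropic_input_tokens(
    input_tokens: int, cache_read_tokens: int = 0, cache_write_tokens: int = 0
) -> int:
    cache_tokens = cache_read_tokens + cache_write_tokens
    if input_tokens and cache_tokens > 0:
        return max(input_tokens - cache_tokens, 0)
    return input_tokens


# A 1,200-token prompt with 1,000 tokens read from cache and 100 written to it
# leaves 100 genuinely uncached input tokens.
assert normalize_anthropic_input_tokens(1200, cache_read_tokens=1000, cache_write_tokens=100) == 100

# Other providers (OpenAI, etc.) already report input_tokens without cache tokens,
# so the callback only applies this when the provider or model name indicates Anthropic.
assert normalize_anthropic_input_tokens(1200) == 1200
```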
@@ -124,14 +124,23 @@ class WrappedResponses:
  start_time = time.time()
  usage_stats: TokenUsage = TokenUsage()
  final_content = []
+ model_from_response: Optional[str] = None
  response = self._original.create(**kwargs)
 
  def generator():
  nonlocal usage_stats
  nonlocal final_content # noqa: F824
+ nonlocal model_from_response
 
  try:
  for chunk in response:
+ # Extract model from response object in chunk (for stored prompts)
+ if hasattr(chunk, "response") and chunk.response:
+ if model_from_response is None and hasattr(
+ chunk.response, "model"
+ ):
+ model_from_response = chunk.response.model
+
  # Extract usage stats from chunk
  chunk_usage = extract_openai_usage_from_chunk(chunk, "responses")
 
@@ -161,6 +170,7 @@ class WrappedResponses:
  latency,
  output,
  None, # Responses API doesn't have tools
+ model_from_response,
  )
 
  return generator()
@@ -177,6 +187,7 @@ class WrappedResponses:
  latency: float,
  output: Any,
  available_tool_calls: Optional[List[Dict[str, Any]]] = None,
+ model_from_response: Optional[str] = None,
  ):
  from posthoganalytics.ai.types import StreamingEventData
  from posthoganalytics.ai.openai.openai_converter import (
@@ -189,9 +200,12 @@ class WrappedResponses:
  formatted_input = format_openai_streaming_input(kwargs, "responses")
  sanitized_input = sanitize_openai_response(formatted_input)
 
+ # Use model from kwargs, fallback to model from response
+ model = kwargs.get("model") or model_from_response or "unknown"
+
  event_data = StreamingEventData(
  provider="openai",
- model=kwargs.get("model", "unknown"),
+ model=model,
  base_url=str(self._client.base_url),
  kwargs=kwargs,
  formatted_input=sanitized_input,
@@ -320,6 +334,7 @@ class WrappedCompletions:
  usage_stats: TokenUsage = TokenUsage()
  accumulated_content = []
  accumulated_tool_calls: Dict[int, Dict[str, Any]] = {}
+ model_from_response: Optional[str] = None
  if "stream_options" not in kwargs:
  kwargs["stream_options"] = {}
  kwargs["stream_options"]["include_usage"] = True
@@ -329,9 +344,14 @@ class WrappedCompletions:
  nonlocal usage_stats
  nonlocal accumulated_content # noqa: F824
  nonlocal accumulated_tool_calls
+ nonlocal model_from_response
 
  try:
  for chunk in response:
+ # Extract model from chunk (Chat Completions chunks have model field)
+ if model_from_response is None and hasattr(chunk, "model"):
+ model_from_response = chunk.model
+
  # Extract usage stats from chunk
  chunk_usage = extract_openai_usage_from_chunk(chunk, "chat")
 
@@ -376,6 +396,7 @@ class WrappedCompletions:
  accumulated_content,
  tool_calls_list,
  extract_available_tool_calls("openai", kwargs),
+ model_from_response,
  )
 
  return generator()
@@ -393,6 +414,7 @@ class WrappedCompletions:
  output: Any,
  tool_calls: Optional[List[Dict[str, Any]]] = None,
  available_tool_calls: Optional[List[Dict[str, Any]]] = None,
+ model_from_response: Optional[str] = None,
  ):
  from posthoganalytics.ai.types import StreamingEventData
  from posthoganalytics.ai.openai.openai_converter import (
@@ -405,9 +427,12 @@ class WrappedCompletions:
  formatted_input = format_openai_streaming_input(kwargs, "chat")
  sanitized_input = sanitize_openai(formatted_input)
 
+ # Use model from kwargs, fallback to model from response
+ model = kwargs.get("model") or model_from_response or "unknown"
+
  event_data = StreamingEventData(
  provider="openai",
- model=kwargs.get("model", "unknown"),
+ model=model,
  base_url=str(self._client.base_url),
  kwargs=kwargs,
  formatted_input=sanitized_input,
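The hunks above (apparently posthoganalytics/ai/openai/openai.py) remember the model name reported in streamed chunks so that `$ai_model` can still be set when the request itself carries no `model` kwarg, for example a Responses API call that references a stored prompt. A rough usage sketch; the wrapper's constructor arguments, the project key, and the stored prompt ID are assumptions for illustration only:

```python
# Illustrative only: argument names and IDs below are assumptions, not from this diff.
from posthoganalytics import Posthog
from posthoganalytics.ai.openai import OpenAI

posthog = Posthog("phc_example_key", host="https://us.i.posthog.com")
client = OpenAI(posthog_client=posthog)  # OPENAI_API_KEY assumed to come from the environment

# No "model" kwarg: the stored prompt picks the model server-side, so the wrapper
# falls back to the model name observed on chunk.response while streaming.
stream = client.responses.create(
    prompt={"id": "pmpt_example_id"},  # hypothetical stored prompt
    input="Summarize today's release notes.",
    stream=True,
    posthog_distinct_id="user_123",
)
for _event in stream:
    pass  # consume the stream; the analytics event is captured once it completes
```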
@@ -128,14 +128,23 @@ class WrappedResponses:
  start_time = time.time()
  usage_stats: TokenUsage = TokenUsage()
  final_content = []
- response = self._original.create(**kwargs)
+ model_from_response: Optional[str] = None
+ response = await self._original.create(**kwargs)
 
  async def async_generator():
  nonlocal usage_stats
  nonlocal final_content # noqa: F824
+ nonlocal model_from_response
 
  try:
  async for chunk in response:
+ # Extract model from response object in chunk (for stored prompts)
+ if hasattr(chunk, "response") and chunk.response:
+ if model_from_response is None and hasattr(
+ chunk.response, "model"
+ ):
+ model_from_response = chunk.response.model
+
  # Extract usage stats from chunk
  chunk_usage = extract_openai_usage_from_chunk(chunk, "responses")
 
@@ -166,6 +175,7 @@ class WrappedResponses:
  latency,
  output,
  extract_available_tool_calls("openai", kwargs),
+ model_from_response,
  )
 
  return async_generator()
@@ -182,13 +192,17 @@ class WrappedResponses:
  latency: float,
  output: Any,
  available_tool_calls: Optional[List[Dict[str, Any]]] = None,
+ model_from_response: Optional[str] = None,
  ):
  if posthog_trace_id is None:
  posthog_trace_id = str(uuid.uuid4())
 
+ # Use model from kwargs, fallback to model from response
+ model = kwargs.get("model") or model_from_response or "unknown"
+
  event_properties = {
  "$ai_provider": "openai",
- "$ai_model": kwargs.get("model"),
+ "$ai_model": model,
  "$ai_model_parameters": get_model_params(kwargs),
  "$ai_input": with_privacy_mode(
  self._client._ph_client,
@@ -213,6 +227,15 @@ class WrappedResponses:
  **(posthog_properties or {}),
  }
 
+ # Add web search count if present
+ web_search_count = usage_stats.get("web_search_count")
+ if (
+ web_search_count is not None
+ and isinstance(web_search_count, int)
+ and web_search_count > 0
+ ):
+ event_properties["$ai_web_search_count"] = web_search_count
+
  if available_tool_calls:
  event_properties["$ai_tools"] = available_tool_calls
 
@@ -341,19 +364,25 @@ class WrappedCompletions:
  usage_stats: TokenUsage = TokenUsage()
  accumulated_content = []
  accumulated_tool_calls: Dict[int, Dict[str, Any]] = {}
+ model_from_response: Optional[str] = None
 
  if "stream_options" not in kwargs:
  kwargs["stream_options"] = {}
  kwargs["stream_options"]["include_usage"] = True
- response = self._original.create(**kwargs)
+ response = await self._original.create(**kwargs)
 
  async def async_generator():
  nonlocal usage_stats
  nonlocal accumulated_content # noqa: F824
  nonlocal accumulated_tool_calls
+ nonlocal model_from_response
 
  try:
  async for chunk in response:
+ # Extract model from chunk (Chat Completions chunks have model field)
+ if model_from_response is None and hasattr(chunk, "model"):
+ model_from_response = chunk.model
+
  # Extract usage stats from chunk
  chunk_usage = extract_openai_usage_from_chunk(chunk, "chat")
  if chunk_usage:
@@ -396,6 +425,7 @@ class WrappedCompletions:
  accumulated_content,
  tool_calls_list,
  extract_available_tool_calls("openai", kwargs),
+ model_from_response,
  )
 
  return async_generator()
@@ -413,13 +443,17 @@ class WrappedCompletions:
  output: Any,
  tool_calls: Optional[List[Dict[str, Any]]] = None,
  available_tool_calls: Optional[List[Dict[str, Any]]] = None,
+ model_from_response: Optional[str] = None,
  ):
  if posthog_trace_id is None:
  posthog_trace_id = str(uuid.uuid4())
 
+ # Use model from kwargs, fallback to model from response
+ model = kwargs.get("model") or model_from_response or "unknown"
+
  event_properties = {
  "$ai_provider": "openai",
- "$ai_model": kwargs.get("model"),
+ "$ai_model": model,
  "$ai_model_parameters": get_model_params(kwargs),
  "$ai_input": with_privacy_mode(
  self._client._ph_client,
@@ -444,6 +478,16 @@ class WrappedCompletions:
  **(posthog_properties or {}),
  }
 
+ # Add web search count if present
+ web_search_count = usage_stats.get("web_search_count")
+
+ if (
+ web_search_count is not None
+ and isinstance(web_search_count, int)
+ and web_search_count > 0
+ ):
+ event_properties["$ai_web_search_count"] = web_search_count
+
  if available_tool_calls:
  event_properties["$ai_tools"] = available_tool_calls
 
@@ -499,7 +543,7 @@ class WrappedEmbeddings:
  posthog_trace_id = str(uuid.uuid4())
 
  start_time = time.time()
- response = self._original.create(**kwargs)
+ response = await self._original.create(**kwargs)
  end_time = time.time()
 
  # Extract usage statistics if available
@@ -67,6 +67,12 @@ def format_openai_response(response: Any) -> List[FormattedMessage]:
  }
  )
 
+ # Handle audio output (gpt-4o-audio-preview)
+ if hasattr(choice.message, "audio") and choice.message.audio:
+ # Convert Pydantic model to dict to capture all fields from OpenAI
+ audio_dict = choice.message.audio.model_dump()
+ content.append({"type": "audio", **audio_dict})
+
  if content:
  output.append(
  {
@@ -255,6 +261,113 @@ def format_openai_streaming_content(
  return formatted
 
 
+ def extract_openai_web_search_count(response: Any) -> int:
+ """
+ Extract web search count from OpenAI response.
+
+ Uses a two-tier detection strategy:
+ 1. Priority 1 (exact count): Check for output[].type == "web_search_call" (Responses API)
+ 2. Priority 2 (binary detection): Check for various web search indicators:
+ - Root-level citations, search_results, or usage.search_context_size (Perplexity)
+ - Annotations with type "url_citation" in choices/output (including delta for streaming)
+
+ Args:
+ response: The response from OpenAI API
+
+ Returns:
+ Number of web search requests (exact count or binary 1/0)
+ """
+
+ # Priority 1: Check for exact count in Responses API output
+ if hasattr(response, "output"):
+ web_search_count = 0
+
+ for item in response.output:
+ if hasattr(item, "type") and item.type == "web_search_call":
+ web_search_count += 1
+
+ web_search_count = max(0, web_search_count)
+
+ if web_search_count > 0:
+ return web_search_count
+
+ # Priority 2: Binary detection (returns 1 or 0)
+
+ # Check root-level indicators (Perplexity)
+ if hasattr(response, "citations"):
+ citations = getattr(response, "citations")
+
+ if citations and len(citations) > 0:
+ return 1
+
+ if hasattr(response, "search_results"):
+ search_results = getattr(response, "search_results")
+
+ if search_results and len(search_results) > 0:
+ return 1
+
+ if hasattr(response, "usage") and hasattr(response.usage, "search_context_size"):
+ if response.usage.search_context_size:
+ return 1
+
+ # Check for url_citation annotations in choices (Chat Completions)
+ if hasattr(response, "choices"):
+ for choice in response.choices:
+ # Check message.annotations (non-streaming or final chunk)
+ if hasattr(choice, "message") and hasattr(choice.message, "annotations"):
+ annotations = choice.message.annotations
+
+ if annotations:
+ for annotation in annotations:
+ # Support both dict and object formats
+ annotation_type = (
+ annotation.get("type")
+ if isinstance(annotation, dict)
+ else getattr(annotation, "type", None)
+ )
+
+ if annotation_type == "url_citation":
+ return 1
+
+ # Check delta.annotations (streaming chunks)
+ if hasattr(choice, "delta") and hasattr(choice.delta, "annotations"):
+ annotations = choice.delta.annotations
+
+ if annotations:
+ for annotation in annotations:
+ # Support both dict and object formats
+ annotation_type = (
+ annotation.get("type")
+ if isinstance(annotation, dict)
+ else getattr(annotation, "type", None)
+ )
+
+ if annotation_type == "url_citation":
+ return 1
+
+ # Check for url_citation annotations in output (Responses API)
+ if hasattr(response, "output"):
+ for item in response.output:
+ if hasattr(item, "content") and isinstance(item.content, list):
+ for content_item in item.content:
+ if hasattr(content_item, "annotations"):
+ annotations = content_item.annotations
+
+ if annotations:
+ for annotation in annotations:
+ # Support both dict and object formats
+ annotation_type = (
+ annotation.get("type")
+ if isinstance(annotation, dict)
+ else getattr(annotation, "type", None)
+ )
+
+ if annotation_type == "url_citation":
+ return 1
+
+ return 0
+
+
  def extract_openai_usage_from_response(response: Any) -> TokenUsage:
  """
  Extract usage statistics from a full OpenAI response (non-streaming).
@@ -312,6 +425,10 @@ def extract_openai_usage_from_response(response: Any) -> TokenUsage:
  if reasoning_tokens > 0:
  result["reasoning_tokens"] = reasoning_tokens
 
+ web_search_count = extract_openai_web_search_count(response)
+ if web_search_count > 0:
+ result["web_search_count"] = web_search_count
+
  return result
 
 
@@ -334,6 +451,13 @@ def extract_openai_usage_from_chunk(
  usage: TokenUsage = TokenUsage()
 
  if provider_type == "chat":
+ # Extract web search count from the chunk before checking for usage
+ # Web search indicators (citations, annotations) can appear on any chunk,
+ # not just those with usage data
+ web_search_count = extract_openai_web_search_count(chunk)
+ if web_search_count > 0:
+ usage["web_search_count"] = web_search_count
+
  if not hasattr(chunk, "usage") or not chunk.usage:
  return usage
 
@@ -386,6 +510,12 @@ def extract_openai_usage_from_chunk(
  response_usage.output_tokens_details.reasoning_tokens
  )
 
+ # Extract web search count from the complete response
+ if hasattr(chunk, "response"):
+ web_search_count = extract_openai_web_search_count(chunk.response)
+ if web_search_count > 0:
+ usage["web_search_count"] = web_search_count
+
  return usage
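`extract_openai_web_search_count` above first looks for an exact count of `web_search_call` items in a Responses API output, then falls back to binary detection via citations, search results, and `url_citation` annotations. A self-contained sketch of both tiers using simple stand-in objects (test doubles, not real SDK types):

```python
# Stand-in objects exercising the two detection tiers described above.
from types import SimpleNamespace

from posthoganalytics.ai.openai.openai_converter import extract_openai_web_search_count

# Tier 1: Responses API output with web_search_call items gives an exact count.
responses_api = SimpleNamespace(
    output=[
        SimpleNamespace(type="web_search_call"),
        SimpleNamespace(type="web_search_call"),
        SimpleNamespace(type="message", content=[]),
    ]
)
assert extract_openai_web_search_count(responses_api) == 2

# Tier 2: a Chat Completions message with a url_citation annotation gives a binary 1.
chat_completion = SimpleNamespace(
    choices=[SimpleNamespace(message=SimpleNamespace(annotations=[{"type": "url_citation"}]))]
)
assert extract_openai_web_search_count(chat_completion) == 1

# No web search indicators at all gives 0.
assert extract_openai_web_search_count(SimpleNamespace()) == 0
```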
@@ -1,3 +1,4 @@
+ import os
  import re
  from typing import Any
  from urllib.parse import urlparse
@@ -5,6 +6,15 @@ from urllib.parse import urlparse
  REDACTED_IMAGE_PLACEHOLDER = "[base64 image redacted]"
 
 
+ def _is_multimodal_enabled() -> bool:
+ """Check if multimodal capture is enabled via environment variable."""
+ return os.environ.get("_INTERNAL_LLMA_MULTIMODAL", "").lower() in (
+ "true",
+ "1",
+ "yes",
+ )
+
+
  def is_base64_data_url(text: str) -> bool:
  return re.match(r"^data:([^;]+);base64,", text) is not None
 
@@ -27,6 +37,9 @@ def is_raw_base64(text: str) -> bool:
 
 
  def redact_base64_data_url(value: Any) -> Any:
+ if _is_multimodal_enabled():
+ return value
+
  if not isinstance(value, str):
  return value
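`redact_base64_data_url` above (and, in the hunks that follow, the provider-specific sanitizers) now return their input untouched when the internal `_INTERNAL_LLMA_MULTIMODAL` environment variable is set. A quick sketch of the toggle's effect; the data URL is an illustrative placeholder:

```python
import os

from posthoganalytics.ai.sanitization import redact_base64_data_url

data_url = "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAAB"  # illustrative payload

# Default: base64 data URLs are redacted (replaced with the placeholder text).
os.environ.pop("_INTERNAL_LLMA_MULTIMODAL", None)
assert redact_base64_data_url(data_url) != data_url

# With the internal flag set, multimodal content is passed through unchanged.
os.environ["_INTERNAL_LLMA_MULTIMODAL"] = "true"
assert redact_base64_data_url(data_url) == data_url
```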
 
@@ -83,6 +96,11 @@ def sanitize_openai_image(item: Any) -> Any:
  },
  }
 
+ if item.get("type") == "audio" and "data" in item:
+ if _is_multimodal_enabled():
+ return item
+ return {**item, "data": REDACTED_IMAGE_PLACEHOLDER}
+
  return item
 
 
@@ -100,6 +118,9 @@ def sanitize_openai_response_image(item: Any) -> Any:
 
 
  def sanitize_anthropic_image(item: Any) -> Any:
+ if _is_multimodal_enabled():
+ return item
+
  if not isinstance(item, dict):
  return item
 
@@ -109,8 +130,6 @@ def sanitize_anthropic_image(item: Any) -> Any:
  and item["source"].get("type") == "base64"
  and "data" in item["source"]
  ):
- # For Anthropic, if the source type is "base64", we should always redact the data
- # The provider is explicitly telling us this is base64 data
  return {
  **item,
  "source": {
@@ -123,6 +142,9 @@ def sanitize_anthropic_image(item: Any) -> Any:
 
 
  def sanitize_gemini_part(part: Any) -> Any:
+ if _is_multimodal_enabled():
+ return part
+
  if not isinstance(part, dict):
  return part
 
@@ -131,8 +153,6 @@ def sanitize_gemini_part(part: Any) -> Any:
  and isinstance(part["inline_data"], dict)
  and "data" in part["inline_data"]
  ):
- # For Gemini, the inline_data structure indicates base64 data
- # We should redact any string data in this context
  return {
  **part,
  "inline_data": {
@@ -185,7 +205,9 @@ def sanitize_langchain_image(item: Any) -> Any:
  and isinstance(item.get("source"), dict)
  and "data" in item["source"]
  ):
- # Anthropic style - raw base64 in structured format, always redact
+ if _is_multimodal_enabled():
+ return item
+
  return {
  **item,
  "source": {
@@ -63,6 +63,7 @@ class TokenUsage(TypedDict, total=False):
  cache_read_input_tokens: Optional[int]
  cache_creation_input_tokens: Optional[int]
  reasoning_tokens: Optional[int]
+ web_search_count: Optional[int]
 
 
  class ProviderResponse(TypedDict, total=False):
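`TokenUsage` gains an optional `web_search_count` field, which the converter hunks above populate and the OpenAI wrappers surface as `$ai_web_search_count`. Since the TypedDict is declared with `total=False`, partially populated usage dicts stay valid; a tiny sketch:

```python
from posthoganalytics.ai.types import TokenUsage

# total=False: every key is optional, so partially populated usage dicts are fine.
usage: TokenUsage = TokenUsage(reasoning_tokens=128, web_search_count=1)
assert usage.get("web_search_count") == 1
assert "cache_read_input_tokens" not in usage  # unset keys are simply absent
```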