posthoganalytics 6.7.13__tar.gz → 6.8.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62)
  1. {posthoganalytics-6.7.13/posthoganalytics.egg-info → posthoganalytics-6.8.0}/PKG-INFO +1 -1
  2. {posthoganalytics-6.7.13 → posthoganalytics-6.8.0}/posthoganalytics/ai/anthropic/anthropic_async.py +30 -67
  3. {posthoganalytics-6.7.13 → posthoganalytics-6.8.0}/posthoganalytics/ai/anthropic/anthropic_converter.py +40 -0
  4. {posthoganalytics-6.7.13 → posthoganalytics-6.8.0}/posthoganalytics/ai/gemini/gemini_converter.py +73 -3
  5. {posthoganalytics-6.7.13 → posthoganalytics-6.8.0}/posthoganalytics/ai/openai/openai_async.py +19 -0
  6. {posthoganalytics-6.7.13 → posthoganalytics-6.8.0}/posthoganalytics/ai/openai/openai_converter.py +124 -0
  7. {posthoganalytics-6.7.13 → posthoganalytics-6.8.0}/posthoganalytics/ai/types.py +1 -0
  8. {posthoganalytics-6.7.13 → posthoganalytics-6.8.0}/posthoganalytics/ai/utils.py +30 -0
  9. {posthoganalytics-6.7.13 → posthoganalytics-6.8.0}/posthoganalytics/client.py +1 -2
  10. {posthoganalytics-6.7.13 → posthoganalytics-6.8.0}/posthoganalytics/integrations/django.py +81 -13
  11. {posthoganalytics-6.7.13 → posthoganalytics-6.8.0}/posthoganalytics/version.py +1 -1
  12. {posthoganalytics-6.7.13 → posthoganalytics-6.8.0/posthoganalytics.egg-info}/PKG-INFO +1 -1
  13. {posthoganalytics-6.7.13 → posthoganalytics-6.8.0}/pyproject.toml +6 -0
  14. {posthoganalytics-6.7.13 → posthoganalytics-6.8.0}/LICENSE +0 -0
  15. {posthoganalytics-6.7.13 → posthoganalytics-6.8.0}/MANIFEST.in +0 -0
  16. {posthoganalytics-6.7.13 → posthoganalytics-6.8.0}/README.md +0 -0
  17. {posthoganalytics-6.7.13 → posthoganalytics-6.8.0}/posthoganalytics/__init__.py +0 -0
  18. {posthoganalytics-6.7.13 → posthoganalytics-6.8.0}/posthoganalytics/ai/__init__.py +0 -0
  19. {posthoganalytics-6.7.13 → posthoganalytics-6.8.0}/posthoganalytics/ai/anthropic/__init__.py +0 -0
  20. {posthoganalytics-6.7.13 → posthoganalytics-6.8.0}/posthoganalytics/ai/anthropic/anthropic.py +0 -0
  21. {posthoganalytics-6.7.13 → posthoganalytics-6.8.0}/posthoganalytics/ai/anthropic/anthropic_providers.py +0 -0
  22. {posthoganalytics-6.7.13 → posthoganalytics-6.8.0}/posthoganalytics/ai/gemini/__init__.py +0 -0
  23. {posthoganalytics-6.7.13 → posthoganalytics-6.8.0}/posthoganalytics/ai/gemini/gemini.py +0 -0
  24. {posthoganalytics-6.7.13 → posthoganalytics-6.8.0}/posthoganalytics/ai/langchain/__init__.py +0 -0
  25. {posthoganalytics-6.7.13 → posthoganalytics-6.8.0}/posthoganalytics/ai/langchain/callbacks.py +0 -0
  26. {posthoganalytics-6.7.13 → posthoganalytics-6.8.0}/posthoganalytics/ai/openai/__init__.py +0 -0
  27. {posthoganalytics-6.7.13 → posthoganalytics-6.8.0}/posthoganalytics/ai/openai/openai.py +0 -0
  28. {posthoganalytics-6.7.13 → posthoganalytics-6.8.0}/posthoganalytics/ai/openai/openai_providers.py +0 -0
  29. {posthoganalytics-6.7.13 → posthoganalytics-6.8.0}/posthoganalytics/ai/sanitization.py +0 -0
  30. {posthoganalytics-6.7.13 → posthoganalytics-6.8.0}/posthoganalytics/args.py +0 -0
  31. {posthoganalytics-6.7.13 → posthoganalytics-6.8.0}/posthoganalytics/consumer.py +0 -0
  32. {posthoganalytics-6.7.13 → posthoganalytics-6.8.0}/posthoganalytics/contexts.py +0 -0
  33. {posthoganalytics-6.7.13 → posthoganalytics-6.8.0}/posthoganalytics/exception_capture.py +0 -0
  34. {posthoganalytics-6.7.13 → posthoganalytics-6.8.0}/posthoganalytics/exception_utils.py +0 -0
  35. {posthoganalytics-6.7.13 → posthoganalytics-6.8.0}/posthoganalytics/feature_flags.py +0 -0
  36. {posthoganalytics-6.7.13 → posthoganalytics-6.8.0}/posthoganalytics/integrations/__init__.py +0 -0
  37. {posthoganalytics-6.7.13 → posthoganalytics-6.8.0}/posthoganalytics/poller.py +0 -0
  38. {posthoganalytics-6.7.13 → posthoganalytics-6.8.0}/posthoganalytics/py.typed +0 -0
  39. {posthoganalytics-6.7.13 → posthoganalytics-6.8.0}/posthoganalytics/request.py +0 -0
  40. {posthoganalytics-6.7.13 → posthoganalytics-6.8.0}/posthoganalytics/test/__init__.py +0 -0
  41. {posthoganalytics-6.7.13 → posthoganalytics-6.8.0}/posthoganalytics/test/test_before_send.py +0 -0
  42. {posthoganalytics-6.7.13 → posthoganalytics-6.8.0}/posthoganalytics/test/test_client.py +0 -0
  43. {posthoganalytics-6.7.13 → posthoganalytics-6.8.0}/posthoganalytics/test/test_consumer.py +0 -0
  44. {posthoganalytics-6.7.13 → posthoganalytics-6.8.0}/posthoganalytics/test/test_contexts.py +0 -0
  45. {posthoganalytics-6.7.13 → posthoganalytics-6.8.0}/posthoganalytics/test/test_exception_capture.py +0 -0
  46. {posthoganalytics-6.7.13 → posthoganalytics-6.8.0}/posthoganalytics/test/test_feature_flag.py +0 -0
  47. {posthoganalytics-6.7.13 → posthoganalytics-6.8.0}/posthoganalytics/test/test_feature_flag_result.py +0 -0
  48. {posthoganalytics-6.7.13 → posthoganalytics-6.8.0}/posthoganalytics/test/test_feature_flags.py +0 -0
  49. {posthoganalytics-6.7.13 → posthoganalytics-6.8.0}/posthoganalytics/test/test_module.py +0 -0
  50. {posthoganalytics-6.7.13 → posthoganalytics-6.8.0}/posthoganalytics/test/test_request.py +0 -0
  51. {posthoganalytics-6.7.13 → posthoganalytics-6.8.0}/posthoganalytics/test/test_size_limited_dict.py +0 -0
  52. {posthoganalytics-6.7.13 → posthoganalytics-6.8.0}/posthoganalytics/test/test_types.py +0 -0
  53. {posthoganalytics-6.7.13 → posthoganalytics-6.8.0}/posthoganalytics/test/test_utils.py +0 -0
  54. {posthoganalytics-6.7.13 → posthoganalytics-6.8.0}/posthoganalytics/types.py +0 -0
  55. {posthoganalytics-6.7.13 → posthoganalytics-6.8.0}/posthoganalytics/utils.py +0 -0
  56. {posthoganalytics-6.7.13 → posthoganalytics-6.8.0}/posthoganalytics.egg-info/SOURCES.txt +0 -0
  57. {posthoganalytics-6.7.13 → posthoganalytics-6.8.0}/posthoganalytics.egg-info/dependency_links.txt +0 -0
  58. {posthoganalytics-6.7.13 → posthoganalytics-6.8.0}/posthoganalytics.egg-info/requires.txt +0 -0
  59. {posthoganalytics-6.7.13 → posthoganalytics-6.8.0}/posthoganalytics.egg-info/top_level.txt +0 -0
  60. {posthoganalytics-6.7.13 → posthoganalytics-6.8.0}/setup.cfg +0 -0
  61. {posthoganalytics-6.7.13 → posthoganalytics-6.8.0}/setup.py +0 -0
  62. {posthoganalytics-6.7.13 → posthoganalytics-6.8.0}/setup_analytics.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: posthoganalytics
3
- Version: 6.7.13
3
+ Version: 6.8.0
4
4
  Summary: Integrate PostHog into any python application.
5
5
  Home-page: https://github.com/posthog/posthog-python
6
6
  Author: Posthog
@@ -14,14 +14,9 @@ from posthoganalytics import setup
14
14
  from posthoganalytics.ai.types import StreamingContentBlock, TokenUsage, ToolInProgress
15
15
  from posthoganalytics.ai.utils import (
16
16
  call_llm_and_track_usage_async,
17
- extract_available_tool_calls,
18
- get_model_params,
19
- merge_system_prompt,
20
17
  merge_usage_stats,
21
- with_privacy_mode,
22
18
  )
23
19
  from posthoganalytics.ai.anthropic.anthropic_converter import (
24
- format_anthropic_streaming_content,
25
20
  extract_anthropic_usage_from_event,
26
21
  handle_anthropic_content_block_start,
27
22
  handle_anthropic_text_delta,
@@ -220,66 +215,34 @@ class AsyncWrappedMessages(AsyncMessages):
220
215
  content_blocks: List[StreamingContentBlock],
221
216
  accumulated_content: str,
222
217
  ):
223
- if posthog_trace_id is None:
224
- posthog_trace_id = str(uuid.uuid4())
225
-
226
- # Format output using converter
227
- formatted_content = format_anthropic_streaming_content(content_blocks)
228
- formatted_output = []
229
-
230
- if formatted_content:
231
- formatted_output = [{"role": "assistant", "content": formatted_content}]
232
- else:
233
- # Fallback to accumulated content if no blocks
234
- formatted_output = [
235
- {
236
- "role": "assistant",
237
- "content": [{"type": "text", "text": accumulated_content}],
238
- }
239
- ]
240
-
241
- event_properties = {
242
- "$ai_provider": "anthropic",
243
- "$ai_model": kwargs.get("model"),
244
- "$ai_model_parameters": get_model_params(kwargs),
245
- "$ai_input": with_privacy_mode(
246
- self._client._ph_client,
247
- posthog_privacy_mode,
248
- sanitize_anthropic(merge_system_prompt(kwargs, "anthropic")),
249
- ),
250
- "$ai_output_choices": with_privacy_mode(
251
- self._client._ph_client,
252
- posthog_privacy_mode,
253
- formatted_output,
254
- ),
255
- "$ai_http_status": 200,
256
- "$ai_input_tokens": usage_stats.get("input_tokens", 0),
257
- "$ai_output_tokens": usage_stats.get("output_tokens", 0),
258
- "$ai_cache_read_input_tokens": usage_stats.get(
259
- "cache_read_input_tokens", 0
260
- ),
261
- "$ai_cache_creation_input_tokens": usage_stats.get(
262
- "cache_creation_input_tokens", 0
218
+ from posthoganalytics.ai.types import StreamingEventData
219
+ from posthoganalytics.ai.anthropic.anthropic_converter import (
220
+ format_anthropic_streaming_input,
221
+ format_anthropic_streaming_output_complete,
222
+ )
223
+ from posthoganalytics.ai.utils import capture_streaming_event
224
+
225
+ # Prepare standardized event data
226
+ formatted_input = format_anthropic_streaming_input(kwargs)
227
+ sanitized_input = sanitize_anthropic(formatted_input)
228
+
229
+ event_data = StreamingEventData(
230
+ provider="anthropic",
231
+ model=kwargs.get("model", "unknown"),
232
+ base_url=str(self._client.base_url),
233
+ kwargs=kwargs,
234
+ formatted_input=sanitized_input,
235
+ formatted_output=format_anthropic_streaming_output_complete(
236
+ content_blocks, accumulated_content
263
237
  ),
264
- "$ai_latency": latency,
265
- "$ai_trace_id": posthog_trace_id,
266
- "$ai_base_url": str(self._client.base_url),
267
- **(posthog_properties or {}),
268
- }
269
-
270
- # Add tools if available
271
- available_tools = extract_available_tool_calls("anthropic", kwargs)
272
-
273
- if available_tools:
274
- event_properties["$ai_tools"] = available_tools
275
-
276
- if posthog_distinct_id is None:
277
- event_properties["$process_person_profile"] = False
278
-
279
- if hasattr(self._client._ph_client, "capture"):
280
- self._client._ph_client.capture(
281
- distinct_id=posthog_distinct_id or posthog_trace_id,
282
- event="$ai_generation",
283
- properties=event_properties,
284
- groups=posthog_groups,
285
- )
238
+ usage_stats=usage_stats,
239
+ latency=latency,
240
+ distinct_id=posthog_distinct_id,
241
+ trace_id=posthog_trace_id,
242
+ properties=posthog_properties,
243
+ privacy_mode=posthog_privacy_mode,
244
+ groups=posthog_groups,
245
+ )
246
+
247
+ # Use the common capture function
248
+ capture_streaming_event(self._client._ph_client, event_data)
@@ -163,6 +163,32 @@ def format_anthropic_streaming_content(
163
163
  return formatted
164
164
 
165
165
 
166
def extract_anthropic_web_search_count(response: Any) -> int:
    """
    Extract web search count from Anthropic response.

    Anthropic provides exact web search counts via
    usage.server_tool_use.web_search_requests.

    Every link of that attribute chain is treated as optional: a missing
    attribute and an attribute that is present but set to None both yield 0
    instead of raising.

    Args:
        response: The response from Anthropic API

    Returns:
        Number of web search requests (0 if none, never negative)
    """
    # getattr(None, name, None) is None, so the chain collapses safely when
    # any intermediate object is absent.
    usage = getattr(response, "usage", None)
    server_tool_use = getattr(usage, "server_tool_use", None)
    raw_count = getattr(server_tool_use, "web_search_requests", None)

    if raw_count is None:
        return 0

    try:
        return max(0, int(raw_count))
    except (TypeError, ValueError):
        # A non-numeric value is not a usable count; usage tracking is
        # best-effort and must not break the caller.
        return 0
190
+
191
+
166
192
  def extract_anthropic_usage_from_response(response: Any) -> TokenUsage:
167
193
  """
168
194
  Extract usage from a full Anthropic response (non-streaming).
@@ -191,6 +217,10 @@ def extract_anthropic_usage_from_response(response: Any) -> TokenUsage:
191
217
  if cache_creation and cache_creation > 0:
192
218
  result["cache_creation_input_tokens"] = cache_creation
193
219
 
220
+ web_search_count = extract_anthropic_web_search_count(response)
221
+ if web_search_count > 0:
222
+ result["web_search_count"] = web_search_count
223
+
194
224
  return result
195
225
 
196
226
 
@@ -222,6 +252,16 @@ def extract_anthropic_usage_from_event(event: Any) -> TokenUsage:
222
252
  if hasattr(event, "usage") and event.usage:
223
253
  usage["output_tokens"] = getattr(event.usage, "output_tokens", 0)
224
254
 
255
+ # Extract web search count from usage
256
+ if hasattr(event.usage, "server_tool_use"):
257
+ server_tool_use = event.usage.server_tool_use
258
+ if hasattr(server_tool_use, "web_search_requests"):
259
+ web_search_count = int(
260
+ getattr(server_tool_use, "web_search_requests", 0)
261
+ )
262
+ if web_search_count > 0:
263
+ usage["web_search_count"] = web_search_count
264
+
225
265
  return usage
226
266
 
227
267
 
@@ -338,6 +338,61 @@ def format_gemini_input(contents: Any) -> List[FormattedMessage]:
338
338
  return [_format_object_message(contents)]
339
339
 
340
340
 
341
def extract_gemini_web_search_count(response: Any) -> int:
    """
    Extract web search count from Gemini response.

    Gemini bills per request that uses grounding, not per query.
    Returns 1 if grounding_metadata is present with actual search data, 0 otherwise.

    A candidate counts as having used web search when any of these holds:
    - its grounding_metadata has a non-empty web_search_queries
    - its grounding_metadata has a non-empty grounding_chunks
    - one of its content parts carries a function_call whose name contains
      "google_search" or "grounding" (case-insensitive)

    Attributes that are missing or set to None (candidates, content, parts,
    function_call.name) are treated as empty rather than raising.

    Args:
        response: The response from Gemini API

    Returns:
        1 if web search/grounding was used, 0 otherwise
    """
    # `candidates` can exist on the object and still be None.
    for candidate in getattr(response, "candidates", None) or []:
        grounding_metadata = getattr(candidate, "grounding_metadata", None)

        if grounding_metadata:
            # Either recorded queries or returned chunks is evidence of a search.
            for evidence_attr in ("web_search_queries", "grounding_chunks"):
                if getattr(grounding_metadata, evidence_attr, None):
                    return 1

        # Also check for google_search or grounding in function call names
        content = getattr(candidate, "content", None)

        for part in getattr(content, "parts", None) or []:
            function_call = getattr(part, "function_call", None)

            if not function_call:
                continue

            # `name` may be present but None; normalise before lowercasing.
            function_name = (getattr(function_call, "name", None) or "").lower()

            if "google_search" in function_name or "grounding" in function_name:
                return 1

    return 0
394
+
395
+
341
396
  def _extract_usage_from_metadata(metadata: Any) -> TokenUsage:
342
397
  """
343
398
  Common logic to extract usage from Gemini metadata.
@@ -382,7 +437,14 @@ def extract_gemini_usage_from_response(response: Any) -> TokenUsage:
382
437
  if not hasattr(response, "usage_metadata") or not response.usage_metadata:
383
438
  return TokenUsage(input_tokens=0, output_tokens=0)
384
439
 
385
- return _extract_usage_from_metadata(response.usage_metadata)
440
+ usage = _extract_usage_from_metadata(response.usage_metadata)
441
+
442
+ # Add web search count if present
443
+ web_search_count = extract_gemini_web_search_count(response)
444
+ if web_search_count > 0:
445
+ usage["web_search_count"] = web_search_count
446
+
447
+ return usage
386
448
 
387
449
 
388
450
  def extract_gemini_usage_from_chunk(chunk: Any) -> TokenUsage:
@@ -398,11 +460,19 @@ def extract_gemini_usage_from_chunk(chunk: Any) -> TokenUsage:
398
460
 
399
461
  usage: TokenUsage = TokenUsage()
400
462
 
463
+ # Extract web search count from the chunk before checking for usage_metadata
464
+ # Web search indicators can appear on any chunk, not just those with usage data
465
+ web_search_count = extract_gemini_web_search_count(chunk)
466
+ if web_search_count > 0:
467
+ usage["web_search_count"] = web_search_count
468
+
401
469
  if not hasattr(chunk, "usage_metadata") or not chunk.usage_metadata:
402
470
  return usage
403
471
 
404
- # Use the shared helper to extract usage
405
- usage = _extract_usage_from_metadata(chunk.usage_metadata)
472
+ usage_from_metadata = _extract_usage_from_metadata(chunk.usage_metadata)
473
+
474
+ # Merge the usage from metadata with any web search count we found
475
+ usage.update(usage_from_metadata)
406
476
 
407
477
  return usage
408
478
 
@@ -213,6 +213,15 @@ class WrappedResponses:
213
213
  **(posthog_properties or {}),
214
214
  }
215
215
 
216
+ # Add web search count if present
217
+ web_search_count = usage_stats.get("web_search_count")
218
+ if (
219
+ web_search_count is not None
220
+ and isinstance(web_search_count, int)
221
+ and web_search_count > 0
222
+ ):
223
+ event_properties["$ai_web_search_count"] = web_search_count
224
+
216
225
  if available_tool_calls:
217
226
  event_properties["$ai_tools"] = available_tool_calls
218
227
 
@@ -444,6 +453,16 @@ class WrappedCompletions:
444
453
  **(posthog_properties or {}),
445
454
  }
446
455
 
456
+ # Add web search count if present
457
+ web_search_count = usage_stats.get("web_search_count")
458
+
459
+ if (
460
+ web_search_count is not None
461
+ and isinstance(web_search_count, int)
462
+ and web_search_count > 0
463
+ ):
464
+ event_properties["$ai_web_search_count"] = web_search_count
465
+
447
466
  if available_tool_calls:
448
467
  event_properties["$ai_tools"] = available_tool_calls
449
468
 
@@ -255,6 +255,113 @@ def format_openai_streaming_content(
255
255
  return formatted
256
256
 
257
257
 
258
def _has_url_citation(annotations: Any) -> bool:
    """Return True if any annotation (dict or object form) has type "url_citation"."""
    for annotation in annotations or []:
        # Support both dict and object formats
        annotation_type = (
            annotation.get("type")
            if isinstance(annotation, dict)
            else getattr(annotation, "type", None)
        )

        if annotation_type == "url_citation":
            return True

    return False


def extract_openai_web_search_count(response: Any) -> int:
    """
    Extract web search count from OpenAI response.

    Uses a two-tier detection strategy:
    1. Priority 1 (exact count): Check for output[].type == "web_search_call" (Responses API)
    2. Priority 2 (binary detection): Check for various web search indicators:
       - Root-level citations, search_results, or usage.search_context_size (Perplexity)
       - Annotations with type "url_citation" in choices/output (including delta for streaming)

    Attributes that are missing or set to None (output, choices, usage,
    message, delta, annotations) are treated as empty rather than raising.

    Args:
        response: The response from OpenAI API

    Returns:
        Number of web search requests (exact count or binary 1/0)
    """
    # `output` is consulted twice (exact count, then annotations); normalise once.
    output_items = getattr(response, "output", None) or []

    # Priority 1: Check for exact count in Responses API output
    exact_count = sum(
        1 for item in output_items if getattr(item, "type", None) == "web_search_call"
    )

    if exact_count > 0:
        return exact_count

    # Priority 2: Binary detection (returns 1 or 0)

    # Check root-level indicators (Perplexity)
    if getattr(response, "citations", None):
        return 1

    if getattr(response, "search_results", None):
        return 1

    if getattr(getattr(response, "usage", None), "search_context_size", None):
        return 1

    # Check for url_citation annotations in choices (Chat Completions):
    # `message` on non-streaming/final responses, `delta` on streaming chunks.
    for choice in getattr(response, "choices", None) or []:
        for holder_name in ("message", "delta"):
            holder = getattr(choice, holder_name, None)

            if _has_url_citation(getattr(holder, "annotations", None)):
                return 1

    # Check for url_citation annotations in output (Responses API)
    for item in output_items:
        content = getattr(item, "content", None)

        if not isinstance(content, list):
            continue

        for content_item in content:
            if _has_url_citation(getattr(content_item, "annotations", None)):
                return 1

    return 0
363
+
364
+
258
365
  def extract_openai_usage_from_response(response: Any) -> TokenUsage:
259
366
  """
260
367
  Extract usage statistics from a full OpenAI response (non-streaming).
@@ -312,6 +419,10 @@ def extract_openai_usage_from_response(response: Any) -> TokenUsage:
312
419
  if reasoning_tokens > 0:
313
420
  result["reasoning_tokens"] = reasoning_tokens
314
421
 
422
+ web_search_count = extract_openai_web_search_count(response)
423
+ if web_search_count > 0:
424
+ result["web_search_count"] = web_search_count
425
+
315
426
  return result
316
427
 
317
428
 
@@ -334,6 +445,13 @@ def extract_openai_usage_from_chunk(
334
445
  usage: TokenUsage = TokenUsage()
335
446
 
336
447
  if provider_type == "chat":
448
+ # Extract web search count from the chunk before checking for usage
449
+ # Web search indicators (citations, annotations) can appear on any chunk,
450
+ # not just those with usage data
451
+ web_search_count = extract_openai_web_search_count(chunk)
452
+ if web_search_count > 0:
453
+ usage["web_search_count"] = web_search_count
454
+
337
455
  if not hasattr(chunk, "usage") or not chunk.usage:
338
456
  return usage
339
457
 
@@ -386,6 +504,12 @@ def extract_openai_usage_from_chunk(
386
504
  response_usage.output_tokens_details.reasoning_tokens
387
505
  )
388
506
 
507
+ # Extract web search count from the complete response
508
+ if hasattr(chunk, "response"):
509
+ web_search_count = extract_openai_web_search_count(chunk.response)
510
+ if web_search_count > 0:
511
+ usage["web_search_count"] = web_search_count
512
+
389
513
  return usage
390
514
 
391
515
 
@@ -63,6 +63,7 @@ class TokenUsage(TypedDict, total=False):
63
63
  cache_read_input_tokens: Optional[int]
64
64
  cache_creation_input_tokens: Optional[int]
65
65
  reasoning_tokens: Optional[int]
66
+ web_search_count: Optional[int]
66
67
 
67
68
 
68
69
  class ProviderResponse(TypedDict, total=False):
@@ -53,6 +53,12 @@ def merge_usage_stats(
53
53
  if source_reasoning is not None:
54
54
  current = target.get("reasoning_tokens") or 0
55
55
  target["reasoning_tokens"] = current + source_reasoning
56
+
57
+ source_web_search = source.get("web_search_count")
58
+ if source_web_search is not None:
59
+ current = target.get("web_search_count") or 0
60
+ target["web_search_count"] = max(current, source_web_search)
61
+
56
62
  elif mode == "cumulative":
57
63
  # Replace with latest values (already cumulative)
58
64
  if source.get("input_tokens") is not None:
@@ -67,6 +73,9 @@ def merge_usage_stats(
67
73
  ]
68
74
  if source.get("reasoning_tokens") is not None:
69
75
  target["reasoning_tokens"] = source["reasoning_tokens"]
76
+ if source.get("web_search_count") is not None:
77
+ target["web_search_count"] = source["web_search_count"]
78
+
70
79
  else:
71
80
  raise ValueError(f"Invalid mode: {mode}. Must be 'incremental' or 'cumulative'")
72
81
 
@@ -311,6 +320,10 @@ def call_llm_and_track_usage(
311
320
  if reasoning is not None and reasoning > 0:
312
321
  event_properties["$ai_reasoning_tokens"] = reasoning
313
322
 
323
+ web_search_count = usage.get("web_search_count")
324
+ if web_search_count is not None and web_search_count > 0:
325
+ event_properties["$ai_web_search_count"] = web_search_count
326
+
314
327
  if posthog_distinct_id is None:
315
328
  event_properties["$process_person_profile"] = False
316
329
 
@@ -414,6 +427,14 @@ async def call_llm_and_track_usage_async(
414
427
  if cache_creation is not None and cache_creation > 0:
415
428
  event_properties["$ai_cache_creation_input_tokens"] = cache_creation
416
429
 
430
+ reasoning = usage.get("reasoning_tokens")
431
+ if reasoning is not None and reasoning > 0:
432
+ event_properties["$ai_reasoning_tokens"] = reasoning
433
+
434
+ web_search_count = usage.get("web_search_count")
435
+ if web_search_count is not None and web_search_count > 0:
436
+ event_properties["$ai_web_search_count"] = web_search_count
437
+
417
438
  if posthog_distinct_id is None:
418
439
  event_properties["$process_person_profile"] = False
419
440
 
@@ -535,6 +556,15 @@ def capture_streaming_event(
535
556
  if value is not None and isinstance(value, int) and value > 0:
536
557
  event_properties[f"$ai_{field}"] = value
537
558
 
559
+ # Add web search count if present (all providers)
560
+ web_search_count = event_data["usage_stats"].get("web_search_count")
561
+ if (
562
+ web_search_count is not None
563
+ and isinstance(web_search_count, int)
564
+ and web_search_count > 0
565
+ ):
566
+ event_properties["$ai_web_search_count"] = web_search_count
567
+
538
568
  # Handle provider-specific fields
539
569
  if (
540
570
  event_data["provider"] == "openai"
@@ -3,7 +3,7 @@ import logging
3
3
  import os
4
4
  import sys
5
5
  from datetime import datetime, timedelta
6
- from typing import Any, Callable, Dict, Optional, Union
6
+ from typing import Any, Dict, Optional, Union
7
7
  from typing_extensions import Unpack
8
8
  from uuid import uuid4
9
9
 
@@ -60,7 +60,6 @@ from posthoganalytics.utils import (
60
60
  SizeLimitedDict,
61
61
  clean,
62
62
  guess_timezone,
63
- remove_trailing_slash,
64
63
  system_context,
65
64
  )
66
65
  from posthoganalytics.version import VERSION
@@ -112,9 +112,18 @@ class PosthogContextMiddleware:
112
112
 
113
113
  def extract_tags(self, request):
114
114
  # type: (HttpRequest) -> Dict[str, Any]
115
- tags = {}
115
+ """Extract tags from request in sync context."""
116
+ user_id, user_email = self.extract_request_user(request)
117
+ return self._build_tags(request, user_id, user_email)
118
+
119
+ def _build_tags(self, request, user_id, user_email):
120
+ # type: (HttpRequest, Optional[str], Optional[str]) -> Dict[str, Any]
121
+ """
122
+ Build tags dict from request and user info.
116
123
 
117
- (user_id, user_email) = self.extract_request_user(request)
124
+ Centralized tag extraction logic used by both sync and async paths.
125
+ """
126
+ tags = {}
118
127
 
119
128
  # Extract session ID from X-POSTHOG-SESSION-ID header
120
129
  session_id = request.headers.get("X-POSTHOG-SESSION-ID")
@@ -166,21 +175,78 @@ class PosthogContextMiddleware:
166
175
  return tags
167
176
 
168
177
  def extract_request_user(self, request):
169
- user_id = None
170
- email = None
171
-
178
+ # type: (HttpRequest) -> tuple[Optional[str], Optional[str]]
179
+ """Extract user ID and email from request in sync context."""
172
180
  user = getattr(request, "user", None)
181
+ return self._resolve_user_details(user)
173
182
 
174
- if user and getattr(user, "is_authenticated", False):
175
- try:
176
- user_id = str(user.pk)
177
- except Exception:
178
- pass
183
async def aextract_tags(self, request):
    # type: (HttpRequest) -> Dict[str, Any]
    """
    Build the tag dict for a request handled on the async path.

    The user is resolved through the awaitable aextract_request_user()
    (which goes through request.auser() rather than request.user, avoiding
    SynchronousOnlyOperation in async context); the resulting id and email
    are then handed to _build_tags() together with the request.

    Named with the "a" prefix, following Django's convention for async
    counterparts (auser, asave, etc.).
    """
    resolved_id, resolved_email = await self.aextract_request_user(request)
    tags = self._build_tags(request, resolved_id, resolved_email)
    return tags
195
+
196
+ async def aextract_request_user(self, request):
197
+ # type: (HttpRequest) -> tuple[Optional[str], Optional[str]]
198
+ """
199
+ Async version of extract_request_user for use in async request handling.
179
200
 
201
+ Uses await request.auser() instead of request.user to avoid
202
+ SynchronousOnlyOperation in async context.
203
+
204
+ Follows Django's naming convention for async methods (auser, asave, etc.).
205
+ """
206
+ auser = getattr(request, "auser", None)
207
+ if callable(auser):
180
208
  try:
181
- email = str(user.email)
209
+ user = await auser()
210
+ return self._resolve_user_details(user)
182
211
  except Exception:
183
- pass
212
+ # If auser() fails, return empty - don't break the request
213
+ # Real errors (permissions, broken auth) will be logged by Django
214
+ return None, None
215
+
216
+ # Fallback for test requests without auser
217
+ return None, None
218
+
219
+ def _resolve_user_details(self, user):
220
+ # type: (Any) -> tuple[Optional[str], Optional[str]]
221
+ """
222
+ Extract user ID and email from a user object.
223
+
224
+ Handles both authenticated and unauthenticated users, as well as
225
+ legacy Django where is_authenticated was a method.
226
+ """
227
+ user_id = None
228
+ email = None
229
+
230
+ if user is None:
231
+ return user_id, email
232
+
233
+ # Handle is_authenticated (property in modern Django, method in legacy)
234
+ is_authenticated = getattr(user, "is_authenticated", False)
235
+ if callable(is_authenticated):
236
+ is_authenticated = is_authenticated()
237
+
238
+ if not is_authenticated:
239
+ return user_id, email
240
+
241
+ # Extract user primary key
242
+ user_pk = getattr(user, "pk", None)
243
+ if user_pk is not None:
244
+ user_id = str(user_pk)
245
+
246
+ # Extract user email
247
+ user_email = getattr(user, "email", None)
248
+ if user_email:
249
+ email = str(user_email)
184
250
 
185
251
  return user_id, email
186
252
 
@@ -211,12 +277,14 @@ class PosthogContextMiddleware:
211
277
  Asynchronous entry point for async request handling.
212
278
 
213
279
  This method is called when the middleware chain is async.
280
+ Uses aextract_tags() which calls request.auser() to avoid
281
+ SynchronousOnlyOperation when accessing user in async context.
214
282
  """
215
283
  if self.request_filter and not self.request_filter(request):
216
284
  return await self.get_response(request)
217
285
 
218
286
  with contexts.new_context(self.capture_exceptions, client=self.client):
219
- for k, v in self.extract_tags(request).items():
287
+ for k, v in (await self.aextract_tags(request)).items():
220
288
  contexts.tag(k, v)
221
289
 
222
290
  return await self.get_response(request)
@@ -1,4 +1,4 @@
1
- VERSION = "6.7.13"
1
+ VERSION = "6.8.0"
2
2
 
3
3
  if __name__ == "__main__":
4
4
  print(VERSION, end="") # noqa: T201
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: posthoganalytics
3
- Version: 6.7.13
3
+ Version: 6.8.0
4
4
  Summary: Integrate PostHog into any python application.
5
5
  Home-page: https://github.com/posthog/posthog-python
6
6
  Author: Posthog
@@ -109,3 +109,9 @@ attr = "posthoganalytics.version.VERSION"
109
109
  [tool.pytest.ini_options]
110
110
  asyncio_mode = "auto"
111
111
  asyncio_default_fixture_loop_scope = "function"
112
+ testpaths = [
113
+ "posthog/test",
114
+ ]
115
+ norecursedirs = [
116
+ "integration_tests",
117
+ ]