posthoganalytics 7.6.0__py3-none-any.whl → 7.8.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,863 @@
+import json
+import logging
+import time
+from datetime import datetime
+from typing import Any, Callable, Dict, Optional, Union
+
+from agents.tracing import Span, Trace
+from agents.tracing.processor_interface import TracingProcessor
+from agents.tracing.span_data import (
+    AgentSpanData,
+    CustomSpanData,
+    FunctionSpanData,
+    GenerationSpanData,
+    GuardrailSpanData,
+    HandoffSpanData,
+    MCPListToolsSpanData,
+    ResponseSpanData,
+    SpeechGroupSpanData,
+    SpeechSpanData,
+    TranscriptionSpanData,
+)
+
+from posthoganalytics import setup
+from posthoganalytics.client import Client
+
+log = logging.getLogger("posthog")
+
+
+def _ensure_serializable(obj: Any) -> Any:
+    """Ensure an object is JSON-serializable, converting to str as a fallback.
+
+    Returns the original object if it's already serializable (dict, list, str,
+    int, etc.), or str(obj) for non-serializable types so that downstream
+    json.dumps() calls won't fail.
+    """
+    if obj is None:
+        return None
+    try:
+        json.dumps(obj)
+        return obj
+    except (TypeError, ValueError):
+        return str(obj)
+
+
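+# e.g. _ensure_serializable({"a": 1}) returns the dict unchanged, while
+# _ensure_serializable(datetime.now()) falls back to str(datetime.now()).
+
+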
+def _parse_iso_timestamp(iso_str: Optional[str]) -> Optional[float]:
+    """Parse ISO timestamp to Unix timestamp."""
+    if not iso_str:
+        return None
+    try:
+        dt = datetime.fromisoformat(iso_str.replace("Z", "+00:00"))
+        return dt.timestamp()
+    except (ValueError, AttributeError):
+        return None
+
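+# e.g. _parse_iso_timestamp("2025-01-01T00:00:00Z") -> 1735689600.0 (UTC epoch
+# seconds); None or a malformed string yields None.
+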
+
+
+class PostHogTracingProcessor(TracingProcessor):
+    """
+    A tracing processor that sends OpenAI Agents SDK traces to PostHog.
+
+    This processor implements the TracingProcessor interface from the OpenAI Agents SDK
+    and maps agent traces, spans, and generations to PostHog's LLM analytics events.
+
+    Example:
+        ```python
+        from agents import Agent, Runner
+        from agents.tracing import add_trace_processor
+        from posthoganalytics.ai.openai_agents import PostHogTracingProcessor
+
+        # Create and register the processor
+        processor = PostHogTracingProcessor(
+            distinct_id="user@example.com",
+            privacy_mode=False,
+        )
+        add_trace_processor(processor)
+
+        # Run agents as normal - traces automatically sent to PostHog
+        agent = Agent(name="Assistant", instructions="You are helpful.")
+        result = Runner.run_sync(agent, "Hello!")
+        ```
+    """
+
+    def __init__(
+        self,
+        client: Optional[Client] = None,
+        distinct_id: Optional[Union[str, Callable[[Trace], Optional[str]]]] = None,
+        privacy_mode: bool = False,
+        groups: Optional[Dict[str, Any]] = None,
+        properties: Optional[Dict[str, Any]] = None,
+    ):
+        """
+        Initialize the PostHog tracing processor.
+
+        Args:
+            client: Optional PostHog client instance. If not provided, uses the default client.
+            distinct_id: Either a string distinct ID or a callable that takes a Trace
+                and returns a distinct ID. If not provided, uses the trace_id.
+            privacy_mode: If True, redacts input/output content from events.
+            groups: Optional PostHog groups to associate with all events.
+            properties: Optional additional properties to include with all events.
+        """
+        self._client = client or setup()
+        self._distinct_id = distinct_id
+        self._privacy_mode = privacy_mode
+        self._groups = groups or {}
+        self._properties = properties or {}
+
+        # Track span start times for latency calculation
+        self._span_start_times: Dict[str, float] = {}
+
+        # Track trace metadata for associating with spans
+        self._trace_metadata: Dict[str, Dict[str, Any]] = {}
+
+        # Max entries to prevent unbounded growth if on_span_end/on_trace_end
+        # is never called (e.g., due to an exception in the Agents SDK).
+        self._max_tracked_entries = 10000
+
+    def _get_distinct_id(self, trace: Optional[Trace]) -> Optional[str]:
+        """Resolve the distinct ID for a trace.
+
+        Returns the user-provided distinct ID (string or callable result),
+        or None if no user-provided ID is available. Callers should treat
+        None as a signal to use a fallback ID in personless mode.
+        """
+        if callable(self._distinct_id):
+            if trace:
+                result = self._distinct_id(trace)
+                if result:
+                    return str(result)
+            return None
+        elif self._distinct_id:
+            return str(self._distinct_id)
+        return None
+
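+    # For illustration, distinct_id may be a callable resolved per trace, e.g.
+    # (assuming the caller stores a "user_id" key in the trace metadata):
+    #   PostHogTracingProcessor(
+    #       distinct_id=lambda t: (getattr(t, "metadata", None) or {}).get("user_id")
+    #   )
+    # A None result puts events in personless mode, keyed by the trace_id.
+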
+    def _with_privacy_mode(self, value: Any) -> Any:
+        """Apply privacy mode redaction if enabled."""
+        if self._privacy_mode or (
+            hasattr(self._client, "privacy_mode") and self._client.privacy_mode
+        ):
+            return None
+        return value
+
+    def _evict_stale_entries(self) -> None:
+        """Evict oldest entries if dicts exceed max size to prevent unbounded growth."""
+        if len(self._span_start_times) > self._max_tracked_entries:
+            # Remove oldest entries by start time
+            sorted_spans = sorted(self._span_start_times.items(), key=lambda x: x[1])
+            for span_id, _ in sorted_spans[: len(sorted_spans) // 2]:
+                del self._span_start_times[span_id]
+            log.debug(
+                "Evicted stale span start times (exceeded %d entries)",
+                self._max_tracked_entries,
+            )
+
+        if len(self._trace_metadata) > self._max_tracked_entries:
+            # Remove half the entries (oldest inserted via dict ordering in Python 3.7+)
+            keys = list(self._trace_metadata.keys())
+            for key in keys[: len(keys) // 2]:
+                del self._trace_metadata[key]
+            log.debug(
+                "Evicted stale trace metadata (exceeded %d entries)",
+                self._max_tracked_entries,
+            )
+
+    def _get_group_id(self, trace_id: str) -> Optional[str]:
+        """Get the group_id for a trace from stored metadata."""
+        if trace_id in self._trace_metadata:
+            return self._trace_metadata[trace_id].get("group_id")
+        return None
+
+    def _capture_event(
+        self,
+        event: str,
+        properties: Dict[str, Any],
+        distinct_id: Optional[str] = None,
+    ) -> None:
+        """Capture an event to PostHog with error handling.
+
+        Args:
+            distinct_id: The resolved distinct ID. When the user didn't provide
+                one, callers should pass ``user_distinct_id or fallback_id``
+                (matching the langchain/openai pattern) and separately set
+                ``$process_person_profile`` in properties.
+        """
+        try:
+            if not hasattr(self._client, "capture") or not callable(
+                self._client.capture
+            ):
+                return
+
+            final_properties = {
+                **properties,
+                **self._properties,
+            }
+
+            self._client.capture(
+                distinct_id=distinct_id or "unknown",
+                event=event,
+                properties=final_properties,
+                groups=self._groups,
+            )
+        except Exception as e:
+            log.debug(f"Failed to capture PostHog event: {e}")
+
+    def on_trace_start(self, trace: Trace) -> None:
+        """Called when a new trace begins. Stores metadata for spans; the $ai_trace event is emitted in on_trace_end."""
+        try:
+            self._evict_stale_entries()
+            trace_id = trace.trace_id
+            trace_name = trace.name
+            group_id = getattr(trace, "group_id", None)
+            metadata = getattr(trace, "metadata", None)
+
+            distinct_id = self._get_distinct_id(trace)
+
+            # Store trace metadata for later (used by spans and on_trace_end)
+            self._trace_metadata[trace_id] = {
+                "name": trace_name,
+                "group_id": group_id,
+                "metadata": metadata,
+                "distinct_id": distinct_id,
+                "start_time": time.time(),
+            }
+        except Exception as e:
+            log.debug(f"Error in on_trace_start: {e}")
+
+    def on_trace_end(self, trace: Trace) -> None:
+        """Called when a trace completes. Emits the $ai_trace event with full metadata."""
+        try:
+            trace_id = trace.trace_id
+
+            # Pop stored metadata (also cleans up)
+            trace_info = self._trace_metadata.pop(trace_id, {})
+            trace_name = trace_info.get("name") or trace.name
+            group_id = trace_info.get("group_id") or getattr(trace, "group_id", None)
+            metadata = trace_info.get("metadata") or getattr(trace, "metadata", None)
+            distinct_id = trace_info.get("distinct_id") or self._get_distinct_id(trace)
+
+            # Calculate trace-level latency
+            start_time = trace_info.get("start_time")
+            latency = (time.time() - start_time) if start_time else None
+
+            properties = {
+                "$ai_trace_id": trace_id,
+                "$ai_trace_name": trace_name,
+                "$ai_provider": "openai",
+                "$ai_framework": "openai-agents",
+            }
+
+            if latency is not None:
+                properties["$ai_latency"] = latency
+
+            # Include group_id for linking related traces (e.g., conversation threads)
+            if group_id:
+                properties["$ai_group_id"] = group_id
+
+            # Include trace metadata if present
+            if metadata:
+                properties["$ai_trace_metadata"] = _ensure_serializable(metadata)
+
+            if distinct_id is None:
+                properties["$process_person_profile"] = False
+
+            self._capture_event(
+                event="$ai_trace",
+                distinct_id=distinct_id or trace_id,
+                properties=properties,
+            )
+        except Exception as e:
+            log.debug(f"Error in on_trace_end: {e}")
+
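+    # The resulting $ai_trace event looks roughly like:
+    #   {"$ai_trace_id": "...", "$ai_trace_name": "...", "$ai_provider": "openai",
+    #    "$ai_framework": "openai-agents", "$ai_latency": 1.23, ...}
+    # with $ai_group_id and $ai_trace_metadata included when available.
+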
+    def on_span_start(self, span: Span[Any]) -> None:
+        """Called when a new span begins."""
+        try:
+            self._evict_stale_entries()
+            span_id = span.span_id
+            self._span_start_times[span_id] = time.time()
+        except Exception as e:
+            log.debug(f"Error in on_span_start: {e}")
+
+    def on_span_end(self, span: Span[Any]) -> None:
+        """Called when a span completes."""
+        try:
+            span_id = span.span_id
+            trace_id = span.trace_id
+            parent_id = span.parent_id
+            span_data = span.span_data
+
+            # Calculate latency
+            start_time = self._span_start_times.pop(span_id, None)
+            if start_time:
+                latency = time.time() - start_time
+            else:
+                # Fall back to parsing timestamps
+                started = _parse_iso_timestamp(span.started_at)
+                ended = _parse_iso_timestamp(span.ended_at)
+                latency = (ended - started) if (started and ended) else 0
+
+            # Get user-provided distinct ID from trace metadata (resolved at trace start).
+            # None means no user-provided ID — use trace_id as a fallback in personless mode,
+            # matching the langchain/openai pattern: `distinct_id or trace_id`.
+            trace_info = self._trace_metadata.get(trace_id, {})
+            distinct_id = trace_info.get("distinct_id") or self._get_distinct_id(None)
+
+            # Get group_id from trace metadata for linking
+            group_id = self._get_group_id(trace_id)
+
+            # Get error info if present
+            error_info = span.error
+            error_properties = {}
+            if error_info:
+                if isinstance(error_info, dict):
+                    error_message = error_info.get("message", str(error_info))
+                    error_type_raw = error_info.get("type", "")
+                else:
+                    error_message = str(error_info)
+                    error_type_raw = ""
+
+                # Categorize error type for cross-provider filtering/alerting
+                error_type = "unknown"
+                if (
+                    "ModelBehaviorError" in error_type_raw
+                    or "ModelBehaviorError" in error_message
+                ):
+                    error_type = "model_behavior_error"
+                elif "UserError" in error_type_raw or "UserError" in error_message:
+                    error_type = "user_error"
+                elif (
+                    "InputGuardrailTripwireTriggered" in error_type_raw
+                    or "InputGuardrailTripwireTriggered" in error_message
+                ):
+                    error_type = "input_guardrail_triggered"
+                elif (
+                    "OutputGuardrailTripwireTriggered" in error_type_raw
+                    or "OutputGuardrailTripwireTriggered" in error_message
+                ):
+                    error_type = "output_guardrail_triggered"
+                elif (
+                    "MaxTurnsExceeded" in error_type_raw
+                    or "MaxTurnsExceeded" in error_message
+                ):
+                    error_type = "max_turns_exceeded"
+
+                error_properties = {
+                    "$ai_is_error": True,
+                    "$ai_error": error_message,
+                    "$ai_error_type": error_type,
+                }
+
+            # Personless mode: no user-provided distinct_id, fall back to trace_id
+            if distinct_id is None:
+                error_properties["$process_person_profile"] = False
+                distinct_id = trace_id
+
+            # Dispatch based on span data type
+            if isinstance(span_data, GenerationSpanData):
+                self._handle_generation_span(
+                    span_data,
+                    trace_id,
+                    span_id,
+                    parent_id,
+                    latency,
+                    distinct_id,
+                    group_id,
+                    error_properties,
+                )
+            elif isinstance(span_data, FunctionSpanData):
+                self._handle_function_span(
+                    span_data,
+                    trace_id,
+                    span_id,
+                    parent_id,
+                    latency,
+                    distinct_id,
+                    group_id,
+                    error_properties,
+                )
+            elif isinstance(span_data, AgentSpanData):
+                self._handle_agent_span(
+                    span_data,
+                    trace_id,
+                    span_id,
+                    parent_id,
+                    latency,
+                    distinct_id,
+                    group_id,
+                    error_properties,
+                )
+            elif isinstance(span_data, HandoffSpanData):
+                self._handle_handoff_span(
+                    span_data,
+                    trace_id,
+                    span_id,
+                    parent_id,
+                    latency,
+                    distinct_id,
+                    group_id,
+                    error_properties,
+                )
+            elif isinstance(span_data, GuardrailSpanData):
+                self._handle_guardrail_span(
+                    span_data,
+                    trace_id,
+                    span_id,
+                    parent_id,
+                    latency,
+                    distinct_id,
+                    group_id,
+                    error_properties,
+                )
+            elif isinstance(span_data, ResponseSpanData):
+                self._handle_response_span(
+                    span_data,
+                    trace_id,
+                    span_id,
+                    parent_id,
+                    latency,
+                    distinct_id,
+                    group_id,
+                    error_properties,
+                )
+            elif isinstance(span_data, CustomSpanData):
+                self._handle_custom_span(
+                    span_data,
+                    trace_id,
+                    span_id,
+                    parent_id,
+                    latency,
+                    distinct_id,
+                    group_id,
+                    error_properties,
+                )
+            elif isinstance(
+                span_data, (TranscriptionSpanData, SpeechSpanData, SpeechGroupSpanData)
+            ):
+                self._handle_audio_span(
+                    span_data,
+                    trace_id,
+                    span_id,
+                    parent_id,
+                    latency,
+                    distinct_id,
+                    group_id,
+                    error_properties,
+                )
+            elif isinstance(span_data, MCPListToolsSpanData):
+                self._handle_mcp_span(
+                    span_data,
+                    trace_id,
+                    span_id,
+                    parent_id,
+                    latency,
+                    distinct_id,
+                    group_id,
+                    error_properties,
+                )
+            else:
+                # Unknown span type - capture as generic span
+                self._handle_generic_span(
+                    span_data,
+                    trace_id,
+                    span_id,
+                    parent_id,
+                    latency,
+                    distinct_id,
+                    group_id,
+                    error_properties,
+                )
+
+        except Exception as e:
+            log.debug(f"Error in on_span_end: {e}")
+
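+    # Dispatch summary: GenerationSpanData and ResponseSpanData are emitted as
+    # $ai_generation events; every other span type becomes an $ai_span event.
+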
+    def _base_properties(
+        self,
+        trace_id: str,
+        span_id: str,
+        parent_id: Optional[str],
+        latency: float,
+        group_id: Optional[str],
+        error_properties: Dict[str, Any],
+    ) -> Dict[str, Any]:
+        """Build the base properties dict shared by all span handlers."""
+        properties = {
+            "$ai_trace_id": trace_id,
+            "$ai_span_id": span_id,
+            "$ai_parent_id": parent_id,
+            "$ai_provider": "openai",
+            "$ai_framework": "openai-agents",
+            "$ai_latency": latency,
+            **error_properties,
+        }
+        if group_id:
+            properties["$ai_group_id"] = group_id
+        return properties
+
+    def _handle_generation_span(
+        self,
+        span_data: GenerationSpanData,
+        trace_id: str,
+        span_id: str,
+        parent_id: Optional[str],
+        latency: float,
+        distinct_id: str,
+        group_id: Optional[str],
+        error_properties: Dict[str, Any],
+    ) -> None:
+        """Handle LLM generation spans - maps to $ai_generation event."""
+        # Extract token usage
+        usage = span_data.usage or {}
+        input_tokens = usage.get("input_tokens") or usage.get("prompt_tokens") or 0
+        output_tokens = (
+            usage.get("output_tokens") or usage.get("completion_tokens") or 0
+        )
+
+        # Extract model config parameters
+        model_config = span_data.model_config or {}
+        model_params = {}
+        for param in [
+            "temperature",
+            "max_tokens",
+            "top_p",
+            "frequency_penalty",
+            "presence_penalty",
+        ]:
+            if param in model_config:
+                model_params[param] = model_config[param]
+
+        properties = {
+            **self._base_properties(
+                trace_id, span_id, parent_id, latency, group_id, error_properties
+            ),
+            "$ai_model": span_data.model,
+            "$ai_model_parameters": model_params if model_params else None,
+            "$ai_input": self._with_privacy_mode(_ensure_serializable(span_data.input)),
+            "$ai_output_choices": self._with_privacy_mode(
+                _ensure_serializable(span_data.output)
+            ),
+            "$ai_input_tokens": input_tokens,
+            "$ai_output_tokens": output_tokens,
+            "$ai_total_tokens": (input_tokens or 0) + (output_tokens or 0),
+        }
+
+        # Add optional token fields if present
+        if usage.get("reasoning_tokens"):
+            properties["$ai_reasoning_tokens"] = usage["reasoning_tokens"]
+        if usage.get("cache_read_input_tokens"):
+            properties["$ai_cache_read_input_tokens"] = usage["cache_read_input_tokens"]
+        if usage.get("cache_creation_input_tokens"):
+            properties["$ai_cache_creation_input_tokens"] = usage[
+                "cache_creation_input_tokens"
+            ]
+
+        self._capture_event("$ai_generation", properties, distinct_id)
+
+    def _handle_function_span(
+        self,
+        span_data: FunctionSpanData,
+        trace_id: str,
+        span_id: str,
+        parent_id: Optional[str],
+        latency: float,
+        distinct_id: str,
+        group_id: Optional[str],
+        error_properties: Dict[str, Any],
+    ) -> None:
+        """Handle function/tool call spans - maps to $ai_span event."""
+        properties = {
+            **self._base_properties(
+                trace_id, span_id, parent_id, latency, group_id, error_properties
+            ),
+            "$ai_span_name": span_data.name,
+            "$ai_span_type": "tool",
+            "$ai_input_state": self._with_privacy_mode(
+                _ensure_serializable(span_data.input)
+            ),
+            "$ai_output_state": self._with_privacy_mode(
+                _ensure_serializable(span_data.output)
+            ),
+        }
+
+        if span_data.mcp_data:
+            properties["$ai_mcp_data"] = _ensure_serializable(span_data.mcp_data)
+
+        self._capture_event("$ai_span", properties, distinct_id)
+
+    def _handle_agent_span(
+        self,
+        span_data: AgentSpanData,
+        trace_id: str,
+        span_id: str,
+        parent_id: Optional[str],
+        latency: float,
+        distinct_id: str,
+        group_id: Optional[str],
+        error_properties: Dict[str, Any],
+    ) -> None:
+        """Handle agent execution spans - maps to $ai_span event."""
+        properties = {
+            **self._base_properties(
+                trace_id, span_id, parent_id, latency, group_id, error_properties
+            ),
+            "$ai_span_name": span_data.name,
+            "$ai_span_type": "agent",
+        }
+
+        if span_data.handoffs:
+            properties["$ai_agent_handoffs"] = span_data.handoffs
+        if span_data.tools:
+            properties["$ai_agent_tools"] = span_data.tools
+        if span_data.output_type:
+            properties["$ai_agent_output_type"] = span_data.output_type
+
+        self._capture_event("$ai_span", properties, distinct_id)
+
+    def _handle_handoff_span(
+        self,
+        span_data: HandoffSpanData,
+        trace_id: str,
+        span_id: str,
+        parent_id: Optional[str],
+        latency: float,
+        distinct_id: str,
+        group_id: Optional[str],
+        error_properties: Dict[str, Any],
+    ) -> None:
+        """Handle agent handoff spans - maps to $ai_span event."""
+        properties = {
+            **self._base_properties(
+                trace_id, span_id, parent_id, latency, group_id, error_properties
+            ),
+            "$ai_span_name": f"{span_data.from_agent} -> {span_data.to_agent}",
+            "$ai_span_type": "handoff",
+            "$ai_handoff_from_agent": span_data.from_agent,
+            "$ai_handoff_to_agent": span_data.to_agent,
+        }
+
+        self._capture_event("$ai_span", properties, distinct_id)
+
+    def _handle_guardrail_span(
+        self,
+        span_data: GuardrailSpanData,
+        trace_id: str,
+        span_id: str,
+        parent_id: Optional[str],
+        latency: float,
+        distinct_id: str,
+        group_id: Optional[str],
+        error_properties: Dict[str, Any],
+    ) -> None:
+        """Handle guardrail execution spans - maps to $ai_span event."""
+        properties = {
+            **self._base_properties(
+                trace_id, span_id, parent_id, latency, group_id, error_properties
+            ),
+            "$ai_span_name": span_data.name,
+            "$ai_span_type": "guardrail",
+            "$ai_guardrail_triggered": span_data.triggered,
+        }
+
+        self._capture_event("$ai_span", properties, distinct_id)
+
+    def _handle_response_span(
+        self,
+        span_data: ResponseSpanData,
+        trace_id: str,
+        span_id: str,
+        parent_id: Optional[str],
+        latency: float,
+        distinct_id: str,
+        group_id: Optional[str],
+        error_properties: Dict[str, Any],
+    ) -> None:
+        """Handle OpenAI Responses API spans - maps to $ai_generation event."""
+        response = span_data.response
+        response_id = response.id if response else None
+
+        # Try to extract usage from the response
+        usage = getattr(response, "usage", None) if response else None
+        input_tokens = 0
+        output_tokens = 0
+        if usage:
+            input_tokens = getattr(usage, "input_tokens", 0) or 0
+            output_tokens = getattr(usage, "output_tokens", 0) or 0
+
+        # Try to extract model from the response
+        model = getattr(response, "model", None) if response else None
+
+        properties = {
+            **self._base_properties(
+                trace_id, span_id, parent_id, latency, group_id, error_properties
+            ),
+            "$ai_model": model,
+            "$ai_response_id": response_id,
+            "$ai_input": self._with_privacy_mode(_ensure_serializable(span_data.input)),
+            "$ai_input_tokens": input_tokens,
+            "$ai_output_tokens": output_tokens,
+            "$ai_total_tokens": input_tokens + output_tokens,
+        }
+
+        # Extract output content from the response
+        if response:
+            output_items = getattr(response, "output", None)
+            if output_items:
+                properties["$ai_output_choices"] = self._with_privacy_mode(
+                    _ensure_serializable(output_items)
+                )
+
+        self._capture_event("$ai_generation", properties, distinct_id)
+
+    def _handle_custom_span(
+        self,
+        span_data: CustomSpanData,
+        trace_id: str,
+        span_id: str,
+        parent_id: Optional[str],
+        latency: float,
+        distinct_id: str,
+        group_id: Optional[str],
+        error_properties: Dict[str, Any],
+    ) -> None:
+        """Handle custom user-defined spans - maps to $ai_span event."""
+        properties = {
+            **self._base_properties(
+                trace_id, span_id, parent_id, latency, group_id, error_properties
+            ),
+            "$ai_span_name": span_data.name,
+            "$ai_span_type": "custom",
+            "$ai_custom_data": self._with_privacy_mode(
+                _ensure_serializable(span_data.data)
+            ),
+        }
+
+        self._capture_event("$ai_span", properties, distinct_id)
+
+    def _handle_audio_span(
+        self,
+        span_data: Union[TranscriptionSpanData, SpeechSpanData, SpeechGroupSpanData],
+        trace_id: str,
+        span_id: str,
+        parent_id: Optional[str],
+        latency: float,
+        distinct_id: str,
+        group_id: Optional[str],
+        error_properties: Dict[str, Any],
+    ) -> None:
+        """Handle audio-related spans (transcription, speech) - maps to $ai_span event."""
+        span_type = span_data.type  # "transcription", "speech", or "speech_group"
+
+        properties = {
+            **self._base_properties(
+                trace_id, span_id, parent_id, latency, group_id, error_properties
+            ),
+            "$ai_span_name": span_type,
+            "$ai_span_type": span_type,
+        }
+
+        # Add model info if available
+        if hasattr(span_data, "model") and span_data.model:
+            properties["$ai_model"] = span_data.model
+
+        # Add model config if available (pass-through property)
+        if hasattr(span_data, "model_config") and span_data.model_config:
+            properties["model_config"] = _ensure_serializable(span_data.model_config)
+
+        # Add time to first audio byte for speech spans (pass-through property)
+        if hasattr(span_data, "first_content_at") and span_data.first_content_at:
+            properties["first_content_at"] = span_data.first_content_at
+
+        # Add audio format info (pass-through properties)
+        if hasattr(span_data, "input_format"):
+            properties["audio_input_format"] = span_data.input_format
+        if hasattr(span_data, "output_format"):
+            properties["audio_output_format"] = span_data.output_format
+
+        # Add text input for TTS
+        if (
+            hasattr(span_data, "input")
+            and span_data.input
+            and isinstance(span_data.input, str)
+        ):
+            properties["$ai_input"] = self._with_privacy_mode(span_data.input)
+
+        # Don't include audio data (base64) - just metadata
+        if hasattr(span_data, "output") and isinstance(span_data.output, str):
+            # For transcription, output is the text
+            properties["$ai_output_state"] = self._with_privacy_mode(span_data.output)
+
+        self._capture_event("$ai_span", properties, distinct_id)
+
+    def _handle_mcp_span(
+        self,
+        span_data: MCPListToolsSpanData,
+        trace_id: str,
+        span_id: str,
+        parent_id: Optional[str],
+        latency: float,
+        distinct_id: str,
+        group_id: Optional[str],
+        error_properties: Dict[str, Any],
+    ) -> None:
+        """Handle MCP (Model Context Protocol) spans - maps to $ai_span event."""
+        properties = {
+            **self._base_properties(
+                trace_id, span_id, parent_id, latency, group_id, error_properties
+            ),
+            "$ai_span_name": f"mcp:{span_data.server}",
+            "$ai_span_type": "mcp_tools",
+            "$ai_mcp_server": span_data.server,
+            "$ai_mcp_tools": span_data.result,
+        }
+
+        self._capture_event("$ai_span", properties, distinct_id)
+
+    def _handle_generic_span(
+        self,
+        span_data: Any,
+        trace_id: str,
+        span_id: str,
+        parent_id: Optional[str],
+        latency: float,
+        distinct_id: str,
+        group_id: Optional[str],
+        error_properties: Dict[str, Any],
+    ) -> None:
+        """Handle unknown span types - maps to $ai_span event."""
+        span_type = getattr(span_data, "type", "unknown")
+
+        properties = {
+            **self._base_properties(
+                trace_id, span_id, parent_id, latency, group_id, error_properties
+            ),
+            "$ai_span_name": span_type,
+            "$ai_span_type": span_type,
+        }
+
+        # Try to export span data
+        if hasattr(span_data, "export"):
+            try:
+                exported = span_data.export()
+                properties["$ai_span_data"] = _ensure_serializable(exported)
+            except Exception:
+                pass
+
+        self._capture_event("$ai_span", properties, distinct_id)
+
+    def shutdown(self) -> None:
+        """Clean up resources when the application stops."""
+        try:
+            self._span_start_times.clear()
+            self._trace_metadata.clear()
+
+            # Flush the PostHog client if possible
+            if hasattr(self._client, "flush") and callable(self._client.flush):
+                self._client.flush()
+        except Exception as e:
+            log.debug(f"Error in shutdown: {e}")
+
+    def force_flush(self) -> None:
+        """Force immediate processing of any queued events."""
+        try:
+            if hasattr(self._client, "flush") and callable(self._client.flush):
+                self._client.flush()
+        except Exception as e:
+            log.debug(f"Error in force_flush: {e}")