posthog 7.6.0__py3-none-any.whl → 7.8.0__py3-none-any.whl
This diff shows the content of publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as published.
- posthog/ai/__init__.py +3 -0
- posthog/ai/anthropic/anthropic_converter.py +18 -0
- posthog/ai/gemini/gemini_converter.py +7 -0
- posthog/ai/openai/openai_converter.py +19 -0
- posthog/ai/openai_agents/__init__.py +76 -0
- posthog/ai/openai_agents/processor.py +863 -0
- posthog/ai/prompts.py +271 -0
- posthog/ai/types.py +1 -0
- posthog/ai/utils.py +78 -0
- posthog/test/ai/__init__.py +0 -0
- posthog/test/ai/openai_agents/__init__.py +1 -0
- posthog/test/ai/openai_agents/test_processor.py +810 -0
- posthog/test/ai/test_prompts.py +577 -0
- posthog/test/ai/test_sanitization.py +522 -0
- posthog/test/ai/test_system_prompts.py +363 -0
- posthog/version.py +1 -1
- {posthog-7.6.0.dist-info → posthog-7.8.0.dist-info}/METADATA +1 -1
- {posthog-7.6.0.dist-info → posthog-7.8.0.dist-info}/RECORD +21 -12
- {posthog-7.6.0.dist-info → posthog-7.8.0.dist-info}/WHEEL +0 -0
- {posthog-7.6.0.dist-info → posthog-7.8.0.dist-info}/licenses/LICENSE +0 -0
- {posthog-7.6.0.dist-info → posthog-7.8.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,863 @@
+import json
+import logging
+import time
+from datetime import datetime
+from typing import Any, Callable, Dict, Optional, Union
+
+from agents.tracing import Span, Trace
+from agents.tracing.processor_interface import TracingProcessor
+from agents.tracing.span_data import (
+    AgentSpanData,
+    CustomSpanData,
+    FunctionSpanData,
+    GenerationSpanData,
+    GuardrailSpanData,
+    HandoffSpanData,
+    MCPListToolsSpanData,
+    ResponseSpanData,
+    SpeechGroupSpanData,
+    SpeechSpanData,
+    TranscriptionSpanData,
+)
+
+from posthog import setup
+from posthog.client import Client
+
+log = logging.getLogger("posthog")
+
+
+def _ensure_serializable(obj: Any) -> Any:
+    """Ensure an object is JSON-serializable, converting to str as fallback.
+
+    Returns the original object if it's already serializable (dict, list, str,
+    int, etc.), or str(obj) for non-serializable types so that downstream
+    json.dumps() calls won't fail.
+    """
+    if obj is None:
+        return None
+    try:
+        json.dumps(obj)
+        return obj
+    except (TypeError, ValueError):
+        return str(obj)
+
+
+def _parse_iso_timestamp(iso_str: Optional[str]) -> Optional[float]:
+    """Parse ISO timestamp to Unix timestamp."""
+    if not iso_str:
+        return None
+    try:
+        dt = datetime.fromisoformat(iso_str.replace("Z", "+00:00"))
+        return dt.timestamp()
+    except (ValueError, AttributeError):
+        return None
+
+
+class PostHogTracingProcessor(TracingProcessor):
+    """
+    A tracing processor that sends OpenAI Agents SDK traces to PostHog.
+
+    This processor implements the TracingProcessor interface from the OpenAI Agents SDK
+    and maps agent traces, spans, and generations to PostHog's LLM analytics events.
+
+    Example:
+        ```python
+        from agents import Agent, Runner
+        from agents.tracing import add_trace_processor
+        from posthog.ai.openai_agents import PostHogTracingProcessor
+
+        # Create and register the processor
+        processor = PostHogTracingProcessor(
+            distinct_id="user@example.com",
+            privacy_mode=False,
+        )
+        add_trace_processor(processor)
+
+        # Run agents as normal - traces automatically sent to PostHog
+        agent = Agent(name="Assistant", instructions="You are helpful.")
+        result = Runner.run_sync(agent, "Hello!")
+        ```
+    """
+
+    def __init__(
+        self,
+        client: Optional[Client] = None,
+        distinct_id: Optional[Union[str, Callable[[Trace], Optional[str]]]] = None,
+        privacy_mode: bool = False,
+        groups: Optional[Dict[str, Any]] = None,
+        properties: Optional[Dict[str, Any]] = None,
+    ):
+        """
+        Initialize the PostHog tracing processor.
+
+        Args:
+            client: Optional PostHog client instance. If not provided, uses the default client.
+            distinct_id: Either a string distinct ID or a callable that takes a Trace
+                and returns a distinct ID. If not provided, uses the trace_id.
+            privacy_mode: If True, redacts input/output content from events.
+            groups: Optional PostHog groups to associate with all events.
+            properties: Optional additional properties to include with all events.
+        """
+        self._client = client or setup()
+        self._distinct_id = distinct_id
+        self._privacy_mode = privacy_mode
+        self._groups = groups or {}
+        self._properties = properties or {}
+
+        # Track span start times for latency calculation
+        self._span_start_times: Dict[str, float] = {}
+
+        # Track trace metadata for associating with spans
+        self._trace_metadata: Dict[str, Dict[str, Any]] = {}
+
+        # Max entries to prevent unbounded growth if on_span_end/on_trace_end
+        # is never called (e.g., due to an exception in the Agents SDK).
+        self._max_tracked_entries = 10000
+
+    def _get_distinct_id(self, trace: Optional[Trace]) -> Optional[str]:
+        """Resolve the distinct ID for a trace.
+
+        Returns the user-provided distinct ID (string or callable result),
+        or None if no user-provided ID is available. Callers should treat
+        None as a signal to use a fallback ID in personless mode.
+        """
+        if callable(self._distinct_id):
+            if trace:
+                result = self._distinct_id(trace)
+                if result:
+                    return str(result)
+            return None
+        elif self._distinct_id:
+            return str(self._distinct_id)
+        return None
+
+    def _with_privacy_mode(self, value: Any) -> Any:
+        """Apply privacy mode redaction if enabled."""
+        if self._privacy_mode or (
+            hasattr(self._client, "privacy_mode") and self._client.privacy_mode
+        ):
+            return None
+        return value
+
+    def _evict_stale_entries(self) -> None:
+        """Evict oldest entries if dicts exceed max size to prevent unbounded growth."""
+        if len(self._span_start_times) > self._max_tracked_entries:
+            # Remove oldest entries by start time
+            sorted_spans = sorted(self._span_start_times.items(), key=lambda x: x[1])
+            for span_id, _ in sorted_spans[: len(sorted_spans) // 2]:
+                del self._span_start_times[span_id]
+            log.debug(
+                "Evicted stale span start times (exceeded %d entries)",
+                self._max_tracked_entries,
+            )
+
+        if len(self._trace_metadata) > self._max_tracked_entries:
+            # Remove half the entries (oldest inserted via dict ordering in Python 3.7+)
+            keys = list(self._trace_metadata.keys())
+            for key in keys[: len(keys) // 2]:
+                del self._trace_metadata[key]
+            log.debug(
+                "Evicted stale trace metadata (exceeded %d entries)",
+                self._max_tracked_entries,
+            )
+
+    def _get_group_id(self, trace_id: str) -> Optional[str]:
+        """Get the group_id for a trace from stored metadata."""
+        if trace_id in self._trace_metadata:
+            return self._trace_metadata[trace_id].get("group_id")
+        return None
+
+    def _capture_event(
+        self,
+        event: str,
+        properties: Dict[str, Any],
+        distinct_id: Optional[str] = None,
+    ) -> None:
+        """Capture an event to PostHog with error handling.
+
+        Args:
+            distinct_id: The resolved distinct ID. When the user didn't provide
+                one, callers should pass ``user_distinct_id or fallback_id``
+                (matching the langchain/openai pattern) and separately set
+                ``$process_person_profile`` in properties.
+        """
+        try:
+            if not hasattr(self._client, "capture") or not callable(
+                self._client.capture
+            ):
+                return
+
+            final_properties = {
+                **properties,
+                **self._properties,
+            }
+
+            self._client.capture(
+                distinct_id=distinct_id or "unknown",
+                event=event,
+                properties=final_properties,
+                groups=self._groups,
+            )
+        except Exception as e:
+            log.debug(f"Failed to capture PostHog event: {e}")
+
+    def on_trace_start(self, trace: Trace) -> None:
+        """Called when a new trace begins. Stores metadata for spans; the $ai_trace event is emitted in on_trace_end."""
+        try:
+            self._evict_stale_entries()
+            trace_id = trace.trace_id
+            trace_name = trace.name
+            group_id = getattr(trace, "group_id", None)
+            metadata = getattr(trace, "metadata", None)
+
+            distinct_id = self._get_distinct_id(trace)
+
+            # Store trace metadata for later (used by spans and on_trace_end)
+            self._trace_metadata[trace_id] = {
+                "name": trace_name,
+                "group_id": group_id,
+                "metadata": metadata,
+                "distinct_id": distinct_id,
+                "start_time": time.time(),
+            }
+        except Exception as e:
+            log.debug(f"Error in on_trace_start: {e}")
+
+    def on_trace_end(self, trace: Trace) -> None:
+        """Called when a trace completes. Emits the $ai_trace event with full metadata."""
+        try:
+            trace_id = trace.trace_id
+
+            # Pop stored metadata (also cleans up)
+            trace_info = self._trace_metadata.pop(trace_id, {})
+            trace_name = trace_info.get("name") or trace.name
+            group_id = trace_info.get("group_id") or getattr(trace, "group_id", None)
+            metadata = trace_info.get("metadata") or getattr(trace, "metadata", None)
+            distinct_id = trace_info.get("distinct_id") or self._get_distinct_id(trace)
+
+            # Calculate trace-level latency
+            start_time = trace_info.get("start_time")
+            latency = (time.time() - start_time) if start_time else None
+
+            properties = {
+                "$ai_trace_id": trace_id,
+                "$ai_trace_name": trace_name,
+                "$ai_provider": "openai",
+                "$ai_framework": "openai-agents",
+            }
+
+            if latency is not None:
+                properties["$ai_latency"] = latency
+
+            # Include group_id for linking related traces (e.g., conversation threads)
+            if group_id:
+                properties["$ai_group_id"] = group_id
+
+            # Include trace metadata if present
+            if metadata:
+                properties["$ai_trace_metadata"] = _ensure_serializable(metadata)
+
+            if distinct_id is None:
+                properties["$process_person_profile"] = False
+
+            self._capture_event(
+                event="$ai_trace",
+                distinct_id=distinct_id or trace_id,
+                properties=properties,
+            )
+        except Exception as e:
+            log.debug(f"Error in on_trace_end: {e}")
+
+    def on_span_start(self, span: Span[Any]) -> None:
+        """Called when a new span begins."""
+        try:
+            self._evict_stale_entries()
+            span_id = span.span_id
+            self._span_start_times[span_id] = time.time()
+        except Exception as e:
+            log.debug(f"Error in on_span_start: {e}")
+
+    def on_span_end(self, span: Span[Any]) -> None:
+        """Called when a span completes."""
+        try:
+            span_id = span.span_id
+            trace_id = span.trace_id
+            parent_id = span.parent_id
+            span_data = span.span_data
+
+            # Calculate latency
+            start_time = self._span_start_times.pop(span_id, None)
+            if start_time:
+                latency = time.time() - start_time
+            else:
+                # Fall back to parsing timestamps
+                started = _parse_iso_timestamp(span.started_at)
+                ended = _parse_iso_timestamp(span.ended_at)
+                latency = (ended - started) if (started and ended) else 0
+
+            # Get user-provided distinct ID from trace metadata (resolved at trace start).
+            # None means no user-provided ID — use trace_id as fallback in personless mode,
+            # matching the langchain/openai pattern: `distinct_id or trace_id`.
+            trace_info = self._trace_metadata.get(trace_id, {})
+            distinct_id = trace_info.get("distinct_id") or self._get_distinct_id(None)
+
+            # Get group_id from trace metadata for linking
+            group_id = self._get_group_id(trace_id)
+
+            # Get error info if present
+            error_info = span.error
+            error_properties = {}
+            if error_info:
+                if isinstance(error_info, dict):
+                    error_message = error_info.get("message", str(error_info))
+                    error_type_raw = error_info.get("type", "")
+                else:
+                    error_message = str(error_info)
+                    error_type_raw = ""
+
+                # Categorize error type for cross-provider filtering/alerting
+                error_type = "unknown"
+                if (
+                    "ModelBehaviorError" in error_type_raw
+                    or "ModelBehaviorError" in error_message
+                ):
+                    error_type = "model_behavior_error"
+                elif "UserError" in error_type_raw or "UserError" in error_message:
+                    error_type = "user_error"
+                elif (
+                    "InputGuardrailTripwireTriggered" in error_type_raw
+                    or "InputGuardrailTripwireTriggered" in error_message
+                ):
+                    error_type = "input_guardrail_triggered"
+                elif (
+                    "OutputGuardrailTripwireTriggered" in error_type_raw
+                    or "OutputGuardrailTripwireTriggered" in error_message
+                ):
+                    error_type = "output_guardrail_triggered"
+                elif (
+                    "MaxTurnsExceeded" in error_type_raw
+                    or "MaxTurnsExceeded" in error_message
+                ):
+                    error_type = "max_turns_exceeded"
+
+                error_properties = {
+                    "$ai_is_error": True,
+                    "$ai_error": error_message,
+                    "$ai_error_type": error_type,
+                }
+
+            # Personless mode: no user-provided distinct_id, fallback to trace_id
+            if distinct_id is None:
+                error_properties["$process_person_profile"] = False
+                distinct_id = trace_id
+
+            # Dispatch based on span data type
+            if isinstance(span_data, GenerationSpanData):
+                self._handle_generation_span(
+                    span_data,
+                    trace_id,
+                    span_id,
+                    parent_id,
+                    latency,
+                    distinct_id,
+                    group_id,
+                    error_properties,
+                )
+            elif isinstance(span_data, FunctionSpanData):
+                self._handle_function_span(
+                    span_data,
+                    trace_id,
+                    span_id,
+                    parent_id,
+                    latency,
+                    distinct_id,
+                    group_id,
+                    error_properties,
+                )
+            elif isinstance(span_data, AgentSpanData):
+                self._handle_agent_span(
+                    span_data,
+                    trace_id,
+                    span_id,
+                    parent_id,
+                    latency,
+                    distinct_id,
+                    group_id,
+                    error_properties,
+                )
+            elif isinstance(span_data, HandoffSpanData):
+                self._handle_handoff_span(
+                    span_data,
+                    trace_id,
+                    span_id,
+                    parent_id,
+                    latency,
+                    distinct_id,
+                    group_id,
+                    error_properties,
+                )
+            elif isinstance(span_data, GuardrailSpanData):
+                self._handle_guardrail_span(
+                    span_data,
+                    trace_id,
+                    span_id,
+                    parent_id,
+                    latency,
+                    distinct_id,
+                    group_id,
+                    error_properties,
+                )
+            elif isinstance(span_data, ResponseSpanData):
+                self._handle_response_span(
+                    span_data,
+                    trace_id,
+                    span_id,
+                    parent_id,
+                    latency,
+                    distinct_id,
+                    group_id,
+                    error_properties,
+                )
+            elif isinstance(span_data, CustomSpanData):
+                self._handle_custom_span(
+                    span_data,
+                    trace_id,
+                    span_id,
+                    parent_id,
+                    latency,
+                    distinct_id,
+                    group_id,
+                    error_properties,
+                )
+            elif isinstance(
+                span_data, (TranscriptionSpanData, SpeechSpanData, SpeechGroupSpanData)
+            ):
+                self._handle_audio_span(
+                    span_data,
+                    trace_id,
+                    span_id,
+                    parent_id,
+                    latency,
+                    distinct_id,
+                    group_id,
+                    error_properties,
+                )
+            elif isinstance(span_data, MCPListToolsSpanData):
+                self._handle_mcp_span(
+                    span_data,
+                    trace_id,
+                    span_id,
+                    parent_id,
+                    latency,
+                    distinct_id,
+                    group_id,
+                    error_properties,
+                )
+            else:
+                # Unknown span type - capture as generic span
+                self._handle_generic_span(
+                    span_data,
+                    trace_id,
+                    span_id,
+                    parent_id,
+                    latency,
+                    distinct_id,
+                    group_id,
+                    error_properties,
+                )
+
+        except Exception as e:
+            log.debug(f"Error in on_span_end: {e}")
+
+    def _base_properties(
+        self,
+        trace_id: str,
+        span_id: str,
+        parent_id: Optional[str],
+        latency: float,
+        group_id: Optional[str],
+        error_properties: Dict[str, Any],
+    ) -> Dict[str, Any]:
+        """Build the base properties dict shared by all span handlers."""
+        properties = {
+            "$ai_trace_id": trace_id,
+            "$ai_span_id": span_id,
+            "$ai_parent_id": parent_id,
+            "$ai_provider": "openai",
+            "$ai_framework": "openai-agents",
+            "$ai_latency": latency,
+            **error_properties,
+        }
+        if group_id:
+            properties["$ai_group_id"] = group_id
+        return properties
+
+    def _handle_generation_span(
+        self,
+        span_data: GenerationSpanData,
+        trace_id: str,
+        span_id: str,
+        parent_id: Optional[str],
+        latency: float,
+        distinct_id: str,
+        group_id: Optional[str],
+        error_properties: Dict[str, Any],
+    ) -> None:
+        """Handle LLM generation spans - maps to $ai_generation event."""
+        # Extract token usage
+        usage = span_data.usage or {}
+        input_tokens = usage.get("input_tokens") or usage.get("prompt_tokens") or 0
+        output_tokens = (
+            usage.get("output_tokens") or usage.get("completion_tokens") or 0
+        )
+
+        # Extract model config parameters
+        model_config = span_data.model_config or {}
+        model_params = {}
+        for param in [
+            "temperature",
+            "max_tokens",
+            "top_p",
+            "frequency_penalty",
+            "presence_penalty",
+        ]:
+            if param in model_config:
+                model_params[param] = model_config[param]
+
+        properties = {
+            **self._base_properties(
+                trace_id, span_id, parent_id, latency, group_id, error_properties
+            ),
+            "$ai_model": span_data.model,
+            "$ai_model_parameters": model_params if model_params else None,
+            "$ai_input": self._with_privacy_mode(_ensure_serializable(span_data.input)),
+            "$ai_output_choices": self._with_privacy_mode(
+                _ensure_serializable(span_data.output)
+            ),
+            "$ai_input_tokens": input_tokens,
+            "$ai_output_tokens": output_tokens,
+            "$ai_total_tokens": (input_tokens or 0) + (output_tokens or 0),
+        }
+
+        # Add optional token fields if present
+        if usage.get("reasoning_tokens"):
+            properties["$ai_reasoning_tokens"] = usage["reasoning_tokens"]
+        if usage.get("cache_read_input_tokens"):
+            properties["$ai_cache_read_input_tokens"] = usage["cache_read_input_tokens"]
+        if usage.get("cache_creation_input_tokens"):
+            properties["$ai_cache_creation_input_tokens"] = usage[
+                "cache_creation_input_tokens"
+            ]
+
+        self._capture_event("$ai_generation", properties, distinct_id)
+
+    def _handle_function_span(
+        self,
+        span_data: FunctionSpanData,
+        trace_id: str,
+        span_id: str,
+        parent_id: Optional[str],
+        latency: float,
+        distinct_id: str,
+        group_id: Optional[str],
+        error_properties: Dict[str, Any],
+    ) -> None:
+        """Handle function/tool call spans - maps to $ai_span event."""
+        properties = {
+            **self._base_properties(
+                trace_id, span_id, parent_id, latency, group_id, error_properties
+            ),
+            "$ai_span_name": span_data.name,
+            "$ai_span_type": "tool",
+            "$ai_input_state": self._with_privacy_mode(
+                _ensure_serializable(span_data.input)
+            ),
+            "$ai_output_state": self._with_privacy_mode(
+                _ensure_serializable(span_data.output)
+            ),
+        }
+
+        if span_data.mcp_data:
+            properties["$ai_mcp_data"] = _ensure_serializable(span_data.mcp_data)
+
+        self._capture_event("$ai_span", properties, distinct_id)
+
+    def _handle_agent_span(
+        self,
+        span_data: AgentSpanData,
+        trace_id: str,
+        span_id: str,
+        parent_id: Optional[str],
+        latency: float,
+        distinct_id: str,
+        group_id: Optional[str],
+        error_properties: Dict[str, Any],
+    ) -> None:
+        """Handle agent execution spans - maps to $ai_span event."""
+        properties = {
+            **self._base_properties(
+                trace_id, span_id, parent_id, latency, group_id, error_properties
+            ),
+            "$ai_span_name": span_data.name,
+            "$ai_span_type": "agent",
+        }
+
+        if span_data.handoffs:
+            properties["$ai_agent_handoffs"] = span_data.handoffs
+        if span_data.tools:
+            properties["$ai_agent_tools"] = span_data.tools
+        if span_data.output_type:
+            properties["$ai_agent_output_type"] = span_data.output_type
+
+        self._capture_event("$ai_span", properties, distinct_id)
+
+    def _handle_handoff_span(
+        self,
+        span_data: HandoffSpanData,
+        trace_id: str,
+        span_id: str,
+        parent_id: Optional[str],
+        latency: float,
+        distinct_id: str,
+        group_id: Optional[str],
+        error_properties: Dict[str, Any],
+    ) -> None:
+        """Handle agent handoff spans - maps to $ai_span event."""
+        properties = {
+            **self._base_properties(
+                trace_id, span_id, parent_id, latency, group_id, error_properties
+            ),
+            "$ai_span_name": f"{span_data.from_agent} -> {span_data.to_agent}",
+            "$ai_span_type": "handoff",
+            "$ai_handoff_from_agent": span_data.from_agent,
+            "$ai_handoff_to_agent": span_data.to_agent,
+        }
+
+        self._capture_event("$ai_span", properties, distinct_id)
+
+    def _handle_guardrail_span(
+        self,
+        span_data: GuardrailSpanData,
+        trace_id: str,
+        span_id: str,
+        parent_id: Optional[str],
+        latency: float,
+        distinct_id: str,
+        group_id: Optional[str],
+        error_properties: Dict[str, Any],
+    ) -> None:
+        """Handle guardrail execution spans - maps to $ai_span event."""
+        properties = {
+            **self._base_properties(
+                trace_id, span_id, parent_id, latency, group_id, error_properties
+            ),
+            "$ai_span_name": span_data.name,
+            "$ai_span_type": "guardrail",
+            "$ai_guardrail_triggered": span_data.triggered,
+        }
+
+        self._capture_event("$ai_span", properties, distinct_id)
+
+    def _handle_response_span(
+        self,
+        span_data: ResponseSpanData,
+        trace_id: str,
+        span_id: str,
+        parent_id: Optional[str],
+        latency: float,
+        distinct_id: str,
+        group_id: Optional[str],
+        error_properties: Dict[str, Any],
+    ) -> None:
+        """Handle OpenAI Response API spans - maps to $ai_generation event."""
+        response = span_data.response
+        response_id = response.id if response else None
+
+        # Try to extract usage from response
+        usage = getattr(response, "usage", None) if response else None
+        input_tokens = 0
+        output_tokens = 0
+        if usage:
+            input_tokens = getattr(usage, "input_tokens", 0) or 0
+            output_tokens = getattr(usage, "output_tokens", 0) or 0
+
+        # Try to extract model from response
+        model = getattr(response, "model", None) if response else None
+
+        properties = {
+            **self._base_properties(
+                trace_id, span_id, parent_id, latency, group_id, error_properties
+            ),
+            "$ai_model": model,
+            "$ai_response_id": response_id,
+            "$ai_input": self._with_privacy_mode(_ensure_serializable(span_data.input)),
+            "$ai_input_tokens": input_tokens,
+            "$ai_output_tokens": output_tokens,
+            "$ai_total_tokens": input_tokens + output_tokens,
+        }
+
+        # Extract output content from response
+        if response:
+            output_items = getattr(response, "output", None)
+            if output_items:
+                properties["$ai_output_choices"] = self._with_privacy_mode(
+                    _ensure_serializable(output_items)
+                )
+
+        self._capture_event("$ai_generation", properties, distinct_id)
+
+    def _handle_custom_span(
+        self,
+        span_data: CustomSpanData,
+        trace_id: str,
+        span_id: str,
+        parent_id: Optional[str],
+        latency: float,
+        distinct_id: str,
+        group_id: Optional[str],
+        error_properties: Dict[str, Any],
+    ) -> None:
+        """Handle custom user-defined spans - maps to $ai_span event."""
+        properties = {
+            **self._base_properties(
+                trace_id, span_id, parent_id, latency, group_id, error_properties
+            ),
+            "$ai_span_name": span_data.name,
+            "$ai_span_type": "custom",
+            "$ai_custom_data": self._with_privacy_mode(
+                _ensure_serializable(span_data.data)
+            ),
+        }
+
+        self._capture_event("$ai_span", properties, distinct_id)
+
+    def _handle_audio_span(
+        self,
+        span_data: Union[TranscriptionSpanData, SpeechSpanData, SpeechGroupSpanData],
+        trace_id: str,
+        span_id: str,
+        parent_id: Optional[str],
+        latency: float,
+        distinct_id: str,
+        group_id: Optional[str],
+        error_properties: Dict[str, Any],
+    ) -> None:
+        """Handle audio-related spans (transcription, speech) - maps to $ai_span event."""
+        span_type = span_data.type  # "transcription", "speech", or "speech_group"
+
+        properties = {
+            **self._base_properties(
+                trace_id, span_id, parent_id, latency, group_id, error_properties
+            ),
+            "$ai_span_name": span_type,
+            "$ai_span_type": span_type,
+        }
+
+        # Add model info if available
+        if hasattr(span_data, "model") and span_data.model:
+            properties["$ai_model"] = span_data.model
+
+        # Add model config if available (pass-through property)
+        if hasattr(span_data, "model_config") and span_data.model_config:
+            properties["model_config"] = _ensure_serializable(span_data.model_config)
+
+        # Add time to first audio byte for speech spans (pass-through property)
+        if hasattr(span_data, "first_content_at") and span_data.first_content_at:
+            properties["first_content_at"] = span_data.first_content_at
+
+        # Add audio format info (pass-through properties)
+        if hasattr(span_data, "input_format"):
+            properties["audio_input_format"] = span_data.input_format
+        if hasattr(span_data, "output_format"):
+            properties["audio_output_format"] = span_data.output_format
+
+        # Add text input for TTS
+        if (
+            hasattr(span_data, "input")
+            and span_data.input
+            and isinstance(span_data.input, str)
+        ):
+            properties["$ai_input"] = self._with_privacy_mode(span_data.input)
+
+        # Don't include audio data (base64) - just metadata
+        if hasattr(span_data, "output") and isinstance(span_data.output, str):
+            # For transcription, output is the text
+            properties["$ai_output_state"] = self._with_privacy_mode(span_data.output)
+
+        self._capture_event("$ai_span", properties, distinct_id)
+
+    def _handle_mcp_span(
+        self,
+        span_data: MCPListToolsSpanData,
+        trace_id: str,
+        span_id: str,
+        parent_id: Optional[str],
+        latency: float,
+        distinct_id: str,
+        group_id: Optional[str],
+        error_properties: Dict[str, Any],
+    ) -> None:
+        """Handle MCP (Model Context Protocol) spans - maps to $ai_span event."""
+        properties = {
+            **self._base_properties(
+                trace_id, span_id, parent_id, latency, group_id, error_properties
+            ),
+            "$ai_span_name": f"mcp:{span_data.server}",
+            "$ai_span_type": "mcp_tools",
+            "$ai_mcp_server": span_data.server,
+            "$ai_mcp_tools": span_data.result,
+        }
+
+        self._capture_event("$ai_span", properties, distinct_id)
+
+    def _handle_generic_span(
+        self,
+        span_data: Any,
+        trace_id: str,
+        span_id: str,
+        parent_id: Optional[str],
+        latency: float,
+        distinct_id: str,
+        group_id: Optional[str],
+        error_properties: Dict[str, Any],
+    ) -> None:
+        """Handle unknown span types - maps to $ai_span event."""
+        span_type = getattr(span_data, "type", "unknown")
+
+        properties = {
+            **self._base_properties(
+                trace_id, span_id, parent_id, latency, group_id, error_properties
+            ),
+            "$ai_span_name": span_type,
+            "$ai_span_type": span_type,
+        }
+
+        # Try to export span data
+        if hasattr(span_data, "export"):
+            try:
+                exported = span_data.export()
+                properties["$ai_span_data"] = _ensure_serializable(exported)
+            except Exception:
+                pass
+
+        self._capture_event("$ai_span", properties, distinct_id)
+
+    def shutdown(self) -> None:
+        """Clean up resources when the application stops."""
+        try:
+            self._span_start_times.clear()
+            self._trace_metadata.clear()
+
+            # Flush the PostHog client if possible
+            if hasattr(self._client, "flush") and callable(self._client.flush):
+                self._client.flush()
+        except Exception as e:
+            log.debug(f"Error in shutdown: {e}")
+
+    def force_flush(self) -> None:
+        """Force immediate processing of any queued events."""
+        try:
+            if hasattr(self._client, "flush") and callable(self._client.flush):
+                self._client.flush()
+        except Exception as e:
+            log.debug(f"Error in force_flush: {e}")
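
A minimal usage sketch assembled from the class docstring and `__init__` signature in the diff above. `resolve_user` is a hypothetical helper, and passing `group_id` through the Agents SDK's `trace(...)` context manager is an assumption about that SDK rather than something this diff shows; the processor itself only reads `trace.group_id` via `getattr`.

```python
from typing import Optional

from agents import Agent, Runner, trace
from agents.tracing import add_trace_processor

from posthog.ai.openai_agents import PostHogTracingProcessor


def resolve_user(t) -> Optional[str]:
    # Hypothetical lookup: derive your own user ID from the SDK Trace,
    # e.g. from its metadata. Returning None puts events in personless
    # mode, keyed on the trace_id.
    return (getattr(t, "metadata", None) or {}).get("user_id")


add_trace_processor(
    PostHogTracingProcessor(
        distinct_id=resolve_user,  # str or Callable[[Trace], Optional[str]]
        privacy_mode=True,  # redacts $ai_input / $ai_output_choices
        properties={"team": "support"},  # merged into every captured event
    )
)

agent = Agent(name="Assistant", instructions="You are helpful.")

# If trace() accepts group_id (assumed here), the processor emits it as
# $ai_group_id, linking related traces such as one conversation thread.
with trace("support-thread", group_id="thread_123"):
    result = Runner.run_sync(agent, "Hello!")
```

The sketch mirrors the fallback behavior visible in `on_span_end` and `on_trace_end`: when no distinct ID resolves, events are captured against the `trace_id` with `$process_person_profile` set to `False`.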