posthoganalytics 7.6.0__py3-none-any.whl → 7.7.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- posthoganalytics/ai/openai_agents/__init__.py +76 -0
- posthoganalytics/ai/openai_agents/processor.py +863 -0
- posthoganalytics/test/ai/__init__.py +0 -0
- posthoganalytics/test/ai/openai_agents/__init__.py +1 -0
- posthoganalytics/test/ai/openai_agents/test_processor.py +810 -0
- posthoganalytics/test/ai/test_sanitization.py +522 -0
- posthoganalytics/test/ai/test_system_prompts.py +363 -0
- posthoganalytics/version.py +1 -1
- {posthoganalytics-7.6.0.dist-info → posthoganalytics-7.7.0.dist-info}/METADATA +1 -1
- {posthoganalytics-7.6.0.dist-info → posthoganalytics-7.7.0.dist-info}/RECORD +13 -6
- {posthoganalytics-7.6.0.dist-info → posthoganalytics-7.7.0.dist-info}/WHEEL +0 -0
- {posthoganalytics-7.6.0.dist-info → posthoganalytics-7.7.0.dist-info}/licenses/LICENSE +0 -0
- {posthoganalytics-7.6.0.dist-info → posthoganalytics-7.7.0.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,810 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
from unittest.mock import MagicMock, patch
|
|
3
|
+
|
|
4
|
+
import pytest
|
|
5
|
+
|
|
6
|
+
try:
|
|
7
|
+
from agents.tracing.span_data import (
|
|
8
|
+
AgentSpanData,
|
|
9
|
+
CustomSpanData,
|
|
10
|
+
FunctionSpanData,
|
|
11
|
+
GenerationSpanData,
|
|
12
|
+
GuardrailSpanData,
|
|
13
|
+
HandoffSpanData,
|
|
14
|
+
ResponseSpanData,
|
|
15
|
+
SpeechSpanData,
|
|
16
|
+
TranscriptionSpanData,
|
|
17
|
+
)
|
|
18
|
+
|
|
19
|
+
from posthoganalytics.ai.openai_agents import PostHogTracingProcessor, instrument
|
|
20
|
+
|
|
21
|
+
OPENAI_AGENTS_AVAILABLE = True
|
|
22
|
+
except ImportError:
|
|
23
|
+
OPENAI_AGENTS_AVAILABLE = False
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
# Module-wide marker: every test in this file is skipped when the guarded
# import above could not load the OpenAI Agents SDK.
pytestmark = pytest.mark.skipif(
    not OPENAI_AGENTS_AVAILABLE,
    reason="OpenAI Agents SDK is not available",
)
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
@pytest.fixture(scope="function")
def mock_client():
    """Yield a ``MagicMock`` PostHog client with privacy mode disabled.

    The "posthog" logger is switched to DEBUG for the duration of the test
    and restored to its previous level afterwards, so the verbosity change
    does not leak into tests that run later in the same session.
    """
    posthog_logger = logging.getLogger("posthog")
    previous_level = posthog_logger.level
    posthog_logger.setLevel(logging.DEBUG)

    client = MagicMock()
    client.privacy_mode = False

    yield client

    # Teardown: undo the global logging change made above.
    posthog_logger.setLevel(previous_level)
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
@pytest.fixture(scope="function")
def processor(mock_client):
    """Build a PostHogTracingProcessor for "test-user" with privacy mode off."""
    settings = {
        "client": mock_client,
        "distinct_id": "test-user",
        "privacy_mode": False,
    }
    return PostHogTracingProcessor(**settings)
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
@pytest.fixture
def mock_trace():
    """Provide a MagicMock trace with fixed ID, name, group ID and metadata."""
    fake_trace = MagicMock(
        trace_id="trace_123456789",
        group_id="group_123",
        metadata={"key": "value"},
    )
    # ``name`` is assigned after construction: as a constructor keyword it
    # would name the mock itself rather than set an attribute.
    fake_trace.name = "Test Workflow"
    return fake_trace
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
@pytest.fixture
def mock_span():
    """Provide a MagicMock root span (no parent, no error) for the test trace."""
    return MagicMock(
        trace_id="trace_123456789",
        span_id="span_987654321",
        parent_id=None,
        started_at="2024-01-01T00:00:00Z",
        ended_at="2024-01-01T00:00:01Z",
        error=None,
    )
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
class TestPostHogTracingProcessor:
|
|
72
|
+
"""Tests for the PostHogTracingProcessor class."""
|
|
73
|
+
|
|
74
|
+
def test_initialization(self, mock_client):
    """Test that constructor arguments land on the matching private attributes."""
    init_kwargs = {
        "distinct_id": "user@example.com",
        "privacy_mode": True,
        "groups": {"company": "acme"},
        "properties": {"env": "test"},
    }

    instance = PostHogTracingProcessor(client=mock_client, **init_kwargs)

    assert instance._client == mock_client
    assert instance._distinct_id == "user@example.com"
    assert instance._privacy_mode is True
    assert instance._groups == {"company": "acme"}
    assert instance._properties == {"env": "test"}
|
|
89
|
+
|
|
90
|
+
def test_initialization_with_callable_distinct_id(self, mock_client, mock_trace):
    """Test that ``_get_distinct_id`` returns the value of a callable distinct_id."""
    mock_trace.metadata = {"user_id": "resolved-user"}

    def lookup_user(trace):
        return trace.metadata.get("user_id", "default")

    instance = PostHogTracingProcessor(client=mock_client, distinct_id=lookup_user)

    assert instance._get_distinct_id(mock_trace) == "resolved-user"
|
|
104
|
+
|
|
105
|
+
def test_on_trace_start_stores_metadata(self, processor, mock_client, mock_trace):
    """Test that starting a trace records its metadata without capturing an event."""
    processor.on_trace_start(mock_trace)

    # No event is expected yet; the trace ID should only be remembered.
    mock_client.capture.assert_not_called()
    remembered = processor._trace_metadata
    assert mock_trace.trace_id in remembered
|
|
111
|
+
|
|
112
|
+
def test_on_trace_end_captures_ai_trace(self, processor, mock_client, mock_trace):
    """Test that ending a started trace captures exactly one ``$ai_trace`` event."""
    processor.on_trace_start(mock_trace)
    processor.on_trace_end(mock_trace)

    mock_client.capture.assert_called_once()
    captured = mock_client.capture.call_args.kwargs
    props = captured["properties"]

    assert captured["event"] == "$ai_trace"
    assert captured["distinct_id"] == "test-user"

    expected_props = {
        "$ai_trace_id": "trace_123456789",
        "$ai_trace_name": "Test Workflow",
        "$ai_provider": "openai",
        "$ai_framework": "openai-agents",
    }
    for key, expected in expected_props.items():
        assert props[key] == expected
    assert "$ai_latency" in props
|
|
127
|
+
|
|
128
|
+
def test_personless_mode_when_no_distinct_id(self, mock_client, mock_trace):
    """Test that a trace event is personless when no distinct_id is configured."""
    anonymous = PostHogTracingProcessor(client=mock_client)

    anonymous.on_trace_start(mock_trace)
    anonymous.on_trace_end(mock_trace)

    captured = mock_client.capture.call_args.kwargs
    assert captured["properties"]["$process_person_profile"] is False
    # Without a distinct_id the trace ID is expected to stand in for it.
    assert captured["distinct_id"] == mock_trace.trace_id
|
|
141
|
+
|
|
142
|
+
def test_personless_mode_for_spans_when_no_distinct_id(
    self, mock_client, mock_trace, mock_span
):
    """Test that a span event is personless when no distinct_id is configured."""
    anonymous = PostHogTracingProcessor(client=mock_client)

    anonymous.on_trace_start(mock_trace)
    # Drop anything recorded so far so only the span event is inspected.
    mock_client.capture.reset_mock()

    mock_span.span_data = GenerationSpanData(model="gpt-4o")
    anonymous.on_span_start(mock_span)
    anonymous.on_span_end(mock_span)

    captured = mock_client.capture.call_args.kwargs
    assert captured["properties"]["$process_person_profile"] is False
    assert captured["distinct_id"] == mock_span.trace_id
|
|
162
|
+
|
|
163
|
+
def test_personless_mode_when_callable_returns_none(
    self, mock_client, mock_trace, mock_span
):
    """Test that a span event is personless when the distinct_id callable yields None."""

    def no_user_available(trace):
        # Stands in for a resolver that cannot find a user ID.
        return None

    unresolved = PostHogTracingProcessor(
        client=mock_client, distinct_id=no_user_available
    )

    unresolved.on_trace_start(mock_trace)
    mock_client.capture.reset_mock()

    mock_span.span_data = GenerationSpanData(model="gpt-4o")
    unresolved.on_span_start(mock_span)
    unresolved.on_span_end(mock_span)

    captured = mock_client.capture.call_args.kwargs
    assert captured["properties"]["$process_person_profile"] is False
    assert captured["distinct_id"] == mock_span.trace_id
|
|
188
|
+
|
|
189
|
+
def test_person_profile_when_distinct_id_provided(self, mock_client, mock_trace):
    """Test that ``$process_person_profile`` is absent when a distinct_id is given."""
    identified = PostHogTracingProcessor(client=mock_client, distinct_id="real-user")

    identified.on_trace_start(mock_trace)
    identified.on_trace_end(mock_trace)

    props = mock_client.capture.call_args.kwargs["properties"]
    assert "$process_person_profile" not in props
|
|
201
|
+
|
|
202
|
+
def test_on_trace_end_clears_metadata(self, processor, mock_client, mock_trace):
    """Test that ending a trace removes its stored metadata and captures one event."""
    trace_id = mock_trace.trace_id

    processor.on_trace_start(mock_trace)
    assert trace_id in processor._trace_metadata

    processor.on_trace_end(mock_trace)
    assert trace_id not in processor._trace_metadata
    # The trace event itself must still have been emitted exactly once.
    mock_client.capture.assert_called_once()
|
|
211
|
+
|
|
212
|
+
def test_on_span_start_tracks_time(self, processor, mock_span):
    """Test that starting a span registers its ID in ``_span_start_times``."""
    processor.on_span_start(mock_span)

    tracked_spans = processor._span_start_times
    assert mock_span.span_id in tracked_spans
|
|
216
|
+
|
|
217
|
+
def test_generation_span_mapping(self, processor, mock_client, mock_span):
    """Test that a GenerationSpanData span is captured as one ``$ai_generation`` event."""
    mock_span.span_data = GenerationSpanData(
        input=[{"role": "user", "content": "Hello"}],
        output=[{"role": "assistant", "content": "Hi there!"}],
        model="gpt-4o",
        model_config={"temperature": 0.7, "max_tokens": 100},
        usage={"input_tokens": 10, "output_tokens": 20},
    )

    processor.on_span_start(mock_span)
    processor.on_span_end(mock_span)

    mock_client.capture.assert_called_once()
    captured = mock_client.capture.call_args.kwargs
    props = captured["properties"]

    assert captured["event"] == "$ai_generation"

    # Identification, model and token properties expected on the event.
    expected_props = {
        "$ai_trace_id": "trace_123456789",
        "$ai_span_id": "span_987654321",
        "$ai_provider": "openai",
        "$ai_framework": "openai-agents",
        "$ai_model": "gpt-4o",
        "$ai_input_tokens": 10,
        "$ai_output_tokens": 20,
    }
    for key, expected in expected_props.items():
        assert props[key] == expected

    assert props["$ai_input"] == [{"role": "user", "content": "Hello"}]
    assert props["$ai_output_choices"] == [
        {"role": "assistant", "content": "Hi there!"}
    ]
|
|
248
|
+
|
|
249
|
+
def test_generation_span_with_reasoning_tokens(
    self, processor, mock_client, mock_span
):
    """Test that ``reasoning_tokens`` in usage surfaces as ``$ai_reasoning_tokens``."""
    token_usage = {
        "input_tokens": 100,
        "output_tokens": 500,
        "reasoning_tokens": 400,
    }
    mock_span.span_data = GenerationSpanData(model="o1-preview", usage=token_usage)

    processor.on_span_start(mock_span)
    processor.on_span_end(mock_span)

    props = mock_client.capture.call_args.kwargs["properties"]
    assert props["$ai_reasoning_tokens"] == 400
|
|
268
|
+
|
|
269
|
+
def test_function_span_mapping(self, processor, mock_client, mock_span):
    """Test that a FunctionSpanData span is captured as an ``$ai_span`` of type "tool"."""
    mock_span.span_data = FunctionSpanData(
        name="get_weather",
        input='{"city": "San Francisco"}',
        output="Sunny, 72F",
    )

    processor.on_span_start(mock_span)
    processor.on_span_end(mock_span)

    captured = mock_client.capture.call_args.kwargs
    props = captured["properties"]

    assert captured["event"] == "$ai_span"
    assert props["$ai_span_name"] == "get_weather"
    assert props["$ai_span_type"] == "tool"
    assert props["$ai_input_state"] == '{"city": "San Francisco"}'
    assert props["$ai_output_state"] == "Sunny, 72F"
|
|
290
|
+
|
|
291
|
+
def test_agent_span_mapping(self, processor, mock_client, mock_span):
    """Test that an AgentSpanData span is captured as an ``$ai_span`` of type "agent"."""
    mock_span.span_data = AgentSpanData(
        name="CustomerServiceAgent",
        handoffs=["TechnicalAgent", "BillingAgent"],
        tools=["search", "get_order"],
        output_type="str",
    )

    processor.on_span_start(mock_span)
    processor.on_span_end(mock_span)

    captured = mock_client.capture.call_args.kwargs
    props = captured["properties"]

    assert captured["event"] == "$ai_span"
    assert props["$ai_span_name"] == "CustomerServiceAgent"
    assert props["$ai_span_type"] == "agent"
    assert props["$ai_agent_handoffs"] == ["TechnicalAgent", "BillingAgent"]
    assert props["$ai_agent_tools"] == ["search", "get_order"]
|
|
314
|
+
|
|
315
|
+
def test_handoff_span_mapping(self, processor, mock_client, mock_span):
    """Test that a HandoffSpanData span is captured as an ``$ai_span`` of type "handoff"."""
    mock_span.span_data = HandoffSpanData(
        from_agent="TriageAgent", to_agent="TechnicalAgent"
    )

    processor.on_span_start(mock_span)
    processor.on_span_end(mock_span)

    captured = mock_client.capture.call_args.kwargs
    props = captured["properties"]

    assert captured["event"] == "$ai_span"
    assert props["$ai_span_type"] == "handoff"
    assert props["$ai_handoff_from_agent"] == "TriageAgent"
    assert props["$ai_handoff_to_agent"] == "TechnicalAgent"
    # The span name is expected to join both agent names with an arrow.
    assert props["$ai_span_name"] == "TriageAgent -> TechnicalAgent"
|
|
336
|
+
|
|
337
|
+
def test_guardrail_span_mapping(self, processor, mock_client, mock_span):
    """Test that a GuardrailSpanData span is captured as an ``$ai_span`` of type "guardrail"."""
    mock_span.span_data = GuardrailSpanData(name="ContentFilter", triggered=True)

    processor.on_span_start(mock_span)
    processor.on_span_end(mock_span)

    captured = mock_client.capture.call_args.kwargs
    props = captured["properties"]

    assert captured["event"] == "$ai_span"
    assert props["$ai_span_name"] == "ContentFilter"
    assert props["$ai_span_type"] == "guardrail"
    assert props["$ai_guardrail_triggered"] is True
|
|
354
|
+
|
|
355
|
+
def test_custom_span_mapping(self, processor, mock_client, mock_span):
    """Test that a CustomSpanData span is captured as an ``$ai_span`` of type "custom"."""
    mock_span.span_data = CustomSpanData(
        name="database_query",
        data={"query": "SELECT * FROM users", "rows": 100},
    )

    processor.on_span_start(mock_span)
    processor.on_span_end(mock_span)

    captured = mock_client.capture.call_args.kwargs
    props = captured["properties"]

    assert captured["event"] == "$ai_span"
    assert props["$ai_span_name"] == "database_query"
    assert props["$ai_span_type"] == "custom"
    assert props["$ai_custom_data"] == {"query": "SELECT * FROM users", "rows": 100}
|
|
375
|
+
|
|
376
|
+
def test_privacy_mode_redacts_content(self, mock_client, mock_span):
    """Test that privacy mode nulls input/output content but keeps token counts."""
    private_processor = PostHogTracingProcessor(
        client=mock_client, distinct_id="test-user", privacy_mode=True
    )
    mock_span.span_data = GenerationSpanData(
        input=[{"role": "user", "content": "Secret message"}],
        output=[{"role": "assistant", "content": "Secret response"}],
        model="gpt-4o",
        usage={"input_tokens": 10, "output_tokens": 20},
    )

    private_processor.on_span_start(mock_span)
    private_processor.on_span_end(mock_span)

    props = mock_client.capture.call_args.kwargs["properties"]

    # Message content must not be forwarded.
    assert props["$ai_input"] is None
    assert props["$ai_output_choices"] is None
    # Usage numbers are still expected on the event.
    assert props["$ai_input_tokens"] == 10
    assert props["$ai_output_tokens"] == 20
|
|
403
|
+
|
|
404
|
+
def test_error_handling_in_span(self, processor, mock_client, mock_span):
    """Test that a span error sets ``$ai_is_error`` and exposes its message as ``$ai_error``."""
    mock_span.span_data = GenerationSpanData(model="gpt-4o")
    mock_span.error = {"message": "Rate limit exceeded", "data": {"code": 429}}

    processor.on_span_start(mock_span)
    processor.on_span_end(mock_span)

    props = mock_client.capture.call_args.kwargs["properties"]
    assert props["$ai_is_error"] is True
    assert props["$ai_error"] == "Rate limit exceeded"
|
|
417
|
+
|
|
418
|
+
def test_generation_span_includes_total_tokens(
    self, processor, mock_client, mock_span
):
    """Test that ``$ai_total_tokens`` equals input plus output tokens (100 + 50)."""
    token_usage = {"input_tokens": 100, "output_tokens": 50}
    mock_span.span_data = GenerationSpanData(model="gpt-4o", usage=token_usage)

    processor.on_span_start(mock_span)
    processor.on_span_end(mock_span)

    props = mock_client.capture.call_args.kwargs["properties"]
    assert props["$ai_total_tokens"] == 150
|
|
433
|
+
|
|
434
|
+
def test_error_type_categorization_model_behavior(
    self, processor, mock_client, mock_span
):
    """Test that a ModelBehaviorError is reported as "model_behavior_error"."""
    mock_span.span_data = GenerationSpanData(model="gpt-4o")
    mock_span.error = {
        "message": "ModelBehaviorError: Invalid JSON output",
        "type": "ModelBehaviorError",
    }

    processor.on_span_start(mock_span)
    processor.on_span_end(mock_span)

    props = mock_client.capture.call_args.kwargs["properties"]
    assert props["$ai_error_type"] == "model_behavior_error"
|
|
450
|
+
|
|
451
|
+
def test_error_type_categorization_user_error(
    self, processor, mock_client, mock_span
):
    """Test that a UserError is reported as "user_error"."""
    mock_span.span_data = GenerationSpanData(model="gpt-4o")
    mock_span.error = {"message": "UserError: Tool failed", "type": "UserError"}

    processor.on_span_start(mock_span)
    processor.on_span_end(mock_span)

    props = mock_client.capture.call_args.kwargs["properties"]
    assert props["$ai_error_type"] == "user_error"
|
|
464
|
+
|
|
465
|
+
def test_error_type_categorization_input_guardrail(
    self, processor, mock_client, mock_span
):
    """Test that InputGuardrailTripwireTriggered is reported as "input_guardrail_triggered"."""
    mock_span.span_data = GenerationSpanData(model="gpt-4o")
    # Only a message is supplied here; the error dict has no "type" key.
    mock_span.error = {"message": "InputGuardrailTripwireTriggered: Content blocked"}

    processor.on_span_start(mock_span)
    processor.on_span_end(mock_span)

    props = mock_client.capture.call_args.kwargs["properties"]
    assert props["$ai_error_type"] == "input_guardrail_triggered"
|
|
482
|
+
|
|
483
|
+
def test_error_type_categorization_output_guardrail(
    self, processor, mock_client, mock_span
):
    """Test that OutputGuardrailTripwireTriggered is reported as "output_guardrail_triggered"."""
    mock_span.span_data = GenerationSpanData(model="gpt-4o")
    # Only a message is supplied here; the error dict has no "type" key.
    mock_span.error = {"message": "OutputGuardrailTripwireTriggered: Response blocked"}

    processor.on_span_start(mock_span)
    processor.on_span_end(mock_span)

    props = mock_client.capture.call_args.kwargs["properties"]
    assert props["$ai_error_type"] == "output_guardrail_triggered"
|
|
500
|
+
|
|
501
|
+
def test_error_type_categorization_max_turns(
    self, processor, mock_client, mock_span
):
    """Test that MaxTurnsExceeded is reported as "max_turns_exceeded"."""
    mock_span.span_data = GenerationSpanData(model="gpt-4o")
    mock_span.error = {"message": "MaxTurnsExceeded: Agent exceeded maximum turns"}

    processor.on_span_start(mock_span)
    processor.on_span_end(mock_span)

    props = mock_client.capture.call_args.kwargs["properties"]
    assert props["$ai_error_type"] == "max_turns_exceeded"
|
|
514
|
+
|
|
515
|
+
def test_error_type_categorization_unknown(self, processor, mock_client, mock_span):
    """Test that an unrecognised error message is reported as "unknown"."""
    mock_span.span_data = GenerationSpanData(model="gpt-4o")
    mock_span.error = {"message": "Some random error occurred"}

    processor.on_span_start(mock_span)
    processor.on_span_end(mock_span)

    props = mock_client.capture.call_args.kwargs["properties"]
    assert props["$ai_error_type"] == "unknown"
|
|
526
|
+
|
|
527
|
+
def test_response_span_with_output_and_total_tokens(
    self, processor, mock_client, mock_span
):
    """Test that a ResponseSpanData span yields ``$ai_generation`` with output and totals."""
    # Stand-in response object exposing only the attributes set below.
    fake_usage = MagicMock(input_tokens=25, output_tokens=10)
    fake_response = MagicMock(
        id="resp_123",
        model="gpt-4o",
        output=[{"type": "message", "content": "Hello!"}],
        usage=fake_usage,
    )
    mock_span.span_data = ResponseSpanData(
        response=fake_response, input="Hello, world!"
    )

    processor.on_span_start(mock_span)
    processor.on_span_end(mock_span)

    captured = mock_client.capture.call_args.kwargs
    props = captured["properties"]

    assert captured["event"] == "$ai_generation"
    assert props["$ai_total_tokens"] == 35
    assert props["$ai_output_choices"] == [{"type": "message", "content": "Hello!"}]
    assert props["$ai_response_id"] == "resp_123"
|
|
557
|
+
|
|
558
|
+
def test_speech_span_with_pass_through_properties(
    self, processor, mock_client, mock_span
):
    """Test that a SpeechSpanData span yields an ``$ai_span`` of type "speech".

    Also checks the un-prefixed properties (``first_content_at``,
    ``audio_output_format``, ``model_config``) and the text ``$ai_input``.
    """
    mock_span.span_data = SpeechSpanData(
        input="Hello, how can I help you?",
        output="base64_audio_data",
        output_format="pcm",
        model="tts-1",
        model_config={"voice": "alloy", "speed": 1.0},
        first_content_at="2024-01-01T00:00:00.500Z",
    )

    processor.on_span_start(mock_span)
    processor.on_span_end(mock_span)

    captured = mock_client.capture.call_args.kwargs
    props = captured["properties"]

    assert captured["event"] == "$ai_span"
    assert props["$ai_span_type"] == "speech"
    assert props["$ai_model"] == "tts-1"
    # These keys carry no "$ai_" prefix.
    assert props["first_content_at"] == "2024-01-01T00:00:00.500Z"
    assert props["audio_output_format"] == "pcm"
    assert props["model_config"] == {"voice": "alloy", "speed": 1.0}
    # The spoken text is expected as the event input.
    assert props["$ai_input"] == "Hello, how can I help you?"
|
|
591
|
+
|
|
592
|
+
def test_transcription_span_with_pass_through_properties(
    self, processor, mock_client, mock_span
):
    """Test that a TranscriptionSpanData span yields an ``$ai_span`` of type "transcription".

    Also checks the un-prefixed properties (``audio_input_format``,
    ``model_config``) and that the transcript becomes ``$ai_output_state``.
    """
    mock_span.span_data = TranscriptionSpanData(
        input="base64_audio_data",
        input_format="pcm",
        output="This is the transcribed text.",
        model="whisper-1",
        model_config={"language": "en"},
    )

    processor.on_span_start(mock_span)
    processor.on_span_end(mock_span)

    captured = mock_client.capture.call_args.kwargs
    props = captured["properties"]

    assert captured["event"] == "$ai_span"
    assert props["$ai_span_type"] == "transcription"
    assert props["$ai_model"] == "whisper-1"
    # These keys carry no "$ai_" prefix.
    assert props["audio_input_format"] == "pcm"
    assert props["model_config"] == {"language": "en"}
    # The transcript is expected as the event output.
    assert props["$ai_output_state"] == "This is the transcribed text."
|
|
621
|
+
|
|
622
|
+
def test_latency_calculation(self, processor, mock_client, mock_span):
    """Test that ``$ai_latency`` is 1.5 when the patched clock advances by 1.5 seconds."""
    mock_span.span_data = GenerationSpanData(model="gpt-4o")

    with patch("time.time", return_value=1000.0) as fake_clock:
        processor.on_span_start(mock_span)

        # Move the patched clock forward 1.5 seconds before the span ends.
        fake_clock.return_value = 1001.5
        processor.on_span_end(mock_span)

    props = mock_client.capture.call_args.kwargs["properties"]
    assert props["$ai_latency"] == pytest.approx(1.5, rel=0.01)
|
|
636
|
+
|
|
637
|
+
def test_groups_included_in_events(self, mock_client, mock_trace, mock_span):
    """Test that configured groups are passed as ``groups`` on the captured trace event."""
    grouped = PostHogTracingProcessor(
        client=mock_client,
        distinct_id="test-user",
        groups={"company": "acme", "team": "engineering"},
    )

    grouped.on_trace_start(mock_trace)
    grouped.on_trace_end(mock_trace)

    captured = mock_client.capture.call_args.kwargs
    assert captured["groups"] == {"company": "acme", "team": "engineering"}
|
|
650
|
+
|
|
651
|
+
def test_additional_properties_included(self, mock_client, mock_trace):
    """Test that extra constructor properties appear on the captured trace event."""
    tagged = PostHogTracingProcessor(
        client=mock_client,
        distinct_id="test-user",
        properties={"environment": "production", "version": "1.0"},
    )

    tagged.on_trace_start(mock_trace)
    tagged.on_trace_end(mock_trace)

    props = mock_client.capture.call_args.kwargs["properties"]
    assert props["environment"] == "production"
    assert props["version"] == "1.0"
|
|
665
|
+
|
|
666
|
+
def test_shutdown_clears_state(self, processor):
    """Test that ``shutdown`` empties the span start-time and trace metadata stores."""
    # Seed both stores so there is something to clear.
    processor._span_start_times["span_1"] = 1000.0
    processor._trace_metadata["trace_1"] = {"name": "test"}

    processor.shutdown()

    remaining_spans = len(processor._span_start_times)
    assert remaining_spans == 0
    remaining_traces = len(processor._trace_metadata)
    assert remaining_traces == 0
|
|
675
|
+
|
|
676
|
+
def test_force_flush_calls_client_flush(self, processor, mock_client):
    """Test that ``force_flush`` triggers exactly one ``flush`` call on the client."""
    processor.force_flush()

    client_flush = mock_client.flush
    client_flush.assert_called_once()
|
|
680
|
+
|
|
681
|
+
def test_generation_span_with_no_usage(self, processor, mock_client, mock_span):
    """Test that all token counts are zero when the span carries no usage data."""
    mock_span.span_data = GenerationSpanData(model="gpt-4o")

    processor.on_span_start(mock_span)
    processor.on_span_end(mock_span)

    props = mock_client.capture.call_args.kwargs["properties"]
    for token_key in ("$ai_input_tokens", "$ai_output_tokens", "$ai_total_tokens"):
        assert props[token_key] == 0
|
|
693
|
+
|
|
694
|
+
def test_generation_span_with_partial_usage(
    self, processor, mock_client, mock_span
):
    """Test token counts when usage contains only ``input_tokens``."""
    mock_span.span_data = GenerationSpanData(
        model="gpt-4o", usage={"input_tokens": 42}
    )

    processor.on_span_start(mock_span)
    processor.on_span_end(mock_span)

    props = mock_client.capture.call_args.kwargs["properties"]
    assert props["$ai_input_tokens"] == 42
    # The missing output count is expected to be zero, leaving the total at 42.
    assert props["$ai_output_tokens"] == 0
    assert props["$ai_total_tokens"] == 42
|
|
711
|
+
|
|
712
|
+
def test_error_type_categorization_by_type_field_only(
    self, processor, mock_client, mock_span
):
    """Verify the error category is derived from the error's ``type`` field.

    The error message here is generic, so only the ``type`` value names
    the error kind that the captured ``$ai_error_type`` must reflect.
    """
    error_payload = {
        "message": "Something went wrong",
        "type": "ModelBehaviorError",
    }
    mock_span.span_data = GenerationSpanData(model="gpt-4o")
    mock_span.error = error_payload

    processor.on_span_start(mock_span)
    processor.on_span_end(mock_span)

    # call_args[1] is the kwargs dict of the last capture() invocation.
    captured_properties = mock_client.capture.call_args[1]["properties"]
    assert captured_properties["$ai_error_type"] == "model_behavior_error"
|
|
728
|
+
|
|
729
|
+
def test_distinct_id_resolved_from_trace_for_spans(
    self, mock_client, mock_trace, mock_span
):
    """Verify span events carry the distinct_id produced by a callable resolver.

    The processor is given a callable ``distinct_id`` that derives the id
    from the trace name. The trace is started first, then a generation
    span is run and its captured ``distinct_id`` is checked.
    """
    processor = PostHogTracingProcessor(
        client=mock_client,
        distinct_id=lambda trace: f"user-{trace.name}",
    )

    # Start the trace, then clear recorded capture calls so that only the
    # span lifecycle below contributes to call_args.
    processor.on_trace_start(mock_trace)
    mock_client.capture.reset_mock()

    mock_span.span_data = GenerationSpanData(model="gpt-4o")
    processor.on_span_start(mock_span)
    processor.on_span_end(mock_span)

    # The expected value implies the mock_trace fixture is named
    # "Test Workflow" (fixture defined elsewhere in this module).
    captured_kwargs = mock_client.capture.call_args[1]
    assert captured_kwargs["distinct_id"] == "user-Test Workflow"
|
|
755
|
+
|
|
756
|
+
def test_eviction_of_stale_entries(self, mock_client):
    """Verify eviction brings both tracking dicts back within the cap.

    The cap is lowered to 10, each dict is seeded with 15 entries, and
    ``_evict_stale_entries()`` is invoked directly.
    """
    processor = PostHogTracingProcessor(client=mock_client, distinct_id="test-user")
    processor._max_tracked_entries = 10

    # Overfill both dicts past the cap.
    for index in range(15):
        trace_key = f"trace_{index}"
        processor._span_start_times[f"span_{index}"] = float(index)
        processor._trace_metadata[trace_key] = {"name": trace_key}

    processor._evict_stale_entries()

    # Each dict must now hold no more than the configured maximum.
    for tracked in (processor._span_start_times, processor._trace_metadata):
        assert len(tracked) <= 10
|
|
774
|
+
|
|
775
|
+
|
|
776
|
+
class TestInstrumentHelper:
    """Exercises the instrument() convenience function."""

    def test_instrument_registers_processor(self, mock_client):
        """instrument() passes the processor it returns to add_trace_processor."""
        with patch("agents.tracing.add_trace_processor") as register_mock:
            created = instrument(client=mock_client, distinct_id="test-user")

            register_mock.assert_called_once_with(created)
            assert isinstance(created, PostHogTracingProcessor)

    def test_instrument_with_privacy_mode(self, mock_client):
        """privacy_mode=True is stored on the returned processor."""
        # Patch registration so the real add_trace_processor is not invoked.
        with patch("agents.tracing.add_trace_processor"):
            created = instrument(client=mock_client, privacy_mode=True)

            assert created._privacy_mode is True

    def test_instrument_with_groups_and_properties(self, mock_client):
        """groups and properties are stored on the returned processor."""
        expected_groups = {"company": "acme"}
        expected_properties = {"env": "test"}

        # Patch registration so the real add_trace_processor is not invoked.
        with patch("agents.tracing.add_trace_processor"):
            created = instrument(
                client=mock_client,
                groups={"company": "acme"},
                properties={"env": "test"},
            )

            assert created._groups == expected_groups
            assert created._properties == expected_properties
|