posthog 7.6.0__py3-none-any.whl → 7.7.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,810 @@
1
+ import logging
2
+ from unittest.mock import MagicMock, patch
3
+
4
+ import pytest
5
+
6
# The integration under test is optional: both the OpenAI Agents SDK and the
# PostHog wrapper must import cleanly, otherwise the whole module is skipped.
try:
    from agents.tracing.span_data import (
        AgentSpanData,
        CustomSpanData,
        FunctionSpanData,
        GenerationSpanData,
        GuardrailSpanData,
        HandoffSpanData,
        ResponseSpanData,
        SpeechSpanData,
        TranscriptionSpanData,
    )

    from posthog.ai.openai_agents import PostHogTracingProcessor, instrument
except ImportError:
    OPENAI_AGENTS_AVAILABLE = False
else:
    OPENAI_AGENTS_AVAILABLE = True


# Skip all tests if OpenAI Agents SDK is not available
pytestmark = pytest.mark.skipif(
    not OPENAI_AGENTS_AVAILABLE,
    reason="OpenAI Agents SDK is not available",
)
30
+
31
+
32
@pytest.fixture(scope="function")
def mock_client():
    """Provide a ``MagicMock`` PostHog client with privacy mode disabled.

    The "posthog" logger is switched to DEBUG for the duration of the test
    and put back to its previous level afterwards, so the fixture does not
    leak logging configuration into unrelated tests.
    """
    client = MagicMock()
    client.privacy_mode = False

    posthog_logger = logging.getLogger("posthog")
    previous_level = posthog_logger.level
    posthog_logger.setLevel(logging.DEBUG)

    yield client

    # Teardown: undo the global logger change made above.
    posthog_logger.setLevel(previous_level)
38
+
39
+
40
@pytest.fixture(scope="function")
def processor(mock_client):
    """Build a processor bound to the mock client with a fixed distinct_id."""
    settings = {
        "client": mock_client,
        "distinct_id": "test-user",
        "privacy_mode": False,
    }
    return PostHogTracingProcessor(**settings)
47
+
48
+
49
@pytest.fixture
def mock_trace():
    """Return a ``MagicMock`` standing in for an Agents SDK trace."""
    fake_trace = MagicMock(
        trace_id="trace_123456789",
        group_id="group_123",
        metadata={"key": "value"},
    )
    # ``name`` is reserved by the Mock constructor, so it is set afterwards.
    fake_trace.name = "Test Workflow"
    return fake_trace
57
+
58
+
59
@pytest.fixture
def mock_span():
    """Return a ``MagicMock`` standing in for an error-free root span."""
    fake_span = MagicMock()
    fake_span.configure_mock(
        trace_id="trace_123456789",
        span_id="span_987654321",
        parent_id=None,
        started_at="2024-01-01T00:00:00Z",
        ended_at="2024-01-01T00:00:01Z",
        error=None,
    )
    return fake_span
69
+
70
+
71
class TestPostHogTracingProcessor:
    """Tests for the PostHogTracingProcessor class.

    Most tests follow the same shape: feed one trace or span through the
    processor and inspect the keyword arguments of the resulting
    ``client.capture`` call. The underscore-prefixed helpers below hold that
    shared lifecycle; pytest does not collect them because their names do not
    start with ``test``.
    """

    @staticmethod
    def _last_capture_kwargs(mock_client):
        """Return the keyword arguments of the most recent ``capture`` call.

        Asserting the call first turns a missing capture into an explicit
        assertion failure instead of a ``TypeError`` raised by subscripting
        ``call_args`` while it is still ``None``.
        """
        mock_client.capture.assert_called()
        return mock_client.capture.call_args[1]

    def _capture_span(self, processor, mock_client, mock_span, span_data):
        """Run ``mock_span`` carrying ``span_data`` through start/end.

        Returns the keyword arguments of the last ``capture`` call.
        """
        mock_span.span_data = span_data
        processor.on_span_start(mock_span)
        processor.on_span_end(mock_span)
        return self._last_capture_kwargs(mock_client)

    def _capture_trace(self, processor, mock_client, mock_trace):
        """Run ``mock_trace`` through start/end and return the capture kwargs."""
        processor.on_trace_start(mock_trace)
        processor.on_trace_end(mock_trace)
        return self._last_capture_kwargs(mock_client)

    def _assert_error_type(self, processor, mock_client, mock_span, error, expected):
        """Attach ``error`` to a generation span and check ``$ai_error_type``."""
        mock_span.error = error
        call_kwargs = self._capture_span(
            processor, mock_client, mock_span, GenerationSpanData(model="gpt-4o")
        )
        assert call_kwargs["properties"]["$ai_error_type"] == expected

    def test_initialization(self, mock_client):
        """Test processor initializes correctly."""
        processor = PostHogTracingProcessor(
            client=mock_client,
            distinct_id="user@example.com",
            privacy_mode=True,
            groups={"company": "acme"},
            properties={"env": "test"},
        )

        assert processor._client == mock_client
        assert processor._distinct_id == "user@example.com"
        assert processor._privacy_mode is True
        assert processor._groups == {"company": "acme"}
        assert processor._properties == {"env": "test"}

    def test_initialization_with_callable_distinct_id(self, mock_client, mock_trace):
        """Test processor with callable distinct_id resolver."""

        def resolver(trace):
            return trace.metadata.get("user_id", "default")

        processor = PostHogTracingProcessor(
            client=mock_client,
            distinct_id=resolver,
        )

        mock_trace.metadata = {"user_id": "resolved-user"}
        distinct_id = processor._get_distinct_id(mock_trace)
        assert distinct_id == "resolved-user"

    def test_on_trace_start_stores_metadata(self, processor, mock_client, mock_trace):
        """Test that on_trace_start stores metadata but does not capture an event."""
        processor.on_trace_start(mock_trace)

        mock_client.capture.assert_not_called()
        assert mock_trace.trace_id in processor._trace_metadata

    def test_on_trace_end_captures_ai_trace(self, processor, mock_client, mock_trace):
        """Test that on_trace_end captures $ai_trace event."""
        call_kwargs = self._capture_trace(processor, mock_client, mock_trace)
        mock_client.capture.assert_called_once()

        props = call_kwargs["properties"]
        assert call_kwargs["event"] == "$ai_trace"
        assert call_kwargs["distinct_id"] == "test-user"
        assert props["$ai_trace_id"] == "trace_123456789"
        assert props["$ai_trace_name"] == "Test Workflow"
        assert props["$ai_provider"] == "openai"
        assert props["$ai_framework"] == "openai-agents"
        assert "$ai_latency" in props

    def test_personless_mode_when_no_distinct_id(self, mock_client, mock_trace):
        """Test that trace events use personless mode when no distinct_id is provided."""
        processor = PostHogTracingProcessor(
            client=mock_client,
        )

        call_kwargs = self._capture_trace(processor, mock_client, mock_trace)

        assert call_kwargs["properties"]["$process_person_profile"] is False
        # Should fallback to trace_id as the distinct_id
        assert call_kwargs["distinct_id"] == mock_trace.trace_id

    def test_personless_mode_for_spans_when_no_distinct_id(
        self, mock_client, mock_trace, mock_span
    ):
        """Test that span events use personless mode when no distinct_id is provided."""
        processor = PostHogTracingProcessor(
            client=mock_client,
        )

        processor.on_trace_start(mock_trace)
        mock_client.capture.reset_mock()

        call_kwargs = self._capture_span(
            processor, mock_client, mock_span, GenerationSpanData(model="gpt-4o")
        )

        assert call_kwargs["properties"]["$process_person_profile"] is False
        assert call_kwargs["distinct_id"] == mock_span.trace_id

    def test_personless_mode_when_callable_returns_none(
        self, mock_client, mock_trace, mock_span
    ):
        """Test personless mode when callable distinct_id returns None."""

        def resolver(trace):
            return None  # Simulate no user ID available

        processor = PostHogTracingProcessor(
            client=mock_client,
            distinct_id=resolver,
        )

        processor.on_trace_start(mock_trace)
        mock_client.capture.reset_mock()

        call_kwargs = self._capture_span(
            processor, mock_client, mock_span, GenerationSpanData(model="gpt-4o")
        )

        assert call_kwargs["properties"]["$process_person_profile"] is False
        assert call_kwargs["distinct_id"] == mock_span.trace_id

    def test_person_profile_when_distinct_id_provided(self, mock_client, mock_trace):
        """Test that events create person profiles when distinct_id is provided."""
        processor = PostHogTracingProcessor(
            client=mock_client,
            distinct_id="real-user",
        )

        call_kwargs = self._capture_trace(processor, mock_client, mock_trace)

        assert "$process_person_profile" not in call_kwargs["properties"]

    def test_on_trace_end_clears_metadata(self, processor, mock_client, mock_trace):
        """Test that on_trace_end clears stored trace metadata."""
        processor.on_trace_start(mock_trace)
        assert mock_trace.trace_id in processor._trace_metadata

        processor.on_trace_end(mock_trace)
        assert mock_trace.trace_id not in processor._trace_metadata
        # Also verify it captured the event
        mock_client.capture.assert_called_once()

    def test_on_span_start_tracks_time(self, processor, mock_span):
        """Test that on_span_start records start time."""
        processor.on_span_start(mock_span)
        assert mock_span.span_id in processor._span_start_times

    def test_generation_span_mapping(self, processor, mock_client, mock_span):
        """Test GenerationSpanData maps to $ai_generation event."""
        span_data = GenerationSpanData(
            input=[{"role": "user", "content": "Hello"}],
            output=[{"role": "assistant", "content": "Hi there!"}],
            model="gpt-4o",
            model_config={"temperature": 0.7, "max_tokens": 100},
            usage={"input_tokens": 10, "output_tokens": 20},
        )

        call_kwargs = self._capture_span(processor, mock_client, mock_span, span_data)
        mock_client.capture.assert_called_once()

        props = call_kwargs["properties"]
        assert call_kwargs["event"] == "$ai_generation"
        assert props["$ai_trace_id"] == "trace_123456789"
        assert props["$ai_span_id"] == "span_987654321"
        assert props["$ai_provider"] == "openai"
        assert props["$ai_framework"] == "openai-agents"
        assert props["$ai_model"] == "gpt-4o"
        assert props["$ai_input_tokens"] == 10
        assert props["$ai_output_tokens"] == 20
        assert props["$ai_input"] == [{"role": "user", "content": "Hello"}]
        assert props["$ai_output_choices"] == [
            {"role": "assistant", "content": "Hi there!"}
        ]

    def test_generation_span_with_reasoning_tokens(
        self, processor, mock_client, mock_span
    ):
        """Test GenerationSpanData includes reasoning tokens when present."""
        span_data = GenerationSpanData(
            model="o1-preview",
            usage={
                "input_tokens": 100,
                "output_tokens": 500,
                "reasoning_tokens": 400,
            },
        )

        call_kwargs = self._capture_span(processor, mock_client, mock_span, span_data)

        assert call_kwargs["properties"]["$ai_reasoning_tokens"] == 400

    def test_function_span_mapping(self, processor, mock_client, mock_span):
        """Test FunctionSpanData maps to $ai_span event with type=tool."""
        span_data = FunctionSpanData(
            name="get_weather",
            input='{"city": "San Francisco"}',
            output="Sunny, 72F",
        )

        call_kwargs = self._capture_span(processor, mock_client, mock_span, span_data)

        props = call_kwargs["properties"]
        assert call_kwargs["event"] == "$ai_span"
        assert props["$ai_span_name"] == "get_weather"
        assert props["$ai_span_type"] == "tool"
        assert props["$ai_input_state"] == '{"city": "San Francisco"}'
        assert props["$ai_output_state"] == "Sunny, 72F"

    def test_agent_span_mapping(self, processor, mock_client, mock_span):
        """Test AgentSpanData maps to $ai_span event with type=agent."""
        span_data = AgentSpanData(
            name="CustomerServiceAgent",
            handoffs=["TechnicalAgent", "BillingAgent"],
            tools=["search", "get_order"],
            output_type="str",
        )

        call_kwargs = self._capture_span(processor, mock_client, mock_span, span_data)

        props = call_kwargs["properties"]
        assert call_kwargs["event"] == "$ai_span"
        assert props["$ai_span_name"] == "CustomerServiceAgent"
        assert props["$ai_span_type"] == "agent"
        assert props["$ai_agent_handoffs"] == [
            "TechnicalAgent",
            "BillingAgent",
        ]
        assert props["$ai_agent_tools"] == ["search", "get_order"]

    def test_handoff_span_mapping(self, processor, mock_client, mock_span):
        """Test HandoffSpanData maps to $ai_span event with type=handoff."""
        span_data = HandoffSpanData(
            from_agent="TriageAgent",
            to_agent="TechnicalAgent",
        )

        call_kwargs = self._capture_span(processor, mock_client, mock_span, span_data)

        props = call_kwargs["properties"]
        assert call_kwargs["event"] == "$ai_span"
        assert props["$ai_span_type"] == "handoff"
        assert props["$ai_handoff_from_agent"] == "TriageAgent"
        assert props["$ai_handoff_to_agent"] == "TechnicalAgent"
        assert props["$ai_span_name"] == "TriageAgent -> TechnicalAgent"

    def test_guardrail_span_mapping(self, processor, mock_client, mock_span):
        """Test GuardrailSpanData maps to $ai_span event with type=guardrail."""
        span_data = GuardrailSpanData(
            name="ContentFilter",
            triggered=True,
        )

        call_kwargs = self._capture_span(processor, mock_client, mock_span, span_data)

        props = call_kwargs["properties"]
        assert call_kwargs["event"] == "$ai_span"
        assert props["$ai_span_name"] == "ContentFilter"
        assert props["$ai_span_type"] == "guardrail"
        assert props["$ai_guardrail_triggered"] is True

    def test_custom_span_mapping(self, processor, mock_client, mock_span):
        """Test CustomSpanData maps to $ai_span event with type=custom."""
        span_data = CustomSpanData(
            name="database_query",
            data={"query": "SELECT * FROM users", "rows": 100},
        )

        call_kwargs = self._capture_span(processor, mock_client, mock_span, span_data)

        props = call_kwargs["properties"]
        assert call_kwargs["event"] == "$ai_span"
        assert props["$ai_span_name"] == "database_query"
        assert props["$ai_span_type"] == "custom"
        assert props["$ai_custom_data"] == {
            "query": "SELECT * FROM users",
            "rows": 100,
        }

    def test_privacy_mode_redacts_content(self, mock_client, mock_span):
        """Test that privacy_mode redacts input/output content."""
        processor = PostHogTracingProcessor(
            client=mock_client,
            distinct_id="test-user",
            privacy_mode=True,
        )

        span_data = GenerationSpanData(
            input=[{"role": "user", "content": "Secret message"}],
            output=[{"role": "assistant", "content": "Secret response"}],
            model="gpt-4o",
            usage={"input_tokens": 10, "output_tokens": 20},
        )

        call_kwargs = self._capture_span(processor, mock_client, mock_span, span_data)

        props = call_kwargs["properties"]
        # Content should be redacted
        assert props["$ai_input"] is None
        assert props["$ai_output_choices"] is None
        # Token counts should still be present
        assert props["$ai_input_tokens"] == 10
        assert props["$ai_output_tokens"] == 20

    def test_error_handling_in_span(self, processor, mock_client, mock_span):
        """Test that span errors are captured correctly."""
        mock_span.error = {"message": "Rate limit exceeded", "data": {"code": 429}}

        call_kwargs = self._capture_span(
            processor, mock_client, mock_span, GenerationSpanData(model="gpt-4o")
        )

        assert call_kwargs["properties"]["$ai_is_error"] is True
        assert call_kwargs["properties"]["$ai_error"] == "Rate limit exceeded"

    def test_generation_span_includes_total_tokens(
        self, processor, mock_client, mock_span
    ):
        """Test that $ai_total_tokens is calculated and included."""
        span_data = GenerationSpanData(
            model="gpt-4o",
            usage={"input_tokens": 100, "output_tokens": 50},
        )

        call_kwargs = self._capture_span(processor, mock_client, mock_span, span_data)

        assert call_kwargs["properties"]["$ai_total_tokens"] == 150

    def test_error_type_categorization_model_behavior(
        self, processor, mock_client, mock_span
    ):
        """Test that ModelBehaviorError is categorized correctly."""
        self._assert_error_type(
            processor,
            mock_client,
            mock_span,
            error={
                "message": "ModelBehaviorError: Invalid JSON output",
                "type": "ModelBehaviorError",
            },
            expected="model_behavior_error",
        )

    def test_error_type_categorization_user_error(
        self, processor, mock_client, mock_span
    ):
        """Test that UserError is categorized correctly."""
        self._assert_error_type(
            processor,
            mock_client,
            mock_span,
            error={"message": "UserError: Tool failed", "type": "UserError"},
            expected="user_error",
        )

    def test_error_type_categorization_input_guardrail(
        self, processor, mock_client, mock_span
    ):
        """Test that InputGuardrailTripwireTriggered is categorized correctly."""
        self._assert_error_type(
            processor,
            mock_client,
            mock_span,
            error={"message": "InputGuardrailTripwireTriggered: Content blocked"},
            expected="input_guardrail_triggered",
        )

    def test_error_type_categorization_output_guardrail(
        self, processor, mock_client, mock_span
    ):
        """Test that OutputGuardrailTripwireTriggered is categorized correctly."""
        self._assert_error_type(
            processor,
            mock_client,
            mock_span,
            error={"message": "OutputGuardrailTripwireTriggered: Response blocked"},
            expected="output_guardrail_triggered",
        )

    def test_error_type_categorization_max_turns(
        self, processor, mock_client, mock_span
    ):
        """Test that MaxTurnsExceeded is categorized correctly."""
        self._assert_error_type(
            processor,
            mock_client,
            mock_span,
            error={"message": "MaxTurnsExceeded: Agent exceeded maximum turns"},
            expected="max_turns_exceeded",
        )

    def test_error_type_categorization_unknown(self, processor, mock_client, mock_span):
        """Test that unknown errors are categorized as unknown."""
        self._assert_error_type(
            processor,
            mock_client,
            mock_span,
            error={"message": "Some random error occurred"},
            expected="unknown",
        )

    def test_response_span_with_output_and_total_tokens(
        self, processor, mock_client, mock_span
    ):
        """Test ResponseSpanData includes output choices and total tokens."""
        # Create a mock response object
        mock_response = MagicMock()
        mock_response.id = "resp_123"
        mock_response.model = "gpt-4o"
        mock_response.output = [{"type": "message", "content": "Hello!"}]
        mock_response.usage = MagicMock()
        mock_response.usage.input_tokens = 25
        mock_response.usage.output_tokens = 10

        span_data = ResponseSpanData(
            response=mock_response,
            input="Hello, world!",
        )

        call_kwargs = self._capture_span(processor, mock_client, mock_span, span_data)

        props = call_kwargs["properties"]
        assert call_kwargs["event"] == "$ai_generation"
        assert props["$ai_total_tokens"] == 35
        assert props["$ai_output_choices"] == [
            {"type": "message", "content": "Hello!"}
        ]
        assert props["$ai_response_id"] == "resp_123"

    def test_speech_span_with_pass_through_properties(
        self, processor, mock_client, mock_span
    ):
        """Test SpeechSpanData includes pass-through properties."""
        span_data = SpeechSpanData(
            input="Hello, how can I help you?",
            output="base64_audio_data",
            output_format="pcm",
            model="tts-1",
            model_config={"voice": "alloy", "speed": 1.0},
            first_content_at="2024-01-01T00:00:00.500Z",
        )

        call_kwargs = self._capture_span(processor, mock_client, mock_span, span_data)

        props = call_kwargs["properties"]
        assert call_kwargs["event"] == "$ai_span"
        assert props["$ai_span_type"] == "speech"
        assert props["$ai_model"] == "tts-1"
        # Pass-through properties (no $ai_ prefix)
        assert props["first_content_at"] == "2024-01-01T00:00:00.500Z"
        assert props["audio_output_format"] == "pcm"
        assert props["model_config"] == {
            "voice": "alloy",
            "speed": 1.0,
        }
        # Text input should be captured
        assert props["$ai_input"] == "Hello, how can I help you?"

    def test_transcription_span_with_pass_through_properties(
        self, processor, mock_client, mock_span
    ):
        """Test TranscriptionSpanData includes pass-through properties."""
        span_data = TranscriptionSpanData(
            input="base64_audio_data",
            input_format="pcm",
            output="This is the transcribed text.",
            model="whisper-1",
            model_config={"language": "en"},
        )

        call_kwargs = self._capture_span(processor, mock_client, mock_span, span_data)

        props = call_kwargs["properties"]
        assert call_kwargs["event"] == "$ai_span"
        assert props["$ai_span_type"] == "transcription"
        assert props["$ai_model"] == "whisper-1"
        # Pass-through properties (no $ai_ prefix)
        assert props["audio_input_format"] == "pcm"
        assert props["model_config"] == {"language": "en"}
        # Transcription output should be captured
        assert props["$ai_output_state"] == "This is the transcribed text."

    def test_latency_calculation(self, processor, mock_client, mock_span):
        """Test that latency is calculated correctly."""
        mock_span.span_data = GenerationSpanData(model="gpt-4o")

        # The clock must advance between start and end, so the lifecycle is
        # driven by hand here instead of through _capture_span.
        with patch("time.time") as mock_time:
            mock_time.return_value = 1000.0
            processor.on_span_start(mock_span)

            mock_time.return_value = 1001.5  # 1.5 seconds later
            processor.on_span_end(mock_span)

        call_kwargs = self._last_capture_kwargs(mock_client)
        assert call_kwargs["properties"]["$ai_latency"] == pytest.approx(1.5, rel=0.01)

    def test_groups_included_in_events(self, mock_client, mock_trace, mock_span):
        """Test that groups are included in captured events."""
        processor = PostHogTracingProcessor(
            client=mock_client,
            distinct_id="test-user",
            groups={"company": "acme", "team": "engineering"},
        )

        call_kwargs = self._capture_trace(processor, mock_client, mock_trace)

        assert call_kwargs["groups"] == {"company": "acme", "team": "engineering"}

    def test_additional_properties_included(self, mock_client, mock_trace):
        """Test that additional properties are included in events."""
        processor = PostHogTracingProcessor(
            client=mock_client,
            distinct_id="test-user",
            properties={"environment": "production", "version": "1.0"},
        )

        call_kwargs = self._capture_trace(processor, mock_client, mock_trace)

        assert call_kwargs["properties"]["environment"] == "production"
        assert call_kwargs["properties"]["version"] == "1.0"

    def test_shutdown_clears_state(self, processor):
        """Test that shutdown clears internal state."""
        processor._span_start_times["span_1"] = 1000.0
        processor._trace_metadata["trace_1"] = {"name": "test"}

        processor.shutdown()

        assert len(processor._span_start_times) == 0
        assert len(processor._trace_metadata) == 0

    def test_force_flush_calls_client_flush(self, processor, mock_client):
        """Test that force_flush calls client.flush()."""
        processor.force_flush()
        mock_client.flush.assert_called_once()

    def test_generation_span_with_no_usage(self, processor, mock_client, mock_span):
        """Test GenerationSpanData with no usage data defaults to zero tokens."""
        call_kwargs = self._capture_span(
            processor, mock_client, mock_span, GenerationSpanData(model="gpt-4o")
        )

        props = call_kwargs["properties"]
        assert props["$ai_input_tokens"] == 0
        assert props["$ai_output_tokens"] == 0
        assert props["$ai_total_tokens"] == 0

    def test_generation_span_with_partial_usage(
        self, processor, mock_client, mock_span
    ):
        """Test GenerationSpanData with only input_tokens present."""
        span_data = GenerationSpanData(
            model="gpt-4o",
            usage={"input_tokens": 42},
        )

        call_kwargs = self._capture_span(processor, mock_client, mock_span, span_data)

        props = call_kwargs["properties"]
        assert props["$ai_input_tokens"] == 42
        assert props["$ai_output_tokens"] == 0
        assert props["$ai_total_tokens"] == 42

    def test_error_type_categorization_by_type_field_only(
        self, processor, mock_client, mock_span
    ):
        """Test error categorization works when only the type field matches."""
        self._assert_error_type(
            processor,
            mock_client,
            mock_span,
            error={
                "message": "Something went wrong",
                "type": "ModelBehaviorError",
            },
            expected="model_behavior_error",
        )

    def test_distinct_id_resolved_from_trace_for_spans(
        self, mock_client, mock_trace, mock_span
    ):
        """Test that spans use the distinct_id resolved at trace start."""

        def resolver(trace):
            return f"user-{trace.name}"

        processor = PostHogTracingProcessor(
            client=mock_client,
            distinct_id=resolver,
        )

        # Start trace - this resolves and stores distinct_id
        processor.on_trace_start(mock_trace)
        mock_client.capture.reset_mock()

        # End a span - should use the stored distinct_id from trace
        call_kwargs = self._capture_span(
            processor, mock_client, mock_span, GenerationSpanData(model="gpt-4o")
        )

        assert call_kwargs["distinct_id"] == "user-Test Workflow"

    def test_eviction_of_stale_entries(self, mock_client):
        """Test that stale entries are evicted when max is exceeded."""
        processor = PostHogTracingProcessor(
            client=mock_client,
            distinct_id="test-user",
        )
        processor._max_tracked_entries = 10

        # Fill beyond max
        for i in range(15):
            processor._span_start_times[f"span_{i}"] = float(i)
            processor._trace_metadata[f"trace_{i}"] = {"name": f"trace_{i}"}

        processor._evict_stale_entries()

        # Both maps must be back at or below the configured cap; the exact
        # number of evicted entries is deliberately not pinned here.
        assert len(processor._span_start_times) <= 10
        assert len(processor._trace_metadata) <= 10
774
+
775
+
776
class TestInstrumentHelper:
    """Tests for the instrument() convenience function.

    ``agents.tracing.add_trace_processor`` is patched in every test so no
    processor is registered with the real tracing machinery.
    """

    def test_instrument_registers_processor(self, mock_client):
        """Test that instrument() registers a processor."""
        with patch("agents.tracing.add_trace_processor") as register_mock:
            created = instrument(client=mock_client, distinct_id="test-user")

        # The returned object must be exactly what was handed to the SDK.
        assert isinstance(created, PostHogTracingProcessor)
        register_mock.assert_called_once_with(created)

    def test_instrument_with_privacy_mode(self, mock_client):
        """Test instrument() respects privacy_mode."""
        with patch("agents.tracing.add_trace_processor"):
            created = instrument(client=mock_client, privacy_mode=True)

        assert created._privacy_mode is True

    def test_instrument_with_groups_and_properties(self, mock_client):
        """Test instrument() accepts groups and properties."""
        expected_groups = {"company": "acme"}
        expected_properties = {"env": "test"}

        with patch("agents.tracing.add_trace_processor"):
            created = instrument(
                client=mock_client,
                groups={"company": "acme"},
                properties={"env": "test"},
            )

        assert created._groups == expected_groups
        assert created._properties == expected_properties