posthog 6.7.2__py3-none-any.whl → 6.9.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- posthog/__init__.py +30 -2
- posthog/ai/anthropic/anthropic.py +4 -5
- posthog/ai/anthropic/anthropic_async.py +33 -70
- posthog/ai/anthropic/anthropic_converter.py +73 -23
- posthog/ai/gemini/gemini.py +11 -10
- posthog/ai/gemini/gemini_converter.py +177 -29
- posthog/ai/langchain/callbacks.py +18 -3
- posthog/ai/openai/openai.py +8 -8
- posthog/ai/openai/openai_async.py +36 -15
- posthog/ai/openai/openai_converter.py +192 -42
- posthog/ai/types.py +2 -19
- posthog/ai/utils.py +124 -118
- posthog/client.py +96 -4
- posthog/contexts.py +81 -0
- posthog/exception_utils.py +192 -0
- posthog/feature_flags.py +26 -10
- posthog/integrations/django.py +157 -19
- posthog/test/test_client.py +43 -0
- posthog/test/test_exception_capture.py +300 -0
- posthog/test/test_feature_flags.py +146 -35
- posthog/test/test_module.py +0 -8
- posthog/version.py +1 -1
- {posthog-6.7.2.dist-info → posthog-6.9.0.dist-info}/METADATA +1 -1
- {posthog-6.7.2.dist-info → posthog-6.9.0.dist-info}/RECORD +27 -27
- {posthog-6.7.2.dist-info → posthog-6.9.0.dist-info}/WHEEL +0 -0
- {posthog-6.7.2.dist-info → posthog-6.9.0.dist-info}/licenses/LICENSE +0 -0
- {posthog-6.7.2.dist-info → posthog-6.9.0.dist-info}/top_level.txt +0 -0
posthog/ai/langchain/callbacks.py
CHANGED

@@ -20,8 +20,14 @@ from typing import (
 )
 from uuid import UUID

-
-
+try:
+    # LangChain 1.0+ and modern 0.x with langchain-core
+    from langchain_core.callbacks.base import BaseCallbackHandler
+    from langchain_core.agents import AgentAction, AgentFinish
+except (ImportError, ModuleNotFoundError):
+    # Fallback for older LangChain versions
+    from langchain.callbacks.base import BaseCallbackHandler
+    from langchain.schema.agent import AgentAction, AgentFinish
 from langchain_core.documents import Document
 from langchain_core.messages import (
     AIMessage,

@@ -486,6 +492,7 @@ class CallbackHandler(BaseCallbackHandler):
             "$ai_latency": run.latency,
             "$ai_span_name": run.name,
             "$ai_span_id": run_id,
+            "$ai_framework": "langchain",
         }
         if parent_run_id is not None:
             event_properties["$ai_parent_id"] = parent_run_id

@@ -556,6 +563,7 @@ class CallbackHandler(BaseCallbackHandler):
             "$ai_http_status": 200,
             "$ai_latency": run.latency,
             "$ai_base_url": run.base_url,
+            "$ai_framework": "langchain",
         }

         if run.tools:

@@ -750,12 +758,19 @@ def _parse_usage_model(
         "cache_read": "cache_read_tokens",
         "reasoning": "reasoning_tokens",
     }
-
+    normalized_usage = ModelUsage(
         **{
             dataclass_key: parsed_usage.get(mapped_key) or 0
             for mapped_key, dataclass_key in field_mapping.items()
         },
     )
+    # In LangChain, input_tokens is the sum of input and cache read tokens.
+    # Our cost calculation expects them to be separate, for Anthropic.
+    if normalized_usage.input_tokens and normalized_usage.cache_read_tokens:
+        normalized_usage.input_tokens = max(
+            normalized_usage.input_tokens - normalized_usage.cache_read_tokens, 0
+        )
+    return normalized_usage


 def _parse_usage(response: LLMResult) -> ModelUsage:
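
Note on the _parse_usage_model hunk above: LangChain reports input_tokens as the sum of fresh input and cache-read tokens, while PostHog's Anthropic cost calculation expects the two to be kept separate. A minimal standalone sketch of that normalization (the Usage dataclass below is a simplified stand-in, not the SDK's ModelUsage):

from dataclasses import dataclass


@dataclass
class Usage:
    # simplified stand-in for the SDK's ModelUsage dataclass
    input_tokens: int = 0
    output_tokens: int = 0
    cache_read_tokens: int = 0


def normalize(usage: Usage) -> Usage:
    # LangChain's input_tokens already includes cache reads; split them out
    # so cached tokens are not priced as fresh input tokens.
    if usage.input_tokens and usage.cache_read_tokens:
        usage.input_tokens = max(usage.input_tokens - usage.cache_read_tokens, 0)
    return usage


# 1,000 reported input tokens, 800 of which were served from the prompt cache
print(normalize(Usage(input_tokens=1000, output_tokens=50, cache_read_tokens=800)))
# Usage(input_tokens=200, output_tokens=50, cache_read_tokens=800)
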
posthog/ai/openai/openai.py
CHANGED

@@ -2,6 +2,8 @@ import time
 import uuid
 from typing import Any, Dict, List, Optional

+from posthog.ai.types import TokenUsage
+
 try:
     import openai
 except ImportError:

@@ -120,7 +122,7 @@ class WrappedResponses:
         **kwargs: Any,
     ):
         start_time = time.time()
-        usage_stats:
+        usage_stats: TokenUsage = TokenUsage()
         final_content = []
         response = self._original.create(**kwargs)

@@ -171,14 +173,13 @@ class WrappedResponses:
         posthog_privacy_mode: bool,
         posthog_groups: Optional[Dict[str, Any]],
         kwargs: Dict[str, Any],
-        usage_stats:
+        usage_stats: TokenUsage,
         latency: float,
         output: Any,
         available_tool_calls: Optional[List[Dict[str, Any]]] = None,
     ):
         from posthog.ai.types import StreamingEventData
         from posthog.ai.openai.openai_converter import (
-            standardize_openai_usage,
             format_openai_streaming_input,
             format_openai_streaming_output,
         )

@@ -195,7 +196,7 @@ class WrappedResponses:
             kwargs=kwargs,
             formatted_input=sanitized_input,
             formatted_output=format_openai_streaming_output(output, "responses"),
-            usage_stats=
+            usage_stats=usage_stats,
             latency=latency,
             distinct_id=posthog_distinct_id,
             trace_id=posthog_trace_id,

@@ -316,7 +317,7 @@ class WrappedCompletions:
         **kwargs: Any,
     ):
         start_time = time.time()
-        usage_stats:
+        usage_stats: TokenUsage = TokenUsage()
         accumulated_content = []
         accumulated_tool_calls: Dict[int, Dict[str, Any]] = {}
         if "stream_options" not in kwargs:

@@ -387,7 +388,7 @@ class WrappedCompletions:
         posthog_privacy_mode: bool,
         posthog_groups: Optional[Dict[str, Any]],
         kwargs: Dict[str, Any],
-        usage_stats:
+        usage_stats: TokenUsage,
         latency: float,
         output: Any,
         tool_calls: Optional[List[Dict[str, Any]]] = None,

@@ -395,7 +396,6 @@ class WrappedCompletions:
     ):
         from posthog.ai.types import StreamingEventData
         from posthog.ai.openai.openai_converter import (
-            standardize_openai_usage,
             format_openai_streaming_input,
             format_openai_streaming_output,
         )

@@ -412,7 +412,7 @@ class WrappedCompletions:
             kwargs=kwargs,
             formatted_input=sanitized_input,
             formatted_output=format_openai_streaming_output(output, "chat", tool_calls),
-            usage_stats=
+            usage_stats=usage_stats,
             latency=latency,
             distinct_id=posthog_distinct_id,
             trace_id=posthog_trace_id,

posthog/ai/openai/openai_async.py
CHANGED

@@ -2,6 +2,8 @@ import time
 import uuid
 from typing import Any, Dict, List, Optional

+from posthog.ai.types import TokenUsage
+
 try:
     import openai
 except ImportError:

@@ -124,9 +126,9 @@ class WrappedResponses:
         **kwargs: Any,
     ):
         start_time = time.time()
-        usage_stats:
+        usage_stats: TokenUsage = TokenUsage()
         final_content = []
-        response = self._original.create(**kwargs)
+        response = await self._original.create(**kwargs)

         async def async_generator():
             nonlocal usage_stats

@@ -176,7 +178,7 @@ class WrappedResponses:
         posthog_privacy_mode: bool,
         posthog_groups: Optional[Dict[str, Any]],
         kwargs: Dict[str, Any],
-        usage_stats:
+        usage_stats: TokenUsage,
         latency: float,
         output: Any,
         available_tool_calls: Optional[List[Dict[str, Any]]] = None,

@@ -211,6 +213,15 @@ class WrappedResponses:
             **(posthog_properties or {}),
         }

+        # Add web search count if present
+        web_search_count = usage_stats.get("web_search_count")
+        if (
+            web_search_count is not None
+            and isinstance(web_search_count, int)
+            and web_search_count > 0
+        ):
+            event_properties["$ai_web_search_count"] = web_search_count
+
         if available_tool_calls:
             event_properties["$ai_tools"] = available_tool_calls


@@ -336,14 +347,14 @@ class WrappedCompletions:
         **kwargs: Any,
     ):
         start_time = time.time()
-        usage_stats:
+        usage_stats: TokenUsage = TokenUsage()
         accumulated_content = []
         accumulated_tool_calls: Dict[int, Dict[str, Any]] = {}

         if "stream_options" not in kwargs:
             kwargs["stream_options"] = {}
         kwargs["stream_options"]["include_usage"] = True
-        response = self._original.create(**kwargs)
+        response = await self._original.create(**kwargs)

         async def async_generator():
             nonlocal usage_stats

@@ -406,7 +417,7 @@ class WrappedCompletions:
         posthog_privacy_mode: bool,
         posthog_groups: Optional[Dict[str, Any]],
         kwargs: Dict[str, Any],
-        usage_stats:
+        usage_stats: TokenUsage,
         latency: float,
         output: Any,
         tool_calls: Optional[List[Dict[str, Any]]] = None,

@@ -430,8 +441,8 @@ class WrappedCompletions:
                 format_openai_streaming_output(output, "chat", tool_calls),
             ),
             "$ai_http_status": 200,
-            "$ai_input_tokens": usage_stats.get("
-            "$ai_output_tokens": usage_stats.get("
+            "$ai_input_tokens": usage_stats.get("input_tokens", 0),
+            "$ai_output_tokens": usage_stats.get("output_tokens", 0),
             "$ai_cache_read_input_tokens": usage_stats.get(
                 "cache_read_input_tokens", 0
             ),

@@ -442,6 +453,16 @@ class WrappedCompletions:
             **(posthog_properties or {}),
         }

+        # Add web search count if present
+        web_search_count = usage_stats.get("web_search_count")
+
+        if (
+            web_search_count is not None
+            and isinstance(web_search_count, int)
+            and web_search_count > 0
+        ):
+            event_properties["$ai_web_search_count"] = web_search_count
+
         if available_tool_calls:
             event_properties["$ai_tools"] = available_tool_calls


@@ -497,17 +518,17 @@ class WrappedEmbeddings:
         posthog_trace_id = str(uuid.uuid4())

         start_time = time.time()
-        response = self._original.create(**kwargs)
+        response = await self._original.create(**kwargs)
         end_time = time.time()

         # Extract usage statistics if available
-        usage_stats =
+        usage_stats: TokenUsage = TokenUsage()

         if hasattr(response, "usage") and response.usage:
-            usage_stats =
-
-
-
+            usage_stats = TokenUsage(
+                input_tokens=getattr(response.usage, "prompt_tokens", 0),
+                output_tokens=getattr(response.usage, "completion_tokens", 0),
+            )

         latency = end_time - start_time

@@ -521,7 +542,7 @@ class WrappedEmbeddings:
                 sanitize_openai_response(kwargs.get("input")),
             ),
             "$ai_http_status": 200,
-            "$ai_input_tokens": usage_stats.get("
+            "$ai_input_tokens": usage_stats.get("input_tokens", 0),
             "$ai_latency": latency,
             "$ai_trace_id": posthog_trace_id,
             "$ai_base_url": str(self._client.base_url),

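Both async wrappers above attach $ai_web_search_count to the captured event only when the extracted value is a positive integer. A standalone sketch of that guard (the helper name below is illustrative, not part of the SDK):

from typing import Any, Dict, Optional


def add_web_search_property(
    event_properties: Dict[str, Any], web_search_count: Optional[int]
) -> Dict[str, Any]:
    # Mirrors the guard in the hunks above: only positive integer counts are recorded.
    if (
        web_search_count is not None
        and isinstance(web_search_count, int)
        and web_search_count > 0
    ):
        event_properties["$ai_web_search_count"] = web_search_count
    return event_properties


print(add_web_search_property({}, 2))     # {'$ai_web_search_count': 2}
print(add_web_search_property({}, 0))     # {} -- zero counts are dropped
print(add_web_search_property({}, None))  # {} -- missing counts are dropped
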
posthog/ai/openai/openai_converter.py
CHANGED

@@ -14,7 +14,6 @@ from posthog.ai.types import (
     FormattedImageContent,
     FormattedMessage,
     FormattedTextContent,
-    StreamingUsageStats,
     TokenUsage,
 )


@@ -256,9 +255,180 @@ def format_openai_streaming_content(
     return formatted


+def extract_openai_web_search_count(response: Any) -> int:
+    """
+    Extract web search count from OpenAI response.
+
+    Uses a two-tier detection strategy:
+    1. Priority 1 (exact count): Check for output[].type == "web_search_call" (Responses API)
+    2. Priority 2 (binary detection): Check for various web search indicators:
+       - Root-level citations, search_results, or usage.search_context_size (Perplexity)
+       - Annotations with type "url_citation" in choices/output (including delta for streaming)
+
+    Args:
+        response: The response from OpenAI API
+
+    Returns:
+        Number of web search requests (exact count or binary 1/0)
+    """
+
+    # Priority 1: Check for exact count in Responses API output
+    if hasattr(response, "output"):
+        web_search_count = 0
+
+        for item in response.output:
+            if hasattr(item, "type") and item.type == "web_search_call":
+                web_search_count += 1
+
+        web_search_count = max(0, web_search_count)
+
+        if web_search_count > 0:
+            return web_search_count
+
+    # Priority 2: Binary detection (returns 1 or 0)
+
+    # Check root-level indicators (Perplexity)
+    if hasattr(response, "citations"):
+        citations = getattr(response, "citations")
+
+        if citations and len(citations) > 0:
+            return 1
+
+    if hasattr(response, "search_results"):
+        search_results = getattr(response, "search_results")
+
+        if search_results and len(search_results) > 0:
+            return 1
+
+    if hasattr(response, "usage") and hasattr(response.usage, "search_context_size"):
+        if response.usage.search_context_size:
+            return 1
+
+    # Check for url_citation annotations in choices (Chat Completions)
+    if hasattr(response, "choices"):
+        for choice in response.choices:
+            # Check message.annotations (non-streaming or final chunk)
+            if hasattr(choice, "message") and hasattr(choice.message, "annotations"):
+                annotations = choice.message.annotations
+
+                if annotations:
+                    for annotation in annotations:
+                        # Support both dict and object formats
+                        annotation_type = (
+                            annotation.get("type")
+                            if isinstance(annotation, dict)
+                            else getattr(annotation, "type", None)
+                        )
+
+                        if annotation_type == "url_citation":
+                            return 1
+
+            # Check delta.annotations (streaming chunks)
+            if hasattr(choice, "delta") and hasattr(choice.delta, "annotations"):
+                annotations = choice.delta.annotations
+
+                if annotations:
+                    for annotation in annotations:
+                        # Support both dict and object formats
+                        annotation_type = (
+                            annotation.get("type")
+                            if isinstance(annotation, dict)
+                            else getattr(annotation, "type", None)
+                        )
+
+                        if annotation_type == "url_citation":
+                            return 1
+
+    # Check for url_citation annotations in output (Responses API)
+    if hasattr(response, "output"):
+        for item in response.output:
+            if hasattr(item, "content") and isinstance(item.content, list):
+                for content_item in item.content:
+                    if hasattr(content_item, "annotations"):
+                        annotations = content_item.annotations
+
+                        if annotations:
+                            for annotation in annotations:
+                                # Support both dict and object formats
+                                annotation_type = (
+                                    annotation.get("type")
+                                    if isinstance(annotation, dict)
+                                    else getattr(annotation, "type", None)
+                                )
+
+                                if annotation_type == "url_citation":
+                                    return 1
+
+    return 0
+
+
+def extract_openai_usage_from_response(response: Any) -> TokenUsage:
+    """
+    Extract usage statistics from a full OpenAI response (non-streaming).
+    Handles both Chat Completions and Responses API.
+
+    Args:
+        response: The complete response from OpenAI API
+
+    Returns:
+        TokenUsage with standardized usage statistics
+    """
+    if not hasattr(response, "usage"):
+        return TokenUsage(input_tokens=0, output_tokens=0)
+
+    cached_tokens = 0
+    input_tokens = 0
+    output_tokens = 0
+    reasoning_tokens = 0
+
+    # Responses API format
+    if hasattr(response.usage, "input_tokens"):
+        input_tokens = response.usage.input_tokens
+    if hasattr(response.usage, "output_tokens"):
+        output_tokens = response.usage.output_tokens
+    if hasattr(response.usage, "input_tokens_details") and hasattr(
+        response.usage.input_tokens_details, "cached_tokens"
+    ):
+        cached_tokens = response.usage.input_tokens_details.cached_tokens
+    if hasattr(response.usage, "output_tokens_details") and hasattr(
+        response.usage.output_tokens_details, "reasoning_tokens"
+    ):
+        reasoning_tokens = response.usage.output_tokens_details.reasoning_tokens
+
+    # Chat Completions format
+    if hasattr(response.usage, "prompt_tokens"):
+        input_tokens = response.usage.prompt_tokens
+    if hasattr(response.usage, "completion_tokens"):
+        output_tokens = response.usage.completion_tokens
+    if hasattr(response.usage, "prompt_tokens_details") and hasattr(
+        response.usage.prompt_tokens_details, "cached_tokens"
+    ):
+        cached_tokens = response.usage.prompt_tokens_details.cached_tokens
+    if hasattr(response.usage, "completion_tokens_details") and hasattr(
+        response.usage.completion_tokens_details, "reasoning_tokens"
+    ):
+        reasoning_tokens = response.usage.completion_tokens_details.reasoning_tokens
+
+    result = TokenUsage(
+        input_tokens=input_tokens,
+        output_tokens=output_tokens,
+    )
+
+    if cached_tokens > 0:
+        result["cache_read_input_tokens"] = cached_tokens
+    if reasoning_tokens > 0:
+        result["reasoning_tokens"] = reasoning_tokens
+
+    web_search_count = extract_openai_web_search_count(response)
+    if web_search_count > 0:
+        result["web_search_count"] = web_search_count
+
+    return result
+
+
 def extract_openai_usage_from_chunk(
     chunk: Any, provider_type: str = "chat"
-) ->
+) -> TokenUsage:
     """
     Extract usage statistics from an OpenAI streaming chunk.


@@ -272,16 +442,23 @@ def extract_openai_usage_from_chunk(
         Dictionary of usage statistics
     """

-    usage:
+    usage: TokenUsage = TokenUsage()

    if provider_type == "chat":
+        # Extract web search count from the chunk before checking for usage
+        # Web search indicators (citations, annotations) can appear on any chunk,
+        # not just those with usage data
+        web_search_count = extract_openai_web_search_count(chunk)
+        if web_search_count > 0:
+            usage["web_search_count"] = web_search_count
+
         if not hasattr(chunk, "usage") or not chunk.usage:
             return usage

         # Chat Completions API uses prompt_tokens and completion_tokens
-
-        usage["
-        usage["
+        # Standardize to input_tokens and output_tokens
+        usage["input_tokens"] = getattr(chunk.usage, "prompt_tokens", 0)
+        usage["output_tokens"] = getattr(chunk.usage, "completion_tokens", 0)

         # Handle cached tokens
         if hasattr(chunk.usage, "prompt_tokens_details") and hasattr(

@@ -310,7 +487,6 @@ def extract_openai_usage_from_chunk(
         response_usage = chunk.response.usage
         usage["input_tokens"] = getattr(response_usage, "input_tokens", 0)
         usage["output_tokens"] = getattr(response_usage, "output_tokens", 0)
-        usage["total_tokens"] = getattr(response_usage, "total_tokens", 0)

         # Handle cached tokens
         if hasattr(response_usage, "input_tokens_details") and hasattr(

@@ -328,6 +504,12 @@ def extract_openai_usage_from_chunk(
                 response_usage.output_tokens_details.reasoning_tokens
             )

+        # Extract web search count from the complete response
+        if hasattr(chunk, "response"):
+            web_search_count = extract_openai_web_search_count(chunk.response)
+            if web_search_count > 0:
+                usage["web_search_count"] = web_search_count
+
     return usage


@@ -535,37 +717,6 @@ def format_openai_streaming_output(
     ]


-def standardize_openai_usage(
-    usage: Dict[str, Any], api_type: str = "chat"
-) -> TokenUsage:
-    """
-    Standardize OpenAI usage statistics to common TokenUsage format.
-
-    Args:
-        usage: Raw usage statistics from OpenAI
-        api_type: Either "chat" or "responses" to handle different field names
-
-    Returns:
-        Standardized TokenUsage dict
-    """
-    if api_type == "chat":
-        # Chat API uses prompt_tokens/completion_tokens
-        return TokenUsage(
-            input_tokens=usage.get("prompt_tokens", 0),
-            output_tokens=usage.get("completion_tokens", 0),
-            cache_read_input_tokens=usage.get("cache_read_input_tokens"),
-            reasoning_tokens=usage.get("reasoning_tokens"),
-        )
-    else:  # responses API
-        # Responses API uses input_tokens/output_tokens
-        return TokenUsage(
-            input_tokens=usage.get("input_tokens", 0),
-            output_tokens=usage.get("output_tokens", 0),
-            cache_read_input_tokens=usage.get("cache_read_input_tokens"),
-            reasoning_tokens=usage.get("reasoning_tokens"),
-        )
-
-
 def format_openai_streaming_input(
     kwargs: Dict[str, Any], api_type: str = "chat"
 ) -> Any:

@@ -579,7 +730,6 @@ def format_openai_streaming_input(
     Returns:
         Formatted input ready for PostHog tracking
     """
-
-
-
-    return kwargs.get("input")
+    from posthog.ai.utils import merge_system_prompt
+
+    return merge_system_prompt(kwargs, "openai")

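For illustration, a hedged usage sketch of the new extract_openai_web_search_count helper added above, exercising both detection tiers with mock response objects (assumes posthog 6.9.0 is installed and the import path matches the diff):

from types import SimpleNamespace

from posthog.ai.openai.openai_converter import extract_openai_web_search_count

# Responses API shape: each executed web search appears as an output item
# with type == "web_search_call", so an exact count is returned.
responses_result = SimpleNamespace(
    output=[
        SimpleNamespace(type="web_search_call"),
        SimpleNamespace(type="web_search_call"),
        SimpleNamespace(type="message"),
    ]
)

# Perplexity-style shape: only root-level citations are present, so binary
# detection returns 1.
perplexity_result = SimpleNamespace(citations=["https://example.com/source"])

print(extract_openai_web_search_count(responses_result))   # 2
print(extract_openai_web_search_count(perplexity_result))  # 1
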
posthog/ai/types.py
CHANGED

@@ -63,6 +63,7 @@ class TokenUsage(TypedDict, total=False):
     cache_read_input_tokens: Optional[int]
     cache_creation_input_tokens: Optional[int]
     reasoning_tokens: Optional[int]
+    web_search_count: Optional[int]


 class ProviderResponse(TypedDict, total=False):

@@ -77,24 +78,6 @@ class ProviderResponse(TypedDict, total=False):
     error: Optional[str]


-class StreamingUsageStats(TypedDict, total=False):
-    """
-    Usage statistics collected during streaming.
-
-    Different providers populate different fields during streaming.
-    """
-
-    input_tokens: int
-    output_tokens: int
-    cache_read_input_tokens: Optional[int]
-    cache_creation_input_tokens: Optional[int]
-    reasoning_tokens: Optional[int]
-    # OpenAI-specific names
-    prompt_tokens: Optional[int]
-    completion_tokens: Optional[int]
-    total_tokens: Optional[int]
-
-
 class StreamingContentBlock(TypedDict, total=False):
     """
     Content block used during streaming to accumulate content.

@@ -133,7 +116,7 @@ class StreamingEventData(TypedDict):
     kwargs: Dict[str, Any]  # Original kwargs for tool extraction and special handling
     formatted_input: Any  # Provider-formatted input ready for tracking
     formatted_output: Any  # Provider-formatted output ready for tracking
-    usage_stats: TokenUsage
+    usage_stats: TokenUsage
     latency: float
     distinct_id: Optional[str]
     trace_id: Optional[str]