posthoganalytics 6.7.0__py3-none-any.whl → 7.4.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- posthoganalytics/__init__.py +84 -7
- posthoganalytics/ai/anthropic/__init__.py +10 -0
- posthoganalytics/ai/anthropic/anthropic.py +95 -65
- posthoganalytics/ai/anthropic/anthropic_async.py +95 -65
- posthoganalytics/ai/anthropic/anthropic_converter.py +443 -0
- posthoganalytics/ai/gemini/__init__.py +15 -1
- posthoganalytics/ai/gemini/gemini.py +66 -71
- posthoganalytics/ai/gemini/gemini_async.py +423 -0
- posthoganalytics/ai/gemini/gemini_converter.py +652 -0
- posthoganalytics/ai/langchain/callbacks.py +58 -13
- posthoganalytics/ai/openai/__init__.py +16 -1
- posthoganalytics/ai/openai/openai.py +140 -149
- posthoganalytics/ai/openai/openai_async.py +127 -82
- posthoganalytics/ai/openai/openai_converter.py +741 -0
- posthoganalytics/ai/sanitization.py +248 -0
- posthoganalytics/ai/types.py +125 -0
- posthoganalytics/ai/utils.py +339 -356
- posthoganalytics/client.py +345 -97
- posthoganalytics/contexts.py +81 -0
- posthoganalytics/exception_utils.py +250 -2
- posthoganalytics/feature_flags.py +26 -10
- posthoganalytics/flag_definition_cache.py +127 -0
- posthoganalytics/integrations/django.py +157 -19
- posthoganalytics/request.py +203 -23
- posthoganalytics/test/test_client.py +250 -22
- posthoganalytics/test/test_exception_capture.py +418 -0
- posthoganalytics/test/test_feature_flag_result.py +441 -2
- posthoganalytics/test/test_feature_flags.py +308 -104
- posthoganalytics/test/test_flag_definition_cache.py +612 -0
- posthoganalytics/test/test_module.py +0 -8
- posthoganalytics/test/test_request.py +536 -0
- posthoganalytics/test/test_utils.py +4 -1
- posthoganalytics/types.py +40 -0
- posthoganalytics/version.py +1 -1
- {posthoganalytics-6.7.0.dist-info → posthoganalytics-7.4.3.dist-info}/METADATA +12 -12
- posthoganalytics-7.4.3.dist-info/RECORD +57 -0
- posthoganalytics-6.7.0.dist-info/RECORD +0 -49
- {posthoganalytics-6.7.0.dist-info → posthoganalytics-7.4.3.dist-info}/WHEEL +0 -0
- {posthoganalytics-6.7.0.dist-info → posthoganalytics-7.4.3.dist-info}/licenses/LICENSE +0 -0
- {posthoganalytics-6.7.0.dist-info → posthoganalytics-7.4.3.dist-info}/top_level.txt +0 -0
posthoganalytics/ai/openai/openai_converter.py

@@ -0,0 +1,741 @@

"""
OpenAI-specific conversion utilities.

This module handles the conversion of OpenAI API responses and inputs
into standardized formats for PostHog tracking. It supports both
Chat Completions API and Responses API formats.
"""

from typing import Any, Dict, List, Optional

from posthoganalytics.ai.types import (
    FormattedContentItem,
    FormattedFunctionCall,
    FormattedImageContent,
    FormattedMessage,
    FormattedTextContent,
    TokenUsage,
)

def format_openai_response(response: Any) -> List[FormattedMessage]:
    """
    Format an OpenAI response into standardized message format.

    Handles both Chat Completions API and Responses API formats.

    Args:
        response: The response object from OpenAI API

    Returns:
        List of formatted messages with role and content
    """

    output: List[FormattedMessage] = []

    if response is None:
        return output

    # Handle Chat Completions response format
    if hasattr(response, "choices"):
        content: List[FormattedContentItem] = []
        role = "assistant"

        for choice in response.choices:
            if hasattr(choice, "message") and choice.message:
                if choice.message.role:
                    role = choice.message.role

                if choice.message.content:
                    content.append(
                        {
                            "type": "text",
                            "text": choice.message.content,
                        }
                    )

                if hasattr(choice.message, "tool_calls") and choice.message.tool_calls:
                    for tool_call in choice.message.tool_calls:
                        content.append(
                            {
                                "type": "function",
                                "id": tool_call.id,
                                "function": {
                                    "name": tool_call.function.name,
                                    "arguments": tool_call.function.arguments,
                                },
                            }
                        )

                # Handle audio output (gpt-4o-audio-preview)
                if hasattr(choice.message, "audio") and choice.message.audio:
                    # Convert Pydantic model to dict to capture all fields from OpenAI
                    audio_dict = choice.message.audio.model_dump()
                    content.append({"type": "audio", **audio_dict})

        if content:
            output.append(
                {
                    "role": role,
                    "content": content,
                }
            )

    # Handle Responses API format
    if hasattr(response, "output"):
        content = []
        role = "assistant"

        for item in response.output:
            if item.type == "message":
                role = item.role

                if hasattr(item, "content") and isinstance(item.content, list):
                    for content_item in item.content:
                        if (
                            hasattr(content_item, "type")
                            and content_item.type == "output_text"
                            and hasattr(content_item, "text")
                        ):
                            content.append(
                                {
                                    "type": "text",
                                    "text": content_item.text,
                                }
                            )

                        elif hasattr(content_item, "text"):
                            content.append({"type": "text", "text": content_item.text})

                        elif (
                            hasattr(content_item, "type")
                            and content_item.type == "input_image"
                            and hasattr(content_item, "image_url")
                        ):
                            image_content: FormattedImageContent = {
                                "type": "image",
                                "image": content_item.image_url,
                            }
                            content.append(image_content)

                elif hasattr(item, "content"):
                    text_content = {"type": "text", "text": str(item.content)}
                    content.append(text_content)

            elif hasattr(item, "type") and item.type == "function_call":
                content.append(
                    {
                        "type": "function",
                        "id": getattr(item, "call_id", getattr(item, "id", "")),
                        "function": {
                            "name": item.name,
                            "arguments": getattr(item, "arguments", {}),
                        },
                    }
                )

        if content:
            output.append(
                {
                    "role": role,
                    "content": content,
                }
            )

    return output

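A minimal usage sketch (not part of the package), with SimpleNamespace standing in for the SDK's Pydantic response objects; the expected result is shown as a comment:

from types import SimpleNamespace

from posthoganalytics.ai.openai.openai_converter import format_openai_response

message = SimpleNamespace(role="assistant", content="Hello!", tool_calls=None, audio=None)
response = SimpleNamespace(choices=[SimpleNamespace(message=message)])

print(format_openai_response(response))
# [{'role': 'assistant', 'content': [{'type': 'text', 'text': 'Hello!'}]}]
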
def format_openai_input(
    messages: Optional[List[Dict[str, Any]]] = None, input_data: Optional[Any] = None
) -> List[FormattedMessage]:
    """
    Format OpenAI input messages.

    Handles both messages parameter (Chat Completions) and input parameter (Responses API).

    Args:
        messages: List of message dictionaries for Chat Completions API
        input_data: Input data for Responses API

    Returns:
        List of formatted messages
    """

    formatted_messages: List[FormattedMessage] = []

    # Handle Chat Completions API format
    if messages is not None:
        for msg in messages:
            formatted_messages.append(
                {
                    "role": msg.get("role", "user"),
                    "content": msg.get("content", ""),
                }
            )

    # Handle Responses API format
    if input_data is not None:
        if isinstance(input_data, list):
            for item in input_data:
                role = "user"
                content = ""

                if isinstance(item, dict):
                    role = item.get("role", "user")
                    content = item.get("content", "")

                elif isinstance(item, str):
                    content = item

                else:
                    content = str(item)

                formatted_messages.append({"role": role, "content": content})

        elif isinstance(input_data, str):
            formatted_messages.append({"role": "user", "content": input_data})

        else:
            formatted_messages.append({"role": "user", "content": str(input_data)})

    return formatted_messages

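A quick sketch of both input shapes the helper accepts (illustrative only; outputs shown as comments):

from posthoganalytics.ai.openai.openai_converter import format_openai_input

print(format_openai_input(messages=[{"role": "system", "content": "Be brief."}]))
# [{'role': 'system', 'content': 'Be brief.'}]

print(format_openai_input(input_data="What is PostHog?"))
# [{'role': 'user', 'content': 'What is PostHog?'}]
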
def extract_openai_tools(kwargs: Dict[str, Any]) -> Optional[Any]:
    """
    Extract tool definitions from OpenAI API kwargs.

    Args:
        kwargs: Keyword arguments passed to OpenAI API

    Returns:
        Tool definitions if present, None otherwise
    """

    # Check for tools parameter (newer API)
    if "tools" in kwargs:
        return kwargs["tools"]

    # Check for functions parameter (older API)
    if "functions" in kwargs:
        return kwargs["functions"]

    return None

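For illustration, note the precedence: when both keys are present, "tools" wins and legacy "functions" is only the fallback (hypothetical tool names below):

from posthoganalytics.ai.openai.openai_converter import extract_openai_tools

kwargs = {
    "tools": [{"type": "function", "function": {"name": "get_weather"}}],
    "functions": [{"name": "get_weather_legacy"}],
}
print(extract_openai_tools(kwargs))  # returns the "tools" list, not "functions"
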
def format_openai_streaming_content(
    accumulated_content: str, tool_calls: Optional[List[Dict[str, Any]]] = None
) -> List[FormattedContentItem]:
    """
    Format content from OpenAI streaming response.

    Used by streaming handlers to format accumulated content.

    Args:
        accumulated_content: Accumulated text content from streaming
        tool_calls: Optional list of tool calls accumulated during streaming

    Returns:
        List of formatted content items
    """
    formatted: List[FormattedContentItem] = []

    # Add text content if present
    if accumulated_content:
        text_content: FormattedTextContent = {
            "type": "text",
            "text": accumulated_content,
        }
        formatted.append(text_content)

    # Add tool calls if present
    if tool_calls:
        for tool_call in tool_calls:
            function_call: FormattedFunctionCall = {
                "type": "function",
                "id": tool_call.get("id"),
                "function": tool_call.get("function", {}),
            }
            formatted.append(function_call)

    return formatted

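A short sketch combining accumulated text with one streamed tool call (hypothetical call id and function name):

from posthoganalytics.ai.openai.openai_converter import format_openai_streaming_content

items = format_openai_streaming_content(
    "Checking the weather...",
    tool_calls=[{"id": "call_1", "function": {"name": "get_weather", "arguments": "{}"}}],
)
print(items)
# [{'type': 'text', 'text': 'Checking the weather...'},
#  {'type': 'function', 'id': 'call_1', 'function': {'name': 'get_weather', 'arguments': '{}'}}]
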
def extract_openai_web_search_count(response: Any) -> int:
    """
    Extract web search count from OpenAI response.

    Uses a two-tier detection strategy:
    1. Priority 1 (exact count): Check for output[].type == "web_search_call" (Responses API)
    2. Priority 2 (binary detection): Check for various web search indicators:
       - Root-level citations, search_results, or usage.search_context_size (Perplexity)
       - Annotations with type "url_citation" in choices/output (including delta for streaming)

    Args:
        response: The response from OpenAI API

    Returns:
        Number of web search requests (exact count or binary 1/0)
    """

    # Priority 1: Check for exact count in Responses API output
    if hasattr(response, "output"):
        web_search_count = 0

        for item in response.output:
            if hasattr(item, "type") and item.type == "web_search_call":
                web_search_count += 1

        web_search_count = max(0, web_search_count)

        if web_search_count > 0:
            return web_search_count

    # Priority 2: Binary detection (returns 1 or 0)

    # Check root-level indicators (Perplexity)
    if hasattr(response, "citations"):
        citations = getattr(response, "citations")

        if citations and len(citations) > 0:
            return 1

    if hasattr(response, "search_results"):
        search_results = getattr(response, "search_results")

        if search_results and len(search_results) > 0:
            return 1

    if hasattr(response, "usage") and hasattr(response.usage, "search_context_size"):
        if response.usage.search_context_size:
            return 1

    # Check for url_citation annotations in choices (Chat Completions)
    if hasattr(response, "choices"):
        for choice in response.choices:
            # Check message.annotations (non-streaming or final chunk)
            if hasattr(choice, "message") and hasattr(choice.message, "annotations"):
                annotations = choice.message.annotations

                if annotations:
                    for annotation in annotations:
                        # Support both dict and object formats
                        annotation_type = (
                            annotation.get("type")
                            if isinstance(annotation, dict)
                            else getattr(annotation, "type", None)
                        )

                        if annotation_type == "url_citation":
                            return 1

            # Check delta.annotations (streaming chunks)
            if hasattr(choice, "delta") and hasattr(choice.delta, "annotations"):
                annotations = choice.delta.annotations

                if annotations:
                    for annotation in annotations:
                        # Support both dict and object formats
                        annotation_type = (
                            annotation.get("type")
                            if isinstance(annotation, dict)
                            else getattr(annotation, "type", None)
                        )

                        if annotation_type == "url_citation":
                            return 1

    # Check for url_citation annotations in output (Responses API)
    if hasattr(response, "output"):
        for item in response.output:
            if hasattr(item, "content") and isinstance(item.content, list):
                for content_item in item.content:
                    if hasattr(content_item, "annotations"):
                        annotations = content_item.annotations

                        if annotations:
                            for annotation in annotations:
                                # Support both dict and object formats
                                annotation_type = (
                                    annotation.get("type")
                                    if isinstance(annotation, dict)
                                    else getattr(annotation, "type", None)
                                )

                                if annotation_type == "url_citation":
                                    return 1

    return 0

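A sketch of the Priority 1 (exact count) path, with SimpleNamespace items standing in for Responses API output objects:

from types import SimpleNamespace

from posthoganalytics.ai.openai.openai_converter import extract_openai_web_search_count

response = SimpleNamespace(
    output=[
        SimpleNamespace(type="web_search_call"),
        SimpleNamespace(type="web_search_call"),
        SimpleNamespace(type="message"),
    ]
)
print(extract_openai_web_search_count(response))  # 2 -- exact count, no binary fallback needed
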
def extract_openai_usage_from_response(response: Any) -> TokenUsage:
    """
    Extract usage statistics from a full OpenAI response (non-streaming).
    Handles both Chat Completions and Responses API.

    Args:
        response: The complete response from OpenAI API

    Returns:
        TokenUsage with standardized usage statistics
    """
    if not hasattr(response, "usage"):
        return TokenUsage(input_tokens=0, output_tokens=0)

    cached_tokens = 0
    input_tokens = 0
    output_tokens = 0
    reasoning_tokens = 0

    # Responses API format
    if hasattr(response.usage, "input_tokens"):
        input_tokens = response.usage.input_tokens
    if hasattr(response.usage, "output_tokens"):
        output_tokens = response.usage.output_tokens
    if hasattr(response.usage, "input_tokens_details") and hasattr(
        response.usage.input_tokens_details, "cached_tokens"
    ):
        cached_tokens = response.usage.input_tokens_details.cached_tokens
    if hasattr(response.usage, "output_tokens_details") and hasattr(
        response.usage.output_tokens_details, "reasoning_tokens"
    ):
        reasoning_tokens = response.usage.output_tokens_details.reasoning_tokens

    # Chat Completions format
    if hasattr(response.usage, "prompt_tokens"):
        input_tokens = response.usage.prompt_tokens
    if hasattr(response.usage, "completion_tokens"):
        output_tokens = response.usage.completion_tokens
    if hasattr(response.usage, "prompt_tokens_details") and hasattr(
        response.usage.prompt_tokens_details, "cached_tokens"
    ):
        cached_tokens = response.usage.prompt_tokens_details.cached_tokens
    if hasattr(response.usage, "completion_tokens_details") and hasattr(
        response.usage.completion_tokens_details, "reasoning_tokens"
    ):
        reasoning_tokens = response.usage.completion_tokens_details.reasoning_tokens

    result = TokenUsage(
        input_tokens=input_tokens,
        output_tokens=output_tokens,
    )

    if cached_tokens > 0:
        result["cache_read_input_tokens"] = cached_tokens
    if reasoning_tokens > 0:
        result["reasoning_tokens"] = reasoning_tokens

    web_search_count = extract_openai_web_search_count(response)
    if web_search_count > 0:
        result["web_search_count"] = web_search_count

    return result

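A sketch with a Chat Completions-style usage object, including cached prompt tokens (stand-in values, result shown as a comment):

from types import SimpleNamespace

from posthoganalytics.ai.openai.openai_converter import extract_openai_usage_from_response

response = SimpleNamespace(
    usage=SimpleNamespace(
        prompt_tokens=120,
        completion_tokens=30,
        prompt_tokens_details=SimpleNamespace(cached_tokens=100),
    )
)
print(extract_openai_usage_from_response(response))
# {'input_tokens': 120, 'output_tokens': 30, 'cache_read_input_tokens': 100}
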
def extract_openai_usage_from_chunk(
    chunk: Any, provider_type: str = "chat"
) -> TokenUsage:
    """
    Extract usage statistics from an OpenAI streaming chunk.

    Handles both Chat Completions and Responses API formats.

    Args:
        chunk: Streaming chunk from OpenAI API
        provider_type: Either "chat" or "responses" to handle different API formats

    Returns:
        TokenUsage with standardized usage statistics
    """

    usage: TokenUsage = TokenUsage()

    if provider_type == "chat":
        # Extract web search count from the chunk before checking for usage.
        # Web search indicators (citations, annotations) can appear on any chunk,
        # not just those with usage data.
        web_search_count = extract_openai_web_search_count(chunk)
        if web_search_count > 0:
            usage["web_search_count"] = web_search_count

        if not hasattr(chunk, "usage") or not chunk.usage:
            return usage

        # Chat Completions API uses prompt_tokens and completion_tokens;
        # standardize to input_tokens and output_tokens
        usage["input_tokens"] = getattr(chunk.usage, "prompt_tokens", 0)
        usage["output_tokens"] = getattr(chunk.usage, "completion_tokens", 0)

        # Handle cached tokens
        if hasattr(chunk.usage, "prompt_tokens_details") and hasattr(
            chunk.usage.prompt_tokens_details, "cached_tokens"
        ):
            usage["cache_read_input_tokens"] = (
                chunk.usage.prompt_tokens_details.cached_tokens
            )

        # Handle reasoning tokens
        if hasattr(chunk.usage, "completion_tokens_details") and hasattr(
            chunk.usage.completion_tokens_details, "reasoning_tokens"
        ):
            usage["reasoning_tokens"] = (
                chunk.usage.completion_tokens_details.reasoning_tokens
            )

    elif provider_type == "responses":
        # For Responses API, usage is only in chunk.response.usage for completed events
        if hasattr(chunk, "type") and chunk.type == "response.completed":
            if (
                hasattr(chunk, "response")
                and hasattr(chunk.response, "usage")
                and chunk.response.usage
            ):
                response_usage = chunk.response.usage
                usage["input_tokens"] = getattr(response_usage, "input_tokens", 0)
                usage["output_tokens"] = getattr(response_usage, "output_tokens", 0)

                # Handle cached tokens
                if hasattr(response_usage, "input_tokens_details") and hasattr(
                    response_usage.input_tokens_details, "cached_tokens"
                ):
                    usage["cache_read_input_tokens"] = (
                        response_usage.input_tokens_details.cached_tokens
                    )

                # Handle reasoning tokens
                if hasattr(response_usage, "output_tokens_details") and hasattr(
                    response_usage.output_tokens_details, "reasoning_tokens"
                ):
                    usage["reasoning_tokens"] = (
                        response_usage.output_tokens_details.reasoning_tokens
                    )

            # Extract web search count from the complete response
            if hasattr(chunk, "response"):
                web_search_count = extract_openai_web_search_count(chunk.response)
                if web_search_count > 0:
                    usage["web_search_count"] = web_search_count

    return usage

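A sketch of the final Chat Completions chunk, which carries the usage payload (typically only present when the stream is requested with usage included):

from types import SimpleNamespace

from posthoganalytics.ai.openai.openai_converter import extract_openai_usage_from_chunk

final_chunk = SimpleNamespace(
    choices=[],
    usage=SimpleNamespace(prompt_tokens=50, completion_tokens=12),
)
print(extract_openai_usage_from_chunk(final_chunk, provider_type="chat"))
# {'input_tokens': 50, 'output_tokens': 12}
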
def extract_openai_content_from_chunk(
    chunk: Any, provider_type: str = "chat"
) -> Optional[str]:
    """
    Extract content from an OpenAI streaming chunk.

    Handles both Chat Completions and Responses API formats.

    Args:
        chunk: Streaming chunk from OpenAI API
        provider_type: Either "chat" or "responses" to handle different API formats

    Returns:
        Text content if present, None otherwise. Note that for "responses",
        the first output item is returned as-is rather than as plain text.
    """

    if provider_type == "chat":
        # Chat Completions API format
        if (
            hasattr(chunk, "choices")
            and chunk.choices
            and len(chunk.choices) > 0
            and chunk.choices[0].delta
            and chunk.choices[0].delta.content
        ):
            return chunk.choices[0].delta.content

    elif provider_type == "responses":
        # Responses API format
        if hasattr(chunk, "type") and chunk.type == "response.completed":
            if hasattr(chunk, "response") and chunk.response:
                res = chunk.response
                if res.output and len(res.output) > 0:
                    # Return the full output for responses
                    return res.output[0]

    return None

def extract_openai_tool_calls_from_chunk(chunk: Any) -> Optional[List[Dict[str, Any]]]:
    """
    Extract tool calls from an OpenAI streaming chunk.

    Args:
        chunk: Streaming chunk from OpenAI API

    Returns:
        List of tool call deltas if present, None otherwise
    """
    if (
        hasattr(chunk, "choices")
        and chunk.choices
        and len(chunk.choices) > 0
        and chunk.choices[0].delta
        and hasattr(chunk.choices[0].delta, "tool_calls")
        and chunk.choices[0].delta.tool_calls
    ):
        tool_calls = []
        for tool_call in chunk.choices[0].delta.tool_calls:
            tc_dict = {
                "index": getattr(tool_call, "index", None),
            }

            if hasattr(tool_call, "id") and tool_call.id:
                tc_dict["id"] = tool_call.id

            if hasattr(tool_call, "type") and tool_call.type:
                tc_dict["type"] = tool_call.type

            if hasattr(tool_call, "function") and tool_call.function:
                function_dict = {}
                if hasattr(tool_call.function, "name") and tool_call.function.name:
                    function_dict["name"] = tool_call.function.name
                if (
                    hasattr(tool_call.function, "arguments")
                    and tool_call.function.arguments
                ):
                    function_dict["arguments"] = tool_call.function.arguments
                tc_dict["function"] = function_dict

            tool_calls.append(tc_dict)
        return tool_calls

    return None

def accumulate_openai_tool_calls(
    accumulated_tool_calls: Dict[int, Dict[str, Any]],
    chunk_tool_calls: List[Dict[str, Any]],
) -> None:
    """
    Accumulate tool calls from streaming chunks.

    OpenAI sends tool calls incrementally:
    - First chunk has id, type, function.name and partial function.arguments
    - Subsequent chunks have more function.arguments

    Args:
        accumulated_tool_calls: Dictionary mapping index to accumulated tool call data
        chunk_tool_calls: List of tool call deltas from current chunk
    """
    for tool_call_delta in chunk_tool_calls:
        index = tool_call_delta.get("index")
        if index is None:
            continue

        # Initialize tool call if first time seeing this index
        if index not in accumulated_tool_calls:
            accumulated_tool_calls[index] = {
                "id": "",
                "type": "function",
                "function": {
                    "name": "",
                    "arguments": "",
                },
            }

        # Update with new data from delta
        tc = accumulated_tool_calls[index]

        if "id" in tool_call_delta and tool_call_delta["id"]:
            tc["id"] = tool_call_delta["id"]

        if "type" in tool_call_delta and tool_call_delta["type"]:
            tc["type"] = tool_call_delta["type"]

        if "function" in tool_call_delta:
            func_delta = tool_call_delta["function"]
            if "name" in func_delta and func_delta["name"]:
                tc["function"]["name"] = func_delta["name"]
            if "arguments" in func_delta and func_delta["arguments"]:
                # Arguments are sent incrementally, concatenate them
                tc["function"]["arguments"] += func_delta["arguments"]

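A sketch of stitching two streamed deltas back together, using the dict shape produced by extract_openai_tool_calls_from_chunk above (hypothetical call id and function name):

from typing import Any, Dict

from posthoganalytics.ai.openai.openai_converter import accumulate_openai_tool_calls

acc: Dict[int, Dict[str, Any]] = {}
accumulate_openai_tool_calls(acc, [
    {"index": 0, "id": "call_1", "type": "function",
     "function": {"name": "get_weather", "arguments": '{"city": '}},
])
accumulate_openai_tool_calls(acc, [
    {"index": 0, "function": {"arguments": '"Paris"}'}},
])
print(acc[0]["function"]["arguments"])  # {"city": "Paris"}
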
def format_openai_streaming_output(
    accumulated_content: Any,
    provider_type: str = "chat",
    tool_calls: Optional[List[Dict[str, Any]]] = None,
) -> List[FormattedMessage]:
    """
    Format the final output from OpenAI streaming.

    Args:
        accumulated_content: Accumulated content from streaming (string for chat, list for responses)
        provider_type: Either "chat" or "responses" to handle different API formats
        tool_calls: Optional list of accumulated tool calls

    Returns:
        List of formatted messages
    """

    if provider_type == "chat":
        content_items: List[FormattedContentItem] = []

        # Add text content if present
        if isinstance(accumulated_content, str) and accumulated_content:
            content_items.append({"type": "text", "text": accumulated_content})
        elif isinstance(accumulated_content, list):
            # If it's a list of strings, join them
            text = "".join(str(item) for item in accumulated_content if item)
            if text:
                content_items.append({"type": "text", "text": text})

        # Add tool calls if present
        if tool_calls:
            for tool_call in tool_calls:
                if "function" in tool_call:
                    function_call: FormattedFunctionCall = {
                        "type": "function",
                        "id": tool_call.get("id", ""),
                        "function": tool_call["function"],
                    }
                    content_items.append(function_call)

        # Return formatted message with content
        if content_items:
            return [{"role": "assistant", "content": content_items}]
        else:
            # Empty response
            return [{"role": "assistant", "content": []}]

    elif provider_type == "responses":
        # Responses API: accumulated_content is a list of output items
        if isinstance(accumulated_content, list) and accumulated_content:
            # The output is already formatted, just return it
            return accumulated_content
        elif isinstance(accumulated_content, str):
            return [
                {
                    "role": "assistant",
                    "content": [{"type": "text", "text": accumulated_content}],
                }
            ]

    # Fallback for any other format
    return [
        {
            "role": "assistant",
            "content": [{"type": "text", "text": str(accumulated_content)}],
        }
    ]

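A one-line sketch of the chat path, wrapping accumulated text in an assistant message:

from posthoganalytics.ai.openai.openai_converter import format_openai_streaming_output

print(format_openai_streaming_output("Hi there", provider_type="chat"))
# [{'role': 'assistant', 'content': [{'type': 'text', 'text': 'Hi there'}]}]
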
def format_openai_streaming_input(
    kwargs: Dict[str, Any], api_type: str = "chat"
) -> Any:
    """
    Format OpenAI streaming input based on API type.

    Args:
        kwargs: Keyword arguments passed to OpenAI API
        api_type: Either "chat" or "responses"

    Returns:
        Formatted input ready for PostHog tracking
    """
    from posthoganalytics.ai.utils import merge_system_prompt

    return merge_system_prompt(kwargs, "openai")