posthog 6.7.0__py3-none-any.whl → 6.7.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,585 @@
1
+ """
2
+ OpenAI-specific conversion utilities.
3
+
4
+ This module handles the conversion of OpenAI API responses and inputs
5
+ into standardized formats for PostHog tracking. It supports both
6
+ Chat Completions API and Responses API formats.
7
+ """
8
+
9
+ from typing import Any, Dict, List, Optional
10
+
11
+ from posthog.ai.types import (
12
+ FormattedContentItem,
13
+ FormattedFunctionCall,
14
+ FormattedImageContent,
15
+ FormattedMessage,
16
+ FormattedTextContent,
17
+ StreamingUsageStats,
18
+ TokenUsage,
19
+ )
20
+
21
+
22
def format_openai_response(response: Any) -> List[FormattedMessage]:
    """
    Convert an OpenAI response object into the standardized message list.

    Supports both the Chat Completions API (``response.choices``) and the
    Responses API (``response.output``); a response exposing both attributes
    contributes one message per format, chat first.

    Args:
        response: The response object returned by the OpenAI client.

    Returns:
        A list of formatted messages, each with a role and content items.
    """
    if response is None:
        return []

    formatted: List[FormattedMessage] = []

    if hasattr(response, "choices"):
        chat_message = _format_chat_choices(response.choices)
        if chat_message is not None:
            formatted.append(chat_message)

    if hasattr(response, "output"):
        responses_message = _format_responses_output(response.output)
        if responses_message is not None:
            formatted.append(responses_message)

    return formatted


def _format_chat_choices(choices: Any) -> Optional[FormattedMessage]:
    """Collapse Chat Completions choices into a single formatted message, or None if empty."""
    items: List[FormattedContentItem] = []
    role = "assistant"

    for choice in choices:
        message = getattr(choice, "message", None)
        if not message:
            continue

        # The last choice that carries a role wins.
        if message.role:
            role = message.role

        if message.content:
            items.append({"type": "text", "text": message.content})

        for tool_call in getattr(message, "tool_calls", None) or []:
            items.append(
                {
                    "type": "function",
                    "id": tool_call.id,
                    "function": {
                        "name": tool_call.function.name,
                        "arguments": tool_call.function.arguments,
                    },
                }
            )

    if not items:
        return None
    return {"role": role, "content": items}


def _format_responses_output(output_items: Any) -> Optional[FormattedMessage]:
    """Collapse Responses API output items into a single formatted message, or None if empty."""
    items: List[FormattedContentItem] = []
    role = "assistant"

    for item in output_items:
        if item.type == "message":
            role = item.role

            if hasattr(item, "content") and isinstance(item.content, list):
                for part in item.content:
                    part_type = getattr(part, "type", None)
                    if part_type == "output_text" and hasattr(part, "text"):
                        items.append({"type": "text", "text": part.text})
                    elif hasattr(part, "text"):
                        # Any other text-bearing part is treated as plain text.
                        items.append({"type": "text", "text": part.text})
                    elif part_type == "input_image" and hasattr(part, "image_url"):
                        items.append({"type": "image", "image": part.image_url})
            elif hasattr(item, "content"):
                # Non-list content is stringified as a single text item.
                items.append({"type": "text", "text": str(item.content)})

        elif getattr(item, "type", None) == "function_call":
            items.append(
                {
                    "type": "function",
                    "id": getattr(item, "call_id", getattr(item, "id", "")),
                    "function": {
                        "name": item.name,
                        "arguments": getattr(item, "arguments", {}),
                    },
                }
            )

    if not items:
        return None
    return {"role": role, "content": items}
141
+
142
+
143
def format_openai_input(
    messages: Optional[List[Dict[str, Any]]] = None, input_data: Optional[Any] = None
) -> List[FormattedMessage]:
    """
    Normalize OpenAI input into the standardized message list.

    Accepts the Chat Completions ``messages`` parameter and/or the
    Responses API ``input`` parameter; when both are given, the chat
    messages come first.

    Args:
        messages: Message dictionaries for the Chat Completions API.
        input_data: Input payload for the Responses API (string, list, or other).

    Returns:
        A list of formatted messages with ``role`` and ``content`` keys.
    """
    result: List[FormattedMessage] = []

    # Chat Completions API: copy role/content from each message dict.
    for msg in messages or []:
        result.append(
            {
                "role": msg.get("role", "user"),
                "content": msg.get("content", ""),
            }
        )

    # Responses API: coerce each entry into a role/content message.
    if input_data is not None:
        if isinstance(input_data, list):
            for entry in input_data:
                if isinstance(entry, dict):
                    result.append(
                        {
                            "role": entry.get("role", "user"),
                            "content": entry.get("content", ""),
                        }
                    )
                else:
                    # Strings pass through as-is; anything else is stringified.
                    text = entry if isinstance(entry, str) else str(entry)
                    result.append({"role": "user", "content": text})
        else:
            # A bare string (or any other scalar) becomes one user message.
            text = input_data if isinstance(input_data, str) else str(input_data)
            result.append({"role": "user", "content": text})

    return result
197
+
198
+
199
def extract_openai_tools(kwargs: Dict[str, Any]) -> Optional[Any]:
    """
    Pull tool definitions out of OpenAI API keyword arguments.

    Prefers the newer ``tools`` parameter and falls back to the legacy
    ``functions`` parameter.

    Args:
        kwargs: Keyword arguments passed to the OpenAI API.

    Returns:
        The tool definitions if present, otherwise None.
    """
    # Check newest parameter name first; first match wins.
    for key in ("tools", "functions"):
        if key in kwargs:
            return kwargs[key]
    return None
219
+
220
+
221
def format_openai_streaming_content(
    accumulated_content: str, tool_calls: Optional[List[Dict[str, Any]]] = None
) -> List[FormattedContentItem]:
    """
    Build content items from accumulated OpenAI streaming data.

    Used by streaming handlers once a stream has been fully consumed.

    Args:
        accumulated_content: Text accumulated across streaming deltas.
        tool_calls: Tool calls accumulated during streaming, if any.

    Returns:
        A list of formatted content items: text first, then tool calls.
    """
    items: List[FormattedContentItem] = []

    # Empty text produces no item at all.
    if accumulated_content:
        items.append({"type": "text", "text": accumulated_content})

    for call in tool_calls or []:
        items.append(
            {
                "type": "function",
                "id": call.get("id"),
                "function": call.get("function", {}),
            }
        )

    return items
257
+
258
+
259
def extract_openai_usage_from_chunk(
    chunk: Any, provider_type: str = "chat"
) -> StreamingUsageStats:
    """
    Pull usage statistics out of an OpenAI streaming chunk.

    Chat Completions chunks carry usage directly on ``chunk.usage``; the
    Responses API only attaches usage to the ``response.completed`` event.

    Args:
        chunk: Streaming chunk from the OpenAI API.
        provider_type: Either "chat" or "responses", selecting the layout.

    Returns:
        A dict of usage statistics (empty when the chunk carries none).
    """
    stats: StreamingUsageStats = {}

    if provider_type == "chat":
        usage = getattr(chunk, "usage", None)
        if not usage:
            return stats

        # Chat Completions reports prompt/completion token counts.
        stats["prompt_tokens"] = getattr(usage, "prompt_tokens", 0)
        stats["completion_tokens"] = getattr(usage, "completion_tokens", 0)
        stats["total_tokens"] = getattr(usage, "total_tokens", 0)

        # Cached prompt tokens, when the details object exposes them.
        prompt_details = getattr(usage, "prompt_tokens_details", None)
        if hasattr(prompt_details, "cached_tokens"):
            stats["cache_read_input_tokens"] = prompt_details.cached_tokens

        # Reasoning tokens, when the details object exposes them.
        completion_details = getattr(usage, "completion_tokens_details", None)
        if hasattr(completion_details, "reasoning_tokens"):
            stats["reasoning_tokens"] = completion_details.reasoning_tokens

    elif provider_type == "responses":
        # Usage only appears on the terminal "response.completed" event.
        if getattr(chunk, "type", None) == "response.completed":
            usage = getattr(getattr(chunk, "response", None), "usage", None)
            if usage:
                stats["input_tokens"] = getattr(usage, "input_tokens", 0)
                stats["output_tokens"] = getattr(usage, "output_tokens", 0)
                stats["total_tokens"] = getattr(usage, "total_tokens", 0)

                input_details = getattr(usage, "input_tokens_details", None)
                if hasattr(input_details, "cached_tokens"):
                    stats["cache_read_input_tokens"] = input_details.cached_tokens

                output_details = getattr(usage, "output_tokens_details", None)
                if hasattr(output_details, "reasoning_tokens"):
                    stats["reasoning_tokens"] = output_details.reasoning_tokens

    return stats
332
+
333
+
334
def extract_openai_content_from_chunk(
    chunk: Any, provider_type: str = "chat"
) -> Optional[str]:
    """
    Extract content from an OpenAI streaming chunk.

    Handles both Chat Completions and Responses API formats.

    Args:
        chunk: Streaming chunk from OpenAI API.
        provider_type: Either "chat" or "responses" to handle different API formats.

    Returns:
        For "chat", the delta text if present. For "responses", the first
        output item of a completed response (note: this is the full output
        item, not necessarily a plain string). None when the chunk carries
        no content.
    """
    if provider_type == "chat":
        # Chat Completions API format.
        choices = getattr(chunk, "choices", None)
        if choices:
            # Use getattr so a malformed chunk without a delta attribute
            # yields None instead of raising AttributeError.
            delta = getattr(choices[0], "delta", None)
            if delta and getattr(delta, "content", None):
                return delta.content

    elif provider_type == "responses":
        # Responses API format: content only appears on the terminal event.
        if getattr(chunk, "type", None) == "response.completed":
            response = getattr(chunk, "response", None)
            # Guard the output attribute as well: a completed event whose
            # response lacks "output" previously raised AttributeError.
            output = getattr(response, "output", None) if response else None
            if output:
                # Return the full first output item for responses.
                return output[0]

    return None
371
+
372
+
373
def extract_openai_tool_calls_from_chunk(chunk: Any) -> Optional[List[Dict[str, Any]]]:
    """
    Extract tool-call deltas from an OpenAI streaming chunk.

    Args:
        chunk: Streaming chunk from the OpenAI API.

    Returns:
        A list of tool-call delta dicts if present, otherwise None.
    """
    choices = getattr(chunk, "choices", None)
    if not choices:
        return None

    delta = choices[0].delta
    if not delta or not getattr(delta, "tool_calls", None):
        return None

    deltas: List[Dict[str, Any]] = []
    for call in delta.tool_calls:
        # The index is always recorded; it ties deltas to the same call.
        entry: Dict[str, Any] = {"index": getattr(call, "index", None)}

        call_id = getattr(call, "id", None)
        if call_id:
            entry["id"] = call_id

        call_type = getattr(call, "type", None)
        if call_type:
            entry["type"] = call_type

        function = getattr(call, "function", None)
        if function:
            fn: Dict[str, Any] = {}
            if getattr(function, "name", None):
                fn["name"] = function.name
            if getattr(function, "arguments", None):
                fn["arguments"] = function.arguments
            entry["function"] = fn

        deltas.append(entry)

    return deltas
418
+
419
+
420
def accumulate_openai_tool_calls(
    accumulated_tool_calls: Dict[int, Dict[str, Any]],
    chunk_tool_calls: List[Dict[str, Any]],
) -> None:
    """
    Merge streaming tool-call deltas into the accumulator, in place.

    OpenAI streams tool calls incrementally: the first delta for an index
    carries id, type, function.name and the start of function.arguments;
    subsequent deltas append more argument text.

    Args:
        accumulated_tool_calls: Accumulated tool-call data keyed by index.
        chunk_tool_calls: Tool-call deltas from the current chunk.
    """
    for delta in chunk_tool_calls:
        index = delta.get("index")
        if index is None:
            # Without an index we cannot tell which call the delta belongs to.
            continue

        # Create the skeleton entry the first time an index is seen.
        entry = accumulated_tool_calls.setdefault(
            index,
            {
                "id": "",
                "type": "function",
                "function": {"name": "", "arguments": ""},
            },
        )

        # id and type are replaced wholesale when a truthy value arrives.
        for field in ("id", "type"):
            if delta.get(field):
                entry[field] = delta[field]

        if "function" in delta:
            fn_delta = delta["function"]
            if "name" in fn_delta and fn_delta["name"]:
                entry["function"]["name"] = fn_delta["name"]
            if "arguments" in fn_delta and fn_delta["arguments"]:
                # Argument text streams in fragments; concatenate them.
                entry["function"]["arguments"] += fn_delta["arguments"]
467
+
468
+
469
def format_openai_streaming_output(
    accumulated_content: Any,
    provider_type: str = "chat",
    tool_calls: Optional[List[Dict[str, Any]]] = None,
) -> List[FormattedMessage]:
    """
    Build the final formatted messages from accumulated OpenAI streaming state.

    Args:
        accumulated_content: Accumulated content — a string for chat, or a
            list of already-formatted output items for the Responses API.
        provider_type: Either "chat" or "responses", selecting the layout.
        tool_calls: Accumulated tool calls, if any.

    Returns:
        A list of formatted messages.
    """
    if provider_type == "chat":
        items: List[FormattedContentItem] = []

        # Text may arrive as one string or as a list of fragments to join.
        if isinstance(accumulated_content, str):
            if accumulated_content:
                items.append({"type": "text", "text": accumulated_content})
        elif isinstance(accumulated_content, list):
            joined = "".join(str(part) for part in accumulated_content if part)
            if joined:
                items.append({"type": "text", "text": joined})

        for call in tool_calls or []:
            # Deltas without a function payload are dropped.
            if "function" in call:
                items.append(
                    {
                        "type": "function",
                        "id": call.get("id", ""),
                        "function": call["function"],
                    }
                )

        # Always emit an assistant message, even with empty content.
        return [{"role": "assistant", "content": items}]

    if provider_type == "responses":
        # Responses output items are already formatted; pass them through.
        if isinstance(accumulated_content, list) and accumulated_content:
            return accumulated_content
        if isinstance(accumulated_content, str):
            return [
                {
                    "role": "assistant",
                    "content": [{"type": "text", "text": accumulated_content}],
                }
            ]

    # Fallback for any other format: stringify whatever we were given.
    return [
        {
            "role": "assistant",
            "content": [{"type": "text", "text": str(accumulated_content)}],
        }
    ]
536
+
537
+
538
def standardize_openai_usage(
    usage: Dict[str, Any], api_type: str = "chat"
) -> TokenUsage:
    """
    Convert raw OpenAI usage statistics into the common TokenUsage format.

    Args:
        usage: Raw usage statistics from OpenAI.
        api_type: "chat" (prompt_tokens/completion_tokens) or "responses"
            (input_tokens/output_tokens), selecting the source field names.

    Returns:
        A standardized TokenUsage dict.
    """
    # The two APIs differ only in the names of the token-count fields.
    if api_type == "chat":
        input_key, output_key = "prompt_tokens", "completion_tokens"
    else:  # responses API
        input_key, output_key = "input_tokens", "output_tokens"

    return TokenUsage(
        input_tokens=usage.get(input_key, 0),
        output_tokens=usage.get(output_key, 0),
        cache_read_input_tokens=usage.get("cache_read_input_tokens"),
        reasoning_tokens=usage.get("reasoning_tokens"),
    )
567
+
568
+
569
def format_openai_streaming_input(
    kwargs: Dict[str, Any], api_type: str = "chat"
) -> Any:
    """
    Pick the input payload to track from OpenAI streaming kwargs.

    Args:
        kwargs: Keyword arguments passed to the OpenAI API.
        api_type: "chat" (uses ``messages``) or "responses" (uses ``input``).

    Returns:
        The raw input payload for PostHog tracking, or None if absent.
    """
    key = "messages" if api_type == "chat" else "input"
    return kwargs.get(key)