posthoganalytics 6.7.1__py3-none-any.whl → 6.7.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,612 @@
1
+ """
2
+ OpenAI-specific conversion utilities.
3
+
4
+ This module handles the conversion of OpenAI API responses and inputs
5
+ into standardized formats for PostHog tracking. It supports both
6
+ Chat Completions API and Responses API formats.
7
+ """
8
+
9
+ from typing import Any, Dict, List, Optional
10
+
11
+ from posthoganalytics.ai.types import (
12
+ FormattedContentItem,
13
+ FormattedFunctionCall,
14
+ FormattedImageContent,
15
+ FormattedMessage,
16
+ FormattedTextContent,
17
+ TokenUsage,
18
+ )
19
+
20
+
21
def format_openai_response(response: Any) -> List[FormattedMessage]:
    """
    Format an OpenAI response into standardized message format.

    Handles both Chat Completions API and Responses API formats. The two
    branches below are independent `if`s (not `elif`): an object exposing
    both `choices` and `output` would contribute one message per branch.

    Args:
        response: The response object from OpenAI API

    Returns:
        List of formatted messages with role and content
    """

    output: List[FormattedMessage] = []

    if response is None:
        return output

    # Handle Chat Completions response format
    if hasattr(response, "choices"):
        # All choices are merged into a single message; if choices carry
        # different roles, the last non-empty role wins.
        content: List[FormattedContentItem] = []
        role = "assistant"

        for choice in response.choices:
            if hasattr(choice, "message") and choice.message:
                if choice.message.role:
                    role = choice.message.role

                if choice.message.content:
                    content.append(
                        {
                            "type": "text",
                            "text": choice.message.content,
                        }
                    )

                # Tool calls are appended after the text of the same choice.
                if hasattr(choice.message, "tool_calls") and choice.message.tool_calls:
                    for tool_call in choice.message.tool_calls:
                        content.append(
                            {
                                "type": "function",
                                "id": tool_call.id,
                                "function": {
                                    "name": tool_call.function.name,
                                    "arguments": tool_call.function.arguments,
                                },
                            }
                        )

        # Responses with no usable content produce no message at all.
        if content:
            output.append(
                {
                    "role": role,
                    "content": content,
                }
            )

    # Handle Responses API format
    if hasattr(response, "output"):
        # Fresh accumulators: content from the chat branch (if any) has
        # already been flushed into `output` above.
        content = []
        role = "assistant"

        for item in response.output:
            if item.type == "message":
                role = item.role

                if hasattr(item, "content") and isinstance(item.content, list):
                    for content_item in item.content:
                        # Preferred shape: an explicit output_text part.
                        if (
                            hasattr(content_item, "type")
                            and content_item.type == "output_text"
                            and hasattr(content_item, "text")
                        ):
                            content.append(
                                {
                                    "type": "text",
                                    "text": content_item.text,
                                }
                            )

                        # Any other part with a `text` attribute is treated
                        # as plain text. NOTE(review): this arm is checked
                        # before the image arm, so an item carrying both
                        # `text` and `image_url` is recorded as text only.
                        elif hasattr(content_item, "text"):
                            content.append({"type": "text", "text": content_item.text})

                        elif (
                            hasattr(content_item, "type")
                            and content_item.type == "input_image"
                            and hasattr(content_item, "image_url")
                        ):
                            image_content: FormattedImageContent = {
                                "type": "image",
                                "image": content_item.image_url,
                            }
                            content.append(image_content)

                # Non-list content is stringified as a single text part.
                elif hasattr(item, "content"):
                    text_content = {"type": "text", "text": str(item.content)}
                    content.append(text_content)

            elif hasattr(item, "type") and item.type == "function_call":
                content.append(
                    {
                        "type": "function",
                        # Prefer call_id; fall back to id, then empty string.
                        "id": getattr(item, "call_id", getattr(item, "id", "")),
                        "function": {
                            "name": item.name,
                            "arguments": getattr(item, "arguments", {}),
                        },
                    }
                )

        if content:
            output.append(
                {
                    "role": role,
                    "content": content,
                }
            )

    return output
140
+
141
+
142
def format_openai_input(
    messages: Optional[List[Dict[str, Any]]] = None, input_data: Optional[Any] = None
) -> List[FormattedMessage]:
    """
    Format OpenAI input messages.

    Handles both the ``messages`` parameter (Chat Completions) and the
    ``input`` parameter (Responses API). When both are supplied, the Chat
    Completions messages come first in the result.

    Args:
        messages: List of message dictionaries for Chat Completions API
        input_data: Input data for Responses API

    Returns:
        List of formatted messages
    """

    formatted: List[FormattedMessage] = []

    # Chat Completions API: a list of {"role", "content"} dictionaries.
    for message in messages or []:
        formatted.append(
            {
                "role": message.get("role", "user"),
                "content": message.get("content", ""),
            }
        )

    # Responses API: may be a list of items, a bare string, or anything else.
    if input_data is None:
        return formatted

    if isinstance(input_data, str):
        formatted.append({"role": "user", "content": input_data})
    elif isinstance(input_data, list):
        for item in input_data:
            if isinstance(item, dict):
                entry = {
                    "role": item.get("role", "user"),
                    "content": item.get("content", ""),
                }
            elif isinstance(item, str):
                entry = {"role": "user", "content": item}
            else:
                # Unknown item types are stringified rather than dropped.
                entry = {"role": "user", "content": str(item)}
            formatted.append(entry)
    else:
        # Any other scalar input is stringified as a single user message.
        formatted.append({"role": "user", "content": str(input_data)})

    return formatted
196
+
197
+
198
def extract_openai_tools(kwargs: Dict[str, Any]) -> Optional[Any]:
    """
    Extract tool definitions from OpenAI API kwargs.

    Args:
        kwargs: Keyword arguments passed to OpenAI API

    Returns:
        Tool definitions if present, None otherwise
    """
    # "tools" is the newer parameter and takes priority over the
    # legacy "functions" parameter.
    for param in ("tools", "functions"):
        if param in kwargs:
            return kwargs[param]
    return None
218
+
219
+
220
def format_openai_streaming_content(
    accumulated_content: str, tool_calls: Optional[List[Dict[str, Any]]] = None
) -> List[FormattedContentItem]:
    """
    Format content from OpenAI streaming response.

    Used by streaming handlers to format accumulated content.

    Args:
        accumulated_content: Accumulated text content from streaming
        tool_calls: Optional list of tool calls accumulated during streaming

    Returns:
        List of formatted content items
    """
    items: List[FormattedContentItem] = []

    # Text comes first, followed by tool calls, mirroring emission order.
    if accumulated_content:
        text_item: FormattedTextContent = {
            "type": "text",
            "text": accumulated_content,
        }
        items.append(text_item)

    for call in tool_calls or []:
        call_item: FormattedFunctionCall = {
            "type": "function",
            "id": call.get("id"),
            "function": call.get("function", {}),
        }
        items.append(call_item)

    return items
256
+
257
+
258
def extract_openai_usage_from_response(response: Any) -> TokenUsage:
    """
    Extract usage statistics from a full OpenAI response (non-streaming).
    Handles both Chat Completions and Responses API.

    When a usage object exposes both naming schemes, the Chat Completions
    fields (prompt_tokens/completion_tokens) take precedence because they
    are read last.

    Args:
        response: The complete response from OpenAI API

    Returns:
        TokenUsage with standardized usage statistics
    """
    if not hasattr(response, "usage"):
        return TokenUsage(input_tokens=0, output_tokens=0)

    usage_obj = response.usage
    input_tokens = 0
    output_tokens = 0
    cached_tokens = 0
    reasoning_tokens = 0

    # Attribute names per API flavor, read Responses-first so Chat
    # Completions values overwrite when both are present.
    field_sets = (
        # Responses API
        ("input_tokens", "output_tokens", "input_tokens_details", "output_tokens_details"),
        # Chat Completions API
        ("prompt_tokens", "completion_tokens", "prompt_tokens_details", "completion_tokens_details"),
    )
    for in_name, out_name, in_details_name, out_details_name in field_sets:
        if hasattr(usage_obj, in_name):
            input_tokens = getattr(usage_obj, in_name)
        if hasattr(usage_obj, out_name):
            output_tokens = getattr(usage_obj, out_name)

        # Details objects may be absent or None; hasattr(None, ...) is False,
        # matching the original nested-hasattr behavior.
        in_details = getattr(usage_obj, in_details_name, None)
        if hasattr(in_details, "cached_tokens"):
            cached_tokens = in_details.cached_tokens
        out_details = getattr(usage_obj, out_details_name, None)
        if hasattr(out_details, "reasoning_tokens"):
            reasoning_tokens = out_details.reasoning_tokens

    result = TokenUsage(
        input_tokens=input_tokens,
        output_tokens=output_tokens,
    )

    # Optional counters are attached only when they carry information.
    if cached_tokens > 0:
        result["cache_read_input_tokens"] = cached_tokens
    if reasoning_tokens > 0:
        result["reasoning_tokens"] = reasoning_tokens

    return result
316
+
317
+
318
def extract_openai_usage_from_chunk(
    chunk: Any, provider_type: str = "chat"
) -> TokenUsage:
    """
    Extract usage statistics from an OpenAI streaming chunk.

    Handles both Chat Completions and Responses API formats.

    Args:
        chunk: Streaming chunk from OpenAI API
        provider_type: Either "chat" or "responses" to handle different API formats

    Returns:
        TokenUsage with standardized usage statistics (empty when the chunk
        carries no usage data)
    """

    usage: TokenUsage = TokenUsage()

    if provider_type == "chat":
        chunk_usage = getattr(chunk, "usage", None)
        if not chunk_usage:
            return usage

        # Standardize prompt/completion counts to input/output tokens.
        usage["input_tokens"] = getattr(chunk_usage, "prompt_tokens", 0)
        usage["output_tokens"] = getattr(chunk_usage, "completion_tokens", 0)

        # Cached-token details (hasattr(None, ...) is False, so a missing or
        # None details object is skipped, as before).
        prompt_details = getattr(chunk_usage, "prompt_tokens_details", None)
        if hasattr(prompt_details, "cached_tokens"):
            usage["cache_read_input_tokens"] = prompt_details.cached_tokens

        # Reasoning-token details.
        completion_details = getattr(chunk_usage, "completion_tokens_details", None)
        if hasattr(completion_details, "reasoning_tokens"):
            usage["reasoning_tokens"] = completion_details.reasoning_tokens

    elif provider_type == "responses":
        # Responses API only attaches usage to the final completed event.
        if getattr(chunk, "type", None) == "response.completed":
            response_usage = getattr(getattr(chunk, "response", None), "usage", None)
            if response_usage:
                usage["input_tokens"] = getattr(response_usage, "input_tokens", 0)
                usage["output_tokens"] = getattr(response_usage, "output_tokens", 0)

                # Cached-token details.
                in_details = getattr(response_usage, "input_tokens_details", None)
                if hasattr(in_details, "cached_tokens"):
                    usage["cache_read_input_tokens"] = in_details.cached_tokens

                # Reasoning-token details.
                out_details = getattr(response_usage, "output_tokens_details", None)
                if hasattr(out_details, "reasoning_tokens"):
                    usage["reasoning_tokens"] = out_details.reasoning_tokens

    return usage
390
+
391
+
392
def extract_openai_content_from_chunk(
    chunk: Any, provider_type: str = "chat"
) -> Optional[str]:
    """
    Extract content from an OpenAI streaming chunk.

    Handles both Chat Completions and Responses API formats.

    Args:
        chunk: Streaming chunk from OpenAI API
        provider_type: Either "chat" or "responses" to handle different API formats

    Returns:
        Text content if present, None otherwise. For the Responses API the
        first output item of a completed response is returned as-is, which
        may not be a plain string.
    """

    if provider_type == "chat":
        # Chat Completions API: content deltas live on choices[0].delta.content.
        if (
            hasattr(chunk, "choices")
            and chunk.choices
            and len(chunk.choices) > 0
            and chunk.choices[0].delta
            and chunk.choices[0].delta.content
        ):
            return chunk.choices[0].delta.content

    elif provider_type == "responses":
        # Responses API: content only arrives on the final completed event.
        if hasattr(chunk, "type") and chunk.type == "response.completed":
            if hasattr(chunk, "response") and chunk.response:
                # Robustness fix: a completed event is not guaranteed to carry
                # an "output" attribute; the previous direct attribute access
                # raised AttributeError in that case.
                output = getattr(chunk.response, "output", None)
                if output and len(output) > 0:
                    # Return the full first output item for responses
                    return output[0]

    return None
429
+
430
+
431
def extract_openai_tool_calls_from_chunk(chunk: Any) -> Optional[List[Dict[str, Any]]]:
    """
    Extract tool calls from an OpenAI streaming chunk.

    Args:
        chunk: Streaming chunk from OpenAI API

    Returns:
        List of tool call deltas if present, None otherwise
    """
    choices = getattr(chunk, "choices", None)
    if not choices or len(choices) == 0:
        return None

    delta = choices[0].delta
    if not delta or not getattr(delta, "tool_calls", None):
        return None

    extracted: List[Dict[str, Any]] = []
    for call in delta.tool_calls:
        # "index" is always recorded; it links deltas to the same call.
        entry: Dict[str, Any] = {"index": getattr(call, "index", None)}

        if getattr(call, "id", None):
            entry["id"] = call.id
        if getattr(call, "type", None):
            entry["type"] = call.type

        function = getattr(call, "function", None)
        if function:
            fn: Dict[str, Any] = {}
            if getattr(function, "name", None):
                fn["name"] = function.name
            if getattr(function, "arguments", None):
                fn["arguments"] = function.arguments
            entry["function"] = fn

        extracted.append(entry)

    return extracted
476
+
477
+
478
def accumulate_openai_tool_calls(
    accumulated_tool_calls: Dict[int, Dict[str, Any]],
    chunk_tool_calls: List[Dict[str, Any]],
) -> None:
    """
    Accumulate tool calls from streaming chunks.

    OpenAI streams each tool call incrementally: the first delta for an
    index carries id, type, function.name and an opening slice of
    function.arguments; later deltas append more argument text.

    Args:
        accumulated_tool_calls: Dictionary mapping index to accumulated tool call data
        chunk_tool_calls: List of tool call deltas from current chunk
    """
    for delta in chunk_tool_calls:
        index = delta.get("index")
        if index is None:
            # Deltas without an index cannot be matched to a call; skip them.
            continue

        # Seed an empty accumulator entry the first time this index appears.
        entry = accumulated_tool_calls.setdefault(
            index,
            {
                "id": "",
                "type": "function",
                "function": {
                    "name": "",
                    "arguments": "",
                },
            },
        )

        # id/type are replaced wholesale when a non-empty value arrives.
        for field in ("id", "type"):
            if delta.get(field):
                entry[field] = delta[field]

        if "function" in delta:
            function_delta = delta["function"]
            if "name" in function_delta and function_delta["name"]:
                entry["function"]["name"] = function_delta["name"]
            if "arguments" in function_delta and function_delta["arguments"]:
                # Argument text streams in pieces; append, don't replace.
                entry["function"]["arguments"] += function_delta["arguments"]
525
+
526
+
527
def format_openai_streaming_output(
    accumulated_content: Any,
    provider_type: str = "chat",
    tool_calls: Optional[List[Dict[str, Any]]] = None,
) -> List[FormattedMessage]:
    """
    Format the final output from OpenAI streaming.

    Args:
        accumulated_content: Accumulated content from streaming (string for
            chat, list for responses)
        provider_type: Either "chat" or "responses" to handle different API formats
        tool_calls: Optional list of accumulated tool calls

    Returns:
        List of formatted messages
    """

    if provider_type == "chat":
        items: List[FormattedContentItem] = []

        # Collapse the accumulated text (a string, or a list of fragments
        # that gets joined) into a single text content item.
        if isinstance(accumulated_content, list):
            joined = "".join(str(part) for part in accumulated_content if part)
            if joined:
                items.append({"type": "text", "text": joined})
        elif isinstance(accumulated_content, str) and accumulated_content:
            items.append({"type": "text", "text": accumulated_content})

        # Append any tool calls that finished accumulating.
        for call in tool_calls or []:
            if "function" in call:
                call_item: FormattedFunctionCall = {
                    "type": "function",
                    "id": call.get("id", ""),
                    "function": call["function"],
                }
                items.append(call_item)

        # Chat always yields exactly one assistant message, possibly with
        # empty content.
        return [{"role": "assistant", "content": items}]

    if provider_type == "responses":
        # Responses API: a non-empty list is already fully formatted.
        if isinstance(accumulated_content, list) and accumulated_content:
            return accumulated_content
        if isinstance(accumulated_content, str):
            return [
                {
                    "role": "assistant",
                    "content": [{"type": "text", "text": accumulated_content}],
                }
            ]

    # Fallback for any other provider type or content shape: stringify.
    return [
        {
            "role": "assistant",
            "content": [{"type": "text", "text": str(accumulated_content)}],
        }
    ]
594
+
595
+
596
def format_openai_streaming_input(
    kwargs: Dict[str, Any], api_type: str = "chat"
) -> Any:
    """
    Format OpenAI streaming input based on API type.

    Args:
        kwargs: Keyword arguments passed to OpenAI API
        api_type: Either "chat" or "responses"

    Returns:
        Formatted input ready for PostHog tracking
    """
    # Chat Completions reads "messages"; anything else is treated as the
    # Responses API and reads "input".
    key = "messages" if api_type == "chat" else "input"
    return kwargs.get(key)