posthoganalytics 6.7.0__py3-none-any.whl → 7.4.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40) hide show
  1. posthoganalytics/__init__.py +84 -7
  2. posthoganalytics/ai/anthropic/__init__.py +10 -0
  3. posthoganalytics/ai/anthropic/anthropic.py +95 -65
  4. posthoganalytics/ai/anthropic/anthropic_async.py +95 -65
  5. posthoganalytics/ai/anthropic/anthropic_converter.py +443 -0
  6. posthoganalytics/ai/gemini/__init__.py +15 -1
  7. posthoganalytics/ai/gemini/gemini.py +66 -71
  8. posthoganalytics/ai/gemini/gemini_async.py +423 -0
  9. posthoganalytics/ai/gemini/gemini_converter.py +652 -0
  10. posthoganalytics/ai/langchain/callbacks.py +58 -13
  11. posthoganalytics/ai/openai/__init__.py +16 -1
  12. posthoganalytics/ai/openai/openai.py +140 -149
  13. posthoganalytics/ai/openai/openai_async.py +127 -82
  14. posthoganalytics/ai/openai/openai_converter.py +741 -0
  15. posthoganalytics/ai/sanitization.py +248 -0
  16. posthoganalytics/ai/types.py +125 -0
  17. posthoganalytics/ai/utils.py +339 -356
  18. posthoganalytics/client.py +345 -97
  19. posthoganalytics/contexts.py +81 -0
  20. posthoganalytics/exception_utils.py +250 -2
  21. posthoganalytics/feature_flags.py +26 -10
  22. posthoganalytics/flag_definition_cache.py +127 -0
  23. posthoganalytics/integrations/django.py +157 -19
  24. posthoganalytics/request.py +203 -23
  25. posthoganalytics/test/test_client.py +250 -22
  26. posthoganalytics/test/test_exception_capture.py +418 -0
  27. posthoganalytics/test/test_feature_flag_result.py +441 -2
  28. posthoganalytics/test/test_feature_flags.py +308 -104
  29. posthoganalytics/test/test_flag_definition_cache.py +612 -0
  30. posthoganalytics/test/test_module.py +0 -8
  31. posthoganalytics/test/test_request.py +536 -0
  32. posthoganalytics/test/test_utils.py +4 -1
  33. posthoganalytics/types.py +40 -0
  34. posthoganalytics/version.py +1 -1
  35. {posthoganalytics-6.7.0.dist-info → posthoganalytics-7.4.3.dist-info}/METADATA +12 -12
  36. posthoganalytics-7.4.3.dist-info/RECORD +57 -0
  37. posthoganalytics-6.7.0.dist-info/RECORD +0 -49
  38. {posthoganalytics-6.7.0.dist-info → posthoganalytics-7.4.3.dist-info}/WHEEL +0 -0
  39. {posthoganalytics-6.7.0.dist-info → posthoganalytics-7.4.3.dist-info}/licenses/LICENSE +0 -0
  40. {posthoganalytics-6.7.0.dist-info → posthoganalytics-7.4.3.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,741 @@
1
+ """
2
+ OpenAI-specific conversion utilities.
3
+
4
+ This module handles the conversion of OpenAI API responses and inputs
5
+ into standardized formats for PostHog tracking. It supports both
6
+ Chat Completions API and Responses API formats.
7
+ """
8
+
9
+ from typing import Any, Dict, List, Optional
10
+
11
+ from posthoganalytics.ai.types import (
12
+ FormattedContentItem,
13
+ FormattedFunctionCall,
14
+ FormattedImageContent,
15
+ FormattedMessage,
16
+ FormattedTextContent,
17
+ TokenUsage,
18
+ )
19
+
20
+
21
def format_openai_response(response: Any) -> List[FormattedMessage]:
    """
    Convert an OpenAI API response into the standardized message list.

    Supports both the Chat Completions response shape (``response.choices``)
    and the Responses API shape (``response.output``).

    Args:
        response: The response object returned by the OpenAI SDK.

    Returns:
        A list of formatted messages, each with a ``role`` and a ``content``
        list; empty if the response is ``None`` or carries no content.
    """
    messages: List[FormattedMessage] = []

    if response is None:
        return messages

    # --- Chat Completions shape ---
    if hasattr(response, "choices"):
        parts: List[FormattedContentItem] = []
        msg_role = "assistant"

        for choice in response.choices:
            message = getattr(choice, "message", None)
            if not message:
                continue

            if message.role:
                msg_role = message.role

            if message.content:
                parts.append({"type": "text", "text": message.content})

            for call in getattr(message, "tool_calls", None) or []:
                parts.append(
                    {
                        "type": "function",
                        "id": call.id,
                        "function": {
                            "name": call.function.name,
                            "arguments": call.function.arguments,
                        },
                    }
                )

            # Audio output (e.g. gpt-4o-audio-preview): dump the Pydantic
            # model so every field OpenAI returns is captured.
            audio = getattr(message, "audio", None)
            if audio:
                parts.append({"type": "audio", **audio.model_dump()})

        if parts:
            messages.append({"role": msg_role, "content": parts})

    # --- Responses API shape ---
    if hasattr(response, "output"):
        parts = []
        msg_role = "assistant"

        for item in response.output:
            if item.type == "message":
                msg_role = item.role

                if hasattr(item, "content") and isinstance(item.content, list):
                    for piece in item.content:
                        piece_type = getattr(piece, "type", None)

                        if piece_type == "output_text" and hasattr(piece, "text"):
                            parts.append({"type": "text", "text": piece.text})
                        elif hasattr(piece, "text"):
                            parts.append({"type": "text", "text": piece.text})
                        elif piece_type == "input_image" and hasattr(
                            piece, "image_url"
                        ):
                            image: FormattedImageContent = {
                                "type": "image",
                                "image": piece.image_url,
                            }
                            parts.append(image)
                elif hasattr(item, "content"):
                    # Non-list content: coerce to a single text item.
                    parts.append({"type": "text", "text": str(item.content)})

            elif getattr(item, "type", None) == "function_call":
                parts.append(
                    {
                        "type": "function",
                        "id": getattr(item, "call_id", getattr(item, "id", "")),
                        "function": {
                            "name": item.name,
                            "arguments": getattr(item, "arguments", {}),
                        },
                    }
                )

        if parts:
            messages.append({"role": msg_role, "content": parts})

    return messages
146
+
147
+
148
def format_openai_input(
    messages: Optional[List[Dict[str, Any]]] = None, input_data: Optional[Any] = None
) -> List[FormattedMessage]:
    """
    Normalize OpenAI input into the standardized message list.

    Accepts the Chat Completions ``messages`` parameter and/or the
    Responses API ``input`` parameter; both contribute to the result
    when both are supplied.

    Args:
        messages: Message dictionaries for the Chat Completions API.
        input_data: Input value for the Responses API (string, list, or
            any other object, which is stringified).

    Returns:
        A list of ``{"role", "content"}`` messages.
    """
    result: List[FormattedMessage] = []

    # Chat Completions: a list of {"role", "content"} dicts.
    for message in messages or []:
        result.append(
            {
                "role": message.get("role", "user"),
                "content": message.get("content", ""),
            }
        )

    # Responses API: the input may be a string, a list, or anything else.
    if input_data is not None:
        if isinstance(input_data, list):
            for entry in input_data:
                if isinstance(entry, dict):
                    result.append(
                        {
                            "role": entry.get("role", "user"),
                            "content": entry.get("content", ""),
                        }
                    )
                elif isinstance(entry, str):
                    result.append({"role": "user", "content": entry})
                else:
                    result.append({"role": "user", "content": str(entry)})
        elif isinstance(input_data, str):
            result.append({"role": "user", "content": input_data})
        else:
            result.append({"role": "user", "content": str(input_data)})

    return result
202
+
203
+
204
def extract_openai_tools(kwargs: Dict[str, Any]) -> Optional[Any]:
    """
    Pull tool definitions out of OpenAI API keyword arguments.

    Checks ``tools`` (newer API) before ``functions`` (older API).

    Args:
        kwargs: Keyword arguments passed to an OpenAI API call.

    Returns:
        The tool definitions if either key is present, ``None`` otherwise.
    """
    # Preference order: newer "tools" parameter wins over legacy "functions".
    for key in ("tools", "functions"):
        if key in kwargs:
            return kwargs[key]

    return None
224
+
225
+
226
def format_openai_streaming_content(
    accumulated_content: str, tool_calls: Optional[List[Dict[str, Any]]] = None
) -> List[FormattedContentItem]:
    """
    Build formatted content items from accumulated streaming data.

    Args:
        accumulated_content: Text content gathered from streaming chunks.
        tool_calls: Tool calls gathered during streaming, if any.

    Returns:
        A list of formatted content items (text first, then tool calls).
    """
    items: List[FormattedContentItem] = []

    # Text content, if any text was streamed.
    if accumulated_content:
        text_item: FormattedTextContent = {
            "type": "text",
            "text": accumulated_content,
        }
        items.append(text_item)

    # Then any accumulated tool calls.
    for call in tool_calls or []:
        call_item: FormattedFunctionCall = {
            "type": "function",
            "id": call.get("id"),
            "function": call.get("function", {}),
        }
        items.append(call_item)

    return items
262
+
263
+
264
def _annotations_contain_url_citation(annotations: Any) -> bool:
    """Return True if any annotation (dict or object form) has type "url_citation"."""
    if not annotations:
        return False

    for annotation in annotations:
        # Support both dict and object annotation formats.
        if isinstance(annotation, dict):
            annotation_type = annotation.get("type")
        else:
            annotation_type = getattr(annotation, "type", None)

        if annotation_type == "url_citation":
            return True

    return False


def extract_openai_web_search_count(response: Any) -> int:
    """
    Extract the web search count from an OpenAI response.

    Uses a two-tier detection strategy:
    1. Priority 1 (exact count): output[].type == "web_search_call"
       (Responses API).
    2. Priority 2 (binary detection): various web search indicators:
       - Root-level citations, search_results, or
         usage.search_context_size (Perplexity)
       - "url_citation" annotations in choices/output (including
         streaming deltas)

    Args:
        response: The response from the OpenAI API.

    Returns:
        Number of web search requests (exact count, or binary 1/0).
    """
    # Priority 1: exact count from Responses API output items.
    if hasattr(response, "output"):
        exact_count = sum(
            1
            for item in response.output
            if getattr(item, "type", None) == "web_search_call"
        )
        if exact_count > 0:
            return exact_count

    # Priority 2: binary detection (returns 1 or 0).

    # Root-level indicators (Perplexity).
    if getattr(response, "citations", None):
        return 1
    if getattr(response, "search_results", None):
        return 1

    usage = getattr(response, "usage", None)
    if usage is not None and getattr(usage, "search_context_size", None):
        return 1

    # "url_citation" annotations in choices (Chat Completions). Both
    # message.annotations (non-streaming / final chunk) and
    # delta.annotations (streaming chunks) are checked.
    for choice in getattr(response, "choices", None) or []:
        message = getattr(choice, "message", None)
        if _annotations_contain_url_citation(getattr(message, "annotations", None)):
            return 1

        delta = getattr(choice, "delta", None)
        if _annotations_contain_url_citation(getattr(delta, "annotations", None)):
            return 1

    # "url_citation" annotations in output content items (Responses API).
    for item in getattr(response, "output", None) or []:
        content = getattr(item, "content", None)
        if isinstance(content, list):
            for content_item in content:
                if _annotations_contain_url_citation(
                    getattr(content_item, "annotations", None)
                ):
                    return 1

    return 0
369
+
370
+
371
def extract_openai_usage_from_response(response: Any) -> TokenUsage:
    """
    Extract usage statistics from a complete (non-streaming) OpenAI response.

    Reads both the Responses API field names (``input_tokens`` /
    ``output_tokens``) and the Chat Completions field names
    (``prompt_tokens`` / ``completion_tokens``).

    Args:
        response: The complete response from the OpenAI API.

    Returns:
        TokenUsage with standardized usage statistics.
    """
    if not hasattr(response, "usage"):
        return TokenUsage(input_tokens=0, output_tokens=0)

    usage = response.usage
    input_tokens = 0
    output_tokens = 0
    cached_tokens = 0
    reasoning_tokens = 0

    # Responses API field names.
    if hasattr(usage, "input_tokens"):
        input_tokens = usage.input_tokens
    if hasattr(usage, "output_tokens"):
        output_tokens = usage.output_tokens

    input_details = getattr(usage, "input_tokens_details", None)
    if hasattr(input_details, "cached_tokens"):
        cached_tokens = input_details.cached_tokens

    output_details = getattr(usage, "output_tokens_details", None)
    if hasattr(output_details, "reasoning_tokens"):
        reasoning_tokens = output_details.reasoning_tokens

    # Chat Completions field names (take precedence when both are present).
    if hasattr(usage, "prompt_tokens"):
        input_tokens = usage.prompt_tokens
    if hasattr(usage, "completion_tokens"):
        output_tokens = usage.completion_tokens

    prompt_details = getattr(usage, "prompt_tokens_details", None)
    if hasattr(prompt_details, "cached_tokens"):
        cached_tokens = prompt_details.cached_tokens

    completion_details = getattr(usage, "completion_tokens_details", None)
    if hasattr(completion_details, "reasoning_tokens"):
        reasoning_tokens = completion_details.reasoning_tokens

    result = TokenUsage(
        input_tokens=input_tokens,
        output_tokens=output_tokens,
    )

    # Optional fields are only set when non-zero.
    if cached_tokens > 0:
        result["cache_read_input_tokens"] = cached_tokens
    if reasoning_tokens > 0:
        result["reasoning_tokens"] = reasoning_tokens

    searches = extract_openai_web_search_count(response)
    if searches > 0:
        result["web_search_count"] = searches

    return result
433
+
434
+
435
def extract_openai_usage_from_chunk(
    chunk: Any, provider_type: str = "chat"
) -> TokenUsage:
    """
    Extract usage statistics from an OpenAI streaming chunk.

    Handles both the Chat Completions and the Responses API chunk shapes.

    Args:
        chunk: A streaming chunk from the OpenAI API.
        provider_type: Either "chat" or "responses", selecting the format.

    Returns:
        TokenUsage with whatever statistics the chunk carried.
    """
    usage: TokenUsage = TokenUsage()

    if provider_type == "chat":
        # Web search indicators (citations, annotations) can appear on any
        # chunk, not just the one carrying usage data, so check them first.
        searches = extract_openai_web_search_count(chunk)
        if searches > 0:
            usage["web_search_count"] = searches

        chunk_usage = getattr(chunk, "usage", None)
        if not chunk_usage:
            return usage

        # Standardize prompt/completion token names to input/output.
        usage["input_tokens"] = getattr(chunk_usage, "prompt_tokens", 0)
        usage["output_tokens"] = getattr(chunk_usage, "completion_tokens", 0)

        # Cached tokens, when reported.
        prompt_details = getattr(chunk_usage, "prompt_tokens_details", None)
        if hasattr(prompt_details, "cached_tokens"):
            usage["cache_read_input_tokens"] = prompt_details.cached_tokens

        # Reasoning tokens, when reported.
        completion_details = getattr(chunk_usage, "completion_tokens_details", None)
        if hasattr(completion_details, "reasoning_tokens"):
            usage["reasoning_tokens"] = completion_details.reasoning_tokens

    elif provider_type == "responses":
        # For the Responses API, usage only arrives on the final
        # "response.completed" event, inside chunk.response.usage.
        if getattr(chunk, "type", None) == "response.completed":
            response = getattr(chunk, "response", None)
            response_usage = getattr(response, "usage", None)

            if response_usage:
                usage["input_tokens"] = getattr(response_usage, "input_tokens", 0)
                usage["output_tokens"] = getattr(response_usage, "output_tokens", 0)

                # Cached tokens, when reported.
                input_details = getattr(response_usage, "input_tokens_details", None)
                if hasattr(input_details, "cached_tokens"):
                    usage["cache_read_input_tokens"] = input_details.cached_tokens

                # Reasoning tokens, when reported.
                output_details = getattr(response_usage, "output_tokens_details", None)
                if hasattr(output_details, "reasoning_tokens"):
                    usage["reasoning_tokens"] = output_details.reasoning_tokens

            # Web search count from the complete response payload.
            if hasattr(chunk, "response"):
                searches = extract_openai_web_search_count(chunk.response)
                if searches > 0:
                    usage["web_search_count"] = searches

    return usage
520
+
521
+
522
def extract_openai_content_from_chunk(
    chunk: Any, provider_type: str = "chat"
) -> Optional[str]:
    """
    Extract text content from an OpenAI streaming chunk.

    Handles both the Chat Completions and the Responses API chunk shapes.

    Args:
        chunk: A streaming chunk from the OpenAI API.
        provider_type: Either "chat" or "responses", selecting the format.

    Returns:
        The content if present, ``None`` otherwise.
    """
    if provider_type == "chat":
        # Chat Completions: content lives on choices[0].delta.content.
        choices = getattr(chunk, "choices", None)
        if choices and len(choices) > 0:
            delta = choices[0].delta
            if delta and delta.content:
                return delta.content

    elif provider_type == "responses":
        # Responses API: only the final "response.completed" event carries
        # the output; return its first output item in full.
        if getattr(chunk, "type", None) == "response.completed":
            response = getattr(chunk, "response", None)
            if response:
                output = response.output
                if output and len(output) > 0:
                    return output[0]

    return None
559
+
560
+
561
def extract_openai_tool_calls_from_chunk(chunk: Any) -> Optional[List[Dict[str, Any]]]:
    """
    Extract tool call deltas from an OpenAI streaming chunk.

    Args:
        chunk: A streaming chunk from the OpenAI API.

    Returns:
        A list of tool call delta dicts if present, ``None`` otherwise.
    """
    choices = getattr(chunk, "choices", None)
    if not (choices and len(choices) > 0):
        return None

    delta = choices[0].delta
    if not delta or not (hasattr(delta, "tool_calls") and delta.tool_calls):
        return None

    deltas: List[Dict[str, Any]] = []

    for call in delta.tool_calls:
        entry: Dict[str, Any] = {"index": getattr(call, "index", None)}

        # id/type arrive only on the first chunk of each tool call.
        if getattr(call, "id", None):
            entry["id"] = call.id
        if getattr(call, "type", None):
            entry["type"] = call.type

        function = getattr(call, "function", None)
        if function:
            function_entry: Dict[str, Any] = {}
            if getattr(function, "name", None):
                function_entry["name"] = function.name
            if getattr(function, "arguments", None):
                function_entry["arguments"] = function.arguments
            entry["function"] = function_entry

        deltas.append(entry)

    return deltas
606
+
607
+
608
def accumulate_openai_tool_calls(
    accumulated_tool_calls: Dict[int, Dict[str, Any]],
    chunk_tool_calls: List[Dict[str, Any]],
) -> None:
    """
    Merge streaming tool call deltas into the accumulated state.

    OpenAI streams tool calls incrementally: the first chunk for an index
    carries id, type, function.name and partial function.arguments, and
    later chunks carry more of function.arguments.

    Args:
        accumulated_tool_calls: Mapping from tool call index to its
            accumulated data (mutated in place).
        chunk_tool_calls: Tool call deltas from the current chunk.
    """
    for delta in chunk_tool_calls:
        index = delta.get("index")
        if index is None:
            # Deltas without an index cannot be matched to a tool call.
            continue

        # Create the accumulator entry on first sight of this index.
        entry = accumulated_tool_calls.setdefault(
            index,
            {
                "id": "",
                "type": "function",
                "function": {"name": "", "arguments": ""},
            },
        )

        if delta.get("id"):
            entry["id"] = delta["id"]
        if delta.get("type"):
            entry["type"] = delta["type"]

        if "function" in delta:
            func_delta = delta["function"]
            if func_delta.get("name"):
                entry["function"]["name"] = func_delta["name"]
            if func_delta.get("arguments"):
                # Arguments stream incrementally; concatenate the pieces.
                entry["function"]["arguments"] += func_delta["arguments"]
655
+
656
+
657
def format_openai_streaming_output(
    accumulated_content: Any,
    provider_type: str = "chat",
    tool_calls: Optional[List[Dict[str, Any]]] = None,
) -> List[FormattedMessage]:
    """
    Build the final formatted output from an OpenAI stream.

    Args:
        accumulated_content: What streaming accumulated — a string for
            chat, a list of output items for the Responses API.
        provider_type: Either "chat" or "responses", selecting the format.
        tool_calls: Accumulated tool calls, if any.

    Returns:
        A list of formatted messages.
    """
    if provider_type == "chat":
        parts: List[FormattedContentItem] = []

        # Text content, joining list fragments if needed.
        if isinstance(accumulated_content, str):
            if accumulated_content:
                parts.append({"type": "text", "text": accumulated_content})
        elif isinstance(accumulated_content, list):
            joined = "".join(
                str(fragment) for fragment in accumulated_content if fragment
            )
            if joined:
                parts.append({"type": "text", "text": joined})

        # Accumulated tool calls, if any.
        for call in tool_calls or []:
            if "function" in call:
                call_item: FormattedFunctionCall = {
                    "type": "function",
                    "id": call.get("id", ""),
                    "function": call["function"],
                }
                parts.append(call_item)

        # An empty stream still yields a single assistant message.
        return [{"role": "assistant", "content": parts}]

    elif provider_type == "responses":
        if isinstance(accumulated_content, list) and accumulated_content:
            # Responses output items are already formatted; pass through.
            return accumulated_content
        if isinstance(accumulated_content, str):
            return [
                {
                    "role": "assistant",
                    "content": [{"type": "text", "text": accumulated_content}],
                }
            ]

    # Fallback for any other format.
    return [
        {
            "role": "assistant",
            "content": [{"type": "text", "text": str(accumulated_content)}],
        }
    ]
724
+
725
+
726
def format_openai_streaming_input(
    kwargs: Dict[str, Any], api_type: str = "chat"
) -> Any:
    """
    Format OpenAI streaming input for PostHog tracking.

    Delegates to ``merge_system_prompt`` with the "openai" provider tag.
    Note: ``api_type`` is currently unused by the implementation.

    Args:
        kwargs: Keyword arguments passed to the OpenAI API.
        api_type: Either "chat" or "responses".

    Returns:
        The formatted input ready for PostHog tracking.
    """
    # Local import — presumably to avoid an import cycle with
    # posthoganalytics.ai.utils; confirm before moving to module level.
    from posthoganalytics.ai.utils import merge_system_prompt

    formatted_input = merge_system_prompt(kwargs, "openai")
    return formatted_input