khoj 2.0.0b13.dev19__py3-none-any.whl → 2.0.0b14__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68)
  1. khoj/database/admin.py +2 -2
  2. khoj/interface/compiled/404/index.html +2 -2
  3. khoj/interface/compiled/_next/static/chunks/{2327-fe87dd989d71d0eb.js → 2327-438aaec1657c5ada.js} +1 -1
  4. khoj/interface/compiled/_next/static/chunks/{3260-43d3019b92c315bb.js → 3260-82d2521fab032ff1.js} +1 -1
  5. khoj/interface/compiled/_next/static/chunks/9808-c0742b05e1ef29ba.js +1 -0
  6. khoj/interface/compiled/_next/static/chunks/app/agents/layout-0114c87d7ccf6d9b.js +1 -0
  7. khoj/interface/compiled/_next/static/chunks/app/automations/layout-8639ff99d6c2fec6.js +1 -0
  8. khoj/interface/compiled/_next/static/chunks/app/automations/{page-198b26df6e09bbb0.js → page-1047097af99d31c7.js} +1 -1
  9. khoj/interface/compiled/_next/static/chunks/app/chat/layout-2ff3e18a6feae92a.js +1 -0
  10. khoj/interface/compiled/_next/static/chunks/app/chat/page-ac7ed0a1aff1b145.js +1 -0
  11. khoj/interface/compiled/_next/static/chunks/app/search/layout-78dd7cdd97510485.js +1 -0
  12. khoj/interface/compiled/_next/static/chunks/app/share/chat/layout-8addeb8079c3215b.js +1 -0
  13. khoj/interface/compiled/_next/static/chunks/app/share/chat/{page-e0dcb1762f8c8f88.js → page-819c6536c15e3d31.js} +1 -1
  14. khoj/interface/compiled/_next/static/chunks/{webpack-d60b0c57a6c38d0f.js → webpack-5393aad3d824e0cb.js} +1 -1
  15. khoj/interface/compiled/_next/static/css/5c7a72bad47e50b3.css +25 -0
  16. khoj/interface/compiled/_next/static/css/821d0d60b0b6871d.css +1 -0
  17. khoj/interface/compiled/_next/static/css/ecea704005ba630c.css +1 -0
  18. khoj/interface/compiled/agents/index.html +2 -2
  19. khoj/interface/compiled/agents/index.txt +2 -2
  20. khoj/interface/compiled/automations/index.html +2 -2
  21. khoj/interface/compiled/automations/index.txt +3 -3
  22. khoj/interface/compiled/chat/index.html +2 -2
  23. khoj/interface/compiled/chat/index.txt +5 -4
  24. khoj/interface/compiled/index.html +2 -2
  25. khoj/interface/compiled/index.txt +2 -2
  26. khoj/interface/compiled/search/index.html +2 -2
  27. khoj/interface/compiled/search/index.txt +2 -2
  28. khoj/interface/compiled/settings/index.html +2 -2
  29. khoj/interface/compiled/settings/index.txt +4 -4
  30. khoj/interface/compiled/share/chat/index.html +2 -2
  31. khoj/interface/compiled/share/chat/index.txt +2 -2
  32. khoj/processor/conversation/google/gemini_chat.py +1 -1
  33. khoj/processor/conversation/google/utils.py +62 -19
  34. khoj/processor/conversation/openai/gpt.py +65 -28
  35. khoj/processor/conversation/openai/utils.py +401 -28
  36. khoj/processor/conversation/prompts.py +48 -30
  37. khoj/processor/conversation/utils.py +5 -1
  38. khoj/processor/tools/run_code.py +15 -22
  39. khoj/routers/api_chat.py +8 -3
  40. khoj/routers/api_content.py +1 -1
  41. khoj/routers/helpers.py +62 -42
  42. khoj/routers/research.py +7 -5
  43. khoj/utils/constants.py +9 -1
  44. khoj/utils/helpers.py +55 -15
  45. {khoj-2.0.0b13.dev19.dist-info → khoj-2.0.0b14.dist-info}/METADATA +1 -1
  46. {khoj-2.0.0b13.dev19.dist-info → khoj-2.0.0b14.dist-info}/RECORD +58 -58
  47. khoj/interface/compiled/_next/static/chunks/7127-97b83757db125ba6.js +0 -1
  48. khoj/interface/compiled/_next/static/chunks/app/agents/layout-4e2a134ec26aa606.js +0 -1
  49. khoj/interface/compiled/_next/static/chunks/app/automations/layout-63603d2cb33279f7.js +0 -1
  50. khoj/interface/compiled/_next/static/chunks/app/chat/layout-ad4d1792ab1a4108.js +0 -1
  51. khoj/interface/compiled/_next/static/chunks/app/chat/page-9a75d7369f2a7cd2.js +0 -1
  52. khoj/interface/compiled/_next/static/chunks/app/search/layout-c02531d586972d7d.js +0 -1
  53. khoj/interface/compiled/_next/static/chunks/app/share/chat/layout-e8e5db7830bf3f47.js +0 -1
  54. khoj/interface/compiled/_next/static/css/23b26df423cd8a9c.css +0 -1
  55. khoj/interface/compiled/_next/static/css/2945c4a857922f3b.css +0 -1
  56. khoj/interface/compiled/_next/static/css/3090706713c12a32.css +0 -25
  57. /khoj/interface/compiled/_next/static/{N-GdBSXoYe-DuObnbXVRO → Qn_2XyeVWxjaIRks7rzM-}/_buildManifest.js +0 -0
  58. /khoj/interface/compiled/_next/static/{N-GdBSXoYe-DuObnbXVRO → Qn_2XyeVWxjaIRks7rzM-}/_ssgManifest.js +0 -0
  59. /khoj/interface/compiled/_next/static/chunks/{1327-511bb0a862efce80.js → 1327-e254819a9172cfa7.js} +0 -0
  60. /khoj/interface/compiled/_next/static/chunks/{1915-fbfe167c84ad60c5.js → 1915-5c6508f6ebb62a30.js} +0 -0
  61. /khoj/interface/compiled/_next/static/chunks/{2117-e78b6902ad6f75ec.js → 2117-080746c8e170c81a.js} +0 -0
  62. /khoj/interface/compiled/_next/static/chunks/{2939-4d4084c5b888b960.js → 2939-4af3fd24b8ffc9ad.js} +0 -0
  63. /khoj/interface/compiled/_next/static/chunks/{4447-d6cf93724d57e34b.js → 4447-cd95608f8e93e711.js} +0 -0
  64. /khoj/interface/compiled/_next/static/chunks/{8667-4b7790573b08c50d.js → 8667-50b03a89e82e0ba7.js} +0 -0
  65. /khoj/interface/compiled/_next/static/chunks/{9139-ce1ae935dac9c871.js → 9139-8ac4d9feb10f8869.js} +0 -0
  66. {khoj-2.0.0b13.dev19.dist-info → khoj-2.0.0b14.dist-info}/WHEEL +0 -0
  67. {khoj-2.0.0b13.dev19.dist-info → khoj-2.0.0b14.dist-info}/entry_points.txt +0 -0
  68. {khoj-2.0.0b13.dev19.dist-info → khoj-2.0.0b14.dist-info}/licenses/LICENSE +0 -0
khoj/processor/conversation/openai/utils.py
@@ -21,6 +21,8 @@ from openai.types.chat.chat_completion_chunk import (
     Choice,
     ChoiceDelta,
 )
+from openai.types.responses import Response as OpenAIResponse
+from openai.types.responses import ResponseFunctionToolCall, ResponseReasoningItem
 from pydantic import BaseModel
 from tenacity import (
     before_sleep_log,
@@ -53,13 +55,31 @@ openai_clients: Dict[str, openai.OpenAI] = {}
 openai_async_clients: Dict[str, openai.AsyncOpenAI] = {}
 
 
+def _extract_text_for_instructions(content: Union[str, List, Dict, None]) -> str:
+    """Extract plain text from a message content suitable for Responses API instructions."""
+    if content is None:
+        return ""
+    if isinstance(content, str):
+        return content
+    if isinstance(content, list):
+        texts: List[str] = []
+        for part in content:
+            if isinstance(part, dict) and part.get("type") == "input_text" and part.get("text"):
+                texts.append(str(part.get("text")))
+        return "\n\n".join(texts)
+    if isinstance(content, dict):
+        # If a single part dict was passed
+        if content.get("type") == "input_text" and content.get("text"):
+            return str(content.get("text"))
+    # Fallback to string conversion
+    return str(content)
+
+
 @retry(
     retry=(
         retry_if_exception_type(openai._exceptions.APITimeoutError)
-        | retry_if_exception_type(openai._exceptions.APIError)
-        | retry_if_exception_type(openai._exceptions.APIConnectionError)
         | retry_if_exception_type(openai._exceptions.RateLimitError)
-        | retry_if_exception_type(openai._exceptions.APIStatusError)
+        | retry_if_exception_type(openai._exceptions.InternalServerError)
         | retry_if_exception_type(ValueError)
     ),
     wait=wait_random_exponential(min=1, max=10),
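
Note: a quick illustration of what the new _extract_text_for_instructions helper returns for the content shapes it handles (plain strings, lists of input_text parts, a single part dict, or None); the example values are hypothetical, not part of this diff:

    _extract_text_for_instructions("You are Khoj.")                              # "You are Khoj."
    _extract_text_for_instructions([
        {"type": "input_text", "text": "You are Khoj."},
        {"type": "input_image", "image_url": "https://example.com/a.png"},       # non-text parts are skipped
    ])                                                                            # "You are Khoj."
    _extract_text_for_instructions({"type": "input_text", "text": "Be brief."})  # "Be brief."
    _extract_text_for_instructions(None)                                          # ""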
@@ -106,7 +126,7 @@ def completion_with_backoff(
         if model_name.startswith("grok-4"):
             # Grok-4 models do not support reasoning_effort parameter
             model_kwargs.pop("reasoning_effort", None)
-    elif model_name.startswith("deepseek-reasoner"):
+    elif model_name.startswith("deepseek-reasoner") or model_name.startswith("deepseek-chat"):
         stream_processor = in_stream_thought_processor
     # Two successive messages cannot be from the same role. Should merge any back-to-back messages from the same role.
     # The first message should always be a user message (except system message).
@@ -125,6 +145,8 @@ def completion_with_backoff(
         # See https://qwenlm.github.io/blog/qwen3/#advanced-usages
         if not deepthought:
             add_qwen_no_think_tag(formatted_messages)
+    elif is_groq_api(api_base_url):
+        model_kwargs["service_tier"] = "auto"
 
     read_timeout = 300 if is_local_api(api_base_url) else 60
     if os.getenv("KHOJ_LLM_SEED"):
@@ -150,8 +172,16 @@ def completion_with_backoff(
                     chunk.type == "chunk"
                     and chunk.chunk.choices
                     and hasattr(chunk.chunk.choices[0].delta, "reasoning_content")
+                    and chunk.chunk.choices[0].delta.reasoning_content
                 ):
                     thoughts += chunk.chunk.choices[0].delta.reasoning_content
+                elif (
+                    chunk.type == "chunk"
+                    and chunk.chunk.choices
+                    and hasattr(chunk.chunk.choices[0].delta, "reasoning")
+                    and chunk.chunk.choices[0].delta.reasoning
+                ):
+                    thoughts += chunk.chunk.choices[0].delta.reasoning
                 elif chunk.type == "chunk" and chunk.chunk.choices and chunk.chunk.choices[0].delta.tool_calls:
                     tool_ids += [tool_call.id for tool_call in chunk.chunk.choices[0].delta.tool_calls]
                 elif chunk.type == "tool_calls.function.arguments.done":
@@ -174,7 +204,6 @@ def completion_with_backoff(
         chunk = client.beta.chat.completions.parse(
             messages=formatted_messages,  # type: ignore
             model=model_name,
-            temperature=temperature,
             timeout=httpx.Timeout(30, read=read_timeout),
             **model_kwargs,
         )
@@ -199,6 +228,10 @@ def completion_with_backoff(
         # Json dump tool calls into aggregated response
         aggregated_response = json.dumps([tool_call.__dict__ for tool_call in tool_calls])
 
+    # Align chunk definition with non-streaming mode for post stream completion usage
+    if hasattr(chunk, "chunk"):
+        chunk = chunk.chunk
+
     # Calculate cost of chat
     input_tokens = chunk.usage.prompt_tokens if hasattr(chunk, "usage") and chunk.usage else 0
     output_tokens = chunk.usage.completion_tokens if hasattr(chunk, "usage") and chunk.usage else 0
@@ -227,10 +260,8 @@
 @retry(
     retry=(
         retry_if_exception_type(openai._exceptions.APITimeoutError)
-        | retry_if_exception_type(openai._exceptions.APIError)
-        | retry_if_exception_type(openai._exceptions.APIConnectionError)
         | retry_if_exception_type(openai._exceptions.RateLimitError)
-        | retry_if_exception_type(openai._exceptions.APIStatusError)
+        | retry_if_exception_type(openai._exceptions.InternalServerError)
         | retry_if_exception_type(ValueError)
     ),
     wait=wait_exponential(multiplier=1, min=4, max=10),
@@ -291,8 +322,12 @@ async def chat_completion_with_backoff(
         # Grok-4 models do not support reasoning_effort parameter
         if not model_name.startswith("grok-4"):
             model_kwargs["reasoning_effort"] = reasoning_effort
-    elif model_name.startswith("deepseek-reasoner") or "deepseek-r1" in model_name:
-        # Official Deepseek reasoner model and some inference APIs like vLLM return structured thinking output.
+    elif (
+        model_name.startswith("deepseek-chat")
+        or model_name.startswith("deepseek-reasoner")
+        or "deepseek-r1" in model_name
+    ):
+        # Official Deepseek models and some inference APIs like vLLM return structured thinking output.
         # Others like DeepInfra return it in response stream.
         # Using the instream thought processor handles both cases, structured thoughts and in response thoughts.
         stream_processor = ain_stream_thought_processor
@@ -317,6 +352,8 @@ async def chat_completion_with_backoff(
         # See https://qwenlm.github.io/blog/qwen3/#advanced-usages
         if not deepthought:
             add_qwen_no_think_tag(formatted_messages)
+    elif is_groq_api(api_base_url):
+        model_kwargs["service_tier"] = "auto"
 
     read_timeout = 300 if is_local_api(api_base_url) else 60
     if os.getenv("KHOJ_LLM_SEED"):
@@ -390,6 +427,283 @@ async def chat_completion_with_backoff(
         commit_conversation_trace(messages, aggregated_response, tracer)
 
 
+@retry(
+    retry=(
+        retry_if_exception_type(openai._exceptions.APITimeoutError)
+        | retry_if_exception_type(openai._exceptions.RateLimitError)
+        | retry_if_exception_type(openai._exceptions.InternalServerError)
+        | retry_if_exception_type(ValueError)
+    ),
+    wait=wait_random_exponential(min=1, max=10),
+    stop=stop_after_attempt(3),
+    before_sleep=before_sleep_log(logger, logging.DEBUG),
+    reraise=True,
+)
+def responses_completion_with_backoff(
+    messages: List[ChatMessage],
+    model_name: str,
+    temperature=0.6,
+    openai_api_key=None,
+    api_base_url=None,
+    deepthought: bool = False,
+    model_kwargs: dict = {},
+    tracer: dict = {},
+) -> ResponseWithThought:
+    """
+    Synchronous helper using the OpenAI Responses API in streaming mode under the hood.
+    Aggregates streamed deltas and returns a ResponseWithThought.
+    """
+    client_key = f"{openai_api_key}--{api_base_url}"
+    client = openai_clients.get(client_key)
+    if not client:
+        client = get_openai_client(openai_api_key, api_base_url)
+        openai_clients[client_key] = client
+
+    formatted_messages = format_message_for_api(messages, api_base_url)
+    # Move the first system message to Responses API instructions
+    instructions: Optional[str] = None
+    if formatted_messages and formatted_messages[0].get("role") == "system":
+        instructions = _extract_text_for_instructions(formatted_messages[0].get("content")) or None
+        formatted_messages = formatted_messages[1:]
+
+    model_kwargs = deepcopy(model_kwargs)
+    model_kwargs["top_p"] = model_kwargs.get("top_p", 0.95)
+    # Configure thinking for openai reasoning models
+    if is_openai_reasoning_model(model_name, api_base_url):
+        temperature = 1
+        reasoning_effort = "medium" if deepthought else "low"
+        model_kwargs["reasoning"] = {"effort": reasoning_effort, "summary": "auto"}
+        model_kwargs["include"] = ["reasoning.encrypted_content"]
+        # Remove unsupported params for reasoning models
+        model_kwargs.pop("top_p", None)
+        model_kwargs.pop("stop", None)
+
+    read_timeout = 300 if is_local_api(api_base_url) else 60
+
+    # Stream and aggregate
+    model_response: OpenAIResponse = client.responses.create(
+        input=formatted_messages,
+        instructions=instructions,
+        model=model_name,
+        temperature=temperature,
+        timeout=httpx.Timeout(30, read=read_timeout),  # type: ignore
+        store=False,
+        **model_kwargs,
+    )
+    if not model_response or not isinstance(model_response, OpenAIResponse) or not model_response.output:
+        raise ValueError(f"Empty response returned by {model_name}.")
+
+    raw_content = [item.model_dump() for item in model_response.output]
+    aggregated_text = model_response.output_text
+    thoughts = ""
+    tool_calls: List[ToolCall] = []
+    for item in model_response.output:
+        if isinstance(item, ResponseFunctionToolCall):
+            tool_calls.append(ToolCall(name=item.name, args=json.loads(item.arguments), id=item.call_id))
+        elif isinstance(item, ResponseReasoningItem):
+            thoughts = "\n\n".join([summary.text for summary in item.summary])
+
+    if tool_calls:
+        if thoughts and aggregated_text:
+            # If there are tool calls, aggregate thoughts and responses into thoughts
+            thoughts = "\n".join([f"*{line.strip()}*" for line in thoughts.splitlines() if line.strip()])
+            thoughts = f"{thoughts}\n\n{aggregated_text}"
+        else:
+            thoughts = thoughts or aggregated_text
+        # Json dump tool calls into aggregated response
+        aggregated_text = json.dumps([tool_call.__dict__ for tool_call in tool_calls])
+
+    # Usage/cost tracking
+    input_tokens = model_response.usage.input_tokens if model_response and model_response.usage else 0
+    output_tokens = model_response.usage.output_tokens if model_response and model_response.usage else 0
+    cost = 0
+    cache_read_tokens = 0
+    if model_response and model_response.usage and model_response.usage.input_tokens_details:
+        cache_read_tokens = model_response.usage.input_tokens_details.cached_tokens
+        input_tokens -= cache_read_tokens
+    tracer["usage"] = get_chat_usage_metrics(
+        model_name, input_tokens, output_tokens, cache_read_tokens, usage=tracer.get("usage"), cost=cost
+    )
+
+    # Validate final aggregated text (either message or tool-calls JSON)
+    if is_none_or_empty(aggregated_text):
+        logger.warning(f"No response by {model_name}\nLast Message by {messages[-1].role}: {messages[-1].content}.")
+        raise ValueError(f"Empty or no response by {model_name} over Responses API. Retry if needed.")
+
+    # Trace
+    tracer["chat_model"] = model_name
+    tracer["temperature"] = temperature
+    if is_promptrace_enabled():
+        commit_conversation_trace(messages, aggregated_text, tracer)
+
+    return ResponseWithThought(text=aggregated_text, thought=thoughts, raw_content=raw_content)
+
+
+@retry(
+    retry=(
+        retry_if_exception_type(openai._exceptions.APITimeoutError)
+        | retry_if_exception_type(openai._exceptions.RateLimitError)
+        | retry_if_exception_type(openai._exceptions.InternalServerError)
+        | retry_if_exception_type(ValueError)
+    ),
+    wait=wait_exponential(multiplier=1, min=4, max=10),
+    stop=stop_after_attempt(3),
+    before_sleep=before_sleep_log(logger, logging.WARNING),
+    reraise=False,
+)
+async def responses_chat_completion_with_backoff(
+    messages: list[ChatMessage],
+    model_name: str,
+    temperature,
+    openai_api_key=None,
+    api_base_url=None,
+    deepthought=False,  # Unused; parity with legacy signature
+    tracer: dict = {},
+) -> AsyncGenerator[ResponseWithThought, None]:
+    """
+    Async streaming helper using the OpenAI Responses API.
+    Yields ResponseWithThought chunks as text/think deltas arrive.
+    """
+    client_key = f"{openai_api_key}--{api_base_url}"
+    client = openai_async_clients.get(client_key)
+    if not client:
+        client = get_openai_async_client(openai_api_key, api_base_url)
+        openai_async_clients[client_key] = client
+
+    formatted_messages = format_message_for_api(messages, api_base_url)
+    # Move the first system message to Responses API instructions
+    instructions: Optional[str] = None
+    if formatted_messages and formatted_messages[0].get("role") == "system":
+        instructions = _extract_text_for_instructions(formatted_messages[0].get("content")) or None
+        formatted_messages = formatted_messages[1:]
+
+    model_kwargs: dict = {}
+    model_kwargs["top_p"] = model_kwargs.get("top_p", 0.95)
+    # Configure thinking for openai reasoning models
+    if is_openai_reasoning_model(model_name, api_base_url):
+        temperature = 1
+        reasoning_effort = "medium" if deepthought else "low"
+        model_kwargs["reasoning"] = {"effort": reasoning_effort, "summary": "auto"}
+        # Remove unsupported params for reasoning models
+        model_kwargs.pop("top_p", None)
+        model_kwargs.pop("stop", None)
+
+    read_timeout = 300 if is_local_api(api_base_url) else 60
+
+    aggregated_text = ""
+    last_final: Optional[OpenAIResponse] = None
+    # Tool call assembly buffers
+    tool_calls_args: Dict[str, str] = {}
+    tool_calls_name: Dict[str, str] = {}
+    tool_call_order: List[str] = []
+
+    async with client.responses.stream(
+        input=formatted_messages,
+        instructions=instructions,
+        model=model_name,
+        temperature=temperature,
+        timeout=httpx.Timeout(30, read=read_timeout),
+        **model_kwargs,
+    ) as stream:  # type: ignore
+        async for event in stream:  # type: ignore
+            et = getattr(event, "type", "")
+            if et == "response.output_text.delta":
+                delta = getattr(event, "delta", "") or getattr(event, "output_text", "")
+                if delta:
+                    aggregated_text += delta
+                    yield ResponseWithThought(text=delta)
+            elif et == "response.reasoning.delta":
+                delta = getattr(event, "delta", "")
+                if delta:
+                    yield ResponseWithThought(thought=delta)
+            elif et == "response.tool_call.created":
+                item = getattr(event, "item", None)
+                tool_id = (
+                    getattr(event, "id", None)
+                    or getattr(event, "tool_call_id", None)
+                    or (getattr(item, "id", None) if item is not None else None)
+                )
+                name = (
+                    getattr(event, "name", None)
+                    or (getattr(item, "name", None) if item is not None else None)
+                    or getattr(event, "tool_name", None)
+                )
+                if tool_id:
+                    if tool_id not in tool_calls_args:
+                        tool_calls_args[tool_id] = ""
+                        tool_call_order.append(tool_id)
+                    if name:
+                        tool_calls_name[tool_id] = name
+            elif et == "response.tool_call.delta":
+                tool_id = getattr(event, "id", None) or getattr(event, "tool_call_id", None)
+                delta = getattr(event, "delta", None)
+                if hasattr(delta, "arguments"):
+                    arg_delta = getattr(delta, "arguments", "")
+                else:
+                    arg_delta = delta if isinstance(delta, str) else getattr(event, "arguments", "")
+                if tool_id and arg_delta:
+                    tool_calls_args[tool_id] = tool_calls_args.get(tool_id, "") + arg_delta
+                    if tool_id not in tool_call_order:
+                        tool_call_order.append(tool_id)
+            elif et == "response.tool_call.completed":
+                item = getattr(event, "item", None)
+                tool_id = (
+                    getattr(event, "id", None)
+                    or getattr(event, "tool_call_id", None)
+                    or (getattr(item, "id", None) if item is not None else None)
+                )
+                args_final = None
+                if item is not None:
+                    args_final = getattr(item, "arguments", None) or getattr(item, "args", None)
+                if tool_id and args_final:
+                    tool_calls_args[tool_id] = args_final if isinstance(args_final, str) else json.dumps(args_final)
+                    if tool_id not in tool_call_order:
+                        tool_call_order.append(tool_id)
+            # ignore other events for now
+        last_final = await stream.get_final_response()
+
+    # Usage/cost tracking after stream ends
+    input_tokens = last_final.usage.input_tokens if last_final and last_final.usage else 0
+    output_tokens = last_final.usage.output_tokens if last_final and last_final.usage else 0
+    cost = 0
+    tracer["usage"] = get_chat_usage_metrics(
+        model_name, input_tokens, output_tokens, usage=tracer.get("usage"), cost=cost
+    )
+
+    # If there are tool calls, package them into aggregated text for tracing parity
+    if tool_call_order:
+        packaged_tool_calls: List[ToolCall] = []
+        for tool_id in tool_call_order:
+            name = tool_calls_name.get(tool_id) or ""
+            args_str = tool_calls_args.get(tool_id, "")
+            try:
+                args = json.loads(args_str) if isinstance(args_str, str) else args_str
+            except Exception:
+                logger.warning(f"Failed to parse tool call arguments for {tool_id}: {args_str}")
+                args = {}
+            packaged_tool_calls.append(ToolCall(name=name, args=args, id=tool_id))
+        # Move any text into trace thought
+        tracer_text = aggregated_text
+        aggregated_text = json.dumps([tc.__dict__ for tc in packaged_tool_calls])
+        # Save for trace below
+        if tracer_text:
+            tracer.setdefault("_responses_stream_text", tracer_text)
+
+    if is_none_or_empty(aggregated_text):
+        logger.warning(f"No response by {model_name}\nLast Message by {messages[-1].role}: {messages[-1].content}.")
+        raise ValueError(f"Empty or no response by {model_name} over Responses API. Retry if needed.")
+
+    tracer["chat_model"] = model_name
+    tracer["temperature"] = temperature
+    if is_promptrace_enabled():
+        # If tool-calls were present, include any streamed text in the trace thought
+        trace_payload = aggregated_text
+        if tracer.get("_responses_stream_text"):
+            thoughts = tracer.pop("_responses_stream_text")
+            trace_payload = thoughts
+        commit_conversation_trace(messages, trace_payload, tracer)
+
+
 def get_structured_output_support(model_name: str, api_base_url: str = None) -> StructuredOutputSupport:
     if model_name.startswith("deepseek-reasoner"):
         return StructuredOutputSupport.NONE
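
Note: for orientation, a minimal sketch of how the new synchronous helper could be called. The model name, API key, and ChatMessage import path below are illustrative assumptions, not part of this diff:

    from langchain_core.messages import ChatMessage  # assuming khoj's ChatMessage is the langchain chat message type

    messages = [
        ChatMessage(role="system", content="You are Khoj, a helpful personal assistant."),
        ChatMessage(role="user", content="Summarize my notes on solar panels."),
    ]
    response = responses_completion_with_backoff(
        messages,
        model_name="gpt-5-mini",   # hypothetical reasoning model served over the OpenAI API
        openai_api_key="sk-...",   # hypothetical credential
    )
    print(response.text)     # aggregated answer, or JSON-encoded tool calls if the model called tools
    print(response.thought)  # reasoning summary extracted from ResponseReasoningItem output, if any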
@@ -412,6 +726,12 @@ def format_message_for_api(raw_messages: List[ChatMessage], api_base_url: str) -
         # Handle tool call and tool result message types
         message_type = message.additional_kwargs.get("message_type")
         if message_type == "tool_call":
+            if is_openai_api(api_base_url):
+                for part in message.content:
+                    if "status" in part:
+                        part.pop("status")  # Drop unsupported tool call status field
+                formatted_messages.extend(message.content)
+                continue
             # Convert tool_call to OpenAI function call format
             content = []
             for part in message.content:
@@ -450,14 +770,23 @@ def format_message_for_api(raw_messages: List[ChatMessage], api_base_url: str) -
                 if not tool_call_id:
                     logger.warning(f"Dropping tool result without valid tool_call_id: {part.get('name')}")
                     continue
-                formatted_messages.append(
-                    {
-                        "role": "tool",
-                        "tool_call_id": tool_call_id,
-                        "name": part.get("name"),
-                        "content": part.get("content"),
-                    }
-                )
+                if is_openai_api(api_base_url):
+                    formatted_messages.append(
+                        {
+                            "type": "function_call_output",
+                            "call_id": tool_call_id,
+                            "output": part.get("content") or "No output",
+                        }
+                    )
+                else:
+                    formatted_messages.append(
+                        {
+                            "role": "tool",
+                            "tool_call_id": tool_call_id,
+                            "name": part.get("name"),
+                            "content": part.get("content") or "No output",
+                        }
+                    )
             continue
         if isinstance(message.content, list) and not is_openai_api(api_base_url):
             assistant_texts = []
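
Note: to make the branch above concrete, the same tool result is now serialized in two shapes depending on the target API. A schematic of the payloads with hypothetical values:

    # Responses API (is_openai_api(api_base_url) is True)
    responses_tool_result = {
        "type": "function_call_output",
        "call_id": "call_abc123",          # hypothetical tool call id
        "output": "Found 2 matching notes",
    }

    # Chat Completions style providers (everything else)
    chat_completions_tool_result = {
        "role": "tool",
        "tool_call_id": "call_abc123",
        "name": "search_notes",            # hypothetical tool name
        "content": "Found 2 matching notes",
    }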
@@ -489,6 +818,12 @@ def format_message_for_api(raw_messages: List[ChatMessage], api_base_url: str) -
                     message.content.remove(part)
                 elif part["type"] == "image_url" and not part.get("image_url"):
                     message.content.remove(part)
+                # OpenAI models use the Responses API which uses slightly different content types
+                if part["type"] == "text":
+                    part["type"] = "output_text" if message.role == "assistant" else "input_text"
+                if part["type"] == "image_url":
+                    part["type"] = "output_image" if message.role == "assistant" else "input_image"
+                    part["image_url"] = part["image_url"]["url"]
             # If no valid content parts left, remove the message
             if is_none_or_empty(message.content):
                 messages.remove(message)
@@ -513,7 +848,11 @@ def is_openai_reasoning_model(model_name: str, api_base_url: str = None) -> bool
     """
     Check if the model is an OpenAI reasoning model
     """
-    return model_name.lower().startswith("o") and is_openai_api(api_base_url)
+    return (
+        is_openai_api(api_base_url)
+        and (model_name.lower().startswith("o") or model_name.lower().startswith("gpt-5"))
+        or model_name.lower().startswith("gpt-oss")
+    )
 
 
 def is_non_streaming_model(model_name: str, api_base_url: str = None) -> bool:
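
Note: the widened predicate now treats o* and gpt-5* models on the OpenAI API, plus any gpt-oss* model, as reasoning models. A hedged sketch of the expected results, assuming is_openai_api accepts the official base URL; note that, as written, the gpt-oss clause is not gated on the API check:

    is_openai_reasoning_model("o3-mini", "https://api.openai.com/v1")     # True
    is_openai_reasoning_model("gpt-5", "https://api.openai.com/v1")       # True
    is_openai_reasoning_model("gpt-4.1", "https://api.openai.com/v1")     # False
    is_openai_reasoning_model("gpt-oss-20b", "http://localhost:8080/v1")  # True, regardless of api_base_url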
@@ -536,6 +875,13 @@ def is_twitter_reasoning_model(model_name: str, api_base_url: str = None) -> boo
     )
 
 
+def is_groq_api(api_base_url: str = None) -> bool:
+    """
+    Check if the model is served over the Groq API
+    """
+    return api_base_url is not None and api_base_url.startswith("https://api.groq.com")
+
+
 def is_qwen_style_reasoning_model(model_name: str, api_base_url: str = None) -> bool:
     """
     Check if the model is a Qwen style reasoning model
@@ -609,6 +955,9 @@ async def astream_thought_processor(
         if not chunk_data.get("object") or chunk_data.get("object") != "chat.completion.chunk":
             logger.warning(f"Skipping invalid chunk with object field: {chunk_data.get('object', 'missing')}")
             continue
+        # Handle unsupported service tiers like "on_demand" by Groq
+        if chunk.service_tier and chunk.service_tier == "on_demand":
+            chunk_data["service_tier"] = "auto"
 
         tchunk = ChatCompletionWithThoughtsChunk.model_validate(chunk_data)
 
@@ -620,6 +969,14 @@ async def astream_thought_processor(
         ):
             tchunk.choices[0].delta.thought = chunk.choices[0].delta.reasoning_content
 
+        # Handlle openai reasoning style response with thoughts. Used by gpt-oss.
+        if (
+            len(tchunk.choices) > 0
+            and hasattr(tchunk.choices[0].delta, "reasoning")
+            and tchunk.choices[0].delta.reasoning
+        ):
+            tchunk.choices[0].delta.thought = chunk.choices[0].delta.reasoning
+
         # Handlle llama.cpp server style response with thoughts.
         elif len(tchunk.choices) > 0 and tchunk.choices[0].delta.model_extra.get("reasoning_content"):
             tchunk.choices[0].delta.thought = tchunk.choices[0].delta.model_extra.get("reasoning_content")
@@ -750,6 +1107,10 @@ async def ain_stream_thought_processor(
             yield chunk
             continue
 
+        if chunk.choices[0].delta.content is None:
+            # If delta content is None, we can't process it, just yield the chunk
+            continue
+
         buf += chunk.choices[0].delta.content
 
         if mode == "detect_start":
@@ -850,20 +1211,32 @@ def add_qwen_no_think_tag(formatted_messages: List[dict]) -> None:
             break
 
 
-def to_openai_tools(tools: List[ToolDefinition]) -> List[Dict] | None:
+def to_openai_tools(tools: List[ToolDefinition], use_responses_api: bool) -> List[Dict] | None:
     "Transform tool definitions from standard format to OpenAI format."
-    openai_tools = [
-        {
-            "type": "function",
-            "function": {
+    if use_responses_api:
+        openai_tools = [
+            {
+                "type": "function",
                 "name": tool.name,
                 "description": tool.description,
                 "parameters": clean_response_schema(tool.schema),
                 "strict": True,
-            },
-        }
-        for tool in tools
-    ]
+            }
+            for tool in tools
+        ]
+    else:
+        openai_tools = [
+            {
+                "type": "function",
+                "function": {
+                    "name": tool.name,
+                    "description": tool.description,
+                    "parameters": clean_response_schema(tool.schema),
+                    "strict": True,
+                },
+            }
+            for tool in tools
+        ]
 
     return openai_tools or None
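
Note: a sketch of the two payload shapes the updated to_openai_tools emits for one hypothetical tool definition, flat for the Responses API and nested under "function" for Chat Completions style APIs. The ToolDefinition constructor arguments shown here are assumptions based on the attributes the function reads:

    tool = ToolDefinition(
        name="search_notes",
        description="Search the user's notes",
        schema={"type": "object", "properties": {"query": {"type": "string"}}, "required": ["query"]},
    )

    to_openai_tools([tool], use_responses_api=True)
    # -> [{"type": "function", "name": "search_notes", "description": "Search the user's notes",
    #      "parameters": {...}, "strict": True}]

    to_openai_tools([tool], use_responses_api=False)
    # -> [{"type": "function", "function": {"name": "search_notes", "description": "Search the user's notes",
    #      "parameters": {...}, "strict": True}}]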