khoj 2.0.0b14.dev43__py3-none-any.whl → 2.0.0b15.dev22__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46)
  1. khoj/database/adapters/__init__.py +59 -20
  2. khoj/database/admin.py +6 -2
  3. khoj/database/migrations/0094_serverchatsettings_think_free_deep_and_more.py +61 -0
  4. khoj/database/models/__init__.py +18 -2
  5. khoj/interface/compiled/404/index.html +1 -1
  6. khoj/interface/compiled/_next/static/chunks/{9808-c0742b05e1ef29ba.js → 9808-bd5d7361ad026094.js} +1 -1
  7. khoj/interface/compiled/_next/static/chunks/app/chat/page-ac7ed0a1aff1b145.js +1 -0
  8. khoj/interface/compiled/_next/static/css/fb7ea16e60b40ecd.css +1 -0
  9. khoj/interface/compiled/agents/index.html +1 -1
  10. khoj/interface/compiled/agents/index.txt +1 -1
  11. khoj/interface/compiled/automations/index.html +1 -1
  12. khoj/interface/compiled/automations/index.txt +1 -1
  13. khoj/interface/compiled/chat/index.html +2 -2
  14. khoj/interface/compiled/chat/index.txt +2 -2
  15. khoj/interface/compiled/index.html +2 -2
  16. khoj/interface/compiled/index.txt +1 -1
  17. khoj/interface/compiled/search/index.html +1 -1
  18. khoj/interface/compiled/search/index.txt +1 -1
  19. khoj/interface/compiled/settings/index.html +1 -1
  20. khoj/interface/compiled/settings/index.txt +1 -1
  21. khoj/interface/compiled/share/chat/index.html +2 -2
  22. khoj/interface/compiled/share/chat/index.txt +2 -2
  23. khoj/processor/conversation/anthropic/anthropic_chat.py +4 -88
  24. khoj/processor/conversation/anthropic/utils.py +1 -2
  25. khoj/processor/conversation/google/gemini_chat.py +5 -89
  26. khoj/processor/conversation/google/utils.py +8 -9
  27. khoj/processor/conversation/openai/gpt.py +16 -93
  28. khoj/processor/conversation/openai/utils.py +58 -43
  29. khoj/processor/conversation/prompts.py +30 -39
  30. khoj/processor/conversation/utils.py +71 -84
  31. khoj/processor/image/generate.py +69 -15
  32. khoj/processor/tools/run_code.py +3 -2
  33. khoj/routers/api_chat.py +8 -21
  34. khoj/routers/helpers.py +243 -156
  35. khoj/routers/research.py +6 -6
  36. khoj/utils/constants.py +3 -1
  37. khoj/utils/helpers.py +6 -2
  38. {khoj-2.0.0b14.dev43.dist-info → khoj-2.0.0b15.dev22.dist-info}/METADATA +1 -1
  39. {khoj-2.0.0b14.dev43.dist-info → khoj-2.0.0b15.dev22.dist-info}/RECORD +44 -43
  40. khoj/interface/compiled/_next/static/chunks/app/chat/page-1b4893b1a9957220.js +0 -1
  41. khoj/interface/compiled/_next/static/css/cea3bdfe98c144bd.css +0 -1
  42. /khoj/interface/compiled/_next/static/{OKbGpkzD6gHDfr1vAog6p → t8O_8CJ9p3UtV9kEsAAWT}/_buildManifest.js +0 -0
  43. /khoj/interface/compiled/_next/static/{OKbGpkzD6gHDfr1vAog6p → t8O_8CJ9p3UtV9kEsAAWT}/_ssgManifest.js +0 -0
  44. {khoj-2.0.0b14.dev43.dist-info → khoj-2.0.0b15.dev22.dist-info}/WHEEL +0 -0
  45. {khoj-2.0.0b14.dev43.dist-info → khoj-2.0.0b15.dev22.dist-info}/entry_points.txt +0 -0
  46. {khoj-2.0.0b14.dev43.dist-info → khoj-2.0.0b15.dev22.dist-info}/licenses/LICENSE +0 -0
--- a/khoj/processor/conversation/google/gemini_chat.py
+++ b/khoj/processor/conversation/google/gemini_chat.py
@@ -1,22 +1,16 @@
 import logging
-from datetime import datetime
-from typing import AsyncGenerator, Dict, List, Optional
+from typing import AsyncGenerator, List, Optional
+
+from langchain_core.messages.chat import ChatMessage
 
-from khoj.database.models import Agent, ChatMessageModel, ChatModel
-from khoj.processor.conversation import prompts
 from khoj.processor.conversation.google.utils import (
     gemini_chat_completion_with_backoff,
     gemini_completion_with_backoff,
 )
 from khoj.processor.conversation.utils import (
-    OperatorRun,
     ResponseWithThought,
-    generate_chatml_messages_with_context,
     messages_to_print,
 )
-from khoj.utils.helpers import is_none_or_empty, truncate_code_context
-from khoj.utils.rawconfig import FileAttachment, LocationData
-from khoj.utils.yaml import yaml_dump
 
 logger = logging.getLogger(__name__)
 
@@ -61,95 +55,18 @@ def gemini_send_message_to_model(
 
 async def converse_gemini(
     # Query
-    user_query: str,
-    # Context
-    references: list[dict],
-    online_results: Optional[Dict[str, Dict]] = None,
-    code_results: Optional[Dict[str, Dict]] = None,
-    operator_results: Optional[List[OperatorRun]] = None,
-    query_images: Optional[list[str]] = None,
-    query_files: str = None,
-    generated_files: List[FileAttachment] = None,
-    generated_asset_results: Dict[str, Dict] = {},
-    program_execution_context: List[str] = None,
-    location_data: LocationData = None,
-    user_name: str = None,
-    chat_history: List[ChatMessageModel] = [],
+    messages: List[ChatMessage],
     # Model
-    model: Optional[str] = "gemini-2.0-flash",
+    model: Optional[str] = "gemini-2.5-flash",
     api_key: Optional[str] = None,
     api_base_url: Optional[str] = None,
     temperature: float = 1.0,
-    max_prompt_size=None,
-    tokenizer_name=None,
-    agent: Agent = None,
-    vision_available: bool = False,
     deepthought: Optional[bool] = False,
     tracer={},
 ) -> AsyncGenerator[ResponseWithThought, None]:
     """
     Converse with user using Google's Gemini
     """
-    # Initialize Variables
-    current_date = datetime.now()
-
-    if agent and agent.personality:
-        system_prompt = prompts.custom_personality.format(
-            name=agent.name,
-            bio=agent.personality,
-            current_date=current_date.strftime("%Y-%m-%d"),
-            day_of_week=current_date.strftime("%A"),
-        )
-    else:
-        system_prompt = prompts.personality.format(
-            current_date=current_date.strftime("%Y-%m-%d"),
-            day_of_week=current_date.strftime("%A"),
-        )
-
-    system_prompt += f"{system_prompt}\n\n{prompts.gemini_verbose_language_personality}"
-    if location_data:
-        location_prompt = prompts.user_location.format(location=f"{location_data}")
-        system_prompt = f"{system_prompt}\n{location_prompt}"
-
-    if user_name:
-        user_name_prompt = prompts.user_name.format(name=user_name)
-        system_prompt = f"{system_prompt}\n{user_name_prompt}"
-
-    context_message = ""
-    if not is_none_or_empty(references):
-        context_message = f"{prompts.notes_conversation.format(query=user_query, references=yaml_dump(references))}\n\n"
-    if not is_none_or_empty(online_results):
-        context_message += f"{prompts.online_search_conversation.format(online_results=yaml_dump(online_results))}\n\n"
-    if not is_none_or_empty(code_results):
-        context_message += (
-            f"{prompts.code_executed_context.format(code_results=truncate_code_context(code_results))}\n\n"
-        )
-    if not is_none_or_empty(operator_results):
-        operator_content = [
-            {"query": oc.query, "response": oc.response, "webpages": oc.webpages} for oc in operator_results
-        ]
-        context_message += (
-            f"{prompts.operator_execution_context.format(operator_results=yaml_dump(operator_content))}\n\n"
-        )
-    context_message = context_message.strip()
-
-    # Setup Prompt with Primer or Conversation History
-    messages = generate_chatml_messages_with_context(
-        user_query,
-        context_message=context_message,
-        chat_history=chat_history,
-        model_name=model,
-        max_prompt_size=max_prompt_size,
-        tokenizer_name=tokenizer_name,
-        query_images=query_images,
-        vision_enabled=vision_available,
-        model_type=ChatModel.ModelType.GOOGLE,
-        query_files=query_files,
-        generated_files=generated_files,
-        generated_asset_results=generated_asset_results,
-        program_execution_context=program_execution_context,
-    )
-
     logger.debug(f"Conversation Context for Gemini: {messages_to_print(messages)}")
 
     # Get Response from Google AI
@@ -159,7 +76,6 @@ async def converse_gemini(
         temperature=temperature,
         api_key=api_key,
         api_base_url=api_base_url,
-        system_prompt=system_prompt,
         deepthought=deepthought,
         tracer=tracer,
     ):
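
The three hunks above strip all prompt and context assembly out of converse_gemini: callers now build the complete ChatML message list upstream and pass it in, and the system prompt travels inside that list instead of as a separate argument. A minimal caller sketch under the new signature (message contents and the API key are placeholders; per the format_messages_for_gemini hunk below, system messages in the list get folded into Gemini's system prompt):

import asyncio

from langchain_core.messages.chat import ChatMessage

from khoj.processor.conversation.google.gemini_chat import converse_gemini


async def main():
    # Prompt personality, location and retrieved context are assembled
    # upstream now; converse_gemini only sees the finished messages.
    messages = [
        ChatMessage(role="system", content="You are Khoj, a helpful personal assistant."),
        ChatMessage(role="user", content="Summarize my notes on solar panels."),
    ]
    async for chunk in converse_gemini(
        messages=messages,
        model="gemini-2.5-flash",  # the new default per the hunk above
        api_key="YOUR_GEMINI_API_KEY",  # placeholder
    ):
        print(chunk)  # yields ResponseWithThought objects


asyncio.run(main())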
--- a/khoj/processor/conversation/google/utils.py
+++ b/khoj/processor/conversation/google/utils.py
@@ -46,7 +46,7 @@ gemini_clients: Dict[str, genai.Client] = {}
 # This avoids premature response termination.
 MAX_OUTPUT_TOKENS_FOR_REASONING_GEMINI = 20000
 MAX_OUTPUT_TOKENS_FOR_STANDARD_GEMINI = 8000
-MAX_REASONING_TOKENS_GEMINI = 10000
+MAX_REASONING_TOKENS_GEMINI = 512
 
 SAFETY_SETTINGS = [
     gtypes.SafetySetting(
@@ -120,11 +120,7 @@ def _is_retryable_error(exception: BaseException) -> bool:
     if isinstance(exception, (gerrors.APIError, gerrors.ClientError, GeminiRetryableClientError)):
         return exception.code in [429, 502, 503, 504]
     # client errors
-    if (
-        isinstance(exception, httpx.TimeoutException)
-        or isinstance(exception, httpx.NetworkError)
-        or isinstance(exception, httpx.ReadError)
-    ):
+    if isinstance(exception, httpx.TimeoutException) or isinstance(exception, httpx.NetworkError):
         return True
     # validation errors
     if isinstance(exception, ValueError):
@@ -312,7 +308,7 @@ async def gemini_chat_completion_with_backoff(
     temperature: float,
     api_key: str,
     api_base_url: str,
-    system_prompt: str,
+    system_prompt: str = "",
     model_kwargs=None,
     deepthought=False,
     tracer: dict = {},
@@ -476,9 +472,12 @@ def format_messages_for_gemini(
     for message in messages.copy():
         if message.role == "system":
             if isinstance(message.content, list):
-                system_prompt += "\n".join([part["text"] for part in message.content if part["type"] == "text"])
+                system_prompt += "\n\n" + "\n".join(
+                    [part["text"] for part in message.content if part["type"] == "text"]
+                )
             else:
-                system_prompt += message.content
+                system_prompt += "\n\n" + message.content
+            system_prompt = system_prompt.strip()
             messages.remove(message)
     system_prompt = None if is_none_or_empty(system_prompt) else system_prompt
 
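A standalone sketch of the merge behavior this hunk introduces, assuming plain string message content (the list-content branch joins its text parts the same way): each system message is appended to the accumulated system prompt with a blank-line separator, stripped, and removed from the message list.

from langchain_core.messages.chat import ChatMessage


def merge_system_prompt(messages: list[ChatMessage], system_prompt: str = "") -> str | None:
    # Mirrors the hunk above for string-content system messages
    for message in messages.copy():
        if message.role == "system":
            system_prompt += "\n\n" + message.content
            system_prompt = system_prompt.strip()
            messages.remove(message)
    return system_prompt or None


msgs = [
    ChatMessage(role="system", content="Primary persona."),
    ChatMessage(role="system", content="Agent instructions."),
    ChatMessage(role="user", content="Hello"),
]
assert merge_system_prompt(msgs) == "Primary persona.\n\nAgent instructions."
assert [m.role for m in msgs] == ["user"]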
--- a/khoj/processor/conversation/openai/gpt.py
+++ b/khoj/processor/conversation/openai/gpt.py
@@ -1,29 +1,25 @@
 import logging
-from datetime import datetime
 from typing import Any, AsyncGenerator, Dict, List, Optional
 
-from khoj.database.models import Agent, ChatMessageModel, ChatModel
-from khoj.processor.conversation import prompts
+from langchain_core.messages.chat import ChatMessage
+
 from khoj.processor.conversation.openai.utils import (
     chat_completion_with_backoff,
     clean_response_schema,
     completion_with_backoff,
     get_structured_output_support,
-    is_openai_api,
+    is_cerebras_api,
     responses_chat_completion_with_backoff,
     responses_completion_with_backoff,
+    supports_responses_api,
     to_openai_tools,
 )
 from khoj.processor.conversation.utils import (
-    OperatorRun,
     ResponseWithThought,
     StructuredOutputSupport,
-    generate_chatml_messages_with_context,
     messages_to_print,
 )
-from khoj.utils.helpers import ToolDefinition, is_none_or_empty, truncate_code_context
-from khoj.utils.rawconfig import FileAttachment, LocationData
-from khoj.utils.yaml import yaml_dump
+from khoj.utils.helpers import ToolDefinition
 
 logger = logging.getLogger(__name__)
 
@@ -45,16 +41,19 @@ def send_message_to_model(
 
     model_kwargs: Dict[str, Any] = {}
     json_support = get_structured_output_support(model, api_base_url)
+    strict = not is_cerebras_api(api_base_url)
     if tools and json_support == StructuredOutputSupport.TOOL:
-        model_kwargs["tools"] = to_openai_tools(tools, use_responses_api=is_openai_api(api_base_url))
+        model_kwargs["tools"] = to_openai_tools(
+            tools, use_responses_api=supports_responses_api(model, api_base_url), strict=strict
+        )
     elif response_schema and json_support >= StructuredOutputSupport.SCHEMA:
         # Drop unsupported fields from schema passed to OpenAI APi
         cleaned_response_schema = clean_response_schema(response_schema)
-        if is_openai_api(api_base_url):
+        if supports_responses_api(model, api_base_url):
             model_kwargs["text"] = {
                 "format": {
                     "type": "json_schema",
-                    "strict": True,
+                    "strict": strict,
                     "name": response_schema.__name__,
                     "schema": cleaned_response_schema,
                 }
@@ -65,14 +64,14 @@ def send_message_to_model(
                 "json_schema": {
                     "schema": cleaned_response_schema,
                     "name": response_schema.__name__,
-                    "strict": True,
+                    "strict": strict,
                 },
             }
     elif response_type == "json_object" and json_support == StructuredOutputSupport.OBJECT:
         model_kwargs["response_format"] = {"type": response_type}
 
     # Get Response from GPT
-    if is_openai_api(api_base_url):
+    if supports_responses_api(model, api_base_url):
         return responses_completion_with_backoff(
             messages=messages,
             model_name=model,
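
Both structured-output paths above now thread a strict flag that is disabled only for Cerebras, whose OpenAI-compatible endpoint appears not to accept strict schema enforcement. A condensed sketch of the resulting JSON-schema kwargs (the wrapper function is hypothetical; the helpers are from the diffed module):

from khoj.processor.conversation.openai.utils import clean_response_schema, is_cerebras_api


def json_schema_kwargs(response_schema, api_base_url: str | None) -> dict:
    # strict stays True everywhere except Cerebras, per the hunks above
    strict = not is_cerebras_api(api_base_url)
    return {
        "response_format": {
            "type": "json_schema",
            "json_schema": {
                "schema": clean_response_schema(response_schema),
                "name": response_schema.__name__,
                "strict": strict,
            },
        }
    }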
@@ -96,98 +95,22 @@
 
 async def converse_openai(
     # Query
-    user_query: str,
-    # Context
-    references: list[dict],
-    online_results: Optional[Dict[str, Dict]] = None,
-    code_results: Optional[Dict[str, Dict]] = None,
-    operator_results: Optional[List[OperatorRun]] = None,
-    query_images: Optional[list[str]] = None,
-    query_files: str = None,
-    generated_files: List[FileAttachment] = None,
-    generated_asset_results: Dict[str, Dict] = {},
-    program_execution_context: List[str] = None,
-    location_data: LocationData = None,
-    chat_history: list[ChatMessageModel] = [],
+    messages: List[ChatMessage],
+    # Model
     model: str = "gpt-4.1-mini",
     api_key: Optional[str] = None,
     api_base_url: Optional[str] = None,
     temperature: float = 0.6,
-    max_prompt_size=None,
-    tokenizer_name=None,
-    user_name: str = None,
-    agent: Agent = None,
-    vision_available: bool = False,
     deepthought: Optional[bool] = False,
     tracer: dict = {},
 ) -> AsyncGenerator[ResponseWithThought, None]:
     """
     Converse with user using OpenAI's ChatGPT
     """
-    # Initialize Variables
-    current_date = datetime.now()
-
-    if agent and agent.personality:
-        system_prompt = prompts.custom_personality.format(
-            name=agent.name,
-            bio=agent.personality,
-            current_date=current_date.strftime("%Y-%m-%d"),
-            day_of_week=current_date.strftime("%A"),
-        )
-    else:
-        system_prompt = prompts.personality.format(
-            current_date=current_date.strftime("%Y-%m-%d"),
-            day_of_week=current_date.strftime("%A"),
-        )
-
-    if location_data:
-        location_prompt = prompts.user_location.format(location=f"{location_data}")
-        system_prompt = f"{system_prompt}\n{location_prompt}"
-
-    if user_name:
-        user_name_prompt = prompts.user_name.format(name=user_name)
-        system_prompt = f"{system_prompt}\n{user_name_prompt}"
-
-    context_message = ""
-    if not is_none_or_empty(references):
-        context_message = f"{prompts.notes_conversation.format(references=yaml_dump(references))}\n\n"
-    if not is_none_or_empty(online_results):
-        context_message += f"{prompts.online_search_conversation.format(online_results=yaml_dump(online_results))}\n\n"
-    if not is_none_or_empty(code_results):
-        context_message += (
-            f"{prompts.code_executed_context.format(code_results=truncate_code_context(code_results))}\n\n"
-        )
-    if not is_none_or_empty(operator_results):
-        operator_content = [
-            {"query": oc.query, "response": oc.response, "webpages": oc.webpages} for oc in operator_results
-        ]
-        context_message += (
-            f"{prompts.operator_execution_context.format(operator_results=yaml_dump(operator_content))}\n\n"
-        )
-
-    context_message = context_message.strip()
-
-    # Setup Prompt with Primer or Conversation History
-    messages = generate_chatml_messages_with_context(
-        user_query,
-        system_prompt,
-        chat_history,
-        context_message=context_message,
-        model_name=model,
-        max_prompt_size=max_prompt_size,
-        tokenizer_name=tokenizer_name,
-        query_images=query_images,
-        vision_enabled=vision_available,
-        model_type=ChatModel.ModelType.OPENAI,
-        query_files=query_files,
-        generated_files=generated_files,
-        generated_asset_results=generated_asset_results,
-        program_execution_context=program_execution_context,
-    )
     logger.debug(f"Conversation Context for GPT: {messages_to_print(messages)}")
 
     # Get Response from GPT
-    if is_openai_api(api_base_url):
+    if supports_responses_api(model, api_base_url):
         async for chunk in responses_chat_completion_with_backoff(
             messages=messages,
             model_name=model,
  model_name=model,
@@ -111,14 +111,16 @@ def completion_with_backoff(
111
111
  model_kwargs["temperature"] = temperature
112
112
  model_kwargs["top_p"] = model_kwargs.get("top_p", 0.95)
113
113
 
114
- formatted_messages = format_message_for_api(messages, api_base_url)
114
+ formatted_messages = format_message_for_api(messages, model_name, api_base_url)
115
115
 
116
116
  # Tune reasoning models arguments
117
117
  if is_openai_reasoning_model(model_name, api_base_url):
118
118
  model_kwargs["temperature"] = 1
119
119
  reasoning_effort = "medium" if deepthought else "low"
120
120
  model_kwargs["reasoning_effort"] = reasoning_effort
121
+ # Remove unsupported params for reasoning models
121
122
  model_kwargs.pop("top_p", None)
123
+ model_kwargs.pop("stop", None)
122
124
  elif is_twitter_reasoning_model(model_name, api_base_url):
123
125
  model_kwargs.pop("temperature", None)
124
126
  reasoning_effort = "high" if deepthought else "low"
@@ -126,7 +128,7 @@
         if model_name.startswith("grok-4"):
             # Grok-4 models do not support reasoning_effort parameter
             model_kwargs.pop("reasoning_effort", None)
-    elif model_name.startswith("deepseek-reasoner"):
+    elif model_name.startswith("deepseek-reasoner") or model_name.startswith("deepseek-chat"):
         stream_processor = in_stream_thought_processor
         # Two successive messages cannot be from the same role. Should merge any back-to-back messages from the same role.
         # The first message should always be a user message (except system message).
@@ -145,11 +147,8 @@
         # See https://qwenlm.github.io/blog/qwen3/#advanced-usages
         if not deepthought:
             add_qwen_no_think_tag(formatted_messages)
-    elif "gpt-oss" in model_name.lower():
-        model_kwargs["temperature"] = 1
-        reasoning_effort = "medium" if deepthought else "low"
-        model_kwargs["reasoning_effort"] = reasoning_effort
-        model_kwargs["top_p"] = 1.0
+    elif is_groq_api(api_base_url):
+        model_kwargs["service_tier"] = "auto"
 
     read_timeout = 300 if is_local_api(api_base_url) else 60
     if os.getenv("KHOJ_LLM_SEED"):
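
The gpt-oss special case dropped here moves into is_openai_reasoning_model (see a later hunk); in its place, requests to Groq opt into automatic service-tier selection. The branch in isolation:

from khoj.processor.conversation.openai.utils import is_groq_api

model_kwargs: dict = {"temperature": 0.6}
api_base_url = "https://api.groq.com/openai/v1"  # illustrative Groq base URL
if is_groq_api(api_base_url):
    # Let Groq choose the serving tier rather than pinning one
    model_kwargs["service_tier"] = "auto"
assert model_kwargs["service_tier"] == "auto"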
@@ -297,7 +296,7 @@ async def chat_completion_with_backoff(
 
     model_kwargs["top_p"] = model_kwargs.get("top_p", 0.95)
 
-    formatted_messages = format_message_for_api(messages, api_base_url)
+    formatted_messages = format_message_for_api(messages, model_name, api_base_url)
 
     # Configure thinking for openai reasoning models
     if is_openai_reasoning_model(model_name, api_base_url):
@@ -307,26 +306,17 @@
         # Remove unsupported params for reasoning models
         model_kwargs.pop("top_p", None)
         model_kwargs.pop("stop", None)
-
-        # Get the first system message and add the string `Formatting re-enabled` to it.
-        # See https://platform.openai.com/docs/guides/reasoning-best-practices
-        if len(formatted_messages) > 0:
-            system_messages = [
-                (i, message) for i, message in enumerate(formatted_messages) if message["role"] == "system"
-            ]
-            if len(system_messages) > 0:
-                first_system_message_index, first_system_message = system_messages[0]
-                first_system_message_content = first_system_message["content"]
-                formatted_messages[first_system_message_index]["content"] = (
-                    f"{first_system_message_content}\nFormatting re-enabled"
-                )
     elif is_twitter_reasoning_model(model_name, api_base_url):
         reasoning_effort = "high" if deepthought else "low"
         # Grok-4 models do not support reasoning_effort parameter
         if not model_name.startswith("grok-4"):
             model_kwargs["reasoning_effort"] = reasoning_effort
-    elif model_name.startswith("deepseek-reasoner") or "deepseek-r1" in model_name:
-        # Official Deepseek reasoner model and some inference APIs like vLLM return structured thinking output.
+    elif (
+        model_name.startswith("deepseek-chat")
+        or model_name.startswith("deepseek-reasoner")
+        or "deepseek-r1" in model_name.lower()
+    ):
+        # Official Deepseek models and some inference APIs like vLLM return structured thinking output.
         # Others like DeepInfra return it in response stream.
         # Using the instream thought processor handles both cases, structured thoughts and in response thoughts.
         stream_processor = ain_stream_thought_processor
@@ -351,11 +341,8 @@
         # See https://qwenlm.github.io/blog/qwen3/#advanced-usages
         if not deepthought:
             add_qwen_no_think_tag(formatted_messages)
-    elif "gpt-oss" in model_name.lower():
-        temperature = 1
-        reasoning_effort = "medium" if deepthought else "low"
-        model_kwargs["reasoning_effort"] = reasoning_effort
-        model_kwargs["top_p"] = 1.0
+    elif is_groq_api(api_base_url):
+        model_kwargs["service_tier"] = "auto"
 
     read_timeout = 300 if is_local_api(api_base_url) else 60
     if os.getenv("KHOJ_LLM_SEED"):
@@ -461,7 +448,7 @@ def responses_completion_with_backoff(
         client = get_openai_client(openai_api_key, api_base_url)
         openai_clients[client_key] = client
 
-    formatted_messages = format_message_for_api(messages, api_base_url)
+    formatted_messages = format_message_for_api(messages, model_name, api_base_url)
     # Move the first system message to Responses API instructions
     instructions: Optional[str] = None
     if formatted_messages and formatted_messages[0].get("role") == "system":
@@ -474,8 +461,10 @@ def responses_completion_with_backoff(
     if is_openai_reasoning_model(model_name, api_base_url):
         temperature = 1
         reasoning_effort = "medium" if deepthought else "low"
-        model_kwargs["reasoning"] = {"effort": reasoning_effort, "summary": "auto"}
-        model_kwargs["include"] = ["reasoning.encrypted_content"]
+        model_kwargs["reasoning"] = {"effort": reasoning_effort}
+        if is_openai_api(api_base_url):
+            model_kwargs["reasoning"]["summary"] = "auto"
+            model_kwargs["include"] = ["reasoning.encrypted_content"]
         # Remove unsupported params for reasoning models
         model_kwargs.pop("top_p", None)
         model_kwargs.pop("stop", None)
@@ -572,7 +561,7 @@ async def responses_chat_completion_with_backoff(
         client = get_openai_async_client(openai_api_key, api_base_url)
         openai_async_clients[client_key] = client
 
-    formatted_messages = format_message_for_api(messages, api_base_url)
+    formatted_messages = format_message_for_api(messages, model_name, api_base_url)
     # Move the first system message to Responses API instructions
     instructions: Optional[str] = None
     if formatted_messages and formatted_messages[0].get("role") == "system":
@@ -585,7 +574,10 @@ async def responses_chat_completion_with_backoff(
     if is_openai_reasoning_model(model_name, api_base_url):
         temperature = 1
         reasoning_effort = "medium" if deepthought else "low"
-        model_kwargs["reasoning"] = {"effort": reasoning_effort, "summary": "auto"}
+        model_kwargs["reasoning"] = {"effort": reasoning_effort}
+        if is_openai_api(api_base_url):
+            model_kwargs["reasoning"]["summary"] = "auto"
+            model_kwargs["include"] = ["reasoning.encrypted_content"]
         # Remove unsupported params for reasoning models
         model_kwargs.pop("top_p", None)
         model_kwargs.pop("stop", None)
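
These two Responses API hunks gate the reasoning extras by provider: every reasoning model still gets an effort level, but reasoning summaries and encrypted reasoning content are requested only from the official OpenAI API, which other Responses-compatible backends may reject. Condensed into a hypothetical helper:

from khoj.processor.conversation.openai.utils import is_openai_api


def reasoning_kwargs(deepthought: bool, api_base_url: str | None = None) -> dict:
    kwargs: dict = {"reasoning": {"effort": "medium" if deepthought else "low"}}
    if is_openai_api(api_base_url):
        # Summaries and encrypted reasoning traces: official OpenAI API only
        kwargs["reasoning"]["summary"] = "auto"
        kwargs["include"] = ["reasoning.encrypted_content"]
    return kwargs


assert "include" in reasoning_kwargs(True)  # no base URL means api.openai.com
assert "include" not in reasoning_kwargs(True, "https://my-proxy.example/v1")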
@@ -718,7 +710,7 @@ def get_structured_output_support(model_name: str, api_base_url: str = None) ->
     return StructuredOutputSupport.TOOL
 
 
-def format_message_for_api(raw_messages: List[ChatMessage], api_base_url: str) -> List[dict]:
+def format_message_for_api(raw_messages: List[ChatMessage], model_name: str, api_base_url: str) -> List[dict]:
     """
     Format messages to send to chat model served over OpenAI (compatible) API.
     """
@@ -728,7 +720,7 @@ def format_message_for_api(raw_messages: List[ChatMessage], api_base_url: str) -
         # Handle tool call and tool result message types
         message_type = message.additional_kwargs.get("message_type")
         if message_type == "tool_call":
-            if is_openai_api(api_base_url):
+            if supports_responses_api(model_name, api_base_url):
                 for part in message.content:
                     if "status" in part:
                         part.pop("status")  # Drop unsupported tool call status field
@@ -772,7 +764,7 @@ def format_message_for_api(raw_messages: List[ChatMessage], api_base_url: str) -
             if not tool_call_id:
                 logger.warning(f"Dropping tool result without valid tool_call_id: {part.get('name')}")
                 continue
-            if is_openai_api(api_base_url):
+            if supports_responses_api(model_name, api_base_url):
                 formatted_messages.append(
                     {
                         "type": "function_call_output",
@@ -790,7 +782,7 @@ def format_message_for_api(raw_messages: List[ChatMessage], api_base_url: str) -
                     }
                 )
             continue
-        if isinstance(message.content, list) and not is_openai_api(api_base_url):
+        if isinstance(message.content, list) and not supports_responses_api(model_name, api_base_url):
             assistant_texts = []
             has_images = False
             for idx, part in enumerate(message.content):
@@ -803,7 +795,7 @@ def format_message_for_api(raw_messages: List[ChatMessage], api_base_url: str) -
                 if (
                     part.get("type") == "text"
                     and message.role == "assistant"
-                    and api_base_url.startswith("https://api.deepinfra.com/v1")
+                    and (api_base_url.startswith("https://api.deepinfra.com/v1") or is_cerebras_api(api_base_url))
                 ):
                     assistant_texts += [part["text"]]
                     message.content.pop(idx)
@@ -846,12 +838,21 @@ def is_openai_api(api_base_url: str = None) -> bool:
     return api_base_url is None or api_base_url.startswith("https://api.openai.com/v1")
 
 
+def supports_responses_api(model_name: str, api_base_url: str = None) -> bool:
+    """
+    Check if the model, ai api supports the OpenAI Responses API
+    """
+    return is_openai_api(api_base_url)
+
+
 def is_openai_reasoning_model(model_name: str, api_base_url: str = None) -> bool:
     """
     Check if the model is an OpenAI reasoning model
     """
-    return is_openai_api(api_base_url) and (
-        model_name.lower().startswith("o") or model_name.lower().startswith("gpt-5")
+    return (
+        is_openai_api(api_base_url)
+        and (model_name.lower().startswith("o") or model_name.lower().startswith("gpt-5"))
+        or "gpt-oss" in model_name.lower()
     )
 
 
@@ -875,6 +876,20 @@ def is_twitter_reasoning_model(model_name: str, api_base_url: str = None) -> boo
     )
 
 
+def is_cerebras_api(api_base_url: str = None) -> bool:
+    """
+    Check if the model is served over the Cerebras API
+    """
+    return api_base_url is not None and api_base_url.startswith("https://api.cerebras.ai/v1")
+
+
+def is_groq_api(api_base_url: str = None) -> bool:
+    """
+    Check if the model is served over the Groq API
+    """
+    return api_base_url is not None and api_base_url.startswith("https://api.groq.com")
+
+
 def is_qwen_style_reasoning_model(model_name: str, api_base_url: str = None) -> bool:
     """
     Check if the model is a Qwen style reasoning model
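
Usage sketch for the two new provider predicates; the base URLs are illustrative:

from khoj.processor.conversation.openai.utils import is_cerebras_api, is_groq_api

assert is_cerebras_api("https://api.cerebras.ai/v1")
assert is_groq_api("https://api.groq.com/openai/v1")
# A missing base URL means the default OpenAI endpoint, so neither matches
assert not is_cerebras_api(None) and not is_groq_api(None)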
@@ -1204,7 +1219,7 @@ def add_qwen_no_think_tag(formatted_messages: List[dict]) -> None:
             break
 
 
-def to_openai_tools(tools: List[ToolDefinition], use_responses_api: bool) -> List[Dict] | None:
+def to_openai_tools(tools: List[ToolDefinition], use_responses_api: bool, strict: bool) -> List[Dict] | None:
     "Transform tool definitions from standard format to OpenAI format."
     if use_responses_api:
         openai_tools = [
@@ -1213,7 +1228,7 @@ def to_openai_tools(tools: List[ToolDefinition], use_responses_api: bool) -> Lis
                 "name": tool.name,
                 "description": tool.description,
                 "parameters": clean_response_schema(tool.schema),
-                "strict": True,
+                "strict": strict,
             }
             for tool in tools
         ]
@@ -1225,7 +1240,7 @@ def to_openai_tools(tools: List[ToolDefinition], use_responses_api: bool) -> Lis
                     "name": tool.name,
                     "description": tool.description,
                     "parameters": clean_response_schema(tool.schema),
-                    "strict": True,
+                    "strict": strict,
                 },
             }
             for tool in tools
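
A call sketch for the updated to_openai_tools, which now takes strict from the caller instead of hardcoding it. The ToolDefinition constructor arguments are assumed from the attribute accesses visible in the diff (tool.name, tool.description, tool.schema):

from khoj.processor.conversation.openai.utils import (
    is_cerebras_api,
    supports_responses_api,
    to_openai_tools,
)
from khoj.utils.helpers import ToolDefinition

# Hypothetical tool; field names inferred from the diff
tools = [
    ToolDefinition(
        name="search_notes",
        description="Search the user's notes.",
        schema={"type": "object", "properties": {"q": {"type": "string"}}},
    )
]
api_base_url = None  # default OpenAI endpoint
openai_tools = to_openai_tools(
    tools,
    use_responses_api=supports_responses_api("gpt-4.1-mini", api_base_url),
    strict=not is_cerebras_api(api_base_url),  # strict stays True off Cerebras
)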