khoj-2.0.0b14.dev51-py3-none-any.whl → khoj-2.0.0b15.dev23-py3-none-any.whl

This diff compares the contents of two publicly released versions of the package, as published to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in the public registry.
Files changed (53)
  1. khoj/database/adapters/__init__.py +59 -20
  2. khoj/database/admin.py +4 -0
  3. khoj/database/migrations/0094_serverchatsettings_think_free_deep_and_more.py +61 -0
  4. khoj/database/models/__init__.py +18 -2
  5. khoj/interface/compiled/404/index.html +2 -2
  6. khoj/interface/compiled/_next/static/chunks/{9808-0ae18d938933fea3.js → 9808-bd5d7361ad026094.js} +1 -1
  7. khoj/interface/compiled/_next/static/chunks/{webpack-e572645654c4335e.js → webpack-820bd66958a5e279.js} +1 -1
  8. khoj/interface/compiled/_next/static/css/821d0d60b0b6871d.css +1 -0
  9. khoj/interface/compiled/_next/static/css/fb7ea16e60b40ecd.css +1 -0
  10. khoj/interface/compiled/agents/index.html +2 -2
  11. khoj/interface/compiled/agents/index.txt +2 -2
  12. khoj/interface/compiled/automations/index.html +2 -2
  13. khoj/interface/compiled/automations/index.txt +3 -3
  14. khoj/interface/compiled/chat/index.html +2 -2
  15. khoj/interface/compiled/chat/index.txt +3 -3
  16. khoj/interface/compiled/index.html +2 -2
  17. khoj/interface/compiled/index.txt +2 -2
  18. khoj/interface/compiled/search/index.html +2 -2
  19. khoj/interface/compiled/search/index.txt +2 -2
  20. khoj/interface/compiled/settings/index.html +2 -2
  21. khoj/interface/compiled/settings/index.txt +4 -4
  22. khoj/interface/compiled/share/chat/index.html +2 -2
  23. khoj/interface/compiled/share/chat/index.txt +2 -2
  24. khoj/processor/conversation/anthropic/anthropic_chat.py +4 -88
  25. khoj/processor/conversation/anthropic/utils.py +1 -2
  26. khoj/processor/conversation/google/gemini_chat.py +4 -88
  27. khoj/processor/conversation/google/utils.py +6 -3
  28. khoj/processor/conversation/openai/gpt.py +16 -93
  29. khoj/processor/conversation/openai/utils.py +38 -30
  30. khoj/processor/conversation/prompts.py +30 -39
  31. khoj/processor/conversation/utils.py +72 -84
  32. khoj/processor/image/generate.py +69 -15
  33. khoj/processor/tools/run_code.py +3 -2
  34. khoj/routers/api_chat.py +8 -21
  35. khoj/routers/helpers.py +243 -156
  36. khoj/routers/research.py +6 -6
  37. khoj/utils/helpers.py +6 -2
  38. {khoj-2.0.0b14.dev51.dist-info → khoj-2.0.0b15.dev23.dist-info}/METADATA +1 -1
  39. {khoj-2.0.0b14.dev51.dist-info → khoj-2.0.0b15.dev23.dist-info}/RECORD +51 -50
  40. khoj/interface/compiled/_next/static/css/2945c4a857922f3b.css +0 -1
  41. khoj/interface/compiled/_next/static/css/ecea704005ba630c.css +0 -1
  42. /khoj/interface/compiled/_next/static/{yBzbL9kxl5BudSA9F4Gr6 → PcD2gC0kChVzNip15DdDQ}/_buildManifest.js +0 -0
  43. /khoj/interface/compiled/_next/static/{yBzbL9kxl5BudSA9F4Gr6 → PcD2gC0kChVzNip15DdDQ}/_ssgManifest.js +0 -0
  44. /khoj/interface/compiled/_next/static/chunks/{1327-511bb0a862efce80.js → 1327-e254819a9172cfa7.js} +0 -0
  45. /khoj/interface/compiled/_next/static/chunks/{1915-fbfe167c84ad60c5.js → 1915-5c6508f6ebb62a30.js} +0 -0
  46. /khoj/interface/compiled/_next/static/chunks/{2117-e78b6902ad6f75ec.js → 2117-080746c8e170c81a.js} +0 -0
  47. /khoj/interface/compiled/_next/static/chunks/{2939-4d4084c5b888b960.js → 2939-4af3fd24b8ffc9ad.js} +0 -0
  48. /khoj/interface/compiled/_next/static/chunks/{4447-d6cf93724d57e34b.js → 4447-cd95608f8e93e711.js} +0 -0
  49. /khoj/interface/compiled/_next/static/chunks/{8667-4b7790573b08c50d.js → 8667-50b03a89e82e0ba7.js} +0 -0
  50. /khoj/interface/compiled/_next/static/chunks/{9139-ce1ae935dac9c871.js → 9139-8ac4d9feb10f8869.js} +0 -0
  51. {khoj-2.0.0b14.dev51.dist-info → khoj-2.0.0b15.dev23.dist-info}/WHEEL +0 -0
  52. {khoj-2.0.0b14.dev51.dist-info → khoj-2.0.0b15.dev23.dist-info}/entry_points.txt +0 -0
  53. {khoj-2.0.0b14.dev51.dist-info → khoj-2.0.0b15.dev23.dist-info}/licenses/LICENSE +0 -0
--- a/khoj/processor/conversation/google/gemini_chat.py
+++ b/khoj/processor/conversation/google/gemini_chat.py
@@ -1,22 +1,16 @@
 import logging
-from datetime import datetime
-from typing import AsyncGenerator, Dict, List, Optional
+from typing import AsyncGenerator, List, Optional
+
+from langchain_core.messages.chat import ChatMessage
 
-from khoj.database.models import Agent, ChatMessageModel, ChatModel
-from khoj.processor.conversation import prompts
 from khoj.processor.conversation.google.utils import (
     gemini_chat_completion_with_backoff,
     gemini_completion_with_backoff,
 )
 from khoj.processor.conversation.utils import (
-    OperatorRun,
     ResponseWithThought,
-    generate_chatml_messages_with_context,
     messages_to_print,
 )
-from khoj.utils.helpers import is_none_or_empty, truncate_code_context
-from khoj.utils.rawconfig import FileAttachment, LocationData
-from khoj.utils.yaml import yaml_dump
 
 logger = logging.getLogger(__name__)
 
@@ -61,95 +55,18 @@ def gemini_send_message_to_model(
 
 async def converse_gemini(
     # Query
-    user_query: str,
-    # Context
-    references: list[dict],
-    online_results: Optional[Dict[str, Dict]] = None,
-    code_results: Optional[Dict[str, Dict]] = None,
-    operator_results: Optional[List[OperatorRun]] = None,
-    query_images: Optional[list[str]] = None,
-    query_files: str = None,
-    generated_files: List[FileAttachment] = None,
-    generated_asset_results: Dict[str, Dict] = {},
-    program_execution_context: List[str] = None,
-    location_data: LocationData = None,
-    user_name: str = None,
-    chat_history: List[ChatMessageModel] = [],
+    messages: List[ChatMessage],
     # Model
     model: Optional[str] = "gemini-2.5-flash",
     api_key: Optional[str] = None,
     api_base_url: Optional[str] = None,
     temperature: float = 1.0,
-    max_prompt_size=None,
-    tokenizer_name=None,
-    agent: Agent = None,
-    vision_available: bool = False,
     deepthought: Optional[bool] = False,
     tracer={},
 ) -> AsyncGenerator[ResponseWithThought, None]:
     """
     Converse with user using Google's Gemini
     """
-    # Initialize Variables
-    current_date = datetime.now()
-
-    if agent and agent.personality:
-        system_prompt = prompts.custom_personality.format(
-            name=agent.name,
-            bio=agent.personality,
-            current_date=current_date.strftime("%Y-%m-%d"),
-            day_of_week=current_date.strftime("%A"),
-        )
-    else:
-        system_prompt = prompts.personality.format(
-            current_date=current_date.strftime("%Y-%m-%d"),
-            day_of_week=current_date.strftime("%A"),
-        )
-
-    system_prompt += f"{system_prompt}\n\n{prompts.gemini_verbose_language_personality}"
-    if location_data:
-        location_prompt = prompts.user_location.format(location=f"{location_data}")
-        system_prompt = f"{system_prompt}\n{location_prompt}"
-
-    if user_name:
-        user_name_prompt = prompts.user_name.format(name=user_name)
-        system_prompt = f"{system_prompt}\n{user_name_prompt}"
-
-    context_message = ""
-    if not is_none_or_empty(references):
-        context_message = f"{prompts.notes_conversation.format(query=user_query, references=yaml_dump(references))}\n\n"
-    if not is_none_or_empty(online_results):
-        context_message += f"{prompts.online_search_conversation.format(online_results=yaml_dump(online_results))}\n\n"
-    if not is_none_or_empty(code_results):
-        context_message += (
-            f"{prompts.code_executed_context.format(code_results=truncate_code_context(code_results))}\n\n"
-        )
-    if not is_none_or_empty(operator_results):
-        operator_content = [
-            {"query": oc.query, "response": oc.response, "webpages": oc.webpages} for oc in operator_results
-        ]
-        context_message += (
-            f"{prompts.operator_execution_context.format(operator_results=yaml_dump(operator_content))}\n\n"
-        )
-    context_message = context_message.strip()
-
-    # Setup Prompt with Primer or Conversation History
-    messages = generate_chatml_messages_with_context(
-        user_query,
-        context_message=context_message,
-        chat_history=chat_history,
-        model_name=model,
-        max_prompt_size=max_prompt_size,
-        tokenizer_name=tokenizer_name,
-        query_images=query_images,
-        vision_enabled=vision_available,
-        model_type=ChatModel.ModelType.GOOGLE,
-        query_files=query_files,
-        generated_files=generated_files,
-        generated_asset_results=generated_asset_results,
-        program_execution_context=program_execution_context,
-    )
-
     logger.debug(f"Conversation Context for Gemini: {messages_to_print(messages)}")
 
     # Get Response from Google AI
@@ -159,7 +76,6 @@ async def converse_gemini(
         temperature=temperature,
         api_key=api_key,
         api_base_url=api_base_url,
-        system_prompt=system_prompt,
         deepthought=deepthought,
         tracer=tracer,
     ):
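These hunks strip all prompt and context assembly out of `converse_gemini`: it now receives a prebuilt `List[ChatMessage]`, with the system prompt travelling inside that list rather than as a separate `system_prompt` argument to the backoff call. The `converse_openai` hunks further down get the same treatment, with the shared assembly presumably consolidated in `khoj/routers/helpers.py`, the file with the largest churn in this diff. A minimal sketch of the new calling convention, with a placeholder credential and an assumed `ResponseWithThought.text` field:

```python
import asyncio

from langchain_core.messages.chat import ChatMessage

from khoj.processor.conversation.google.gemini_chat import converse_gemini


async def main() -> None:
    # The caller now assembles the full ChatML message list up front,
    # e.g. via generate_chatml_messages_with_context() in the shared helpers.
    messages = [
        ChatMessage(role="system", content="You are Khoj, a helpful personal AI."),
        ChatMessage(role="user", content="Summarize my notes on solar panels."),
    ]
    async for chunk in converse_gemini(
        messages=messages,
        model="gemini-2.5-flash",
        api_key="YOUR_GEMINI_API_KEY",  # placeholder credential
    ):
        # Each chunk is a ResponseWithThought; `text` is assumed to hold the reply delta.
        print(chunk.text or "", end="")


asyncio.run(main())
```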
--- a/khoj/processor/conversation/google/utils.py
+++ b/khoj/processor/conversation/google/utils.py
@@ -308,7 +308,7 @@ async def gemini_chat_completion_with_backoff(
     temperature: float,
     api_key: str,
     api_base_url: str,
-    system_prompt: str,
+    system_prompt: str = "",
     model_kwargs=None,
     deepthought=False,
     tracer: dict = {},
@@ -472,9 +472,12 @@ def format_messages_for_gemini(
     for message in messages.copy():
         if message.role == "system":
             if isinstance(message.content, list):
-                system_prompt += "\n".join([part["text"] for part in message.content if part["type"] == "text"])
+                system_prompt += "\n\n" + "\n".join(
+                    [part["text"] for part in message.content if part["type"] == "text"]
+                )
             else:
-                system_prompt += message.content
+                system_prompt += "\n\n" + message.content
+            system_prompt = system_prompt.strip()
             messages.remove(message)
     system_prompt = None if is_none_or_empty(system_prompt) else system_prompt
 
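With the system prompt now arriving as a message rather than a keyword argument, `format_messages_for_gemini` joins multiple system messages with blank-line separators and strips the result, so the (now empty-by-default) `system_prompt` parameter no longer fuses awkwardly with caller-supplied prompts. A standalone sketch of the join-and-strip behaviour:

```python
# Standalone sketch of the new join-and-strip behaviour for system messages.
system_prompt = ""  # default is now an empty string rather than a required argument
for content in ["You are Khoj, a personal assistant.", "The user is in Paris."]:
    system_prompt += "\n\n" + content  # blank-line separator, as in the diff
system_prompt = system_prompt.strip()  # drop the leading separator

assert system_prompt == "You are Khoj, a personal assistant.\n\nThe user is in Paris."
```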
--- a/khoj/processor/conversation/openai/gpt.py
+++ b/khoj/processor/conversation/openai/gpt.py
@@ -1,29 +1,25 @@
 import logging
-from datetime import datetime
 from typing import Any, AsyncGenerator, Dict, List, Optional
 
-from khoj.database.models import Agent, ChatMessageModel, ChatModel
-from khoj.processor.conversation import prompts
+from langchain_core.messages.chat import ChatMessage
+
 from khoj.processor.conversation.openai.utils import (
     chat_completion_with_backoff,
     clean_response_schema,
     completion_with_backoff,
     get_structured_output_support,
-    is_openai_api,
+    is_cerebras_api,
     responses_chat_completion_with_backoff,
     responses_completion_with_backoff,
+    supports_responses_api,
     to_openai_tools,
 )
 from khoj.processor.conversation.utils import (
-    OperatorRun,
     ResponseWithThought,
     StructuredOutputSupport,
-    generate_chatml_messages_with_context,
     messages_to_print,
 )
-from khoj.utils.helpers import ToolDefinition, is_none_or_empty, truncate_code_context
-from khoj.utils.rawconfig import FileAttachment, LocationData
-from khoj.utils.yaml import yaml_dump
+from khoj.utils.helpers import ToolDefinition
 
 logger = logging.getLogger(__name__)
 
@@ -45,16 +41,19 @@ def send_message_to_model(
 
     model_kwargs: Dict[str, Any] = {}
     json_support = get_structured_output_support(model, api_base_url)
+    strict = not is_cerebras_api(api_base_url)
     if tools and json_support == StructuredOutputSupport.TOOL:
-        model_kwargs["tools"] = to_openai_tools(tools, use_responses_api=is_openai_api(api_base_url))
+        model_kwargs["tools"] = to_openai_tools(
+            tools, use_responses_api=supports_responses_api(model, api_base_url), strict=strict
+        )
     elif response_schema and json_support >= StructuredOutputSupport.SCHEMA:
         # Drop unsupported fields from schema passed to OpenAI APi
         cleaned_response_schema = clean_response_schema(response_schema)
-        if is_openai_api(api_base_url):
+        if supports_responses_api(model, api_base_url):
             model_kwargs["text"] = {
                 "format": {
                     "type": "json_schema",
-                    "strict": True,
+                    "strict": strict,
                     "name": response_schema.__name__,
                     "schema": cleaned_response_schema,
                 }
@@ -65,14 +64,14 @@ def send_message_to_model(
                 "json_schema": {
                     "schema": cleaned_response_schema,
                     "name": response_schema.__name__,
-                    "strict": True,
+                    "strict": strict,
                 },
             }
     elif response_type == "json_object" and json_support == StructuredOutputSupport.OBJECT:
         model_kwargs["response_format"] = {"type": response_type}
 
     # Get Response from GPT
-    if is_openai_api(api_base_url):
+    if supports_responses_api(model, api_base_url):
         return responses_completion_with_backoff(
             messages=messages,
             model_name=model,
@@ -96,98 +95,22 @@
 
 async def converse_openai(
     # Query
-    user_query: str,
-    # Context
-    references: list[dict],
-    online_results: Optional[Dict[str, Dict]] = None,
-    code_results: Optional[Dict[str, Dict]] = None,
-    operator_results: Optional[List[OperatorRun]] = None,
-    query_images: Optional[list[str]] = None,
-    query_files: str = None,
-    generated_files: List[FileAttachment] = None,
-    generated_asset_results: Dict[str, Dict] = {},
-    program_execution_context: List[str] = None,
-    location_data: LocationData = None,
-    chat_history: list[ChatMessageModel] = [],
+    messages: List[ChatMessage],
+    # Model
     model: str = "gpt-4.1-mini",
     api_key: Optional[str] = None,
     api_base_url: Optional[str] = None,
     temperature: float = 0.6,
-    max_prompt_size=None,
-    tokenizer_name=None,
-    user_name: str = None,
-    agent: Agent = None,
-    vision_available: bool = False,
     deepthought: Optional[bool] = False,
     tracer: dict = {},
 ) -> AsyncGenerator[ResponseWithThought, None]:
     """
     Converse with user using OpenAI's ChatGPT
     """
-    # Initialize Variables
-    current_date = datetime.now()
-
-    if agent and agent.personality:
-        system_prompt = prompts.custom_personality.format(
-            name=agent.name,
-            bio=agent.personality,
-            current_date=current_date.strftime("%Y-%m-%d"),
-            day_of_week=current_date.strftime("%A"),
-        )
-    else:
-        system_prompt = prompts.personality.format(
-            current_date=current_date.strftime("%Y-%m-%d"),
-            day_of_week=current_date.strftime("%A"),
-        )
-
-    if location_data:
-        location_prompt = prompts.user_location.format(location=f"{location_data}")
-        system_prompt = f"{system_prompt}\n{location_prompt}"
-
-    if user_name:
-        user_name_prompt = prompts.user_name.format(name=user_name)
-        system_prompt = f"{system_prompt}\n{user_name_prompt}"
-
-    context_message = ""
-    if not is_none_or_empty(references):
-        context_message = f"{prompts.notes_conversation.format(references=yaml_dump(references))}\n\n"
-    if not is_none_or_empty(online_results):
-        context_message += f"{prompts.online_search_conversation.format(online_results=yaml_dump(online_results))}\n\n"
-    if not is_none_or_empty(code_results):
-        context_message += (
-            f"{prompts.code_executed_context.format(code_results=truncate_code_context(code_results))}\n\n"
-        )
-    if not is_none_or_empty(operator_results):
-        operator_content = [
-            {"query": oc.query, "response": oc.response, "webpages": oc.webpages} for oc in operator_results
-        ]
-        context_message += (
-            f"{prompts.operator_execution_context.format(operator_results=yaml_dump(operator_content))}\n\n"
-        )
-
-    context_message = context_message.strip()
-
-    # Setup Prompt with Primer or Conversation History
-    messages = generate_chatml_messages_with_context(
-        user_query,
-        system_prompt,
-        chat_history,
-        context_message=context_message,
-        model_name=model,
-        max_prompt_size=max_prompt_size,
-        tokenizer_name=tokenizer_name,
-        query_images=query_images,
-        vision_enabled=vision_available,
-        model_type=ChatModel.ModelType.OPENAI,
-        query_files=query_files,
-        generated_files=generated_files,
-        generated_asset_results=generated_asset_results,
-        program_execution_context=program_execution_context,
-    )
     logger.debug(f"Conversation Context for GPT: {messages_to_print(messages)}")
 
     # Get Response from GPT
-    if is_openai_api(api_base_url):
+    if supports_responses_api(model, api_base_url):
         async for chunk in responses_chat_completion_with_backoff(
             messages=messages,
             model_name=model,
--- a/khoj/processor/conversation/openai/utils.py
+++ b/khoj/processor/conversation/openai/utils.py
@@ -111,14 +111,16 @@ def completion_with_backoff(
     model_kwargs["temperature"] = temperature
     model_kwargs["top_p"] = model_kwargs.get("top_p", 0.95)
 
-    formatted_messages = format_message_for_api(messages, api_base_url)
+    formatted_messages = format_message_for_api(messages, model_name, api_base_url)
 
     # Tune reasoning models arguments
     if is_openai_reasoning_model(model_name, api_base_url):
         model_kwargs["temperature"] = 1
         reasoning_effort = "medium" if deepthought else "low"
         model_kwargs["reasoning_effort"] = reasoning_effort
+        # Remove unsupported params for reasoning models
         model_kwargs.pop("top_p", None)
+        model_kwargs.pop("stop", None)
     elif is_twitter_reasoning_model(model_name, api_base_url):
         model_kwargs.pop("temperature", None)
         reasoning_effort = "high" if deepthought else "low"
@@ -294,7 +296,7 @@ async def chat_completion_with_backoff(
 
     model_kwargs["top_p"] = model_kwargs.get("top_p", 0.95)
 
-    formatted_messages = format_message_for_api(messages, api_base_url)
+    formatted_messages = format_message_for_api(messages, model_name, api_base_url)
 
     # Configure thinking for openai reasoning models
     if is_openai_reasoning_model(model_name, api_base_url):
@@ -304,19 +306,6 @@
         # Remove unsupported params for reasoning models
         model_kwargs.pop("top_p", None)
         model_kwargs.pop("stop", None)
-
-        # Get the first system message and add the string `Formatting re-enabled` to it.
-        # See https://platform.openai.com/docs/guides/reasoning-best-practices
-        if len(formatted_messages) > 0:
-            system_messages = [
-                (i, message) for i, message in enumerate(formatted_messages) if message["role"] == "system"
-            ]
-            if len(system_messages) > 0:
-                first_system_message_index, first_system_message = system_messages[0]
-                first_system_message_content = first_system_message["content"]
-                formatted_messages[first_system_message_index]["content"] = (
-                    f"{first_system_message_content}\nFormatting re-enabled"
-                )
     elif is_twitter_reasoning_model(model_name, api_base_url):
         reasoning_effort = "high" if deepthought else "low"
         # Grok-4 models do not support reasoning_effort parameter
@@ -325,7 +314,7 @@
     elif (
         model_name.startswith("deepseek-chat")
         or model_name.startswith("deepseek-reasoner")
-        or "deepseek-r1" in model_name
+        or "deepseek-r1" in model_name.lower()
     ):
         # Official Deepseek models and some inference APIs like vLLM return structured thinking output.
         # Others like DeepInfra return it in response stream.
@@ -459,7 +448,7 @@ def responses_completion_with_backoff(
         client = get_openai_client(openai_api_key, api_base_url)
         openai_clients[client_key] = client
 
-    formatted_messages = format_message_for_api(messages, api_base_url)
+    formatted_messages = format_message_for_api(messages, model_name, api_base_url)
     # Move the first system message to Responses API instructions
     instructions: Optional[str] = None
     if formatted_messages and formatted_messages[0].get("role") == "system":
@@ -472,8 +461,10 @@
     if is_openai_reasoning_model(model_name, api_base_url):
         temperature = 1
         reasoning_effort = "medium" if deepthought else "low"
-        model_kwargs["reasoning"] = {"effort": reasoning_effort, "summary": "auto"}
-        model_kwargs["include"] = ["reasoning.encrypted_content"]
+        model_kwargs["reasoning"] = {"effort": reasoning_effort}
+        if is_openai_api(api_base_url):
+            model_kwargs["reasoning"]["summary"] = "auto"
+            model_kwargs["include"] = ["reasoning.encrypted_content"]
         # Remove unsupported params for reasoning models
         model_kwargs.pop("top_p", None)
         model_kwargs.pop("stop", None)
@@ -570,7 +561,7 @@ async def responses_chat_completion_with_backoff(
         client = get_openai_async_client(openai_api_key, api_base_url)
         openai_async_clients[client_key] = client
 
-    formatted_messages = format_message_for_api(messages, api_base_url)
+    formatted_messages = format_message_for_api(messages, model_name, api_base_url)
     # Move the first system message to Responses API instructions
     instructions: Optional[str] = None
     if formatted_messages and formatted_messages[0].get("role") == "system":
@@ -583,7 +574,10 @@
     if is_openai_reasoning_model(model_name, api_base_url):
         temperature = 1
         reasoning_effort = "medium" if deepthought else "low"
-        model_kwargs["reasoning"] = {"effort": reasoning_effort, "summary": "auto"}
+        model_kwargs["reasoning"] = {"effort": reasoning_effort}
+        if is_openai_api(api_base_url):
+            model_kwargs["reasoning"]["summary"] = "auto"
+            model_kwargs["include"] = ["reasoning.encrypted_content"]
         # Remove unsupported params for reasoning models
         model_kwargs.pop("top_p", None)
        model_kwargs.pop("stop", None)
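Both Responses API entry points now attach reasoning summaries and encrypted reasoning content only when talking to the official OpenAI endpoint, presumably because OpenAI-compatible proxies reject those fields. A standalone sketch of the gated kwargs these two hunks produce (helper name and flag are illustrative):

```python
# Standalone sketch of the reasoning kwargs produced by the two hunks above.
def reasoning_kwargs(deepthought: bool, against_official_openai: bool) -> dict:
    kwargs: dict = {"reasoning": {"effort": "medium" if deepthought else "low"}}
    if against_official_openai:
        # Reasoning summaries and encrypted reasoning are requested from OpenAI only.
        kwargs["reasoning"]["summary"] = "auto"
        kwargs["include"] = ["reasoning.encrypted_content"]
    return kwargs


print(reasoning_kwargs(deepthought=True, against_official_openai=False))
# {'reasoning': {'effort': 'medium'}}
```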
@@ -716,7 +710,7 @@ def get_structured_output_support(model_name: str, api_base_url: str = None) ->
     return StructuredOutputSupport.TOOL
 
 
-def format_message_for_api(raw_messages: List[ChatMessage], api_base_url: str) -> List[dict]:
+def format_message_for_api(raw_messages: List[ChatMessage], model_name: str, api_base_url: str) -> List[dict]:
     """
     Format messages to send to chat model served over OpenAI (compatible) API.
     """
@@ -726,7 +720,7 @@ def format_message_for_api(raw_messages: List[ChatMessage], api_base_url: str) -
         # Handle tool call and tool result message types
         message_type = message.additional_kwargs.get("message_type")
         if message_type == "tool_call":
-            if is_openai_api(api_base_url):
+            if supports_responses_api(model_name, api_base_url):
                 for part in message.content:
                     if "status" in part:
                         part.pop("status")  # Drop unsupported tool call status field
@@ -770,7 +764,7 @@ def format_message_for_api(raw_messages: List[ChatMessage], api_base_url: str) -
                 if not tool_call_id:
                     logger.warning(f"Dropping tool result without valid tool_call_id: {part.get('name')}")
                     continue
-                if is_openai_api(api_base_url):
+                if supports_responses_api(model_name, api_base_url):
                     formatted_messages.append(
                         {
                             "type": "function_call_output",
@@ -788,7 +782,7 @@ def format_message_for_api(raw_messages: List[ChatMessage], api_base_url: str) -
                         }
                     )
             continue
-        if isinstance(message.content, list) and not is_openai_api(api_base_url):
+        if isinstance(message.content, list) and not supports_responses_api(model_name, api_base_url):
             assistant_texts = []
             has_images = False
             for idx, part in enumerate(message.content):
@@ -801,7 +795,7 @@ def format_message_for_api(raw_messages: List[ChatMessage], api_base_url: str) -
                 if (
                     part.get("type") == "text"
                     and message.role == "assistant"
-                    and api_base_url.startswith("https://api.deepinfra.com/v1")
+                    and (api_base_url.startswith("https://api.deepinfra.com/v1") or is_cerebras_api(api_base_url))
                 ):
                     assistant_texts += [part["text"]]
                     message.content.pop(idx)
@@ -844,6 +838,13 @@ def is_openai_api(api_base_url: str = None) -> bool:
     return api_base_url is None or api_base_url.startswith("https://api.openai.com/v1")
 
 
+def supports_responses_api(model_name: str, api_base_url: str = None) -> bool:
+    """
+    Check if the model, ai api supports the OpenAI Responses API
+    """
+    return is_openai_api(api_base_url)
+
+
 def is_openai_reasoning_model(model_name: str, api_base_url: str = None) -> bool:
     """
     Check if the model is an OpenAI reasoning model
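`supports_responses_api` is, for now, a thin wrapper over `is_openai_api`, but threading `model_name` through every call site turns the Responses-vs-Chat-Completions choice into a single seam that can later grow model-specific exceptions. Both helpers copied from this diff, with two illustrative checks (the model names are arbitrary examples):

```python
# Both helpers copied verbatim from the hunks above; the asserts are illustrative.
def is_openai_api(api_base_url: str = None) -> bool:
    return api_base_url is None or api_base_url.startswith("https://api.openai.com/v1")


def supports_responses_api(model_name: str, api_base_url: str = None) -> bool:
    return is_openai_api(api_base_url)


assert supports_responses_api("gpt-4.1-mini")  # no base URL -> official OpenAI API
assert not supports_responses_api("llama-3.3-70b", "https://api.cerebras.ai/v1")
```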
@@ -851,7 +852,7 @@ def is_openai_reasoning_model(model_name: str, api_base_url: str = None) -> bool
     return (
         is_openai_api(api_base_url)
         and (model_name.lower().startswith("o") or model_name.lower().startswith("gpt-5"))
-        or model_name.lower().startswith("gpt-oss")
+        or "gpt-oss" in model_name.lower()
     )
 
 
@@ -875,6 +876,13 @@ def is_twitter_reasoning_model(model_name: str, api_base_url: str = None) -> boo
     )
 
 
+def is_cerebras_api(api_base_url: str = None) -> bool:
+    """
+    Check if the model is served over the Cerebras API
+    """
+    return api_base_url is not None and api_base_url.startswith("https://api.cerebras.ai/v1")
+
+
 def is_groq_api(api_base_url: str = None) -> bool:
     """
     Check if the model is served over the Groq API
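`is_cerebras_api` exists to relax strict structured outputs: in `send_message_to_model` above, `strict = not is_cerebras_api(api_base_url)` now flows into tool definitions and JSON-schema response formats, presumably because the Cerebras endpoint rejects strict mode. A condensed sketch of that flow (`search_notes` is a hypothetical tool name):

```python
# is_cerebras_api copied from the diff; the tool entry is a hypothetical example.
def is_cerebras_api(api_base_url: str = None) -> bool:
    return api_base_url is not None and api_base_url.startswith("https://api.cerebras.ai/v1")


for base_url in (None, "https://api.cerebras.ai/v1"):
    strict = not is_cerebras_api(base_url)  # as computed in send_message_to_model
    tool_entry = {"name": "search_notes", "strict": strict}
    print(base_url, "->", tool_entry)
# None -> {'name': 'search_notes', 'strict': True}
# https://api.cerebras.ai/v1 -> {'name': 'search_notes', 'strict': False}
```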
@@ -1211,7 +1219,7 @@ def add_qwen_no_think_tag(formatted_messages: List[dict]) -> None:
             break
 
 
-def to_openai_tools(tools: List[ToolDefinition], use_responses_api: bool) -> List[Dict] | None:
+def to_openai_tools(tools: List[ToolDefinition], use_responses_api: bool, strict: bool) -> List[Dict] | None:
     "Transform tool definitions from standard format to OpenAI format."
     if use_responses_api:
         openai_tools = [
@@ -1220,7 +1228,7 @@ def to_openai_tools(tools: List[ToolDefinition], use_responses_api: bool) -> Lis
                 "name": tool.name,
                 "description": tool.description,
                 "parameters": clean_response_schema(tool.schema),
-                "strict": True,
+                "strict": strict,
             }
             for tool in tools
         ]
@@ -1232,7 +1240,7 @@ def to_openai_tools(tools: List[ToolDefinition], use_responses_api: bool) -> Lis
                     "name": tool.name,
                     "description": tool.description,
                     "parameters": clean_response_schema(tool.schema),
-                    "strict": True,
+                    "strict": strict,
                 },
             }
             for tool in tools
--- a/khoj/processor/conversation/prompts.py
+++ b/khoj/processor/conversation/prompts.py
@@ -18,12 +18,11 @@ Today is {day_of_week}, {current_date} in UTC.
 
 # Style
 - Your responses should be helpful, conversational and tuned to the user's communication style.
-- Make sure to use the specific LaTeX math mode delimiters for your response. LaTex math mode specific delimiters as following
-  - inline math mode : \\( and \\)
-  - display math mode: insert linebreak after opening $$, \\[ and before closing $$, \\]
 - Provide inline citations to documents and websites referenced. Add them inline in markdown format to directly support your claim.
   For example: "The weather today is sunny [1](https://weather.com)."
-- Mention generated assets like images by reference, e.g ![chart](/visualization/image.png). Do not manually output raw, b64 encoded bytes in your response.
+- KaTeX is used to render LaTeX expressions. Make sure you only use the KaTeX math mode delimiters specified below:
+  - inline math mode : \\( and \\)
+  - display math mode: insert linebreak after opening $$, \\[ and before closing $$, \\]
 - Do not respond with raw programs or scripts in your final response unless you know the user is a programmer or has explicitly requested code.
 """.strip()
 )
@@ -41,12 +40,11 @@ Today is {day_of_week}, {current_date} in UTC.
 - Users can share files and other information with you using the Khoj Web, Desktop, Obsidian or Emacs app. They can also drag and drop their files into the chat window.
 
 # Style
-- Make sure to use the specific LaTeX math mode delimiters for your response. LaTex math mode specific delimiters as following
-  - inline math mode : `\\(` and `\\)`
-  - display math mode: insert linebreak after opening `$$`, `\\[` and before closing `$$`, `\\]`
 - Provide inline citations to documents and websites referenced. Add them inline in markdown format to directly support your claim.
   For example: "The weather today is sunny [1](https://weather.com)."
-- Mention generated assets like images by reference, e.g ![chart](/visualization/image.png). Do not manually output raw, b64 encoded bytes in your response.
+- KaTeX is used to render LaTeX expressions. Make sure you only use the KaTeX math mode delimiters specified below:
+  - inline math mode : \\( and \\)
+  - display math mode: insert linebreak after opening $$, \\[ and before closing $$, \\]
 
 # Instructions:\n{bio}
 """.strip()
@@ -115,45 +113,38 @@ User's Notes:
 ## Image Generation
 ## --
 
-image_generation_improve_prompt_base = """
+enhance_image_system_message = PromptTemplate.from_template(
+    """
 You are a talented media artist with the ability to describe images to compose in professional, fine detail.
+Your image description will be transformed into an image by an AI model on your team.
 {personality_context}
-Generate a vivid description of the image to be rendered using the provided context and user prompt below:
-
-Today's Date: {current_date}
-User's Location: {location}
-
-User's Notes:
-{references}
-
-Online References:
-{online_results}
 
-Conversation Log:
-{chat_history}
-
-User Prompt: "{query}"
-
-Now generate an professional description of the image to generate in vivid, fine detail.
-- Use today's date, user's location, user's notes and online references to weave in any context that will improve the image generation.
-- Retain any important information and follow any instructions in the conversation log or user prompt.
+# Instructions
+- Retain important information and follow instructions by the user when composing the image description.
+- Weave in the context provided below if it will enhance the image.
+- Specify desired elements, lighting, mood, and composition in the description.
+- Decide the shape best suited to render the image. It can be one of square, portrait or landscape.
 - Add specific, fine position details. Mention painting style, camera parameters to compose the image.
-- Ensure your improved prompt is in prose format."""
+- Transform any negations in user instructions into positive alternatives.
+  Instead of saying what should NOT be in the image, describe what SHOULD be there instead.
+  Examples:
+  - "no sun" → "overcast cloudy sky"
+  - "don't include people" → "empty landscape" or "solitary scene"
+- Ensure your image description is in prose format (e.g no lists, links).
+- If any text is to be rendered in the image put it within double quotes in your image description.
 
-image_generation_improve_prompt_dalle = PromptTemplate.from_template(
-    f"""
-{image_generation_improve_prompt_base}
+# Context
 
-Improved Prompt:
-""".strip()
-)
+## User Location: {location}
 
-image_generation_improve_prompt_sd = PromptTemplate.from_template(
-    f"""
-{image_generation_improve_prompt_base}
-- If any text is to be rendered in the image put it within double quotes in your improved prompt.
+## User Documents
+{references}
+
+## Online References
+{online_results}
 
-Improved Prompt:
+Now generate a vivid description of the image and image shape to be rendered.
+Your response should be a JSON object with 'description' and 'shape' fields specified.
 """.strip()
 )
 
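The rewritten prompt asks the model for a JSON object with `description` and `shape` fields rather than free-form "improved prompt" text; the matching consumer changes land in `khoj/processor/image/generate.py` per the file list. A hypothetical schema for that contract (the real model in `generate.py` may differ):

```python
# Hypothetical schema for the JSON object the rewritten prompt requests.
from typing import Literal

from pydantic import BaseModel


class EnhancedImagePrompt(BaseModel):
    description: str  # vivid prose description of the image to render
    shape: Literal["square", "portrait", "landscape"]  # the shapes named in the prompt


print(EnhancedImagePrompt(description="A misty pine forest at dawn", shape="landscape"))
```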