khoj 2.0.0b14.dev43-py3-none-any.whl → 2.0.0b15.dev22-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- khoj/database/adapters/__init__.py +59 -20
- khoj/database/admin.py +6 -2
- khoj/database/migrations/0094_serverchatsettings_think_free_deep_and_more.py +61 -0
- khoj/database/models/__init__.py +18 -2
- khoj/interface/compiled/404/index.html +1 -1
- khoj/interface/compiled/_next/static/chunks/{9808-c0742b05e1ef29ba.js → 9808-bd5d7361ad026094.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/app/chat/page-ac7ed0a1aff1b145.js +1 -0
- khoj/interface/compiled/_next/static/css/fb7ea16e60b40ecd.css +1 -0
- khoj/interface/compiled/agents/index.html +1 -1
- khoj/interface/compiled/agents/index.txt +1 -1
- khoj/interface/compiled/automations/index.html +1 -1
- khoj/interface/compiled/automations/index.txt +1 -1
- khoj/interface/compiled/chat/index.html +2 -2
- khoj/interface/compiled/chat/index.txt +2 -2
- khoj/interface/compiled/index.html +2 -2
- khoj/interface/compiled/index.txt +1 -1
- khoj/interface/compiled/search/index.html +1 -1
- khoj/interface/compiled/search/index.txt +1 -1
- khoj/interface/compiled/settings/index.html +1 -1
- khoj/interface/compiled/settings/index.txt +1 -1
- khoj/interface/compiled/share/chat/index.html +2 -2
- khoj/interface/compiled/share/chat/index.txt +2 -2
- khoj/processor/conversation/anthropic/anthropic_chat.py +4 -88
- khoj/processor/conversation/anthropic/utils.py +1 -2
- khoj/processor/conversation/google/gemini_chat.py +5 -89
- khoj/processor/conversation/google/utils.py +8 -9
- khoj/processor/conversation/openai/gpt.py +16 -93
- khoj/processor/conversation/openai/utils.py +58 -43
- khoj/processor/conversation/prompts.py +30 -39
- khoj/processor/conversation/utils.py +71 -84
- khoj/processor/image/generate.py +69 -15
- khoj/processor/tools/run_code.py +3 -2
- khoj/routers/api_chat.py +8 -21
- khoj/routers/helpers.py +243 -156
- khoj/routers/research.py +6 -6
- khoj/utils/constants.py +3 -1
- khoj/utils/helpers.py +6 -2
- {khoj-2.0.0b14.dev43.dist-info → khoj-2.0.0b15.dev22.dist-info}/METADATA +1 -1
- {khoj-2.0.0b14.dev43.dist-info → khoj-2.0.0b15.dev22.dist-info}/RECORD +44 -43
- khoj/interface/compiled/_next/static/chunks/app/chat/page-1b4893b1a9957220.js +0 -1
- khoj/interface/compiled/_next/static/css/cea3bdfe98c144bd.css +0 -1
- /khoj/interface/compiled/_next/static/{OKbGpkzD6gHDfr1vAog6p → t8O_8CJ9p3UtV9kEsAAWT}/_buildManifest.js +0 -0
- /khoj/interface/compiled/_next/static/{OKbGpkzD6gHDfr1vAog6p → t8O_8CJ9p3UtV9kEsAAWT}/_ssgManifest.js +0 -0
- {khoj-2.0.0b14.dev43.dist-info → khoj-2.0.0b15.dev22.dist-info}/WHEEL +0 -0
- {khoj-2.0.0b14.dev43.dist-info → khoj-2.0.0b15.dev22.dist-info}/entry_points.txt +0 -0
- {khoj-2.0.0b14.dev43.dist-info → khoj-2.0.0b15.dev22.dist-info}/licenses/LICENSE +0 -0

khoj/processor/conversation/google/gemini_chat.py

@@ -1,22 +1,16 @@
 import logging
-from
-
+from typing import AsyncGenerator, List, Optional
+
+from langchain_core.messages.chat import ChatMessage
 
-from khoj.database.models import Agent, ChatMessageModel, ChatModel
-from khoj.processor.conversation import prompts
 from khoj.processor.conversation.google.utils import (
     gemini_chat_completion_with_backoff,
     gemini_completion_with_backoff,
 )
 from khoj.processor.conversation.utils import (
-    OperatorRun,
     ResponseWithThought,
-    generate_chatml_messages_with_context,
     messages_to_print,
 )
-from khoj.utils.helpers import is_none_or_empty, truncate_code_context
-from khoj.utils.rawconfig import FileAttachment, LocationData
-from khoj.utils.yaml import yaml_dump
 
 logger = logging.getLogger(__name__)
 
@@ -61,95 +55,18 @@ def gemini_send_message_to_model(
 
 async def converse_gemini(
     # Query
-
-    # Context
-    references: list[dict],
-    online_results: Optional[Dict[str, Dict]] = None,
-    code_results: Optional[Dict[str, Dict]] = None,
-    operator_results: Optional[List[OperatorRun]] = None,
-    query_images: Optional[list[str]] = None,
-    query_files: str = None,
-    generated_files: List[FileAttachment] = None,
-    generated_asset_results: Dict[str, Dict] = {},
-    program_execution_context: List[str] = None,
-    location_data: LocationData = None,
-    user_name: str = None,
-    chat_history: List[ChatMessageModel] = [],
+    messages: List[ChatMessage],
     # Model
-    model: Optional[str] = "gemini-2.
+    model: Optional[str] = "gemini-2.5-flash",
     api_key: Optional[str] = None,
     api_base_url: Optional[str] = None,
     temperature: float = 1.0,
-    max_prompt_size=None,
-    tokenizer_name=None,
-    agent: Agent = None,
-    vision_available: bool = False,
     deepthought: Optional[bool] = False,
     tracer={},
 ) -> AsyncGenerator[ResponseWithThought, None]:
     """
     Converse with user using Google's Gemini
     """
-    # Initialize Variables
-    current_date = datetime.now()
-
-    if agent and agent.personality:
-        system_prompt = prompts.custom_personality.format(
-            name=agent.name,
-            bio=agent.personality,
-            current_date=current_date.strftime("%Y-%m-%d"),
-            day_of_week=current_date.strftime("%A"),
-        )
-    else:
-        system_prompt = prompts.personality.format(
-            current_date=current_date.strftime("%Y-%m-%d"),
-            day_of_week=current_date.strftime("%A"),
-        )
-
-    system_prompt += f"{system_prompt}\n\n{prompts.gemini_verbose_language_personality}"
-    if location_data:
-        location_prompt = prompts.user_location.format(location=f"{location_data}")
-        system_prompt = f"{system_prompt}\n{location_prompt}"
-
-    if user_name:
-        user_name_prompt = prompts.user_name.format(name=user_name)
-        system_prompt = f"{system_prompt}\n{user_name_prompt}"
-
-    context_message = ""
-    if not is_none_or_empty(references):
-        context_message = f"{prompts.notes_conversation.format(query=user_query, references=yaml_dump(references))}\n\n"
-    if not is_none_or_empty(online_results):
-        context_message += f"{prompts.online_search_conversation.format(online_results=yaml_dump(online_results))}\n\n"
-    if not is_none_or_empty(code_results):
-        context_message += (
-            f"{prompts.code_executed_context.format(code_results=truncate_code_context(code_results))}\n\n"
-        )
-    if not is_none_or_empty(operator_results):
-        operator_content = [
-            {"query": oc.query, "response": oc.response, "webpages": oc.webpages} for oc in operator_results
-        ]
-        context_message += (
-            f"{prompts.operator_execution_context.format(operator_results=yaml_dump(operator_content))}\n\n"
-        )
-    context_message = context_message.strip()
-
-    # Setup Prompt with Primer or Conversation History
-    messages = generate_chatml_messages_with_context(
-        user_query,
-        context_message=context_message,
-        chat_history=chat_history,
-        model_name=model,
-        max_prompt_size=max_prompt_size,
-        tokenizer_name=tokenizer_name,
-        query_images=query_images,
-        vision_enabled=vision_available,
-        model_type=ChatModel.ModelType.GOOGLE,
-        query_files=query_files,
-        generated_files=generated_files,
-        generated_asset_results=generated_asset_results,
-        program_execution_context=program_execution_context,
-    )
-
     logger.debug(f"Conversation Context for Gemini: {messages_to_print(messages)}")
 
     # Get Response from Google AI
@@ -159,7 +76,6 @@ async def converse_gemini(
         temperature=temperature,
         api_key=api_key,
         api_base_url=api_base_url,
-        system_prompt=system_prompt,
         deepthought=deepthought,
         tracer=tracer,
     ):
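
For context, the `converse_gemini` hunks above change the function to accept pre-built ChatML messages instead of raw query, context, and agent fields, so prompt assembly is expected to move into the caller. A minimal caller-side sketch under that assumption; the sample messages and placeholder API key are illustrative, not Khoj's actual router code:

```python
import asyncio

from langchain_core.messages.chat import ChatMessage

from khoj.processor.conversation.google.gemini_chat import converse_gemini


async def main():
    # The caller now assembles the system prompt and chat history itself (illustrative content).
    messages = [
        ChatMessage(role="system", content="You are Khoj, a helpful personal assistant."),
        ChatMessage(role="user", content="Summarize my notes on quarterly planning."),
    ]
    async for chunk in converse_gemini(
        messages=messages,
        model="gemini-2.5-flash",
        api_key="<GEMINI_API_KEY>",  # placeholder credential
        deepthought=False,
    ):
        # Each chunk is a ResponseWithThought carrying the streamed reply.
        print(chunk)


asyncio.run(main())
```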

khoj/processor/conversation/google/utils.py

@@ -46,7 +46,7 @@ gemini_clients: Dict[str, genai.Client] = {}
 # This avoids premature response termination.
 MAX_OUTPUT_TOKENS_FOR_REASONING_GEMINI = 20000
 MAX_OUTPUT_TOKENS_FOR_STANDARD_GEMINI = 8000
-MAX_REASONING_TOKENS_GEMINI =
+MAX_REASONING_TOKENS_GEMINI = 512
 
 SAFETY_SETTINGS = [
     gtypes.SafetySetting(
@@ -120,11 +120,7 @@ def _is_retryable_error(exception: BaseException) -> bool:
     if isinstance(exception, (gerrors.APIError, gerrors.ClientError, GeminiRetryableClientError)):
         return exception.code in [429, 502, 503, 504]
     # client errors
-    if (
-        isinstance(exception, httpx.TimeoutException)
-        or isinstance(exception, httpx.NetworkError)
-        or isinstance(exception, httpx.ReadError)
-    ):
+    if isinstance(exception, httpx.TimeoutException) or isinstance(exception, httpx.NetworkError):
         return True
     # validation errors
     if isinstance(exception, ValueError):
@@ -312,7 +308,7 @@ async def gemini_chat_completion_with_backoff(
     temperature: float,
     api_key: str,
     api_base_url: str,
-    system_prompt: str,
+    system_prompt: str = "",
     model_kwargs=None,
     deepthought=False,
     tracer: dict = {},
@@ -476,9 +472,12 @@ def format_messages_for_gemini(
     for message in messages.copy():
         if message.role == "system":
             if isinstance(message.content, list):
-                system_prompt += "\n"
+                system_prompt += "\n\n" + "\n".join(
+                    [part["text"] for part in message.content if part["type"] == "text"]
+                )
             else:
-                system_prompt += message.content
+                system_prompt += "\n\n" + message.content
+            system_prompt = system_prompt.strip()
             messages.remove(message)
     system_prompt = None if is_none_or_empty(system_prompt) else system_prompt
 
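
The `format_messages_for_gemini` hunk above now folds list-type system message content into the system prompt by joining its text parts. A standalone sketch of just that merging step, using illustrative messages rather than Khoj's real prompts:

```python
from langchain_core.messages.chat import ChatMessage

messages = [
    ChatMessage(role="system", content=[{"type": "text", "text": "You are Khoj."}]),
    ChatMessage(role="system", content="Answer concisely."),
    ChatMessage(role="user", content="Hello"),
]

system_prompt = ""
for message in messages.copy():
    if message.role == "system":
        if isinstance(message.content, list):
            # Join only the text parts of structured system messages.
            system_prompt += "\n\n" + "\n".join(
                [part["text"] for part in message.content if part["type"] == "text"]
            )
        else:
            system_prompt += "\n\n" + message.content
        system_prompt = system_prompt.strip()
        messages.remove(message)

print(system_prompt)  # -> "You are Khoj." and "Answer concisely." separated by a blank line
```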

khoj/processor/conversation/openai/gpt.py

@@ -1,29 +1,25 @@
 import logging
-from datetime import datetime
 from typing import Any, AsyncGenerator, Dict, List, Optional
 
-from
-
+from langchain_core.messages.chat import ChatMessage
+
 from khoj.processor.conversation.openai.utils import (
     chat_completion_with_backoff,
     clean_response_schema,
     completion_with_backoff,
     get_structured_output_support,
-
+    is_cerebras_api,
     responses_chat_completion_with_backoff,
     responses_completion_with_backoff,
+    supports_responses_api,
     to_openai_tools,
 )
 from khoj.processor.conversation.utils import (
-    OperatorRun,
     ResponseWithThought,
     StructuredOutputSupport,
-    generate_chatml_messages_with_context,
     messages_to_print,
 )
-from khoj.utils.helpers import ToolDefinition
-from khoj.utils.rawconfig import FileAttachment, LocationData
-from khoj.utils.yaml import yaml_dump
+from khoj.utils.helpers import ToolDefinition
 
 logger = logging.getLogger(__name__)
 
@@ -45,16 +41,19 @@ def send_message_to_model(
 
     model_kwargs: Dict[str, Any] = {}
     json_support = get_structured_output_support(model, api_base_url)
+    strict = not is_cerebras_api(api_base_url)
     if tools and json_support == StructuredOutputSupport.TOOL:
-        model_kwargs["tools"] = to_openai_tools(
+        model_kwargs["tools"] = to_openai_tools(
+            tools, use_responses_api=supports_responses_api(model, api_base_url), strict=strict
+        )
     elif response_schema and json_support >= StructuredOutputSupport.SCHEMA:
         # Drop unsupported fields from schema passed to OpenAI APi
         cleaned_response_schema = clean_response_schema(response_schema)
-        if
+        if supports_responses_api(model, api_base_url):
             model_kwargs["text"] = {
                 "format": {
                     "type": "json_schema",
-                    "strict":
+                    "strict": strict,
                     "name": response_schema.__name__,
                     "schema": cleaned_response_schema,
                 }
@@ -65,14 +64,14 @@ def send_message_to_model(
                 "json_schema": {
                     "schema": cleaned_response_schema,
                     "name": response_schema.__name__,
-                    "strict":
+                    "strict": strict,
                 },
             }
     elif response_type == "json_object" and json_support == StructuredOutputSupport.OBJECT:
         model_kwargs["response_format"] = {"type": response_type}
 
     # Get Response from GPT
-    if
+    if supports_responses_api(model, api_base_url):
         return responses_completion_with_backoff(
             messages=messages,
             model_name=model,
@@ -96,98 +95,22 @@ def send_message_to_model(
 
 async def converse_openai(
     # Query
-
-    #
-    references: list[dict],
-    online_results: Optional[Dict[str, Dict]] = None,
-    code_results: Optional[Dict[str, Dict]] = None,
-    operator_results: Optional[List[OperatorRun]] = None,
-    query_images: Optional[list[str]] = None,
-    query_files: str = None,
-    generated_files: List[FileAttachment] = None,
-    generated_asset_results: Dict[str, Dict] = {},
-    program_execution_context: List[str] = None,
-    location_data: LocationData = None,
-    chat_history: list[ChatMessageModel] = [],
+    messages: List[ChatMessage],
+    # Model
     model: str = "gpt-4.1-mini",
     api_key: Optional[str] = None,
     api_base_url: Optional[str] = None,
     temperature: float = 0.6,
-    max_prompt_size=None,
-    tokenizer_name=None,
-    user_name: str = None,
-    agent: Agent = None,
-    vision_available: bool = False,
     deepthought: Optional[bool] = False,
     tracer: dict = {},
 ) -> AsyncGenerator[ResponseWithThought, None]:
     """
     Converse with user using OpenAI's ChatGPT
     """
-    # Initialize Variables
-    current_date = datetime.now()
-
-    if agent and agent.personality:
-        system_prompt = prompts.custom_personality.format(
-            name=agent.name,
-            bio=agent.personality,
-            current_date=current_date.strftime("%Y-%m-%d"),
-            day_of_week=current_date.strftime("%A"),
-        )
-    else:
-        system_prompt = prompts.personality.format(
-            current_date=current_date.strftime("%Y-%m-%d"),
-            day_of_week=current_date.strftime("%A"),
-        )
-
-    if location_data:
-        location_prompt = prompts.user_location.format(location=f"{location_data}")
-        system_prompt = f"{system_prompt}\n{location_prompt}"
-
-    if user_name:
-        user_name_prompt = prompts.user_name.format(name=user_name)
-        system_prompt = f"{system_prompt}\n{user_name_prompt}"
-
-    context_message = ""
-    if not is_none_or_empty(references):
-        context_message = f"{prompts.notes_conversation.format(references=yaml_dump(references))}\n\n"
-    if not is_none_or_empty(online_results):
-        context_message += f"{prompts.online_search_conversation.format(online_results=yaml_dump(online_results))}\n\n"
-    if not is_none_or_empty(code_results):
-        context_message += (
-            f"{prompts.code_executed_context.format(code_results=truncate_code_context(code_results))}\n\n"
-        )
-    if not is_none_or_empty(operator_results):
-        operator_content = [
-            {"query": oc.query, "response": oc.response, "webpages": oc.webpages} for oc in operator_results
-        ]
-        context_message += (
-            f"{prompts.operator_execution_context.format(operator_results=yaml_dump(operator_content))}\n\n"
-        )
-
-    context_message = context_message.strip()
-
-    # Setup Prompt with Primer or Conversation History
-    messages = generate_chatml_messages_with_context(
-        user_query,
-        system_prompt,
-        chat_history,
-        context_message=context_message,
-        model_name=model,
-        max_prompt_size=max_prompt_size,
-        tokenizer_name=tokenizer_name,
-        query_images=query_images,
-        vision_enabled=vision_available,
-        model_type=ChatModel.ModelType.OPENAI,
-        query_files=query_files,
-        generated_files=generated_files,
-        generated_asset_results=generated_asset_results,
-        program_execution_context=program_execution_context,
-    )
     logger.debug(f"Conversation Context for GPT: {messages_to_print(messages)}")
 
     # Get Response from GPT
-    if
+    if supports_responses_api(model, api_base_url):
         async for chunk in responses_chat_completion_with_backoff(
             messages=messages,
             model_name=model,
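
The `send_message_to_model` hunks above derive a `strict` flag from the API provider and thread it into both tool definitions and JSON-schema response formats. A rough sketch of that wiring; the `ToolDefinition` constructor arguments mirror the `tool.name` / `tool.description` / `tool.schema` attributes used in `to_openai_tools`, but its exact signature is assumed:

```python
from khoj.processor.conversation.openai.utils import (
    is_cerebras_api,
    supports_responses_api,
    to_openai_tools,
)
from khoj.utils.helpers import ToolDefinition

api_base_url = "https://api.cerebras.ai/v1"  # example OpenAI-compatible endpoint

# Hypothetical tool definition for illustration only.
search_tool = ToolDefinition(
    name="search_notes",
    description="Search the user's indexed notes.",
    schema={"type": "object", "properties": {"query": {"type": "string"}}, "required": ["query"]},
)

# Per the diff above, strict structured output is disabled when targeting the Cerebras API.
strict = not is_cerebras_api(api_base_url)  # False for this endpoint
openai_tools = to_openai_tools(
    [search_tool],
    use_responses_api=supports_responses_api("gpt-4.1-mini", api_base_url),  # False: not api.openai.com
    strict=strict,
)
```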

khoj/processor/conversation/openai/utils.py

@@ -111,14 +111,16 @@ def completion_with_backoff(
     model_kwargs["temperature"] = temperature
     model_kwargs["top_p"] = model_kwargs.get("top_p", 0.95)
 
-    formatted_messages = format_message_for_api(messages, api_base_url)
+    formatted_messages = format_message_for_api(messages, model_name, api_base_url)
 
     # Tune reasoning models arguments
     if is_openai_reasoning_model(model_name, api_base_url):
         model_kwargs["temperature"] = 1
         reasoning_effort = "medium" if deepthought else "low"
         model_kwargs["reasoning_effort"] = reasoning_effort
+        # Remove unsupported params for reasoning models
         model_kwargs.pop("top_p", None)
+        model_kwargs.pop("stop", None)
     elif is_twitter_reasoning_model(model_name, api_base_url):
         model_kwargs.pop("temperature", None)
         reasoning_effort = "high" if deepthought else "low"
@@ -126,7 +128,7 @@ def completion_with_backoff(
         if model_name.startswith("grok-4"):
             # Grok-4 models do not support reasoning_effort parameter
             model_kwargs.pop("reasoning_effort", None)
-    elif model_name.startswith("deepseek-reasoner"):
+    elif model_name.startswith("deepseek-reasoner") or model_name.startswith("deepseek-chat"):
         stream_processor = in_stream_thought_processor
         # Two successive messages cannot be from the same role. Should merge any back-to-back messages from the same role.
         # The first message should always be a user message (except system message).
@@ -145,11 +147,8 @@ def completion_with_backoff(
         # See https://qwenlm.github.io/blog/qwen3/#advanced-usages
         if not deepthought:
             add_qwen_no_think_tag(formatted_messages)
-    elif
-        model_kwargs["
-        reasoning_effort = "medium" if deepthought else "low"
-        model_kwargs["reasoning_effort"] = reasoning_effort
-        model_kwargs["top_p"] = 1.0
+    elif is_groq_api(api_base_url):
+        model_kwargs["service_tier"] = "auto"
 
     read_timeout = 300 if is_local_api(api_base_url) else 60
     if os.getenv("KHOJ_LLM_SEED"):
@@ -297,7 +296,7 @@ async def chat_completion_with_backoff(
 
     model_kwargs["top_p"] = model_kwargs.get("top_p", 0.95)
 
-    formatted_messages = format_message_for_api(messages, api_base_url)
+    formatted_messages = format_message_for_api(messages, model_name, api_base_url)
 
     # Configure thinking for openai reasoning models
     if is_openai_reasoning_model(model_name, api_base_url):
@@ -307,26 +306,17 @@ async def chat_completion_with_backoff(
         # Remove unsupported params for reasoning models
         model_kwargs.pop("top_p", None)
         model_kwargs.pop("stop", None)
-
-        # Get the first system message and add the string `Formatting re-enabled` to it.
-        # See https://platform.openai.com/docs/guides/reasoning-best-practices
-        if len(formatted_messages) > 0:
-            system_messages = [
-                (i, message) for i, message in enumerate(formatted_messages) if message["role"] == "system"
-            ]
-            if len(system_messages) > 0:
-                first_system_message_index, first_system_message = system_messages[0]
-                first_system_message_content = first_system_message["content"]
-                formatted_messages[first_system_message_index]["content"] = (
-                    f"{first_system_message_content}\nFormatting re-enabled"
-                )
     elif is_twitter_reasoning_model(model_name, api_base_url):
         reasoning_effort = "high" if deepthought else "low"
         # Grok-4 models do not support reasoning_effort parameter
         if not model_name.startswith("grok-4"):
             model_kwargs["reasoning_effort"] = reasoning_effort
-    elif
-
+    elif (
+        model_name.startswith("deepseek-chat")
+        or model_name.startswith("deepseek-reasoner")
+        or "deepseek-r1" in model_name.lower()
+    ):
+        # Official Deepseek models and some inference APIs like vLLM return structured thinking output.
         # Others like DeepInfra return it in response stream.
         # Using the instream thought processor handles both cases, structured thoughts and in response thoughts.
         stream_processor = ain_stream_thought_processor
@@ -351,11 +341,8 @@ async def chat_completion_with_backoff(
         # See https://qwenlm.github.io/blog/qwen3/#advanced-usages
         if not deepthought:
             add_qwen_no_think_tag(formatted_messages)
-    elif
-
-        reasoning_effort = "medium" if deepthought else "low"
-        model_kwargs["reasoning_effort"] = reasoning_effort
-        model_kwargs["top_p"] = 1.0
+    elif is_groq_api(api_base_url):
+        model_kwargs["service_tier"] = "auto"
 
     read_timeout = 300 if is_local_api(api_base_url) else 60
     if os.getenv("KHOJ_LLM_SEED"):
@@ -461,7 +448,7 @@ def responses_completion_with_backoff(
         client = get_openai_client(openai_api_key, api_base_url)
         openai_clients[client_key] = client
 
-    formatted_messages = format_message_for_api(messages, api_base_url)
+    formatted_messages = format_message_for_api(messages, model_name, api_base_url)
     # Move the first system message to Responses API instructions
     instructions: Optional[str] = None
     if formatted_messages and formatted_messages[0].get("role") == "system":
@@ -474,8 +461,10 @@ def responses_completion_with_backoff(
     if is_openai_reasoning_model(model_name, api_base_url):
         temperature = 1
         reasoning_effort = "medium" if deepthought else "low"
-        model_kwargs["reasoning"] = {"effort": reasoning_effort
-
+        model_kwargs["reasoning"] = {"effort": reasoning_effort}
+        if is_openai_api(api_base_url):
+            model_kwargs["reasoning"]["summary"] = "auto"
+            model_kwargs["include"] = ["reasoning.encrypted_content"]
         # Remove unsupported params for reasoning models
         model_kwargs.pop("top_p", None)
         model_kwargs.pop("stop", None)
@@ -572,7 +561,7 @@ async def responses_chat_completion_with_backoff(
         client = get_openai_async_client(openai_api_key, api_base_url)
         openai_async_clients[client_key] = client
 
-    formatted_messages = format_message_for_api(messages, api_base_url)
+    formatted_messages = format_message_for_api(messages, model_name, api_base_url)
     # Move the first system message to Responses API instructions
     instructions: Optional[str] = None
     if formatted_messages and formatted_messages[0].get("role") == "system":
@@ -585,7 +574,10 @@ async def responses_chat_completion_with_backoff(
     if is_openai_reasoning_model(model_name, api_base_url):
         temperature = 1
         reasoning_effort = "medium" if deepthought else "low"
-        model_kwargs["reasoning"] = {"effort": reasoning_effort
+        model_kwargs["reasoning"] = {"effort": reasoning_effort}
+        if is_openai_api(api_base_url):
+            model_kwargs["reasoning"]["summary"] = "auto"
+            model_kwargs["include"] = ["reasoning.encrypted_content"]
         # Remove unsupported params for reasoning models
         model_kwargs.pop("top_p", None)
         model_kwargs.pop("stop", None)
@@ -718,7 +710,7 @@ def get_structured_output_support(model_name: str, api_base_url: str = None) ->
         return StructuredOutputSupport.TOOL
 
 
-def format_message_for_api(raw_messages: List[ChatMessage], api_base_url: str) -> List[dict]:
+def format_message_for_api(raw_messages: List[ChatMessage], model_name: str, api_base_url: str) -> List[dict]:
     """
     Format messages to send to chat model served over OpenAI (compatible) API.
     """
@@ -728,7 +720,7 @@ def format_message_for_api(raw_messages: List[ChatMessage], api_base_url: str) -
         # Handle tool call and tool result message types
         message_type = message.additional_kwargs.get("message_type")
         if message_type == "tool_call":
-            if
+            if supports_responses_api(model_name, api_base_url):
                 for part in message.content:
                     if "status" in part:
                         part.pop("status") # Drop unsupported tool call status field
@@ -772,7 +764,7 @@ def format_message_for_api(raw_messages: List[ChatMessage], api_base_url: str) -
                 if not tool_call_id:
                     logger.warning(f"Dropping tool result without valid tool_call_id: {part.get('name')}")
                     continue
-                if
+                if supports_responses_api(model_name, api_base_url):
                     formatted_messages.append(
                         {
                             "type": "function_call_output",
@@ -790,7 +782,7 @@ def format_message_for_api(raw_messages: List[ChatMessage], api_base_url: str) -
                         }
                     )
             continue
-        if isinstance(message.content, list) and not
+        if isinstance(message.content, list) and not supports_responses_api(model_name, api_base_url):
            assistant_texts = []
            has_images = False
            for idx, part in enumerate(message.content):
@@ -803,7 +795,7 @@ def format_message_for_api(raw_messages: List[ChatMessage], api_base_url: str) -
                if (
                    part.get("type") == "text"
                    and message.role == "assistant"
-                    and api_base_url.startswith("https://api.deepinfra.com/v1")
+                    and (api_base_url.startswith("https://api.deepinfra.com/v1") or is_cerebras_api(api_base_url))
                ):
                    assistant_texts += [part["text"]]
                    message.content.pop(idx)
@@ -846,12 +838,21 @@ def is_openai_api(api_base_url: str = None) -> bool:
     return api_base_url is None or api_base_url.startswith("https://api.openai.com/v1")
 
 
+def supports_responses_api(model_name: str, api_base_url: str = None) -> bool:
+    """
+    Check if the model, ai api supports the OpenAI Responses API
+    """
+    return is_openai_api(api_base_url)
+
+
 def is_openai_reasoning_model(model_name: str, api_base_url: str = None) -> bool:
     """
     Check if the model is an OpenAI reasoning model
     """
-    return
-
+    return (
+        is_openai_api(api_base_url)
+        and (model_name.lower().startswith("o") or model_name.lower().startswith("gpt-5"))
+        or "gpt-oss" in model_name.lower()
     )
 
 
@@ -875,6 +876,20 @@ def is_twitter_reasoning_model(model_name: str, api_base_url: str = None) -> boo
     )
 
 
+def is_cerebras_api(api_base_url: str = None) -> bool:
+    """
+    Check if the model is served over the Cerebras API
+    """
+    return api_base_url is not None and api_base_url.startswith("https://api.cerebras.ai/v1")
+
+
+def is_groq_api(api_base_url: str = None) -> bool:
+    """
+    Check if the model is served over the Groq API
+    """
+    return api_base_url is not None and api_base_url.startswith("https://api.groq.com")
+
+
 def is_qwen_style_reasoning_model(model_name: str, api_base_url: str = None) -> bool:
     """
     Check if the model is a Qwen style reasoning model
@@ -1204,7 +1219,7 @@ def add_qwen_no_think_tag(formatted_messages: List[dict]) -> None:
             break
 
 
-def to_openai_tools(tools: List[ToolDefinition], use_responses_api: bool) -> List[Dict] | None:
+def to_openai_tools(tools: List[ToolDefinition], use_responses_api: bool, strict: bool) -> List[Dict] | None:
     "Transform tool definitions from standard format to OpenAI format."
     if use_responses_api:
         openai_tools = [
@@ -1213,7 +1228,7 @@ def to_openai_tools(tools: List[ToolDefinition], use_responses_api: bool) -> Lis
                 "name": tool.name,
                 "description": tool.description,
                 "parameters": clean_response_schema(tool.schema),
-                "strict":
+                "strict": strict,
             }
             for tool in tools
         ]
@@ -1225,7 +1240,7 @@ def to_openai_tools(tools: List[ToolDefinition], use_responses_api: bool) -> Lis
                     "name": tool.name,
                     "description": tool.description,
                     "parameters": clean_response_schema(tool.schema),
-                    "strict":
+                    "strict": strict,
                 },
             }
             for tool in tools