khoj 1.42.8.dev4__py3-none-any.whl → 1.42.9.dev16__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- khoj/database/adapters/__init__.py +20 -0
- khoj/interface/compiled/404/index.html +2 -2
- khoj/interface/compiled/_next/static/chunks/app/agents/layout-2e626327abfbe612.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/chat/layout-d6acbba22ccac0ff.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/chat/{page-802dedbf1d9d5e1e.js → page-9967631715682f3c.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/app/{page-85b9b416898738f7.js → page-6e91caf9bc0c8aba.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/app/search/layout-94c76c3a41db42a2.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/share/chat/layout-95998f0bdc22bb13.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/share/chat/{page-c062269e6906ef22.js → page-8c8c175f7f212b03.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/{webpack-88659b10d39e393f.js → webpack-4bf3eab7681a1206.js} +1 -1
- khoj/interface/compiled/_next/static/css/1e9b757ee2a2b34b.css +1 -0
- khoj/interface/compiled/_next/static/css/440ae0f0f650dc35.css +1 -0
- khoj/interface/compiled/_next/static/css/bd2071cad2ecf293.css +1 -0
- khoj/interface/compiled/agents/index.html +2 -2
- khoj/interface/compiled/agents/index.txt +1 -1
- khoj/interface/compiled/automations/index.html +2 -2
- khoj/interface/compiled/automations/index.txt +1 -1
- khoj/interface/compiled/chat/index.html +2 -2
- khoj/interface/compiled/chat/index.txt +2 -2
- khoj/interface/compiled/index.html +2 -2
- khoj/interface/compiled/index.txt +2 -2
- khoj/interface/compiled/search/index.html +2 -2
- khoj/interface/compiled/search/index.txt +1 -1
- khoj/interface/compiled/settings/index.html +2 -2
- khoj/interface/compiled/settings/index.txt +1 -1
- khoj/interface/compiled/share/chat/index.html +2 -2
- khoj/interface/compiled/share/chat/index.txt +2 -2
- khoj/processor/conversation/anthropic/anthropic_chat.py +11 -2
- khoj/processor/conversation/anthropic/utils.py +90 -103
- khoj/processor/conversation/google/gemini_chat.py +4 -1
- khoj/processor/conversation/google/utils.py +84 -19
- khoj/processor/conversation/offline/chat_model.py +3 -3
- khoj/processor/conversation/openai/gpt.py +13 -38
- khoj/processor/conversation/openai/utils.py +113 -12
- khoj/processor/conversation/prompts.py +17 -35
- khoj/processor/conversation/utils.py +128 -57
- khoj/processor/operator/grounding_agent.py +1 -1
- khoj/processor/operator/operator_agent_binary.py +4 -3
- khoj/processor/tools/online_search.py +18 -0
- khoj/processor/tools/run_code.py +1 -1
- khoj/routers/api_chat.py +1 -1
- khoj/routers/helpers.py +293 -26
- khoj/routers/research.py +169 -155
- khoj/utils/helpers.py +284 -8
- {khoj-1.42.8.dev4.dist-info → khoj-1.42.9.dev16.dist-info}/METADATA +1 -1
- {khoj-1.42.8.dev4.dist-info → khoj-1.42.9.dev16.dist-info}/RECORD +51 -51
- khoj/interface/compiled/_next/static/chunks/app/agents/layout-e00fb81dca656a10.js +0 -1
- khoj/interface/compiled/_next/static/chunks/app/chat/layout-d5ae861e1ade9d08.js +0 -1
- khoj/interface/compiled/_next/static/chunks/app/search/layout-f5881c7ae3ba0795.js +0 -1
- khoj/interface/compiled/_next/static/chunks/app/share/chat/layout-b3f7ae1ef8871d30.js +0 -1
- khoj/interface/compiled/_next/static/css/02f60900b0d89ec7.css +0 -1
- khoj/interface/compiled/_next/static/css/76c658ee459140a9.css +0 -1
- khoj/interface/compiled/_next/static/css/fbacbdfd5e7f3f0e.css +0 -1
- /khoj/interface/compiled/_next/static/{8Wx2kDD5oC-v77JDu6vKI → w19FJJa9p2AFJB6DEektd}/_buildManifest.js +0 -0
- /khoj/interface/compiled/_next/static/{8Wx2kDD5oC-v77JDu6vKI → w19FJJa9p2AFJB6DEektd}/_ssgManifest.js +0 -0
- {khoj-1.42.8.dev4.dist-info → khoj-1.42.9.dev16.dist-info}/WHEEL +0 -0
- {khoj-1.42.8.dev4.dist-info → khoj-1.42.9.dev16.dist-info}/entry_points.txt +0 -0
- {khoj-1.42.8.dev4.dist-info → khoj-1.42.9.dev16.dist-info}/licenses/LICENSE +0 -0
@@ -1,9 +1,8 @@
 import json
 import logging
 from copy import deepcopy
-from textwrap import dedent
 from time import perf_counter
-from typing import AsyncGenerator, Dict, List
+from typing import AsyncGenerator, Dict, List
 
 import anthropic
 from langchain_core.messages.chat import ChatMessage
@@ -18,11 +17,14 @@ from tenacity import (
 
 from khoj.processor.conversation.utils import (
     ResponseWithThought,
+    ToolCall,
     commit_conversation_trace,
     get_image_from_base64,
     get_image_from_url,
 )
 from khoj.utils.helpers import (
+    ToolDefinition,
+    create_tool_definition,
     get_anthropic_async_client,
     get_anthropic_client,
     get_chat_usage_metrics,
@@ -57,9 +59,10 @@ def anthropic_completion_with_backoff(
     max_tokens: int | None = None,
     response_type: str = "text",
     response_schema: BaseModel | None = None,
+    tools: List[ToolDefinition] = None,
     deepthought: bool = False,
     tracer: dict = {},
-) ->
+) -> ResponseWithThought:
     client = anthropic_clients.get(api_key)
     if not client:
         client = get_anthropic_client(api_key, api_base_url)
@@ -67,12 +70,26 @@ def anthropic_completion_with_backoff(
 
     formatted_messages, system = format_messages_for_anthropic(messages, system_prompt)
 
+    thoughts = ""
     aggregated_response = ""
     final_message = None
     model_kwargs = model_kwargs or dict()
-
-
-
+
+    # Configure structured output
+    if tools:
+        # Convert tools to Anthropic format
+        model_kwargs["tools"] = [
+            anthropic.types.ToolParam(name=tool.name, description=tool.description, input_schema=tool.schema)
+            for tool in tools
+        ]
+        # Cache tool definitions
+        last_tool = model_kwargs["tools"][-1]
+        last_tool["cache_control"] = {"type": "ephemeral"}
+    elif response_schema:
+        tool = create_tool_definition(response_schema)
+        model_kwargs["tools"] = [
+            anthropic.types.ToolParam(name=tool.name, description=tool.description, input_schema=tool.schema)
+        ]
     elif response_type == "json_object" and not (is_reasoning_model(model_name) and deepthought):
         # Prefill model response with '{' to make it output a valid JSON object. Not supported with extended thinking.
         formatted_messages.append(anthropic.types.MessageParam(role="assistant", content="{"))
@@ -96,15 +113,41 @@ def anthropic_completion_with_backoff(
         max_tokens=max_tokens,
         **(model_kwargs),
     ) as stream:
-        for
-
+        for chunk in stream:
+            if chunk.type != "content_block_delta":
+                continue
+            if chunk.delta.type == "thinking_delta":
+                thoughts += chunk.delta.thinking
+            elif chunk.delta.type == "text_delta":
+                aggregated_response += chunk.delta.text
         final_message = stream.get_final_message()
 
-    #
-    for item in final_message.content
-
-
-
+    # Track raw content of model response to reuse for cache hits in multi-turn chats
+    raw_content = [item.model_dump() for item in final_message.content]
+
+    # Extract all tool calls if tools are enabled
+    if tools:
+        tool_calls = [
+            ToolCall(name=item.name, args=item.input, id=item.id).__dict__
+            for item in final_message.content
+            if item.type == "tool_use"
+        ]
+        if tool_calls:
+            # If there are tool calls, aggregate thoughts and responses into thoughts
+            if thoughts and aggregated_response:
+                # wrap each line of thought in italics
+                thoughts = "\n".join([f"*{line.strip()}*" for line in thoughts.splitlines() if line.strip()])
+                thoughts = f"{thoughts}\n\n{aggregated_response}"
+            else:
+                thoughts = thoughts or aggregated_response
+            # Json dump tool calls into aggregated response
+            aggregated_response = json.dumps(tool_calls)
+    # If response schema is used, return the first tool call's input
+    elif response_schema:
+        for item in final_message.content:
+            if item.type == "tool_use":
+                aggregated_response = json.dumps(item.input)
+                break
 
     # Calculate cost of chat
     input_tokens = final_message.usage.input_tokens
@@ -126,7 +169,7 @@ def anthropic_completion_with_backoff(
     if is_promptrace_enabled():
         commit_conversation_trace(messages, aggregated_response, tracer)
 
-    return aggregated_response
+    return ResponseWithThought(text=aggregated_response, thought=thoughts, raw_content=raw_content)
 
 
 @retry(
@@ -183,10 +226,10 @@ async def anthropic_chat_completion_with_backoff(
             if chunk.type == "message_delta":
                 if chunk.delta.stop_reason == "refusal":
                     yield ResponseWithThought(
-
+                        text="...I'm sorry, but my safety filters prevent me from assisting with this query."
                     )
                 elif chunk.delta.stop_reason == "max_tokens":
-                    yield ResponseWithThought(
+                    yield ResponseWithThought(text="...I'm sorry, but I've hit my response length limit.")
                 if chunk.delta.stop_reason in ["refusal", "max_tokens"]:
                     logger.warning(
                         f"LLM Response Prevented for {model_name}: {chunk.delta.stop_reason}.\n"
@@ -199,7 +242,7 @@ async def anthropic_chat_completion_with_backoff(
             # Handle streamed response chunk
             response_chunk: ResponseWithThought = None
             if chunk.delta.type == "text_delta":
-                response_chunk = ResponseWithThought(
+                response_chunk = ResponseWithThought(text=chunk.delta.text)
                 aggregated_response += chunk.delta.text
             if chunk.delta.type == "thinking_delta":
                 response_chunk = ResponseWithThought(thought=chunk.delta.thinking)
@@ -232,13 +275,14 @@ async def anthropic_chat_completion_with_backoff(
         commit_conversation_trace(messages, aggregated_response, tracer)
 
 
-def format_messages_for_anthropic(
+def format_messages_for_anthropic(raw_messages: list[ChatMessage], system_prompt: str = None):
     """
     Format messages for Anthropic
     """
     # Extract system prompt
     system_prompt = system_prompt or ""
-
+    messages = deepcopy(raw_messages)
+    for message in messages:
         if message.role == "system":
             if isinstance(message.content, list):
                 system_prompt += "\n".join([part["text"] for part in message.content if part["type"] == "text"])
@@ -250,15 +294,30 @@ def format_messages_for_anthropic(messages: list[ChatMessage], system_prompt: st
     else:
         system = None
 
-    # Anthropic requires the first message to be a
-
+    # Anthropic requires the first message to be a user message unless its a tool call
+    message_type = messages[0].additional_kwargs.get("message_type", None)
+    if len(messages) == 1 and message_type != "tool_call":
         messages[0].role = "user"
-    elif len(messages) > 1 and messages[0].role == "assistant":
-        messages = messages[1:]
 
-    # Convert image urls to base64 encoded images in Anthropic message format
     for message in messages:
-
+        # Handle tool call and tool result message types from additional_kwargs
+        message_type = message.additional_kwargs.get("message_type")
+        if message_type == "tool_call":
+            pass
+        elif message_type == "tool_result":
+            # Convert tool_result to Anthropic tool_result format
+            content = []
+            for part in message.content:
+                content.append(
+                    {
+                        "type": "tool_result",
+                        "tool_use_id": part["id"],
+                        "content": part["content"],
+                    }
+                )
+            message.content = content
+        # Convert image urls to base64 encoded images in Anthropic message format
+        elif isinstance(message.content, list):
             content = []
             # Sort the content. Anthropic models prefer that text comes after images.
             message.content.sort(key=lambda x: 0 if x["type"] == "image_url" else 1)
@@ -304,18 +363,15 @@ def format_messages_for_anthropic(messages: list[ChatMessage], system_prompt: st
                 if isinstance(block, dict) and "cache_control" in block:
                     del block["cache_control"]
 
-    # Add cache control to the last content block of
-    #
-
-    cache_message = messages[-2]
+    # Add cache control to the last content block of last message.
+    # Caching should improve research efficiency.
+    cache_message = messages[-1]
     if isinstance(cache_message.content, list) and cache_message.content:
         # Add cache control to the last content block only if it's a text block with non-empty content
         last_block = cache_message.content[-1]
-        if (
-
-
-            and last_block.get("text")
-            and last_block.get("text").strip()
+        if isinstance(last_block, dict) and (
+            (last_block.get("type") == "text" and last_block.get("text", "").strip())
+            or (last_block.get("type") == "tool_result" and last_block.get("content", []))
         ):
             last_block["cache_control"] = {"type": "ephemeral"}
 
@@ -326,74 +382,5 @@ def format_messages_for_anthropic(messages: list[ChatMessage], system_prompt: st
     return formatted_messages, system
 
 
-def create_anthropic_tool_definition(
-    response_schema: Type[BaseModel],
-    tool_name: str = None,
-    tool_description: Optional[str] = None,
-) -> anthropic.types.ToolParam:
-    """
-    Converts a response schema BaseModel class into an Anthropic tool definition dictionary.
-
-    This format is expected by Anthropic's API when defining tools the model can use.
-
-    Args:
-        response_schema: The Pydantic BaseModel class to convert.
-            This class defines the response schema for the tool.
-        tool_name: The name for the Anthropic tool (e.g., "get_weather", "plan_next_step").
-        tool_description: Optional description for the Anthropic tool.
-            If None, it attempts to use the Pydantic model's docstring.
-            If that's also missing, a fallback description is generated.
-
-    Returns:
-        An tool definition for Anthropic's API.
-    """
-    model_schema = response_schema.model_json_schema()
-
-    name = tool_name or response_schema.__name__.lower()
-    description = tool_description
-    if description is None:
-        docstring = response_schema.__doc__
-        if docstring:
-            description = dedent(docstring).strip()
-        else:
-            # Fallback description if no explicit one or docstring is provided
-            description = f"Tool named '{name}' accepts specified parameters."
-
-    # Process properties to inline enums and remove $defs dependency
-    processed_properties = {}
-    original_properties = model_schema.get("properties", {})
-    defs = model_schema.get("$defs", {})
-
-    for prop_name, prop_schema in original_properties.items():
-        current_prop_schema = deepcopy(prop_schema)  # Work on a copy
-        # Check for enums defined directly in the property for simpler direct enum definitions.
-        if "$ref" in current_prop_schema:
-            ref_path = current_prop_schema["$ref"]
-            if ref_path.startswith("#/$defs/"):
-                def_name = ref_path.split("/")[-1]
-                if def_name in defs and "enum" in defs[def_name]:
-                    enum_def = defs[def_name]
-                    current_prop_schema["enum"] = enum_def["enum"]
-                    current_prop_schema["type"] = enum_def.get("type", "string")
-                    if "description" not in current_prop_schema and "description" in enum_def:
-                        current_prop_schema["description"] = enum_def["description"]
-                    del current_prop_schema["$ref"]  # Remove the $ref as it's been inlined
-
-        processed_properties[prop_name] = current_prop_schema
-
-    # The input_schema for Anthropic tools is a JSON Schema object.
-    # Pydantic's model_json_schema() provides most of what's needed.
-    input_schema = {
-        "type": "object",
-        "properties": processed_properties,
-    }
-
-    # Include 'required' fields if specified in the Pydantic model
-    if "required" in model_schema and model_schema["required"]:
-        input_schema["required"] = model_schema["required"]
-
-    return anthropic.types.ToolParam(name=name, description=description, input_schema=input_schema)
-
-
 def is_reasoning_model(model_name: str) -> bool:
     return any(model_name.startswith(model) for model in REASONING_MODELS)
@@ -28,6 +28,7 @@ def gemini_send_message_to_model(
     api_base_url=None,
     response_type="text",
     response_schema=None,
+    tools=None,
     model_kwargs=None,
     deepthought=False,
     tracer={},
@@ -37,8 +38,10 @@ def gemini_send_message_to_model(
     """
     model_kwargs = {}
 
+    if tools:
+        model_kwargs["tools"] = tools
     # Monitor for flakiness in 1.5+ models. This would cause unwanted behavior and terminate response early in 1.5 models.
-
+    elif response_type == "json_object" and not model.startswith("gemini-1.5"):
         model_kwargs["response_mime_type"] = "application/json"
         if response_schema:
             model_kwargs["response_schema"] = response_schema
@@ -1,9 +1,10 @@
+import json
 import logging
 import os
 import random
 from copy import deepcopy
 from time import perf_counter
-from typing import AsyncGenerator, AsyncIterator, Dict
+from typing import AsyncGenerator, AsyncIterator, Dict, List
 
 import httpx
 from google import genai
@@ -22,11 +23,13 @@ from tenacity import (
 
 from khoj.processor.conversation.utils import (
     ResponseWithThought,
+    ToolCall,
     commit_conversation_trace,
     get_image_from_base64,
     get_image_from_url,
 )
 from khoj.utils.helpers import (
+    ToolDefinition,
     get_chat_usage_metrics,
     get_gemini_client,
     is_none_or_empty,
@@ -92,29 +95,32 @@ def gemini_completion_with_backoff(
     messages: list[ChatMessage],
     system_prompt: str,
     model_name: str,
-    temperature=1.
+    temperature=1.2,
     api_key=None,
     api_base_url: str = None,
-    model_kwargs=
+    model_kwargs={},
     deepthought=False,
     tracer={},
-) ->
+) -> ResponseWithThought:
     client = gemini_clients.get(api_key)
     if not client:
         client = get_gemini_client(api_key, api_base_url)
         gemini_clients[api_key] = client
 
     formatted_messages, system_instruction = format_messages_for_gemini(messages, system_prompt)
-    response_thoughts
+    raw_content, response_text, response_thoughts = [], "", None
 
-    #
+    # Configure structured output
+    tools = None
     response_schema = None
-    if model_kwargs
+    if model_kwargs.get("tools"):
+        tools = to_gemini_tools(model_kwargs["tools"])
+    elif model_kwargs.get("response_schema"):
         response_schema = clean_response_schema(model_kwargs["response_schema"])
 
     thinking_config = None
     if deepthought and is_reasoning_model(model_name):
-        thinking_config = gtypes.ThinkingConfig(thinking_budget=MAX_REASONING_TOKENS_GEMINI)
+        thinking_config = gtypes.ThinkingConfig(thinking_budget=MAX_REASONING_TOKENS_GEMINI, include_thoughts=True)
 
     max_output_tokens = MAX_OUTPUT_TOKENS_FOR_STANDARD_GEMINI
     if is_reasoning_model(model_name):
@@ -127,16 +133,36 @@ def gemini_completion_with_backoff(
         thinking_config=thinking_config,
         max_output_tokens=max_output_tokens,
         safety_settings=SAFETY_SETTINGS,
-        response_mime_type=model_kwargs.get("response_mime_type", "text/plain")
+        response_mime_type=model_kwargs.get("response_mime_type", "text/plain"),
         response_schema=response_schema,
+        tools=tools,
         seed=seed,
+        top_p=0.95,
         http_options=gtypes.HttpOptions(client_args={"timeout": httpx.Timeout(30.0, read=60.0)}),
     )
 
     try:
         # Generate the response
         response = client.models.generate_content(model=model_name, config=config, contents=formatted_messages)
-
+        if (
+            not response.candidates
+            or not response.candidates[0].content
+            or response.candidates[0].content.parts is None
+        ):
+            raise ValueError(f"Failed to get response from model.")
+        raw_content = [part.model_dump() for part in response.candidates[0].content.parts]
+        if response.function_calls:
+            function_calls = [
+                ToolCall(name=function_call.name, args=function_call.args, id=function_call.id).__dict__
+                for function_call in response.function_calls
+            ]
+            response_text = json.dumps(function_calls)
+        else:
+            # If no function calls, use the text response
+            response_text = response.text
+        response_thoughts = "\n".join(
+            [part.text for part in response.candidates[0].content.parts if part.thought and isinstance(part.text, str)]
+        )
     except gerrors.ClientError as e:
         response = None
         response_text, _ = handle_gemini_response(e.args)
@@ -150,8 +176,14 @@ def gemini_completion_with_backoff(
     input_tokens = response.usage_metadata.prompt_token_count or 0 if response else 0
     output_tokens = response.usage_metadata.candidates_token_count or 0 if response else 0
     thought_tokens = response.usage_metadata.thoughts_token_count or 0 if response else 0
+    cache_read_tokens = response.usage_metadata.cached_content_token_count or 0 if response else 0
     tracer["usage"] = get_chat_usage_metrics(
-        model_name,
+        model_name,
+        input_tokens,
+        output_tokens,
+        cache_read_tokens=cache_read_tokens,
+        thought_tokens=thought_tokens,
+        usage=tracer.get("usage"),
     )
 
     # Validate the response. If empty, raise an error to retry.
@@ -165,7 +197,7 @@ def gemini_completion_with_backoff(
     if is_promptrace_enabled():
         commit_conversation_trace(messages, response_text, tracer)
 
-    return response_text
+    return ResponseWithThought(text=response_text, thought=response_thoughts, raw_content=raw_content)
 
 
 @retry(
@@ -201,10 +233,12 @@ async def gemini_chat_completion_with_backoff(
     if is_reasoning_model(model_name):
         max_output_tokens = MAX_OUTPUT_TOKENS_FOR_REASONING_GEMINI
 
+    top_p = 0.95
     seed = int(os.getenv("KHOJ_LLM_SEED")) if os.getenv("KHOJ_LLM_SEED") else None
     config = gtypes.GenerateContentConfig(
         system_instruction=system_instruction,
         temperature=temperature,
+        top_p=top_p,
         thinking_config=thinking_config,
         max_output_tokens=max_output_tokens,
         stop_sequences=["Notes:\n["],
@@ -231,7 +265,7 @@ async def gemini_chat_completion_with_backoff(
         # handle safety, rate-limit, other finish reasons
         stop_message, stopped = handle_gemini_response(chunk.candidates, chunk.prompt_feedback)
         if stopped:
-            yield ResponseWithThought(
+            yield ResponseWithThought(text=stop_message)
             logger.warning(
                 f"LLM Response Prevented for {model_name}: {stop_message}.\n"
                 + f"Last Message by {messages[-1].role}: {messages[-1].content}"
@@ -244,7 +278,7 @@ async def gemini_chat_completion_with_backoff(
                 yield ResponseWithThought(thought=part.text)
             elif part.text:
                 aggregated_response += part.text
-                yield ResponseWithThought(
+                yield ResponseWithThought(text=part.text)
     # Calculate cost of chat
     input_tokens = final_chunk.usage_metadata.prompt_token_count or 0 if final_chunk else 0
     output_tokens = final_chunk.usage_metadata.candidates_token_count or 0 if final_chunk else 0
@@ -343,8 +377,24 @@ def format_messages_for_gemini(
     system_prompt = None if is_none_or_empty(system_prompt) else system_prompt
 
     for message in messages:
+        if message.role == "assistant":
+            message.role = "model"
+
+        # Handle tool call and tool result message types from additional_kwargs
+        message_type = message.additional_kwargs.get("message_type")
+        if message_type == "tool_call":
+            pass
+        elif message_type == "tool_result":
+            # Convert tool_result to Gemini function response format
+            # Need to find the corresponding function call from previous messages
+            tool_result_msg_content = []
+            for part in message.content:
+                tool_result_msg_content.append(
+                    gtypes.Part.from_function_response(name=part["name"], response={"result": part["content"]})
+                )
+            message.content = tool_result_msg_content
         # Convert message content to string list from chatml dictionary list
-
+        elif isinstance(message.content, list):
             # Convert image_urls to PIL.Image and place them at beginning of list (better for Gemini)
             message_content = []
             for item in sorted(message.content, key=lambda x: 0 if x["type"] == "image_url" else 1):
@@ -364,16 +414,13 @@ def format_messages_for_gemini(
                     messages.remove(message)
                     continue
             message.content = message_content
-        elif isinstance(message.content, str):
+        elif isinstance(message.content, str) and message.content.strip():
            message.content = [gtypes.Part.from_text(text=message.content)]
         else:
             logger.error(f"Dropping invalid type: {type(message.content)} of message content: {message.content}")
             messages.remove(message)
             continue
 
-        if message.role == "assistant":
-            message.role = "model"
-
     if len(messages) == 1:
         messages[0].role = "user"
 
@@ -401,3 +448,21 @@ def is_reasoning_model(model_name: str) -> bool:
     Check if the model is a reasoning model.
     """
     return model_name.startswith("gemini-2.5")
+
+
+def to_gemini_tools(tools: List[ToolDefinition]) -> List[gtypes.ToolDict] | None:
+    "Transform tool definitions from standard format to Gemini format."
+    gemini_tools = [
+        gtypes.ToolDict(
+            function_declarations=[
+                gtypes.FunctionDeclarationDict(
+                    name=tool.name,
+                    description=tool.description,
+                    parameters=tool.schema,
+                )
+                for tool in tools
+            ]
+        )
+    ]
+
+    return gemini_tools or None
@@ -145,12 +145,12 @@ async def converse_offline(
             aggregated_response += response_delta
             # Put chunk into the asyncio queue (non-blocking)
             try:
-                queue.put_nowait(ResponseWithThought(
+                queue.put_nowait(ResponseWithThought(text=response_delta))
             except asyncio.QueueFull:
                 # Should not happen with default queue size unless consumer is very slow
                 logger.warning("Asyncio queue full during offline LLM streaming.")
                 # Potentially block here or handle differently if needed
-                asyncio.run(queue.put(ResponseWithThought(
+                asyncio.run(queue.put(ResponseWithThought(text=response_delta)))
 
         # Log the time taken to stream the entire response
         logger.info(f"Chat streaming took: {perf_counter() - start_time:.3f} seconds")
@@ -221,4 +221,4 @@ def send_message_to_model_offline(
     if is_promptrace_enabled():
         commit_conversation_trace(messages, response_text, tracer)
 
-    return response_text
+    return ResponseWithThought(text=response_text)
@@ -1,25 +1,24 @@
 import logging
 from datetime import datetime
-from typing import AsyncGenerator, Dict, List, Optional
-
-from openai.lib._pydantic import _ensure_strict_json_schema
-from pydantic import BaseModel
+from typing import Any, AsyncGenerator, Dict, List, Optional
 
 from khoj.database.models import Agent, ChatMessageModel, ChatModel
 from khoj.processor.conversation import prompts
 from khoj.processor.conversation.openai.utils import (
     chat_completion_with_backoff,
+    clean_response_schema,
     completion_with_backoff,
-
+    get_structured_output_support,
+    to_openai_tools,
 )
 from khoj.processor.conversation.utils import (
-    JsonSupport,
     OperatorRun,
     ResponseWithThought,
+    StructuredOutputSupport,
     generate_chatml_messages_with_context,
     messages_to_print,
 )
-from khoj.utils.helpers import is_none_or_empty, truncate_code_context
+from khoj.utils.helpers import ToolDefinition, is_none_or_empty, truncate_code_context
 from khoj.utils.rawconfig import FileAttachment, LocationData
 from khoj.utils.yaml import yaml_dump
 
@@ -32,6 +31,7 @@ def send_message_to_model(
     model,
     response_type="text",
     response_schema=None,
+    tools: list[ToolDefinition] = None,
     deepthought=False,
     api_base_url=None,
     tracer: dict = {},
@@ -40,9 +40,11 @@ def send_message_to_model(
     Send message to model
     """
 
-    model_kwargs = {}
-    json_support =
-    if
+    model_kwargs: Dict[str, Any] = {}
+    json_support = get_structured_output_support(model, api_base_url)
+    if tools and json_support == StructuredOutputSupport.TOOL:
+        model_kwargs["tools"] = to_openai_tools(tools)
+    elif response_schema and json_support >= StructuredOutputSupport.SCHEMA:
         # Drop unsupported fields from schema passed to OpenAI APi
         cleaned_response_schema = clean_response_schema(response_schema)
         model_kwargs["response_format"] = {
@@ -53,7 +55,7 @@ def send_message_to_model(
                 "strict": True,
             },
         }
-    elif response_type == "json_object" and json_support ==
+    elif response_type == "json_object" and json_support == StructuredOutputSupport.OBJECT:
         model_kwargs["response_format"] = {"type": response_type}
 
     # Get Response from GPT
@@ -171,30 +173,3 @@ async def converse_openai(
         tracer=tracer,
     ):
         yield chunk
-
-
-def clean_response_schema(schema: BaseModel | dict) -> dict:
-    """
-    Format response schema to be compatible with OpenAI API.
-
-    Clean the response schema by removing unsupported fields.
-    """
-    # Normalize schema to OpenAI compatible JSON schema format
-    schema_json = schema if isinstance(schema, dict) else schema.model_json_schema()
-    schema_json = _ensure_strict_json_schema(schema_json, path=(), root=schema_json)
-
-    # Recursively drop unsupported fields from schema passed to OpenAI API
-    # See https://platform.openai.com/docs/guides/structured-outputs#supported-schemas
-    fields_to_exclude = ["minItems", "maxItems"]
-    if isinstance(schema_json, dict) and isinstance(schema_json.get("properties"), dict):
-        for _, prop_value in schema_json["properties"].items():
-            if isinstance(prop_value, dict):
-                # Remove specified fields from direct properties
-                for field in fields_to_exclude:
-                    prop_value.pop(field, None)
-                # Recursively remove specified fields from child properties
-                if "items" in prop_value and isinstance(prop_value["items"], dict):
-                    clean_response_schema(prop_value["items"])
-
-    # Return cleaned schema
-    return schema_json