khoj 1.42.8.dev6__py3-none-any.whl → 1.42.9.dev16__py3-none-any.whl
This diff compares the contents of two publicly released versions of this package, exactly as they appear in their public registry. It is provided for informational purposes only.
- khoj/database/adapters/__init__.py +20 -0
- khoj/interface/compiled/404/index.html +2 -2
- khoj/interface/compiled/_next/static/chunks/app/agents/layout-2e626327abfbe612.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/agents/{page-9a4610474cd59a71.js → page-0006674668eb5a4d.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/app/automations/{page-f7bb9d777b7745d4.js → page-4c465cde2d14cb52.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/app/chat/layout-d6acbba22ccac0ff.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/chat/{page-ef738950ea1babc3.js → page-9967631715682f3c.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/app/{page-2b3056cba8aa96ce.js → page-6e91caf9bc0c8aba.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/app/search/layout-94c76c3a41db42a2.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/search/{page-4885df3cd175c957.js → page-883b7d8d2e3abe3e.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/app/settings/{page-8be3b35178abf2ec.js → page-95e994ddac31473f.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/app/share/chat/layout-95998f0bdc22bb13.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/share/chat/{page-4a4b0c0f4749c2b2.js → page-8c8c175f7f212b03.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/{webpack-15412ee214acd999.js → webpack-4bf3eab7681a1206.js} +1 -1
- khoj/interface/compiled/_next/static/css/1e9b757ee2a2b34b.css +1 -0
- khoj/interface/compiled/_next/static/css/440ae0f0f650dc35.css +1 -0
- khoj/interface/compiled/_next/static/css/bd2071cad2ecf293.css +1 -0
- khoj/interface/compiled/_next/static/css/ee66643a6a5bf71c.css +1 -0
- khoj/interface/compiled/agents/index.html +2 -2
- khoj/interface/compiled/agents/index.txt +2 -2
- khoj/interface/compiled/automations/index.html +2 -2
- khoj/interface/compiled/automations/index.txt +3 -3
- khoj/interface/compiled/chat/index.html +2 -2
- khoj/interface/compiled/chat/index.txt +2 -2
- khoj/interface/compiled/index.html +2 -2
- khoj/interface/compiled/index.txt +2 -2
- khoj/interface/compiled/search/index.html +2 -2
- khoj/interface/compiled/search/index.txt +2 -2
- khoj/interface/compiled/settings/index.html +2 -2
- khoj/interface/compiled/settings/index.txt +4 -4
- khoj/interface/compiled/share/chat/index.html +2 -2
- khoj/interface/compiled/share/chat/index.txt +2 -2
- khoj/processor/conversation/anthropic/anthropic_chat.py +11 -2
- khoj/processor/conversation/anthropic/utils.py +90 -103
- khoj/processor/conversation/google/gemini_chat.py +4 -1
- khoj/processor/conversation/google/utils.py +80 -18
- khoj/processor/conversation/offline/chat_model.py +3 -3
- khoj/processor/conversation/openai/gpt.py +13 -38
- khoj/processor/conversation/openai/utils.py +113 -12
- khoj/processor/conversation/prompts.py +17 -35
- khoj/processor/conversation/utils.py +128 -57
- khoj/processor/operator/grounding_agent.py +1 -1
- khoj/processor/operator/operator_agent_binary.py +4 -3
- khoj/processor/tools/online_search.py +18 -0
- khoj/processor/tools/run_code.py +1 -1
- khoj/routers/api_chat.py +1 -1
- khoj/routers/helpers.py +293 -26
- khoj/routers/research.py +169 -155
- khoj/utils/helpers.py +284 -8
- {khoj-1.42.8.dev6.dist-info → khoj-1.42.9.dev16.dist-info}/METADATA +1 -1
- {khoj-1.42.8.dev6.dist-info → khoj-1.42.9.dev16.dist-info}/RECORD +62 -62
- khoj/interface/compiled/_next/static/chunks/app/agents/layout-4e2a134ec26aa606.js +0 -1
- khoj/interface/compiled/_next/static/chunks/app/chat/layout-ad4d1792ab1a4108.js +0 -1
- khoj/interface/compiled/_next/static/chunks/app/search/layout-c02531d586972d7d.js +0 -1
- khoj/interface/compiled/_next/static/chunks/app/share/chat/layout-e8e5db7830bf3f47.js +0 -1
- khoj/interface/compiled/_next/static/css/37a73b87f02df402.css +0 -1
- khoj/interface/compiled/_next/static/css/76c658ee459140a9.css +0 -1
- khoj/interface/compiled/_next/static/css/821d0d60b0b6871d.css +0 -1
- khoj/interface/compiled/_next/static/css/e6da1287d41f5409.css +0 -1
- /khoj/interface/compiled/_next/static/chunks/{1327-1a9107b9a2a04a98.js → 1327-3b1a41af530fa8ee.js} +0 -0
- /khoj/interface/compiled/_next/static/chunks/{1915-5c6508f6ebb62a30.js → 1915-fbfe167c84ad60c5.js} +0 -0
- /khoj/interface/compiled/_next/static/chunks/{2117-080746c8e170c81a.js → 2117-e78b6902ad6f75ec.js} +0 -0
- /khoj/interface/compiled/_next/static/chunks/{2939-4af3fd24b8ffc9ad.js → 2939-4d4084c5b888b960.js} +0 -0
- /khoj/interface/compiled/_next/static/chunks/{4447-cd95608f8e93e711.js → 4447-d6cf93724d57e34b.js} +0 -0
- /khoj/interface/compiled/_next/static/chunks/{8667-50b03a89e82e0ba7.js → 8667-4b7790573b08c50d.js} +0 -0
- /khoj/interface/compiled/_next/static/{cJdFAXV3MR9BSimUwQ40G → w19FJJa9p2AFJB6DEektd}/_buildManifest.js +0 -0
- /khoj/interface/compiled/_next/static/{cJdFAXV3MR9BSimUwQ40G → w19FJJa9p2AFJB6DEektd}/_ssgManifest.js +0 -0
- {khoj-1.42.8.dev6.dist-info → khoj-1.42.9.dev16.dist-info}/WHEEL +0 -0
- {khoj-1.42.8.dev6.dist-info → khoj-1.42.9.dev16.dist-info}/entry_points.txt +0 -0
- {khoj-1.42.8.dev6.dist-info → khoj-1.42.9.dev16.dist-info}/licenses/LICENSE +0 -0
khoj/processor/conversation/anthropic/utils.py

@@ -1,9 +1,8 @@
 import json
 import logging
 from copy import deepcopy
-from textwrap import dedent
 from time import perf_counter
-from typing import AsyncGenerator, Dict, List
+from typing import AsyncGenerator, Dict, List

 import anthropic
 from langchain_core.messages.chat import ChatMessage

@@ -18,11 +17,14 @@ from tenacity import (

 from khoj.processor.conversation.utils import (
     ResponseWithThought,
+    ToolCall,
     commit_conversation_trace,
     get_image_from_base64,
     get_image_from_url,
 )
 from khoj.utils.helpers import (
+    ToolDefinition,
+    create_tool_definition,
     get_anthropic_async_client,
     get_anthropic_client,
     get_chat_usage_metrics,

@@ -57,9 +59,10 @@ def anthropic_completion_with_backoff(
     max_tokens: int | None = None,
     response_type: str = "text",
     response_schema: BaseModel | None = None,
+    tools: List[ToolDefinition] = None,
     deepthought: bool = False,
     tracer: dict = {},
-) ->
+) -> ResponseWithThought:
     client = anthropic_clients.get(api_key)
     if not client:
         client = get_anthropic_client(api_key, api_base_url)

@@ -67,12 +70,26 @@ def anthropic_completion_with_backoff(

     formatted_messages, system = format_messages_for_anthropic(messages, system_prompt)

+    thoughts = ""
     aggregated_response = ""
     final_message = None
     model_kwargs = model_kwargs or dict()
-
-
-
+
+    # Configure structured output
+    if tools:
+        # Convert tools to Anthropic format
+        model_kwargs["tools"] = [
+            anthropic.types.ToolParam(name=tool.name, description=tool.description, input_schema=tool.schema)
+            for tool in tools
+        ]
+        # Cache tool definitions
+        last_tool = model_kwargs["tools"][-1]
+        last_tool["cache_control"] = {"type": "ephemeral"}
+    elif response_schema:
+        tool = create_tool_definition(response_schema)
+        model_kwargs["tools"] = [
+            anthropic.types.ToolParam(name=tool.name, description=tool.description, input_schema=tool.schema)
+        ]
     elif response_type == "json_object" and not (is_reasoning_model(model_name) and deepthought):
         # Prefill model response with '{' to make it output a valid JSON object. Not supported with extended thinking.
         formatted_messages.append(anthropic.types.MessageParam(role="assistant", content="{"))

@@ -96,15 +113,41 @@ def anthropic_completion_with_backoff(
         max_tokens=max_tokens,
         **(model_kwargs),
     ) as stream:
-        for
-
+        for chunk in stream:
+            if chunk.type != "content_block_delta":
+                continue
+            if chunk.delta.type == "thinking_delta":
+                thoughts += chunk.delta.thinking
+            elif chunk.delta.type == "text_delta":
+                aggregated_response += chunk.delta.text
         final_message = stream.get_final_message()

-    #
-    for item in final_message.content
-
-
-
+    # Track raw content of model response to reuse for cache hits in multi-turn chats
+    raw_content = [item.model_dump() for item in final_message.content]
+
+    # Extract all tool calls if tools are enabled
+    if tools:
+        tool_calls = [
+            ToolCall(name=item.name, args=item.input, id=item.id).__dict__
+            for item in final_message.content
+            if item.type == "tool_use"
+        ]
+        if tool_calls:
+            # If there are tool calls, aggregate thoughts and responses into thoughts
+            if thoughts and aggregated_response:
+                # wrap each line of thought in italics
+                thoughts = "\n".join([f"*{line.strip()}*" for line in thoughts.splitlines() if line.strip()])
+                thoughts = f"{thoughts}\n\n{aggregated_response}"
+            else:
+                thoughts = thoughts or aggregated_response
+            # Json dump tool calls into aggregated response
+            aggregated_response = json.dumps(tool_calls)
+    # If response schema is used, return the first tool call's input
+    elif response_schema:
+        for item in final_message.content:
+            if item.type == "tool_use":
+                aggregated_response = json.dumps(item.input)
+                break

     # Calculate cost of chat
     input_tokens = final_message.usage.input_tokens

@@ -126,7 +169,7 @@ def anthropic_completion_with_backoff(
     if is_promptrace_enabled():
         commit_conversation_trace(messages, aggregated_response, tracer)

-    return aggregated_response
+    return ResponseWithThought(text=aggregated_response, thought=thoughts, raw_content=raw_content)


 @retry(

@@ -183,10 +226,10 @@ async def anthropic_chat_completion_with_backoff(
             if chunk.type == "message_delta":
                 if chunk.delta.stop_reason == "refusal":
                     yield ResponseWithThought(
-
+                        text="...I'm sorry, but my safety filters prevent me from assisting with this query."
                     )
                 elif chunk.delta.stop_reason == "max_tokens":
-                    yield ResponseWithThought(
+                    yield ResponseWithThought(text="...I'm sorry, but I've hit my response length limit.")
                 if chunk.delta.stop_reason in ["refusal", "max_tokens"]:
                     logger.warning(
                         f"LLM Response Prevented for {model_name}: {chunk.delta.stop_reason}.\n"

@@ -199,7 +242,7 @@ async def anthropic_chat_completion_with_backoff(
             # Handle streamed response chunk
             response_chunk: ResponseWithThought = None
             if chunk.delta.type == "text_delta":
-                response_chunk = ResponseWithThought(
+                response_chunk = ResponseWithThought(text=chunk.delta.text)
                 aggregated_response += chunk.delta.text
             if chunk.delta.type == "thinking_delta":
                 response_chunk = ResponseWithThought(thought=chunk.delta.thinking)

@@ -232,13 +275,14 @@ async def anthropic_chat_completion_with_backoff(
         commit_conversation_trace(messages, aggregated_response, tracer)


-def format_messages_for_anthropic(
+def format_messages_for_anthropic(raw_messages: list[ChatMessage], system_prompt: str = None):
     """
     Format messages for Anthropic
     """
     # Extract system prompt
     system_prompt = system_prompt or ""
-
+    messages = deepcopy(raw_messages)
+    for message in messages:
         if message.role == "system":
             if isinstance(message.content, list):
                 system_prompt += "\n".join([part["text"] for part in message.content if part["type"] == "text"])

@@ -250,15 +294,30 @@ def format_messages_for_anthropic(messages: list[ChatMessage], system_prompt: st
     else:
         system = None

-    # Anthropic requires the first message to be a
-
+    # Anthropic requires the first message to be a user message unless its a tool call
+    message_type = messages[0].additional_kwargs.get("message_type", None)
+    if len(messages) == 1 and message_type != "tool_call":
         messages[0].role = "user"
-    elif len(messages) > 1 and messages[0].role == "assistant":
-        messages = messages[1:]

-    # Convert image urls to base64 encoded images in Anthropic message format
     for message in messages:
-
+        # Handle tool call and tool result message types from additional_kwargs
+        message_type = message.additional_kwargs.get("message_type")
+        if message_type == "tool_call":
+            pass
+        elif message_type == "tool_result":
+            # Convert tool_result to Anthropic tool_result format
+            content = []
+            for part in message.content:
+                content.append(
+                    {
+                        "type": "tool_result",
+                        "tool_use_id": part["id"],
+                        "content": part["content"],
+                    }
+                )
+            message.content = content
+        # Convert image urls to base64 encoded images in Anthropic message format
+        elif isinstance(message.content, list):
             content = []
             # Sort the content. Anthropic models prefer that text comes after images.
             message.content.sort(key=lambda x: 0 if x["type"] == "image_url" else 1)

@@ -304,18 +363,15 @@ def format_messages_for_anthropic(messages: list[ChatMessage], system_prompt: st
             if isinstance(block, dict) and "cache_control" in block:
                 del block["cache_control"]

-    # Add cache control to the last content block of
-    #
-
-    cache_message = messages[-2]
+    # Add cache control to the last content block of last message.
+    # Caching should improve research efficiency.
+    cache_message = messages[-1]
     if isinstance(cache_message.content, list) and cache_message.content:
         # Add cache control to the last content block only if it's a text block with non-empty content
         last_block = cache_message.content[-1]
-        if (
-
-
-            and last_block.get("text")
-            and last_block.get("text").strip()
+        if isinstance(last_block, dict) and (
+            (last_block.get("type") == "text" and last_block.get("text", "").strip())
+            or (last_block.get("type") == "tool_result" and last_block.get("content", []))
         ):
             last_block["cache_control"] = {"type": "ephemeral"}

@@ -326,74 +382,5 @@ def format_messages_for_anthropic(messages: list[ChatMessage], system_prompt: st
     return formatted_messages, system


-def create_anthropic_tool_definition(
-    response_schema: Type[BaseModel],
-    tool_name: str = None,
-    tool_description: Optional[str] = None,
-) -> anthropic.types.ToolParam:
-    """
-    Converts a response schema BaseModel class into an Anthropic tool definition dictionary.
-
-    This format is expected by Anthropic's API when defining tools the model can use.
-
-    Args:
-        response_schema: The Pydantic BaseModel class to convert.
-            This class defines the response schema for the tool.
-        tool_name: The name for the Anthropic tool (e.g., "get_weather", "plan_next_step").
-        tool_description: Optional description for the Anthropic tool.
-            If None, it attempts to use the Pydantic model's docstring.
-            If that's also missing, a fallback description is generated.
-
-    Returns:
-        An tool definition for Anthropic's API.
-    """
-    model_schema = response_schema.model_json_schema()
-
-    name = tool_name or response_schema.__name__.lower()
-    description = tool_description
-    if description is None:
-        docstring = response_schema.__doc__
-        if docstring:
-            description = dedent(docstring).strip()
-        else:
-            # Fallback description if no explicit one or docstring is provided
-            description = f"Tool named '{name}' accepts specified parameters."
-
-    # Process properties to inline enums and remove $defs dependency
-    processed_properties = {}
-    original_properties = model_schema.get("properties", {})
-    defs = model_schema.get("$defs", {})
-
-    for prop_name, prop_schema in original_properties.items():
-        current_prop_schema = deepcopy(prop_schema)  # Work on a copy
-        # Check for enums defined directly in the property for simpler direct enum definitions.
-        if "$ref" in current_prop_schema:
-            ref_path = current_prop_schema["$ref"]
-            if ref_path.startswith("#/$defs/"):
-                def_name = ref_path.split("/")[-1]
-                if def_name in defs and "enum" in defs[def_name]:
-                    enum_def = defs[def_name]
-                    current_prop_schema["enum"] = enum_def["enum"]
-                    current_prop_schema["type"] = enum_def.get("type", "string")
-                    if "description" not in current_prop_schema and "description" in enum_def:
-                        current_prop_schema["description"] = enum_def["description"]
-                    del current_prop_schema["$ref"]  # Remove the $ref as it's been inlined
-
-        processed_properties[prop_name] = current_prop_schema
-
-    # The input_schema for Anthropic tools is a JSON Schema object.
-    # Pydantic's model_json_schema() provides most of what's needed.
-    input_schema = {
-        "type": "object",
-        "properties": processed_properties,
-    }
-
-    # Include 'required' fields if specified in the Pydantic model
-    if "required" in model_schema and model_schema["required"]:
-        input_schema["required"] = model_schema["required"]
-
-    return anthropic.types.ToolParam(name=name, description=description, input_schema=input_schema)
-
-
 def is_reasoning_model(model_name: str) -> bool:
     return any(model_name.startswith(model) for model in REASONING_MODELS)
khoj/processor/conversation/google/gemini_chat.py

@@ -28,6 +28,7 @@ def gemini_send_message_to_model(
     api_base_url=None,
     response_type="text",
     response_schema=None,
+    tools=None,
     model_kwargs=None,
     deepthought=False,
     tracer={},

@@ -37,8 +38,10 @@ def gemini_send_message_to_model(
     """
     model_kwargs = {}

+    if tools:
+        model_kwargs["tools"] = tools
     # Monitor for flakiness in 1.5+ models. This would cause unwanted behavior and terminate response early in 1.5 models.
-
+    elif response_type == "json_object" and not model.startswith("gemini-1.5"):
         model_kwargs["response_mime_type"] = "application/json"
         if response_schema:
             model_kwargs["response_schema"] = response_schema
khoj/processor/conversation/google/utils.py

@@ -1,9 +1,10 @@
+import json
 import logging
 import os
 import random
 from copy import deepcopy
 from time import perf_counter
-from typing import AsyncGenerator, AsyncIterator, Dict
+from typing import AsyncGenerator, AsyncIterator, Dict, List

 import httpx
 from google import genai

@@ -22,11 +23,13 @@ from tenacity import (

 from khoj.processor.conversation.utils import (
     ResponseWithThought,
+    ToolCall,
     commit_conversation_trace,
     get_image_from_base64,
     get_image_from_url,
 )
 from khoj.utils.helpers import (
+    ToolDefinition,
     get_chat_usage_metrics,
     get_gemini_client,
     is_none_or_empty,

@@ -95,26 +98,29 @@ def gemini_completion_with_backoff(
     temperature=1.2,
     api_key=None,
     api_base_url: str = None,
-    model_kwargs=
+    model_kwargs={},
     deepthought=False,
     tracer={},
-) ->
+) -> ResponseWithThought:
     client = gemini_clients.get(api_key)
     if not client:
         client = get_gemini_client(api_key, api_base_url)
         gemini_clients[api_key] = client

     formatted_messages, system_instruction = format_messages_for_gemini(messages, system_prompt)
-    response_thoughts
+    raw_content, response_text, response_thoughts = [], "", None

-    #
+    # Configure structured output
+    tools = None
     response_schema = None
-    if model_kwargs
+    if model_kwargs.get("tools"):
+        tools = to_gemini_tools(model_kwargs["tools"])
+    elif model_kwargs.get("response_schema"):
         response_schema = clean_response_schema(model_kwargs["response_schema"])

     thinking_config = None
     if deepthought and is_reasoning_model(model_name):
-        thinking_config = gtypes.ThinkingConfig(thinking_budget=MAX_REASONING_TOKENS_GEMINI)
+        thinking_config = gtypes.ThinkingConfig(thinking_budget=MAX_REASONING_TOKENS_GEMINI, include_thoughts=True)

     max_output_tokens = MAX_OUTPUT_TOKENS_FOR_STANDARD_GEMINI
     if is_reasoning_model(model_name):

@@ -127,8 +133,9 @@ def gemini_completion_with_backoff(
         thinking_config=thinking_config,
         max_output_tokens=max_output_tokens,
         safety_settings=SAFETY_SETTINGS,
-        response_mime_type=model_kwargs.get("response_mime_type", "text/plain")
+        response_mime_type=model_kwargs.get("response_mime_type", "text/plain"),
         response_schema=response_schema,
+        tools=tools,
         seed=seed,
         top_p=0.95,
         http_options=gtypes.HttpOptions(client_args={"timeout": httpx.Timeout(30.0, read=60.0)}),

@@ -137,7 +144,25 @@ def gemini_completion_with_backoff(
     try:
         # Generate the response
         response = client.models.generate_content(model=model_name, config=config, contents=formatted_messages)
-
+        if (
+            not response.candidates
+            or not response.candidates[0].content
+            or response.candidates[0].content.parts is None
+        ):
+            raise ValueError(f"Failed to get response from model.")
+        raw_content = [part.model_dump() for part in response.candidates[0].content.parts]
+        if response.function_calls:
+            function_calls = [
+                ToolCall(name=function_call.name, args=function_call.args, id=function_call.id).__dict__
+                for function_call in response.function_calls
+            ]
+            response_text = json.dumps(function_calls)
+        else:
+            # If no function calls, use the text response
+            response_text = response.text
+            response_thoughts = "\n".join(
+                [part.text for part in response.candidates[0].content.parts if part.thought and isinstance(part.text, str)]
+            )
     except gerrors.ClientError as e:
         response = None
         response_text, _ = handle_gemini_response(e.args)

@@ -151,8 +176,14 @@ def gemini_completion_with_backoff(
     input_tokens = response.usage_metadata.prompt_token_count or 0 if response else 0
     output_tokens = response.usage_metadata.candidates_token_count or 0 if response else 0
     thought_tokens = response.usage_metadata.thoughts_token_count or 0 if response else 0
+    cache_read_tokens = response.usage_metadata.cached_content_token_count or 0 if response else 0
     tracer["usage"] = get_chat_usage_metrics(
-        model_name,
+        model_name,
+        input_tokens,
+        output_tokens,
+        cache_read_tokens=cache_read_tokens,
+        thought_tokens=thought_tokens,
+        usage=tracer.get("usage"),
     )

     # Validate the response. If empty, raise an error to retry.

@@ -166,7 +197,7 @@ def gemini_completion_with_backoff(
     if is_promptrace_enabled():
         commit_conversation_trace(messages, response_text, tracer)

-    return response_text
+    return ResponseWithThought(text=response_text, thought=response_thoughts, raw_content=raw_content)


 @retry(

@@ -234,7 +265,7 @@ async def gemini_chat_completion_with_backoff(
             # handle safety, rate-limit, other finish reasons
             stop_message, stopped = handle_gemini_response(chunk.candidates, chunk.prompt_feedback)
             if stopped:
-                yield ResponseWithThought(
+                yield ResponseWithThought(text=stop_message)
                 logger.warning(
                     f"LLM Response Prevented for {model_name}: {stop_message}.\n"
                     + f"Last Message by {messages[-1].role}: {messages[-1].content}"

@@ -247,7 +278,7 @@ async def gemini_chat_completion_with_backoff(
                     yield ResponseWithThought(thought=part.text)
                 elif part.text:
                     aggregated_response += part.text
-                    yield ResponseWithThought(
+                    yield ResponseWithThought(text=part.text)
         # Calculate cost of chat
         input_tokens = final_chunk.usage_metadata.prompt_token_count or 0 if final_chunk else 0
         output_tokens = final_chunk.usage_metadata.candidates_token_count or 0 if final_chunk else 0

@@ -346,8 +377,24 @@ def format_messages_for_gemini(
     system_prompt = None if is_none_or_empty(system_prompt) else system_prompt

     for message in messages:
+        if message.role == "assistant":
+            message.role = "model"
+
+        # Handle tool call and tool result message types from additional_kwargs
+        message_type = message.additional_kwargs.get("message_type")
+        if message_type == "tool_call":
+            pass
+        elif message_type == "tool_result":
+            # Convert tool_result to Gemini function response format
+            # Need to find the corresponding function call from previous messages
+            tool_result_msg_content = []
+            for part in message.content:
+                tool_result_msg_content.append(
+                    gtypes.Part.from_function_response(name=part["name"], response={"result": part["content"]})
+                )
+            message.content = tool_result_msg_content
         # Convert message content to string list from chatml dictionary list
-
+        elif isinstance(message.content, list):
             # Convert image_urls to PIL.Image and place them at beginning of list (better for Gemini)
             message_content = []
             for item in sorted(message.content, key=lambda x: 0 if x["type"] == "image_url" else 1):

@@ -367,16 +414,13 @@ def format_messages_for_gemini(
                     messages.remove(message)
                     continue
             message.content = message_content
-        elif isinstance(message.content, str):
+        elif isinstance(message.content, str) and message.content.strip():
             message.content = [gtypes.Part.from_text(text=message.content)]
         else:
             logger.error(f"Dropping invalid type: {type(message.content)} of message content: {message.content}")
             messages.remove(message)
             continue

-        if message.role == "assistant":
-            message.role = "model"
-
     if len(messages) == 1:
         messages[0].role = "user"

@@ -404,3 +448,21 @@ def is_reasoning_model(model_name: str) -> bool:
     Check if the model is a reasoning model.
     """
     return model_name.startswith("gemini-2.5")
+
+
+def to_gemini_tools(tools: List[ToolDefinition]) -> List[gtypes.ToolDict] | None:
+    "Transform tool definitions from standard format to Gemini format."
+    gemini_tools = [
+        gtypes.ToolDict(
+            function_declarations=[
+                gtypes.FunctionDeclarationDict(
+                    name=tool.name,
+                    description=tool.description,
+                    parameters=tool.schema,
+                )
+                for tool in tools
+            ]
+        )
+    ]
+
+    return gemini_tools or None
khoj/processor/conversation/offline/chat_model.py

@@ -145,12 +145,12 @@ async def converse_offline(
             aggregated_response += response_delta
             # Put chunk into the asyncio queue (non-blocking)
             try:
-                queue.put_nowait(ResponseWithThought(
+                queue.put_nowait(ResponseWithThought(text=response_delta))
             except asyncio.QueueFull:
                 # Should not happen with default queue size unless consumer is very slow
                 logger.warning("Asyncio queue full during offline LLM streaming.")
                 # Potentially block here or handle differently if needed
-                asyncio.run(queue.put(ResponseWithThought(
+                asyncio.run(queue.put(ResponseWithThought(text=response_delta)))

         # Log the time taken to stream the entire response
         logger.info(f"Chat streaming took: {perf_counter() - start_time:.3f} seconds")

@@ -221,4 +221,4 @@ def send_message_to_model_offline(
     if is_promptrace_enabled():
         commit_conversation_trace(messages, response_text, tracer)

-    return response_text
+    return ResponseWithThought(text=response_text)
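The offline path, like the other providers in this release, now wraps its output in ResponseWithThought instead of returning a bare string. The sketch below shows the shape implied by the constructor calls in this diff; the actual class in khoj/processor/conversation/utils.py (which changed by +128 -57 lines) may carry additional fields.

from dataclasses import dataclass, field
from typing import Any, List, Optional


@dataclass
class ResponseWithThought:
    # Assumed fields, inferred from the constructor calls in this diff; the real class may define more.
    text: str = ""
    thought: Optional[str] = None
    raw_content: List[Any] = field(default_factory=list)


# Every provider path now returns or streams this shape, e.g. the offline branch above:
response = ResponseWithThought(text="Hello from the offline model")
print(response.text, response.thought, response.raw_content)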
khoj/processor/conversation/openai/gpt.py

@@ -1,25 +1,24 @@
 import logging
 from datetime import datetime
-from typing import AsyncGenerator, Dict, List, Optional
-
-from openai.lib._pydantic import _ensure_strict_json_schema
-from pydantic import BaseModel
+from typing import Any, AsyncGenerator, Dict, List, Optional

 from khoj.database.models import Agent, ChatMessageModel, ChatModel
 from khoj.processor.conversation import prompts
 from khoj.processor.conversation.openai.utils import (
     chat_completion_with_backoff,
+    clean_response_schema,
     completion_with_backoff,
-
+    get_structured_output_support,
+    to_openai_tools,
 )
 from khoj.processor.conversation.utils import (
-    JsonSupport,
     OperatorRun,
     ResponseWithThought,
+    StructuredOutputSupport,
     generate_chatml_messages_with_context,
     messages_to_print,
 )
-from khoj.utils.helpers import is_none_or_empty, truncate_code_context
+from khoj.utils.helpers import ToolDefinition, is_none_or_empty, truncate_code_context
 from khoj.utils.rawconfig import FileAttachment, LocationData
 from khoj.utils.yaml import yaml_dump

@@ -32,6 +31,7 @@ def send_message_to_model(
     model,
     response_type="text",
     response_schema=None,
+    tools: list[ToolDefinition] = None,
     deepthought=False,
     api_base_url=None,
     tracer: dict = {},

@@ -40,9 +40,11 @@ def send_message_to_model(
     Send message to model
     """

-    model_kwargs = {}
-    json_support =
-    if
+    model_kwargs: Dict[str, Any] = {}
+    json_support = get_structured_output_support(model, api_base_url)
+    if tools and json_support == StructuredOutputSupport.TOOL:
+        model_kwargs["tools"] = to_openai_tools(tools)
+    elif response_schema and json_support >= StructuredOutputSupport.SCHEMA:
         # Drop unsupported fields from schema passed to OpenAI APi
         cleaned_response_schema = clean_response_schema(response_schema)
         model_kwargs["response_format"] = {

@@ -53,7 +55,7 @@ def send_message_to_model(
             "strict": True,
         },
     }
-    elif response_type == "json_object" and json_support ==
+    elif response_type == "json_object" and json_support == StructuredOutputSupport.OBJECT:
         model_kwargs["response_format"] = {"type": response_type}

     # Get Response from GPT

@@ -171,30 +173,3 @@ async def converse_openai(
         tracer=tracer,
     ):
         yield chunk
-
-
-def clean_response_schema(schema: BaseModel | dict) -> dict:
-    """
-    Format response schema to be compatible with OpenAI API.
-
-    Clean the response schema by removing unsupported fields.
-    """
-    # Normalize schema to OpenAI compatible JSON schema format
-    schema_json = schema if isinstance(schema, dict) else schema.model_json_schema()
-    schema_json = _ensure_strict_json_schema(schema_json, path=(), root=schema_json)
-
-    # Recursively drop unsupported fields from schema passed to OpenAI API
-    # See https://platform.openai.com/docs/guides/structured-outputs#supported-schemas
-    fields_to_exclude = ["minItems", "maxItems"]
-    if isinstance(schema_json, dict) and isinstance(schema_json.get("properties"), dict):
-        for _, prop_value in schema_json["properties"].items():
-            if isinstance(prop_value, dict):
-                # Remove specified fields from direct properties
-                for field in fields_to_exclude:
-                    prop_value.pop(field, None)
-                # Recursively remove specified fields from child properties
-                if "items" in prop_value and isinstance(prop_value["items"], dict):
-                    clean_response_schema(prop_value["items"])
-
-    # Return cleaned schema
-    return schema_json