khoj 1.42.9.dev26__py3-none-any.whl → 1.42.10__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- khoj/database/adapters/__init__.py +0 -20
- khoj/database/models/__init__.py +0 -1
- khoj/interface/compiled/404/index.html +2 -2
- khoj/interface/compiled/_next/static/chunks/app/agents/{page-5db6ad18da10d353.js → page-9a4610474cd59a71.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/app/automations/{page-6271e2e31c7571d1.js → page-f7bb9d777b7745d4.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/app/chat/page-e3b6206ca5190c32.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/{page-a19a597629e87fb8.js → page-2b3056cba8aa96ce.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/app/search/layout-c02531d586972d7d.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/search/{page-fa366ac14b228688.js → page-4885df3cd175c957.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/app/settings/{page-8f9a85f96088c18b.js → page-8be3b35178abf2ec.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/app/share/chat/layout-e8e5db7830bf3f47.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/share/chat/{page-ed7787cf4938b8e3.js → page-4a4b0c0f4749c2b2.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/{webpack-70e0762712341826.js → webpack-15412ee214acd999.js} +1 -1
- khoj/interface/compiled/_next/static/css/{93eeacc43e261162.css → 821d0d60b0b6871d.css} +1 -1
- khoj/interface/compiled/_next/static/css/{02f60900b0d89ec7.css → e6da1287d41f5409.css} +1 -1
- khoj/interface/compiled/agents/index.html +2 -2
- khoj/interface/compiled/agents/index.txt +2 -2
- khoj/interface/compiled/automations/index.html +2 -2
- khoj/interface/compiled/automations/index.txt +3 -3
- khoj/interface/compiled/chat/index.html +2 -2
- khoj/interface/compiled/chat/index.txt +2 -2
- khoj/interface/compiled/index.html +2 -2
- khoj/interface/compiled/index.txt +2 -2
- khoj/interface/compiled/search/index.html +2 -2
- khoj/interface/compiled/search/index.txt +2 -2
- khoj/interface/compiled/settings/index.html +2 -2
- khoj/interface/compiled/settings/index.txt +4 -4
- khoj/interface/compiled/share/chat/index.html +2 -2
- khoj/interface/compiled/share/chat/index.txt +2 -2
- khoj/processor/content/markdown/markdown_to_entries.py +9 -38
- khoj/processor/content/org_mode/org_to_entries.py +2 -18
- khoj/processor/content/org_mode/orgnode.py +16 -18
- khoj/processor/content/text_to_entries.py +0 -30
- khoj/processor/conversation/anthropic/anthropic_chat.py +2 -11
- khoj/processor/conversation/anthropic/utils.py +103 -90
- khoj/processor/conversation/google/gemini_chat.py +1 -4
- khoj/processor/conversation/google/utils.py +18 -80
- khoj/processor/conversation/offline/chat_model.py +3 -3
- khoj/processor/conversation/openai/gpt.py +38 -13
- khoj/processor/conversation/openai/utils.py +12 -113
- khoj/processor/conversation/prompts.py +35 -17
- khoj/processor/conversation/utils.py +58 -129
- khoj/processor/operator/grounding_agent.py +1 -1
- khoj/processor/operator/operator_agent_binary.py +3 -4
- khoj/processor/tools/online_search.py +0 -18
- khoj/processor/tools/run_code.py +1 -1
- khoj/routers/api_chat.py +1 -1
- khoj/routers/api_content.py +6 -6
- khoj/routers/helpers.py +27 -297
- khoj/routers/research.py +155 -169
- khoj/search_type/text_search.py +0 -2
- khoj/utils/helpers.py +8 -284
- khoj/utils/initialization.py +2 -0
- khoj/utils/rawconfig.py +0 -11
- {khoj-1.42.9.dev26.dist-info → khoj-1.42.10.dist-info}/METADATA +1 -1
- {khoj-1.42.9.dev26.dist-info → khoj-1.42.10.dist-info}/RECORD +67 -67
- khoj/interface/compiled/_next/static/chunks/app/chat/page-76fc915800aa90f4.js +0 -1
- khoj/interface/compiled/_next/static/chunks/app/search/layout-f5881c7ae3ba0795.js +0 -1
- khoj/interface/compiled/_next/static/chunks/app/share/chat/layout-abb6c5f4239ad7be.js +0 -1
- /khoj/interface/compiled/_next/static/{IYGyer2N7GdUJ7QHFghtY → P0Niz53SXQbBiZBs-WnaS}/_buildManifest.js +0 -0
- /khoj/interface/compiled/_next/static/{IYGyer2N7GdUJ7QHFghtY → P0Niz53SXQbBiZBs-WnaS}/_ssgManifest.js +0 -0
- /khoj/interface/compiled/_next/static/chunks/{1327-3b1a41af530fa8ee.js → 1327-1a9107b9a2a04a98.js} +0 -0
- /khoj/interface/compiled/_next/static/chunks/{1915-fbfe167c84ad60c5.js → 1915-5c6508f6ebb62a30.js} +0 -0
- /khoj/interface/compiled/_next/static/chunks/{2117-e78b6902ad6f75ec.js → 2117-080746c8e170c81a.js} +0 -0
- /khoj/interface/compiled/_next/static/chunks/{2939-4d4084c5b888b960.js → 2939-4af3fd24b8ffc9ad.js} +0 -0
- /khoj/interface/compiled/_next/static/chunks/{4447-d6cf93724d57e34b.js → 4447-cd95608f8e93e711.js} +0 -0
- /khoj/interface/compiled/_next/static/chunks/{8667-4b7790573b08c50d.js → 8667-50b03a89e82e0ba7.js} +0 -0
- {khoj-1.42.9.dev26.dist-info → khoj-1.42.10.dist-info}/WHEEL +0 -0
- {khoj-1.42.9.dev26.dist-info → khoj-1.42.10.dist-info}/entry_points.txt +0 -0
- {khoj-1.42.9.dev26.dist-info → khoj-1.42.10.dist-info}/licenses/LICENSE +0 -0
khoj/processor/conversation/google/utils.py

@@ -1,10 +1,9 @@
-import json
 import logging
 import os
 import random
 from copy import deepcopy
 from time import perf_counter
-from typing import AsyncGenerator, AsyncIterator, Dict, List
+from typing import AsyncGenerator, AsyncIterator, Dict

 import httpx
 from google import genai
@@ -23,13 +22,11 @@ from tenacity import (

 from khoj.processor.conversation.utils import (
     ResponseWithThought,
-    ToolCall,
     commit_conversation_trace,
     get_image_from_base64,
     get_image_from_url,
 )
 from khoj.utils.helpers import (
-    ToolDefinition,
     get_chat_usage_metrics,
     get_gemini_client,
     is_none_or_empty,
@@ -102,29 +99,26 @@ def gemini_completion_with_backoff(
     temperature=1.2,
     api_key=None,
     api_base_url: str = None,
-    model_kwargs=
+    model_kwargs=None,
     deepthought=False,
     tracer={},
-) ->
+) -> str:
     client = gemini_clients.get(api_key)
     if not client:
         client = get_gemini_client(api_key, api_base_url)
         gemini_clients[api_key] = client

     formatted_messages, system_instruction = format_messages_for_gemini(messages, system_prompt)
-
+    response_thoughts: str | None = None

-    #
-    tools = None
+    # format model response schema
     response_schema = None
-    if model_kwargs.get("
-        tools = to_gemini_tools(model_kwargs["tools"])
-    elif model_kwargs.get("response_schema"):
+    if model_kwargs and model_kwargs.get("response_schema"):
         response_schema = clean_response_schema(model_kwargs["response_schema"])

     thinking_config = None
     if deepthought and is_reasoning_model(model_name):
-        thinking_config = gtypes.ThinkingConfig(thinking_budget=MAX_REASONING_TOKENS_GEMINI
+        thinking_config = gtypes.ThinkingConfig(thinking_budget=MAX_REASONING_TOKENS_GEMINI)

     max_output_tokens = MAX_OUTPUT_TOKENS_FOR_STANDARD_GEMINI
     if is_reasoning_model(model_name):
@@ -137,9 +131,8 @@ def gemini_completion_with_backoff(
         thinking_config=thinking_config,
         max_output_tokens=max_output_tokens,
         safety_settings=SAFETY_SETTINGS,
-        response_mime_type=model_kwargs.get("response_mime_type", "text/plain"),
+        response_mime_type=model_kwargs.get("response_mime_type", "text/plain") if model_kwargs else "text/plain",
         response_schema=response_schema,
-        tools=tools,
         seed=seed,
         top_p=0.95,
         http_options=gtypes.HttpOptions(client_args={"timeout": httpx.Timeout(30.0, read=60.0)}),
@@ -148,25 +141,7 @@ def gemini_completion_with_backoff(
     try:
         # Generate the response
         response = client.models.generate_content(model=model_name, config=config, contents=formatted_messages)
-        if (
-            not response.candidates
-            or not response.candidates[0].content
-            or response.candidates[0].content.parts is None
-        ):
-            raise ValueError(f"Failed to get response from model.")
-        raw_content = [part.model_dump() for part in response.candidates[0].content.parts]
-        if response.function_calls:
-            function_calls = [
-                ToolCall(name=function_call.name, args=function_call.args, id=function_call.id).__dict__
-                for function_call in response.function_calls
-            ]
-            response_text = json.dumps(function_calls)
-        else:
-            # If no function calls, use the text response
-            response_text = response.text
-        response_thoughts = "\n".join(
-            [part.text for part in response.candidates[0].content.parts if part.thought and isinstance(part.text, str)]
-        )
+        response_text = response.text
     except gerrors.ClientError as e:
         response = None
         response_text, _ = handle_gemini_response(e.args)
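The non-streaming path now just reads `response.text` off the SDK reply instead of unpacking candidates and function calls. For orientation, a minimal, hypothetical sketch of that call pattern with the google-genai SDK; the client setup and model name are assumptions, only `generate_content` and `response.text` mirror the diff:

```python
from google import genai

# Hypothetical standalone usage; khoj wraps this inside gemini_completion_with_backoff
client = genai.Client(api_key="YOUR_GEMINI_API_KEY")  # assumed API-key auth
response = client.models.generate_content(
    model="gemini-2.0-flash",  # illustrative model name
    contents="Summarize the theory of relativity in one sentence.",
)
print(response.text)  # plain text reply, which the patched helper now returns directly
```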
@@ -180,14 +155,8 @@ def gemini_completion_with_backoff(
     input_tokens = response.usage_metadata.prompt_token_count or 0 if response else 0
     output_tokens = response.usage_metadata.candidates_token_count or 0 if response else 0
     thought_tokens = response.usage_metadata.thoughts_token_count or 0 if response else 0
-    cache_read_tokens = response.usage_metadata.cached_content_token_count or 0 if response else 0
     tracer["usage"] = get_chat_usage_metrics(
-        model_name,
-        input_tokens,
-        output_tokens,
-        cache_read_tokens=cache_read_tokens,
-        thought_tokens=thought_tokens,
-        usage=tracer.get("usage"),
+        model_name, input_tokens, output_tokens, thought_tokens=thought_tokens, usage=tracer.get("usage")
     )

     # Validate the response. If empty, raise an error to retry.
@@ -201,7 +170,7 @@ def gemini_completion_with_backoff(
     if is_promptrace_enabled():
         commit_conversation_trace(messages, response_text, tracer)

-    return
+    return response_text


 @retry(
@@ -269,7 +238,7 @@ async def gemini_chat_completion_with_backoff(
             # handle safety, rate-limit, other finish reasons
             stop_message, stopped = handle_gemini_response(chunk.candidates, chunk.prompt_feedback)
             if stopped:
-                yield ResponseWithThought(
+                yield ResponseWithThought(response=stop_message)
                 logger.warning(
                     f"LLM Response Prevented for {model_name}: {stop_message}.\n"
                     + f"Last Message by {messages[-1].role}: {messages[-1].content}"
@@ -282,7 +251,7 @@ async def gemini_chat_completion_with_backoff(
                     yield ResponseWithThought(thought=part.text)
                 elif part.text:
                     aggregated_response += part.text
-                    yield ResponseWithThought(
+                    yield ResponseWithThought(response=part.text)
         # Calculate cost of chat
         input_tokens = final_chunk.usage_metadata.prompt_token_count or 0 if final_chunk else 0
         output_tokens = final_chunk.usage_metadata.candidates_token_count or 0 if final_chunk else 0
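The streaming generators now tag every chunk as either a user-visible response or an internal thought via ResponseWithThought. A minimal consumer sketch, assuming an async generator of that shape; the `response` and `thought` field names come from the diff, while the stand-in generator and dataclass here are illustrative:

```python
import asyncio
from dataclasses import dataclass


@dataclass
class ResponseWithThought:
    # mirrors the two fields used in the diff; the real class lives in khoj.processor.conversation.utils
    response: str | None = None
    thought: str | None = None


async def fake_stream():
    # stand-in for gemini_chat_completion_with_backoff / chat_completion_with_backoff
    yield ResponseWithThought(thought="Looking up the answer...")
    yield ResponseWithThought(response="The answer is 42.")


async def main():
    answer, reasoning = "", ""
    async for chunk in fake_stream():
        if chunk.thought:
            reasoning += chunk.thought  # keep model reasoning separate from the reply
        elif chunk.response:
            answer += chunk.response  # accumulate the user-visible reply
    print(answer)


asyncio.run(main())
```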
@@ -381,24 +350,8 @@ def format_messages_for_gemini(
     system_prompt = None if is_none_or_empty(system_prompt) else system_prompt

     for message in messages:
-        if message.role == "assistant":
-            message.role = "model"
-
-        # Handle tool call and tool result message types from additional_kwargs
-        message_type = message.additional_kwargs.get("message_type")
-        if message_type == "tool_call":
-            pass
-        elif message_type == "tool_result":
-            # Convert tool_result to Gemini function response format
-            # Need to find the corresponding function call from previous messages
-            tool_result_msg_content = []
-            for part in message.content:
-                tool_result_msg_content.append(
-                    gtypes.Part.from_function_response(name=part["name"], response={"result": part["content"]})
-                )
-            message.content = tool_result_msg_content
         # Convert message content to string list from chatml dictionary list
-
+        if isinstance(message.content, list):
             # Convert image_urls to PIL.Image and place them at beginning of list (better for Gemini)
             message_content = []
             for item in sorted(message.content, key=lambda x: 0 if x["type"] == "image_url" else 1):
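The `sorted(..., key=lambda x: 0 if x["type"] == "image_url" else 1)` call above moves image parts to the front of a chatml content list before they are converted to Gemini Parts. A small self-contained illustration of that ordering; the sample content list is made up:

```python
# Hypothetical chatml-style content list, shaped like the input to format_messages_for_gemini
content = [
    {"type": "text", "text": "What is in this picture?"},
    {"type": "image_url", "image_url": {"url": "https://example.com/cat.png"}},
    {"type": "text", "text": "Answer briefly."},
]

# Images first, text after; relative order within each group is preserved (stable sort)
ordered = sorted(content, key=lambda x: 0 if x["type"] == "image_url" else 1)
print([item["type"] for item in ordered])  # ['image_url', 'text', 'text']
```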
@@ -418,13 +371,16 @@ def format_messages_for_gemini(
                 messages.remove(message)
                 continue
             message.content = message_content
-        elif isinstance(message.content, str)
+        elif isinstance(message.content, str):
             message.content = [gtypes.Part.from_text(text=message.content)]
         else:
             logger.error(f"Dropping invalid type: {type(message.content)} of message content: {message.content}")
             messages.remove(message)
             continue

+        if message.role == "assistant":
+            message.role = "model"
+
     if len(messages) == 1:
         messages[0].role = "user"

@@ -452,21 +408,3 @@ def is_reasoning_model(model_name: str) -> bool:
     Check if the model is a reasoning model.
     """
     return model_name.startswith("gemini-2.5")
-
-
-def to_gemini_tools(tools: List[ToolDefinition]) -> List[gtypes.ToolDict] | None:
-    "Transform tool definitions from standard format to Gemini format."
-    gemini_tools = [
-        gtypes.ToolDict(
-            function_declarations=[
-                gtypes.FunctionDeclarationDict(
-                    name=tool.name,
-                    description=tool.description,
-                    parameters=tool.schema,
-                )
-                for tool in tools
-            ]
-        )
-    ]
-
-    return gemini_tools or None
khoj/processor/conversation/offline/chat_model.py

@@ -145,12 +145,12 @@ async def converse_offline(
            aggregated_response += response_delta
            # Put chunk into the asyncio queue (non-blocking)
            try:
-                queue.put_nowait(ResponseWithThought(
+                queue.put_nowait(ResponseWithThought(response=response_delta))
            except asyncio.QueueFull:
                # Should not happen with default queue size unless consumer is very slow
                logger.warning("Asyncio queue full during offline LLM streaming.")
                # Potentially block here or handle differently if needed
-                asyncio.run(queue.put(ResponseWithThought(
+                asyncio.run(queue.put(ResponseWithThought(response=response_delta)))

        # Log the time taken to stream the entire response
        logger.info(f"Chat streaming took: {perf_counter() - start_time:.3f} seconds")
|
@@ -221,4 +221,4 @@ def send_message_to_model_offline(
|
|
221
221
|
if is_promptrace_enabled():
|
222
222
|
commit_conversation_trace(messages, response_text, tracer)
|
223
223
|
|
224
|
-
return
|
224
|
+
return response_text
|
khoj/processor/conversation/openai/gpt.py

@@ -1,24 +1,25 @@
 import logging
 from datetime import datetime
-from typing import
+from typing import AsyncGenerator, Dict, List, Optional
+
+from openai.lib._pydantic import _ensure_strict_json_schema
+from pydantic import BaseModel

 from khoj.database.models import Agent, ChatMessageModel, ChatModel
 from khoj.processor.conversation import prompts
 from khoj.processor.conversation.openai.utils import (
     chat_completion_with_backoff,
-    clean_response_schema,
     completion_with_backoff,
-
-    to_openai_tools,
+    get_openai_api_json_support,
 )
 from khoj.processor.conversation.utils import (
+    JsonSupport,
     OperatorRun,
     ResponseWithThought,
-    StructuredOutputSupport,
     generate_chatml_messages_with_context,
     messages_to_print,
 )
-from khoj.utils.helpers import
+from khoj.utils.helpers import is_none_or_empty, truncate_code_context
 from khoj.utils.rawconfig import FileAttachment, LocationData
 from khoj.utils.yaml import yaml_dump

@@ -31,7 +32,6 @@ def send_message_to_model(
     model,
     response_type="text",
     response_schema=None,
-    tools: list[ToolDefinition] = None,
     deepthought=False,
     api_base_url=None,
     tracer: dict = {},
@@ -40,11 +40,9 @@ def send_message_to_model(
     Send message to model
     """

-    model_kwargs
-    json_support =
-    if
-        model_kwargs["tools"] = to_openai_tools(tools)
-    elif response_schema and json_support >= StructuredOutputSupport.SCHEMA:
+    model_kwargs = {}
+    json_support = get_openai_api_json_support(model, api_base_url)
+    if response_schema and json_support == JsonSupport.SCHEMA:
         # Drop unsupported fields from schema passed to OpenAI APi
         cleaned_response_schema = clean_response_schema(response_schema)
         model_kwargs["response_format"] = {
@@ -55,7 +53,7 @@ def send_message_to_model(
                 "strict": True,
             },
         }
-    elif response_type == "json_object" and json_support ==
+    elif response_type == "json_object" and json_support == JsonSupport.OBJECT:
         model_kwargs["response_format"] = {"type": response_type}

     # Get Response from GPT
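Putting the two branches above together: depending on the JsonSupport level detected for the target API, the request gets a full json_schema response_format, a plain json_object one, or nothing at all. A condensed sketch of that dispatch, assuming a JsonSupport enum with NONE/OBJECT/SCHEMA members as used in the diff; the exact json_schema payload keys are illustrative:

```python
from enum import IntEnum


class JsonSupport(IntEnum):
    # assumed shape of khoj.processor.conversation.utils.JsonSupport
    NONE = 0
    OBJECT = 1
    SCHEMA = 2


def build_response_format(json_support: JsonSupport, response_type: str, schema: dict | None) -> dict:
    model_kwargs: dict = {}
    if schema and json_support == JsonSupport.SCHEMA:
        model_kwargs["response_format"] = {
            "type": "json_schema",
            "json_schema": {"schema": schema, "name": "answer", "strict": True},
        }
    elif response_type == "json_object" and json_support == JsonSupport.OBJECT:
        model_kwargs["response_format"] = {"type": response_type}
    return model_kwargs


print(build_response_format(JsonSupport.OBJECT, "json_object", None))
# {'response_format': {'type': 'json_object'}}
```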
@@ -173,3 +171,30 @@ async def converse_openai(
         tracer=tracer,
     ):
         yield chunk
+
+
+def clean_response_schema(schema: BaseModel | dict) -> dict:
+    """
+    Format response schema to be compatible with OpenAI API.
+
+    Clean the response schema by removing unsupported fields.
+    """
+    # Normalize schema to OpenAI compatible JSON schema format
+    schema_json = schema if isinstance(schema, dict) else schema.model_json_schema()
+    schema_json = _ensure_strict_json_schema(schema_json, path=(), root=schema_json)
+
+    # Recursively drop unsupported fields from schema passed to OpenAI API
+    # See https://platform.openai.com/docs/guides/structured-outputs#supported-schemas
+    fields_to_exclude = ["minItems", "maxItems"]
+    if isinstance(schema_json, dict) and isinstance(schema_json.get("properties"), dict):
+        for _, prop_value in schema_json["properties"].items():
+            if isinstance(prop_value, dict):
+                # Remove specified fields from direct properties
+                for field in fields_to_exclude:
+                    prop_value.pop(field, None)
+                # Recursively remove specified fields from child properties
+                if "items" in prop_value and isinstance(prop_value["items"], dict):
+                    clean_response_schema(prop_value["items"])
+
+    # Return cleaned schema
+    return schema_json
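A small usage sketch of the relocated clean_response_schema helper, assuming a hypothetical Pydantic model whose list-length bounds produce the minItems/maxItems keys that OpenAI's structured-output endpoint rejects; the model and field names are made up:

```python
from pydantic import BaseModel, Field

from khoj.processor.conversation.openai.gpt import clean_response_schema  # defined in the hunk above


class ToolPick(BaseModel):
    # hypothetical response model; list length bounds emit minItems/maxItems in its JSON schema
    scratchpad: str
    queries: list[str] = Field(min_length=1, max_length=3)


raw_schema = ToolPick.model_json_schema()
cleaned = clean_response_schema(ToolPick)
# constraint keys present in the raw schema, stripped from the cleaned properties
print("minItems" in str(raw_schema), "minItems" in str(cleaned["properties"]))
```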
khoj/processor/conversation/openai/utils.py

@@ -1,4 +1,3 @@
-import json
 import logging
 import os
 from copy import deepcopy
@@ -10,7 +9,6 @@ from urllib.parse import urlparse
 import httpx
 import openai
 from langchain_core.messages.chat import ChatMessage
-from openai.lib._pydantic import _ensure_strict_json_schema
 from openai.lib.streaming.chat import (
     ChatCompletionStream,
     ChatCompletionStreamEvent,
@@ -22,7 +20,6 @@ from openai.types.chat.chat_completion_chunk import (
     Choice,
     ChoiceDelta,
 )
-from pydantic import BaseModel
 from tenacity import (
     before_sleep_log,
     retry,
@@ -33,13 +30,11 @@ from tenacity import (
 )

 from khoj.processor.conversation.utils import (
+    JsonSupport,
     ResponseWithThought,
-    StructuredOutputSupport,
-    ToolCall,
     commit_conversation_trace,
 )
 from khoj.utils.helpers import (
-    ToolDefinition,
     convert_image_data_uri,
     get_chat_usage_metrics,
     get_openai_async_client,
@@ -77,7 +72,7 @@ def completion_with_backoff(
     deepthought: bool = False,
     model_kwargs: dict = {},
     tracer: dict = {},
-) ->
+) -> str:
     client_key = f"{openai_api_key}--{api_base_url}"
     client = openai_clients.get(client_key)
     if not client:
@@ -122,9 +117,6 @@ def completion_with_backoff(
     if os.getenv("KHOJ_LLM_SEED"):
         model_kwargs["seed"] = int(os.getenv("KHOJ_LLM_SEED"))

-    tool_ids = []
-    tool_calls: list[ToolCall] = []
-    thoughts = ""
     aggregated_response = ""
     if stream:
         with client.beta.chat.completions.stream(
@@ -138,16 +130,7 @@ def completion_with_backoff(
                 if chunk.type == "content.delta":
                     aggregated_response += chunk.delta
                 elif chunk.type == "thought.delta":
-
-                elif chunk.type == "chunk" and chunk.chunk.choices and chunk.chunk.choices[0].delta.tool_calls:
-                    tool_ids += [tool_call.id for tool_call in chunk.chunk.choices[0].delta.tool_calls]
-                elif chunk.type == "tool_calls.function.arguments.done":
-                    tool_calls += [ToolCall(name=chunk.name, args=json.loads(chunk.arguments), id=None)]
-        if tool_calls:
-            tool_calls = [
-                ToolCall(name=chunk.name, args=chunk.args, id=tool_id) for chunk, tool_id in zip(tool_calls, tool_ids)
-            ]
-            aggregated_response = json.dumps([tool_call.__dict__ for tool_call in tool_calls])
+                    pass
     else:
         # Non-streaming chat completion
         chunk = client.beta.chat.completions.parse(
@@ -181,7 +164,7 @@ def completion_with_backoff(
     if is_promptrace_enabled():
         commit_conversation_trace(messages, aggregated_response, tracer)

-    return
+    return aggregated_response


 @retry(
@@ -207,7 +190,6 @@ async def chat_completion_with_backoff(
     deepthought=False,
     model_kwargs: dict = {},
     tracer: dict = {},
-    tools=None,
 ) -> AsyncGenerator[ResponseWithThought, None]:
     client_key = f"{openai_api_key}--{api_base_url}"
     client = openai_async_clients.get(client_key)
@@ -276,8 +258,6 @@ async def chat_completion_with_backoff(
     read_timeout = 300 if is_local_api(api_base_url) else 60
     if os.getenv("KHOJ_LLM_SEED"):
         model_kwargs["seed"] = int(os.getenv("KHOJ_LLM_SEED"))
-    if tools:
-        model_kwargs["tools"] = tools

     aggregated_response = ""
     final_chunk = None
@@ -297,7 +277,7 @@ async def chat_completion_with_backoff(
             raise ValueError("No response by model.")
         aggregated_response = response.choices[0].message.content
         final_chunk = response
-        yield ResponseWithThought(
+        yield ResponseWithThought(response=aggregated_response)
     else:
         async for chunk in stream_processor(response):
             # Log the time taken to start response
@@ -313,8 +293,8 @@ async def chat_completion_with_backoff(
             response_chunk: ResponseWithThought = None
             response_delta = chunk.choices[0].delta
             if response_delta.content:
-                response_chunk = ResponseWithThought(
-                aggregated_response += response_chunk.
+                response_chunk = ResponseWithThought(response=response_delta.content)
+                aggregated_response += response_chunk.response
             elif response_delta.thought:
                 response_chunk = ResponseWithThought(thought=response_delta.thought)
             if response_chunk:
@@ -347,16 +327,16 @@ async def chat_completion_with_backoff(
         commit_conversation_trace(messages, aggregated_response, tracer)


-def
+def get_openai_api_json_support(model_name: str, api_base_url: str = None) -> JsonSupport:
     if model_name.startswith("deepseek-reasoner"):
-        return
+        return JsonSupport.NONE
     if api_base_url:
         host = urlparse(api_base_url).hostname
         if host and host.endswith(".ai.azure.com"):
-            return
+            return JsonSupport.OBJECT
         if host == "api.deepinfra.com":
-            return
-    return
+            return JsonSupport.OBJECT
+    return JsonSupport.SCHEMA


 def format_message_for_api(messages: List[ChatMessage], api_base_url: str) -> List[dict]:
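The renamed helper answers one question per endpoint: which flavor of JSON output the API supports. A short illustration of that dispatch; the base URLs below are only examples, and the comments name the JsonSupport member each call returns per the branches above:

```python
from khoj.processor.conversation.openai.utils import get_openai_api_json_support  # helper added above

print(get_openai_api_json_support("gpt-4o-mini"))                                       # -> JsonSupport.SCHEMA (default)
print(get_openai_api_json_support("deepseek-reasoner", "https://api.deepseek.com/v1"))  # -> JsonSupport.NONE
print(get_openai_api_json_support("my-deployment", "https://foo.ai.azure.com/openai"))  # -> JsonSupport.OBJECT
print(get_openai_api_json_support("llama-3", "https://api.deepinfra.com/v1/openai"))    # -> JsonSupport.OBJECT
```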
@@ -365,43 +345,6 @@ def format_message_for_api(messages: List[ChatMessage], api_base_url: str) -> List[dict]:
     """
     formatted_messages = []
     for message in deepcopy(messages):
-        # Handle tool call and tool result message types
-        message_type = message.additional_kwargs.get("message_type")
-        if message_type == "tool_call":
-            # Convert tool_call to OpenAI function call format
-            content = []
-            for part in message.content:
-                content.append(
-                    {
-                        "type": "function",
-                        "id": part.get("id"),
-                        "function": {
-                            "name": part.get("name"),
-                            "arguments": json.dumps(part.get("input", part.get("args", {}))),
-                        },
-                    }
-                )
-            formatted_messages.append(
-                {
-                    "role": "assistant",
-                    "content": None,
-                    "tool_calls": content,
-                }
-            )
-            continue
-        if message_type == "tool_result":
-            # Convert tool_result to OpenAI tool result format
-            # Each part is a result for a tool call
-            for part in message.content:
-                formatted_messages.append(
-                    {
-                        "role": "tool",
-                        "tool_call_id": part.get("id") or part.get("tool_use_id"),
-                        "name": part.get("name"),
-                        "content": part.get("content"),
-                    }
-                )
-            continue
         if isinstance(message.content, list) and not is_openai_api(api_base_url):
             assistant_texts = []
             has_images = False
@@ -765,47 +708,3 @@ def add_qwen_no_think_tag(formatted_messages: List[dict]) -> None:
         if isinstance(content_part, dict) and content_part.get("type") == "text":
             content_part["text"] += " /no_think"
             break
-
-
-def to_openai_tools(tools: List[ToolDefinition]) -> List[Dict] | None:
-    "Transform tool definitions from standard format to OpenAI format."
-    openai_tools = [
-        {
-            "type": "function",
-            "function": {
-                "name": tool.name,
-                "description": tool.description,
-                "parameters": clean_response_schema(tool.schema),
-            },
-        }
-        for tool in tools
-    ]
-
-    return openai_tools or None
-
-
-def clean_response_schema(schema: BaseModel | dict) -> dict:
-    """
-    Format response schema to be compatible with OpenAI API.
-
-    Clean the response schema by removing unsupported fields.
-    """
-    # Normalize schema to OpenAI compatible JSON schema format
-    schema_json = schema if isinstance(schema, dict) else schema.model_json_schema()
-    schema_json = _ensure_strict_json_schema(schema_json, path=(), root=schema_json)
-
-    # Recursively drop unsupported fields from schema passed to OpenAI API
-    # See https://platform.openai.com/docs/guides/structured-outputs#supported-schemas
-    fields_to_exclude = ["minItems", "maxItems"]
-    if isinstance(schema_json, dict) and isinstance(schema_json.get("properties"), dict):
-        for _, prop_value in schema_json["properties"].items():
-            if isinstance(prop_value, dict):
-                # Remove specified fields from direct properties
-                for field in fields_to_exclude:
-                    prop_value.pop(field, None)
-                # Recursively remove specified fields from child properties
-                if "items" in prop_value and isinstance(prop_value["items"], dict):
-                    clean_response_schema(prop_value["items"])
-
-    # Return cleaned schema
-    return schema_json
khoj/processor/conversation/prompts.py

@@ -667,37 +667,33 @@ Here's some additional context about you:

 plan_function_execution = PromptTemplate.from_template(
     """
-You are Khoj, a smart, creative and
-Create a multi-step plan and intelligently iterate on the plan to
+You are Khoj, a smart, creative and methodical researcher. Use the provided tool AIs to investigate information to answer query.
+Create a multi-step plan and intelligently iterate on the plan based on the retrieved information to find the requested information.
 {personality_context}

 # Instructions
--
+- Ask highly diverse, detailed queries to the tool AIs, one tool AI at a time, to discover required information or run calculations. Their response will be shown to you in the next iteration.
 - Break down your research process into independent, self-contained steps that can be executed sequentially using the available tool AIs to answer the user's query. Write your step-by-step plan in the scratchpad.
 - Always ask a new query that was not asked to the tool AI in a previous iteration. Build on the results of the previous iterations.
 - Ensure that all required context is passed to the tool AIs for successful execution. Include any relevant stuff that has previously been attempted. They only know the context provided in your query.
 - Think step by step to come up with creative strategies when the previous iteration did not yield useful results.
-- You are allowed upto {max_iterations} iterations to use the help of the provided tool AIs to
+- You are allowed upto {max_iterations} iterations to use the help of the provided tool AIs to answer the user's question.
+- Stop when you have the required information by returning a JSON object with the "tool" field set to "text" and "query" field empty. E.g., {{"scratchpad": "I have all I need", "tool": "text", "query": ""}}

 # Examples
-Assuming you can search the user's
+Assuming you can search the user's notes and the internet.
 - When the user asks for the population of their hometown
-  1. Try look up their hometown in their notes. Ask the
-  2.
-  3.
-  4. Only then try find the latest population of their hometown by reading official websites with the help of the online search and web page reading AI.
+  1. Try look up their hometown in their notes. Ask the note search AI to search for their birth certificate, childhood memories, school, resume etc.
+  2. If not found in their notes, try infer their hometown from their online social media profiles. Ask the online search AI to look for {username}'s biography, school, resume on linkedin, facebook, website etc.
+  3. Only then try find the latest population of their hometown by reading official websites with the help of the online search and web page reading AI.
 - When the user asks for their computer's specs
-  1. Try find their computer model in their
+  1. Try find their computer model in their notes.
   2. Now find webpages with their computer model's spec online.
   3. Ask the webpage tool AI to extract the required information from the relevant webpages.
 - When the user asks what clothes to carry for their upcoming trip
-  1.
+  1. Find the itinerary of their upcoming trip in their notes.
   2. Next find the weather forecast at the destination online.
-  3. Then
-- When the user asks you to summarize their expenses in a particular month
-  1. Combine the semantic search and regex search tool AI to find all transactions in the user's documents for that month.
-  2. Use the view file tool to read the line ranges in the matched files
-  3. Finally summarize the expenses
+  3. Then find if they mentioned what clothes they own in their notes.

 # Background Context
 - Current Date: {day_of_week}, {current_date}
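The updated planner prompt asks the model for a strict JSON object with "scratchpad", "tool", and "query" keys, and to signal completion by returning tool "text" with an empty query. A minimal sketch of how a caller might parse and act on that contract; the parsing helper below is illustrative, not khoj's actual research loop:

```python
import json


def parse_planner_step(raw: str) -> tuple[str, str, bool]:
    """Parse the planner's JSON reply into (tool, query, done) per the prompt's response format."""
    step = json.loads(raw)
    tool = step.get("tool", "")
    query = step.get("query", "")
    done = tool == "text" and not query  # stop condition described in the prompt
    return tool, query, done


raw = '{"scratchpad": "I have all I need", "tool": "text", "query": ""}'
print(parse_planner_step(raw))  # ('text', '', True)
```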
@@ -705,9 +701,31 @@ Assuming you can search the user's files and the internet.
 - User Name: {username}

 # Available Tool AIs
-You decide which of the tool AIs listed below would you use to
+You decide which of the tool AIs listed below would you use to answer the user's question. You **only** have access to the following tool AIs:

 {tools}
+
+Your response should always be a valid JSON object with keys: "scratchpad" (str), "tool" (str) and "query" (str). Do not say anything else.
+Response format:
+{{"scratchpad": "<your_scratchpad_to_reason_about_which_tool_to_use>", "tool": "<name_of_tool_ai>", "query": "<your_detailed_query_for_the_tool_ai>"}}
+""".strip()
+)
+
+plan_function_execution_next_tool = PromptTemplate.from_template(
+    """
+Given the results of your previous iterations, which tool AI will you use next to answer the target query?
+
+# Target Query:
+{query}
+""".strip()
+)
+
+previous_iteration = PromptTemplate.from_template(
+    """
+# Iteration {index}:
+- tool: {tool}
+- query: {query}
+- result: {result}
 """.strip()
 )

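To see the new templates in action, a small rendering sketch with made-up iteration values; it assumes langchain's PromptTemplate (prompts.py builds its templates via PromptTemplate.from_template, but the exact import path used there is not shown in this diff):

```python
from langchain_core.prompts import PromptTemplate  # import path assumed; prompts.py may import it differently

previous_iteration = PromptTemplate.from_template(
    """
# Iteration {index}:
- tool: {tool}
- query: {query}
- result: {result}
""".strip()
)

# Render one prior research step to feed back into the planner's context
print(previous_iteration.format(index=1, tool="notes", query="Find my trip itinerary", result="Found 'Japan trip.org'"))
```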