khoj 1.42.9.dev26__py3-none-any.whl → 1.42.10.dev2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58)
  1. khoj/database/adapters/__init__.py +0 -20
  2. khoj/database/models/__init__.py +0 -1
  3. khoj/interface/compiled/404/index.html +2 -2
  4. khoj/interface/compiled/_next/static/chunks/app/chat/page-4c6b873a4a5c7d2f.js +1 -0
  5. khoj/interface/compiled/agents/index.html +2 -2
  6. khoj/interface/compiled/agents/index.txt +2 -2
  7. khoj/interface/compiled/automations/index.html +2 -2
  8. khoj/interface/compiled/automations/index.txt +3 -3
  9. khoj/interface/compiled/chat/index.html +2 -2
  10. khoj/interface/compiled/chat/index.txt +2 -2
  11. khoj/interface/compiled/index.html +2 -2
  12. khoj/interface/compiled/index.txt +2 -2
  13. khoj/interface/compiled/search/index.html +2 -2
  14. khoj/interface/compiled/search/index.txt +2 -2
  15. khoj/interface/compiled/settings/index.html +2 -2
  16. khoj/interface/compiled/settings/index.txt +4 -4
  17. khoj/interface/compiled/share/chat/index.html +2 -2
  18. khoj/interface/compiled/share/chat/index.txt +2 -2
  19. khoj/processor/content/markdown/markdown_to_entries.py +9 -38
  20. khoj/processor/content/org_mode/org_to_entries.py +2 -18
  21. khoj/processor/content/org_mode/orgnode.py +16 -18
  22. khoj/processor/content/text_to_entries.py +0 -30
  23. khoj/processor/conversation/anthropic/anthropic_chat.py +2 -11
  24. khoj/processor/conversation/anthropic/utils.py +103 -90
  25. khoj/processor/conversation/google/gemini_chat.py +1 -4
  26. khoj/processor/conversation/google/utils.py +18 -80
  27. khoj/processor/conversation/offline/chat_model.py +3 -3
  28. khoj/processor/conversation/openai/gpt.py +38 -13
  29. khoj/processor/conversation/openai/utils.py +12 -113
  30. khoj/processor/conversation/prompts.py +35 -17
  31. khoj/processor/conversation/utils.py +58 -129
  32. khoj/processor/operator/grounding_agent.py +1 -1
  33. khoj/processor/operator/operator_agent_binary.py +3 -4
  34. khoj/processor/tools/online_search.py +0 -18
  35. khoj/processor/tools/run_code.py +1 -1
  36. khoj/routers/api_chat.py +1 -1
  37. khoj/routers/api_content.py +6 -6
  38. khoj/routers/helpers.py +27 -297
  39. khoj/routers/research.py +155 -169
  40. khoj/search_type/text_search.py +0 -2
  41. khoj/utils/helpers.py +8 -284
  42. khoj/utils/initialization.py +2 -0
  43. khoj/utils/rawconfig.py +0 -11
  44. {khoj-1.42.9.dev26.dist-info → khoj-1.42.10.dev2.dist-info}/METADATA +1 -1
  45. {khoj-1.42.9.dev26.dist-info → khoj-1.42.10.dev2.dist-info}/RECORD +57 -57
  46. khoj/interface/compiled/_next/static/chunks/app/chat/page-76fc915800aa90f4.js +0 -1
  47. /khoj/interface/compiled/_next/static/chunks/{1327-3b1a41af530fa8ee.js → 1327-1a9107b9a2a04a98.js} +0 -0
  48. /khoj/interface/compiled/_next/static/chunks/{1915-fbfe167c84ad60c5.js → 1915-5c6508f6ebb62a30.js} +0 -0
  49. /khoj/interface/compiled/_next/static/chunks/{2117-e78b6902ad6f75ec.js → 2117-080746c8e170c81a.js} +0 -0
  50. /khoj/interface/compiled/_next/static/chunks/{2939-4d4084c5b888b960.js → 2939-4af3fd24b8ffc9ad.js} +0 -0
  51. /khoj/interface/compiled/_next/static/chunks/{4447-d6cf93724d57e34b.js → 4447-cd95608f8e93e711.js} +0 -0
  52. /khoj/interface/compiled/_next/static/chunks/{8667-4b7790573b08c50d.js → 8667-50b03a89e82e0ba7.js} +0 -0
  53. /khoj/interface/compiled/_next/static/chunks/{webpack-70e0762712341826.js → webpack-92ce8aaf95718ec4.js} +0 -0
  54. /khoj/interface/compiled/_next/static/{IYGyer2N7GdUJ7QHFghtY → cuzJcS32_a4L4a6gCZ63y}/_buildManifest.js +0 -0
  55. /khoj/interface/compiled/_next/static/{IYGyer2N7GdUJ7QHFghtY → cuzJcS32_a4L4a6gCZ63y}/_ssgManifest.js +0 -0
  56. {khoj-1.42.9.dev26.dist-info → khoj-1.42.10.dev2.dist-info}/WHEEL +0 -0
  57. {khoj-1.42.9.dev26.dist-info → khoj-1.42.10.dev2.dist-info}/entry_points.txt +0 -0
  58. {khoj-1.42.9.dev26.dist-info → khoj-1.42.10.dev2.dist-info}/licenses/LICENSE +0 -0

khoj/processor/conversation/google/utils.py
@@ -1,10 +1,9 @@
- import json
  import logging
  import os
  import random
  from copy import deepcopy
  from time import perf_counter
- from typing import AsyncGenerator, AsyncIterator, Dict, List
+ from typing import AsyncGenerator, AsyncIterator, Dict

  import httpx
  from google import genai
@@ -23,13 +22,11 @@ from tenacity import (

  from khoj.processor.conversation.utils import (
      ResponseWithThought,
-     ToolCall,
      commit_conversation_trace,
      get_image_from_base64,
      get_image_from_url,
  )
  from khoj.utils.helpers import (
-     ToolDefinition,
      get_chat_usage_metrics,
      get_gemini_client,
      is_none_or_empty,
@@ -102,29 +99,26 @@ def gemini_completion_with_backoff(
      temperature=1.2,
      api_key=None,
      api_base_url: str = None,
-     model_kwargs={},
+     model_kwargs=None,
      deepthought=False,
      tracer={},
- ) -> ResponseWithThought:
+ ) -> str:
      client = gemini_clients.get(api_key)
      if not client:
          client = get_gemini_client(api_key, api_base_url)
          gemini_clients[api_key] = client

      formatted_messages, system_instruction = format_messages_for_gemini(messages, system_prompt)
-     raw_content, response_text, response_thoughts = [], "", None
+     response_thoughts: str | None = None

-     # Configure structured output
-     tools = None
+     # format model response schema
      response_schema = None
-     if model_kwargs.get("tools"):
-         tools = to_gemini_tools(model_kwargs["tools"])
-     elif model_kwargs.get("response_schema"):
+     if model_kwargs and model_kwargs.get("response_schema"):
          response_schema = clean_response_schema(model_kwargs["response_schema"])

      thinking_config = None
      if deepthought and is_reasoning_model(model_name):
-         thinking_config = gtypes.ThinkingConfig(thinking_budget=MAX_REASONING_TOKENS_GEMINI, include_thoughts=True)
+         thinking_config = gtypes.ThinkingConfig(thinking_budget=MAX_REASONING_TOKENS_GEMINI)

      max_output_tokens = MAX_OUTPUT_TOKENS_FOR_STANDARD_GEMINI
      if is_reasoning_model(model_name):
@@ -137,9 +131,8 @@ def gemini_completion_with_backoff(
          thinking_config=thinking_config,
          max_output_tokens=max_output_tokens,
          safety_settings=SAFETY_SETTINGS,
-         response_mime_type=model_kwargs.get("response_mime_type", "text/plain"),
+         response_mime_type=model_kwargs.get("response_mime_type", "text/plain") if model_kwargs else "text/plain",
          response_schema=response_schema,
-         tools=tools,
          seed=seed,
          top_p=0.95,
          http_options=gtypes.HttpOptions(client_args={"timeout": httpx.Timeout(30.0, read=60.0)}),
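
Besides dropping tool-call support, this hunk replaces the mutable default `model_kwargs={}` with `model_kwargs=None` plus `if model_kwargs` guards at each use, which sidesteps Python's shared-mutable-default pitfall. A minimal, self-contained sketch of that pitfall (illustration only, not khoj code):

# Minimal sketch of the pitfall fixed above: a mutable default argument is
# evaluated once at function definition, so all calls share the same dict.
def leaky(kwargs={}):
    kwargs["calls"] = kwargs.get("calls", 0) + 1
    return kwargs["calls"]

def safe(kwargs=None):
    kwargs = kwargs if kwargs is not None else {}  # fresh dict per call
    kwargs["calls"] = kwargs.get("calls", 0) + 1
    return kwargs["calls"]

assert leaky() == 1 and leaky() == 2  # state leaks across calls
assert safe() == 1 and safe() == 1
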
@@ -148,25 +141,7 @@
      try:
          # Generate the response
          response = client.models.generate_content(model=model_name, config=config, contents=formatted_messages)
-         if (
-             not response.candidates
-             or not response.candidates[0].content
-             or response.candidates[0].content.parts is None
-         ):
-             raise ValueError(f"Failed to get response from model.")
-         raw_content = [part.model_dump() for part in response.candidates[0].content.parts]
-         if response.function_calls:
-             function_calls = [
-                 ToolCall(name=function_call.name, args=function_call.args, id=function_call.id).__dict__
-                 for function_call in response.function_calls
-             ]
-             response_text = json.dumps(function_calls)
-         else:
-             # If no function calls, use the text response
-             response_text = response.text
-         response_thoughts = "\n".join(
-             [part.text for part in response.candidates[0].content.parts if part.thought and isinstance(part.text, str)]
-         )
+         response_text = response.text
      except gerrors.ClientError as e:
          response = None
          response_text, _ = handle_gemini_response(e.args)
@@ -180,14 +155,8 @@
      input_tokens = response.usage_metadata.prompt_token_count or 0 if response else 0
      output_tokens = response.usage_metadata.candidates_token_count or 0 if response else 0
      thought_tokens = response.usage_metadata.thoughts_token_count or 0 if response else 0
-     cache_read_tokens = response.usage_metadata.cached_content_token_count or 0 if response else 0
      tracer["usage"] = get_chat_usage_metrics(
-         model_name,
-         input_tokens,
-         output_tokens,
-         cache_read_tokens=cache_read_tokens,
-         thought_tokens=thought_tokens,
-         usage=tracer.get("usage"),
+         model_name, input_tokens, output_tokens, thought_tokens=thought_tokens, usage=tracer.get("usage")
      )

      # Validate the response. If empty, raise an error to retry.
@@ -201,7 +170,7 @@
      if is_promptrace_enabled():
          commit_conversation_trace(messages, response_text, tracer)

-     return ResponseWithThought(text=response_text, thought=response_thoughts, raw_content=raw_content)
+     return response_text


  @retry(
@@ -269,7 +238,7 @@ async def gemini_chat_completion_with_backoff(
          # handle safety, rate-limit, other finish reasons
          stop_message, stopped = handle_gemini_response(chunk.candidates, chunk.prompt_feedback)
          if stopped:
-             yield ResponseWithThought(text=stop_message)
+             yield ResponseWithThought(response=stop_message)
              logger.warning(
                  f"LLM Response Prevented for {model_name}: {stop_message}.\n"
                  + f"Last Message by {messages[-1].role}: {messages[-1].content}"
@@ -282,7 +251,7 @@
                  yield ResponseWithThought(thought=part.text)
              elif part.text:
                  aggregated_response += part.text
-                 yield ResponseWithThought(text=part.text)
+                 yield ResponseWithThought(response=part.text)
      # Calculate cost of chat
      input_tokens = final_chunk.usage_metadata.prompt_token_count or 0 if final_chunk else 0
      output_tokens = final_chunk.usage_metadata.candidates_token_count or 0 if final_chunk else 0
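
Throughout this release, `ResponseWithThought(text=...)` call sites become `ResponseWithThought(response=...)`, implying a field rename on the class itself. The class is defined in khoj/processor/conversation/utils.py (changed in this release but not shown in this diff), so the sketch below is a hypothetical reconstruction of the shape these call sites assume:

# Hypothetical sketch only; the real ResponseWithThought lives in
# khoj/processor/conversation/utils.py and is not shown in this diff.
from dataclasses import dataclass
from typing import Optional

@dataclass
class ResponseWithThought:
    response: Optional[str] = None  # user-visible text chunk (previously `text`)
    thought: Optional[str] = None   # model reasoning chunk, if any
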
@@ -381,24 +350,8 @@ def format_messages_for_gemini(
      system_prompt = None if is_none_or_empty(system_prompt) else system_prompt

      for message in messages:
-         if message.role == "assistant":
-             message.role = "model"
-
-         # Handle tool call and tool result message types from additional_kwargs
-         message_type = message.additional_kwargs.get("message_type")
-         if message_type == "tool_call":
-             pass
-         elif message_type == "tool_result":
-             # Convert tool_result to Gemini function response format
-             # Need to find the corresponding function call from previous messages
-             tool_result_msg_content = []
-             for part in message.content:
-                 tool_result_msg_content.append(
-                     gtypes.Part.from_function_response(name=part["name"], response={"result": part["content"]})
-                 )
-             message.content = tool_result_msg_content
          # Convert message content to string list from chatml dictionary list
-         elif isinstance(message.content, list):
+         if isinstance(message.content, list):
              # Convert image_urls to PIL.Image and place them at beginning of list (better for Gemini)
              message_content = []
              for item in sorted(message.content, key=lambda x: 0 if x["type"] == "image_url" else 1):
@@ -418,13 +371,16 @@
                  messages.remove(message)
                  continue
              message.content = message_content
-         elif isinstance(message.content, str) and message.content.strip():
+         elif isinstance(message.content, str):
              message.content = [gtypes.Part.from_text(text=message.content)]
          else:
              logger.error(f"Dropping invalid type: {type(message.content)} of message content: {message.content}")
              messages.remove(message)
              continue

+         if message.role == "assistant":
+             message.role = "model"
+
      if len(messages) == 1:
          messages[0].role = "user"

@@ -452,21 +408,3 @@ def is_reasoning_model(model_name: str) -> bool:
      Check if the model is a reasoning model.
      """
      return model_name.startswith("gemini-2.5")
-
-
- def to_gemini_tools(tools: List[ToolDefinition]) -> List[gtypes.ToolDict] | None:
-     "Transform tool definitions from standard format to Gemini format."
-     gemini_tools = [
-         gtypes.ToolDict(
-             function_declarations=[
-                 gtypes.FunctionDeclarationDict(
-                     name=tool.name,
-                     description=tool.description,
-                     parameters=tool.schema,
-                 )
-                 for tool in tools
-             ]
-         )
-     ]
-
-     return gemini_tools or None

khoj/processor/conversation/offline/chat_model.py
@@ -145,12 +145,12 @@ async def converse_offline(
              aggregated_response += response_delta
              # Put chunk into the asyncio queue (non-blocking)
              try:
-                 queue.put_nowait(ResponseWithThought(text=response_delta))
+                 queue.put_nowait(ResponseWithThought(response=response_delta))
              except asyncio.QueueFull:
                  # Should not happen with default queue size unless consumer is very slow
                  logger.warning("Asyncio queue full during offline LLM streaming.")
                  # Potentially block here or handle differently if needed
-                 asyncio.run(queue.put(ResponseWithThought(text=response_delta)))
+                 asyncio.run(queue.put(ResponseWithThought(response=response_delta)))

          # Log the time taken to stream the entire response
          logger.info(f"Chat streaming took: {perf_counter() - start_time:.3f} seconds")
@@ -221,4 +221,4 @@ def send_message_to_model_offline(
      if is_promptrace_enabled():
          commit_conversation_trace(messages, response_text, tracer)

-     return ResponseWithThought(text=response_text)
+     return response_text

khoj/processor/conversation/openai/gpt.py
@@ -1,24 +1,25 @@
  import logging
  from datetime import datetime
- from typing import Any, AsyncGenerator, Dict, List, Optional
+ from typing import AsyncGenerator, Dict, List, Optional
+
+ from openai.lib._pydantic import _ensure_strict_json_schema
+ from pydantic import BaseModel

  from khoj.database.models import Agent, ChatMessageModel, ChatModel
  from khoj.processor.conversation import prompts
  from khoj.processor.conversation.openai.utils import (
      chat_completion_with_backoff,
-     clean_response_schema,
      completion_with_backoff,
-     get_structured_output_support,
-     to_openai_tools,
+     get_openai_api_json_support,
  )
  from khoj.processor.conversation.utils import (
+     JsonSupport,
      OperatorRun,
      ResponseWithThought,
-     StructuredOutputSupport,
      generate_chatml_messages_with_context,
      messages_to_print,
  )
- from khoj.utils.helpers import ToolDefinition, is_none_or_empty, truncate_code_context
+ from khoj.utils.helpers import is_none_or_empty, truncate_code_context
  from khoj.utils.rawconfig import FileAttachment, LocationData
  from khoj.utils.yaml import yaml_dump

@@ -31,7 +32,6 @@ def send_message_to_model(
      model,
      response_type="text",
      response_schema=None,
-     tools: list[ToolDefinition] = None,
      deepthought=False,
      api_base_url=None,
      tracer: dict = {},
@@ -40,11 +40,9 @@
      Send message to model
      """

-     model_kwargs: Dict[str, Any] = {}
-     json_support = get_structured_output_support(model, api_base_url)
-     if tools and json_support == StructuredOutputSupport.TOOL:
-         model_kwargs["tools"] = to_openai_tools(tools)
-     elif response_schema and json_support >= StructuredOutputSupport.SCHEMA:
+     model_kwargs = {}
+     json_support = get_openai_api_json_support(model, api_base_url)
+     if response_schema and json_support == JsonSupport.SCHEMA:
          # Drop unsupported fields from schema passed to OpenAI APi
          cleaned_response_schema = clean_response_schema(response_schema)
          model_kwargs["response_format"] = {
@@ -55,7 +53,7 @@
              "strict": True,
          },
      }
-     elif response_type == "json_object" and json_support == StructuredOutputSupport.OBJECT:
+     elif response_type == "json_object" and json_support == JsonSupport.OBJECT:
          model_kwargs["response_format"] = {"type": response_type}

      # Get Response from GPT
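
The two branches above produce `response_format` payloads of different strength: `json_schema` pins the reply to a validated schema, while `json_object` only guarantees well-formed JSON. A sketch of the two payload shapes, with hypothetical name and schema values:

# Sketch of the two response_format payloads (names and values hypothetical).
schema_format = {
    "type": "json_schema",
    "json_schema": {
        "name": "Response",
        "schema": {"type": "object", "properties": {"answer": {"type": "string"}}},
        "strict": True,
    },
}
object_format = {"type": "json_object"}  # any well-formed JSON object
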
@@ -173,3 +171,30 @@ async def converse_openai(
          tracer=tracer,
      ):
          yield chunk
+
+
+ def clean_response_schema(schema: BaseModel | dict) -> dict:
+     """
+     Format response schema to be compatible with OpenAI API.
+
+     Clean the response schema by removing unsupported fields.
+     """
+     # Normalize schema to OpenAI compatible JSON schema format
+     schema_json = schema if isinstance(schema, dict) else schema.model_json_schema()
+     schema_json = _ensure_strict_json_schema(schema_json, path=(), root=schema_json)
+
+     # Recursively drop unsupported fields from schema passed to OpenAI API
+     # See https://platform.openai.com/docs/guides/structured-outputs#supported-schemas
+     fields_to_exclude = ["minItems", "maxItems"]
+     if isinstance(schema_json, dict) and isinstance(schema_json.get("properties"), dict):
+         for _, prop_value in schema_json["properties"].items():
+             if isinstance(prop_value, dict):
+                 # Remove specified fields from direct properties
+                 for field in fields_to_exclude:
+                     prop_value.pop(field, None)
+                 # Recursively remove specified fields from child properties
+                 if "items" in prop_value and isinstance(prop_value["items"], dict):
+                     clean_response_schema(prop_value["items"])
+
+     # Return cleaned schema
+     return schema_json
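
Since `clean_response_schema` now lives here (moved from openai/utils.py, where it is removed below), a brief usage sketch with a hypothetical pydantic model; `_ensure_strict_json_schema` first normalizes the schema for OpenAI strict mode, then the loop strips keywords strict mode rejects:

# Usage sketch with a hypothetical pydantic model.
from pydantic import BaseModel

class NextStep(BaseModel):
    scratchpad: str
    tool: str
    query: str

openai_schema = clean_response_schema(NextStep)
# -> a strict JSON schema dict, with keywords OpenAI rejects in strict mode
#    (e.g. minItems/maxItems) stripped from its properties.
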

khoj/processor/conversation/openai/utils.py
@@ -1,4 +1,3 @@
- import json
  import logging
  import os
  from copy import deepcopy
@@ -10,7 +9,6 @@ from urllib.parse import urlparse
  import httpx
  import openai
  from langchain_core.messages.chat import ChatMessage
- from openai.lib._pydantic import _ensure_strict_json_schema
  from openai.lib.streaming.chat import (
      ChatCompletionStream,
      ChatCompletionStreamEvent,
@@ -22,7 +20,6 @@ from openai.types.chat.chat_completion_chunk import (
      Choice,
      ChoiceDelta,
  )
- from pydantic import BaseModel
  from tenacity import (
      before_sleep_log,
      retry,
@@ -33,13 +30,11 @@ from tenacity import (
  )

  from khoj.processor.conversation.utils import (
+     JsonSupport,
      ResponseWithThought,
-     StructuredOutputSupport,
-     ToolCall,
      commit_conversation_trace,
  )
  from khoj.utils.helpers import (
-     ToolDefinition,
      convert_image_data_uri,
      get_chat_usage_metrics,
      get_openai_async_client,
@@ -77,7 +72,7 @@ def completion_with_backoff(
      deepthought: bool = False,
      model_kwargs: dict = {},
      tracer: dict = {},
- ) -> ResponseWithThought:
+ ) -> str:
      client_key = f"{openai_api_key}--{api_base_url}"
      client = openai_clients.get(client_key)
      if not client:
@@ -122,9 +117,6 @@ def completion_with_backoff(
      if os.getenv("KHOJ_LLM_SEED"):
          model_kwargs["seed"] = int(os.getenv("KHOJ_LLM_SEED"))

-     tool_ids = []
-     tool_calls: list[ToolCall] = []
-     thoughts = ""
      aggregated_response = ""
      if stream:
          with client.beta.chat.completions.stream(
@@ -138,16 +130,7 @@ def completion_with_backoff(
              if chunk.type == "content.delta":
                  aggregated_response += chunk.delta
              elif chunk.type == "thought.delta":
-                 thoughts += chunk.delta
-             elif chunk.type == "chunk" and chunk.chunk.choices and chunk.chunk.choices[0].delta.tool_calls:
-                 tool_ids += [tool_call.id for tool_call in chunk.chunk.choices[0].delta.tool_calls]
-             elif chunk.type == "tool_calls.function.arguments.done":
-                 tool_calls += [ToolCall(name=chunk.name, args=json.loads(chunk.arguments), id=None)]
-         if tool_calls:
-             tool_calls = [
-                 ToolCall(name=chunk.name, args=chunk.args, id=tool_id) for chunk, tool_id in zip(tool_calls, tool_ids)
-             ]
-             aggregated_response = json.dumps([tool_call.__dict__ for tool_call in tool_calls])
+                 pass
      else:
          # Non-streaming chat completion
          chunk = client.beta.chat.completions.parse(
@@ -181,7 +164,7 @@ def completion_with_backoff(
      if is_promptrace_enabled():
          commit_conversation_trace(messages, aggregated_response, tracer)

-     return ResponseWithThought(text=aggregated_response, thought=thoughts)
+     return aggregated_response


  @retry(
@@ -207,7 +190,6 @@ async def chat_completion_with_backoff(
      deepthought=False,
      model_kwargs: dict = {},
      tracer: dict = {},
-     tools=None,
  ) -> AsyncGenerator[ResponseWithThought, None]:
      client_key = f"{openai_api_key}--{api_base_url}"
      client = openai_async_clients.get(client_key)
@@ -276,8 +258,6 @@ async def chat_completion_with_backoff(
      read_timeout = 300 if is_local_api(api_base_url) else 60
      if os.getenv("KHOJ_LLM_SEED"):
          model_kwargs["seed"] = int(os.getenv("KHOJ_LLM_SEED"))
-     if tools:
-         model_kwargs["tools"] = tools

      aggregated_response = ""
      final_chunk = None
@@ -297,7 +277,7 @@ async def chat_completion_with_backoff(
              raise ValueError("No response by model.")
          aggregated_response = response.choices[0].message.content
          final_chunk = response
-         yield ResponseWithThought(text=aggregated_response)
+         yield ResponseWithThought(response=aggregated_response)
      else:
          async for chunk in stream_processor(response):
              # Log the time taken to start response
@@ -313,8 +293,8 @@ async def chat_completion_with_backoff(
              response_chunk: ResponseWithThought = None
              response_delta = chunk.choices[0].delta
              if response_delta.content:
-                 response_chunk = ResponseWithThought(text=response_delta.content)
-                 aggregated_response += response_chunk.text
+                 response_chunk = ResponseWithThought(response=response_delta.content)
+                 aggregated_response += response_chunk.response
              elif response_delta.thought:
                  response_chunk = ResponseWithThought(thought=response_delta.thought)
              if response_chunk:
@@ -347,16 +327,16 @@ async def chat_completion_with_backoff(
          commit_conversation_trace(messages, aggregated_response, tracer)


- def get_structured_output_support(model_name: str, api_base_url: str = None) -> StructuredOutputSupport:
+ def get_openai_api_json_support(model_name: str, api_base_url: str = None) -> JsonSupport:
      if model_name.startswith("deepseek-reasoner"):
-         return StructuredOutputSupport.NONE
+         return JsonSupport.NONE
      if api_base_url:
          host = urlparse(api_base_url).hostname
          if host and host.endswith(".ai.azure.com"):
-             return StructuredOutputSupport.OBJECT
+             return JsonSupport.OBJECT
          if host == "api.deepinfra.com":
-             return StructuredOutputSupport.OBJECT
-     return StructuredOutputSupport.TOOL
+             return JsonSupport.OBJECT
+     return JsonSupport.SCHEMA


  def format_message_for_api(messages: List[ChatMessage], api_base_url: str) -> List[dict]:
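
`JsonSupport` replaces the old `StructuredOutputSupport` enum and is imported from khoj/processor/conversation/utils.py, which this diff does not show. Judging only by the members referenced here, a hypothetical sketch of its definition:

# Hypothetical sketch; the real enum is defined in
# khoj/processor/conversation/utils.py (changed in this release, not shown).
from enum import IntEnum

class JsonSupport(IntEnum):
    NONE = 0    # no JSON mode (e.g. deepseek-reasoner)
    OBJECT = 1  # response_format={"type": "json_object"} only
    SCHEMA = 2  # full JSON schema enforcement
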
@@ -365,43 +345,6 @@ def format_message_for_api(messages: List[ChatMessage], api_base_url: str) -> List[dict]:
      """
      formatted_messages = []
      for message in deepcopy(messages):
-         # Handle tool call and tool result message types
-         message_type = message.additional_kwargs.get("message_type")
-         if message_type == "tool_call":
-             # Convert tool_call to OpenAI function call format
-             content = []
-             for part in message.content:
-                 content.append(
-                     {
-                         "type": "function",
-                         "id": part.get("id"),
-                         "function": {
-                             "name": part.get("name"),
-                             "arguments": json.dumps(part.get("input", part.get("args", {}))),
-                         },
-                     }
-                 )
-             formatted_messages.append(
-                 {
-                     "role": "assistant",
-                     "content": None,
-                     "tool_calls": content,
-                 }
-             )
-             continue
-         if message_type == "tool_result":
-             # Convert tool_result to OpenAI tool result format
-             # Each part is a result for a tool call
-             for part in message.content:
-                 formatted_messages.append(
-                     {
-                         "role": "tool",
-                         "tool_call_id": part.get("id") or part.get("tool_use_id"),
-                         "name": part.get("name"),
-                         "content": part.get("content"),
-                     }
-                 )
-             continue
          if isinstance(message.content, list) and not is_openai_api(api_base_url):
              assistant_texts = []
              has_images = False
@@ -765,47 +708,3 @@ def add_qwen_no_think_tag(formatted_messages: List[dict]) -> None:
          if isinstance(content_part, dict) and content_part.get("type") == "text":
              content_part["text"] += " /no_think"
              break
-
-
- def to_openai_tools(tools: List[ToolDefinition]) -> List[Dict] | None:
-     "Transform tool definitions from standard format to OpenAI format."
-     openai_tools = [
-         {
-             "type": "function",
-             "function": {
-                 "name": tool.name,
-                 "description": tool.description,
-                 "parameters": clean_response_schema(tool.schema),
-             },
-         }
-         for tool in tools
-     ]
-
-     return openai_tools or None
-
-
- def clean_response_schema(schema: BaseModel | dict) -> dict:
-     """
-     Format response schema to be compatible with OpenAI API.
-
-     Clean the response schema by removing unsupported fields.
-     """
-     # Normalize schema to OpenAI compatible JSON schema format
-     schema_json = schema if isinstance(schema, dict) else schema.model_json_schema()
-     schema_json = _ensure_strict_json_schema(schema_json, path=(), root=schema_json)
-
-     # Recursively drop unsupported fields from schema passed to OpenAI API
-     # See https://platform.openai.com/docs/guides/structured-outputs#supported-schemas
-     fields_to_exclude = ["minItems", "maxItems"]
-     if isinstance(schema_json, dict) and isinstance(schema_json.get("properties"), dict):
-         for _, prop_value in schema_json["properties"].items():
-             if isinstance(prop_value, dict):
-                 # Remove specified fields from direct properties
-                 for field in fields_to_exclude:
-                     prop_value.pop(field, None)
-                 # Recursively remove specified fields from child properties
-                 if "items" in prop_value and isinstance(prop_value["items"], dict):
-                     clean_response_schema(prop_value["items"])
-
-     # Return cleaned schema
-     return schema_json

khoj/processor/conversation/prompts.py
@@ -667,37 +667,33 @@ Here's some additional context about you:

  plan_function_execution = PromptTemplate.from_template(
      """
- You are Khoj, a smart, creative and meticulous researcher. Use the provided tool AIs to accomplish the task assigned to you.
- Create a multi-step plan and intelligently iterate on the plan to complete the task.
+ You are Khoj, a smart, creative and methodical researcher. Use the provided tool AIs to investigate information to answer query.
+ Create a multi-step plan and intelligently iterate on the plan based on the retrieved information to find the requested information.
  {personality_context}

  # Instructions
- - Provide highly diverse, detailed requests to the tool AIs, one tool AI at a time, to gather information, perform actions etc. Their response will be shown to you in the next iteration.
+ - Ask highly diverse, detailed queries to the tool AIs, one tool AI at a time, to discover required information or run calculations. Their response will be shown to you in the next iteration.
  - Break down your research process into independent, self-contained steps that can be executed sequentially using the available tool AIs to answer the user's query. Write your step-by-step plan in the scratchpad.
  - Always ask a new query that was not asked to the tool AI in a previous iteration. Build on the results of the previous iterations.
  - Ensure that all required context is passed to the tool AIs for successful execution. Include any relevant stuff that has previously been attempted. They only know the context provided in your query.
  - Think step by step to come up with creative strategies when the previous iteration did not yield useful results.
- - You are allowed upto {max_iterations} iterations to use the help of the provided tool AIs to accomplish the task assigned to you. Only stop when you have completed the task.
+ - You are allowed upto {max_iterations} iterations to use the help of the provided tool AIs to answer the user's question.
+ - Stop when you have the required information by returning a JSON object with the "tool" field set to "text" and "query" field empty. E.g., {{"scratchpad": "I have all I need", "tool": "text", "query": ""}}

  # Examples
- Assuming you can search the user's files and the internet.
+ Assuming you can search the user's notes and the internet.
  - When the user asks for the population of their hometown
-   1. Try look up their hometown in their notes. Ask the semantic search AI to search for their birth certificate, childhood memories, school, resume etc.
-   2. Use the other document retrieval tools to build on the semantic search results, fill in the gaps, add more details or confirm your hypothesis.
-   3. If not found in their notes, try infer their hometown from their online social media profiles. Ask the online search AI to look for {username}'s biography, school, resume on linkedin, facebook, website etc.
-   4. Only then try find the latest population of their hometown by reading official websites with the help of the online search and web page reading AI.
+   1. Try look up their hometown in their notes. Ask the note search AI to search for their birth certificate, childhood memories, school, resume etc.
+   2. If not found in their notes, try infer their hometown from their online social media profiles. Ask the online search AI to look for {username}'s biography, school, resume on linkedin, facebook, website etc.
+   3. Only then try find the latest population of their hometown by reading official websites with the help of the online search and web page reading AI.
  - When the user asks for their computer's specs
-   1. Try find their computer model in their documents.
+   1. Try find their computer model in their notes.
    2. Now find webpages with their computer model's spec online.
    3. Ask the webpage tool AI to extract the required information from the relevant webpages.
  - When the user asks what clothes to carry for their upcoming trip
-   1. Use the semantic search tool to find the itinerary of their upcoming trip in their documents.
+   1. Find the itinerary of their upcoming trip in their notes.
    2. Next find the weather forecast at the destination online.
-   3. Then combine the semantic search, regex search, view file and list files tools to find if all the clothes they own in their files.
- - When the user asks you to summarize their expenses in a particular month
-   1. Combine the semantic search and regex search tool AI to find all transactions in the user's documents for that month.
-   2. Use the view file tool to read the line ranges in the matched files
-   3. Finally summarize the expenses
+   3. Then find if they mentioned what clothes they own in their notes.

  # Background Context
  - Current Date: {day_of_week}, {current_date}
@@ -705,9 +701,31 @@ Assuming you can search the user's files and the internet.
  - User Name: {username}

  # Available Tool AIs
- You decide which of the tool AIs listed below would you use to accomplish the user assigned task. You **only** have access to the following tool AIs:
+ You decide which of the tool AIs listed below would you use to answer the user's question. You **only** have access to the following tool AIs:

  {tools}
+
+ Your response should always be a valid JSON object with keys: "scratchpad" (str), "tool" (str) and "query" (str). Do not say anything else.
+ Response format:
+ {{"scratchpad": "<your_scratchpad_to_reason_about_which_tool_to_use>", "tool": "<name_of_tool_ai>", "query": "<your_detailed_query_for_the_tool_ai>"}}
+ """.strip()
+ )
+
+ plan_function_execution_next_tool = PromptTemplate.from_template(
+     """
+ Given the results of your previous iterations, which tool AI will you use next to answer the target query?
+
+ # Target Query:
+ {query}
+ """.strip()
+ )
+
+ previous_iteration = PromptTemplate.from_template(
+     """
+ # Iteration {index}:
+ - tool: {tool}
+ - query: {query}
+ - result: {result}
  """.strip()
  )

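
The reworked planner prompt above pins the model's reply to a JSON object with "scratchpad", "tool" and "query" keys, and uses tool "text" with an empty query as the stop signal. A hedged sketch of how a caller might interpret such a reply; khoj's actual handling lives in khoj/routers/research.py, which this release changes but this diff does not show:

# Hedged sketch; parse_planner_reply is a hypothetical helper, not khoj code.
import json

def parse_planner_reply(raw: str) -> tuple[str, str]:
    reply = json.loads(raw)
    tool = reply.get("tool", "")
    query = reply.get("query", "")
    if tool == "text" and not query:
        return "done", ""  # planner signals it has gathered enough information
    return tool, query

assert parse_planner_reply('{"scratchpad": "I have all I need", "tool": "text", "query": ""}') == ("done", "")
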