khoj 1.42.8.dev6__py3-none-any.whl → 1.42.9.dev17__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57)
  1. khoj/database/adapters/__init__.py +20 -0
  2. khoj/interface/compiled/404/index.html +2 -2
  3. khoj/interface/compiled/_next/static/chunks/app/agents/{page-9a4610474cd59a71.js → page-5db6ad18da10d353.js} +1 -1
  4. khoj/interface/compiled/_next/static/chunks/app/automations/{page-f7bb9d777b7745d4.js → page-6271e2e31c7571d1.js} +1 -1
  5. khoj/interface/compiled/_next/static/chunks/app/chat/layout-ad68326d2f849cec.js +1 -0
  6. khoj/interface/compiled/_next/static/chunks/app/chat/{page-ef738950ea1babc3.js → page-76fc915800aa90f4.js} +1 -1
  7. khoj/interface/compiled/_next/static/chunks/app/{page-2b3056cba8aa96ce.js → page-a19a597629e87fb8.js} +1 -1
  8. khoj/interface/compiled/_next/static/chunks/app/search/layout-484d34239ed0f2b1.js +1 -0
  9. khoj/interface/compiled/_next/static/chunks/app/search/{page-4885df3cd175c957.js → page-fa366ac14b228688.js} +1 -1
  10. khoj/interface/compiled/_next/static/chunks/app/settings/{page-8be3b35178abf2ec.js → page-8f9a85f96088c18b.js} +1 -1
  11. khoj/interface/compiled/_next/static/chunks/app/share/chat/layout-abb6c5f4239ad7be.js +1 -0
  12. khoj/interface/compiled/_next/static/chunks/app/share/chat/{page-4a4b0c0f4749c2b2.js → page-ed7787cf4938b8e3.js} +1 -1
  13. khoj/interface/compiled/_next/static/chunks/{webpack-15412ee214acd999.js → webpack-92ce8aaf95718ec4.js} +1 -1
  14. khoj/interface/compiled/_next/static/css/{e6da1287d41f5409.css → 02f60900b0d89ec7.css} +1 -1
  15. khoj/interface/compiled/_next/static/css/{821d0d60b0b6871d.css → 93eeacc43e261162.css} +1 -1
  16. khoj/interface/compiled/agents/index.html +2 -2
  17. khoj/interface/compiled/agents/index.txt +2 -2
  18. khoj/interface/compiled/automations/index.html +2 -2
  19. khoj/interface/compiled/automations/index.txt +2 -2
  20. khoj/interface/compiled/chat/index.html +2 -2
  21. khoj/interface/compiled/chat/index.txt +2 -2
  22. khoj/interface/compiled/index.html +2 -2
  23. khoj/interface/compiled/index.txt +2 -2
  24. khoj/interface/compiled/search/index.html +2 -2
  25. khoj/interface/compiled/search/index.txt +2 -2
  26. khoj/interface/compiled/settings/index.html +2 -2
  27. khoj/interface/compiled/settings/index.txt +2 -2
  28. khoj/interface/compiled/share/chat/index.html +2 -2
  29. khoj/interface/compiled/share/chat/index.txt +2 -2
  30. khoj/processor/conversation/anthropic/anthropic_chat.py +11 -2
  31. khoj/processor/conversation/anthropic/utils.py +90 -103
  32. khoj/processor/conversation/google/gemini_chat.py +4 -1
  33. khoj/processor/conversation/google/utils.py +80 -18
  34. khoj/processor/conversation/offline/chat_model.py +3 -3
  35. khoj/processor/conversation/openai/gpt.py +13 -38
  36. khoj/processor/conversation/openai/utils.py +113 -12
  37. khoj/processor/conversation/prompts.py +17 -35
  38. khoj/processor/conversation/utils.py +128 -57
  39. khoj/processor/operator/grounding_agent.py +1 -1
  40. khoj/processor/operator/operator_agent_binary.py +4 -3
  41. khoj/processor/tools/online_search.py +18 -0
  42. khoj/processor/tools/run_code.py +1 -1
  43. khoj/routers/api_chat.py +1 -1
  44. khoj/routers/api_subscription.py +22 -0
  45. khoj/routers/helpers.py +293 -26
  46. khoj/routers/research.py +169 -155
  47. khoj/utils/helpers.py +284 -8
  48. {khoj-1.42.8.dev6.dist-info → khoj-1.42.9.dev17.dist-info}/METADATA +1 -1
  49. {khoj-1.42.8.dev6.dist-info → khoj-1.42.9.dev17.dist-info}/RECORD +54 -54
  50. khoj/interface/compiled/_next/static/chunks/app/chat/layout-ad4d1792ab1a4108.js +0 -1
  51. khoj/interface/compiled/_next/static/chunks/app/search/layout-c02531d586972d7d.js +0 -1
  52. khoj/interface/compiled/_next/static/chunks/app/share/chat/layout-e8e5db7830bf3f47.js +0 -1
  53. /khoj/interface/compiled/_next/static/{cJdFAXV3MR9BSimUwQ40G → rRy7eX2lAtmXdtQuJoVrw}/_buildManifest.js +0 -0
  54. /khoj/interface/compiled/_next/static/{cJdFAXV3MR9BSimUwQ40G → rRy7eX2lAtmXdtQuJoVrw}/_ssgManifest.js +0 -0
  55. {khoj-1.42.8.dev6.dist-info → khoj-1.42.9.dev17.dist-info}/WHEEL +0 -0
  56. {khoj-1.42.8.dev6.dist-info → khoj-1.42.9.dev17.dist-info}/entry_points.txt +0 -0
  57. {khoj-1.42.8.dev6.dist-info → khoj-1.42.9.dev17.dist-info}/licenses/LICENSE +0 -0

khoj/processor/conversation/anthropic/utils.py

@@ -1,9 +1,8 @@
 import json
 import logging
 from copy import deepcopy
-from textwrap import dedent
 from time import perf_counter
-from typing import AsyncGenerator, Dict, List, Optional, Type
+from typing import AsyncGenerator, Dict, List

 import anthropic
 from langchain_core.messages.chat import ChatMessage
@@ -18,11 +17,14 @@ from tenacity import (

 from khoj.processor.conversation.utils import (
     ResponseWithThought,
+    ToolCall,
     commit_conversation_trace,
     get_image_from_base64,
     get_image_from_url,
 )
 from khoj.utils.helpers import (
+    ToolDefinition,
+    create_tool_definition,
     get_anthropic_async_client,
     get_anthropic_client,
     get_chat_usage_metrics,
@@ -57,9 +59,10 @@ def anthropic_completion_with_backoff(
     max_tokens: int | None = None,
     response_type: str = "text",
     response_schema: BaseModel | None = None,
+    tools: List[ToolDefinition] = None,
     deepthought: bool = False,
     tracer: dict = {},
-) -> str:
+) -> ResponseWithThought:
     client = anthropic_clients.get(api_key)
     if not client:
         client = get_anthropic_client(api_key, api_base_url)
@@ -67,12 +70,26 @@ def anthropic_completion_with_backoff(

     formatted_messages, system = format_messages_for_anthropic(messages, system_prompt)

+    thoughts = ""
     aggregated_response = ""
     final_message = None
     model_kwargs = model_kwargs or dict()
-    if response_schema:
-        tool = create_anthropic_tool_definition(response_schema=response_schema)
-        model_kwargs["tools"] = [tool]
+
+    # Configure structured output
+    if tools:
+        # Convert tools to Anthropic format
+        model_kwargs["tools"] = [
+            anthropic.types.ToolParam(name=tool.name, description=tool.description, input_schema=tool.schema)
+            for tool in tools
+        ]
+        # Cache tool definitions
+        last_tool = model_kwargs["tools"][-1]
+        last_tool["cache_control"] = {"type": "ephemeral"}
+    elif response_schema:
+        tool = create_tool_definition(response_schema)
+        model_kwargs["tools"] = [
+            anthropic.types.ToolParam(name=tool.name, description=tool.description, input_schema=tool.schema)
+        ]
     elif response_type == "json_object" and not (is_reasoning_model(model_name) and deepthought):
         # Prefill model response with '{' to make it output a valid JSON object. Not supported with extended thinking.
         formatted_messages.append(anthropic.types.MessageParam(role="assistant", content="{"))
@@ -96,15 +113,41 @@ def anthropic_completion_with_backoff(
         max_tokens=max_tokens,
         **(model_kwargs),
     ) as stream:
-        for text in stream.text_stream:
-            aggregated_response += text
+        for chunk in stream:
+            if chunk.type != "content_block_delta":
+                continue
+            if chunk.delta.type == "thinking_delta":
+                thoughts += chunk.delta.thinking
+            elif chunk.delta.type == "text_delta":
+                aggregated_response += chunk.delta.text
         final_message = stream.get_final_message()

-    # Extract first tool call from final message
-    for item in final_message.content:
-        if item.type == "tool_use":
-            aggregated_response = json.dumps(item.input)
-            break
+    # Track raw content of model response to reuse for cache hits in multi-turn chats
+    raw_content = [item.model_dump() for item in final_message.content]
+
+    # Extract all tool calls if tools are enabled
+    if tools:
+        tool_calls = [
+            ToolCall(name=item.name, args=item.input, id=item.id).__dict__
+            for item in final_message.content
+            if item.type == "tool_use"
+        ]
+        if tool_calls:
+            # If there are tool calls, aggregate thoughts and responses into thoughts
+            if thoughts and aggregated_response:
+                # wrap each line of thought in italics
+                thoughts = "\n".join([f"*{line.strip()}*" for line in thoughts.splitlines() if line.strip()])
+                thoughts = f"{thoughts}\n\n{aggregated_response}"
+            else:
+                thoughts = thoughts or aggregated_response
+            # Json dump tool calls into aggregated response
+            aggregated_response = json.dumps(tool_calls)
+    # If response schema is used, return the first tool call's input
+    elif response_schema:
+        for item in final_message.content:
+            if item.type == "tool_use":
+                aggregated_response = json.dumps(item.input)
+                break

     # Calculate cost of chat
     input_tokens = final_message.usage.input_tokens
@@ -126,7 +169,7 @@ def anthropic_completion_with_backoff(
     if is_promptrace_enabled():
         commit_conversation_trace(messages, aggregated_response, tracer)

-    return aggregated_response
+    return ResponseWithThought(text=aggregated_response, thought=thoughts, raw_content=raw_content)


 @retry(
@@ -183,10 +226,10 @@ async def anthropic_chat_completion_with_backoff(
             if chunk.type == "message_delta":
                 if chunk.delta.stop_reason == "refusal":
                     yield ResponseWithThought(
-                        response="...I'm sorry, but my safety filters prevent me from assisting with this query."
+                        text="...I'm sorry, but my safety filters prevent me from assisting with this query."
                     )
                 elif chunk.delta.stop_reason == "max_tokens":
-                    yield ResponseWithThought(response="...I'm sorry, but I've hit my response length limit.")
+                    yield ResponseWithThought(text="...I'm sorry, but I've hit my response length limit.")
                 if chunk.delta.stop_reason in ["refusal", "max_tokens"]:
                     logger.warning(
                         f"LLM Response Prevented for {model_name}: {chunk.delta.stop_reason}.\n"
@@ -199,7 +242,7 @@ async def anthropic_chat_completion_with_backoff(
             # Handle streamed response chunk
             response_chunk: ResponseWithThought = None
             if chunk.delta.type == "text_delta":
-                response_chunk = ResponseWithThought(response=chunk.delta.text)
+                response_chunk = ResponseWithThought(text=chunk.delta.text)
                 aggregated_response += chunk.delta.text
             if chunk.delta.type == "thinking_delta":
                 response_chunk = ResponseWithThought(thought=chunk.delta.thinking)
@@ -232,13 +275,14 @@ async def anthropic_chat_completion_with_backoff(
         commit_conversation_trace(messages, aggregated_response, tracer)


-def format_messages_for_anthropic(messages: list[ChatMessage], system_prompt: str = None):
+def format_messages_for_anthropic(raw_messages: list[ChatMessage], system_prompt: str = None):
     """
     Format messages for Anthropic
     """
     # Extract system prompt
     system_prompt = system_prompt or ""
-    for message in messages.copy():
+    messages = deepcopy(raw_messages)
+    for message in messages:
         if message.role == "system":
             if isinstance(message.content, list):
                 system_prompt += "\n".join([part["text"] for part in message.content if part["type"] == "text"])
@@ -250,15 +294,30 @@ def format_messages_for_anthropic(messages: list[ChatMessage], system_prompt: st
     else:
         system = None

-    # Anthropic requires the first message to be a 'user' message
-    if len(messages) == 1:
+    # Anthropic requires the first message to be a user message unless its a tool call
+    message_type = messages[0].additional_kwargs.get("message_type", None)
+    if len(messages) == 1 and message_type != "tool_call":
         messages[0].role = "user"
-    elif len(messages) > 1 and messages[0].role == "assistant":
-        messages = messages[1:]

-    # Convert image urls to base64 encoded images in Anthropic message format
     for message in messages:
-        if isinstance(message.content, list):
+        # Handle tool call and tool result message types from additional_kwargs
+        message_type = message.additional_kwargs.get("message_type")
+        if message_type == "tool_call":
+            pass
+        elif message_type == "tool_result":
+            # Convert tool_result to Anthropic tool_result format
+            content = []
+            for part in message.content:
+                content.append(
+                    {
+                        "type": "tool_result",
+                        "tool_use_id": part["id"],
+                        "content": part["content"],
+                    }
+                )
+            message.content = content
+        # Convert image urls to base64 encoded images in Anthropic message format
+        elif isinstance(message.content, list):
             content = []
             # Sort the content. Anthropic models prefer that text comes after images.
             message.content.sort(key=lambda x: 0 if x["type"] == "image_url" else 1)
@@ -304,18 +363,15 @@ def format_messages_for_anthropic(messages: list[ChatMessage], system_prompt: st
                 if isinstance(block, dict) and "cache_control" in block:
                     del block["cache_control"]

-    # Add cache control to the last content block of second to last message.
-    # In research mode, this message content is list of iterations, updated after each research iteration.
-    # Caching it should improve research efficiency.
-    cache_message = messages[-2]
+    # Add cache control to the last content block of last message.
+    # Caching should improve research efficiency.
+    cache_message = messages[-1]
     if isinstance(cache_message.content, list) and cache_message.content:
         # Add cache control to the last content block only if it's a text block with non-empty content
         last_block = cache_message.content[-1]
-        if (
-            isinstance(last_block, dict)
-            and last_block.get("type") == "text"
-            and last_block.get("text")
-            and last_block.get("text").strip()
+        if isinstance(last_block, dict) and (
+            (last_block.get("type") == "text" and last_block.get("text", "").strip())
+            or (last_block.get("type") == "tool_result" and last_block.get("content", []))
         ):
             last_block["cache_control"] = {"type": "ephemeral"}

@@ -326,74 +382,5 @@ def format_messages_for_anthropic(messages: list[ChatMessage], system_prompt: st
     return formatted_messages, system


-def create_anthropic_tool_definition(
-    response_schema: Type[BaseModel],
-    tool_name: str = None,
-    tool_description: Optional[str] = None,
-) -> anthropic.types.ToolParam:
-    """
-    Converts a response schema BaseModel class into an Anthropic tool definition dictionary.
-
-    This format is expected by Anthropic's API when defining tools the model can use.
-
-    Args:
-        response_schema: The Pydantic BaseModel class to convert.
-            This class defines the response schema for the tool.
-        tool_name: The name for the Anthropic tool (e.g., "get_weather", "plan_next_step").
-        tool_description: Optional description for the Anthropic tool.
-            If None, it attempts to use the Pydantic model's docstring.
-            If that's also missing, a fallback description is generated.
-
-    Returns:
-        An tool definition for Anthropic's API.
-    """
-    model_schema = response_schema.model_json_schema()
-
-    name = tool_name or response_schema.__name__.lower()
-    description = tool_description
-    if description is None:
-        docstring = response_schema.__doc__
-        if docstring:
-            description = dedent(docstring).strip()
-        else:
-            # Fallback description if no explicit one or docstring is provided
-            description = f"Tool named '{name}' accepts specified parameters."
-
-    # Process properties to inline enums and remove $defs dependency
-    processed_properties = {}
-    original_properties = model_schema.get("properties", {})
-    defs = model_schema.get("$defs", {})
-
-    for prop_name, prop_schema in original_properties.items():
-        current_prop_schema = deepcopy(prop_schema)  # Work on a copy
-        # Check for enums defined directly in the property for simpler direct enum definitions.
-        if "$ref" in current_prop_schema:
-            ref_path = current_prop_schema["$ref"]
-            if ref_path.startswith("#/$defs/"):
-                def_name = ref_path.split("/")[-1]
-                if def_name in defs and "enum" in defs[def_name]:
-                    enum_def = defs[def_name]
-                    current_prop_schema["enum"] = enum_def["enum"]
-                    current_prop_schema["type"] = enum_def.get("type", "string")
-                    if "description" not in current_prop_schema and "description" in enum_def:
-                        current_prop_schema["description"] = enum_def["description"]
-                    del current_prop_schema["$ref"]  # Remove the $ref as it's been inlined
-
-        processed_properties[prop_name] = current_prop_schema
-
-    # The input_schema for Anthropic tools is a JSON Schema object.
-    # Pydantic's model_json_schema() provides most of what's needed.
-    input_schema = {
-        "type": "object",
-        "properties": processed_properties,
-    }
-
-    # Include 'required' fields if specified in the Pydantic model
-    if "required" in model_schema and model_schema["required"]:
-        input_schema["required"] = model_schema["required"]
-
-    return anthropic.types.ToolParam(name=name, description=description, input_schema=input_schema)
-
-
 def is_reasoning_model(model_name: str) -> bool:
     return any(model_name.startswith(model) for model in REASONING_MODELS)
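
For orientation, here is a minimal, hypothetical caller for the new tool-calling path in anthropic_completion_with_backoff. It assumes ToolDefinition is a plain container exposing the name, description and schema attributes referenced above, and that the function's remaining keyword arguments keep their current names; this call site is not part of the diff itself.

import json

from langchain_core.messages.chat import ChatMessage

from khoj.processor.conversation.anthropic.utils import anthropic_completion_with_backoff
from khoj.utils.helpers import ToolDefinition

# Hypothetical tool definition; only the name/description/schema fields are taken from the diff.
search_tool = ToolDefinition(
    name="search_notes",
    description="Search the user's notes for a natural language query.",
    schema={
        "type": "object",
        "properties": {"query": {"type": "string", "description": "Search query"}},
        "required": ["query"],
    },
)

response = anthropic_completion_with_backoff(
    messages=[ChatMessage(role="user", content="What did I write about vector databases?")],
    system_prompt="You are a research assistant.",
    model_name="claude-3-7-sonnet-20250219",  # placeholder model id
    api_key="sk-ant-...",  # placeholder key
    tools=[search_tool],
)

# If the model invoked a tool, response.text holds a JSON array of serialized ToolCall
# dicts ({"name": ..., "args": ..., "id": ...}); otherwise it is the plain text answer.
# response.thought carries any extended thinking or pre-tool-call commentary.
print(response.thought)
print(response.text)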

khoj/processor/conversation/google/gemini_chat.py

@@ -28,6 +28,7 @@ def gemini_send_message_to_model(
     api_base_url=None,
     response_type="text",
     response_schema=None,
+    tools=None,
     model_kwargs=None,
     deepthought=False,
     tracer={},
@@ -37,8 +38,10 @@ def gemini_send_message_to_model(
     """
     model_kwargs = {}

+    if tools:
+        model_kwargs["tools"] = tools
     # Monitor for flakiness in 1.5+ models. This would cause unwanted behavior and terminate response early in 1.5 models.
-    if response_type == "json_object" and not model.startswith("gemini-1.5"):
+    elif response_type == "json_object" and not model.startswith("gemini-1.5"):
         model_kwargs["response_mime_type"] = "application/json"
     if response_schema:
         model_kwargs["response_schema"] = response_schema

khoj/processor/conversation/google/utils.py

@@ -1,9 +1,10 @@
+import json
 import logging
 import os
 import random
 from copy import deepcopy
 from time import perf_counter
-from typing import AsyncGenerator, AsyncIterator, Dict
+from typing import AsyncGenerator, AsyncIterator, Dict, List

 import httpx
 from google import genai
@@ -22,11 +23,13 @@ from tenacity import (

 from khoj.processor.conversation.utils import (
     ResponseWithThought,
+    ToolCall,
     commit_conversation_trace,
     get_image_from_base64,
     get_image_from_url,
 )
 from khoj.utils.helpers import (
+    ToolDefinition,
     get_chat_usage_metrics,
     get_gemini_client,
     is_none_or_empty,
@@ -95,26 +98,29 @@ def gemini_completion_with_backoff(
     temperature=1.2,
     api_key=None,
     api_base_url: str = None,
-    model_kwargs=None,
+    model_kwargs={},
     deepthought=False,
     tracer={},
-) -> str:
+) -> ResponseWithThought:
     client = gemini_clients.get(api_key)
     if not client:
         client = get_gemini_client(api_key, api_base_url)
         gemini_clients[api_key] = client

     formatted_messages, system_instruction = format_messages_for_gemini(messages, system_prompt)
-    response_thoughts: str | None = None
+    raw_content, response_text, response_thoughts = [], "", None

-    # format model response schema
+    # Configure structured output
+    tools = None
     response_schema = None
-    if model_kwargs and model_kwargs.get("response_schema"):
+    if model_kwargs.get("tools"):
+        tools = to_gemini_tools(model_kwargs["tools"])
+    elif model_kwargs.get("response_schema"):
         response_schema = clean_response_schema(model_kwargs["response_schema"])

     thinking_config = None
     if deepthought and is_reasoning_model(model_name):
-        thinking_config = gtypes.ThinkingConfig(thinking_budget=MAX_REASONING_TOKENS_GEMINI)
+        thinking_config = gtypes.ThinkingConfig(thinking_budget=MAX_REASONING_TOKENS_GEMINI, include_thoughts=True)

     max_output_tokens = MAX_OUTPUT_TOKENS_FOR_STANDARD_GEMINI
     if is_reasoning_model(model_name):
@@ -127,8 +133,9 @@ def gemini_completion_with_backoff(
         thinking_config=thinking_config,
         max_output_tokens=max_output_tokens,
         safety_settings=SAFETY_SETTINGS,
-        response_mime_type=model_kwargs.get("response_mime_type", "text/plain") if model_kwargs else "text/plain",
+        response_mime_type=model_kwargs.get("response_mime_type", "text/plain"),
         response_schema=response_schema,
+        tools=tools,
         seed=seed,
         top_p=0.95,
         http_options=gtypes.HttpOptions(client_args={"timeout": httpx.Timeout(30.0, read=60.0)}),
@@ -137,7 +144,25 @@ def gemini_completion_with_backoff(
     try:
         # Generate the response
         response = client.models.generate_content(model=model_name, config=config, contents=formatted_messages)
-        response_text = response.text
+        if (
+            not response.candidates
+            or not response.candidates[0].content
+            or response.candidates[0].content.parts is None
+        ):
+            raise ValueError(f"Failed to get response from model.")
+        raw_content = [part.model_dump() for part in response.candidates[0].content.parts]
+        if response.function_calls:
+            function_calls = [
+                ToolCall(name=function_call.name, args=function_call.args, id=function_call.id).__dict__
+                for function_call in response.function_calls
+            ]
+            response_text = json.dumps(function_calls)
+        else:
+            # If no function calls, use the text response
+            response_text = response.text
+        response_thoughts = "\n".join(
+            [part.text for part in response.candidates[0].content.parts if part.thought and isinstance(part.text, str)]
+        )
     except gerrors.ClientError as e:
         response = None
         response_text, _ = handle_gemini_response(e.args)
@@ -151,8 +176,14 @@ def gemini_completion_with_backoff(
     input_tokens = response.usage_metadata.prompt_token_count or 0 if response else 0
     output_tokens = response.usage_metadata.candidates_token_count or 0 if response else 0
     thought_tokens = response.usage_metadata.thoughts_token_count or 0 if response else 0
+    cache_read_tokens = response.usage_metadata.cached_content_token_count or 0 if response else 0
     tracer["usage"] = get_chat_usage_metrics(
-        model_name, input_tokens, output_tokens, thought_tokens=thought_tokens, usage=tracer.get("usage")
+        model_name,
+        input_tokens,
+        output_tokens,
+        cache_read_tokens=cache_read_tokens,
+        thought_tokens=thought_tokens,
+        usage=tracer.get("usage"),
     )

     # Validate the response. If empty, raise an error to retry.
@@ -166,7 +197,7 @@ def gemini_completion_with_backoff(
     if is_promptrace_enabled():
         commit_conversation_trace(messages, response_text, tracer)

-    return response_text
+    return ResponseWithThought(text=response_text, thought=response_thoughts, raw_content=raw_content)


 @retry(
@@ -234,7 +265,7 @@ async def gemini_chat_completion_with_backoff(
             # handle safety, rate-limit, other finish reasons
             stop_message, stopped = handle_gemini_response(chunk.candidates, chunk.prompt_feedback)
             if stopped:
-                yield ResponseWithThought(response=stop_message)
+                yield ResponseWithThought(text=stop_message)
                 logger.warning(
                     f"LLM Response Prevented for {model_name}: {stop_message}.\n"
                     + f"Last Message by {messages[-1].role}: {messages[-1].content}"
@@ -247,7 +278,7 @@ async def gemini_chat_completion_with_backoff(
                     yield ResponseWithThought(thought=part.text)
                 elif part.text:
                     aggregated_response += part.text
-                    yield ResponseWithThought(response=part.text)
+                    yield ResponseWithThought(text=part.text)
     # Calculate cost of chat
     input_tokens = final_chunk.usage_metadata.prompt_token_count or 0 if final_chunk else 0
     output_tokens = final_chunk.usage_metadata.candidates_token_count or 0 if final_chunk else 0
@@ -346,8 +377,24 @@ def format_messages_for_gemini(
     system_prompt = None if is_none_or_empty(system_prompt) else system_prompt

     for message in messages:
+        if message.role == "assistant":
+            message.role = "model"
+
+        # Handle tool call and tool result message types from additional_kwargs
+        message_type = message.additional_kwargs.get("message_type")
+        if message_type == "tool_call":
+            pass
+        elif message_type == "tool_result":
+            # Convert tool_result to Gemini function response format
+            # Need to find the corresponding function call from previous messages
+            tool_result_msg_content = []
+            for part in message.content:
+                tool_result_msg_content.append(
+                    gtypes.Part.from_function_response(name=part["name"], response={"result": part["content"]})
+                )
+            message.content = tool_result_msg_content
         # Convert message content to string list from chatml dictionary list
-        if isinstance(message.content, list):
+        elif isinstance(message.content, list):
             # Convert image_urls to PIL.Image and place them at beginning of list (better for Gemini)
             message_content = []
             for item in sorted(message.content, key=lambda x: 0 if x["type"] == "image_url" else 1):
@@ -367,16 +414,13 @@ def format_messages_for_gemini(
                 messages.remove(message)
                 continue
             message.content = message_content
-        elif isinstance(message.content, str):
+        elif isinstance(message.content, str) and message.content.strip():
             message.content = [gtypes.Part.from_text(text=message.content)]
         else:
             logger.error(f"Dropping invalid type: {type(message.content)} of message content: {message.content}")
             messages.remove(message)
             continue

-        if message.role == "assistant":
-            message.role = "model"
-
     if len(messages) == 1:
         messages[0].role = "user"

@@ -404,3 +448,21 @@ def is_reasoning_model(model_name: str) -> bool:
     Check if the model is a reasoning model.
     """
     return model_name.startswith("gemini-2.5")
+
+
+def to_gemini_tools(tools: List[ToolDefinition]) -> List[gtypes.ToolDict] | None:
+    "Transform tool definitions from standard format to Gemini format."
+    gemini_tools = [
+        gtypes.ToolDict(
+            function_declarations=[
+                gtypes.FunctionDeclarationDict(
+                    name=tool.name,
+                    description=tool.description,
+                    parameters=tool.schema,
+                )
+                for tool in tools
+            ]
+        )
+    ]
+
+    return gemini_tools or None
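
Both formatters above now branch on additional_kwargs["message_type"]. The sketch below shows the intermediate tool-result message shape they consume; the field names ("message_type", "name", "id", "content") come from the diff, the values are illustrative.

from langchain_core.messages.chat import ChatMessage

# One tool result part per executed tool call (illustrative values).
tool_result_msg = ChatMessage(
    role="user",
    content=[
        {
            "name": "search_notes",  # read by format_messages_for_gemini
            "id": "toolu_abc123",  # read by format_messages_for_anthropic as tool_use_id
            "content": "Found 3 notes mentioning vector databases.",
        }
    ],
    additional_kwargs={"message_type": "tool_result"},
)

# format_messages_for_gemini converts each part with
#   gtypes.Part.from_function_response(name=part["name"], response={"result": part["content"]})
# while format_messages_for_anthropic emits
#   {"type": "tool_result", "tool_use_id": part["id"], "content": part["content"]}.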

khoj/processor/conversation/offline/chat_model.py

@@ -145,12 +145,12 @@ async def converse_offline(
             aggregated_response += response_delta
             # Put chunk into the asyncio queue (non-blocking)
             try:
-                queue.put_nowait(ResponseWithThought(response=response_delta))
+                queue.put_nowait(ResponseWithThought(text=response_delta))
             except asyncio.QueueFull:
                 # Should not happen with default queue size unless consumer is very slow
                 logger.warning("Asyncio queue full during offline LLM streaming.")
                 # Potentially block here or handle differently if needed
-                asyncio.run(queue.put(ResponseWithThought(response=response_delta)))
+                asyncio.run(queue.put(ResponseWithThought(text=response_delta)))

         # Log the time taken to stream the entire response
         logger.info(f"Chat streaming took: {perf_counter() - start_time:.3f} seconds")
@@ -221,4 +221,4 @@ def send_message_to_model_offline(
     if is_promptrace_enabled():
         commit_conversation_trace(messages, response_text, tracer)

-    return response_text
+    return ResponseWithThought(text=response_text)

khoj/processor/conversation/openai/gpt.py

@@ -1,25 +1,24 @@
 import logging
 from datetime import datetime
-from typing import AsyncGenerator, Dict, List, Optional
-
-from openai.lib._pydantic import _ensure_strict_json_schema
-from pydantic import BaseModel
+from typing import Any, AsyncGenerator, Dict, List, Optional

 from khoj.database.models import Agent, ChatMessageModel, ChatModel
 from khoj.processor.conversation import prompts
 from khoj.processor.conversation.openai.utils import (
     chat_completion_with_backoff,
+    clean_response_schema,
     completion_with_backoff,
-    get_openai_api_json_support,
+    get_structured_output_support,
+    to_openai_tools,
 )
 from khoj.processor.conversation.utils import (
-    JsonSupport,
     OperatorRun,
     ResponseWithThought,
+    StructuredOutputSupport,
     generate_chatml_messages_with_context,
     messages_to_print,
 )
-from khoj.utils.helpers import is_none_or_empty, truncate_code_context
+from khoj.utils.helpers import ToolDefinition, is_none_or_empty, truncate_code_context
 from khoj.utils.rawconfig import FileAttachment, LocationData
 from khoj.utils.yaml import yaml_dump

@@ -32,6 +31,7 @@ def send_message_to_model(
     model,
     response_type="text",
     response_schema=None,
+    tools: list[ToolDefinition] = None,
     deepthought=False,
     api_base_url=None,
     tracer: dict = {},
@@ -40,9 +40,11 @@ def send_message_to_model(
     Send message to model
     """

-    model_kwargs = {}
-    json_support = get_openai_api_json_support(model, api_base_url)
-    if response_schema and json_support == JsonSupport.SCHEMA:
+    model_kwargs: Dict[str, Any] = {}
+    json_support = get_structured_output_support(model, api_base_url)
+    if tools and json_support == StructuredOutputSupport.TOOL:
+        model_kwargs["tools"] = to_openai_tools(tools)
+    elif response_schema and json_support >= StructuredOutputSupport.SCHEMA:
         # Drop unsupported fields from schema passed to OpenAI APi
         cleaned_response_schema = clean_response_schema(response_schema)
         model_kwargs["response_format"] = {
@@ -53,7 +55,7 @@ def send_message_to_model(
                 "strict": True,
             },
         }
-    elif response_type == "json_object" and json_support == JsonSupport.OBJECT:
+    elif response_type == "json_object" and json_support == StructuredOutputSupport.OBJECT:
         model_kwargs["response_format"] = {"type": response_type}

     # Get Response from GPT
@@ -171,30 +173,3 @@ async def converse_openai(
         tracer=tracer,
     ):
         yield chunk
-
-
-def clean_response_schema(schema: BaseModel | dict) -> dict:
-    """
-    Format response schema to be compatible with OpenAI API.
-
-    Clean the response schema by removing unsupported fields.
-    """
-    # Normalize schema to OpenAI compatible JSON schema format
-    schema_json = schema if isinstance(schema, dict) else schema.model_json_schema()
-    schema_json = _ensure_strict_json_schema(schema_json, path=(), root=schema_json)
-
-    # Recursively drop unsupported fields from schema passed to OpenAI API
-    # See https://platform.openai.com/docs/guides/structured-outputs#supported-schemas
-    fields_to_exclude = ["minItems", "maxItems"]
-    if isinstance(schema_json, dict) and isinstance(schema_json.get("properties"), dict):
-        for _, prop_value in schema_json["properties"].items():
-            if isinstance(prop_value, dict):
-                # Remove specified fields from direct properties
-                for field in fields_to_exclude:
-                    prop_value.pop(field, None)
-                # Recursively remove specified fields from child properties
-                if "items" in prop_value and isinstance(prop_value["items"], dict):
-                    clean_response_schema(prop_value["items"])
-
-    # Return cleaned schema
-    return schema_json
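
The comparison json_support >= StructuredOutputSupport.SCHEMA in send_message_to_model only works if StructuredOutputSupport is an ordered enum. Its actual definition lives in khoj/processor/conversation/utils.py and is not shown in this diff; the sketch below is a plausible shape, included purely as a reading aid.

from enum import IntEnum

class StructuredOutputSupport(IntEnum):
    # Assumed ordering: each level implies the capabilities below it.
    NONE = 0    # no structured output support
    OBJECT = 1  # response_format={"type": "json_object"}
    SCHEMA = 2  # response_format with a full JSON schema
    TOOL = 3    # native tool / function calling

# send_message_to_model then picks the strongest mechanism available:
# native tools at TOOL, a strict JSON schema at SCHEMA or above, else a bare JSON object.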