khoj 1.42.8.dev4__py3-none-any.whl → 1.42.9.dev16__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- khoj/database/adapters/__init__.py +20 -0
- khoj/interface/compiled/404/index.html +2 -2
- khoj/interface/compiled/_next/static/chunks/app/agents/layout-2e626327abfbe612.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/chat/layout-d6acbba22ccac0ff.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/chat/{page-802dedbf1d9d5e1e.js → page-9967631715682f3c.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/app/{page-85b9b416898738f7.js → page-6e91caf9bc0c8aba.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/app/search/layout-94c76c3a41db42a2.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/share/chat/layout-95998f0bdc22bb13.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/share/chat/{page-c062269e6906ef22.js → page-8c8c175f7f212b03.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/{webpack-88659b10d39e393f.js → webpack-4bf3eab7681a1206.js} +1 -1
- khoj/interface/compiled/_next/static/css/1e9b757ee2a2b34b.css +1 -0
- khoj/interface/compiled/_next/static/css/440ae0f0f650dc35.css +1 -0
- khoj/interface/compiled/_next/static/css/bd2071cad2ecf293.css +1 -0
- khoj/interface/compiled/agents/index.html +2 -2
- khoj/interface/compiled/agents/index.txt +1 -1
- khoj/interface/compiled/automations/index.html +2 -2
- khoj/interface/compiled/automations/index.txt +1 -1
- khoj/interface/compiled/chat/index.html +2 -2
- khoj/interface/compiled/chat/index.txt +2 -2
- khoj/interface/compiled/index.html +2 -2
- khoj/interface/compiled/index.txt +2 -2
- khoj/interface/compiled/search/index.html +2 -2
- khoj/interface/compiled/search/index.txt +1 -1
- khoj/interface/compiled/settings/index.html +2 -2
- khoj/interface/compiled/settings/index.txt +1 -1
- khoj/interface/compiled/share/chat/index.html +2 -2
- khoj/interface/compiled/share/chat/index.txt +2 -2
- khoj/processor/conversation/anthropic/anthropic_chat.py +11 -2
- khoj/processor/conversation/anthropic/utils.py +90 -103
- khoj/processor/conversation/google/gemini_chat.py +4 -1
- khoj/processor/conversation/google/utils.py +84 -19
- khoj/processor/conversation/offline/chat_model.py +3 -3
- khoj/processor/conversation/openai/gpt.py +13 -38
- khoj/processor/conversation/openai/utils.py +113 -12
- khoj/processor/conversation/prompts.py +17 -35
- khoj/processor/conversation/utils.py +128 -57
- khoj/processor/operator/grounding_agent.py +1 -1
- khoj/processor/operator/operator_agent_binary.py +4 -3
- khoj/processor/tools/online_search.py +18 -0
- khoj/processor/tools/run_code.py +1 -1
- khoj/routers/api_chat.py +1 -1
- khoj/routers/helpers.py +293 -26
- khoj/routers/research.py +169 -155
- khoj/utils/helpers.py +284 -8
- {khoj-1.42.8.dev4.dist-info → khoj-1.42.9.dev16.dist-info}/METADATA +1 -1
- {khoj-1.42.8.dev4.dist-info → khoj-1.42.9.dev16.dist-info}/RECORD +51 -51
- khoj/interface/compiled/_next/static/chunks/app/agents/layout-e00fb81dca656a10.js +0 -1
- khoj/interface/compiled/_next/static/chunks/app/chat/layout-d5ae861e1ade9d08.js +0 -1
- khoj/interface/compiled/_next/static/chunks/app/search/layout-f5881c7ae3ba0795.js +0 -1
- khoj/interface/compiled/_next/static/chunks/app/share/chat/layout-b3f7ae1ef8871d30.js +0 -1
- khoj/interface/compiled/_next/static/css/02f60900b0d89ec7.css +0 -1
- khoj/interface/compiled/_next/static/css/76c658ee459140a9.css +0 -1
- khoj/interface/compiled/_next/static/css/fbacbdfd5e7f3f0e.css +0 -1
- /khoj/interface/compiled/_next/static/{8Wx2kDD5oC-v77JDu6vKI → w19FJJa9p2AFJB6DEektd}/_buildManifest.js +0 -0
- /khoj/interface/compiled/_next/static/{8Wx2kDD5oC-v77JDu6vKI → w19FJJa9p2AFJB6DEektd}/_ssgManifest.js +0 -0
- {khoj-1.42.8.dev4.dist-info → khoj-1.42.9.dev16.dist-info}/WHEEL +0 -0
- {khoj-1.42.8.dev4.dist-info → khoj-1.42.9.dev16.dist-info}/entry_points.txt +0 -0
- {khoj-1.42.8.dev4.dist-info → khoj-1.42.9.dev16.dist-info}/licenses/LICENSE +0 -0
@@ -1,9 +1,8 @@
 import json
 import logging
 from copy import deepcopy
-from textwrap import dedent
 from time import perf_counter
-from typing import AsyncGenerator, Dict, List
+from typing import AsyncGenerator, Dict, List
 
 import anthropic
 from langchain_core.messages.chat import ChatMessage
@@ -18,11 +17,14 @@ from tenacity import (
 
 from khoj.processor.conversation.utils import (
     ResponseWithThought,
+    ToolCall,
     commit_conversation_trace,
     get_image_from_base64,
     get_image_from_url,
 )
 from khoj.utils.helpers import (
+    ToolDefinition,
+    create_tool_definition,
     get_anthropic_async_client,
     get_anthropic_client,
     get_chat_usage_metrics,
@@ -57,9 +59,10 @@ def anthropic_completion_with_backoff(
     max_tokens: int | None = None,
     response_type: str = "text",
     response_schema: BaseModel | None = None,
+    tools: List[ToolDefinition] = None,
     deepthought: bool = False,
     tracer: dict = {},
-) ->
+) -> ResponseWithThought:
     client = anthropic_clients.get(api_key)
     if not client:
         client = get_anthropic_client(api_key, api_base_url)
@@ -67,12 +70,26 @@ def anthropic_completion_with_backoff(
 
     formatted_messages, system = format_messages_for_anthropic(messages, system_prompt)
 
+    thoughts = ""
     aggregated_response = ""
     final_message = None
     model_kwargs = model_kwargs or dict()
-
-
-
+
+    # Configure structured output
+    if tools:
+        # Convert tools to Anthropic format
+        model_kwargs["tools"] = [
+            anthropic.types.ToolParam(name=tool.name, description=tool.description, input_schema=tool.schema)
+            for tool in tools
+        ]
+        # Cache tool definitions
+        last_tool = model_kwargs["tools"][-1]
+        last_tool["cache_control"] = {"type": "ephemeral"}
+    elif response_schema:
+        tool = create_tool_definition(response_schema)
+        model_kwargs["tools"] = [
+            anthropic.types.ToolParam(name=tool.name, description=tool.description, input_schema=tool.schema)
+        ]
     elif response_type == "json_object" and not (is_reasoning_model(model_name) and deepthought):
         # Prefill model response with '{' to make it output a valid JSON object. Not supported with extended thinking.
         formatted_messages.append(anthropic.types.MessageParam(role="assistant", content="{"))
@@ -96,15 +113,41 @@ def anthropic_completion_with_backoff(
         max_tokens=max_tokens,
         **(model_kwargs),
     ) as stream:
-        for
-
+        for chunk in stream:
+            if chunk.type != "content_block_delta":
+                continue
+            if chunk.delta.type == "thinking_delta":
+                thoughts += chunk.delta.thinking
+            elif chunk.delta.type == "text_delta":
+                aggregated_response += chunk.delta.text
         final_message = stream.get_final_message()
 
-    #
-    for item in final_message.content
-
-
-
+    # Track raw content of model response to reuse for cache hits in multi-turn chats
+    raw_content = [item.model_dump() for item in final_message.content]
+
+    # Extract all tool calls if tools are enabled
+    if tools:
+        tool_calls = [
+            ToolCall(name=item.name, args=item.input, id=item.id).__dict__
+            for item in final_message.content
+            if item.type == "tool_use"
+        ]
+        if tool_calls:
+            # If there are tool calls, aggregate thoughts and responses into thoughts
+            if thoughts and aggregated_response:
+                # wrap each line of thought in italics
+                thoughts = "\n".join([f"*{line.strip()}*" for line in thoughts.splitlines() if line.strip()])
+                thoughts = f"{thoughts}\n\n{aggregated_response}"
+            else:
+                thoughts = thoughts or aggregated_response
+            # Json dump tool calls into aggregated response
+            aggregated_response = json.dumps(tool_calls)
+    # If response schema is used, return the first tool call's input
+    elif response_schema:
+        for item in final_message.content:
+            if item.type == "tool_use":
+                aggregated_response = json.dumps(item.input)
+                break
 
     # Calculate cost of chat
     input_tokens = final_message.usage.input_tokens
@@ -126,7 +169,7 @@ def anthropic_completion_with_backoff(
     if is_promptrace_enabled():
         commit_conversation_trace(messages, aggregated_response, tracer)
 
-    return aggregated_response
+    return ResponseWithThought(text=aggregated_response, thought=thoughts, raw_content=raw_content)
 
 
 @retry(
@@ -183,10 +226,10 @@ async def anthropic_chat_completion_with_backoff(
             if chunk.type == "message_delta":
                 if chunk.delta.stop_reason == "refusal":
                     yield ResponseWithThought(
-
+                        text="...I'm sorry, but my safety filters prevent me from assisting with this query."
                     )
                 elif chunk.delta.stop_reason == "max_tokens":
-                    yield ResponseWithThought(
+                    yield ResponseWithThought(text="...I'm sorry, but I've hit my response length limit.")
                 if chunk.delta.stop_reason in ["refusal", "max_tokens"]:
                     logger.warning(
                         f"LLM Response Prevented for {model_name}: {chunk.delta.stop_reason}.\n"
@@ -199,7 +242,7 @@ async def anthropic_chat_completion_with_backoff(
             # Handle streamed response chunk
             response_chunk: ResponseWithThought = None
             if chunk.delta.type == "text_delta":
-                response_chunk = ResponseWithThought(
+                response_chunk = ResponseWithThought(text=chunk.delta.text)
                 aggregated_response += chunk.delta.text
             if chunk.delta.type == "thinking_delta":
                 response_chunk = ResponseWithThought(thought=chunk.delta.thinking)
@@ -232,13 +275,14 @@ async def anthropic_chat_completion_with_backoff(
         commit_conversation_trace(messages, aggregated_response, tracer)
 
 
-def format_messages_for_anthropic(
+def format_messages_for_anthropic(raw_messages: list[ChatMessage], system_prompt: str = None):
     """
     Format messages for Anthropic
     """
     # Extract system prompt
     system_prompt = system_prompt or ""
-
+    messages = deepcopy(raw_messages)
+    for message in messages:
         if message.role == "system":
             if isinstance(message.content, list):
                 system_prompt += "\n".join([part["text"] for part in message.content if part["type"] == "text"])
@@ -250,15 +294,30 @@ def format_messages_for_anthropic(messages: list[ChatMessage], system_prompt: st
     else:
         system = None
 
-    # Anthropic requires the first message to be a
-
+    # Anthropic requires the first message to be a user message unless its a tool call
+    message_type = messages[0].additional_kwargs.get("message_type", None)
+    if len(messages) == 1 and message_type != "tool_call":
         messages[0].role = "user"
-    elif len(messages) > 1 and messages[0].role == "assistant":
-        messages = messages[1:]
 
-    # Convert image urls to base64 encoded images in Anthropic message format
     for message in messages:
-
+        # Handle tool call and tool result message types from additional_kwargs
+        message_type = message.additional_kwargs.get("message_type")
+        if message_type == "tool_call":
+            pass
+        elif message_type == "tool_result":
+            # Convert tool_result to Anthropic tool_result format
+            content = []
+            for part in message.content:
+                content.append(
+                    {
+                        "type": "tool_result",
+                        "tool_use_id": part["id"],
+                        "content": part["content"],
+                    }
+                )
+            message.content = content
+        # Convert image urls to base64 encoded images in Anthropic message format
+        elif isinstance(message.content, list):
             content = []
             # Sort the content. Anthropic models prefer that text comes after images.
             message.content.sort(key=lambda x: 0 if x["type"] == "image_url" else 1)
@@ -304,18 +363,15 @@ def format_messages_for_anthropic(messages: list[ChatMessage], system_prompt: st
                 if isinstance(block, dict) and "cache_control" in block:
                     del block["cache_control"]
 
-    # Add cache control to the last content block of
-    #
-
-    cache_message = messages[-2]
+    # Add cache control to the last content block of last message.
+    # Caching should improve research efficiency.
+    cache_message = messages[-1]
     if isinstance(cache_message.content, list) and cache_message.content:
         # Add cache control to the last content block only if it's a text block with non-empty content
         last_block = cache_message.content[-1]
-        if (
-
-
-            and last_block.get("text")
-            and last_block.get("text").strip()
+        if isinstance(last_block, dict) and (
+            (last_block.get("type") == "text" and last_block.get("text", "").strip())
+            or (last_block.get("type") == "tool_result" and last_block.get("content", []))
         ):
             last_block["cache_control"] = {"type": "ephemeral"}
 
@@ -326,74 +382,5 @@ def format_messages_for_anthropic(messages: list[ChatMessage], system_prompt: st
     return formatted_messages, system
 
 
-def create_anthropic_tool_definition(
-    response_schema: Type[BaseModel],
-    tool_name: str = None,
-    tool_description: Optional[str] = None,
-) -> anthropic.types.ToolParam:
-    """
-    Converts a response schema BaseModel class into an Anthropic tool definition dictionary.
-
-    This format is expected by Anthropic's API when defining tools the model can use.
-
-    Args:
-        response_schema: The Pydantic BaseModel class to convert.
-            This class defines the response schema for the tool.
-        tool_name: The name for the Anthropic tool (e.g., "get_weather", "plan_next_step").
-        tool_description: Optional description for the Anthropic tool.
-            If None, it attempts to use the Pydantic model's docstring.
-            If that's also missing, a fallback description is generated.
-
-    Returns:
-        An tool definition for Anthropic's API.
-    """
-    model_schema = response_schema.model_json_schema()
-
-    name = tool_name or response_schema.__name__.lower()
-    description = tool_description
-    if description is None:
-        docstring = response_schema.__doc__
-        if docstring:
-            description = dedent(docstring).strip()
-        else:
-            # Fallback description if no explicit one or docstring is provided
-            description = f"Tool named '{name}' accepts specified parameters."
-
-    # Process properties to inline enums and remove $defs dependency
-    processed_properties = {}
-    original_properties = model_schema.get("properties", {})
-    defs = model_schema.get("$defs", {})
-
-    for prop_name, prop_schema in original_properties.items():
-        current_prop_schema = deepcopy(prop_schema)  # Work on a copy
-        # Check for enums defined directly in the property for simpler direct enum definitions.
-        if "$ref" in current_prop_schema:
-            ref_path = current_prop_schema["$ref"]
-            if ref_path.startswith("#/$defs/"):
-                def_name = ref_path.split("/")[-1]
-                if def_name in defs and "enum" in defs[def_name]:
-                    enum_def = defs[def_name]
-                    current_prop_schema["enum"] = enum_def["enum"]
-                    current_prop_schema["type"] = enum_def.get("type", "string")
-                    if "description" not in current_prop_schema and "description" in enum_def:
-                        current_prop_schema["description"] = enum_def["description"]
-                    del current_prop_schema["$ref"]  # Remove the $ref as it's been inlined
-
-        processed_properties[prop_name] = current_prop_schema
-
-    # The input_schema for Anthropic tools is a JSON Schema object.
-    # Pydantic's model_json_schema() provides most of what's needed.
-    input_schema = {
-        "type": "object",
-        "properties": processed_properties,
-    }
-
-    # Include 'required' fields if specified in the Pydantic model
-    if "required" in model_schema and model_schema["required"]:
-        input_schema["required"] = model_schema["required"]
-
-    return anthropic.types.ToolParam(name=name, description=description, input_schema=input_schema)
-
-
 def is_reasoning_model(model_name: str) -> bool:
     return any(model_name.startswith(model) for model in REASONING_MODELS)
@@ -28,6 +28,7 @@ def gemini_send_message_to_model(
     api_base_url=None,
     response_type="text",
     response_schema=None,
+    tools=None,
     model_kwargs=None,
     deepthought=False,
     tracer={},
@@ -37,8 +38,10 @@ def gemini_send_message_to_model(
     """
     model_kwargs = {}
 
+    if tools:
+        model_kwargs["tools"] = tools
     # Monitor for flakiness in 1.5+ models. This would cause unwanted behavior and terminate response early in 1.5 models.
-
+    elif response_type == "json_object" and not model.startswith("gemini-1.5"):
         model_kwargs["response_mime_type"] = "application/json"
         if response_schema:
             model_kwargs["response_schema"] = response_schema
@@ -1,9 +1,10 @@
+import json
 import logging
 import os
 import random
 from copy import deepcopy
 from time import perf_counter
-from typing import AsyncGenerator, AsyncIterator, Dict
+from typing import AsyncGenerator, AsyncIterator, Dict, List
 
 import httpx
 from google import genai
@@ -22,11 +23,13 @@ from tenacity import (
 
 from khoj.processor.conversation.utils import (
     ResponseWithThought,
+    ToolCall,
     commit_conversation_trace,
     get_image_from_base64,
     get_image_from_url,
 )
 from khoj.utils.helpers import (
+    ToolDefinition,
     get_chat_usage_metrics,
     get_gemini_client,
     is_none_or_empty,
@@ -92,29 +95,32 @@ def gemini_completion_with_backoff(
     messages: list[ChatMessage],
     system_prompt: str,
     model_name: str,
-    temperature=1.
+    temperature=1.2,
     api_key=None,
     api_base_url: str = None,
-    model_kwargs=
+    model_kwargs={},
     deepthought=False,
     tracer={},
-) ->
+) -> ResponseWithThought:
     client = gemini_clients.get(api_key)
     if not client:
         client = get_gemini_client(api_key, api_base_url)
         gemini_clients[api_key] = client
 
     formatted_messages, system_instruction = format_messages_for_gemini(messages, system_prompt)
-    response_thoughts
+    raw_content, response_text, response_thoughts = [], "", None
 
-    #
+    # Configure structured output
+    tools = None
     response_schema = None
-    if model_kwargs
+    if model_kwargs.get("tools"):
+        tools = to_gemini_tools(model_kwargs["tools"])
+    elif model_kwargs.get("response_schema"):
         response_schema = clean_response_schema(model_kwargs["response_schema"])
 
     thinking_config = None
     if deepthought and is_reasoning_model(model_name):
-        thinking_config = gtypes.ThinkingConfig(thinking_budget=MAX_REASONING_TOKENS_GEMINI)
+        thinking_config = gtypes.ThinkingConfig(thinking_budget=MAX_REASONING_TOKENS_GEMINI, include_thoughts=True)
 
     max_output_tokens = MAX_OUTPUT_TOKENS_FOR_STANDARD_GEMINI
     if is_reasoning_model(model_name):
@@ -127,16 +133,36 @@ def gemini_completion_with_backoff(
         thinking_config=thinking_config,
         max_output_tokens=max_output_tokens,
         safety_settings=SAFETY_SETTINGS,
-        response_mime_type=model_kwargs.get("response_mime_type", "text/plain")
+        response_mime_type=model_kwargs.get("response_mime_type", "text/plain"),
         response_schema=response_schema,
+        tools=tools,
         seed=seed,
+        top_p=0.95,
         http_options=gtypes.HttpOptions(client_args={"timeout": httpx.Timeout(30.0, read=60.0)}),
     )
 
     try:
         # Generate the response
         response = client.models.generate_content(model=model_name, config=config, contents=formatted_messages)
-
+        if (
+            not response.candidates
+            or not response.candidates[0].content
+            or response.candidates[0].content.parts is None
+        ):
+            raise ValueError(f"Failed to get response from model.")
+        raw_content = [part.model_dump() for part in response.candidates[0].content.parts]
+        if response.function_calls:
+            function_calls = [
+                ToolCall(name=function_call.name, args=function_call.args, id=function_call.id).__dict__
+                for function_call in response.function_calls
+            ]
+            response_text = json.dumps(function_calls)
+        else:
+            # If no function calls, use the text response
+            response_text = response.text
+        response_thoughts = "\n".join(
+            [part.text for part in response.candidates[0].content.parts if part.thought and isinstance(part.text, str)]
+        )
     except gerrors.ClientError as e:
         response = None
         response_text, _ = handle_gemini_response(e.args)
@@ -150,8 +176,14 @@ def gemini_completion_with_backoff(
     input_tokens = response.usage_metadata.prompt_token_count or 0 if response else 0
     output_tokens = response.usage_metadata.candidates_token_count or 0 if response else 0
     thought_tokens = response.usage_metadata.thoughts_token_count or 0 if response else 0
+    cache_read_tokens = response.usage_metadata.cached_content_token_count or 0 if response else 0
     tracer["usage"] = get_chat_usage_metrics(
-        model_name,
+        model_name,
+        input_tokens,
+        output_tokens,
+        cache_read_tokens=cache_read_tokens,
+        thought_tokens=thought_tokens,
+        usage=tracer.get("usage"),
     )
 
     # Validate the response. If empty, raise an error to retry.
@@ -165,7 +197,7 @@ def gemini_completion_with_backoff(
     if is_promptrace_enabled():
         commit_conversation_trace(messages, response_text, tracer)
 
-    return response_text
+    return ResponseWithThought(text=response_text, thought=response_thoughts, raw_content=raw_content)
 
 
 @retry(
@@ -201,10 +233,12 @@ async def gemini_chat_completion_with_backoff(
     if is_reasoning_model(model_name):
         max_output_tokens = MAX_OUTPUT_TOKENS_FOR_REASONING_GEMINI
 
+    top_p = 0.95
     seed = int(os.getenv("KHOJ_LLM_SEED")) if os.getenv("KHOJ_LLM_SEED") else None
     config = gtypes.GenerateContentConfig(
         system_instruction=system_instruction,
         temperature=temperature,
+        top_p=top_p,
         thinking_config=thinking_config,
         max_output_tokens=max_output_tokens,
         stop_sequences=["Notes:\n["],
@@ -231,7 +265,7 @@ async def gemini_chat_completion_with_backoff(
         # handle safety, rate-limit, other finish reasons
         stop_message, stopped = handle_gemini_response(chunk.candidates, chunk.prompt_feedback)
         if stopped:
-            yield ResponseWithThought(
+            yield ResponseWithThought(text=stop_message)
             logger.warning(
                 f"LLM Response Prevented for {model_name}: {stop_message}.\n"
                 + f"Last Message by {messages[-1].role}: {messages[-1].content}"
@@ -244,7 +278,7 @@ async def gemini_chat_completion_with_backoff(
                 yield ResponseWithThought(thought=part.text)
             elif part.text:
                 aggregated_response += part.text
-                yield ResponseWithThought(
+                yield ResponseWithThought(text=part.text)
     # Calculate cost of chat
     input_tokens = final_chunk.usage_metadata.prompt_token_count or 0 if final_chunk else 0
     output_tokens = final_chunk.usage_metadata.candidates_token_count or 0 if final_chunk else 0
@@ -343,8 +377,24 @@ def format_messages_for_gemini(
     system_prompt = None if is_none_or_empty(system_prompt) else system_prompt
 
     for message in messages:
+        if message.role == "assistant":
+            message.role = "model"
+
+        # Handle tool call and tool result message types from additional_kwargs
+        message_type = message.additional_kwargs.get("message_type")
+        if message_type == "tool_call":
+            pass
+        elif message_type == "tool_result":
+            # Convert tool_result to Gemini function response format
+            # Need to find the corresponding function call from previous messages
+            tool_result_msg_content = []
+            for part in message.content:
+                tool_result_msg_content.append(
+                    gtypes.Part.from_function_response(name=part["name"], response={"result": part["content"]})
+                )
+            message.content = tool_result_msg_content
         # Convert message content to string list from chatml dictionary list
-
+        elif isinstance(message.content, list):
             # Convert image_urls to PIL.Image and place them at beginning of list (better for Gemini)
             message_content = []
             for item in sorted(message.content, key=lambda x: 0 if x["type"] == "image_url" else 1):
@@ -364,16 +414,13 @@ def format_messages_for_gemini(
                     messages.remove(message)
                     continue
             message.content = message_content
-        elif isinstance(message.content, str):
+        elif isinstance(message.content, str) and message.content.strip():
            message.content = [gtypes.Part.from_text(text=message.content)]
         else:
             logger.error(f"Dropping invalid type: {type(message.content)} of message content: {message.content}")
             messages.remove(message)
             continue
 
-        if message.role == "assistant":
-            message.role = "model"
-
     if len(messages) == 1:
         messages[0].role = "user"
 
@@ -401,3 +448,21 @@ def is_reasoning_model(model_name: str) -> bool:
     Check if the model is a reasoning model.
     """
     return model_name.startswith("gemini-2.5")
+
+
+def to_gemini_tools(tools: List[ToolDefinition]) -> List[gtypes.ToolDict] | None:
+    "Transform tool definitions from standard format to Gemini format."
+    gemini_tools = [
+        gtypes.ToolDict(
+            function_declarations=[
+                gtypes.FunctionDeclarationDict(
+                    name=tool.name,
+                    description=tool.description,
+                    parameters=tool.schema,
+                )
+                for tool in tools
+            ]
+        )
+    ]
+
+    return gemini_tools or None
@@ -145,12 +145,12 @@ async def converse_offline(
             aggregated_response += response_delta
             # Put chunk into the asyncio queue (non-blocking)
             try:
-                queue.put_nowait(ResponseWithThought(
+                queue.put_nowait(ResponseWithThought(text=response_delta))
             except asyncio.QueueFull:
                 # Should not happen with default queue size unless consumer is very slow
                 logger.warning("Asyncio queue full during offline LLM streaming.")
                 # Potentially block here or handle differently if needed
-                asyncio.run(queue.put(ResponseWithThought(
+                asyncio.run(queue.put(ResponseWithThought(text=response_delta)))
 
         # Log the time taken to stream the entire response
         logger.info(f"Chat streaming took: {perf_counter() - start_time:.3f} seconds")
@@ -221,4 +221,4 @@ def send_message_to_model_offline(
     if is_promptrace_enabled():
         commit_conversation_trace(messages, response_text, tracer)
 
-    return response_text
+    return ResponseWithThought(text=response_text)
@@ -1,25 +1,24 @@
 import logging
 from datetime import datetime
-from typing import AsyncGenerator, Dict, List, Optional
-
-from openai.lib._pydantic import _ensure_strict_json_schema
-from pydantic import BaseModel
+from typing import Any, AsyncGenerator, Dict, List, Optional
 
 from khoj.database.models import Agent, ChatMessageModel, ChatModel
 from khoj.processor.conversation import prompts
 from khoj.processor.conversation.openai.utils import (
     chat_completion_with_backoff,
+    clean_response_schema,
     completion_with_backoff,
-
+    get_structured_output_support,
+    to_openai_tools,
 )
 from khoj.processor.conversation.utils import (
-    JsonSupport,
     OperatorRun,
     ResponseWithThought,
+    StructuredOutputSupport,
     generate_chatml_messages_with_context,
     messages_to_print,
 )
-from khoj.utils.helpers import is_none_or_empty, truncate_code_context
+from khoj.utils.helpers import ToolDefinition, is_none_or_empty, truncate_code_context
 from khoj.utils.rawconfig import FileAttachment, LocationData
 from khoj.utils.yaml import yaml_dump
 
@@ -32,6 +31,7 @@ def send_message_to_model(
     model,
     response_type="text",
     response_schema=None,
+    tools: list[ToolDefinition] = None,
     deepthought=False,
     api_base_url=None,
     tracer: dict = {},
@@ -40,9 +40,11 @@ def send_message_to_model(
     Send message to model
     """
 
-    model_kwargs = {}
-    json_support =
-    if
+    model_kwargs: Dict[str, Any] = {}
+    json_support = get_structured_output_support(model, api_base_url)
+    if tools and json_support == StructuredOutputSupport.TOOL:
+        model_kwargs["tools"] = to_openai_tools(tools)
+    elif response_schema and json_support >= StructuredOutputSupport.SCHEMA:
         # Drop unsupported fields from schema passed to OpenAI APi
         cleaned_response_schema = clean_response_schema(response_schema)
         model_kwargs["response_format"] = {
@@ -53,7 +55,7 @@ def send_message_to_model(
                 "strict": True,
             },
         }
-    elif response_type == "json_object" and json_support ==
+    elif response_type == "json_object" and json_support == StructuredOutputSupport.OBJECT:
         model_kwargs["response_format"] = {"type": response_type}
 
     # Get Response from GPT
@@ -171,30 +173,3 @@ async def converse_openai(
         tracer=tracer,
     ):
         yield chunk
-
-
-def clean_response_schema(schema: BaseModel | dict) -> dict:
-    """
-    Format response schema to be compatible with OpenAI API.
-
-    Clean the response schema by removing unsupported fields.
-    """
-    # Normalize schema to OpenAI compatible JSON schema format
-    schema_json = schema if isinstance(schema, dict) else schema.model_json_schema()
-    schema_json = _ensure_strict_json_schema(schema_json, path=(), root=schema_json)
-
-    # Recursively drop unsupported fields from schema passed to OpenAI API
-    # See https://platform.openai.com/docs/guides/structured-outputs#supported-schemas
-    fields_to_exclude = ["minItems", "maxItems"]
-    if isinstance(schema_json, dict) and isinstance(schema_json.get("properties"), dict):
-        for _, prop_value in schema_json["properties"].items():
-            if isinstance(prop_value, dict):
-                # Remove specified fields from direct properties
-                for field in fields_to_exclude:
-                    prop_value.pop(field, None)
-                # Recursively remove specified fields from child properties
-                if "items" in prop_value and isinstance(prop_value["items"], dict):
-                    clean_response_schema(prop_value["items"])
-
-    # Return cleaned schema
-    return schema_json