PyPI - khoj - Versions diffs - 1.41.1.dev90__py3-none-any.whl → 1.41.1.dev107__py3-none-any.whl - Mend

khoj 1.41.1.dev90__py3-none-any.whl → 1.41.1.dev107__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (66) hide show

khoj/processor/conversation/utils.py CHANGED Viewed

@@ -73,6 +73,10 @@ model_to_prompt_size = {
     "claude-3-7-sonnet-20250219": 60000,
     "claude-3-7-sonnet-latest": 60000,
     "claude-3-5-haiku-20241022": 60000,
+    "claude-sonnet-4": 60000,
+    "claude-sonnet-4-20250514": 60000,
+    "claude-opus-4": 60000,
+    "claude-opus-4-20250514": 60000,
     # Offline Models
     "bartowski/Qwen2.5-14B-Instruct-GGUF": 20000,
     "bartowski/Meta-Llama-3.1-8B-Instruct-GGUF": 20000,
@@ -106,9 +110,12 @@ class InformationCollectionIteration:
 def construct_iteration_history(
-    query: str, previous_iterations: List[InformationCollectionIteration], previous_iteration_prompt: str
+    previous_iterations: List[InformationCollectionIteration],
+    previous_iteration_prompt: str,
+    query: str = None,
 ) -> list[dict]:
-    previous_iterations_history = []
+    iteration_history: list[dict] = []
+    previous_iteration_messages: list[dict] = []
     for idx, iteration in enumerate(previous_iterations):
         iteration_data = previous_iteration_prompt.format(
             tool=iteration.tool,
@@ -117,23 +124,19 @@ def construct_iteration_history(
             index=idx + 1,
         )
-        previous_iterations_history.append(iteration_data)
+        previous_iteration_messages.append({"type": "text", "text": iteration_data})
-    return (
-        [
-            {
-                "by": "you",
-                "message": query,
-            },
+    if previous_iteration_messages:
+        if query:
+            iteration_history.append({"by": "you", "message": query})
+        iteration_history.append(
             {
                 "by": "khoj",
                 "intent": {"type": "remember", "query": query},
-                "message": previous_iterations_history,
-            },
-        ]
-        if previous_iterations_history
-        else []
-    )
+                "message": previous_iteration_messages,
+            }
+        )
+    return iteration_history
 def construct_chat_history(conversation_history: dict, n: int = 4, agent_name="AI") -> str:
@@ -281,6 +284,7 @@ async def save_to_conversation_log(
     generated_images: List[str] = [],
     raw_generated_files: List[FileAttachment] = [],
     generated_mermaidjs_diagram: str = None,
+    research_results: Optional[List[InformationCollectionIteration]] = None,
     train_of_thought: List[Any] = [],
     tracer: Dict[str, Any] = {},
 ):
@@ -298,6 +302,7 @@ async def save_to_conversation_log(
         "onlineContext": online_results,
         "codeContext": code_results,
         "operatorContext": operator_results,
+        "researchContext": [vars(r) for r in research_results] if research_results and not chat_response else None,
         "automationId": automation_id,
         "trainOfThought": train_of_thought,
         "turnId": turn_id,
@@ -337,7 +342,7 @@ Khoj: "{chat_response}"
 def construct_structured_message(
-    message: list[str] | str,
+    message: list[dict] | str,
     images: list[str],
     model_type: str,
     vision_enabled: bool,
@@ -351,11 +356,9 @@ def construct_structured_message(
         ChatModel.ModelType.GOOGLE,
         ChatModel.ModelType.ANTHROPIC,
     ]:
-        message = [message] if isinstance(message, str) else message
-        constructed_messages: List[dict[str, Any]] = [
-            {"type": "text", "text": message_part} for message_part in message
-        ]
+        constructed_messages: List[dict[str, Any]] = (
+            [{"type": "text", "text": message}] if isinstance(message, str) else message
+        )
         if not is_none_or_empty(attached_file_context):
             constructed_messages.append({"type": "text", "text": attached_file_context})
@@ -364,6 +367,7 @@ def construct_structured_message(
                 constructed_messages.append({"type": "image_url", "image_url": {"url": image}})
         return constructed_messages
+    message = message if isinstance(message, str) else "\n\n".join(m["text"] for m in message)
     if not is_none_or_empty(attached_file_context):
         return f"{attached_file_context}\n\n{message}"
@@ -387,7 +391,7 @@ def gather_raw_query_files(
 def generate_chatml_messages_with_context(
-    user_message,
+    user_message: str,
     system_message: str = None,
     conversation_log={},
     model_name="gpt-4o-mini",
@@ -417,7 +421,7 @@ def generate_chatml_messages_with_context(
     # Extract Chat History for Context
     chatml_messages: List[ChatMessage] = []
     for chat in conversation_log.get("chat", []):
-        message_context = ""
+        message_context = []
         message_attached_files = ""
         generated_assets = {}
@@ -429,16 +433,6 @@ def generate_chatml_messages_with_context(
         if chat["by"] == "khoj" and "excalidraw" in chat["intent"].get("type", ""):
             chat_message = chat["intent"].get("inferred-queries")[0]
-        if not is_none_or_empty(chat.get("context")):
-            references = "\n\n".join(
-                {
-                    f"# File: {item['file']}\n## {item['compiled']}\n"
-                    for item in chat.get("context") or []
-                    if isinstance(item, dict)
-                }
-            )
-            message_context += f"{prompts.notes_conversation.format(references=references)}\n\n"
         if chat.get("queryFiles"):
             raw_query_files = chat.get("queryFiles")
             query_files_dict = dict()
@@ -449,15 +443,38 @@ def generate_chatml_messages_with_context(
             chatml_messages.append(ChatMessage(content=message_attached_files, role=role))
         if not is_none_or_empty(chat.get("onlineContext")):
-            message_context += f"{prompts.online_search_conversation.format(online_results=chat.get('onlineContext'))}"
+            message_context += [
+                {
+                    "type": "text",
+                    "text": f"{prompts.online_search_conversation.format(online_results=chat.get('onlineContext'))}",
+                }
+            ]
         if not is_none_or_empty(chat.get("codeContext")):
-            message_context += f"{prompts.code_executed_context.format(code_results=chat.get('codeContext'))}"
+            message_context += [
+                {
+                    "type": "text",
+                    "text": f"{prompts.code_executed_context.format(code_results=chat.get('codeContext'))}",
+                }
+            ]
         if not is_none_or_empty(chat.get("operatorContext")):
-            message_context += (
-                f"{prompts.operator_execution_context.format(operator_results=chat.get('operatorContext'))}"
+            message_context += [
+                {
+                    "type": "text",
+                    "text": f"{prompts.operator_execution_context.format(operator_results=chat.get('operatorContext'))}",
+                }
+            ]
+        if not is_none_or_empty(chat.get("context")):
+            references = "\n\n".join(
+                {
+                    f"# File: {item['file']}\n## {item['compiled']}\n"
+                    for item in chat.get("context") or []
+                    if isinstance(item, dict)
+                }
             )
+            message_context += [{"type": "text", "text": f"{prompts.notes_conversation.format(references=references)}"}]
         if not is_none_or_empty(message_context):
             reconstructed_context_message = ChatMessage(content=message_context, role="user")
@@ -697,8 +714,9 @@ def clean_code_python(code: str):
 def load_complex_json(json_str):
     """
-    Preprocess a raw JSON string to escape unescaped double quotes within value strings,
-    while preserving the JSON structure and already escaped quotes.
+    Preprocess a raw JSON string to
+    - escape unescaped double quotes within value strings while preserving the JSON structure and already escaped quotes.
+    - remove suffix after the first valid JSON object,
     """
     def replace_unescaped_quotes(match):
@@ -726,9 +744,20 @@ def load_complex_json(json_str):
     for loads in json_loaders_to_try:
         try:
             return loads(processed)
-        except (json.JSONDecodeError, pyjson5.Json5Exception) as e:
-            errors.append(f"{type(e).__name__}: {str(e)}")
+        except (json.JSONDecodeError, pyjson5.Json5Exception) as e_load:
+            loader_name = loads.__name__
+            errors.append(f"{loader_name} (initial parse): {type(e_load).__name__}: {str(e_load)}")
+            # Handle plain text suffixes by slicing at error position
+            if hasattr(e_load, "pos") and 0 < e_load.pos < len(processed):
+                try:
+                    sliced = processed[: e_load.pos].strip()
+                    if sliced:
+                        return loads(sliced)
+                except Exception as e_slice:
+                    errors.append(
+                        f"{loader_name} after slice at {e_load.pos}: {type(e_slice).__name__}: {str(e_slice)}"
+                    )
     # If all loaders fail, raise the aggregated error
     raise ValueError(
         f"Failed to load JSON with errors: {'; '.join(errors)}\n\n"

khoj/processor/operator/grounding_agent_uitars.py CHANGED Viewed

@@ -13,7 +13,7 @@ from io import BytesIO
 from typing import Any, List
 import numpy as np
-from openai import AzureOpenAI, OpenAI
+from openai import AsyncAzureOpenAI, AsyncOpenAI
 from openai.types.chat import ChatCompletion
 from PIL import Image
@@ -72,7 +72,7 @@ class GroundingAgentUitars:
     def __init__(
         self,
         model_name: str,
-        client: OpenAI | AzureOpenAI,
+        client: AsyncOpenAI | AsyncAzureOpenAI,
         max_iterations=50,
         environment_type: Literal["computer", "web"] = "computer",
         runtime_conf: dict = {

khoj/processor/operator/operate_browser.py CHANGED Viewed

@@ -4,8 +4,6 @@ import logging
 import os
 from typing import Callable, List, Optional
-import requests
 from khoj.database.adapters import AgentAdapters, ConversationAdapters
 from khoj.database.models import Agent, ChatModel, KhojUser
 from khoj.processor.operator.operator_actions import *
@@ -49,9 +47,9 @@ async def operate_browser(
     # Initialize Agent
     max_iterations = int(os.getenv("KHOJ_OPERATOR_ITERATIONS", 40))
     operator_agent: OperatorAgent
-    if reasoning_model.name.startswith("gpt-4o"):
+    if is_operator_model(reasoning_model.name) == ChatModel.ModelType.OPENAI:
         operator_agent = OpenAIOperatorAgent(query, reasoning_model, max_iterations, tracer)
-    elif reasoning_model.name.startswith("claude-3-7-sonnet"):
+    elif is_operator_model(reasoning_model.name) == ChatModel.ModelType.ANTHROPIC:
         operator_agent = AnthropicOperatorAgent(query, reasoning_model, max_iterations, tracer)
     else:
         grounding_model_name = "ui-tars-1.5"
@@ -150,3 +148,18 @@ async def operate_browser(
         "result": user_input_message or response,
         "webpages": [{"link": url, "snippet": ""} for url in environment.visited_urls],
     }
+def is_operator_model(model: str) -> ChatModel.ModelType | None:
+    """Check if the model is an operator model."""
+    operator_models = {
+        "gpt-4o": ChatModel.ModelType.OPENAI,
+        "claude-3-7-sonnet": ChatModel.ModelType.ANTHROPIC,
+        "claude-sonnet-4": ChatModel.ModelType.ANTHROPIC,
+        "claude-opus-4": ChatModel.ModelType.ANTHROPIC,
+        "ui-tars-1.5": ChatModel.ModelType.OFFLINE,
+    }
+    for operator_model in operator_models:
+        if model.startswith(operator_model):
+            return operator_models[operator_model]  # type: ignore[return-value]
+    return None

khoj/processor/operator/operator_agent_anthropic.py CHANGED Viewed

@@ -3,10 +3,11 @@ import json
 import logging
 from copy import deepcopy
 from datetime import datetime
-from typing import Any, List, Optional, cast
+from typing import List, Optional, cast
 from anthropic.types.beta import BetaContentBlock
+from khoj.processor.conversation.anthropic.utils import is_reasoning_model
 from khoj.processor.operator.operator_actions import *
 from khoj.processor.operator.operator_agent_base import (
     AgentActResult,
@@ -25,8 +26,7 @@ class AnthropicOperatorAgent(OperatorAgent):
         client = get_anthropic_async_client(
             self.vision_model.ai_model_api.api_key, self.vision_model.ai_model_api.api_base_url
         )
-        tool_version = "2025-01-24"
-        betas = [f"computer-use-{tool_version}", "token-efficient-tools-2025-02-19"]
+        betas = self.model_default_headers()
         temperature = 1.0
         actions: List[OperatorAction] = []
         action_results: List[dict] = []
@@ -56,7 +56,7 @@ class AnthropicOperatorAgent(OperatorAgent):
         tools = [
             {
-                "type": f"computer_20250124",
+                "type": self.model_default_tool("computer"),
                 "name": "computer",
                 "display_width_px": 1024,
                 "display_height_px": 768,
@@ -78,7 +78,7 @@ class AnthropicOperatorAgent(OperatorAgent):
         ]
         thinking: dict[str, str | int] = {"type": "disabled"}
-        if self.vision_model.name.startswith("claude-3-7"):
+        if is_reasoning_model(self.vision_model.name):
             thinking = {"type": "enabled", "budget_tokens": 1024}
         messages_for_api = self._format_message_for_api(self.messages)
@@ -381,3 +381,22 @@ class AnthropicOperatorAgent(OperatorAgent):
             return None
         return coord
+    def model_default_tool(self, tool_type: Literal["computer", "editor", "terminal"]) -> str:
+        """Get the default tool of specified type for the given model."""
+        if self.vision_model.name.startswith("claude-3-7-sonnet"):
+            if tool_type == "computer":
+                return "computer_20250124"
+        elif self.vision_model.name.startswith("claude-sonnet-4") or self.vision_model.name.startswith("claude-opus-4"):
+            if tool_type == "computer":
+                return "computer_20250124"
+        raise ValueError(f"Unsupported tool type for model '{self.vision_model.name}': {tool_type}")
+    def model_default_headers(self) -> list[str]:
+        """Get the default computer use headers for the given model."""
+        if self.vision_model.name.startswith("claude-3-7-sonnet"):
+            return [f"computer-use-2025-01-24", "token-efficient-tools-2025-02-19"]
+        elif self.vision_model.name.startswith("claude-sonnet-4") or self.vision_model.name.startswith("claude-opus-4"):
+            return ["computer-use-2025-01-24"]
+        else:
+            return []

khoj/routers/api_chat.py CHANGED Viewed

@@ -682,11 +682,13 @@ async def chat(
     timezone = body.timezone
     raw_images = body.images
     raw_query_files = body.files
+    interrupt_flag = body.interrupt
     async def event_generator(q: str, images: list[str]):
         start_time = time.perf_counter()
         ttft = None
         chat_metadata: dict = {}
+        conversation = None
         user: KhojUser = request.user.object
         is_subscribed = has_required_scope(request, ["premium"])
         q = unquote(q)
@@ -720,6 +722,20 @@ async def chat(
             for file in raw_query_files:
                 query_files[file.name] = file.content
+        research_results: List[InformationCollectionIteration] = []
+        online_results: Dict = dict()
+        code_results: Dict = dict()
+        operator_results: Dict[str, str] = {}
+        compiled_references: List[Any] = []
+        inferred_queries: List[Any] = []
+        attached_file_context = gather_raw_query_files(query_files)
+        generated_images: List[str] = []
+        generated_files: List[FileAttachment] = []
+        generated_mermaidjs_diagram: str = None
+        generated_asset_results: Dict = dict()
+        program_execution_context: List[str] = []
         # Create a task to monitor for disconnections
         disconnect_monitor_task = None
@@ -727,8 +743,34 @@ async def chat(
             try:
                 msg = await request.receive()
                 if msg["type"] == "http.disconnect":
-                    logger.debug(f"User {user} disconnected from {common.client} client.")
+                    logger.debug(f"Request cancelled. User {user} disconnected from {common.client} client.")
                     cancellation_event.set()
+                    # ensure partial chat state saved on interrupt
+                    # shield the save against task cancellation
+                    if conversation:
+                        await asyncio.shield(
+                            save_to_conversation_log(
+                                q,
+                                chat_response="",
+                                user=user,
+                                meta_log=meta_log,
+                                compiled_references=compiled_references,
+                                online_results=online_results,
+                                code_results=code_results,
+                                operator_results=operator_results,
+                                research_results=research_results,
+                                inferred_queries=inferred_queries,
+                                client_application=request.user.client_app,
+                                conversation_id=conversation_id,
+                                query_images=uploaded_images,
+                                train_of_thought=train_of_thought,
+                                raw_query_files=raw_query_files,
+                                generated_images=generated_images,
+                                raw_generated_files=generated_asset_results,
+                                generated_mermaidjs_diagram=generated_mermaidjs_diagram,
+                                tracer=tracer,
+                            )
+                        )
             except Exception as e:
                 logger.error(f"Error in disconnect monitor: {e}")
@@ -746,7 +788,6 @@ async def chat(
             nonlocal ttft, train_of_thought
             event_delimiter = "␃🔚␗"
             if cancellation_event.is_set():
-                logger.debug(f"User {user} disconnected from {common.client} client. Setting cancellation event.")
                 return
             try:
                 if event_type == ChatEvent.END_LLM_RESPONSE:
@@ -770,9 +811,6 @@ async def chat(
                     yield data
                 elif event_type == ChatEvent.REFERENCES or ChatEvent.METADATA or stream:
                     yield json.dumps({"type": event_type.value, "data": data}, ensure_ascii=False)
-            except asyncio.CancelledError as e:
-                if cancellation_event.is_set():
-                    logger.debug(f"Request cancelled. User {user} disconnected from {common.client} client: {e}.")
             except Exception as e:
                 if not cancellation_event.is_set():
                     logger.error(
@@ -860,9 +898,9 @@ async def chat(
             async for result in send_llm_response(f"Conversation {conversation_id} not found", tracer.get("usage")):
                 yield result
             return
-        conversation_id = conversation.id
+        conversation_id = str(conversation.id)
-        async for event in send_event(ChatEvent.METADATA, {"conversationId": str(conversation_id), "turnId": turn_id}):
+        async for event in send_event(ChatEvent.METADATA, {"conversationId": conversation_id, "turnId": turn_id}):
             yield event
         agent: Agent | None = None
@@ -883,21 +921,53 @@ async def chat(
         user_message_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
         meta_log = conversation.conversation_log
-        researched_results = ""
-        online_results: Dict = dict()
-        code_results: Dict = dict()
-        operator_results: Dict[str, str] = {}
-        generated_asset_results: Dict = dict()
-        ## Extract Document References
-        compiled_references: List[Any] = []
-        inferred_queries: List[Any] = []
-        file_filters = conversation.file_filters if conversation and conversation.file_filters else []
-        attached_file_context = gather_raw_query_files(query_files)
+        # If interrupt flag is set, wait for the previous turn to be saved before proceeding
+        if interrupt_flag:
+            max_wait_time = 20.0  # seconds
+            wait_interval = 0.3  # seconds
+            wait_start = wait_current = time.time()
+            while wait_current - wait_start < max_wait_time:
+                # Refresh conversation to check if interrupted message saved to DB
+                conversation = await ConversationAdapters.aget_conversation_by_user(
+                    user,
+                    client_application=request.user.client_app,
+                    conversation_id=conversation_id,
+                )
+                if (
+                    conversation
+                    and conversation.messages
+                    and conversation.messages[-1].by == "khoj"
+                    and not conversation.messages[-1].message
+                ):
+                    logger.info(f"Detected interrupted message save to conversation {conversation_id}.")
+                    break
+                await asyncio.sleep(wait_interval)
+                wait_current = time.time()
-        generated_images: List[str] = []
-        generated_files: List[FileAttachment] = []
-        generated_mermaidjs_diagram: str = None
-        program_execution_context: List[str] = []
+            if wait_current - wait_start >= max_wait_time:
+                logger.warning(
+                    f"Timeout waiting to load interrupted context from conversation {conversation_id}. Proceed without previous context."
+                )
+        # If interrupted message in DB
+        if (
+            conversation
+            and conversation.messages
+            and conversation.messages[-1].by == "khoj"
+            and not conversation.messages[-1].message
+        ):
+            # Populate context from interrupted message
+            last_message = conversation.messages[-1]
+            online_results = {key: val.model_dump() for key, val in last_message.onlineContext.items() or []}
+            code_results = {key: val.model_dump() for key, val in last_message.codeContext.items() or []}
+            operator_results = last_message.operatorContext or {}
+            compiled_references = [ref.model_dump() for ref in last_message.context or []]
+            research_results = [
+                InformationCollectionIteration(**iter_dict) for iter_dict in last_message.researchContext or []
+            ]
+            # Drop the interrupted message from conversation history
+            meta_log["chat"].pop()
+            logger.info(f"Loaded interrupted partial context from conversation {conversation_id}.")
         if conversation_commands == [ConversationCommand.Default]:
             try:
@@ -936,6 +1006,7 @@ async def chat(
                 return
         defiltered_query = defilter_query(q)
+        file_filters = conversation.file_filters if conversation and conversation.file_filters else []
         if conversation_commands == [ConversationCommand.Research]:
             async for research_result in execute_information_collection(
@@ -943,12 +1014,13 @@ async def chat(
                 query=defiltered_query,
                 conversation_id=conversation_id,
                 conversation_history=meta_log,
+                previous_iterations=research_results,
                 query_images=uploaded_images,
                 agent=agent,
                 send_status_func=partial(send_event, ChatEvent.STATUS),
                 user_name=user_name,
                 location=location,
-                file_filters=conversation.file_filters if conversation else [],
+                file_filters=file_filters,
                 query_files=attached_file_context,
                 tracer=tracer,
                 cancellation_event=cancellation_event,
@@ -963,17 +1035,16 @@ async def chat(
                             compiled_references.extend(research_result.context)
                         if research_result.operatorContext:
                             operator_results.update(research_result.operatorContext)
-                        researched_results += research_result.summarizedResult
+                        research_results.append(research_result)
                 else:
                     yield research_result
             # researched_results = await extract_relevant_info(q, researched_results, agent)
             if state.verbose > 1:
-                logger.debug(f"Researched Results: {researched_results}")
+                logger.debug(f'Researched Results: {"".join(r.summarizedResult for r in research_results)}')
         used_slash_summarize = conversation_commands == [ConversationCommand.Summarize]
-        file_filters = conversation.file_filters if conversation else []
         # Skip trying to summarize if
         if (
             # summarization intent was inferred
@@ -1362,7 +1433,7 @@ async def chat(
         # Check if the user has disconnected
         if cancellation_event.is_set():
-            logger.debug(f"User {user} disconnected from {common.client} client. Stopping LLM response.")
+            logger.debug(f"Stopping LLM response to user {user} on {common.client} client.")
             # Cancel the disconnect monitor task if it is still running
             await cancel_disconnect_monitor()
             return
@@ -1379,14 +1450,13 @@ async def chat(
             online_results,
             code_results,
             operator_results,
+            research_results,
             inferred_queries,
             conversation_commands,
             user,
             request.user.client_app,
-            conversation_id,
             location,
             user_name,
-            researched_results,
             uploaded_images,
             train_of_thought,
             attached_file_context,

khoj/routers/api_model.py CHANGED Viewed

@@ -72,7 +72,7 @@ async def update_chat_model(
     if chat_model is None:
         return Response(status_code=404, content=json.dumps({"status": "error", "message": "Chat model not found"}))
     if not subscribed and chat_model.price_tier != PriceTier.FREE:
-        raise Response(
+        return Response(
             status_code=403,
             content=json.dumps({"status": "error", "message": "Subscribe to switch to this chat model"}),
         )
@@ -108,7 +108,7 @@ async def update_voice_model(
     if voice_model is None:
         return Response(status_code=404, content=json.dumps({"status": "error", "message": "Voice model not found"}))
     if not subscribed and voice_model.price_tier != PriceTier.FREE:
-        raise Response(
+        return Response(
             status_code=403,
             content=json.dumps({"status": "error", "message": "Subscribe to switch to this voice model"}),
         )
@@ -143,7 +143,7 @@ async def update_paint_model(
     if image_model is None:
         return Response(status_code=404, content=json.dumps({"status": "error", "message": "Image model not found"}))
     if not subscribed and image_model.price_tier != PriceTier.FREE:
-        raise Response(
+        return Response(
             status_code=403,
             content=json.dumps({"status": "error", "message": "Subscribe to switch to this image model"}),
         )

khoj 1.41.1.dev90__py3-none-any.whl → 1.41.1.dev107__py3-none-any.whl

khoj 1.41.1.dev90__py3-none-any.whl → 1.41.1.dev107__py3-none-any.whl