PyPI - khoj - Versions diffs - 2.0.0b12.dev5__py3-none-any.whl → 2.0.0b13__py3-none-any.whl - Mend

khoj 2.0.0b12.dev5__py3-none-any.whl → 2.0.0b13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (187) hide show

khoj/processor/conversation/utils.py CHANGED Viewed

@@ -1,7 +1,6 @@
 import base64
 import json
 import logging
-import math
 import mimetypes
 import os
 import re
@@ -18,9 +17,7 @@ import requests
 import tiktoken
 import yaml
 from langchain_core.messages.chat import ChatMessage
-from llama_cpp import LlamaTokenizer
-from llama_cpp.llama import Llama
-from pydantic import BaseModel, ConfigDict, ValidationError, create_model
+from pydantic import BaseModel, ConfigDict, ValidationError
 from transformers import AutoTokenizer, PreTrainedTokenizer, PreTrainedTokenizerFast
 from khoj.database.adapters import ConversationAdapters
@@ -32,7 +29,6 @@ from khoj.database.models import (
     KhojUser,
 )
 from khoj.processor.conversation import prompts
-from khoj.processor.conversation.offline.utils import download_model, infer_max_tokens
 from khoj.search_filter.base_filter import BaseFilter
 from khoj.search_filter.date_filter import DateFilter
 from khoj.search_filter.file_filter import FileFilter
@@ -50,7 +46,11 @@ from khoj.utils.yaml import yaml_dump
 logger = logging.getLogger(__name__)
 try:
-    from git import Repo
+    import importlib.util
+    git_spec = importlib.util.find_spec("git")
+    if git_spec is None:
+        raise ImportError
 except ImportError:
     if is_promptrace_enabled():
         logger.warning("GitPython not installed. `pip install gitpython` to use prompt tracer.")
@@ -68,6 +68,9 @@ model_to_prompt_size = {
     "o3": 60000,
     "o3-pro": 30000,
     "o4-mini": 90000,
+    "gpt-5-2025-08-07": 120000,
+    "gpt-5-mini-2025-08-07": 120000,
+    "gpt-5-nano-2025-08-07": 120000,
     # Google Models
     "gemini-2.5-flash": 120000,
     "gemini-2.5-pro": 60000,
@@ -85,12 +88,6 @@ model_to_prompt_size = {
     "claude-sonnet-4-20250514": 60000,
     "claude-opus-4-0": 60000,
     "claude-opus-4-20250514": 60000,
-    # Offline Models
-    "bartowski/Qwen2.5-14B-Instruct-GGUF": 20000,
-    "bartowski/Meta-Llama-3.1-8B-Instruct-GGUF": 20000,
-    "bartowski/Llama-3.2-3B-Instruct-GGUF": 20000,
-    "bartowski/gemma-2-9b-it-GGUF": 6000,
-    "bartowski/gemma-2-2b-it-GGUF": 6000,
 }
 model_to_tokenizer: Dict[str, str] = {}
@@ -303,7 +300,7 @@ def construct_chat_history_for_operator(conversation_history: List[ChatMessageMo
         if chat.by == "you" and chat.message:
             content = [{"type": "text", "text": chat.message}]
             for file in chat.queryFiles or []:
-                content += [{"type": "text", "text": f'## File: {file["name"]}\n\n{file["content"]}'}]
+                content += [{"type": "text", "text": f"## File: {file['name']}\n\n{file['content']}"}]
             user_message = AgentMessage(role="user", content=content)
         elif chat.by == "khoj" and chat.message:
             chat_history += [user_message, AgentMessage(role="assistant", content=chat.message)]
@@ -320,7 +317,10 @@ def construct_tool_chat_history(
     If no tool is provided inferred query for all tools used are added.
     """
     chat_history: list = []
-    base_extractor: Callable[[ResearchIteration], List[str]] = lambda iteration: []
+    def base_extractor(iteration: ResearchIteration) -> List[str]:
+        return []
     extract_inferred_query_map: Dict[ConversationCommand, Callable[[ResearchIteration], List[str]]] = {
         ConversationCommand.SemanticSearchFiles: (
             lambda iteration: [c["query"] for c in iteration.context] if iteration.context else []
@@ -331,7 +331,7 @@ def construct_tool_chat_history(
         ConversationCommand.ReadWebpage: (
             lambda iteration: list(iteration.onlineContext.keys()) if iteration.onlineContext else []
         ),
-        ConversationCommand.RunCode: (
+        ConversationCommand.PythonCoder: (
             lambda iteration: list(iteration.codeContext.keys()) if iteration.codeContext else []
         ),
     }
@@ -507,7 +507,7 @@ async def save_to_conversation_log(
     logger.info(
         f"""
-Saved Conversation Turn ({db_conversation.id if db_conversation else 'N/A'}):
+Saved Conversation Turn ({db_conversation.id if db_conversation else "N/A"}):
 You ({user.username}): "{q}"
 Khoj: "{chat_response}"
@@ -573,7 +573,6 @@ def generate_chatml_messages_with_context(
     system_message: str = None,
     chat_history: list[ChatMessageModel] = [],
     model_name="gpt-4o-mini",
-    loaded_model: Optional[Llama] = None,
     max_prompt_size=None,
     tokenizer_name=None,
     query_images=None,
@@ -588,10 +587,7 @@ def generate_chatml_messages_with_context(
     """Generate chat messages with appropriate context from previous conversation to send to the chat model"""
     # Set max prompt size from user config or based on pre-configured for model and machine specs
     if not max_prompt_size:
-        if loaded_model:
-            max_prompt_size = infer_max_tokens(loaded_model.n_ctx(), model_to_prompt_size.get(model_name, math.inf))
-        else:
-            max_prompt_size = model_to_prompt_size.get(model_name, 10000)
+        max_prompt_size = model_to_prompt_size.get(model_name, 10000)
     # Scale lookback turns proportional to max prompt size supported by model
     lookback_turns = max_prompt_size // 750
@@ -638,7 +634,7 @@ def generate_chatml_messages_with_context(
         if not is_none_or_empty(chat.operatorContext):
             operator_context = chat.operatorContext
-            operator_content = "\n\n".join([f'## Task: {oc["query"]}\n{oc["response"]}\n' for oc in operator_context])
+            operator_content = "\n\n".join([f"## Task: {oc['query']}\n{oc['response']}\n" for oc in operator_context])
             message_context += [
                 {
                     "type": "text",
@@ -735,7 +731,7 @@ def generate_chatml_messages_with_context(
             message.content = [{"type": "text", "text": message.content}]
     # Truncate oldest messages from conversation history until under max supported prompt size by model
-    messages = truncate_messages(messages, max_prompt_size, model_name, loaded_model, tokenizer_name)
+    messages = truncate_messages(messages, max_prompt_size, model_name, tokenizer_name)
     # Return message in chronological order
     return messages[::-1]
@@ -743,26 +739,21 @@ def generate_chatml_messages_with_context(
 def get_encoder(
     model_name: str,
-    loaded_model: Optional[Llama] = None,
     tokenizer_name=None,
-) -> tiktoken.Encoding | PreTrainedTokenizer | PreTrainedTokenizerFast | LlamaTokenizer:
+) -> tiktoken.Encoding | PreTrainedTokenizer | PreTrainedTokenizerFast:
     default_tokenizer = "gpt-4o"
     try:
-        if loaded_model:
-            encoder = loaded_model.tokenizer()
-        elif model_name.startswith("gpt-") or model_name.startswith("o1"):
-            # as tiktoken doesn't recognize o1 model series yet
-            encoder = tiktoken.encoding_for_model("gpt-4o" if model_name.startswith("o1") else model_name)
-        elif tokenizer_name:
+        if tokenizer_name:
             if tokenizer_name in state.pretrained_tokenizers:
                 encoder = state.pretrained_tokenizers[tokenizer_name]
             else:
                 encoder = AutoTokenizer.from_pretrained(tokenizer_name)
                 state.pretrained_tokenizers[tokenizer_name] = encoder
         else:
-            encoder = download_model(model_name).tokenizer()
-    except:
+            # as tiktoken doesn't recognize o1 model series yet
+            encoder = tiktoken.encoding_for_model("gpt-4o" if model_name.startswith("o1") else model_name)
+    except Exception:
         encoder = tiktoken.encoding_for_model(default_tokenizer)
         if state.verbose > 2:
             logger.debug(
@@ -773,7 +764,7 @@ def get_encoder(
 def count_tokens(
     message_content: str | list[str | dict],
-    encoder: PreTrainedTokenizer | PreTrainedTokenizerFast | LlamaTokenizer | tiktoken.Encoding,
+    encoder: PreTrainedTokenizer | PreTrainedTokenizerFast | tiktoken.Encoding,
 ) -> int:
     """
     Count the total number of tokens in a list of messages.
@@ -825,11 +816,10 @@ def truncate_messages(
     messages: list[ChatMessage],
     max_prompt_size: int,
     model_name: str,
-    loaded_model: Optional[Llama] = None,
     tokenizer_name=None,
 ) -> list[ChatMessage]:
     """Truncate messages to fit within max prompt size supported by model"""
-    encoder = get_encoder(model_name, loaded_model, tokenizer_name)
+    encoder = get_encoder(model_name, tokenizer_name)
     # Extract system message from messages
     system_message = None
@@ -865,9 +855,9 @@ def truncate_messages(
     total_tokens, _ = count_total_tokens(messages, encoder, system_message)
     if total_tokens > max_prompt_size:
         # At this point, a single message with a single content part of type dict should remain
-        assert (
-            len(messages) == 1 and len(messages[0].content) == 1 and isinstance(messages[0].content[0], dict)
-        ), "Expected a single message with a single content part remaining at this point in truncation"
+        assert len(messages) == 1 and len(messages[0].content) == 1 and isinstance(messages[0].content[0], dict), (
+            "Expected a single message with a single content part remaining at this point in truncation"
+        )
         # Collate message content into single string to ease truncation
         part = messages[0].content[0]

khoj/processor/embeddings.py CHANGED Viewed

@@ -1,8 +1,6 @@
 import logging
 from typing import List
-from urllib.parse import urlparse
-import openai
 import requests
 import tqdm
 from sentence_transformers import CrossEncoder, SentenceTransformer

khoj/processor/image/generate.py CHANGED Viewed

@@ -108,12 +108,12 @@ async def text_to_image(
             if "content_policy_violation" in e.message:
                 logger.error(f"Image Generation blocked by OpenAI: {e}")
                 status_code = e.status_code  # type: ignore
-                message = f"Image generation blocked by OpenAI due to policy violation"  # type: ignore
+                message = "Image generation blocked by OpenAI due to policy violation"  # type: ignore
                 yield image_url or image, status_code, message
                 return
             else:
                 logger.error(f"Image Generation failed with {e}", exc_info=True)
-                message = f"Image generation failed using OpenAI"  # type: ignore
+                message = "Image generation failed using OpenAI"  # type: ignore
                 status_code = e.status_code  # type: ignore
                 yield image_url or image, status_code, message
                 return
@@ -199,7 +199,7 @@ def generate_image_with_stability(
     # Call Stability AI API to generate image
     response = requests.post(
-        f"https://api.stability.ai/v2beta/stable-image/generate/sd3",
+        "https://api.stability.ai/v2beta/stable-image/generate/sd3",
         headers={"authorization": f"Bearer {text_to_image_config.api_key}", "accept": "image/*"},
         files={"none": ""},
         data={

khoj/processor/operator/__init__.py CHANGED Viewed

@@ -11,7 +11,7 @@ from khoj.processor.conversation.utils import (
     OperatorRun,
     construct_chat_history_for_operator,
 )
-from khoj.processor.operator.operator_actions import *
+from khoj.processor.operator.operator_actions import RequestUserAction
 from khoj.processor.operator.operator_agent_anthropic import AnthropicOperatorAgent
 from khoj.processor.operator.operator_agent_base import OperatorAgent
 from khoj.processor.operator.operator_agent_binary import BinaryOperatorAgent
@@ -59,7 +59,7 @@ async def operate_environment(
     if not reasoning_model or not reasoning_model.vision_enabled:
         reasoning_model = await ConversationAdapters.aget_vision_enabled_config()
     if not reasoning_model:
-        raise ValueError(f"No vision enabled chat model found. Configure a vision chat model to operate environment.")
+        raise ValueError("No vision enabled chat model found. Configure a vision chat model to operate environment.")
     # Create conversation history from conversation log
     chat_history = construct_chat_history_for_operator(conversation_log)
@@ -235,7 +235,6 @@ def is_operator_model(model: str) -> ChatModel.ModelType | None:
         "claude-3-7-sonnet": ChatModel.ModelType.ANTHROPIC,
         "claude-sonnet-4": ChatModel.ModelType.ANTHROPIC,
         "claude-opus-4": ChatModel.ModelType.ANTHROPIC,
-        "ui-tars-1.5": ChatModel.ModelType.OFFLINE,
     }
     for operator_model in operator_models:
         if model.startswith(operator_model):

khoj/processor/operator/grounding_agent.py CHANGED Viewed

@@ -1,14 +1,27 @@
 import json
 import logging
 from textwrap import dedent
+from typing import List, Optional
 from openai import AzureOpenAI, OpenAI
 from openai.types.chat import ChatCompletion, ChatCompletionMessage
 from khoj.database.models import ChatModel
 from khoj.processor.conversation.utils import construct_structured_message
-from khoj.processor.operator.operator_actions import *
-from khoj.processor.operator.operator_agent_base import AgentActResult
+from khoj.processor.operator.operator_actions import (
+    BackAction,
+    ClickAction,
+    DoubleClickAction,
+    DragAction,
+    GotoAction,
+    KeypressAction,
+    OperatorAction,
+    Point,
+    ScreenshotAction,
+    ScrollAction,
+    TypeAction,
+    WaitAction,
+)
 from khoj.processor.operator.operator_environment_base import EnvironmentType, EnvState
 from khoj.utils.helpers import get_chat_usage_metrics

khoj/processor/operator/grounding_agent_uitars.py CHANGED Viewed

@@ -18,7 +18,22 @@ from openai import AsyncAzureOpenAI, AsyncOpenAI
 from openai.types.chat import ChatCompletion
 from PIL import Image
-from khoj.processor.operator.operator_actions import *
+from khoj.processor.operator.operator_actions import (
+    BackAction,
+    ClickAction,
+    DoubleClickAction,
+    DragAction,
+    GotoAction,
+    KeyDownAction,
+    KeypressAction,
+    KeyUpAction,
+    MoveAction,
+    OperatorAction,
+    RequestUserAction,
+    ScrollAction,
+    TypeAction,
+    WaitAction,
+)
 from khoj.processor.operator.operator_environment_base import EnvironmentType, EnvState
 from khoj.utils.helpers import get_chat_usage_metrics
@@ -122,11 +137,10 @@ class GroundingAgentUitars:
         )
         temperature = self.temperature
-        top_k = self.top_k
         try_times = 3
         while not parsed_responses:
             if try_times <= 0:
-                logger.warning(f"Reach max retry times to fetch response from client, as error flag.")
+                logger.warning("Reach max retry times to fetch response from client, as error flag.")
                 return "client error\nFAIL", []
             try:
                 message_content = "\n".join([msg["content"][0].get("text") or "[image]" for msg in messages])
@@ -163,7 +177,6 @@ class GroundingAgentUitars:
                 prediction = None
                 try_times -= 1
                 temperature = 1
-                top_k = -1
         if prediction is None:
             return "client error\nFAIL", []
@@ -264,9 +277,9 @@ class GroundingAgentUitars:
             raise ValueError(f"Unsupported environment type: {environment_type}")
     def _format_messages_for_api(self, instruction: str, current_state: EnvState):
-        assert len(self.observations) == len(self.actions) and len(self.actions) == len(
-            self.thoughts
-        ), "The number of observations and actions should be the same."
+        assert len(self.observations) == len(self.actions) and len(self.actions) == len(self.thoughts), (
+            "The number of observations and actions should be the same."
+        )
         self.history_images.append(base64.b64decode(current_state.screenshot))
         self.observations.append({"screenshot": current_state.screenshot, "accessibility_tree": None})
@@ -524,7 +537,7 @@ class GroundingAgentUitars:
         parsed_actions = [self.parse_action_string(action.replace("\n", "\\n").lstrip()) for action in all_action]
         actions: list[dict] = []
         for action_instance, raw_str in zip(parsed_actions, all_action):
-            if action_instance == None:
+            if action_instance is None:
                 print(f"Action can't parse: {raw_str}")
                 raise ValueError(f"Action can't parse: {raw_str}")
             action_type = action_instance["function"]
@@ -756,7 +769,7 @@ class GroundingAgentUitars:
             The pyautogui code string
         """
-        pyautogui_code = f"import pyautogui\nimport time\n"
+        pyautogui_code = "import pyautogui\nimport time\n"
         actions = []
         if isinstance(responses, dict):
             responses = [responses]
@@ -774,7 +787,7 @@ class GroundingAgentUitars:
             if response_id == 0:
                 pyautogui_code += f"'''\nObservation:\n{observation}\n\nThought:\n{thought}\n'''\n"
             else:
-                pyautogui_code += f"\ntime.sleep(1)\n"
+                pyautogui_code += "\ntime.sleep(1)\n"
             action_dict = response
             action_type = action_dict.get("action_type")
@@ -846,17 +859,17 @@ class GroundingAgentUitars:
                 if content:
                     if input_swap:
                         actions += TypeAction()
-                        pyautogui_code += f"\nimport pyperclip"
+                        pyautogui_code += "\nimport pyperclip"
                         pyautogui_code += f"\npyperclip.copy('{stripped_content}')"
-                        pyautogui_code += f"\npyautogui.hotkey('ctrl', 'v')"
-                        pyautogui_code += f"\ntime.sleep(0.5)\n"
+                        pyautogui_code += "\npyautogui.hotkey('ctrl', 'v')"
+                        pyautogui_code += "\ntime.sleep(0.5)\n"
                         if content.endswith("\n") or content.endswith("\\n"):
-                            pyautogui_code += f"\npyautogui.press('enter')"
+                            pyautogui_code += "\npyautogui.press('enter')"
                     else:
                         pyautogui_code += f"\npyautogui.write('{stripped_content}', interval=0.1)"
-                        pyautogui_code += f"\ntime.sleep(0.5)\n"
+                        pyautogui_code += "\ntime.sleep(0.5)\n"
                         if content.endswith("\n") or content.endswith("\\n"):
-                            pyautogui_code += f"\npyautogui.press('enter')"
+                            pyautogui_code += "\npyautogui.press('enter')"
             elif action_type in ["drag", "select"]:
                 # Parsing drag or select action based on start and end_boxes
@@ -869,9 +882,7 @@ class GroundingAgentUitars:
                     x1, y1, x2, y2 = eval(end_box)  # Assuming box is in [x1, y1, x2, y2]
                     ex = round(float((x1 + x2) / 2) * image_width, 3)
                     ey = round(float((y1 + y2) / 2) * image_height, 3)
-                    pyautogui_code += (
-                        f"\npyautogui.moveTo({sx}, {sy})\n" f"\npyautogui.dragTo({ex}, {ey}, duration=1.0)\n"
-                    )
+                    pyautogui_code += f"\npyautogui.moveTo({sx}, {sy})\n\npyautogui.dragTo({ex}, {ey}, duration=1.0)\n"
             elif action_type == "scroll":
                 # Parsing scroll action
@@ -888,11 +899,11 @@ class GroundingAgentUitars:
                     y = None
                 direction = action_inputs.get("direction", "")
-                if x == None:
+                if x is None:
                     if "up" in direction.lower():
-                        pyautogui_code += f"\npyautogui.scroll(5)"
+                        pyautogui_code += "\npyautogui.scroll(5)"
                     elif "down" in direction.lower():
-                        pyautogui_code += f"\npyautogui.scroll(-5)"
+                        pyautogui_code += "\npyautogui.scroll(-5)"
                 else:
                     if "up" in direction.lower():
                         pyautogui_code += f"\npyautogui.scroll(5, x={x}, y={y})"
@@ -923,7 +934,7 @@ class GroundingAgentUitars:
                         pyautogui_code += f"\npyautogui.moveTo({x}, {y})"
             elif action_type in ["finished"]:
-                pyautogui_code = f"DONE"
+                pyautogui_code = "DONE"
             else:
                 pyautogui_code += f"\n# Unrecognized action type: {action_type}"

khoj/processor/operator/operator_agent_anthropic.py CHANGED Viewed

@@ -11,7 +11,32 @@ from anthropic.types.beta import BetaContentBlock, BetaTextBlock, BetaToolUseBlo
 from khoj.database.models import ChatModel
 from khoj.processor.conversation.anthropic.utils import is_reasoning_model
 from khoj.processor.conversation.utils import AgentMessage
-from khoj.processor.operator.operator_actions import *
+from khoj.processor.operator.operator_actions import (
+    BackAction,
+    ClickAction,
+    CursorPositionAction,
+    DoubleClickAction,
+    DragAction,
+    GotoAction,
+    HoldKeyAction,
+    KeypressAction,
+    MouseDownAction,
+    MouseUpAction,
+    MoveAction,
+    NoopAction,
+    OperatorAction,
+    Point,
+    ScreenshotAction,
+    ScrollAction,
+    TerminalAction,
+    TextEditorCreateAction,
+    TextEditorInsertAction,
+    TextEditorStrReplaceAction,
+    TextEditorViewAction,
+    TripleClickAction,
+    TypeAction,
+    WaitAction,
+)
 from khoj.processor.operator.operator_agent_base import AgentActResult, OperatorAgent
 from khoj.processor.operator.operator_environment_base import (
     EnvironmentType,
@@ -518,7 +543,7 @@ class AnthropicOperatorAgent(OperatorAgent):
     def model_default_headers(self) -> list[str]:
         """Get the default computer use headers for the given model."""
         if self.vision_model.name.startswith("claude-3-7-sonnet"):
-            return [f"computer-use-2025-01-24", "token-efficient-tools-2025-02-19"]
+            return ["computer-use-2025-01-24", "token-efficient-tools-2025-02-19"]
         elif self.vision_model.name.startswith("claude-sonnet-4") or self.vision_model.name.startswith("claude-opus-4"):
             return ["computer-use-2025-01-24"]
         else:
@@ -538,7 +563,7 @@ class AnthropicOperatorAgent(OperatorAgent):
                 * When viewing a webpage it can be helpful to zoom out so that you can see everything on the page. Either that, or make sure you scroll down to see everything before deciding something isn't available.
                 * When using your computer function calls, they take a while to run and send back to you. Where possible/feasible, try to chain multiple of these calls all into one function calls request.
                 * Perform web searches using DuckDuckGo. Don't use Google even if requested as the query will fail.
-                * The current date is {datetime.today().strftime('%A, %B %-d, %Y')}.
+                * The current date is {datetime.today().strftime("%A, %B %-d, %Y")}.
                 * The current URL is {current_state.url}.
                 </SYSTEM_CAPABILITY>
@@ -563,7 +588,7 @@ class AnthropicOperatorAgent(OperatorAgent):
                 </SYSTEM_CAPABILITY>
                 <CONTEXT>
-                * The current date is {datetime.today().strftime('%A, %B %-d, %Y')}.
+                * The current date is {datetime.today().strftime("%A, %B %-d, %Y")}.
                 </CONTEXT>
                 """
             ).lstrip()

khoj/processor/operator/operator_agent_base.py CHANGED Viewed

@@ -1,6 +1,6 @@
 import logging
 from abc import ABC, abstractmethod
-from typing import List, Literal, Optional, Union
+from typing import List, Optional
 from pydantic import BaseModel

khoj/processor/operator/operator_agent_binary.py CHANGED Viewed

@@ -12,7 +12,7 @@ from khoj.processor.conversation.utils import (
 )
 from khoj.processor.operator.grounding_agent import GroundingAgent
 from khoj.processor.operator.grounding_agent_uitars import GroundingAgentUitars
-from khoj.processor.operator.operator_actions import *
+from khoj.processor.operator.operator_actions import OperatorAction, WaitAction
 from khoj.processor.operator.operator_agent_base import AgentActResult, OperatorAgent
 from khoj.processor.operator.operator_environment_base import (
     EnvironmentType,
@@ -181,7 +181,7 @@ class BinaryOperatorAgent(OperatorAgent):
                     elif action.type == "key_down":
                         rendered_parts += [f'**Action**: Press Key "{action.key}"']
                     elif action.type == "screenshot" and not current_state.screenshot:
-                        rendered_parts += [f"**Error**: Failed to take screenshot"]
+                        rendered_parts += ["**Error**: Failed to take screenshot"]
                     elif action.type == "goto":
                         rendered_parts += [f"**Action**: Open URL {action.url}"]
                     else:
@@ -317,7 +317,7 @@ class BinaryOperatorAgent(OperatorAgent):
                 # Introduction
                 * You are Khoj, a smart and resourceful web browsing assistant. You help the user accomplish their task using a web browser.
                 * You are given the user's query and screenshots of the browser's state transitions.
-                * The current date is {datetime.today().strftime('%A, %B %-d, %Y')}.
+                * The current date is {datetime.today().strftime("%A, %B %-d, %Y")}.
                 * The current URL is {env_state.url}.
                 # Your Task
@@ -362,7 +362,7 @@ class BinaryOperatorAgent(OperatorAgent):
                 # Introduction
                 * You are Khoj, a smart and resourceful computer assistant. You help the user accomplish their task using a computer.
                 * You are given the user's query and screenshots of the computer's state transitions.
-                * The current date is {datetime.today().strftime('%A, %B %-d, %Y')}.
+                * The current date is {datetime.today().strftime("%A, %B %-d, %Y")}.
                 # Your Task
                 * First look at the screenshots carefully to notice all pertinent information.

khoj/processor/operator/operator_agent_openai.py CHANGED Viewed

@@ -1,6 +1,5 @@
 import json
 import logging
-import platform
 from copy import deepcopy
 from datetime import datetime
 from textwrap import dedent
@@ -10,7 +9,23 @@ from openai.types.responses import Response, ResponseOutputItem
 from khoj.database.models import ChatModel
 from khoj.processor.conversation.utils import AgentMessage
-from khoj.processor.operator.operator_actions import *
+from khoj.processor.operator.operator_actions import (
+    BackAction,
+    ClickAction,
+    DoubleClickAction,
+    DragAction,
+    GotoAction,
+    KeypressAction,
+    MoveAction,
+    NoopAction,
+    OperatorAction,
+    Point,
+    RequestUserAction,
+    ScreenshotAction,
+    ScrollAction,
+    TypeAction,
+    WaitAction,
+)
 from khoj.processor.operator.operator_agent_base import AgentActResult, OperatorAgent
 from khoj.processor.operator.operator_environment_base import (
     EnvironmentType,
@@ -152,7 +167,7 @@ class OpenAIOperatorAgent(OperatorAgent):
                 # Add screenshot data in openai message format
                 action_result["output"] = {
                     "type": "input_image",
-                    "image_url": f'data:image/webp;base64,{result_content["image"]}',
+                    "image_url": f"data:image/webp;base64,{result_content['image']}",
                     "current_url": result_content["url"],
                 }
             elif action_result["type"] == "computer_call_output" and idx == len(env_steps) - 1:
@@ -311,7 +326,7 @@ class OpenAIOperatorAgent(OperatorAgent):
             elif block.type == "function_call":
                 if block.name == "goto":
                     args = json.loads(block.arguments)
-                    render_texts = [f'Open URL: {args.get("url", "[Missing URL]")}']
+                    render_texts = [f"Open URL: {args.get('url', '[Missing URL]')}"]
                 else:
                     render_texts += [block.name]
             elif block.type == "computer_call":
@@ -351,7 +366,7 @@ class OpenAIOperatorAgent(OperatorAgent):
                 * When viewing a webpage it can be helpful to zoom out so that you can see everything on the page. Either that, or make sure you scroll down to see everything before deciding something isn't available.
                 * When using your computer function calls, they take a while to run and send back to you. Where possible/feasible, try to chain multiple of these calls all into one function calls request.
                 * Perform web searches using DuckDuckGo. Don't use Google even if requested as the query will fail.
-                * The current date is {datetime.today().strftime('%A, %B %-d, %Y')}.
+                * The current date is {datetime.today().strftime("%A, %B %-d, %Y")}.
                 * The current URL is {current_state.url}.
                 </SYSTEM_CAPABILITY>
@@ -374,7 +389,7 @@ class OpenAIOperatorAgent(OperatorAgent):
                 </SYSTEM_CAPABILITY>
                 <CONTEXT>
-                * The current date is {datetime.today().strftime('%A, %B %-d, %Y')}.
+                * The current date is {datetime.today().strftime("%A, %B %-d, %Y")}.
                 </CONTEXT>
                 """
             ).lstrip()

khoj/processor/operator/operator_environment_browser.py CHANGED Viewed

@@ -247,7 +247,7 @@ class BrowserEnvironment(Environment):
                 case "drag":
                     if not isinstance(action, DragAction):
-                        raise TypeError(f"Invalid action type for drag")
+                        raise TypeError("Invalid action type for drag")
                     path = action.path
                     if not path:
                         error = "Missing path for drag action"

khoj/processor/operator/operator_environment_computer.py CHANGED Viewed

@@ -532,7 +532,7 @@ class ComputerEnvironment(Environment):
             else:
                 return {"success": False, "output": process.stdout, "error": process.stderr}
         except asyncio.TimeoutError:
-            return {"success": False, "output": "", "error": f"Command timed out after 120 seconds."}
+            return {"success": False, "output": "", "error": "Command timed out after 120 seconds."}
         except Exception as e:
             return {"success": False, "output": "", "error": str(e)}

khoj/processor/speech/text_to_speech.py CHANGED Viewed

@@ -1,4 +1,3 @@
-import json  # Used for working with JSON data
 import os
 import requests  # Used for making HTTP requests

khoj/processor/tools/online_search.py CHANGED Viewed

@@ -385,7 +385,7 @@ async def read_webpages(
     tracer: dict = {},
 ):
     "Infer web pages to read from the query and extract relevant information from them"
-    logger.info(f"Inferring web pages to read")
+    logger.info("Inferring web pages to read")
     urls = await infer_webpage_urls(
         query,
         max_webpages_to_read,

khoj/processor/tools/run_code.py CHANGED Viewed

@@ -93,7 +93,7 @@ async def run_code(
     # Run Code
     if send_status_func:
-        async for event in send_status_func(f"**Running code snippet**"):
+        async for event in send_status_func("**Running code snippet**"):
             yield {ChatEvent.STATUS: event}
     try:
         with timer("Chat actor: Execute generated program", logger, log_level=logging.INFO):

khoj 2.0.0b12.dev5__py3-none-any.whl → 2.0.0b13__py3-none-any.whl

khoj 2.0.0b12.dev5__py3-none-any.whl → 2.0.0b13__py3-none-any.whl