PyPI - khoj - Versions diffs - 1.42.9.dev26__py3-none-any.whl → 1.42.10.dev2__py3-none-any.whl - Mend

khoj 1.42.9.dev26__py3-none-any.whl → 1.42.10.dev2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (58) hide show

khoj/utils/helpers.py CHANGED Viewed

@@ -12,7 +12,6 @@ import random
 import urllib.parse
 import uuid
 from collections import OrderedDict
-from copy import deepcopy
 from enum import Enum
 from functools import lru_cache
 from importlib import import_module
@@ -20,9 +19,8 @@ from importlib.metadata import version
 from itertools import islice
 from os import path
 from pathlib import Path
-from textwrap import dedent
 from time import perf_counter
-from typing import TYPE_CHECKING, Any, NamedTuple, Optional, Tuple, Type, Union
+from typing import TYPE_CHECKING, Any, NamedTuple, Optional, Tuple, Union
 from urllib.parse import ParseResult, urlparse
 import anthropic
@@ -38,7 +36,6 @@ from google.auth.credentials import Credentials
 from google.oauth2 import service_account
 from magika import Magika
 from PIL import Image
-from pydantic import BaseModel
 from pytz import country_names, country_timezones
 from khoj.utils import constants
@@ -337,85 +334,6 @@ def is_e2b_code_sandbox_enabled():
     return not is_none_or_empty(os.getenv("E2B_API_KEY"))
-class ToolDefinition:
-    def __init__(self, name: str, description: str, schema: dict):
-        self.name = name
-        self.description = description
-        self.schema = schema
-def create_tool_definition(
-    schema: Type[BaseModel],
-    name: str = None,
-    description: Optional[str] = None,
-) -> ToolDefinition:
-    """
-    Converts a response schema BaseModel class into a normalized tool definition.
-    A standard AI provider agnostic tool format to specify tools the model can use.
-    Common logic used across models is kept here. AI provider specific adaptations
-    should be handled in provider code.
-    Args:
-        response_schema: The Pydantic BaseModel class to convert.
-                         This class defines the response schema for the tool.
-        tool_name: The name for the AI model tool (e.g., "get_weather", "plan_next_step").
-        tool_description: Optional description for the AI model tool.
-                           If None, it attempts to use the Pydantic model's docstring.
-                           If that's also missing, a fallback description is generated.
-    Returns:
-        A normalized tool definition for AI model APIs.
-    """
-    raw_schema_dict = schema.model_json_schema()
-    name = name or schema.__name__.lower()
-    description = description
-    if description is None:
-        docstring = schema.__doc__
-        if docstring:
-            description = dedent(docstring).strip()
-        else:
-            # Fallback description if no explicit one or docstring is provided
-            description = f"Tool named '{name}' accepts specified parameters."
-    # Process properties to inline enums and remove $defs dependency
-    processed_properties = {}
-    original_properties = raw_schema_dict.get("properties", {})
-    defs = raw_schema_dict.get("$defs", {})
-    for prop_name, prop_schema in original_properties.items():
-        current_prop_schema = deepcopy(prop_schema)  # Work on a copy
-        # Check for enums defined directly in the property for simpler direct enum definitions.
-        if "$ref" in current_prop_schema:
-            ref_path = current_prop_schema["$ref"]
-            if ref_path.startswith("#/$defs/"):
-                def_name = ref_path.split("/")[-1]
-                if def_name in defs and "enum" in defs[def_name]:
-                    enum_def = defs[def_name]
-                    current_prop_schema["enum"] = enum_def["enum"]
-                    current_prop_schema["type"] = enum_def.get("type", "string")
-                    if "description" not in current_prop_schema and "description" in enum_def:
-                        current_prop_schema["description"] = enum_def["description"]
-                    del current_prop_schema["$ref"]  # Remove the $ref as it's been inlined
-        processed_properties[prop_name] = current_prop_schema
-    # Generate the compiled schema dictionary for the tool definition.
-    compiled_schema = {
-        "type": "object",
-        "properties": processed_properties,
-        # Generate content in the order in which the schema properties were defined
-        "property_ordering": list(schema.model_fields.keys()),
-    }
-    # Include 'required' fields if specified in the Pydantic model
-    if "required" in raw_schema_dict and raw_schema_dict["required"]:
-        compiled_schema["required"] = raw_schema_dict["required"]
-    return ToolDefinition(name=name, description=description, schema=compiled_schema)
 class ConversationCommand(str, Enum):
     Default = "default"
     General = "general"
@@ -429,14 +347,6 @@ class ConversationCommand(str, Enum):
     Diagram = "diagram"
     Research = "research"
     Operator = "operator"
-    ViewFile = "view_file"
-    ListFiles = "list_files"
-    RegexSearchFiles = "regex_search_files"
-    SemanticSearchFiles = "semantic_search_files"
-    SearchWeb = "search_web"
-    ReadWebpage = "read_webpage"
-    RunCode = "run_code"
-    OperateComputer = "operate_computer"
 command_descriptions = {
@@ -450,9 +360,6 @@ command_descriptions = {
     ConversationCommand.Diagram: "Draw a flowchart, diagram, or any other visual representation best expressed with primitives like lines, rectangles, and text.",
     ConversationCommand.Research: "Do deep research on a topic. This will take longer than usual, but give a more detailed, comprehensive answer.",
     ConversationCommand.Operator: "Operate and perform tasks using a computer.",
-    ConversationCommand.ViewFile: "View the contents of a file with optional line range specification.",
-    ConversationCommand.ListFiles: "List files under a given path with optional glob pattern.",
-    ConversationCommand.RegexSearchFiles: "Search for lines in files matching regex pattern with an optional path prefix.",
 }
 command_descriptions_for_agent = {
@@ -478,186 +385,13 @@ tool_descriptions_for_llm = {
     ConversationCommand.Operator: "To use when you need to operate a computer to complete the task.",
 }
-tools_for_research_llm = {
-    ConversationCommand.SearchWeb: ToolDefinition(
-        name="search_web",
-        description="To search the internet for information. Useful to get a quick, broad overview from the internet. Provide all relevant context to ensure new searches, not in previous iterations, are performed. Max {max_search_queries} search queries allowed per iteration.",
-        schema={
-            "type": "object",
-            "properties": {
-                "query": {
-                    "type": "string",
-                    "description": "The query to search on the internet.",
-                },
-            },
-            "required": ["query"],
-        },
-    ),
-    ConversationCommand.ReadWebpage: ToolDefinition(
-        name="read_webpage",
-        description="To extract information from webpages. Useful for more detailed research from the internet. Usually used when you know the webpage links to refer to. Share upto {max_webpages_to_read} webpage links and what information to extract from them in your query.",
-        schema={
-            "type": "object",
-            "properties": {
-                "urls": {
-                    "type": "array",
-                    "items": {
-                        "type": "string",
-                    },
-                    "description": "The webpage URLs to extract information from.",
-                },
-                "query": {
-                    "type": "string",
-                    "description": "The query to extract information from the webpages.",
-                },
-            },
-            "required": ["urls", "query"],
-        },
-    ),
-    ConversationCommand.RunCode: ToolDefinition(
-        name="run_code",
-        description=e2b_tool_description if is_e2b_code_sandbox_enabled() else terrarium_tool_description,
-        schema={
-            "type": "object",
-            "properties": {
-                "query": {
-                    "type": "string",
-                    "description": "Detailed query and all input data required to generate, execute code in the sandbox.",
-                },
-            },
-            "required": ["query"],
-        },
-    ),
-    ConversationCommand.OperateComputer: ToolDefinition(
-        name="operate_computer",
-        description="To operate a computer to complete the task.",
-        schema={
-            "type": "object",
-            "properties": {
-                "query": {
-                    "type": "string",
-                    "description": "The task to perform on the computer.",
-                },
-            },
-            "required": ["query"],
-        },
-    ),
-    ConversationCommand.ViewFile: ToolDefinition(
-        name="view_file",
-        description=dedent(
-            """
-            To view the contents of specific note or document in the user's personal knowledge base.
-            Especially helpful if the question expects context from the user's notes or documents.
-            It can be used after finding the document path with the document search tool.
-            Optionally specify a line range to view only specific sections of large files.
-            """
-        ).strip(),
-        schema={
-            "type": "object",
-            "properties": {
-                "path": {
-                    "type": "string",
-                    "description": "The file path to view (can be absolute or relative).",
-                },
-                "start_line": {
-                    "type": "integer",
-                    "description": "Optional starting line number for viewing a specific range (1-indexed).",
-                },
-                "end_line": {
-                    "type": "integer",
-                    "description": "Optional ending line number for viewing a specific range (1-indexed).",
-                },
-            },
-            "required": ["path"],
-        },
-    ),
-    ConversationCommand.ListFiles: ToolDefinition(
-        name="list_files",
-        description=dedent(
-            """
-            To list files in the user's knowledge base.
-            Use the path parameter to only show files under the specified path.
-            """
-        ).strip(),
-        schema={
-            "type": "object",
-            "properties": {
-                "path": {
-                    "type": "string",
-                    "description": "The directory path to list files from.",
-                },
-                "pattern": {
-                    "type": "string",
-                    "description": "Optional glob pattern to filter files (e.g., '*.md').",
-                },
-            },
-        },
-    ),
-    ConversationCommand.SemanticSearchFiles: ToolDefinition(
-        name="semantic_search_files",
-        description=dedent(
-            """
-            To have the tool AI semantic search through the user's knowledge base.
-            Helpful to answer questions for which finding some relevant notes or documents can complete the search. Example: "When was Tom born?"
-            This tool AI cannot find all relevant notes or documents, only a subset of them.
-            It is a good starting point to find keywords, discover similar topics or related concepts and some relevant notes or documents.
-            The tool AI can perform a maximum of {max_search_queries} semantic search queries per iteration.
-            """
-        ).strip(),
-        schema={
-            "type": "object",
-            "properties": {
-                "q": {
-                    "type": "string",
-                    "description": "Your natural language query for the tool to search in the user's knowledge base.",
-                },
-            },
-            "required": ["q"],
-        },
-    ),
-    ConversationCommand.RegexSearchFiles: ToolDefinition(
-        name="regex_search_files",
-        description=dedent(
-            """
-            To search through the user's knowledge base using regex patterns. Returns all lines matching the pattern.
-            Helpful to answer questions for which all relevant notes or documents are needed to complete the search. Example: "Notes that mention Tom".
-            You need to know all the correct keywords or regex patterns for this tool to be useful.
-            REMEMBER:
-            - The regex pattern will ONLY match content on a single line. Multi-line matches are NOT supported (even if you use \\n).
-            An optional path prefix can restrict search to specific files/directories.
-            Use lines_before, lines_after to show context around matches.
-            """
-        ).strip(),
-        schema={
-            "type": "object",
-            "properties": {
-                "regex_pattern": {
-                    "type": "string",
-                    "description": "The regex pattern to search for content in the user's files.",
-                },
-                "path_prefix": {
-                    "type": "string",
-                    "description": "Optional path prefix to limit the search to files under a specified path.",
-                },
-                "lines_before": {
-                    "type": "integer",
-                    "description": "Optional number of lines to show before each line match for context.",
-                    "minimum": 0,
-                    "maximum": 20,
-                },
-                "lines_after": {
-                    "type": "integer",
-                    "description": "Optional number of lines to show after each line match for context.",
-                    "minimum": 0,
-                    "maximum": 20,
-                },
-            },
-            "required": ["regex_pattern"],
-        },
-    ),
+tool_description_for_research_llm = {
+    ConversationCommand.Notes: "To search the user's personal knowledge base. Especially helpful if the question expects context from the user's notes or documents. Max {max_search_queries} search queries allowed per iteration.",
+    ConversationCommand.Online: "To search the internet for information. Useful to get a quick, broad overview from the internet. Provide all relevant context to ensure new searches, not in previous iterations, are performed. Max {max_search_queries} search queries allowed per iteration.",
+    ConversationCommand.Webpage: "To extract information from webpages. Useful for more detailed research from the internet. Usually used when you know the webpage links to refer to. Share upto {max_webpages_to_read} webpage links and what information to extract from them in your query.",
+    ConversationCommand.Code: e2b_tool_description if is_e2b_code_sandbox_enabled() else terrarium_tool_description,
+    ConversationCommand.Text: "To respond to the user once you've completed your research and have the required information.",
+    ConversationCommand.Operator: "To operate a computer to complete the task.",
 }
 mode_descriptions_for_llm = {
@@ -1116,13 +850,3 @@ def clean_object_for_db(data):
         return [clean_object_for_db(item) for item in data]
     else:
         return data
-def dict_to_tuple(d):
-    # Recursively convert dicts to sorted tuples for hashability
-    if isinstance(d, dict):
-        return tuple(sorted((k, dict_to_tuple(v)) for k, v in d.items()))
-    elif isinstance(d, list):
-        return tuple(dict_to_tuple(i) for i in d)
-    else:
-        return d

khoj/utils/initialization.py CHANGED Viewed

@@ -235,6 +235,7 @@ def initialization(interactive: bool = True):
             chat_model_options = {
                 "name": chat_model,
+                "friendly_name": chat_model,
                 "model_type": model_type,
                 "max_prompt_size": default_max_tokens,
                 "vision_enabled": vision_enabled,
@@ -275,6 +276,7 @@ def initialization(interactive: bool = True):
                         if not existing_models.filter(name=model_name).exists():
                             ChatModel.objects.create(
                                 name=model_name,
+                                friendly_name=model_name,
                                 model_type=ChatModel.ModelType.OPENAI,
                                 max_prompt_size=model_to_prompt_size.get(model_name),
                                 vision_enabled=model_name in default_openai_chat_models,

khoj/utils/rawconfig.py CHANGED Viewed

@@ -176,7 +176,6 @@ class Entry:
     compiled: str
     heading: Optional[str]
     file: Optional[str]
-    uri: Optional[str] = None
     corpus_id: str
     def __init__(
@@ -185,7 +184,6 @@ class Entry:
         compiled: str = None,
         heading: Optional[str] = None,
         file: Optional[str] = None,
-        uri: Optional[str] = None,
         corpus_id: uuid.UUID = None,
     ):
         self.raw = raw
@@ -193,14 +191,6 @@ class Entry:
         self.heading = heading
         self.file = file
         self.corpus_id = str(corpus_id)
-        if uri:
-            self.uri = uri
-        elif file and (file.startswith("http") or file.startswith("file://")):
-            self.uri = file
-        elif file:
-            self.uri = f"file://{file}"
-        else:
-            self.uri = None
     def to_json(self) -> str:
         return json.dumps(self.__dict__, ensure_ascii=False)
@@ -216,5 +206,4 @@ class Entry:
             file=dictionary.get("file", None),
             heading=dictionary.get("heading", None),
             corpus_id=dictionary.get("corpus_id", None),
-            uri=dictionary.get("uri", None),
         )

{khoj-1.42.9.dev26.dist-info → khoj-1.42.10.dev2.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: khoj
-Version: 1.42.9.dev26
+Version: 1.42.10.dev2
 Summary: Your Second Brain
 Project-URL: Homepage, https://khoj.dev
 Project-URL: Documentation, https://docs.khoj.dev

khoj 1.42.9.dev26__py3-none-any.whl → 1.42.10.dev2__py3-none-any.whl

khoj 1.42.9.dev26__py3-none-any.whl → 1.42.10.dev2__py3-none-any.whl