PyPI - khoj - Versions diffs - 1.28.3__py3-none-any.whl → 1.28.4.dev92__py3-none-any.whl - Mend

khoj 1.28.3__py3-none-any.whl → 1.28.4.dev92__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (134)

khoj/routers/research.py CHANGED Viewed

@@ -11,6 +11,7 @@ from khoj.processor.conversation import prompts
 from khoj.processor.conversation.utils import (
     InformationCollectionIteration,
     clean_json,
+    construct_chat_history,
     construct_iteration_history,
     construct_tool_chat_history,
 )
@@ -19,8 +20,6 @@ from khoj.processor.tools.run_code import run_code
 from khoj.routers.api import extract_references_and_questions
 from khoj.routers.helpers import (
     ChatEvent,
-    construct_chat_history,
-    extract_relevant_info,
     generate_summary_from_files,
     send_message_to_model_wrapper,
 )
@@ -43,38 +42,36 @@ async def apick_next_tool(
     location: LocationData = None,
     user_name: str = None,
     agent: Agent = None,
-    previous_iterations_history: str = None,
+    previous_iterations: List[InformationCollectionIteration] = [],
     max_iterations: int = 5,
     send_status_func: Optional[Callable] = None,
     tracer: dict = {},
+    query_files: str = None,
 ):
-    """
-    Given a query, determine which of the available tools the agent should use in order to answer appropriately. One at a time, and it's able to use subsequent iterations to refine the answer.
-    """
+    """Given a query, determine which of the available tools the agent should use in order to answer appropriately."""
+    # Construct tool options for the agent to choose from
     tool_options = dict()
     tool_options_str = ""
     agent_tools = agent.input_tools if agent else []
     for tool, description in function_calling_description_for_llm.items():
         tool_options[tool.value] = description
         if len(agent_tools) == 0 or tool.value in agent_tools:
             tool_options_str += f'- "{tool.value}": "{description}"\n'
+    # Construct chat history with user and iteration history with researcher agent for context
     chat_history = construct_chat_history(conversation_history, agent_name=agent.name if agent else "Khoj")
+    previous_iterations_history = construct_iteration_history(previous_iterations, prompts.previous_iteration)
     if query_images:
         query = f"[placeholder for user attached images]\n{query}"
+    today = datetime.today()
+    location_data = f"{location}" if location else "Unknown"
     personality_context = (
         prompts.personality_context.format(personality=agent.personality) if agent and agent.personality else ""
     )
-    # Extract Past User Message and Inferred Questions from Conversation Log
-    today = datetime.today()
-    location_data = f"{location}" if location else "Unknown"
     function_planning_prompt = prompts.plan_function_execution.format(
         tools=tool_options_str,
         chat_history=chat_history,
@@ -87,15 +84,25 @@ async def apick_next_tool(
         max_iterations=max_iterations,
     )
-    with timer("Chat actor: Infer information sources to refer", logger):
-        response = await send_message_to_model_wrapper(
-            query=query,
-            context=function_planning_prompt,
-            response_type="json_object",
-            user=user,
-            query_images=query_images,
-            tracer=tracer,
+    try:
+        with timer("Chat actor: Infer information sources to refer", logger):
+            response = await send_message_to_model_wrapper(
+                query=query,
+                context=function_planning_prompt,
+                response_type="json_object",
+                user=user,
+                query_images=query_images,
+                query_files=query_files,
+                tracer=tracer,
+            )
+    except Exception as e:
+        logger.error(f"Failed to infer information sources to refer: {e}", exc_info=True)
+        yield InformationCollectionIteration(
+            tool=None,
+            query=None,
+            warning="Failed to infer information sources to refer. Skipping iteration. Try again.",
         )
+        return
     try:
         response = clean_json(response)
@@ -103,8 +110,15 @@ async def apick_next_tool(
         selected_tool = response.get("tool", None)
         generated_query = response.get("query", None)
         scratchpad = response.get("scratchpad", None)
+        warning = None
         logger.info(f"Response for determining relevant tools: {response}")
-        if send_status_func:
+        # Detect selection of previously used query, tool combination.
+        previous_tool_query_combinations = {(i.tool, i.query) for i in previous_iterations}
+        if (selected_tool, generated_query) in previous_tool_query_combinations:
+            warning = f"Repeated tool, query combination detected. Skipping iteration. Try something different."
+        # Only send client status updates if we'll execute this iteration
+        elif send_status_func:
             determined_tool_message = "**Determined Tool**: "
             determined_tool_message += f"{selected_tool}({generated_query})." if selected_tool else "respond."
             determined_tool_message += f"\nReason: {scratchpad}" if scratchpad else ""
@@ -114,13 +128,14 @@ async def apick_next_tool(
         yield InformationCollectionIteration(
             tool=selected_tool,
             query=generated_query,
+            warning=warning,
         )
     except Exception as e:
         logger.error(f"Invalid response for determining relevant tools: {response}. {e}", exc_info=True)
         yield InformationCollectionIteration(
             tool=None,
             query=None,
+            warning=f"Invalid response for determining relevant tools: {response}. Skipping iteration. Fix error: {e}",
         )
@@ -137,6 +152,7 @@ async def execute_information_collection(
     location: LocationData = None,
     file_filters: List[str] = [],
     tracer: dict = {},
+    query_files: str = None,
 ):
     current_iteration = 0
     MAX_ITERATIONS = 5
@@ -147,7 +163,6 @@ async def execute_information_collection(
         document_results: List[Dict[str, str]] = []
         summarize_files: str = ""
         this_iteration = InformationCollectionIteration(tool=None, query=query)
-        previous_iterations_history = construct_iteration_history(previous_iterations, prompts.previous_iteration)
         async for result in apick_next_tool(
             query,
@@ -157,19 +172,27 @@ async def execute_information_collection(
             location,
             user_name,
             agent,
-            previous_iterations_history,
+            previous_iterations,
             MAX_ITERATIONS,
             send_status_func,
             tracer=tracer,
+            query_files=query_files,
         ):
             if isinstance(result, dict) and ChatEvent.STATUS in result:
                 yield result[ChatEvent.STATUS]
             elif isinstance(result, InformationCollectionIteration):
                 this_iteration = result
-        if this_iteration.tool == ConversationCommand.Notes:
+        # Skip running iteration if warning present in iteration
+        if this_iteration.warning:
+            logger.warning(f"Research mode: {this_iteration.warning}.")
+        elif this_iteration.tool == ConversationCommand.Notes:
             this_iteration.context = []
             document_results = []
+            previous_inferred_queries = {
+                c["query"] for iteration in previous_iterations if iteration.context for c in iteration.context
+            }
             async for result in extract_references_and_questions(
                 request,
                 construct_tool_chat_history(previous_iterations, ConversationCommand.Notes),
@@ -181,8 +204,10 @@ async def execute_information_collection(
                 location,
                 send_status_func,
                 query_images,
+                previous_inferred_queries=previous_inferred_queries,
                 agent=agent,
                 tracer=tracer,
+                query_files=query_files,
             ):
                 if isinstance(result, dict) and ChatEvent.STATUS in result:
                     yield result[ChatEvent.STATUS]
@@ -204,6 +229,12 @@ async def execute_information_collection(
                     logger.error(f"Error extracting document references: {e}", exc_info=True)
         elif this_iteration.tool == ConversationCommand.Online:
+            previous_subqueries = {
+                subquery
+                for iteration in previous_iterations
+                if iteration.onlineContext
+                for subquery in iteration.onlineContext.keys()
+            }
             async for result in search_online(
                 this_iteration.query,
                 construct_tool_chat_history(previous_iterations, ConversationCommand.Online),
@@ -213,11 +244,16 @@ async def execute_information_collection(
                 [],
                 max_webpages_to_read=0,
                 query_images=query_images,
+                previous_subqueries=previous_subqueries,
                 agent=agent,
                 tracer=tracer,
             ):
                 if isinstance(result, dict) and ChatEvent.STATUS in result:
                     yield result[ChatEvent.STATUS]
+                elif is_none_or_empty(result):
+                    this_iteration.warning = (
+                        "Detected previously run online search queries. Skipping iteration. Try something different."
+                    )
                 else:
                     online_results: Dict[str, Dict] = result  # type: ignore
                     this_iteration.onlineContext = online_results
@@ -233,6 +269,7 @@ async def execute_information_collection(
                     query_images=query_images,
                     agent=agent,
                     tracer=tracer,
+                    query_files=query_files,
                 ):
                     if isinstance(result, dict) and ChatEvent.STATUS in result:
                         yield result[ChatEvent.STATUS]
@@ -263,6 +300,7 @@ async def execute_information_collection(
                     send_status_func,
                     query_images=query_images,
                     agent=agent,
+                    query_files=query_files,
                     tracer=tracer,
                 ):
                     if isinstance(result, dict) and ChatEvent.STATUS in result:
@@ -288,6 +326,7 @@ async def execute_information_collection(
                     query_images=query_images,
                     agent=agent,
                     send_status_func=send_status_func,
+                    query_files=query_files,
                 ):
                     if isinstance(result, dict) and ChatEvent.STATUS in result:
                         yield result[ChatEvent.STATUS]
@@ -302,16 +341,19 @@ async def execute_information_collection(
         current_iteration += 1
-        if document_results or online_results or code_results or summarize_files:
-            results_data = f"**Results**:\n"
+        if document_results or online_results or code_results or summarize_files or this_iteration.warning:
+            results_data = f"\n<iteration>{current_iteration}\n<tool>{this_iteration.tool}</tool>\n<query>{this_iteration.query}</query>\n<results>"
             if document_results:
-                results_data += f"**Document References**:\n{yaml.dump(document_results, allow_unicode=True, sort_keys=False, default_flow_style=False)}\n"
+                results_data += f"\n<document_references>\n{yaml.dump(document_results, allow_unicode=True, sort_keys=False, default_flow_style=False)}\n</document_references>"
             if online_results:
-                results_data += f"**Online Results**:\n{yaml.dump(online_results, allow_unicode=True, sort_keys=False, default_flow_style=False)}\n"
+                results_data += f"\n<online_results>\n{yaml.dump(online_results, allow_unicode=True, sort_keys=False, default_flow_style=False)}\n</online_results>"
             if code_results:
-                results_data += f"**Code Results**:\n{yaml.dump(code_results, allow_unicode=True, sort_keys=False, default_flow_style=False)}\n"
+                results_data += f"\n<code_results>\n{yaml.dump(code_results, allow_unicode=True, sort_keys=False, default_flow_style=False)}\n</code_results>"
             if summarize_files:
-                results_data += f"**Summarized Files**:\n{yaml.dump(summarize_files, allow_unicode=True, sort_keys=False, default_flow_style=False)}\n"
+                results_data += f"\n<summarized_files>\n{yaml.dump(summarize_files, allow_unicode=True, sort_keys=False, default_flow_style=False)}\n</summarized_files>"
+            if this_iteration.warning:
+                results_data += f"\n<warning>\n{this_iteration.warning}\n</warning>"
+            results_data += "\n</results>\n</iteration>"
             # intermediate_result = await extract_relevant_info(this_iteration.query, results_data, agent)
             this_iteration.summarizedResult = results_data

khoj/routers/web_client.py CHANGED Viewed

@@ -51,16 +51,6 @@ def chat_page(request: Request):
     )
-@web_client.get("/factchecker", response_class=FileResponse)
-def fact_checker_page(request: Request):
-    return templates.TemplateResponse(
-        "factchecker/index.html",
-        context={
-            "request": request,
-        },
-    )
 @web_client.get("/login", response_class=FileResponse)
 def login_page(request: Request):
     next_url = get_next_url(request)

khoj/search_type/text_search.py CHANGED Viewed

@@ -8,11 +8,7 @@ import torch
 from asgiref.sync import sync_to_async
 from sentence_transformers import util
-from khoj.database.adapters import (
-    EntryAdapters,
-    get_default_search_model,
-    get_user_default_search_model,
-)
+from khoj.database.adapters import EntryAdapters, get_default_search_model
 from khoj.database.models import Agent
 from khoj.database.models import Entry as DbEntry
 from khoj.database.models import KhojUser
@@ -114,7 +110,7 @@ async def query(
     file_type = search_type_to_embeddings_type[type.value]
     query = raw_query
-    search_model = await sync_to_async(get_user_default_search_model)(user)
+    search_model = await sync_to_async(get_default_search_model)()
     if not max_distance:
         if search_model.bi_encoder_confidence_threshold:
             max_distance = search_model.bi_encoder_confidence_threshold
@@ -212,7 +208,7 @@ def setup(
     text_to_entries: Type[TextToEntries],
     files: dict[str, str],
     regenerate: bool,
-    user: KhojUser = None,
+    user: KhojUser,
     config=None,
 ) -> Tuple[int, int]:
     if config:

khoj/utils/cli.py CHANGED Viewed

@@ -16,7 +16,7 @@ from khoj.migrations.migrate_processor_config_openai import (
 )
 from khoj.migrations.migrate_server_pg import migrate_server_pg
 from khoj.migrations.migrate_version import migrate_config_to_version
-from khoj.utils.helpers import in_debug_mode, resolve_absolute_path
+from khoj.utils.helpers import in_debug_mode, is_env_var_true, resolve_absolute_path
 from khoj.utils.yaml import parse_config_from_file
@@ -79,7 +79,7 @@ def cli(args=None):
     else:
         args = run_migrations(args)
         args.config = parse_config_from_file(args.config_file)
-        if in_debug_mode():
+        if is_env_var_true("KHOJ_TELEMETRY_DISABLE") or in_debug_mode():
             args.config.app.should_log_telemetry = False
     return args

khoj/utils/fs_syncer.py CHANGED Viewed

@@ -8,6 +8,7 @@ from bs4 import BeautifulSoup
 from magika import Magika
 from khoj.database.models import (
+    KhojUser,
     LocalMarkdownConfig,
     LocalOrgConfig,
     LocalPdfConfig,
@@ -21,7 +22,7 @@ logger = logging.getLogger(__name__)
 magika = Magika()
-def collect_files(search_type: Optional[SearchType] = SearchType.All, user=None) -> dict:
+def collect_files(user: KhojUser, search_type: Optional[SearchType] = SearchType.All) -> dict:
     files: dict[str, dict] = {"docx": {}, "image": {}}
     if search_type == SearchType.All or search_type == SearchType.Org:

khoj/utils/helpers.py CHANGED Viewed

@@ -254,8 +254,10 @@ def get_server_id():
     return server_id
-def telemetry_disabled(app_config: AppConfig):
-    return not app_config or not app_config.should_log_telemetry
+def telemetry_disabled(app_config: AppConfig, telemetry_disable_env) -> bool:
+    return (
+        not app_config.should_log_telemetry if app_config and app_config.should_log_telemetry else telemetry_disable_env
+    )
 def log_telemetry(
@@ -263,11 +265,12 @@ def log_telemetry(
     api: str = None,
     client: Optional[str] = None,
     app_config: Optional[AppConfig] = None,
+    disable_telemetry_env: bool = False,
     properties: dict = None,
 ):
     """Log basic app usage telemetry like client, os, api called"""
     # Do not log usage telemetry, if telemetry is disabled via app config
-    if telemetry_disabled(app_config):
+    if telemetry_disabled(app_config, disable_telemetry_env):
         return []
     if properties.get("server_id") is None:

khoj/utils/rawconfig.py CHANGED Viewed

@@ -138,6 +138,38 @@ class SearchResponse(ConfigBase):
     corpus_id: str
+class FileData(BaseModel):
+    name: str
+    content: bytes
+    file_type: str
+    encoding: str | None = None
+class FileAttachment(BaseModel):
+    name: str
+    content: str
+    file_type: str
+    size: int
+class ChatRequestBody(BaseModel):
+    q: str
+    n: Optional[int] = 7
+    d: Optional[float] = None
+    stream: Optional[bool] = False
+    title: Optional[str] = None
+    conversation_id: Optional[str] = None
+    turn_id: Optional[str] = None
+    city: Optional[str] = None
+    region: Optional[str] = None
+    country: Optional[str] = None
+    country_code: Optional[str] = None
+    timezone: Optional[str] = None
+    images: Optional[list[str]] = None
+    files: Optional[list[FileAttachment]] = []
+    create_new: Optional[bool] = False
 class Entry:
     raw: str
     compiled: str

khoj/utils/state.py CHANGED Viewed

@@ -12,7 +12,7 @@ from khoj.database.models import ProcessLock
 from khoj.processor.embeddings import CrossEncoderModel, EmbeddingsModel
 from khoj.utils import config as utils_config
 from khoj.utils.config import OfflineChatProcessorModel, SearchModels
-from khoj.utils.helpers import LRU, get_device
+from khoj.utils.helpers import LRU, get_device, is_env_var_true
 from khoj.utils.rawconfig import FullConfig
 # Application Global State
@@ -34,6 +34,7 @@ SearchType = utils_config.SearchType
 scheduler: BackgroundScheduler = None
 schedule_leader_process_lock: ProcessLock = None
 telemetry: List[Dict[str, str]] = []
+telemetry_disabled: bool = is_env_var_true("KHOJ_TELEMETRY_DISABLE")
 khoj_version: str = None
 device = get_device()
 chat_on_gpu: bool = True

{khoj-1.28.3.dist-info → khoj-1.28.4.dev92.dist-info}/METADATA RENAMED Viewed

@@ -1,13 +1,11 @@
 Metadata-Version: 2.3
 Name: khoj
-Version: 1.28.3
+Version: 1.28.4.dev92
 Summary: Your Second Brain
 Project-URL: Homepage, https://khoj.dev
 Project-URL: Documentation, https://docs.khoj.dev
 Project-URL: Code, https://github.com/khoj-ai/khoj
 Author: Debanjum Singh Solanky, Saba Imran
-License-Expression: AGPL-3.0-or-later
-License-File: LICENSE
 Keywords: AI,NLP,images,markdown,org-mode,pdf,productivity,search,semantic-search
 Classifier: Development Status :: 5 - Production/Stable
 Classifier: Intended Audience :: Information Technology
@@ -76,12 +74,14 @@ Requires-Dist: websockets==12.0
 Provides-Extra: dev
 Requires-Dist: black>=23.1.0; extra == 'dev'
 Requires-Dist: boto3>=1.34.57; extra == 'dev'
+Requires-Dist: datasets; extra == 'dev'
 Requires-Dist: factory-boy>=3.2.1; extra == 'dev'
 Requires-Dist: freezegun>=1.2.0; extra == 'dev'
 Requires-Dist: gitpython~=3.1.43; extra == 'dev'
 Requires-Dist: google-auth==2.23.3; extra == 'dev'
 Requires-Dist: gunicorn==22.0.0; extra == 'dev'
 Requires-Dist: mypy>=1.0.1; extra == 'dev'
+Requires-Dist: pandas; extra == 'dev'
 Requires-Dist: pre-commit>=3.0.4; extra == 'dev'
 Requires-Dist: pytest-asyncio==0.21.1; extra == 'dev'
 Requires-Dist: pytest-django==4.5.2; extra == 'dev'

khoj 1.28.3__py3-none-any.whl → 1.28.4.dev92__py3-none-any.whl

khoj 1.28.3__py3-none-any.whl → 1.28.4.dev92__py3-none-any.whl