khoj 1.29.2.dev5__py3-none-any.whl → 1.29.2.dev35__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51)
  1. khoj/database/adapters/__init__.py +2 -0
  2. khoj/database/admin.py +32 -1
  3. khoj/interface/compiled/404/index.html +1 -1
  4. khoj/interface/compiled/_next/static/chunks/5538-b87b60ecc0c27ceb.js +1 -0
  5. khoj/interface/compiled/_next/static/chunks/796-68f9e87f9cdfda1d.js +3 -0
  6. khoj/interface/compiled/_next/static/chunks/8423-c0123d454681e03a.js +1 -0
  7. khoj/interface/compiled/_next/static/chunks/app/chat/{page-e95e87da53d725a7.js → page-e60a55d029b6216a.js} +1 -1
  8. khoj/interface/compiled/_next/static/chunks/app/page-fcf7411ff80b6bf5.js +1 -0
  9. khoj/interface/compiled/_next/static/chunks/app/share/chat/{page-3a752baa5fb62e20.js → page-4a4c0f199b89bd80.js} +1 -1
  10. khoj/interface/compiled/_next/static/chunks/{webpack-d37377886a1b4e56.js → webpack-323bbe2678102a2f.js} +1 -1
  11. khoj/interface/compiled/agents/index.html +1 -1
  12. khoj/interface/compiled/agents/index.txt +1 -1
  13. khoj/interface/compiled/automations/index.html +1 -1
  14. khoj/interface/compiled/automations/index.txt +1 -1
  15. khoj/interface/compiled/chat/index.html +1 -1
  16. khoj/interface/compiled/chat/index.txt +2 -2
  17. khoj/interface/compiled/index.html +1 -1
  18. khoj/interface/compiled/index.txt +2 -2
  19. khoj/interface/compiled/search/index.html +1 -1
  20. khoj/interface/compiled/search/index.txt +1 -1
  21. khoj/interface/compiled/settings/index.html +1 -1
  22. khoj/interface/compiled/settings/index.txt +1 -1
  23. khoj/interface/compiled/share/chat/index.html +1 -1
  24. khoj/interface/compiled/share/chat/index.txt +2 -2
  25. khoj/main.py +7 -3
  26. khoj/processor/content/pdf/pdf_to_entries.py +1 -1
  27. khoj/processor/conversation/anthropic/anthropic_chat.py +2 -3
  28. khoj/processor/conversation/google/gemini_chat.py +2 -3
  29. khoj/processor/conversation/offline/chat_model.py +2 -2
  30. khoj/processor/conversation/openai/gpt.py +2 -2
  31. khoj/processor/conversation/prompts.py +14 -69
  32. khoj/processor/conversation/utils.py +7 -0
  33. khoj/processor/tools/online_search.py +22 -4
  34. khoj/routers/api_chat.py +23 -15
  35. khoj/routers/helpers.py +36 -77
  36. khoj/routers/research.py +33 -30
  37. khoj/utils/constants.py +1 -1
  38. khoj/utils/helpers.py +5 -6
  39. khoj/utils/initialization.py +77 -10
  40. {khoj-1.29.2.dev5.dist-info → khoj-1.29.2.dev35.dist-info}/METADATA +1 -1
  41. {khoj-1.29.2.dev5.dist-info → khoj-1.29.2.dev35.dist-info}/RECORD +46 -47
  42. khoj/interface/compiled/_next/static/chunks/5538-32bd787d106700dc.js +0 -1
  43. khoj/interface/compiled/_next/static/chunks/5961-3c104d9736b7902b.js +0 -3
  44. khoj/interface/compiled/_next/static/chunks/8423-ffdc2b835629c7f8.js +0 -1
  45. khoj/interface/compiled/_next/static/chunks/app/page-774dcd8ca4459c7e.js +0 -1
  46. khoj/interface/web/assets/icons/favicon-128x128.ico +0 -0
  47. /khoj/interface/compiled/_next/static/{bIVLxe5g7EDG455p-cfe7 → bkshWraYdEa_w254xnxBc}/_buildManifest.js +0 -0
  48. /khoj/interface/compiled/_next/static/{bIVLxe5g7EDG455p-cfe7 → bkshWraYdEa_w254xnxBc}/_ssgManifest.js +0 -0
  49. {khoj-1.29.2.dev5.dist-info → khoj-1.29.2.dev35.dist-info}/WHEEL +0 -0
  50. {khoj-1.29.2.dev5.dist-info → khoj-1.29.2.dev35.dist-info}/entry_points.txt +0 -0
  51. {khoj-1.29.2.dev5.dist-info → khoj-1.29.2.dev35.dist-info}/licenses/LICENSE +0 -0
khoj/routers/api_chat.py CHANGED
@@ -46,8 +46,7 @@ from khoj.routers.helpers import (
46
46
  FeedbackData,
47
47
  acreate_title_from_history,
48
48
  agenerate_chat_response,
49
- aget_relevant_information_sources,
50
- aget_relevant_output_modes,
49
+ aget_relevant_tools_to_execute,
51
50
  construct_automation_created_message,
52
51
  create_automation,
53
52
  gather_raw_query_files,
@@ -753,7 +752,7 @@ async def chat(
753
752
  attached_file_context = gather_raw_query_files(query_files)
754
753
 
755
754
  if conversation_commands == [ConversationCommand.Default] or is_automated_task:
756
- conversation_commands = await aget_relevant_information_sources(
755
+ conversation_commands = await aget_relevant_tools_to_execute(
757
756
  q,
758
757
  meta_log,
759
758
  is_automated_task,
@@ -769,19 +768,9 @@ async def chat(
769
768
  conversation_commands = [ConversationCommand.Research]
770
769
 
771
770
  conversation_commands_str = ", ".join([cmd.value for cmd in conversation_commands])
772
- async for result in send_event(
773
- ChatEvent.STATUS, f"**Chose Data Sources to Search:** {conversation_commands_str}"
774
- ):
771
+ async for result in send_event(ChatEvent.STATUS, f"**Selected Tools:** {conversation_commands_str}"):
775
772
  yield result
776
773
 
777
- mode = await aget_relevant_output_modes(
778
- q, meta_log, is_automated_task, user, uploaded_images, agent, tracer=tracer
779
- )
780
- async for result in send_event(ChatEvent.STATUS, f"**Decided Response Mode:** {mode.value}"):
781
- yield result
782
- if mode not in conversation_commands:
783
- conversation_commands.append(mode)
784
-
785
774
  for cmd in conversation_commands:
786
775
  try:
787
776
  await conversation_command_rate_limiter.update_and_check_if_valid(request, cmd)
@@ -1175,8 +1164,27 @@ async def chat(
1175
1164
  inferred_queries.append(better_diagram_description_prompt)
1176
1165
  diagram_description = excalidraw_diagram_description
1177
1166
  else:
1178
- async for result in send_llm_response(f"Failed to generate diagram. Please try again later."):
1167
+ error_message = "Failed to generate diagram. Please try again later."
1168
+ async for result in send_llm_response(error_message):
1179
1169
  yield result
1170
+
1171
+ await sync_to_async(save_to_conversation_log)(
1172
+ q,
1173
+ error_message,
1174
+ user,
1175
+ meta_log,
1176
+ user_message_time,
1177
+ inferred_queries=[better_diagram_description_prompt],
1178
+ client_application=request.user.client_app,
1179
+ conversation_id=conversation_id,
1180
+ compiled_references=compiled_references,
1181
+ online_results=online_results,
1182
+ code_results=code_results,
1183
+ query_images=uploaded_images,
1184
+ train_of_thought=train_of_thought,
1185
+ raw_query_files=raw_query_files,
1186
+ tracer=tracer,
1187
+ )
1180
1188
  return
1181
1189
 
1182
1190
  content_obj = {
khoj/routers/helpers.py CHANGED
@@ -336,7 +336,7 @@ async def acheck_if_safe_prompt(system_prompt: str, user: KhojUser = None, lax:
336
336
  return is_safe, reason
337
337
 
338
338
 
339
- async def aget_relevant_information_sources(
339
+ async def aget_relevant_tools_to_execute(
340
340
  query: str,
341
341
  conversation_history: dict,
342
342
  is_task: bool,
@@ -360,6 +360,19 @@ async def aget_relevant_information_sources(
360
360
  if len(agent_tools) == 0 or tool.value in agent_tools:
361
361
  tool_options_str += f'- "{tool.value}": "{description}"\n'
362
362
 
363
+ mode_options = dict()
364
+ mode_options_str = ""
365
+
366
+ output_modes = agent.output_modes if agent else []
367
+
368
+ for mode, description in mode_descriptions_for_llm.items():
369
+ # Do not allow tasks to schedule another task
370
+ if is_task and mode == ConversationCommand.Automation:
371
+ continue
372
+ mode_options[mode.value] = description
373
+ if len(output_modes) == 0 or mode.value in output_modes:
374
+ mode_options_str += f'- "{mode.value}": "{description}"\n'
375
+
363
376
  chat_history = construct_chat_history(conversation_history)
364
377
 
365
378
  if query_images:
@@ -369,9 +382,10 @@ async def aget_relevant_information_sources(
369
382
  prompts.personality_context.format(personality=agent.personality) if agent and agent.personality else ""
370
383
  )
371
384
 
372
- relevant_tools_prompt = prompts.pick_relevant_information_collection_tools.format(
385
+ relevant_tools_prompt = prompts.pick_relevant_tools.format(
373
386
  query=query,
374
387
  tools=tool_options_str,
388
+ outputs=mode_options_str,
375
389
  chat_history=chat_history,
376
390
  personality_context=personality_context,
377
391
  )
@@ -388,13 +402,18 @@ async def aget_relevant_information_sources(
388
402
  try:
389
403
  response = clean_json(response)
390
404
  response = json.loads(response)
391
- response = [q.strip() for q in response["source"] if q.strip()]
392
- if not isinstance(response, list) or not response or len(response) == 0:
393
- logger.error(f"Invalid response for determining relevant tools: {response}")
405
+ input_tools = [q.strip() for q in response["source"] if q.strip()]
406
+ if not isinstance(input_tools, list) or not input_tools or len(input_tools) == 0:
407
+ logger.error(f"Invalid response for determining relevant tools: {input_tools}")
394
408
  return tool_options
395
409
 
410
+ output_modes = [q.strip() for q in response["output"] if q.strip()]
411
+ if not isinstance(output_modes, list) or not output_modes or len(output_modes) == 0:
412
+ logger.error(f"Invalid response for determining relevant output modes: {output_modes}")
413
+ return mode_options
414
+
396
415
  final_response = [] if not is_task else [ConversationCommand.AutomatedTask]
397
- for llm_suggested_tool in response:
416
+ for llm_suggested_tool in input_tools:
398
417
  # Add a double check to verify it's in the agent list, because the LLM sometimes gets confused by the tool options.
399
418
  if llm_suggested_tool in tool_options.keys() and (
400
419
  len(agent_tools) == 0 or llm_suggested_tool in agent_tools
@@ -402,88 +421,28 @@ async def aget_relevant_information_sources(
402
421
  # Check whether the tool exists as a valid ConversationCommand
403
422
  final_response.append(ConversationCommand(llm_suggested_tool))
404
423
 
424
+ for llm_suggested_output in output_modes:
425
+ # Add a double check to verify it's in the agent list, because the LLM sometimes gets confused by the tool options.
426
+ if llm_suggested_output in mode_options.keys() and (
427
+ len(output_modes) == 0 or llm_suggested_output in output_modes
428
+ ):
429
+ # Check whether the tool exists as a valid ConversationCommand
430
+ final_response.append(ConversationCommand(llm_suggested_output))
431
+
405
432
  if is_none_or_empty(final_response):
406
433
  if len(agent_tools) == 0:
407
- final_response = [ConversationCommand.Default]
434
+ final_response = [ConversationCommand.Default, ConversationCommand.Text]
408
435
  else:
409
- final_response = [ConversationCommand.General]
436
+ final_response = [ConversationCommand.General, ConversationCommand.Text]
410
437
  except Exception:
411
438
  logger.error(f"Invalid response for determining relevant tools: {response}")
412
439
  if len(agent_tools) == 0:
413
- final_response = [ConversationCommand.Default]
440
+ final_response = [ConversationCommand.Default, ConversationCommand.Text]
414
441
  else:
415
442
  final_response = agent_tools
416
443
  return final_response
417
444
 
418
445
 
419
- async def aget_relevant_output_modes(
420
- query: str,
421
- conversation_history: dict,
422
- is_task: bool = False,
423
- user: KhojUser = None,
424
- query_images: List[str] = None,
425
- agent: Agent = None,
426
- tracer: dict = {},
427
- ):
428
- """
429
- Given a query, determine which of the available tools the agent should use in order to answer appropriately.
430
- """
431
-
432
- mode_options = dict()
433
- mode_options_str = ""
434
-
435
- output_modes = agent.output_modes if agent else []
436
-
437
- for mode, description in mode_descriptions_for_llm.items():
438
- # Do not allow tasks to schedule another task
439
- if is_task and mode == ConversationCommand.Automation:
440
- continue
441
- mode_options[mode.value] = description
442
- if len(output_modes) == 0 or mode.value in output_modes:
443
- mode_options_str += f'- "{mode.value}": "{description}"\n'
444
-
445
- chat_history = construct_chat_history(conversation_history)
446
-
447
- if query_images:
448
- query = f"[placeholder for {len(query_images)} user attached images]\n{query}"
449
-
450
- personality_context = (
451
- prompts.personality_context.format(personality=agent.personality) if agent and agent.personality else ""
452
- )
453
-
454
- relevant_mode_prompt = prompts.pick_relevant_output_mode.format(
455
- query=query,
456
- modes=mode_options_str,
457
- chat_history=chat_history,
458
- personality_context=personality_context,
459
- )
460
-
461
- with timer("Chat actor: Infer output mode for chat response", logger):
462
- response = await send_message_to_model_wrapper(
463
- relevant_mode_prompt, response_type="json_object", user=user, tracer=tracer
464
- )
465
-
466
- try:
467
- response = clean_json(response)
468
- response = json.loads(response)
469
-
470
- if is_none_or_empty(response):
471
- return ConversationCommand.Text
472
-
473
- output_mode = response["output"]
474
-
475
- # Add a double check to verify it's in the agent list, because the LLM sometimes gets confused by the tool options.
476
- if output_mode in mode_options.keys() and (len(output_modes) == 0 or output_mode in output_modes):
477
- # Check whether the tool exists as a valid ConversationCommand
478
- return ConversationCommand(output_mode)
479
-
480
- logger.error(f"Invalid output mode selected: {output_mode}. Defaulting to text.")
481
- return ConversationCommand.Text
482
- except Exception:
483
- logger.error(f"Invalid response for determining output mode: {response}")
484
- return ConversationCommand.Text
485
-
486
-
487
446
  async def infer_webpage_urls(
488
447
  q: str,
489
448
  conversation_history: dict,
khoj/routers/research.py CHANGED
@@ -114,7 +114,7 @@ async def apick_next_tool(
114
114
  logger.info(f"Response for determining relevant tools: {response}")
115
115
 
116
116
  # Detect selection of previously used query, tool combination.
117
- previous_tool_query_combinations = {(i.tool, i.query) for i in previous_iterations}
117
+ previous_tool_query_combinations = {(i.tool, i.query) for i in previous_iterations if i.warning is None}
118
118
  if (selected_tool, generated_query) in previous_tool_query_combinations:
119
119
  warning = f"Repeated tool, query combination detected. Skipping iteration. Try something different."
120
120
  # Only send client status updates if we'll execute this iteration
@@ -226,7 +226,8 @@ async def execute_information_collection(
226
226
  ):
227
227
  yield result
228
228
  except Exception as e:
229
- logger.error(f"Error extracting document references: {e}", exc_info=True)
229
+ this_iteration.warning = f"Error extracting document references: {e}"
230
+ logger.error(this_iteration.warning, exc_info=True)
230
231
 
231
232
  elif this_iteration.tool == ConversationCommand.Online:
232
233
  previous_subqueries = {
@@ -235,28 +236,30 @@ async def execute_information_collection(
235
236
  if iteration.onlineContext
236
237
  for subquery in iteration.onlineContext.keys()
237
238
  }
238
- async for result in search_online(
239
- this_iteration.query,
240
- construct_tool_chat_history(previous_iterations, ConversationCommand.Online),
241
- location,
242
- user,
243
- send_status_func,
244
- [],
245
- max_webpages_to_read=0,
246
- query_images=query_images,
247
- previous_subqueries=previous_subqueries,
248
- agent=agent,
249
- tracer=tracer,
250
- ):
251
- if isinstance(result, dict) and ChatEvent.STATUS in result:
252
- yield result[ChatEvent.STATUS]
253
- elif is_none_or_empty(result):
254
- this_iteration.warning = (
255
- "Detected previously run online search queries. Skipping iteration. Try something different."
256
- )
257
- else:
258
- online_results: Dict[str, Dict] = result # type: ignore
259
- this_iteration.onlineContext = online_results
239
+ try:
240
+ async for result in search_online(
241
+ this_iteration.query,
242
+ construct_tool_chat_history(previous_iterations, ConversationCommand.Online),
243
+ location,
244
+ user,
245
+ send_status_func,
246
+ [],
247
+ max_webpages_to_read=0,
248
+ query_images=query_images,
249
+ previous_subqueries=previous_subqueries,
250
+ agent=agent,
251
+ tracer=tracer,
252
+ ):
253
+ if isinstance(result, dict) and ChatEvent.STATUS in result:
254
+ yield result[ChatEvent.STATUS]
255
+ elif is_none_or_empty(result):
256
+ this_iteration.warning = "Detected previously run online search queries. Skipping iteration. Try something different."
257
+ else:
258
+ online_results: Dict[str, Dict] = result # type: ignore
259
+ this_iteration.onlineContext = online_results
260
+ except Exception as e:
261
+ this_iteration.warning = f"Error searching online: {e}"
262
+ logger.error(this_iteration.warning, exc_info=True)
260
263
 
261
264
  elif this_iteration.tool == ConversationCommand.Webpage:
262
265
  try:
@@ -287,7 +290,8 @@ async def execute_information_collection(
287
290
  webpages.append(webpage["link"])
288
291
  this_iteration.onlineContext = online_results
289
292
  except Exception as e:
290
- logger.error(f"Error reading webpages: {e}", exc_info=True)
293
+ this_iteration.warning = f"Error reading webpages: {e}"
294
+ logger.error(this_iteration.warning, exc_info=True)
291
295
 
292
296
  elif this_iteration.tool == ConversationCommand.Code:
293
297
  try:
@@ -311,10 +315,8 @@ async def execute_information_collection(
311
315
  async for result in send_status_func(f"**Ran code snippets**: {len(this_iteration.codeContext)}"):
312
316
  yield result
313
317
  except ValueError as e:
314
- logger.warning(
315
- f"Failed to use code tool: {e}. Attempting to respond without code results",
316
- exc_info=True,
317
- )
318
+ this_iteration.warning = f"Error running code: {e}"
319
+ logger.warning(this_iteration.warning, exc_info=True)
318
320
 
319
321
  elif this_iteration.tool == ConversationCommand.Summarize:
320
322
  try:
@@ -333,7 +335,8 @@ async def execute_information_collection(
333
335
  else:
334
336
  summarize_files = result # type: ignore
335
337
  except Exception as e:
336
- logger.error(f"Error generating summary: {e}", exc_info=True)
338
+ this_iteration.warning = f"Error summarizing files: {e}"
339
+ logger.error(this_iteration.warning, exc_info=True)
337
340
 
338
341
  else:
339
342
  # No valid tools. This is our exit condition.
khoj/utils/constants.py CHANGED
@@ -16,7 +16,7 @@ default_offline_chat_models = [
16
16
  ]
17
17
  default_openai_chat_models = ["gpt-4o-mini", "gpt-4o"]
18
18
  default_gemini_chat_models = ["gemini-1.5-flash", "gemini-1.5-pro"]
19
- default_anthropic_chat_models = ["claude-3-5-sonnet-20240620", "claude-3-opus-20240229"]
19
+ default_anthropic_chat_models = ["claude-3-5-sonnet-20241022", "claude-3-5-haiku-20241022"]
20
20
 
21
21
  empty_config = {
22
22
  "search-type": {
khoj/utils/helpers.py CHANGED
@@ -365,7 +365,7 @@ tool_descriptions_for_llm = {
365
365
  ConversationCommand.Notes: "To search the user's personal knowledge base. Especially helpful if the question expects context from the user's notes or documents.",
366
366
  ConversationCommand.Online: "To search for the latest, up-to-date information from the internet. Note: **Questions about Khoj should always use this data source**",
367
367
  ConversationCommand.Webpage: "To use if the user has directly provided the webpage urls or you are certain of the webpage urls to read.",
368
- ConversationCommand.Code: "To run Python code in a Pyodide sandbox with no network access. Helpful when need to parse information, run complex calculations, create documents and charts for user. Matplotlib, bs4, pandas, numpy, etc. are available.",
368
+ ConversationCommand.Code: "To run Python code in a Pyodide sandbox with no network access. Helpful when need to parse information, run complex calculations, create plaintext documents, and create charts with quantitative data. Matplotlib, bs4, pandas, numpy, etc. are available.",
369
369
  ConversationCommand.Summarize: "To retrieve an answer that depends on the entire document or a large text.",
370
370
  }
371
371
 
@@ -373,14 +373,13 @@ function_calling_description_for_llm = {
373
373
  ConversationCommand.Notes: "To search the user's personal knowledge base. Especially helpful if the question expects context from the user's notes or documents.",
374
374
  ConversationCommand.Online: "To search the internet for information. Useful to get a quick, broad overview from the internet. Provide all relevant context to ensure new searches, not in previous iterations, are performed.",
375
375
  ConversationCommand.Webpage: "To extract information from webpages. Useful for more detailed research from the internet. Usually used when you know the webpage links to refer to. Share the webpage links and information to extract in your query.",
376
- ConversationCommand.Code: "To run Python code in a Pyodide sandbox with no network access. Helpful when need to parse information, run complex calculations, create charts for user. Matplotlib, bs4, pandas, numpy, etc. are available.",
376
+ ConversationCommand.Code: "To run Python code in a Pyodide sandbox with no network access. Helpful when need to parse information, run complex calculations, create plaintext documents, and create charts with quantitative data. Matplotlib, bs4, pandas, numpy, etc. are available.",
377
377
  }
378
378
 
379
379
  mode_descriptions_for_llm = {
380
- ConversationCommand.Image: "Use this if you are confident the user is requesting you to create a new picture based on their description. This does not support generating charts or graphs.",
381
- ConversationCommand.Automation: "Use this if you are confident the user is requesting a response at a scheduled date, time and frequency",
382
- ConversationCommand.Text: "Use this if a normal text response would be sufficient for accurately responding to the query.",
383
- ConversationCommand.Diagram: "Use this if the user is requesting a diagram or visual representation that requires primitives like lines, rectangles, and text.",
380
+ ConversationCommand.Image: "Use this if you are confident the user is requesting you to create a new picture based on their description. This DOES NOT support generating charts or graphs. It is for creative images.",
381
+ ConversationCommand.Text: "Use this if a normal text response would be sufficient for accurately responding to the query or you don't feel strongly about the other modes.",
382
+ ConversationCommand.Diagram: "Use this if the user is requesting a diagram or visual representation that requires primitives like lines, rectangles, and text. This does not work for charts, graphs, or quantitative data. It is for mind mapping, flowcharts, etc.",
384
383
  }
385
384
 
386
385
  mode_descriptions_for_agent = {
@@ -2,12 +2,13 @@ import logging
2
2
  import os
3
3
  from typing import Tuple
4
4
 
5
+ import openai
6
+
5
7
  from khoj.database.adapters import ConversationAdapters
6
8
  from khoj.database.models import (
7
9
  ChatModelOptions,
8
10
  KhojUser,
9
11
  OpenAIProcessorConversationConfig,
10
- ServerChatSettings,
11
12
  SpeechToTextModelOptions,
12
13
  TextToImageModelConfig,
13
14
  )
@@ -42,14 +43,32 @@ def initialization(interactive: bool = True):
42
43
  "🗣️ Configure chat models available to your server. You can always update these at /server/admin using your admin account"
43
44
  )
44
45
 
46
+ openai_api_base = os.getenv("OPENAI_API_BASE")
47
+ provider = "Ollama" if openai_api_base and openai_api_base.endswith(":11434/v1/") else "OpenAI"
48
+ openai_api_key = os.getenv("OPENAI_API_KEY", "placeholder" if openai_api_base else None)
49
+ default_chat_models = default_openai_chat_models
50
+ if openai_api_base:
51
+ # Get available chat models from OpenAI compatible API
52
+ try:
53
+ openai_client = openai.OpenAI(api_key=openai_api_key, base_url=openai_api_base)
54
+ default_chat_models = [model.id for model in openai_client.models.list()]
55
+ # Put the available default OpenAI models at the top
56
+ valid_default_models = [model for model in default_openai_chat_models if model in default_chat_models]
57
+ other_available_models = [model for model in default_chat_models if model not in valid_default_models]
58
+ default_chat_models = valid_default_models + other_available_models
59
+ except Exception as e:
60
+ logger.warning(f"⚠️ Failed to fetch {provider} chat models. Fallback to default models. Error: {e}")
61
+
45
62
  # Set up OpenAI's online chat models
46
63
  openai_configured, openai_provider = _setup_chat_model_provider(
47
64
  ChatModelOptions.ModelType.OPENAI,
48
- default_openai_chat_models,
49
- default_api_key=os.getenv("OPENAI_API_KEY"),
65
+ default_chat_models,
66
+ default_api_key=openai_api_key,
67
+ api_base_url=openai_api_base,
50
68
  vision_enabled=True,
51
69
  is_offline=False,
52
70
  interactive=interactive,
71
+ provider_name=provider,
53
72
  )
54
73
 
55
74
  # Setup OpenAI speech to text model
@@ -87,7 +106,7 @@ def initialization(interactive: bool = True):
87
106
  ChatModelOptions.ModelType.GOOGLE,
88
107
  default_gemini_chat_models,
89
108
  default_api_key=os.getenv("GEMINI_API_KEY"),
90
- vision_enabled=False,
109
+ vision_enabled=True,
91
110
  is_offline=False,
92
111
  interactive=interactive,
93
112
  provider_name="Google Gemini",
@@ -98,7 +117,7 @@ def initialization(interactive: bool = True):
98
117
  ChatModelOptions.ModelType.ANTHROPIC,
99
118
  default_anthropic_chat_models,
100
119
  default_api_key=os.getenv("ANTHROPIC_API_KEY"),
101
- vision_enabled=False,
120
+ vision_enabled=True,
102
121
  is_offline=False,
103
122
  interactive=interactive,
104
123
  )
@@ -154,11 +173,14 @@ def initialization(interactive: bool = True):
154
173
  default_chat_models: list,
155
174
  default_api_key: str,
156
175
  interactive: bool,
176
+ api_base_url: str = None,
157
177
  vision_enabled: bool = False,
158
178
  is_offline: bool = False,
159
179
  provider_name: str = None,
160
180
  ) -> Tuple[bool, OpenAIProcessorConversationConfig]:
161
- supported_vision_models = ["gpt-4o-mini", "gpt-4o"]
181
+ supported_vision_models = (
182
+ default_openai_chat_models + default_anthropic_chat_models + default_gemini_chat_models
183
+ )
162
184
  provider_name = provider_name or model_type.name.capitalize()
163
185
  default_use_model = {True: "y", False: "n"}[default_api_key is not None or is_offline]
164
186
  use_model_provider = (
@@ -170,14 +192,16 @@ def initialization(interactive: bool = True):
170
192
 
171
193
  logger.info(f"️💬 Setting up your {provider_name} chat configuration")
172
194
 
173
- chat_model_provider = None
195
+ chat_provider = None
174
196
  if not is_offline:
175
197
  if interactive:
176
198
  user_api_key = input(f"Enter your {provider_name} API key (default: {default_api_key}): ")
177
199
  api_key = user_api_key if user_api_key != "" else default_api_key
178
200
  else:
179
201
  api_key = default_api_key
180
- chat_model_provider = OpenAIProcessorConversationConfig.objects.create(api_key=api_key, name=provider_name)
202
+ chat_provider = OpenAIProcessorConversationConfig.objects.create(
203
+ api_key=api_key, name=provider_name, api_base_url=api_base_url
204
+ )
181
205
 
182
206
  if interactive:
183
207
  chat_model_names = input(
@@ -199,13 +223,53 @@ def initialization(interactive: bool = True):
199
223
  "max_prompt_size": default_max_tokens,
200
224
  "vision_enabled": vision_enabled,
201
225
  "tokenizer": default_tokenizer,
202
- "openai_config": chat_model_provider,
226
+ "openai_config": chat_provider,
203
227
  }
204
228
 
205
229
  ChatModelOptions.objects.create(**chat_model_options)
206
230
 
207
231
  logger.info(f"🗣️ {provider_name} chat model configuration complete")
208
- return True, chat_model_provider
232
+ return True, chat_provider
233
+
234
+ def _update_chat_model_options():
235
+ """Update available chat models for OpenAI-compatible APIs"""
236
+ try:
237
+ # Get OpenAI configs with custom base URLs
238
+ custom_configs = OpenAIProcessorConversationConfig.objects.exclude(api_base_url__isnull=True)
239
+
240
+ for config in custom_configs:
241
+ try:
242
+ # Create OpenAI client with custom base URL
243
+ openai_client = openai.OpenAI(api_key=config.api_key, base_url=config.api_base_url)
244
+
245
+ # Get available models
246
+ available_models = [model.id for model in openai_client.models.list()]
247
+
248
+ # Get existing chat model options for this config
249
+ existing_models = ChatModelOptions.objects.filter(
250
+ openai_config=config, model_type=ChatModelOptions.ModelType.OPENAI
251
+ )
252
+
253
+ # Add new models
254
+ for model in available_models:
255
+ if not existing_models.filter(chat_model=model).exists():
256
+ ChatModelOptions.objects.create(
257
+ chat_model=model,
258
+ model_type=ChatModelOptions.ModelType.OPENAI,
259
+ max_prompt_size=model_to_prompt_size.get(model),
260
+ vision_enabled=model in default_openai_chat_models,
261
+ tokenizer=model_to_tokenizer.get(model),
262
+ openai_config=config,
263
+ )
264
+
265
+ # Remove models that are no longer available
266
+ existing_models.exclude(chat_model__in=available_models).delete()
267
+
268
+ except Exception as e:
269
+ logger.warning(f"Failed to update models for {config.name}: {str(e)}")
270
+
271
+ except Exception as e:
272
+ logger.error(f"Failed to update chat model options: {str(e)}")
209
273
 
210
274
  admin_user = KhojUser.objects.filter(is_staff=True).first()
211
275
  if admin_user is None:
@@ -228,3 +292,6 @@ def initialization(interactive: bool = True):
228
292
  return
229
293
  except Exception as e:
230
294
  logger.error(f"🚨 Failed to create chat configuration: {e}", exc_info=True)
295
+ else:
296
+ _update_chat_model_options()
297
+ logger.info("🗣️ Chat model configuration updated")
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: khoj
3
- Version: 1.29.2.dev5
3
+ Version: 1.29.2.dev35
4
4
  Summary: Your Second Brain
5
5
  Project-URL: Homepage, https://khoj.dev
6
6
  Project-URL: Documentation, https://docs.khoj.dev