khoj 1.24.2.dev2__py3-none-any.whl → 1.24.2.dev16__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (88)
  1. khoj/database/adapters/__init__.py +139 -16
  2. khoj/database/admin.py +2 -0
  3. khoj/database/migrations/0065_remove_agent_avatar_remove_agent_public_and_more.py +49 -0
  4. khoj/database/migrations/0066_remove_agent_tools_agent_input_tools_and_more.py +69 -0
  5. khoj/database/migrations/0067_alter_agent_style_icon.py +50 -0
  6. khoj/database/models/__init__.py +60 -18
  7. khoj/interface/compiled/404/index.html +1 -1
  8. khoj/interface/compiled/_next/static/chunks/1269-2e52d48e7d0e5c61.js +1 -0
  9. khoj/interface/compiled/_next/static/chunks/1603-67a89278e2c5dbe6.js +1 -0
  10. khoj/interface/compiled/_next/static/chunks/2697-a38d01981ad3bdf8.js +1 -0
  11. khoj/interface/compiled/_next/static/chunks/3110-ef2cacd1b8d79ad8.js +1 -0
  12. khoj/interface/compiled/_next/static/chunks/4086-2c74808ba38a5a0f.js +1 -0
  13. khoj/interface/compiled/_next/static/chunks/477-ec86e93db10571c1.js +1 -0
  14. khoj/interface/compiled/_next/static/chunks/51-e8f5bdb69b5ea421.js +1 -0
  15. khoj/interface/compiled/_next/static/chunks/9178-899fe9a6b754ecfe.js +1 -0
  16. khoj/interface/compiled/_next/static/chunks/9417-29502e39c3e7d60c.js +1 -0
  17. khoj/interface/compiled/_next/static/chunks/9479-7eed36fc954ef804.js +1 -0
  18. khoj/interface/compiled/_next/static/chunks/app/agents/page-df26b497b7356151.js +1 -0
  19. khoj/interface/compiled/_next/static/chunks/app/automations/page-1688dead2f21270d.js +1 -0
  20. khoj/interface/compiled/_next/static/chunks/app/chat/page-91abcb71846922b7.js +1 -0
  21. khoj/interface/compiled/_next/static/chunks/app/factchecker/page-7ab093711c27041c.js +1 -0
  22. khoj/interface/compiled/_next/static/chunks/app/page-fada198096eab47f.js +1 -0
  23. khoj/interface/compiled/_next/static/chunks/app/search/page-a7e036689b6507ff.js +1 -0
  24. khoj/interface/compiled/_next/static/chunks/app/settings/page-fa11cafaec7ab39f.js +1 -0
  25. khoj/interface/compiled/_next/static/chunks/app/share/chat/page-c5d2b9076e5390b2.js +1 -0
  26. khoj/interface/compiled/_next/static/chunks/{webpack-878fd47921816d3c.js → webpack-f52083d548d804fa.js} +1 -1
  27. khoj/interface/compiled/_next/static/css/4cae6c0e5c72fb2d.css +1 -0
  28. khoj/interface/compiled/_next/static/css/50d972a8c787730b.css +25 -0
  29. khoj/interface/compiled/_next/static/css/dfb67a9287720a2b.css +1 -0
  30. khoj/interface/compiled/agents/index.html +1 -1
  31. khoj/interface/compiled/agents/index.txt +2 -2
  32. khoj/interface/compiled/automations/index.html +1 -1
  33. khoj/interface/compiled/automations/index.txt +2 -2
  34. khoj/interface/compiled/chat/index.html +1 -1
  35. khoj/interface/compiled/chat/index.txt +2 -2
  36. khoj/interface/compiled/factchecker/index.html +1 -1
  37. khoj/interface/compiled/factchecker/index.txt +2 -2
  38. khoj/interface/compiled/index.html +1 -1
  39. khoj/interface/compiled/index.txt +2 -2
  40. khoj/interface/compiled/search/index.html +1 -1
  41. khoj/interface/compiled/search/index.txt +2 -2
  42. khoj/interface/compiled/settings/index.html +1 -1
  43. khoj/interface/compiled/settings/index.txt +2 -2
  44. khoj/interface/compiled/share/chat/index.html +1 -1
  45. khoj/interface/compiled/share/chat/index.txt +2 -2
  46. khoj/processor/content/notion/notion_to_entries.py +2 -1
  47. khoj/processor/conversation/anthropic/anthropic_chat.py +2 -0
  48. khoj/processor/conversation/google/gemini_chat.py +2 -0
  49. khoj/processor/conversation/offline/chat_model.py +3 -1
  50. khoj/processor/conversation/openai/gpt.py +2 -0
  51. khoj/processor/conversation/prompts.py +56 -5
  52. khoj/processor/image/generate.py +3 -1
  53. khoj/processor/tools/online_search.py +9 -7
  54. khoj/routers/api.py +34 -5
  55. khoj/routers/api_agents.py +232 -4
  56. khoj/routers/api_chat.py +46 -17
  57. khoj/routers/api_content.py +14 -0
  58. khoj/routers/helpers.py +113 -13
  59. khoj/search_type/text_search.py +4 -1
  60. khoj/utils/helpers.py +15 -2
  61. {khoj-1.24.2.dev2.dist-info → khoj-1.24.2.dev16.dist-info}/METADATA +1 -8
  62. {khoj-1.24.2.dev2.dist-info → khoj-1.24.2.dev16.dist-info}/RECORD +67 -64
  63. khoj/interface/compiled/_next/static/chunks/1603-3e2e1528e3b6ea1d.js +0 -1
  64. khoj/interface/compiled/_next/static/chunks/2697-a29cb9191a9e339c.js +0 -1
  65. khoj/interface/compiled/_next/static/chunks/6648-ee109f4ea33a74e2.js +0 -1
  66. khoj/interface/compiled/_next/static/chunks/7071-b4711cecca6619a8.js +0 -1
  67. khoj/interface/compiled/_next/static/chunks/743-1a64254447cda71f.js +0 -1
  68. khoj/interface/compiled/_next/static/chunks/8423-62ac6c832be2461b.js +0 -1
  69. khoj/interface/compiled/_next/static/chunks/9162-0be016519a18568b.js +0 -1
  70. khoj/interface/compiled/_next/static/chunks/9178-409f672ab573b8fd.js +0 -1
  71. khoj/interface/compiled/_next/static/chunks/9417-5d14ac74aaab2c66.js +0 -1
  72. khoj/interface/compiled/_next/static/chunks/9984-e410179c6fac7cf1.js +0 -1
  73. khoj/interface/compiled/_next/static/chunks/app/agents/page-a3db5b3869f83937.js +0 -1
  74. khoj/interface/compiled/_next/static/chunks/app/automations/page-e68cb1eba3cc41de.js +0 -1
  75. khoj/interface/compiled/_next/static/chunks/app/chat/page-5b1626fc2882c1f9.js +0 -1
  76. khoj/interface/compiled/_next/static/chunks/app/factchecker/page-b01f8a9b9107ecbe.js +0 -1
  77. khoj/interface/compiled/_next/static/chunks/app/page-ee9ee504f0d5ace6.js +0 -1
  78. khoj/interface/compiled/_next/static/chunks/app/search/page-53c2494182551684.js +0 -1
  79. khoj/interface/compiled/_next/static/chunks/app/settings/page-2a7e60e3782ed95e.js +0 -1
  80. khoj/interface/compiled/_next/static/chunks/app/share/chat/page-9d9faa4a155bbf58.js +0 -1
  81. khoj/interface/compiled/_next/static/css/24f141a6e37cd204.css +0 -25
  82. khoj/interface/compiled/_next/static/css/3e1f1fdd70775091.css +0 -1
  83. khoj/interface/compiled/_next/static/css/60fc94dfe42ddfe9.css +0 -1
  84. /khoj/interface/compiled/_next/static/{sXEsDJ1Vi3HypDes8jcxW → MyYNlmGMz32TGV_-febR4}/_buildManifest.js +0 -0
  85. /khoj/interface/compiled/_next/static/{sXEsDJ1Vi3HypDes8jcxW → MyYNlmGMz32TGV_-febR4}/_ssgManifest.js +0 -0
  86. {khoj-1.24.2.dev2.dist-info → khoj-1.24.2.dev16.dist-info}/WHEEL +0 -0
  87. {khoj-1.24.2.dev2.dist-info → khoj-1.24.2.dev16.dist-info}/entry_points.txt +0 -0
  88. {khoj-1.24.2.dev2.dist-info → khoj-1.24.2.dev16.dist-info}/licenses/LICENSE +0 -0
khoj/routers/api_content.py CHANGED
@@ -2,11 +2,13 @@ import asyncio
 import json
 import logging
 import math
+from concurrent.futures import ThreadPoolExecutor
 from typing import Dict, List, Optional, Union

 from asgiref.sync import sync_to_async
 from fastapi import (
     APIRouter,
+    BackgroundTasks,
     Depends,
     Header,
     HTTPException,
@@ -58,6 +60,8 @@ logger = logging.getLogger(__name__)

 api_content = APIRouter()

+executor = ThreadPoolExecutor()
+

 class File(BaseModel):
     path: str
@@ -77,6 +81,11 @@ class IndexerInput(BaseModel):
     docx: Optional[dict[str, bytes]] = None


+async def run_in_executor(func, *args):
+    loop = asyncio.get_event_loop()
+    return await loop.run_in_executor(executor, func, *args)
+
+
 @api_content.put("")
 @requires(["authenticated"])
 async def put_content(
@@ -209,6 +218,7 @@ async def set_content_github(
 @requires(["authenticated"])
 async def set_content_notion(
     request: Request,
+    background_tasks: BackgroundTasks,
     updated_config: Union[NotionContentConfig, None],
     client: Optional[str] = None,
 ):
@@ -225,6 +235,10 @@ async def set_content_notion(
         logger.error(e, exc_info=True)
         raise HTTPException(status_code=500, detail="Failed to set Notion config")

+    if updated_config.token:
+        # Trigger an async job to configure_content. Let it run without blocking the response.
+        background_tasks.add_task(run_in_executor, configure_content, {}, False, SearchType.Notion, user)
+
     update_telemetry_state(
         request=request,
         telemetry_type="api",
khoj/routers/helpers.py CHANGED
@@ -47,6 +47,7 @@ from khoj.database.adapters import (
     run_with_process_lock,
 )
 from khoj.database.models import (
+    Agent,
     ChatModelOptions,
     ClientApplication,
     Conversation,
@@ -257,8 +258,39 @@ async def acreate_title_from_query(query: str) -> str:
     return response.strip()


+async def acheck_if_safe_prompt(system_prompt: str) -> Tuple[bool, str]:
+    """
+    Check if the system prompt is safe to use
+    """
+    safe_prompt_check = prompts.personality_prompt_safety_expert.format(prompt=system_prompt)
+    is_safe = True
+    reason = ""
+
+    with timer("Chat actor: Check if safe prompt", logger):
+        response = await send_message_to_model_wrapper(safe_prompt_check)
+
+    response = response.strip()
+    try:
+        response = json.loads(response)
+        is_safe = response.get("safe", "True") == "True"
+        if not is_safe:
+            reason = response.get("reason", "")
+    except Exception:
+        logger.error(f"Invalid response for checking safe prompt: {response}")
+
+    if not is_safe:
+        logger.error(f"Unsafe prompt: {system_prompt}. Reason: {reason}")
+
+    return is_safe, reason
+
+
 async def aget_relevant_information_sources(
-    query: str, conversation_history: dict, is_task: bool, subscribed: bool, uploaded_image_url: str = None
+    query: str,
+    conversation_history: dict,
+    is_task: bool,
+    subscribed: bool,
+    uploaded_image_url: str = None,
+    agent: Agent = None,
 ):
     """
     Given a query, determine which of the available tools the agent should use in order to answer appropriately.
@@ -267,19 +299,27 @@ async def aget_relevant_information_sources(
     tool_options = dict()
     tool_options_str = ""

+    agent_tools = agent.input_tools if agent else []
+
     for tool, description in tool_descriptions_for_llm.items():
         tool_options[tool.value] = description
-        tool_options_str += f'- "{tool.value}": "{description}"\n'
+        if len(agent_tools) == 0 or tool.value in agent_tools:
+            tool_options_str += f'- "{tool.value}": "{description}"\n'

     chat_history = construct_chat_history(conversation_history)

     if uploaded_image_url:
         query = f"[placeholder for user attached image]\n{query}"

+    personality_context = (
+        prompts.personality_context.format(personality=agent.personality) if agent and agent.personality else ""
+    )
+
     relevant_tools_prompt = prompts.pick_relevant_information_collection_tools.format(
         query=query,
         tools=tool_options_str,
         chat_history=chat_history,
+        personality_context=personality_context,
     )

     with timer("Chat actor: Infer information sources to refer", logger):
@@ -300,7 +340,10 @@

         final_response = [] if not is_task else [ConversationCommand.AutomatedTask]
         for llm_suggested_tool in response:
-            if llm_suggested_tool in tool_options.keys():
+            # Add a double check to verify it's in the agent list, because the LLM sometimes gets confused by the tool options.
+            if llm_suggested_tool in tool_options.keys() and (
+                len(agent_tools) == 0 or llm_suggested_tool in agent_tools
+            ):
                 # Check whether the tool exists as a valid ConversationCommand
                 final_response.append(ConversationCommand(llm_suggested_tool))

@@ -313,7 +356,7 @@


 async def aget_relevant_output_modes(
-    query: str, conversation_history: dict, is_task: bool = False, uploaded_image_url: str = None
+    query: str, conversation_history: dict, is_task: bool = False, uploaded_image_url: str = None, agent: Agent = None
 ):
     """
     Given a query, determine which of the available tools the agent should use in order to answer appropriately.
@@ -322,22 +365,30 @@ async def aget_relevant_output_modes(
     mode_options = dict()
     mode_options_str = ""

+    output_modes = agent.output_modes if agent else []
+
     for mode, description in mode_descriptions_for_llm.items():
         # Do not allow tasks to schedule another task
         if is_task and mode == ConversationCommand.Automation:
             continue
         mode_options[mode.value] = description
-        mode_options_str += f'- "{mode.value}": "{description}"\n'
+        if len(output_modes) == 0 or mode.value in output_modes:
+            mode_options_str += f'- "{mode.value}": "{description}"\n'

     chat_history = construct_chat_history(conversation_history)

     if uploaded_image_url:
         query = f"[placeholder for user attached image]\n{query}"

+    personality_context = (
+        prompts.personality_context.format(personality=agent.personality) if agent and agent.personality else ""
+    )
+
     relevant_mode_prompt = prompts.pick_relevant_output_mode.format(
         query=query,
         modes=mode_options_str,
         chat_history=chat_history,
+        personality_context=personality_context,
     )

     with timer("Chat actor: Infer output mode for chat response", logger):
@@ -352,7 +403,9 @@
             return ConversationCommand.Text

         output_mode = response["output"]
-        if output_mode in mode_options.keys():
+
+        # Add a double check to verify it's in the agent list, because the LLM sometimes gets confused by the tool options.
+        if output_mode in mode_options.keys() and (len(output_modes) == 0 or output_mode in output_modes):
             # Check whether the tool exists as a valid ConversationCommand
             return ConversationCommand(output_mode)

@@ -364,7 +417,12 @@


 async def infer_webpage_urls(
-    q: str, conversation_history: dict, location_data: LocationData, user: KhojUser, uploaded_image_url: str = None
+    q: str,
+    conversation_history: dict,
+    location_data: LocationData,
+    user: KhojUser,
+    uploaded_image_url: str = None,
+    agent: Agent = None,
 ) -> List[str]:
     """
     Infer webpage links from the given query
@@ -374,12 +432,17 @@
     chat_history = construct_chat_history(conversation_history)

     utc_date = datetime.utcnow().strftime("%Y-%m-%d")
+    personality_context = (
+        prompts.personality_context.format(personality=agent.personality) if agent and agent.personality else ""
+    )
+
     online_queries_prompt = prompts.infer_webpages_to_read.format(
         current_date=utc_date,
         query=q,
         chat_history=chat_history,
         location=location,
         username=username,
+        personality_context=personality_context,
     )

     with timer("Chat actor: Infer webpage urls to read", logger):
@@ -400,7 +463,12 @@


 async def generate_online_subqueries(
-    q: str, conversation_history: dict, location_data: LocationData, user: KhojUser, uploaded_image_url: str = None
+    q: str,
+    conversation_history: dict,
+    location_data: LocationData,
+    user: KhojUser,
+    uploaded_image_url: str = None,
+    agent: Agent = None,
 ) -> List[str]:
     """
     Generate subqueries from the given query
@@ -410,12 +478,17 @@
     chat_history = construct_chat_history(conversation_history)

     utc_date = datetime.utcnow().strftime("%Y-%m-%d")
+    personality_context = (
+        prompts.personality_context.format(personality=agent.personality) if agent and agent.personality else ""
+    )
+
     online_queries_prompt = prompts.online_search_conversation_subqueries.format(
         current_date=utc_date,
         query=q,
         chat_history=chat_history,
         location=location,
         username=username,
+        personality_context=personality_context,
     )

     with timer("Chat actor: Generate online search subqueries", logger):
@@ -464,7 +537,7 @@ async def schedule_query(q: str, conversation_history: dict, uploaded_image_url:
         raise AssertionError(f"Invalid response for scheduling query: {raw_response}")


-async def extract_relevant_info(q: str, corpus: str, subscribed: bool) -> Union[str, None]:
+async def extract_relevant_info(q: str, corpus: str, subscribed: bool, agent: Agent = None) -> Union[str, None]:
     """
     Extract relevant information for a given query from the target corpus
     """
@@ -472,9 +545,14 @@ async def extract_relevant_info(q: str, corpus: str, subscribed: bool) -> Union[
     if is_none_or_empty(corpus) or is_none_or_empty(q):
         return None

+    personality_context = (
+        prompts.personality_context.format(personality=agent.personality) if agent and agent.personality else ""
+    )
+
     extract_relevant_information = prompts.extract_relevant_information.format(
         query=q,
         corpus=corpus.strip(),
+        personality_context=personality_context,
     )

     chat_model: ChatModelOptions = await ConversationAdapters.aget_default_conversation_config()
@@ -490,7 +568,7 @@ async def extract_relevant_info(q: str, corpus: str, subscribed: bool) -> Union[


 async def extract_relevant_summary(
-    q: str, corpus: str, subscribed: bool = False, uploaded_image_url: str = None
+    q: str, corpus: str, subscribed: bool = False, uploaded_image_url: str = None, agent: Agent = None
 ) -> Union[str, None]:
     """
     Extract relevant information for a given query from the target corpus
@@ -499,9 +577,14 @@
     if is_none_or_empty(corpus) or is_none_or_empty(q):
         return None

+    personality_context = (
+        prompts.personality_context.format(personality=agent.personality) if agent and agent.personality else ""
+    )
+
     extract_relevant_information = prompts.extract_relevant_summary.format(
         query=q,
         corpus=corpus.strip(),
+        personality_context=personality_context,
     )

     chat_model: ChatModelOptions = await ConversationAdapters.aget_default_conversation_config()
@@ -526,12 +609,16 @@ async def generate_better_image_prompt(
     model_type: Optional[str] = None,
     subscribed: bool = False,
     uploaded_image_url: Optional[str] = None,
+    agent: Agent = None,
 ) -> str:
     """
     Generate a better image prompt from the given query
     """

     today_date = datetime.now(tz=timezone.utc).strftime("%Y-%m-%d, %A")
+    personality_context = (
+        prompts.personality_context.format(personality=agent.personality) if agent and agent.personality else ""
+    )
     model_type = model_type or TextToImageModelConfig.ModelType.OPENAI

     if location_data:
@@ -558,6 +645,7 @@
             current_date=today_date,
             references=user_references,
             online_results=simplified_online_results,
+            personality_context=personality_context,
         )
     elif model_type in [TextToImageModelConfig.ModelType.STABILITYAI, TextToImageModelConfig.ModelType.REPLICATE]:
         image_prompt = prompts.image_generation_improve_prompt_sd.format(
  image_prompt = prompts.image_generation_improve_prompt_sd.format(
@@ -567,6 +655,7 @@ async def generate_better_image_prompt(
567
655
  current_date=today_date,
568
656
  references=user_references,
569
657
  online_results=simplified_online_results,
658
+ personality_context=personality_context,
570
659
  )
571
660
 
572
661
  chat_model: ChatModelOptions = await ConversationAdapters.aget_default_conversation_config()
@@ -651,15 +740,13 @@ async def send_message_to_model_wrapper(
651
740
  model_type=conversation_config.model_type,
652
741
  )
653
742
 
654
- openai_response = send_message_to_model(
743
+ return send_message_to_model(
655
744
  messages=truncated_messages,
656
745
  api_key=api_key,
657
746
  model=chat_model,
658
747
  response_type=response_type,
659
748
  api_base_url=api_base_url,
660
749
  )
661
-
662
- return openai_response
663
750
  elif model_type == ChatModelOptions.ModelType.ANTHROPIC:
664
751
  api_key = conversation_config.openai_config.api_key
665
752
  truncated_messages = generate_chatml_messages_with_context(
@@ -942,13 +1029,23 @@ class ApiUserRateLimiter:
942
1029
 
943
1030
  # Check if the user has exceeded the rate limit
944
1031
  if subscribed and count_requests >= self.subscribed_requests:
1032
+ logger.info(
1033
+ f"Rate limit: {count_requests} requests in {self.window} seconds for user: {user}. Limit is {self.subscribed_requests} requests."
1034
+ )
945
1035
  raise HTTPException(status_code=429, detail="Slow down! Too Many Requests")
946
1036
  if not subscribed and count_requests >= self.requests:
947
1037
  if self.requests >= self.subscribed_requests:
1038
+ logger.info(
1039
+ f"Rate limit: {count_requests} requests in {self.window} seconds for user: {user}. Limit is {self.subscribed_requests} requests."
1040
+ )
948
1041
  raise HTTPException(
949
1042
  status_code=429,
950
1043
  detail="Slow down! Too Many Requests",
951
1044
  )
1045
+
1046
+ logger.info(
1047
+ f"Rate limit: {count_requests} requests in {self.window} seconds for user: {user}. Limit is {self.subscribed_requests} requests."
1048
+ )
952
1049
  raise HTTPException(
953
1050
  status_code=429,
954
1051
  detail="We're glad you're enjoying Khoj! You've exceeded your usage limit for today. Come back tomorrow or subscribe to increase your usage limit via [your settings](https://app.khoj.dev/settings).",
@@ -986,6 +1083,9 @@ class ConversationCommandRateLimiter:
986
1083
  ).acount()
987
1084
 
988
1085
  if subscribed and count_requests >= self.subscribed_rate_limit:
1086
+ logger.info(
1087
+ f"Rate limit: {count_requests} requests in 24 hours for user: {user}. Limit is {self.subscribed_rate_limit} requests."
1088
+ )
989
1089
  raise HTTPException(status_code=429, detail="Slow down! Too Many Requests")
990
1090
  if not subscribed and count_requests >= self.trial_rate_limit:
991
1091
  raise HTTPException(
khoj/search_type/text_search.py CHANGED
@@ -1,13 +1,14 @@
 import logging
 import math
 from pathlib import Path
-from typing import List, Tuple, Type, Union
+from typing import List, Optional, Tuple, Type, Union

 import torch
 from asgiref.sync import sync_to_async
 from sentence_transformers import util

 from khoj.database.adapters import EntryAdapters, get_user_search_model_or_default
+from khoj.database.models import Agent
 from khoj.database.models import Entry as DbEntry
 from khoj.database.models import KhojUser
 from khoj.processor.content.text_to_entries import TextToEntries
@@ -101,6 +102,7 @@ async def query(
     type: SearchType = SearchType.All,
     question_embedding: Union[torch.Tensor, None] = None,
     max_distance: float = None,
+    agent: Optional[Agent] = None,
 ) -> Tuple[List[dict], List[Entry]]:
     "Search for entries that answer the query"

@@ -129,6 +131,7 @@
         file_type_filter=file_type,
         raw_query=raw_query,
         max_distance=max_distance,
+        agent=agent,
     ).all()
     hits = await sync_to_async(list)(hits)  # type: ignore[call-arg]

khoj/utils/helpers.py CHANGED
@@ -325,7 +325,15 @@ command_descriptions = {
     ConversationCommand.Image: "Generate images by describing your imagination in words.",
     ConversationCommand.Automation: "Automatically run your query at a specified time or interval.",
     ConversationCommand.Help: "Get help with how to use or setup Khoj from the documentation",
-    ConversationCommand.Summarize: "Create an appropriate summary using provided documents.",
+    ConversationCommand.Summarize: "Get help with a question pertaining to an entire document.",
+}
+
+command_descriptions_for_agent = {
+    ConversationCommand.General: "Respond without any outside information or personal knowledge.",
+    ConversationCommand.Notes: "Search through the knowledge base. Required if the agent expects context from the knowledge base.",
+    ConversationCommand.Online: "Search for the latest, up-to-date information from the internet.",
+    ConversationCommand.Webpage: "Scrape specific web pages for information.",
+    ConversationCommand.Summarize: "Retrieve an answer that depends on the entire document or a large text. Knowledge base must be a single document.",
 }

 tool_descriptions_for_llm = {
@@ -334,7 +342,7 @@ tool_descriptions_for_llm = {
     ConversationCommand.Notes: "To search the user's personal knowledge base. Especially helpful if the question expects context from the user's notes or documents.",
     ConversationCommand.Online: "To search for the latest, up-to-date information from the internet. Note: **Questions about Khoj should always use this data source**",
     ConversationCommand.Webpage: "To use if the user has directly provided the webpage urls or you are certain of the webpage urls to read.",
-    ConversationCommand.Summarize: "To create a summary of the document provided by the user.",
+    ConversationCommand.Summarize: "To retrieve an answer that depends on the entire document or a large text.",
 }

 mode_descriptions_for_llm = {
@@ -343,6 +351,11 @@ mode_descriptions_for_llm = {
     ConversationCommand.Text: "Use this if the other response modes don't seem to fit the query.",
 }

+mode_descriptions_for_agent = {
+    ConversationCommand.Image: "Allow the agent to generate images.",
+    ConversationCommand.Text: "Allow the agent to generate text.",
+}
+

 class ImageIntentType(Enum):
     """
{khoj-1.24.2.dev2.dist-info → khoj-1.24.2.dev16.dist-info}/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: khoj
-Version: 1.24.2.dev2
+Version: 1.24.2.dev16
 Summary: Your Second Brain
 Project-URL: Homepage, https://khoj.dev
 Project-URL: Documentation, https://docs.khoj.dev
@@ -169,10 +169,3 @@ Made with [contrib.rocks](https://contrib.rocks).
 ### Interested in Contributing?

 We are always looking for contributors to help us build new features, improve the project documentation, or fix bugs. If you're interested, please see our [Contributing Guidelines](https://docs.khoj.dev/contributing/development) and check out our [Contributors Project Board](https://github.com/orgs/khoj-ai/projects/4).
-
-## [Sponsors](https://github.com/sponsors/khoj-ai)
-Shout out to our brilliant sponsors! 🌈
-
-<a href="http://github.com/beekeeb">
-  <img src="https://raw.githubusercontent.com/beekeeb/piantor/main/docs/beekeeb.png" width=250/>
-</a>