khoj 1.27.2.dev18__py3-none-any.whl → 1.27.2.dev130__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68)
  1. khoj/database/adapters/__init__.py +34 -10
  2. khoj/interface/compiled/404/index.html +1 -1
  3. khoj/interface/compiled/_next/static/chunks/1034-da58b679fcbb79c1.js +1 -0
  4. khoj/interface/compiled/_next/static/chunks/1467-5a191c1cd5bf0b83.js +1 -0
  5. khoj/interface/compiled/_next/static/chunks/1603-5d70d9dfcdcb1f10.js +1 -0
  6. khoj/interface/compiled/_next/static/chunks/3423-fa918f4e5365a35e.js +1 -0
  7. khoj/interface/compiled/_next/static/chunks/8423-3ad0bfb299801220.js +1 -0
  8. khoj/interface/compiled/_next/static/chunks/app/chat/page-7dc98df9c88828f0.js +1 -0
  9. khoj/interface/compiled/_next/static/chunks/app/factchecker/page-d887f55fe6d4f35d.js +1 -0
  10. khoj/interface/compiled/_next/static/chunks/app/{page-8f22b790e50dd722.js → page-d46244282af16509.js} +1 -1
  11. khoj/interface/compiled/_next/static/chunks/app/share/chat/{page-6a01e07fb244c10c.js → page-505b07bce608b34e.js} +1 -1
  12. khoj/interface/compiled/_next/static/chunks/{webpack-31239d193815e49e.js → webpack-8ae5ce45161bd98e.js} +1 -1
  13. khoj/interface/compiled/_next/static/css/{2272c73fc7a3b571.css → 26c1c33d0423a7d8.css} +1 -1
  14. khoj/interface/compiled/_next/static/css/e9c5fe555dd3050b.css +25 -0
  15. khoj/interface/compiled/agents/index.html +1 -1
  16. khoj/interface/compiled/agents/index.txt +2 -2
  17. khoj/interface/compiled/automations/index.html +1 -1
  18. khoj/interface/compiled/automations/index.txt +2 -2
  19. khoj/interface/compiled/chat/index.html +1 -1
  20. khoj/interface/compiled/chat/index.txt +2 -2
  21. khoj/interface/compiled/factchecker/index.html +1 -1
  22. khoj/interface/compiled/factchecker/index.txt +2 -2
  23. khoj/interface/compiled/index.html +1 -1
  24. khoj/interface/compiled/index.txt +2 -2
  25. khoj/interface/compiled/search/index.html +1 -1
  26. khoj/interface/compiled/search/index.txt +2 -2
  27. khoj/interface/compiled/settings/index.html +1 -1
  28. khoj/interface/compiled/settings/index.txt +2 -2
  29. khoj/interface/compiled/share/chat/index.html +1 -1
  30. khoj/interface/compiled/share/chat/index.txt +2 -2
  31. khoj/processor/conversation/anthropic/anthropic_chat.py +19 -10
  32. khoj/processor/conversation/anthropic/utils.py +37 -6
  33. khoj/processor/conversation/google/gemini_chat.py +23 -13
  34. khoj/processor/conversation/google/utils.py +34 -10
  35. khoj/processor/conversation/offline/chat_model.py +40 -15
  36. khoj/processor/conversation/openai/gpt.py +25 -10
  37. khoj/processor/conversation/openai/utils.py +43 -9
  38. khoj/processor/conversation/prompts.py +131 -22
  39. khoj/processor/conversation/utils.py +299 -6
  40. khoj/processor/image/generate.py +2 -0
  41. khoj/processor/tools/online_search.py +19 -8
  42. khoj/processor/tools/run_code.py +144 -0
  43. khoj/routers/api.py +11 -6
  44. khoj/routers/api_chat.py +177 -88
  45. khoj/routers/helpers.py +155 -59
  46. khoj/routers/research.py +321 -0
  47. khoj/search_filter/date_filter.py +1 -3
  48. khoj/search_filter/file_filter.py +1 -2
  49. khoj/search_type/text_search.py +3 -3
  50. khoj/utils/helpers.py +15 -2
  51. khoj/utils/yaml.py +4 -0
  52. {khoj-1.27.2.dev18.dist-info → khoj-1.27.2.dev130.dist-info}/METADATA +2 -1
  53. {khoj-1.27.2.dev18.dist-info → khoj-1.27.2.dev130.dist-info}/RECORD +61 -58
  54. khoj/interface/compiled/_next/static/chunks/1603-5138bb7c8035d9a6.js +0 -1
  55. khoj/interface/compiled/_next/static/chunks/2697-61fcba89fd87eab4.js +0 -1
  56. khoj/interface/compiled/_next/static/chunks/3423-8e9c420574a9fbe3.js +0 -1
  57. khoj/interface/compiled/_next/static/chunks/9479-a5e7ff4c7d1d7ee7.js +0 -1
  58. khoj/interface/compiled/_next/static/chunks/app/chat/page-151232d8417a1ea1.js +0 -1
  59. khoj/interface/compiled/_next/static/chunks/app/factchecker/page-798904432c2417c4.js +0 -1
  60. khoj/interface/compiled/_next/static/css/76d55eb435962b19.css +0 -25
  61. /khoj/interface/compiled/_next/static/{_gBBcNbs4wMKxKXhQs5E4 → N19uqHAJYqRAVxvuVwHfE}/_buildManifest.js +0 -0
  62. /khoj/interface/compiled/_next/static/{_gBBcNbs4wMKxKXhQs5E4 → N19uqHAJYqRAVxvuVwHfE}/_ssgManifest.js +0 -0
  63. /khoj/interface/compiled/_next/static/chunks/{1970-1d6d0c1b00b4f343.js → 1970-444843bea1d17d61.js} +0 -0
  64. /khoj/interface/compiled/_next/static/chunks/{9417-759984ad62caa3dc.js → 9417-19cfd1a9cb758e71.js} +0 -0
  65. /khoj/interface/compiled/_next/static/chunks/app/settings/{page-7946cabb9c54e22d.js → page-89e6737b2cc9fb3a.js} +0 -0
  66. {khoj-1.27.2.dev18.dist-info → khoj-1.27.2.dev130.dist-info}/WHEEL +0 -0
  67. {khoj-1.27.2.dev18.dist-info → khoj-1.27.2.dev130.dist-info}/entry_points.txt +0 -0
  68. {khoj-1.27.2.dev18.dist-info → khoj-1.27.2.dev130.dist-info}/licenses/LICENSE +0 -0
@@ -4,7 +4,7 @@ import logging
4
4
  import os
5
5
  import urllib.parse
6
6
  from collections import defaultdict
7
- from typing import Callable, Dict, List, Optional, Tuple, Union
7
+ from typing import Any, Callable, Dict, List, Optional, Tuple, Union
8
8
 
9
9
  import aiohttp
10
10
  from bs4 import BeautifulSoup
@@ -52,7 +52,8 @@ OLOSTEP_QUERY_PARAMS = {
52
52
  "expandMarkdown": "True",
53
53
  "expandHtml": "False",
54
54
  }
55
- MAX_WEBPAGES_TO_READ = 1
55
+
56
+ DEFAULT_MAX_WEBPAGES_TO_READ = 1
56
57
 
57
58
 
58
59
  async def search_online(
@@ -62,8 +63,10 @@ async def search_online(
62
63
  user: KhojUser,
63
64
  send_status_func: Optional[Callable] = None,
64
65
  custom_filters: List[str] = [],
66
+ max_webpages_to_read: int = DEFAULT_MAX_WEBPAGES_TO_READ,
65
67
  query_images: List[str] = None,
66
68
  agent: Agent = None,
69
+ tracer: dict = {},
67
70
  ):
68
71
  query += " ".join(custom_filters)
69
72
  if not is_internet_connected():
@@ -73,7 +76,7 @@ async def search_online(
73
76
 
74
77
  # Breakdown the query into subqueries to get the correct answer
75
78
  subqueries = await generate_online_subqueries(
76
- query, conversation_history, location, user, query_images=query_images, agent=agent
79
+ query, conversation_history, location, user, query_images=query_images, agent=agent, tracer=tracer
77
80
  )
78
81
  response_dict = {}
79
82
 
@@ -96,7 +99,7 @@ async def search_online(
96
99
  for subquery in response_dict:
97
100
  if "answerBox" in response_dict[subquery]:
98
101
  continue
99
- for organic in response_dict[subquery].get("organic", [])[:MAX_WEBPAGES_TO_READ]:
102
+ for organic in response_dict[subquery].get("organic", [])[:max_webpages_to_read]:
100
103
  link = organic.get("link")
101
104
  if link in webpages:
102
105
  webpages[link]["queries"].add(subquery)
@@ -111,7 +114,7 @@ async def search_online(
111
114
  async for event in send_status_func(f"**Reading web pages**: {webpage_links_str}"):
112
115
  yield {ChatEvent.STATUS: event}
113
116
  tasks = [
114
- read_webpage_and_extract_content(data["queries"], link, data["content"], user=user, agent=agent)
117
+ read_webpage_and_extract_content(data["queries"], link, data["content"], user=user, agent=agent, tracer=tracer)
115
118
  for link, data in webpages.items()
116
119
  ]
117
120
  results = await asyncio.gather(*tasks)
@@ -153,6 +156,7 @@ async def read_webpages(
153
156
  send_status_func: Optional[Callable] = None,
154
157
  query_images: List[str] = None,
155
158
  agent: Agent = None,
159
+ tracer: dict = {},
156
160
  ):
157
161
  "Infer web pages to read from the query and extract relevant information from them"
158
162
  logger.info(f"Inferring web pages to read")
@@ -166,7 +170,7 @@ async def read_webpages(
166
170
  webpage_links_str = "\n- " + "\n- ".join(list(urls))
167
171
  async for event in send_status_func(f"**Reading web pages**: {webpage_links_str}"):
168
172
  yield {ChatEvent.STATUS: event}
169
- tasks = [read_webpage_and_extract_content({query}, url, user=user, agent=agent) for url in urls]
173
+ tasks = [read_webpage_and_extract_content({query}, url, user=user, agent=agent, tracer=tracer) for url in urls]
170
174
  results = await asyncio.gather(*tasks)
171
175
 
172
176
  response: Dict[str, Dict] = defaultdict(dict)
@@ -192,7 +196,12 @@ async def read_webpage(
192
196
 
193
197
 
194
198
  async def read_webpage_and_extract_content(
195
- subqueries: set[str], url: str, content: str = None, user: KhojUser = None, agent: Agent = None
199
+ subqueries: set[str],
200
+ url: str,
201
+ content: str = None,
202
+ user: KhojUser = None,
203
+ agent: Agent = None,
204
+ tracer: dict = {},
196
205
  ) -> Tuple[set[str], str, Union[None, str]]:
197
206
  # Select the web scrapers to use for reading the web page
198
207
  web_scrapers = await ConversationAdapters.aget_enabled_webscrapers()
@@ -214,7 +223,9 @@ async def read_webpage_and_extract_content(
214
223
  # Extract relevant information from the web page
215
224
  if is_none_or_empty(extracted_info):
216
225
  with timer(f"Extracting relevant information from web page at '{url}' took", logger):
217
- extracted_info = await extract_relevant_info(subqueries, content, user=user, agent=agent)
226
+ extracted_info = await extract_relevant_info(
227
+ subqueries, content, user=user, agent=agent, tracer=tracer
228
+ )
218
229
 
219
230
  # If we successfully extracted information, break the loop
220
231
  if not is_none_or_empty(extracted_info):
@@ -0,0 +1,144 @@
1
+ import asyncio
2
+ import datetime
3
+ import json
4
+ import logging
5
+ import os
6
+ from typing import Any, Callable, List, Optional
7
+
8
+ import aiohttp
9
+
10
+ from khoj.database.adapters import ais_user_subscribed
11
+ from khoj.database.models import Agent, KhojUser
12
+ from khoj.processor.conversation import prompts
13
+ from khoj.processor.conversation.utils import (
14
+ ChatEvent,
15
+ clean_code_python,
16
+ clean_json,
17
+ construct_chat_history,
18
+ )
19
+ from khoj.routers.helpers import send_message_to_model_wrapper
20
+ from khoj.utils.helpers import timer
21
+ from khoj.utils.rawconfig import LocationData
22
+
23
+ logger = logging.getLogger(__name__)
24
+
25
+
26
+ SANDBOX_URL = os.getenv("KHOJ_TERRARIUM_URL", "http://localhost:8080")
27
+
28
+
29
async def run_code(
    query: str,
    conversation_history: dict,
    context: str,
    location_data: LocationData,
    user: KhojUser,
    send_status_func: Optional[Callable] = None,
    query_images: List[str] = None,
    agent: Agent = None,
    sandbox_url: str = SANDBOX_URL,
    tracer: dict = {},
):
    """
    Generate Python code snippets for the query, execute each in the terrarium
    sandbox, and yield their results.

    Yields ChatEvent.STATUS progress events (when send_status_func is given),
    then one {query: {"code": ..., "results": ...}} dict per executed snippet.

    Raises ValueError (chained to the underlying error) if code generation or
    execution fails.
    """
    # Generate Code
    if send_status_func:
        async for event in send_status_func(f"**Generate code snippets** for {query}"):
            yield {ChatEvent.STATUS: event}
    try:
        with timer("Chat actor: Generate programs to execute", logger):
            codes = await generate_python_code(
                query,
                conversation_history,
                context,
                location_data,
                user,
                query_images,
                agent,
                tracer,
            )
    except Exception as e:
        # Chain the cause so the original traceback is preserved for debugging
        raise ValueError(f"Failed to generate code for {query} with error: {e}") from e

    # Run Code
    if send_status_func:
        async for event in send_status_func(f"**Running {len(codes)} code snippets**"):
            yield {ChatEvent.STATUS: event}
    try:
        # Execute all generated snippets concurrently in the sandbox
        tasks = [execute_sandboxed_python(code, sandbox_url) for code in codes]
        with timer("Chat actor: Execute generated programs", logger):
            results = await asyncio.gather(*tasks)
        for result in results:
            # Pop the code out of the result dict so it is reported separately
            code = result.pop("code")
            logger.info(f"Executed Code:\n--@@--\n{code}\n--@@--Result:\n--@@--\n{result}\n--@@--")
            yield {query: {"code": code, "results": result}}
    except Exception as e:
        raise ValueError(f"Failed to run code for {query} with error: {e}") from e
74
+
75
+
76
async def generate_python_code(
    q: str,
    conversation_history: dict,
    context: str,
    location_data: LocationData,
    user: KhojUser,
    query_images: List[str] = None,
    agent: Agent = None,
    tracer: dict = {},
) -> List[str]:
    """
    Ask the chat model to write Python programs that help answer the query.

    Returns a non-empty list of code snippet strings.
    Raises ValueError if the model response contains no usable code snippets.
    """
    location = f"{location_data}" if location_data else "Unknown"
    username = prompts.user_name.format(name=user.get_full_name()) if user.get_full_name() else ""
    subscribed = await ais_user_subscribed(user)
    chat_history = construct_chat_history(conversation_history)

    utc_date = datetime.datetime.now(datetime.timezone.utc).strftime("%Y-%m-%d")
    personality_context = (
        prompts.personality_context.format(personality=agent.personality) if agent and agent.personality else ""
    )

    code_generation_prompt = prompts.python_code_generation_prompt.format(
        current_date=utc_date,
        query=q,
        chat_history=chat_history,
        context=context,
        location=location,
        username=username,
        personality_context=personality_context,
    )

    response = await send_message_to_model_wrapper(
        code_generation_prompt,
        query_images=query_images,
        response_type="json_object",
        user=user,
        tracer=tracer,
    )

    # Validate that the response contains a non-empty list of code snippets.
    # Use .get() so a missing "codes" key raises ValueError below, not KeyError.
    response = clean_json(response)
    response = json.loads(response)
    codes = [code.strip() for code in response.get("codes", []) if code.strip()]

    if not codes:
        raise ValueError(f"Model returned no code snippets for query: {q}")
    return codes
122
+
123
+
124
async def execute_sandboxed_python(code: str, sandbox_url: str = SANDBOX_URL) -> dict[str, Any]:
    """
    Takes code to run as a string and calls the terrarium API to execute it.
    Returns the result of the code execution as a dictionary.
    """
    cleaned_code = clean_code_python(code)
    request_body = {"code": cleaned_code}
    headers = {"Content-Type": "application/json"}

    async with aiohttp.ClientSession() as session:
        async with session.post(sandbox_url, json=request_body, headers=headers) as response:
            # Guard clause: report sandbox failure with the HTTP status code
            if response.status != 200:
                return {
                    "code": cleaned_code,
                    "success": False,
                    "std_err": f"Failed to execute code with {response.status}",
                }
            # Success: attach the executed code to the sandbox's result payload
            execution_result: dict[str, Any] = await response.json()
            execution_result["code"] = cleaned_code
            return execution_result
khoj/routers/api.py CHANGED
@@ -44,6 +44,7 @@ from khoj.processor.conversation.offline.chat_model import extract_questions_off
44
44
  from khoj.processor.conversation.offline.whisper import transcribe_audio_offline
45
45
  from khoj.processor.conversation.openai.gpt import extract_questions
46
46
  from khoj.processor.conversation.openai.whisper import transcribe_audio
47
+ from khoj.processor.conversation.utils import defilter_query
47
48
  from khoj.routers.helpers import (
48
49
  ApiUserRateLimiter,
49
50
  ChatEvent,
@@ -167,8 +168,8 @@ async def execute_search(
167
168
  search_futures += [
168
169
  executor.submit(
169
170
  text_search.query,
170
- user,
171
171
  user_query,
172
+ user,
172
173
  t,
173
174
  question_embedding=encoded_asymmetric_query,
174
175
  max_distance=max_distance,
@@ -350,11 +351,12 @@ async def extract_references_and_questions(
350
351
  send_status_func: Optional[Callable] = None,
351
352
  query_images: Optional[List[str]] = None,
352
353
  agent: Agent = None,
354
+ tracer: dict = {},
353
355
  ):
354
356
  user = request.user.object if request.user.is_authenticated else None
355
357
 
356
358
  # Initialize Variables
357
- compiled_references: List[Any] = []
359
+ compiled_references: List[dict[str, str]] = []
358
360
  inferred_queries: List[str] = []
359
361
 
360
362
  agent_has_entries = False
@@ -383,9 +385,7 @@ async def extract_references_and_questions(
383
385
  return
384
386
 
385
387
  # Extract filter terms from user message
386
- defiltered_query = q
387
- for filter in [DateFilter(), WordFilter(), FileFilter()]:
388
- defiltered_query = filter.defilter(defiltered_query)
388
+ defiltered_query = defilter_query(q)
389
389
  filters_in_query = q.replace(defiltered_query, "").strip()
390
390
  conversation = await sync_to_async(ConversationAdapters.get_conversation_by_id)(conversation_id)
391
391
 
@@ -425,6 +425,7 @@ async def extract_references_and_questions(
425
425
  user=user,
426
426
  max_prompt_size=conversation_config.max_prompt_size,
427
427
  personality_context=personality_context,
428
+ tracer=tracer,
428
429
  )
429
430
  elif conversation_config.model_type == ChatModelOptions.ModelType.OPENAI:
430
431
  openai_chat_config = conversation_config.openai_config
@@ -442,6 +443,7 @@ async def extract_references_and_questions(
442
443
  query_images=query_images,
443
444
  vision_enabled=vision_enabled,
444
445
  personality_context=personality_context,
446
+ tracer=tracer,
445
447
  )
446
448
  elif conversation_config.model_type == ChatModelOptions.ModelType.ANTHROPIC:
447
449
  api_key = conversation_config.openai_config.api_key
@@ -456,6 +458,7 @@ async def extract_references_and_questions(
456
458
  user=user,
457
459
  vision_enabled=vision_enabled,
458
460
  personality_context=personality_context,
461
+ tracer=tracer,
459
462
  )
460
463
  elif conversation_config.model_type == ChatModelOptions.ModelType.GOOGLE:
461
464
  api_key = conversation_config.openai_config.api_key
@@ -471,6 +474,7 @@ async def extract_references_and_questions(
471
474
  user=user,
472
475
  vision_enabled=vision_enabled,
473
476
  personality_context=personality_context,
477
+ tracer=tracer,
474
478
  )
475
479
 
476
480
  # Collate search results as context for GPT
@@ -497,7 +501,8 @@ async def extract_references_and_questions(
497
501
  )
498
502
  search_results = text_search.deduplicated_search_responses(search_results)
499
503
  compiled_references = [
500
- {"compiled": item.additional["compiled"], "file": item.additional["file"]} for item in search_results
504
+ {"query": q, "compiled": item.additional["compiled"], "file": item.additional["file"]}
505
+ for q, item in zip(inferred_queries, search_results)
501
506
  ]
502
507
 
503
508
  yield compiled_references, inferred_queries, defiltered_query