khoj 1.27.2.dev15__py3-none-any.whl → 1.27.2.dev29__py3-none-any.whl

This diff compares the contents of two publicly released versions of this package, as published to one of the supported registries. It is provided for informational purposes only and reflects the packages as they appear in their public registries.
Files changed (53)
  1. khoj/interface/compiled/404/index.html +1 -1
  2. khoj/interface/compiled/_next/static/chunks/1603-5138bb7c8035d9a6.js +1 -0
  3. khoj/interface/compiled/_next/static/chunks/app/agents/{page-2beaba7c9bb750bd.js → page-5ae1e540bb5be8a9.js} +1 -1
  4. khoj/interface/compiled/_next/static/chunks/app/automations/{page-9b5c77e0b0dd772c.js → page-774ae3e033f938cd.js} +1 -1
  5. khoj/interface/compiled/_next/static/chunks/app/chat/{page-151232d8417a1ea1.js → page-97f5b61aaf46d364.js} +1 -1
  6. khoj/interface/compiled/_next/static/chunks/app/factchecker/{page-798904432c2417c4.js → page-d82403db2866bad8.js} +1 -1
  7. khoj/interface/compiled/_next/static/chunks/app/{page-4b6008223ea79955.js → page-4dc472cf6d674004.js} +1 -1
  8. khoj/interface/compiled/_next/static/chunks/app/search/{page-ab2995529ece3140.js → page-9b64f61caa5bd7f9.js} +1 -1
  9. khoj/interface/compiled/_next/static/chunks/app/settings/{page-7946cabb9c54e22d.js → page-7a8c382af2a7e870.js} +1 -1
  10. khoj/interface/compiled/_next/static/chunks/app/share/chat/{page-6a01e07fb244c10c.js → page-eb9e282691858f2e.js} +1 -1
  11. khoj/interface/compiled/_next/static/chunks/{webpack-878569182b3af4c6.js → webpack-2b720658ccc746f2.js} +1 -1
  12. khoj/interface/compiled/_next/static/css/4cae6c0e5c72fb2d.css +1 -0
  13. khoj/interface/compiled/_next/static/css/ddcc0cf73e062476.css +1 -0
  14. khoj/interface/compiled/agents/index.html +1 -1
  15. khoj/interface/compiled/agents/index.txt +2 -2
  16. khoj/interface/compiled/automations/index.html +1 -1
  17. khoj/interface/compiled/automations/index.txt +2 -2
  18. khoj/interface/compiled/chat/index.html +1 -1
  19. khoj/interface/compiled/chat/index.txt +2 -2
  20. khoj/interface/compiled/factchecker/index.html +1 -1
  21. khoj/interface/compiled/factchecker/index.txt +2 -2
  22. khoj/interface/compiled/index.html +1 -1
  23. khoj/interface/compiled/index.txt +2 -2
  24. khoj/interface/compiled/search/index.html +1 -1
  25. khoj/interface/compiled/search/index.txt +2 -2
  26. khoj/interface/compiled/settings/index.html +1 -1
  27. khoj/interface/compiled/settings/index.txt +2 -2
  28. khoj/interface/compiled/share/chat/index.html +1 -1
  29. khoj/interface/compiled/share/chat/index.txt +2 -2
  30. khoj/processor/conversation/anthropic/anthropic_chat.py +6 -1
  31. khoj/processor/conversation/anthropic/utils.py +25 -5
  32. khoj/processor/conversation/google/gemini_chat.py +8 -2
  33. khoj/processor/conversation/google/utils.py +34 -10
  34. khoj/processor/conversation/offline/chat_model.py +31 -7
  35. khoj/processor/conversation/openai/gpt.py +14 -2
  36. khoj/processor/conversation/openai/utils.py +43 -9
  37. khoj/processor/conversation/prompts.py +0 -16
  38. khoj/processor/conversation/utils.py +168 -1
  39. khoj/processor/image/generate.py +2 -0
  40. khoj/processor/tools/online_search.py +14 -5
  41. khoj/routers/api.py +5 -0
  42. khoj/routers/api_chat.py +23 -2
  43. khoj/routers/helpers.py +65 -13
  44. {khoj-1.27.2.dev15.dist-info → khoj-1.27.2.dev29.dist-info}/METADATA +2 -1
  45. {khoj-1.27.2.dev15.dist-info → khoj-1.27.2.dev29.dist-info}/RECORD +50 -50
  46. khoj/interface/compiled/_next/static/chunks/1603-b9d95833e0e025e8.js +0 -1
  47. khoj/interface/compiled/_next/static/css/592ca99f5122e75a.css +0 -1
  48. khoj/interface/compiled/_next/static/css/d738728883c68af8.css +0 -1
  49. /khoj/interface/compiled/_next/static/{vcyFRDGArOFXwUVotHIuv → atzIseFarmC7TIwq2BgHC}/_buildManifest.js +0 -0
  50. /khoj/interface/compiled/_next/static/{vcyFRDGArOFXwUVotHIuv → atzIseFarmC7TIwq2BgHC}/_ssgManifest.js +0 -0
  51. {khoj-1.27.2.dev15.dist-info → khoj-1.27.2.dev29.dist-info}/WHEEL +0 -0
  52. {khoj-1.27.2.dev15.dist-info → khoj-1.27.2.dev29.dist-info}/entry_points.txt +0 -0
  53. {khoj-1.27.2.dev15.dist-info → khoj-1.27.2.dev29.dist-info}/licenses/LICENSE +0 -0

khoj/processor/conversation/google/utils.py

@@ -19,8 +19,13 @@ from tenacity import (
     wait_random_exponential,
 )
 
-from khoj.processor.conversation.utils import ThreadedGenerator, get_image_from_url
-from khoj.utils.helpers import is_none_or_empty
+from khoj.processor.conversation.utils import (
+    ThreadedGenerator,
+    commit_conversation_trace,
+    get_image_from_url,
+)
+from khoj.utils import state
+from khoj.utils.helpers import in_debug_mode, is_none_or_empty
 
 logger = logging.getLogger(__name__)
 
@@ -35,7 +40,7 @@ MAX_OUTPUT_TOKENS_GEMINI = 8192
     reraise=True,
 )
 def gemini_completion_with_backoff(
-    messages, system_prompt, model_name, temperature=0, api_key=None, model_kwargs=None
+    messages, system_prompt, model_name, temperature=0, api_key=None, model_kwargs=None, tracer={}
 ) -> str:
     genai.configure(api_key=api_key)
     model_kwargs = model_kwargs or dict()
@@ -60,16 +65,23 @@ def gemini_completion_with_backoff(
 
     try:
         # Generate the response. The last message is considered to be the current prompt
-        aggregated_response = chat_session.send_message(formatted_messages[-1]["parts"])
-        return aggregated_response.text
+        response = chat_session.send_message(formatted_messages[-1]["parts"])
+        response_text = response.text
     except StopCandidateException as e:
-        response_message, _ = handle_gemini_response(e.args)
+        response_text, _ = handle_gemini_response(e.args)
         # Respond with reason for stopping
         logger.warning(
-            f"LLM Response Prevented for {model_name}: {response_message}.\n"
+            f"LLM Response Prevented for {model_name}: {response_text}.\n"
            + f"Last Message by {messages[-1].role}: {messages[-1].content}"
         )
-        return response_message
+
+    # Save conversation trace
+    tracer["chat_model"] = model_name
+    tracer["temperature"] = temperature
+    if in_debug_mode() or state.verbose > 1:
+        commit_conversation_trace(messages, response_text, tracer)
+
+    return response_text
 
 
 @retry(
@@ -88,17 +100,20 @@ def gemini_chat_completion_with_backoff(
     system_prompt,
     completion_func=None,
     model_kwargs=None,
+    tracer: dict = {},
 ):
     g = ThreadedGenerator(compiled_references, online_results, completion_func=completion_func)
     t = Thread(
         target=gemini_llm_thread,
-        args=(g, messages, system_prompt, model_name, temperature, api_key, model_kwargs),
+        args=(g, messages, system_prompt, model_name, temperature, api_key, model_kwargs, tracer),
     )
     t.start()
     return g
 
 
-def gemini_llm_thread(g, messages, system_prompt, model_name, temperature, api_key, model_kwargs=None):
+def gemini_llm_thread(
+    g, messages, system_prompt, model_name, temperature, api_key, model_kwargs=None, tracer: dict = {}
+):
     try:
         genai.configure(api_key=api_key)
         model_kwargs = model_kwargs or dict()
@@ -117,16 +132,25 @@ def gemini_llm_thread(g, messages, system_prompt, model_name, temperature, api_k
             },
         )
 
+        aggregated_response = ""
         formatted_messages = [{"role": message.role, "parts": message.content} for message in messages]
+
         # all messages up to the last are considered to be part of the chat history
         chat_session = model.start_chat(history=formatted_messages[0:-1])
         # the last message is considered to be the current prompt
        for chunk in chat_session.send_message(formatted_messages[-1]["parts"], stream=True):
             message, stopped = handle_gemini_response(chunk.candidates, chunk.prompt_feedback)
             message = message or chunk.text
+            aggregated_response += message
             g.send(message)
             if stopped:
                 raise StopCandidateException(message)
+
+        # Save conversation trace
+        tracer["chat_model"] = model_name
+        tracer["temperature"] = temperature
+        if in_debug_mode() or state.verbose > 1:
+            commit_conversation_trace(messages, aggregated_response, tracer)
     except StopCandidateException as e:
         logger.warning(
             f"LLM Response Prevented for {model_name}: {e.args[0]}.\n"

khoj/processor/conversation/offline/chat_model.py

@@ -12,11 +12,12 @@ from khoj.processor.conversation import prompts
 from khoj.processor.conversation.offline.utils import download_model
 from khoj.processor.conversation.utils import (
     ThreadedGenerator,
+    commit_conversation_trace,
     generate_chatml_messages_with_context,
 )
 from khoj.utils import state
 from khoj.utils.constants import empty_escape_sequences
-from khoj.utils.helpers import ConversationCommand, is_none_or_empty
+from khoj.utils.helpers import ConversationCommand, in_debug_mode, is_none_or_empty
 from khoj.utils.rawconfig import LocationData
 
 logger = logging.getLogger(__name__)
@@ -34,6 +35,7 @@ def extract_questions_offline(
     max_prompt_size: int = None,
     temperature: float = 0.7,
     personality_context: Optional[str] = None,
+    tracer: dict = {},
 ) -> List[str]:
     """
     Infer search queries to retrieve relevant notes to answer user query
@@ -94,6 +96,7 @@ def extract_questions_offline(
             max_prompt_size=max_prompt_size,
             temperature=temperature,
             response_type="json_object",
+            tracer=tracer,
         )
     finally:
         state.chat_lock.release()
@@ -146,6 +149,7 @@ def converse_offline(
     location_data: LocationData = None,
     user_name: str = None,
     agent: Agent = None,
+    tracer: dict = {},
 ) -> Union[ThreadedGenerator, Iterator[str]]:
     """
     Converse with user using Llama
@@ -153,8 +157,9 @@ def converse_offline(
     # Initialize Variables
     assert loaded_model is None or isinstance(loaded_model, Llama), "loaded_model must be of type Llama, if configured"
     offline_chat_model = loaded_model or download_model(model, max_tokens=max_prompt_size)
-    compiled_references = "\n\n".join({f"# File: {item['file']}\n## {item['compiled']}\n" for item in references})
+    tracer["chat_model"] = model
 
+    compiled_references = "\n\n".join({f"# File: {item['file']}\n## {item['compiled']}\n" for item in references})
     current_date = datetime.now()
 
     if agent and agent.personality:
@@ -215,13 +220,14 @@ def converse_offline(
     logger.debug(f"Conversation Context for {model}: {truncated_messages}")
 
     g = ThreadedGenerator(references, online_results, completion_func=completion_func)
-    t = Thread(target=llm_thread, args=(g, messages, offline_chat_model, max_prompt_size))
+    t = Thread(target=llm_thread, args=(g, messages, offline_chat_model, max_prompt_size, tracer))
     t.start()
     return g
 
 
-def llm_thread(g, messages: List[ChatMessage], model: Any, max_prompt_size: int = None):
+def llm_thread(g, messages: List[ChatMessage], model: Any, max_prompt_size: int = None, tracer: dict = {}):
     stop_phrases = ["<s>", "INST]", "Notes:"]
+    aggregated_response = ""
 
     state.chat_lock.acquire()
     try:
@@ -229,7 +235,14 @@ def llm_thread(g, messages: List[ChatMessage], model: Any, max_prompt_size: int
             messages, loaded_model=model, stop=stop_phrases, max_prompt_size=max_prompt_size, streaming=True
         )
         for response in response_iterator:
-            g.send(response["choices"][0]["delta"].get("content", ""))
+            response_delta = response["choices"][0]["delta"].get("content", "")
+            aggregated_response += response_delta
+            g.send(response_delta)
+
+        # Save conversation trace
+        if in_debug_mode() or state.verbose > 1:
+            commit_conversation_trace(messages, aggregated_response, tracer)
+
     finally:
         state.chat_lock.release()
         g.close()
@@ -244,6 +257,7 @@ def send_message_to_model_offline(
     stop=[],
     max_prompt_size: int = None,
     response_type: str = "text",
+    tracer: dict = {},
 ):
     assert loaded_model is None or isinstance(loaded_model, Llama), "loaded_model must be of type Llama, if configured"
     offline_chat_model = loaded_model or download_model(model, max_tokens=max_prompt_size)
@@ -251,7 +265,17 @@ def send_message_to_model_offline(
     response = offline_chat_model.create_chat_completion(
         messages_dict, stop=stop, stream=streaming, temperature=temperature, response_format={"type": response_type}
     )
+
     if streaming:
         return response
-    else:
-        return response["choices"][0]["message"].get("content", "")
+
+    response_text = response["choices"][0]["message"].get("content", "")
+
+    # Save conversation trace for non-streaming responses
+    # Streamed responses need to be saved by the calling function
+    tracer["chat_model"] = model
+    tracer["temperature"] = temperature
+    if in_debug_mode() or state.verbose > 1:
+        commit_conversation_trace(messages, response_text, tracer)
+
+    return response_text

khoj/processor/conversation/openai/gpt.py

@@ -33,6 +33,7 @@ def extract_questions(
     query_images: Optional[list[str]] = None,
     vision_enabled: bool = False,
     personality_context: Optional[str] = None,
+    tracer: dict = {},
 ):
     """
     Infer search queries to retrieve relevant notes to answer user query
@@ -82,7 +83,13 @@ def extract_questions(
     messages = [ChatMessage(content=prompt, role="user")]
 
     response = send_message_to_model(
-        messages, api_key, model, response_type="json_object", api_base_url=api_base_url, temperature=temperature
+        messages,
+        api_key,
+        model,
+        response_type="json_object",
+        api_base_url=api_base_url,
+        temperature=temperature,
+        tracer=tracer,
     )
 
     # Extract, Clean Message from GPT's Response
@@ -103,7 +110,9 @@ def extract_questions(
     return questions
 
 
-def send_message_to_model(messages, api_key, model, response_type="text", api_base_url=None, temperature=0):
+def send_message_to_model(
+    messages, api_key, model, response_type="text", api_base_url=None, temperature=0, tracer: dict = {}
+):
     """
     Send message to model
     """
@@ -116,6 +125,7 @@ def send_message_to_model(messages, api_key, model, response_type="text", api_ba
         temperature=temperature,
         api_base_url=api_base_url,
         model_kwargs={"response_format": {"type": response_type}},
+        tracer=tracer,
     )
 
 
@@ -137,6 +147,7 @@ def converse(
     agent: Agent = None,
     query_images: Optional[list[str]] = None,
     vision_available: bool = False,
+    tracer: dict = {},
 ):
     """
     Converse with user using OpenAI's ChatGPT
@@ -207,4 +218,5 @@ def converse(
         api_base_url=api_base_url,
         completion_func=completion_func,
         model_kwargs={"stop": ["Notes:\n["]},
+        tracer=tracer,
     )

khoj/processor/conversation/openai/utils.py

@@ -12,7 +12,12 @@ from tenacity import (
     wait_random_exponential,
 )
 
-from khoj.processor.conversation.utils import ThreadedGenerator
+from khoj.processor.conversation.utils import (
+    ThreadedGenerator,
+    commit_conversation_trace,
+)
+from khoj.utils import state
+from khoj.utils.helpers import in_debug_mode
 
 logger = logging.getLogger(__name__)
 
@@ -33,7 +38,7 @@ openai_clients: Dict[str, openai.OpenAI] = {}
     reraise=True,
 )
 def completion_with_backoff(
-    messages, model, temperature=0, openai_api_key=None, api_base_url=None, model_kwargs=None
+    messages, model, temperature=0, openai_api_key=None, api_base_url=None, model_kwargs=None, tracer: dict = {}
 ) -> str:
     client_key = f"{openai_api_key}--{api_base_url}"
     client: openai.OpenAI | None = openai_clients.get(client_key)
@@ -77,6 +82,12 @@ def completion_with_backoff(
         elif delta_chunk.content:
             aggregated_response += delta_chunk.content
 
+    # Save conversation trace
+    tracer["chat_model"] = model
+    tracer["temperature"] = temperature
+    if in_debug_mode() or state.verbose > 1:
+        commit_conversation_trace(messages, aggregated_response, tracer)
+
     return aggregated_response
 
 
@@ -103,26 +114,37 @@ def chat_completion_with_backoff(
     api_base_url=None,
     completion_func=None,
     model_kwargs=None,
+    tracer: dict = {},
 ):
     g = ThreadedGenerator(compiled_references, online_results, completion_func=completion_func)
     t = Thread(
-        target=llm_thread, args=(g, messages, model_name, temperature, openai_api_key, api_base_url, model_kwargs)
+        target=llm_thread,
+        args=(g, messages, model_name, temperature, openai_api_key, api_base_url, model_kwargs, tracer),
     )
     t.start()
     return g
 
 
-def llm_thread(g, messages, model_name, temperature, openai_api_key=None, api_base_url=None, model_kwargs=None):
+def llm_thread(
+    g,
+    messages,
+    model_name,
+    temperature,
+    openai_api_key=None,
+    api_base_url=None,
+    model_kwargs=None,
+    tracer: dict = {},
+):
     try:
         client_key = f"{openai_api_key}--{api_base_url}"
         if client_key not in openai_clients:
-            client: openai.OpenAI = openai.OpenAI(
+            client = openai.OpenAI(
                 api_key=openai_api_key,
                 base_url=api_base_url,
             )
             openai_clients[client_key] = client
         else:
-            client: openai.OpenAI = openai_clients[client_key]
+            client = openai_clients[client_key]
 
         formatted_messages = [{"role": message.role, "content": message.content} for message in messages]
         stream = True
@@ -144,17 +166,29 @@ def llm_thread(g, messages, model_name, temperature, openai_api_key=None, api_ba
             **(model_kwargs or dict()),
         )
 
+        aggregated_response = ""
         if not stream:
-            g.send(chat.choices[0].message.content)
+            aggregated_response = chat.choices[0].message.content
+            g.send(aggregated_response)
         else:
             for chunk in chat:
                 if len(chunk.choices) == 0:
                     continue
                 delta_chunk = chunk.choices[0].delta
+                text_chunk = ""
                 if isinstance(delta_chunk, str):
-                    g.send(delta_chunk)
+                    text_chunk = delta_chunk
                 elif delta_chunk.content:
-                    g.send(delta_chunk.content)
+                    text_chunk = delta_chunk.content
+                if text_chunk:
+                    aggregated_response += text_chunk
+                    g.send(text_chunk)
+
+        # Save conversation trace
+        tracer["chat_model"] = model_name
+        tracer["temperature"] = temperature
+        if in_debug_mode() or state.verbose > 1:
+            commit_conversation_trace(messages, aggregated_response, tracer)
     except Exception as e:
         logger.error(f"Error in llm_thread: {e}", exc_info=True)
     finally:

khoj/processor/conversation/prompts.py

@@ -193,7 +193,6 @@ you need to convert the user's query to a description format that the novice art
 - ellipse
 - line
 - arrow
-- frame
 
 use these primitives to describe what sort of diagram the drawer should create. the artist must recreate the diagram every time, so include all relevant prior information in your description.
 
@@ -284,21 +283,6 @@ For text, you must use the `text` property to specify the text to be rendered. Y
     text: string,
 }}
 
-For frames, use the `children` property to specify the elements that are inside the frame by their ids.
-
-{{
-    type: "frame",
-    id: string,
-    x: number,
-    y: number,
-    width: number,
-    height: number,
-    name: string,
-    children: [
-        string
-    ]
-}}
-
 Here's an example of a valid diagram:
 
 Design Description: Create a diagram describing a circular development process with 3 stages: design, implementation and feedback. The design stage is connected to the implementation stage and the implementation stage is connected to the feedback stage and the feedback stage is connected to the design stage. Each stage should be labeled with the stage name.

khoj/processor/conversation/utils.py

@@ -2,6 +2,7 @@ import base64
 import logging
 import math
 import mimetypes
+import os
 import queue
 from dataclasses import dataclass
 from datetime import datetime
@@ -12,6 +13,8 @@ from typing import Any, Dict, List, Optional
 import PIL.Image
 import requests
 import tiktoken
+import yaml
+from git import Repo
 from langchain.schema import ChatMessage
 from llama_cpp.llama import Llama
 from transformers import AutoTokenizer
@@ -21,7 +24,7 @@ from khoj.database.models import ChatModelOptions, ClientApplication, KhojUser
 from khoj.processor.conversation import prompts
 from khoj.processor.conversation.offline.utils import download_model, infer_max_tokens
 from khoj.utils import state
-from khoj.utils.helpers import is_none_or_empty, merge_dicts
+from khoj.utils.helpers import in_debug_mode, is_none_or_empty, merge_dicts
 
 logger = logging.getLogger(__name__)
 model_to_prompt_size = {
@@ -117,6 +120,7 @@ def save_to_conversation_log(
     conversation_id: str = None,
     automation_id: str = None,
     query_images: List[str] = None,
+    tracer: Dict[str, Any] = {},
 ):
     user_message_time = user_message_time or datetime.now().strftime("%Y-%m-%d %H:%M:%S")
     updated_conversation = message_to_log(
@@ -142,6 +146,9 @@ def save_to_conversation_log(
         user_message=q,
     )
 
+    if in_debug_mode() or state.verbose > 1:
+        merge_message_into_conversation_trace(q, chat_response, tracer)
+
     logger.info(
         f"""
 Saved Conversation Turn
@@ -354,3 +361,163 @@ def get_image_from_url(image_url: str, type="pil"):
     except requests.exceptions.RequestException as e:
         logger.error(f"Failed to get image from URL {image_url}: {e}")
         return ImageWithType(content=None, type=None)
+
+
+def commit_conversation_trace(
+    session: list[ChatMessage],
+    response: str | list[dict],
+    tracer: dict,
+    system_message: str | list[dict] = "",
+    repo_path: str = "/tmp/promptrace",
+) -> str:
+    """
+    Save trace of conversation step using git. Useful to visualize, compare and debug traces.
+    Returns the path to the repository.
+    """
+    # Serialize session, system message and response to yaml
+    system_message_yaml = yaml.dump(system_message, allow_unicode=True, sort_keys=False, default_flow_style=False)
+    response_yaml = yaml.dump(response, allow_unicode=True, sort_keys=False, default_flow_style=False)
+    formatted_session = [{"role": message.role, "content": message.content} for message in session]
+    session_yaml = yaml.dump(formatted_session, allow_unicode=True, sort_keys=False, default_flow_style=False)
+    query = (
+        yaml.dump(session[-1].content, allow_unicode=True, sort_keys=False, default_flow_style=False)
+        .strip()
+        .removeprefix("'")
+        .removesuffix("'")
+    )  # Extract serialized query from chat session
+
+    # Extract chat metadata for session
+    uid, cid, mid = tracer.get("uid", "main"), tracer.get("cid", "main"), tracer.get("mid")
+
+    # Infer repository path from environment variable or provided path
+    repo_path = os.getenv("PROMPTRACE_DIR", repo_path)
+
+    try:
+        # Prepare git repository
+        os.makedirs(repo_path, exist_ok=True)
+        repo = Repo.init(repo_path)
+
+        # Remove post-commit hook if it exists
+        hooks_dir = os.path.join(repo_path, ".git", "hooks")
+        post_commit_hook = os.path.join(hooks_dir, "post-commit")
+        if os.path.exists(post_commit_hook):
+            os.remove(post_commit_hook)
+
+        # Configure git user if not set
+        if not repo.config_reader().has_option("user", "email"):
+            repo.config_writer().set_value("user", "name", "Prompt Tracer").release()
+            repo.config_writer().set_value("user", "email", "promptracer@khoj.dev").release()
+
+        # Create an initial commit if the repository is newly created
+        if not repo.head.is_valid():
+            repo.index.commit("And then there was a trace")
+
+        # Check out the initial commit
+        initial_commit = repo.commit("HEAD~0")
+        repo.head.reference = initial_commit
+        repo.head.reset(index=True, working_tree=True)
+
+        # Create or switch to user branch from initial commit
+        user_branch = f"u_{uid}"
+        if user_branch not in repo.branches:
+            repo.create_head(user_branch)
+        repo.heads[user_branch].checkout()
+
+        # Create or switch to conversation branch from user branch
+        conv_branch = f"c_{cid}"
+        if conv_branch not in repo.branches:
+            repo.create_head(conv_branch)
+        repo.heads[conv_branch].checkout()
+
+        # Create or switch to message branch from conversation branch
+        msg_branch = f"m_{mid}" if mid else None
+        if msg_branch and msg_branch not in repo.branches:
+            repo.create_head(msg_branch)
+        if msg_branch:
+            repo.heads[msg_branch].checkout()
+
+        # Include file with content to commit
+        files_to_commit = {"query": session_yaml, "response": response_yaml, "system_prompt": system_message_yaml}
+
+        # Write files and stage them
+        for filename, content in files_to_commit.items():
+            file_path = os.path.join(repo_path, filename)
+            # Unescape special characters in content for better readability
+            content = content.strip().replace("\\n", "\n").replace("\\t", "\t")
+            with open(file_path, "w", encoding="utf-8") as f:
+                f.write(content)
+            repo.index.add([filename])
+
+        # Create commit
+        metadata_yaml = yaml.dump(tracer, allow_unicode=True, sort_keys=False, default_flow_style=False)
+        commit_message = f"""
+{query[:250]}
+
+Response:
+---
+{response[:500]}...
+
+Metadata
+---
+{metadata_yaml}
+""".strip()
+
+        repo.index.commit(commit_message)
+
+        logger.debug(f"Saved conversation trace to repo at {repo_path}")
+        return repo_path
+    except Exception as e:
+        logger.error(f"Failed to add conversation trace to repo: {str(e)}", exc_info=True)
+        return None
+
+
+def merge_message_into_conversation_trace(query: str, response: str, tracer: dict, repo_path="/tmp/promptrace") -> bool:
+    """
+    Merge the message branch into its parent conversation branch.
+
+    Args:
+        query: User query
+        response: Assistant response
+        tracer: Dictionary containing uid, cid and mid
+        repo_path: Path to the git repository
+
+    Returns:
+        bool: True if merge was successful, False otherwise
+    """
+    try:
+        # Extract branch names
+        msg_branch = f"m_{tracer['mid']}"
+        conv_branch = f"c_{tracer['cid']}"
+
+        # Infer repository path from environment variable or provided path
+        repo_path = os.getenv("PROMPTRACE_DIR", repo_path)
+        repo = Repo(repo_path)
+
+        # Checkout conversation branch
+        repo.heads[conv_branch].checkout()
+
+        # Create commit message
+        metadata_yaml = yaml.dump(tracer, allow_unicode=True, sort_keys=False, default_flow_style=False)
+        commit_message = f"""
+{query[:250]}
+
+Response:
+---
+{response[:500]}...
+
+Metadata
+---
+{metadata_yaml}
+""".strip()
+
+        # Merge message branch into conversation branch
+        repo.git.merge(msg_branch, no_ff=True, m=commit_message)
+
+        # Delete message branch after merge
+        repo.delete_head(msg_branch, force=True)
+
+        logger.debug(f"Successfully merged {msg_branch} into {conv_branch}")
+        return True
+    except Exception as e:
+        logger.error(f"Failed to merge message {msg_branch} into conversation {conv_branch}: {str(e)}", exc_info=True)
+        return False
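
Taken together, the new commit_conversation_trace and merge_message_into_conversation_trace helpers implement a git-backed prompt trace: each user gets a u_<uid> branch, each conversation a c_<cid> branch, and each message a m_<mid> branch that is merged back into the conversation branch when the turn is saved. Tracing only runs when in_debug_mode() or state.verbose > 1, and the repository location can be overridden with the PROMPTRACE_DIR environment variable. Below is a minimal sketch of how a caller might exercise these helpers directly; the uid/cid/mid values and the messages are illustrative placeholders, not taken from this diff.

    # Illustrative sketch only: trace a single chat turn by hand.
    # In Khoj the tracer dict is threaded through the routers and chat helpers.
    from langchain.schema import ChatMessage

    from khoj.processor.conversation.utils import (
        commit_conversation_trace,
        merge_message_into_conversation_trace,
    )

    tracer = {"uid": "user-1", "cid": "conv-42", "mid": "msg-7"}  # hypothetical ids
    messages = [
        ChatMessage(role="system", content="You are Khoj, a personal assistant."),
        ChatMessage(role="user", content="Summarize my notes on prompt tracing."),
    ]
    response = "Your notes describe saving each chat turn as a git commit."

    # Writes query/response/system_prompt files and commits them on branch m_msg-7
    # of the repo at $PROMPTRACE_DIR (default /tmp/promptrace).
    commit_conversation_trace(messages, response, tracer)

    # When the turn is saved to the conversation log, the message branch is
    # merged back into its conversation branch c_conv-42.
    merge_message_into_conversation_trace(messages[-1].content, response, tracer)

Because the trace is an ordinary git repository, it can be browsed with standard tooling, for example: git -C /tmp/promptrace log --all --graph --stat.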

khoj/processor/image/generate.py

@@ -28,6 +28,7 @@ async def text_to_image(
     send_status_func: Optional[Callable] = None,
     query_images: Optional[List[str]] = None,
     agent: Agent = None,
+    tracer: dict = {},
 ):
     status_code = 200
     image = None
@@ -68,6 +69,7 @@ async def text_to_image(
         query_images=query_images,
         user=user,
         agent=agent,
+        tracer=tracer,
     )
 
     if send_status_func: