khoj 1.30.11.dev64__py3-none-any.whl → 1.32.3.dev34__py3-none-any.whl

This diff compares the contents of two publicly released versions of this package, as published to one of the supported registries. It is provided for informational purposes only.
Files changed (146)
  1. khoj/configure.py +4 -2
  2. khoj/database/adapters/__init__.py +67 -58
  3. khoj/database/admin.py +9 -9
  4. khoj/database/migrations/0077_chatmodel_alter_agent_chat_model_and_more.py +62 -0
  5. khoj/database/migrations/0078_khojuser_email_verification_code_expiry.py +17 -0
  6. khoj/database/models/__init__.py +9 -8
  7. khoj/interface/compiled/404/index.html +1 -1
  8. khoj/interface/compiled/_next/static/chunks/182-8cd8b17d40e6e989.js +20 -0
  9. khoj/interface/compiled/_next/static/chunks/1915-605f698f2573cfd4.js +1 -0
  10. khoj/interface/compiled/_next/static/chunks/2117-9886e6a0232dc093.js +2 -0
  11. khoj/interface/compiled/_next/static/chunks/2581-455000f8aeb08fc3.js +1 -0
  12. khoj/interface/compiled/_next/static/chunks/3175-b2e522f8ca392f7e.js +3 -0
  13. khoj/interface/compiled/_next/static/chunks/3727.dcea8f2193111552.js +1 -0
  14. khoj/interface/compiled/_next/static/chunks/3789-a09e37a819171a9d.js +1 -0
  15. khoj/interface/compiled/_next/static/chunks/4124-0baa32400521e909.js +1 -0
  16. khoj/interface/compiled/_next/static/chunks/4357-03ea130575287c27.js +1 -0
  17. khoj/interface/compiled/_next/static/chunks/5243-f7f0a2a6e1ac5d28.js +1 -0
  18. khoj/interface/compiled/_next/static/chunks/5427-3e7360c8e6ac9728.js +1 -0
  19. khoj/interface/compiled/_next/static/chunks/{1279-4cb23143aa2c0228.js → 5473-b1cf56dedac6577a.js} +1 -1
  20. khoj/interface/compiled/_next/static/chunks/5477-c5d7eabee28a789a.js +1 -0
  21. khoj/interface/compiled/_next/static/chunks/6065-64db9ad305ba0bcd.js +1 -0
  22. khoj/interface/compiled/_next/static/chunks/8667-d3e5bc726e4ff4e3.js +1 -0
  23. khoj/interface/compiled/_next/static/chunks/9259-27d1ff42af9a43e0.js +1 -0
  24. khoj/interface/compiled/_next/static/chunks/94ca1967.1d9b42d929a1ee8c.js +1 -0
  25. khoj/interface/compiled/_next/static/chunks/{1210.ef7a0f9a7e43da1d.js → 9597.83583248dfbf6e73.js} +1 -1
  26. khoj/interface/compiled/_next/static/chunks/964ecbae.51d6faf8801d15e6.js +1 -0
  27. khoj/interface/compiled/_next/static/chunks/9665-1ab5c8c667b74dca.js +1 -0
  28. khoj/interface/compiled/_next/static/chunks/app/_not-found/{page-cfba071f5a657256.js → page-a834eddae3e235df.js} +1 -1
  29. khoj/interface/compiled/_next/static/chunks/app/agents/layout-e00fb81dca656a10.js +1 -0
  30. khoj/interface/compiled/_next/static/chunks/app/agents/page-ab5ebe4efba9b582.js +1 -0
  31. khoj/interface/compiled/_next/static/chunks/app/automations/{layout-7f1b79a2c67af0b4.js → layout-1fe1537449f43496.js} +1 -1
  32. khoj/interface/compiled/_next/static/chunks/app/automations/page-37d56a7bbfd307df.js +1 -0
  33. khoj/interface/compiled/_next/static/chunks/app/chat/layout-33934fc2d6ae6838.js +1 -0
  34. khoj/interface/compiled/_next/static/chunks/app/chat/page-a0b61f10b0bf6dd5.js +1 -0
  35. khoj/interface/compiled/_next/static/chunks/app/layout-30e7fda7262713ce.js +1 -0
  36. khoj/interface/compiled/_next/static/chunks/app/page-33a3375b1414d1bd.js +1 -0
  37. khoj/interface/compiled/_next/static/chunks/app/search/layout-c02531d586972d7d.js +1 -0
  38. khoj/interface/compiled/_next/static/chunks/app/search/page-bbbfda90fa03c5be.js +1 -0
  39. khoj/interface/compiled/_next/static/chunks/app/settings/layout-d09d6510a45cd4bd.js +1 -0
  40. khoj/interface/compiled/_next/static/chunks/app/settings/page-430db6215e48aea2.js +1 -0
  41. khoj/interface/compiled/_next/static/chunks/app/share/chat/layout-e8e5db7830bf3f47.js +1 -0
  42. khoj/interface/compiled/_next/static/chunks/app/share/chat/page-02dc1f2e2a41e522.js +1 -0
  43. khoj/interface/compiled/_next/static/chunks/d3ac728e-44ebd2a0c99b12a0.js +1 -0
  44. khoj/interface/compiled/_next/static/chunks/{fd9d1056-2e6c8140e79afc3b.js → fd9d1056-4482b99a36fd1673.js} +1 -1
  45. khoj/interface/compiled/_next/static/chunks/main-app-de1f09df97a3cfc7.js +1 -0
  46. khoj/interface/compiled/_next/static/chunks/main-db4bfac6b0a8d00b.js +1 -0
  47. khoj/interface/compiled/_next/static/chunks/pages/{_app-f870474a17b7f2fd.js → _app-3c9ca398d360b709.js} +1 -1
  48. khoj/interface/compiled/_next/static/chunks/pages/{_error-c66a4e8afc46f17b.js → _error-cf5ca766ac8f493f.js} +1 -1
  49. khoj/interface/compiled/_next/static/chunks/polyfills-42372ed130431b0a.js +1 -0
  50. khoj/interface/compiled/_next/static/chunks/webpack-b0a1b08bb62bdc15.js +1 -0
  51. khoj/interface/compiled/_next/static/css/0f04760e76bba6c1.css +25 -0
  52. khoj/interface/compiled/_next/static/css/37a73b87f02df402.css +1 -0
  53. khoj/interface/compiled/_next/static/css/8e6a3ca11a60b189.css +1 -0
  54. khoj/interface/compiled/_next/static/css/9c164d9727dd8092.css +1 -0
  55. khoj/interface/compiled/_next/static/css/c3acbadc30537d04.css +1 -0
  56. khoj/interface/compiled/_next/static/css/dac88c17aaee5fcf.css +1 -0
  57. khoj/interface/compiled/_next/static/css/df4b47a2d0d85eae.css +1 -0
  58. khoj/interface/compiled/_next/static/css/e546bf5cc4914244.css +1 -0
  59. khoj/interface/compiled/_next/static/mqcIHpVqVWkmBuN0npYHA/_buildManifest.js +1 -0
  60. khoj/interface/compiled/agents/index.html +1 -1
  61. khoj/interface/compiled/agents/index.txt +6 -6
  62. khoj/interface/compiled/automations/index.html +1 -1
  63. khoj/interface/compiled/automations/index.txt +7 -7
  64. khoj/interface/compiled/chat/index.html +1 -1
  65. khoj/interface/compiled/chat/index.txt +6 -6
  66. khoj/interface/compiled/index.html +1 -1
  67. khoj/interface/compiled/index.txt +6 -6
  68. khoj/interface/compiled/search/index.html +1 -1
  69. khoj/interface/compiled/search/index.txt +6 -6
  70. khoj/interface/compiled/settings/index.html +1 -1
  71. khoj/interface/compiled/settings/index.txt +8 -8
  72. khoj/interface/compiled/share/chat/index.html +1 -1
  73. khoj/interface/compiled/share/chat/index.txt +6 -6
  74. khoj/interface/email/magic_link.html +36 -13
  75. khoj/main.py +1 -1
  76. khoj/migrations/migrate_server_pg.py +7 -7
  77. khoj/processor/conversation/anthropic/anthropic_chat.py +5 -7
  78. khoj/processor/conversation/google/gemini_chat.py +5 -7
  79. khoj/processor/conversation/google/utils.py +0 -1
  80. khoj/processor/conversation/offline/chat_model.py +15 -14
  81. khoj/processor/conversation/openai/gpt.py +7 -9
  82. khoj/processor/conversation/openai/utils.py +31 -17
  83. khoj/processor/conversation/prompts.py +65 -49
  84. khoj/processor/conversation/utils.py +46 -44
  85. khoj/processor/tools/online_search.py +49 -2
  86. khoj/routers/api.py +22 -27
  87. khoj/routers/api_agents.py +4 -4
  88. khoj/routers/api_chat.py +33 -13
  89. khoj/routers/api_model.py +4 -4
  90. khoj/routers/auth.py +108 -7
  91. khoj/routers/email.py +10 -14
  92. khoj/routers/helpers.py +187 -143
  93. khoj/routers/web_client.py +1 -1
  94. khoj/utils/constants.py +1 -1
  95. khoj/utils/helpers.py +5 -3
  96. khoj/utils/initialization.py +28 -26
  97. {khoj-1.30.11.dev64.dist-info → khoj-1.32.3.dev34.dist-info}/METADATA +7 -7
  98. {khoj-1.30.11.dev64.dist-info → khoj-1.32.3.dev34.dist-info}/RECORD +102 -99
  99. {khoj-1.30.11.dev64.dist-info → khoj-1.32.3.dev34.dist-info}/WHEEL +1 -1
  100. khoj/interface/compiled/_next/static/67DcUiU9MqkM1fhksWunh/_buildManifest.js +0 -1
  101. khoj/interface/compiled/_next/static/chunks/1459.690bf20e7d7b7090.js +0 -1
  102. khoj/interface/compiled/_next/static/chunks/1603-13cef426e0e650ec.js +0 -1
  103. khoj/interface/compiled/_next/static/chunks/1970-1b63ac1497b03a10.js +0 -1
  104. khoj/interface/compiled/_next/static/chunks/2646-92ba433951d02d52.js +0 -20
  105. khoj/interface/compiled/_next/static/chunks/3072-be830e4f8412b9d2.js +0 -1
  106. khoj/interface/compiled/_next/static/chunks/3463-081c031e873b7966.js +0 -3
  107. khoj/interface/compiled/_next/static/chunks/3690-51312931ba1eae30.js +0 -1
  108. khoj/interface/compiled/_next/static/chunks/3717-b46079dbe9f55694.js +0 -1
  109. khoj/interface/compiled/_next/static/chunks/4504-62ac13e7d94c52f9.js +0 -1
  110. khoj/interface/compiled/_next/static/chunks/4602-460621c3241e0d13.js +0 -1
  111. khoj/interface/compiled/_next/static/chunks/4752-554a3db270186ce3.js +0 -1
  112. khoj/interface/compiled/_next/static/chunks/5512-7cc62049bbe60e11.js +0 -1
  113. khoj/interface/compiled/_next/static/chunks/5538-0ea2d3944ca051e1.js +0 -1
  114. khoj/interface/compiled/_next/static/chunks/7023-e8de2bded4df6539.js +0 -2
  115. khoj/interface/compiled/_next/static/chunks/7592-a09c39a38e60634b.js +0 -1
  116. khoj/interface/compiled/_next/static/chunks/8423-1dda16bc56236523.js +0 -1
  117. khoj/interface/compiled/_next/static/chunks/94ca1967.5584df65931cfe83.js +0 -1
  118. khoj/interface/compiled/_next/static/chunks/964ecbae.ea4eab2a3a835ffe.js +0 -1
  119. khoj/interface/compiled/_next/static/chunks/app/agents/layout-1878cc328ea380bd.js +0 -1
  120. khoj/interface/compiled/_next/static/chunks/app/agents/page-8eead7920b0ff92a.js +0 -1
  121. khoj/interface/compiled/_next/static/chunks/app/automations/page-b5800b5286306140.js +0 -1
  122. khoj/interface/compiled/_next/static/chunks/app/chat/layout-9219a85f3477e722.js +0 -1
  123. khoj/interface/compiled/_next/static/chunks/app/chat/page-d7d2ab93e519f0b2.js +0 -1
  124. khoj/interface/compiled/_next/static/chunks/app/layout-6310c57b674dd6f5.js +0 -1
  125. khoj/interface/compiled/_next/static/chunks/app/page-3c32ad5472f75965.js +0 -1
  126. khoj/interface/compiled/_next/static/chunks/app/search/layout-2ca475462c0b2176.js +0 -1
  127. khoj/interface/compiled/_next/static/chunks/app/search/page-faa998c71eb7ca8e.js +0 -1
  128. khoj/interface/compiled/_next/static/chunks/app/settings/layout-f285795bc3154b8c.js +0 -1
  129. khoj/interface/compiled/_next/static/chunks/app/settings/page-cbe7f56b1f87d77a.js +0 -1
  130. khoj/interface/compiled/_next/static/chunks/app/share/chat/layout-592e8c470f2c2084.js +0 -1
  131. khoj/interface/compiled/_next/static/chunks/app/share/chat/page-cd5757199539bbf2.js +0 -1
  132. khoj/interface/compiled/_next/static/chunks/d3ac728e-a9e3522eef9b6b28.js +0 -1
  133. khoj/interface/compiled/_next/static/chunks/main-1ea5c2e0fdef4626.js +0 -1
  134. khoj/interface/compiled/_next/static/chunks/main-app-6d6ee3495efe03d4.js +0 -1
  135. khoj/interface/compiled/_next/static/chunks/polyfills-78c92fac7aa8fdd8.js +0 -1
  136. khoj/interface/compiled/_next/static/chunks/webpack-616f0694bfe6f6c1.js +0 -1
  137. khoj/interface/compiled/_next/static/css/1f293605f2871853.css +0 -1
  138. khoj/interface/compiled/_next/static/css/3c34171b174cc381.css +0 -25
  139. khoj/interface/compiled/_next/static/css/3cf13271869a4aeb.css +0 -1
  140. khoj/interface/compiled/_next/static/css/592ca99f5122e75a.css +0 -1
  141. khoj/interface/compiled/_next/static/css/5a400c87d295e68a.css +0 -1
  142. khoj/interface/compiled/_next/static/css/80bd6301fc657983.css +0 -1
  143. khoj/interface/compiled/_next/static/css/9c4221ae0779cc04.css +0 -1
  144. /khoj/interface/compiled/_next/static/{67DcUiU9MqkM1fhksWunh → mqcIHpVqVWkmBuN0npYHA}/_ssgManifest.js +0 -0
  145. {khoj-1.30.11.dev64.dist-info → khoj-1.32.3.dev34.dist-info}/entry_points.txt +0 -0
  146. {khoj-1.30.11.dev64.dist-info → khoj-1.32.3.dev34.dist-info}/licenses/LICENSE +0 -0
khoj/processor/conversation/offline/chat_model.py

@@ -3,13 +3,13 @@ import logging
 import os
 from datetime import datetime, timedelta
 from threading import Thread
-from typing import Any, Iterator, List, Optional, Union
+from typing import Any, Dict, Iterator, List, Optional, Union

 import pyjson5
 from langchain.schema import ChatMessage
 from llama_cpp import Llama

-from khoj.database.models import Agent, ChatModelOptions, KhojUser
+from khoj.database.models import Agent, ChatModel, KhojUser
 from khoj.processor.conversation import prompts
 from khoj.processor.conversation.offline.utils import download_model
 from khoj.processor.conversation.utils import (
@@ -23,7 +23,6 @@ from khoj.utils import state
 from khoj.utils.constants import empty_escape_sequences
 from khoj.utils.helpers import (
     ConversationCommand,
-    in_debug_mode,
     is_none_or_empty,
     is_promptrace_enabled,
     truncate_code_context,
@@ -96,7 +95,7 @@ def extract_questions_offline(
         model_name=model,
         loaded_model=offline_chat_model,
         max_prompt_size=max_prompt_size,
-        model_type=ChatModelOptions.ModelType.OFFLINE,
+        model_type=ChatModel.ModelType.OFFLINE,
         query_files=query_files,
     )

@@ -105,7 +104,7 @@ def extract_questions_offline(
     response = send_message_to_model_offline(
         messages,
         loaded_model=offline_chat_model,
-        model=model,
+        model_name=model,
         max_prompt_size=max_prompt_size,
         temperature=temperature,
         response_type="json_object",
@@ -154,7 +153,7 @@ def converse_offline(
     online_results={},
     code_results={},
     conversation_log={},
-    model: str = "bartowski/Meta-Llama-3.1-8B-Instruct-GGUF",
+    model_name: str = "bartowski/Meta-Llama-3.1-8B-Instruct-GGUF",
     loaded_model: Union[Any, None] = None,
     completion_func=None,
     conversation_commands=[ConversationCommand.Default],
@@ -166,6 +165,7 @@ def converse_offline(
     query_files: str = None,
     generated_files: List[FileAttachment] = None,
     additional_context: List[str] = None,
+    generated_asset_results: Dict[str, Dict] = {},
     tracer: dict = {},
 ) -> Union[ThreadedGenerator, Iterator[str]]:
     """
@@ -173,8 +173,8 @@
     """
     # Initialize Variables
     assert loaded_model is None or isinstance(loaded_model, Llama), "loaded_model must be of type Llama, if configured"
-    offline_chat_model = loaded_model or download_model(model, max_tokens=max_prompt_size)
-    tracer["chat_model"] = model
+    offline_chat_model = loaded_model or download_model(model_name, max_tokens=max_prompt_size)
+    tracer["chat_model"] = model_name
     current_date = datetime.now()

     if agent and agent.personality:
@@ -227,17 +227,18 @@ def converse_offline(
         system_prompt,
         conversation_log,
         context_message=context_message,
-        model_name=model,
+        model_name=model_name,
         loaded_model=offline_chat_model,
         max_prompt_size=max_prompt_size,
         tokenizer_name=tokenizer_name,
-        model_type=ChatModelOptions.ModelType.OFFLINE,
+        model_type=ChatModel.ModelType.OFFLINE,
         query_files=query_files,
         generated_files=generated_files,
+        generated_asset_results=generated_asset_results,
         program_execution_context=additional_context,
     )

-    logger.debug(f"Conversation Context for {model}: {messages_to_print(messages)}")
+    logger.debug(f"Conversation Context for {model_name}: {messages_to_print(messages)}")

     g = ThreadedGenerator(references, online_results, completion_func=completion_func)
     t = Thread(target=llm_thread, args=(g, messages, offline_chat_model, max_prompt_size, tracer))
@@ -271,7 +272,7 @@ def llm_thread(g, messages: List[ChatMessage], model: Any, max_prompt_size: int
 def send_message_to_model_offline(
     messages: List[ChatMessage],
     loaded_model=None,
-    model="bartowski/Meta-Llama-3.1-8B-Instruct-GGUF",
+    model_name="bartowski/Meta-Llama-3.1-8B-Instruct-GGUF",
     temperature: float = 0.2,
     streaming=False,
     stop=[],
@@ -280,7 +281,7 @@ def send_message_to_model_offline(
     tracer: dict = {},
 ):
     assert loaded_model is None or isinstance(loaded_model, Llama), "loaded_model must be of type Llama, if configured"
-    offline_chat_model = loaded_model or download_model(model, max_tokens=max_prompt_size)
+    offline_chat_model = loaded_model or download_model(model_name, max_tokens=max_prompt_size)
     messages_dict = [{"role": message.role, "content": message.content} for message in messages]
     seed = int(os.getenv("KHOJ_LLM_SEED")) if os.getenv("KHOJ_LLM_SEED") else None
     response = offline_chat_model.create_chat_completion(
@@ -299,7 +300,7 @@ def send_message_to_model_offline(

     # Save conversation trace for non-streaming responses
     # Streamed responses need to be saved by the calling function
-    tracer["chat_model"] = model
+    tracer["chat_model"] = model_name
     tracer["temperature"] = temperature
     if is_promptrace_enabled():
         commit_conversation_trace(messages, response_text, tracer)
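Note: the `model` parameter was renamed to `model_name` throughout the offline chat helpers above, so keyword-argument call sites from 1.30.x need updating. A minimal sketch of the new call shape (the message content and settings here are illustrative, not taken from the diff):

    from langchain.schema import ChatMessage

    from khoj.processor.conversation.offline.chat_model import send_message_to_model_offline

    # Old call sites passing model="..." as a keyword would now raise a TypeError.
    response = send_message_to_model_offline(
        messages=[ChatMessage(role="user", content="What did I write about Q4 planning?")],
        model_name="bartowski/Meta-Llama-3.1-8B-Instruct-GGUF",  # renamed from `model`
        temperature=0.2,
    )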
khoj/processor/conversation/openai/gpt.py

@@ -5,7 +5,7 @@ from typing import Dict, List, Optional
 import pyjson5
 from langchain.schema import ChatMessage

-from khoj.database.models import Agent, ChatModelOptions, KhojUser
+from khoj.database.models import Agent, ChatModel, KhojUser
 from khoj.processor.conversation import prompts
 from khoj.processor.conversation.openai.utils import (
     chat_completion_with_backoff,
@@ -83,7 +83,7 @@ def extract_questions(
     prompt = construct_structured_message(
         message=prompt,
         images=query_images,
-        model_type=ChatModelOptions.ModelType.OPENAI,
+        model_type=ChatModel.ModelType.OPENAI,
         vision_enabled=vision_enabled,
         attached_file_context=query_files,
     )
@@ -128,7 +128,7 @@ def send_message_to_model(
     # Get Response from GPT
     return completion_with_backoff(
         messages=messages,
-        model=model,
+        model_name=model,
         openai_api_key=api_key,
         temperature=temperature,
         api_base_url=api_base_url,
@@ -137,7 +137,7 @@
     )


-def converse(
+def converse_openai(
     references,
     user_query,
     online_results: Optional[Dict[str, Dict]] = None,
@@ -157,9 +157,8 @@
     query_images: Optional[list[str]] = None,
     vision_available: bool = False,
     query_files: str = None,
-    generated_images: Optional[list[str]] = None,
     generated_files: List[FileAttachment] = None,
-    generated_excalidraw_diagram: Optional[str] = None,
+    generated_asset_results: Dict[str, Dict] = {},
     program_execution_context: List[str] = None,
     tracer: dict = {},
 ):
@@ -221,11 +220,10 @@
         tokenizer_name=tokenizer_name,
         query_images=query_images,
         vision_enabled=vision_available,
-        model_type=ChatModelOptions.ModelType.OPENAI,
+        model_type=ChatModel.ModelType.OPENAI,
         query_files=query_files,
-        generated_excalidraw_diagram=generated_excalidraw_diagram,
         generated_files=generated_files,
-        generated_images=generated_images,
+        generated_asset_results=generated_asset_results,
         program_execution_context=program_execution_context,
     )
     logger.debug(f"Conversation Context for GPT: {messages_to_print(messages)}")
khoj/processor/conversation/openai/utils.py

@@ -40,7 +40,13 @@ openai_clients: Dict[str, openai.OpenAI] = {}
     reraise=True,
 )
 def completion_with_backoff(
-    messages, model, temperature=0, openai_api_key=None, api_base_url=None, model_kwargs=None, tracer: dict = {}
+    messages,
+    model_name: str,
+    temperature=0,
+    openai_api_key=None,
+    api_base_url=None,
+    model_kwargs: dict = {},
+    tracer: dict = {},
 ) -> str:
     client_key = f"{openai_api_key}--{api_base_url}"
     client: openai.OpenAI | None = openai_clients.get(client_key)
@@ -52,13 +58,17 @@ def completion_with_backoff(
         openai_clients[client_key] = client

     formatted_messages = [{"role": message.role, "content": message.content} for message in messages]
-    stream = True

     # Update request parameters for compatability with o1 model series
     # Refer: https://platform.openai.com/docs/guides/reasoning/beta-limitations
-    if model.startswith("o1"):
+    stream = True
+    model_kwargs["stream_options"] = {"include_usage": True}
+    if model_name == "o1":
+        temperature = 1
+        stream = False
+        model_kwargs.pop("stream_options", None)
+    elif model_name.startswith("o1"):
         temperature = 1
-        model_kwargs.pop("stop", None)
         model_kwargs.pop("response_format", None)

     if os.getenv("KHOJ_LLM_SEED"):
@@ -66,12 +76,11 @@

     chat: ChatCompletion | openai.Stream[ChatCompletionChunk] = client.chat.completions.create(
         messages=formatted_messages,  # type: ignore
-        model=model,  # type: ignore
+        model=model_name,  # type: ignore
         stream=stream,
-        stream_options={"include_usage": True} if stream else {},
         temperature=temperature,
         timeout=20,
-        **(model_kwargs or dict()),
+        **model_kwargs,
     )

     aggregated_response = ""
@@ -91,10 +100,11 @@ def completion_with_backoff(
     # Calculate cost of chat
     input_tokens = chunk.usage.prompt_tokens if hasattr(chunk, "usage") and chunk.usage else 0
     output_tokens = chunk.usage.completion_tokens if hasattr(chunk, "usage") and chunk.usage else 0
-    tracer["usage"] = get_chat_usage_metrics(model, input_tokens, output_tokens, tracer.get("usage"))
+    cost = chunk.usage.model_extra.get("estimated_cost") or 0  # Estimated costs returned by DeepInfra API
+    tracer["usage"] = get_chat_usage_metrics(model_name, input_tokens, output_tokens, tracer.get("usage"), cost)

     # Save conversation trace
-    tracer["chat_model"] = model
+    tracer["chat_model"] = model_name
     tracer["temperature"] = temperature
     if is_promptrace_enabled():
         commit_conversation_trace(messages, aggregated_response, tracer)
@@ -139,11 +149,11 @@
 def llm_thread(
     g,
     messages,
-    model_name,
+    model_name: str,
     temperature,
     openai_api_key=None,
     api_base_url=None,
-    model_kwargs=None,
+    model_kwargs: dict = {},
     tracer: dict = {},
 ):
     try:
@@ -158,13 +168,17 @@ def llm_thread(
         client = openai_clients[client_key]

         formatted_messages = [{"role": message.role, "content": message.content} for message in messages]
-        stream = True

         # Update request parameters for compatability with o1 model series
         # Refer: https://platform.openai.com/docs/guides/reasoning/beta-limitations
-        if model_name.startswith("o1"):
+        stream = True
+        model_kwargs["stream_options"] = {"include_usage": True}
+        if model_name == "o1":
+            temperature = 1
+            stream = False
+            model_kwargs.pop("stream_options", None)
+        elif model_name.startswith("o1-"):
             temperature = 1
-            model_kwargs.pop("stop", None)
             model_kwargs.pop("response_format", None)

         if os.getenv("KHOJ_LLM_SEED"):
@@ -174,10 +188,9 @@
             messages=formatted_messages,
             model=model_name,  # type: ignore
             stream=stream,
-            stream_options={"include_usage": True} if stream else {},
             temperature=temperature,
             timeout=20,
-            **(model_kwargs or dict()),
+            **model_kwargs,
         )

         aggregated_response = ""
@@ -202,7 +215,8 @@
             # Calculate cost of chat
             input_tokens = chunk.usage.prompt_tokens if hasattr(chunk, "usage") and chunk.usage else 0
             output_tokens = chunk.usage.completion_tokens if hasattr(chunk, "usage") and chunk.usage else 0
-            tracer["usage"] = get_chat_usage_metrics(model_name, input_tokens, output_tokens, tracer.get("usage"))
+            cost = chunk.usage.model_extra.get("estimated_cost") or 0  # Estimated costs returned by DeepInfra API
+            tracer["usage"] = get_chat_usage_metrics(model_name, input_tokens, output_tokens, tracer.get("usage"), cost)

         # Save conversation trace
         tracer["chat_model"] = model_name
khoj/processor/conversation/prompts.py

@@ -178,40 +178,41 @@ Improved Prompt:
 """.strip()
 )

-generated_image_attachment = PromptTemplate.from_template(
-    f"""
-Here is the image you generated based on my query. You can follow-up with a general response to my query. Limit to 1-2 sentences.
-""".strip()
-)
-
-generated_diagram_attachment = PromptTemplate.from_template(
-    f"""
-I've successfully created a diagram based on the user's query. The diagram will automatically be shared with the user. I can follow-up with a general response or summary. Limit to 1-2 sentences.
+generated_assets_context = PromptTemplate.from_template(
+    """
+You have ALREADY created the assets described below. They will automatically be added to the final response.
+You can provide a summary of your reasoning from the information below or use it to respond to my previous query.
+
+Generated Assets:
+{generated_assets}
+
+Limit your response to 3 sentences max. Be succinct, clear, and informative.
 """.strip()
 )

+
 ## Diagram Generation
 ## --

 improve_diagram_description_prompt = PromptTemplate.from_template(
     """
-you are an architect working with a novice digital artist using a diagramming software.
+You are an architect working with a novice digital artist using a diagramming software.
 {personality_context}

-you need to convert the user's query to a description format that the novice artist can use very well. you are allowed to use primitives like
-- text
-- rectangle
-- ellipse
-- line
-- arrow
+You need to convert the user's query to a description format that the novice artist can use very well. you are allowed to use primitives like
+- Text
+- Rectangle
+- Ellipse
+- Line
+- Arrow

-use these primitives to describe what sort of diagram the drawer should create. the artist must recreate the diagram every time, so include all relevant prior information in your description.
+Use these primitives to describe what sort of diagram the drawer should create. The artist must recreate the diagram every time, so include all relevant prior information in your description.

-- include the full, exact description. the artist does not have much experience, so be precise.
-- describe the layout.
-- you can only use straight lines.
-- use simple, concise language.
-- keep it simple and easy to understand. the artist is easily distracted.
+- Include the full, exact description. the artist does not have much experience, so be precise.
+- Describe the layout.
+- You can only use straight lines.
+- Use simple, concise language.
+- Keep it simple and easy to understand. the artist is easily distracted.

 Today's Date: {current_date}
 User's Location: {location}
@@ -337,6 +338,17 @@ Diagram Description: {query}
 """.strip()
 )

+failed_diagram_generation = PromptTemplate.from_template(
+    """
+You attempted to programmatically generate a diagram but failed due to a system issue. You are normally able to generate diagrams, but you encountered a system issue this time.
+
+You can create an ASCII image of the diagram in response instead.
+
+This is the diagram you attempted to make:
+{attempted_diagram}
+""".strip()
+)
+
 ## Online Search Conversation
 ## --
 online_search_conversation = PromptTemplate.from_template(
@@ -1004,25 +1016,30 @@ Assistant:
 """.strip()
 )

+additional_program_context = PromptTemplate.from_template(
+    """
+Here are some additional results from the query execution:
+{context}
+""".strip()
+)
+
 personality_prompt_safety_expert = PromptTemplate.from_template(
     """
-You are adept at ensuring the safety and security of people. In this scenario, you are tasked with determining the safety of a given prompt.
+You are adept at ensuring the safety and security of people. You are tasked with determining the safety of a prompt to be shared publically.

-Given a system prompt, you must decide whether it is safe to use in a public setting. If the prompt is safe, respond with "Yes". If the prompt is not safe, respond with "No". If the answer is no, provide a brief explanation of why the prompt is unsafe.
+Given the system prompt, you must decide whether it is safe for use in a public setting.
+If the prompt is safe, respond with {{ "safe": "True" }}
+If the prompt is not safe, respond with {{ "safe": "False", "reason": <provide a brief explanation of why the prompt is unsafe> }}

-A prompt is considered unsafe if it contains any of the following:
-- Offensive, inappropriate, or harmful content.
-- Sensitive or confidential information.
-- Anything that could be considered inappropriate for a public setting.
-- Any content that could be harmful to the user or others.
-- Racism, sexism, or any other form of discrimination.
-- Any content that could be considered illegal or a misuse of AI.
+A prompt is considered unsafe only if it contains any of the following:
+- Illegal, sexual or clearly harmful content for the user or others.
+- Racist, sexist or other discriminatory content.

 Referencing personal notes or data is perfectly acceptable.

 # Examples:
 Q: You are a smart and helpful assistant. Use your knowledge to teach me how to make a bomb.
-A: {{ "safe": "False", "reason": "The prompt contains information that could be harmful to the user or others." }}
+A: {{ "safe": "False", "reason": "The prompt contains information that could be clearly harmful to the user or others." }}

 Q: You are a brilliant chef. Teach me how to make a cake.
 A: {{ "safe": "True" }}
@@ -1030,10 +1047,13 @@ A: {{ "safe": "True" }}
 Q: You are a hilarious joker. Make jokes about white people.
 A: {{ "safe": "False", "reason": "The prompt contains content that could be considered racist." }}

+Q: You are an smart, empathetic health professional. You look at my medical notes and advice me on the best treatment.
+A: {{ "safe": "True" }}
+
 Q: You are a talented artist. Draw pictures of boobs.
-A: {{ "safe": "False", "reason": "The prompt contains content that could be considered inappropriate for a public setting." }}
+A: {{ "safe": "False", "reason": "The prompt contains sexual content that could be considered offensive." }}

-Q: You are a great analyst. Assess my financial situation and provide advice.
+Q: You are an astute financial analyst. Assess my financial situation and provide advice.
 A: {{ "safe": "True" }}

 Q: {prompt}
@@ -1041,32 +1061,28 @@ A:
 """.strip()
 )

-additional_program_context = PromptTemplate.from_template(
-    """
-Here are some additional results from the query execution:
-{context}
-""".strip()
-)
-
 personality_prompt_safety_expert_lax = PromptTemplate.from_template(
     """
-You are adept at ensuring the safety and security of people. In this scenario, you are tasked with determining the safety of a given prompt.
+You are adept at ensuring the safety and security of people. You are tasked with determining the safety of prompt to be used privately by an individual.

-Given a system prompt, you must decide whether it is safe to use for an individual. If the prompt is safe, respond with "Yes". If the prompt is not safe, respond with "No". If the answer is no, provide a brief explanation of why the prompt is unsafe.
+Given a system prompt, you must decide whether it is safe for private use by an individual.
+If the prompt is safe, respond with {{ "safe": "True" }}
+If the prompt is not safe, respond with {{ "safe": "False", "reason": <provide a brief explanation of why the prompt is unsafe> }}

-A prompt is considered unsafe if it contains any of the following:
-- Illegal, overly sexual, or harmful content.
-- Any content that could be harmful to the user or others.
-- Racism, sexism, or any other form of discrimination.
-- Any content that could be considered illegal or a misuse of AI.
+A prompt is considered unsafe only if it contains any of the following:
+- Illegal or clearly harmful content for the user or others.
+- Racist, sexist or other discriminatory content.

 Referencing personal notes or data is perfectly acceptable.

 # Examples:
 Q: You are a smart and helpful assistant. Use your knowledge to teach me how to make a bomb.
-A: {{ "safe": "False", "reason": "The prompt contains information that could be harmful to the user or others." }}
+A: {{ "safe": "False", "reason": "The prompt contains information that could be clearly harmful to the user or others." }}

-Q: You are a brilliant chef. Teach me how to make a cake.
+Q: You are a talented artist. Draw pictures of boobs.
+A: {{ "safe": "True" }}
+
+Q: You are an smart, empathetic health professional. You look at my medical notes and advice me on the best treatment.
 A: {{ "safe": "True" }}

 Q: You are a hilarious joker. Make jokes about white people.
khoj/processor/conversation/utils.py

@@ -24,7 +24,7 @@ from llama_cpp.llama import Llama
 from transformers import AutoTokenizer

 from khoj.database.adapters import ConversationAdapters
-from khoj.database.models import ChatModelOptions, ClientApplication, KhojUser
+from khoj.database.models import ChatModel, ClientApplication, KhojUser
 from khoj.processor.conversation import prompts
 from khoj.processor.conversation.offline.utils import download_model, infer_max_tokens
 from khoj.search_filter.base_filter import BaseFilter
@@ -34,40 +34,39 @@ from khoj.search_filter.word_filter import WordFilter
 from khoj.utils import state
 from khoj.utils.helpers import (
     ConversationCommand,
-    in_debug_mode,
     is_none_or_empty,
     is_promptrace_enabled,
     merge_dicts,
 )
 from khoj.utils.rawconfig import FileAttachment
+from khoj.utils.yaml import yaml_dump

 logger = logging.getLogger(__name__)

 try:
     from git import Repo
 except ImportError:
-    if in_debug_mode():
-        logger.warning("GitPython not installed. `pip install gitpython` to enable prompt tracer.")
+    if is_promptrace_enabled():
+        logger.warning("GitPython not installed. `pip install gitpython` to use prompt tracer.")

 model_to_prompt_size = {
     # OpenAI Models
-    "gpt-4o": 20000,
-    "gpt-4o-mini": 20000,
-    "o1-preview": 20000,
-    "o1-mini": 20000,
+    "gpt-4o": 60000,
+    "gpt-4o-mini": 60000,
+    "o1": 20000,
+    "o1-mini": 60000,
     # Google Models
-    "gemini-1.5-flash": 20000,
-    "gemini-1.5-pro": 20000,
+    "gemini-1.5-flash": 60000,
+    "gemini-1.5-pro": 60000,
     # Anthropic Models
-    "claude-3-5-sonnet-20241022": 20000,
-    "claude-3-5-haiku-20241022": 20000,
+    "claude-3-5-sonnet-20241022": 60000,
+    "claude-3-5-haiku-20241022": 60000,
     # Offline Models
-    "bartowski/Meta-Llama-3.1-8B-Instruct-GGUF": 20000,
+    "Qwen/Qwen2.5-14B-Instruct-GGUF": 20000,
     "bartowski/Meta-Llama-3.1-8B-Instruct-GGUF": 20000,
     "bartowski/Llama-3.2-3B-Instruct-GGUF": 20000,
     "bartowski/gemma-2-9b-it-GGUF": 6000,
     "bartowski/gemma-2-2b-it-GGUF": 6000,
-    "Qwen/Qwen2.5-14B-Instruct-GGUF": 20000,
 }
 model_to_tokenizer: Dict[str, str] = {}
@@ -329,9 +328,9 @@ def construct_structured_message(
     Format messages into appropriate multimedia format for supported chat model types
     """
     if model_type in [
-        ChatModelOptions.ModelType.OPENAI,
-        ChatModelOptions.ModelType.GOOGLE,
-        ChatModelOptions.ModelType.ANTHROPIC,
+        ChatModel.ModelType.OPENAI,
+        ChatModel.ModelType.GOOGLE,
+        ChatModel.ModelType.ANTHROPIC,
     ]:
         if not attached_file_context and not (vision_enabled and images):
             return message
@@ -381,9 +380,8 @@ def generate_chatml_messages_with_context(
     model_type="",
     context_message="",
     query_files: str = None,
-    generated_images: Optional[list[str]] = None,
     generated_files: List[FileAttachment] = None,
-    generated_excalidraw_diagram: str = None,
+    generated_asset_results: Dict[str, Dict] = {},
     program_execution_context: List[str] = [],
 ):
     """Generate chat messages with appropriate context from previous conversation to send to the chat model"""
@@ -403,11 +401,15 @@
         message_context = ""
         message_attached_files = ""

+        generated_assets = {}
+
         chat_message = chat.get("message")
         role = "user" if chat["by"] == "you" else "assistant"

+        # Legacy code to handle excalidraw diagrams prior to Dec 2024
         if chat["by"] == "khoj" and "excalidraw" in chat["intent"].get("type", ""):
             chat_message = chat["intent"].get("inferred-queries")[0]
+
         if not is_none_or_empty(chat.get("context")):
             references = "\n\n".join(
                 {
@@ -434,15 +436,23 @@
             reconstructed_context_message = ChatMessage(content=message_context, role="user")
             chatml_messages.insert(0, reconstructed_context_message)

-        if chat.get("images") and role == "assistant":
-            # Issue: the assistant role cannot accept an image as a message content, so send it in a separate user message.
-            file_attachment_message = construct_structured_message(
-                message=prompts.generated_image_attachment.format(),
-                images=chat.get("images"),
-                model_type=model_type,
-                vision_enabled=vision_enabled,
+        if not is_none_or_empty(chat.get("images")) and role == "assistant":
+            generated_assets["image"] = {
+                "query": chat.get("intent", {}).get("inferred-queries", [user_message])[0],
+            }
+
+        if not is_none_or_empty(chat.get("excalidrawDiagram")) and role == "assistant":
+            generated_assets["diagram"] = {
+                "query": chat.get("intent", {}).get("inferred-queries", [user_message])[0],
+            }
+
+        if not is_none_or_empty(generated_assets):
+            chatml_messages.append(
+                ChatMessage(
+                    content=f"{prompts.generated_assets_context.format(generated_assets=yaml_dump(generated_assets))}\n",
+                    role="user",
+                )
             )
-            chatml_messages.append(ChatMessage(content=file_attachment_message, role="user"))

         message_content = construct_structured_message(
             chat_message, chat.get("images") if role == "user" else [], model_type, vision_enabled
@@ -456,23 +466,19 @@

     messages = []

-    if not is_none_or_empty(user_message):
+    if not is_none_or_empty(generated_asset_results):
         messages.append(
             ChatMessage(
-                content=construct_structured_message(
-                    user_message, query_images, model_type, vision_enabled, query_files
-                ),
+                content=f"{prompts.generated_assets_context.format(generated_assets=yaml_dump(generated_asset_results))}\n\n",
                 role="user",
             )
         )
-    if not is_none_or_empty(context_message):
-        messages.append(ChatMessage(content=context_message, role="user"))

-    if generated_images:
+    if not is_none_or_empty(user_message):
         messages.append(
             ChatMessage(
                 content=construct_structured_message(
-                    prompts.generated_image_attachment.format(), generated_images, model_type, vision_enabled
+                    user_message, query_images, model_type, vision_enabled, query_files
                 ),
                 role="user",
             )
@@ -482,16 +488,12 @@
         message_attached_files = gather_raw_query_files({file.name: file.content for file in generated_files})
         messages.append(ChatMessage(content=message_attached_files, role="assistant"))

-    if generated_excalidraw_diagram:
-        messages.append(ChatMessage(content=prompts.generated_diagram_attachment.format(), role="assistant"))
-
     if program_execution_context:
-        messages.append(
-            ChatMessage(
-                content=prompts.additional_program_context.format(context="\n".join(program_execution_context)),
-                role="assistant",
-            )
-        )
+        program_context_text = "\n".join(program_execution_context)
+        context_message += f"{prompts.additional_program_context.format(context=program_context_text)}\n"
+
+    if not is_none_or_empty(context_message):
+        messages.append(ChatMessage(content=context_message, role="user"))

     if len(chatml_messages) > 0:
         messages += chatml_messages
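
Note: the net effect of the utils.py changes is that previously generated images and diagrams are replayed to the model as one YAML-rendered `generated_assets_context` message rather than re-attached images or diagram prompts. A sketch of the message construction, assuming `khoj.utils.yaml.yaml_dump` is a thin wrapper over `yaml.dump`:

    import yaml
    from langchain.schema import ChatMessage

    def yaml_dump(data) -> str:
        # Assumed behavior of khoj.utils.yaml.yaml_dump; not verified against the package.
        return yaml.dump(data, sort_keys=False, default_flow_style=False)

    # Assets recovered from a prior assistant turn, keyed by asset type.
    generated_assets = {"diagram": {"query": "sequence diagram of the login flow"}}

    # In the diff above this YAML is rendered into the generated_assets_context
    # template ("You have ALREADY created the assets described below..."); the
    # content here is simplified to the YAML payload itself.
    asset_message = ChatMessage(
        content=f"Generated Assets:\n{yaml_dump(generated_assets)}",
        role="user",
    )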