khoj 2.0.0b14.dev43__py3-none-any.whl → 2.0.0b15.dev22__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46)
  1. khoj/database/adapters/__init__.py +59 -20
  2. khoj/database/admin.py +6 -2
  3. khoj/database/migrations/0094_serverchatsettings_think_free_deep_and_more.py +61 -0
  4. khoj/database/models/__init__.py +18 -2
  5. khoj/interface/compiled/404/index.html +1 -1
  6. khoj/interface/compiled/_next/static/chunks/{9808-c0742b05e1ef29ba.js → 9808-bd5d7361ad026094.js} +1 -1
  7. khoj/interface/compiled/_next/static/chunks/app/chat/page-ac7ed0a1aff1b145.js +1 -0
  8. khoj/interface/compiled/_next/static/css/fb7ea16e60b40ecd.css +1 -0
  9. khoj/interface/compiled/agents/index.html +1 -1
  10. khoj/interface/compiled/agents/index.txt +1 -1
  11. khoj/interface/compiled/automations/index.html +1 -1
  12. khoj/interface/compiled/automations/index.txt +1 -1
  13. khoj/interface/compiled/chat/index.html +2 -2
  14. khoj/interface/compiled/chat/index.txt +2 -2
  15. khoj/interface/compiled/index.html +2 -2
  16. khoj/interface/compiled/index.txt +1 -1
  17. khoj/interface/compiled/search/index.html +1 -1
  18. khoj/interface/compiled/search/index.txt +1 -1
  19. khoj/interface/compiled/settings/index.html +1 -1
  20. khoj/interface/compiled/settings/index.txt +1 -1
  21. khoj/interface/compiled/share/chat/index.html +2 -2
  22. khoj/interface/compiled/share/chat/index.txt +2 -2
  23. khoj/processor/conversation/anthropic/anthropic_chat.py +4 -88
  24. khoj/processor/conversation/anthropic/utils.py +1 -2
  25. khoj/processor/conversation/google/gemini_chat.py +5 -89
  26. khoj/processor/conversation/google/utils.py +8 -9
  27. khoj/processor/conversation/openai/gpt.py +16 -93
  28. khoj/processor/conversation/openai/utils.py +58 -43
  29. khoj/processor/conversation/prompts.py +30 -39
  30. khoj/processor/conversation/utils.py +71 -84
  31. khoj/processor/image/generate.py +69 -15
  32. khoj/processor/tools/run_code.py +3 -2
  33. khoj/routers/api_chat.py +8 -21
  34. khoj/routers/helpers.py +243 -156
  35. khoj/routers/research.py +6 -6
  36. khoj/utils/constants.py +3 -1
  37. khoj/utils/helpers.py +6 -2
  38. {khoj-2.0.0b14.dev43.dist-info → khoj-2.0.0b15.dev22.dist-info}/METADATA +1 -1
  39. {khoj-2.0.0b14.dev43.dist-info → khoj-2.0.0b15.dev22.dist-info}/RECORD +44 -43
  40. khoj/interface/compiled/_next/static/chunks/app/chat/page-1b4893b1a9957220.js +0 -1
  41. khoj/interface/compiled/_next/static/css/cea3bdfe98c144bd.css +0 -1
  42. /khoj/interface/compiled/_next/static/{OKbGpkzD6gHDfr1vAog6p → t8O_8CJ9p3UtV9kEsAAWT}/_buildManifest.js +0 -0
  43. /khoj/interface/compiled/_next/static/{OKbGpkzD6gHDfr1vAog6p → t8O_8CJ9p3UtV9kEsAAWT}/_ssgManifest.js +0 -0
  44. {khoj-2.0.0b14.dev43.dist-info → khoj-2.0.0b15.dev22.dist-info}/WHEEL +0 -0
  45. {khoj-2.0.0b14.dev43.dist-info → khoj-2.0.0b15.dev22.dist-info}/entry_points.txt +0 -0
  46. {khoj-2.0.0b14.dev43.dist-info → khoj-2.0.0b15.dev22.dist-info}/licenses/LICENSE +0 -0
khoj/processor/conversation/prompts.py
@@ -18,12 +18,11 @@ Today is {day_of_week}, {current_date} in UTC.
 
  # Style
  - Your responses should be helpful, conversational and tuned to the user's communication style.
- - Make sure to use the specific LaTeX math mode delimiters for your response. LaTex math mode specific delimiters as following
- - inline math mode : \\( and \\)
- - display math mode: insert linebreak after opening $$, \\[ and before closing $$, \\]
  - Provide inline citations to documents and websites referenced. Add them inline in markdown format to directly support your claim.
  For example: "The weather today is sunny [1](https://weather.com)."
- - Mention generated assets like images by reference, e.g ![chart](/visualization/image.png). Do not manually output raw, b64 encoded bytes in your response.
+ - KaTeX is used to render LaTeX expressions. Make sure you only use the KaTeX math mode delimiters specified below:
+ - inline math mode : \\( and \\)
+ - display math mode: insert linebreak after opening $$, \\[ and before closing $$, \\]
  - Do not respond with raw programs or scripts in your final response unless you know the user is a programmer or has explicitly requested code.
  """.strip()
  )
@@ -41,12 +40,11 @@ Today is {day_of_week}, {current_date} in UTC.
  - Users can share files and other information with you using the Khoj Web, Desktop, Obsidian or Emacs app. They can also drag and drop their files into the chat window.
 
  # Style
- - Make sure to use the specific LaTeX math mode delimiters for your response. LaTex math mode specific delimiters as following
- - inline math mode : `\\(` and `\\)`
- - display math mode: insert linebreak after opening `$$`, `\\[` and before closing `$$`, `\\]`
  - Provide inline citations to documents and websites referenced. Add them inline in markdown format to directly support your claim.
  For example: "The weather today is sunny [1](https://weather.com)."
- - Mention generated assets like images by reference, e.g ![chart](/visualization/image.png). Do not manually output raw, b64 encoded bytes in your response.
+ - KaTeX is used to render LaTeX expressions. Make sure you only use the KaTeX math mode delimiters specified below:
+ - inline math mode : \\( and \\)
+ - display math mode: insert linebreak after opening $$, \\[ and before closing $$, \\]
 
  # Instructions:\n{bio}
  """.strip()
@@ -115,45 +113,38 @@ User's Notes:
  ## Image Generation
  ## --
 
- image_generation_improve_prompt_base = """
+ enhance_image_system_message = PromptTemplate.from_template(
+ """
  You are a talented media artist with the ability to describe images to compose in professional, fine detail.
+ Your image description will be transformed into an image by an AI model on your team.
  {personality_context}
- Generate a vivid description of the image to be rendered using the provided context and user prompt below:
-
- Today's Date: {current_date}
- User's Location: {location}
-
- User's Notes:
- {references}
-
- Online References:
- {online_results}
 
- Conversation Log:
- {chat_history}
-
- User Prompt: "{query}"
-
- Now generate an professional description of the image to generate in vivid, fine detail.
- - Use today's date, user's location, user's notes and online references to weave in any context that will improve the image generation.
- - Retain any important information and follow any instructions in the conversation log or user prompt.
+ # Instructions
+ - Retain important information and follow instructions by the user when composing the image description.
+ - Weave in the context provided below if it will enhance the image.
+ - Specify desired elements, lighting, mood, and composition in the description.
+ - Decide the shape best suited to render the image. It can be one of square, portrait or landscape.
  - Add specific, fine position details. Mention painting style, camera parameters to compose the image.
- - Ensure your improved prompt is in prose format."""
+ - Transform any negations in user instructions into positive alternatives.
+ Instead of saying what should NOT be in the image, describe what SHOULD be there instead.
+ Examples:
+ - "no sun" → "overcast cloudy sky"
+ - "don't include people" → "empty landscape" or "solitary scene"
+ - Ensure your image description is in prose format (e.g no lists, links).
+ - If any text is to be rendered in the image put it within double quotes in your image description.
 
- image_generation_improve_prompt_dalle = PromptTemplate.from_template(
- f"""
- {image_generation_improve_prompt_base}
+ # Context
 
- Improved Prompt:
- """.strip()
- )
+ ## User Location: {location}
 
- image_generation_improve_prompt_sd = PromptTemplate.from_template(
- f"""
- {image_generation_improve_prompt_base}
- - If any text is to be rendered in the image put it within double quotes in your improved prompt.
+ ## User Documents
+ {references}
+
+ ## Online References
+ {online_results}
 
- Improved Prompt:
+ Now generate a vivid description of the image and image shape to be rendered.
+ Your response should be a JSON object with 'description' and 'shape' fields specified.
  """.strip()
  )
 
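Reviewer's note: the rewritten prompt asks the model for structured output instead of free-form "Improved Prompt" text. Below is a minimal sketch of how a caller might parse that reply, assuming only the 'description' and 'shape' contract stated in the prompt above; the helper itself is hypothetical, not Khoj code.

```python
import json

# Shapes permitted by the prompt above
VALID_SHAPES = {"square", "portrait", "landscape"}

def parse_enhanced_image_prompt(raw_response: str) -> tuple[str, str]:
    """Hypothetical helper: extract description and shape from the model's JSON reply."""
    parsed = json.loads(raw_response)
    description = parsed["description"]
    shape = parsed.get("shape", "square")
    # Fall back to a square canvas on an unexpected shape value
    if shape not in VALID_SHAPES:
        shape = "square"
    return description, shape

description, shape = parse_enhanced_image_prompt(
    '{"description": "A lone oak on a misty hill at dawn", "shape": "landscape"}'
)
print(shape)  # landscape
```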
khoj/processor/conversation/utils.py
@@ -23,7 +23,6 @@ from transformers import AutoTokenizer, PreTrainedTokenizer, PreTrainedTokenizer
  from khoj.database.adapters import ConversationAdapters
  from khoj.database.models import (
  ChatMessageModel,
- ChatModel,
  ClientApplication,
  Intent,
  KhojUser,
@@ -73,6 +72,7 @@ model_to_prompt_size = {
  "gpt-5-nano-2025-08-07": 120000,
  # Google Models
  "gemini-2.5-flash": 120000,
+ "gemini-2.5-flash-lite": 120000,
  "gemini-2.5-pro": 60000,
  "gemini-2.0-flash": 120000,
  "gemini-2.0-flash-lite": 120000,
@@ -262,7 +262,7 @@ def construct_question_history(
  continue
 
  message = chat.message
- inferred_queries_list = chat.intent.inferred_queries or []
+ inferred_queries_list = chat.intent.inferred_queries or [] if chat.intent else []
 
  # Ensure inferred_queries_list is a list, defaulting to the original query in a list
  if not inferred_queries_list:
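Reviewer's note: the None-guard above leans on Python's conditional-expression precedence, where `x or [] if cond else []` parses as `(x or []) if cond else []`, so the attribute lookup is skipped entirely when `chat.intent` is None. A runnable illustration with a stand-in `Intent` stub:

```python
class Intent:
    """Stand-in stub; only the attribute used by the guard above."""

    def __init__(self, inferred_queries=None):
        self.inferred_queries = inferred_queries

for intent in (None, Intent(), Intent(["rewritten query"])):
    # Parses as: (intent.inferred_queries or []) if intent else []
    queries = intent.inferred_queries or [] if intent else []
    print(queries)
# Prints: [], [], ['rewritten query']
```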
@@ -449,7 +449,6 @@ async def save_to_conversation_log(
  query_images: List[str] = None,
  raw_query_files: List[FileAttachment] = [],
  generated_images: List[str] = [],
- raw_generated_files: List[FileAttachment] = [],
  generated_mermaidjs_diagram: str = None,
  research_results: Optional[List[ResearchIteration]] = None,
  train_of_thought: List[Any] = [],
@@ -474,7 +473,6 @@ async def save_to_conversation_log(
  "trainOfThought": train_of_thought,
  "turnId": turn_id,
  "images": generated_images,
- "queryFiles": [file.model_dump(mode="json") for file in raw_generated_files],
  }
 
  if generated_mermaidjs_diagram:
@@ -527,29 +525,18 @@ def construct_structured_message(
 
  Assume vision is enabled and chat model provider supports messages in chatml format, unless specified otherwise.
  """
- if not model_type or model_type in [
- ChatModel.ModelType.OPENAI,
- ChatModel.ModelType.GOOGLE,
- ChatModel.ModelType.ANTHROPIC,
- ]:
- constructed_messages: List[dict[str, Any]] = []
- if not is_none_or_empty(message):
- constructed_messages += [{"type": "text", "text": message}] if isinstance(message, str) else message
- # Drop image message passed by caller if chat model does not have vision enabled
- if not vision_enabled:
- constructed_messages = [m for m in constructed_messages if m.get("type") != "image_url"]
- if not is_none_or_empty(attached_file_context):
- constructed_messages += [{"type": "text", "text": attached_file_context}]
- if vision_enabled and images:
- for image in images:
- constructed_messages += [{"type": "image_url", "image_url": {"url": image}}]
- return constructed_messages
-
- message = message if isinstance(message, str) else "\n\n".join(m["text"] for m in message)
+ constructed_messages: List[dict[str, Any]] = []
+ if not is_none_or_empty(message):
+ constructed_messages += [{"type": "text", "text": message}] if isinstance(message, str) else message
+ # Drop image message passed by caller if chat model does not have vision enabled
+ if not vision_enabled:
+ constructed_messages = [m for m in constructed_messages if m.get("type") != "image_url"]
  if not is_none_or_empty(attached_file_context):
- return f"{attached_file_context}\n\n{message}"
-
- return message
+ constructed_messages += [{"type": "text", "text": attached_file_context}]
+ if vision_enabled and images:
+ for image in images:
+ constructed_messages += [{"type": "image_url", "image_url": {"url": image}}]
+ return constructed_messages
 
 
  def gather_raw_query_files(
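Reviewer's note: with the per-ModelType branching gone, `construct_structured_message` now returns a list of chatml content parts for every provider instead of sometimes collapsing to a plain string. A sketch of the output shape implied by the hunk above, with placeholder values:

```python
# Placeholder inputs (values are illustrative)
message = "What is in this photo?"
attached_file_context = "# notes.md\nRemember to water the plants."
images = ["data:image/webp;base64,..."]

# With vision enabled, the returned parts list mixes text and image_url entries,
# in the order the function body appends them: message, files, then images.
expected = [
    {"type": "text", "text": message},
    {"type": "text", "text": attached_file_context},
    {"type": "image_url", "image_url": {"url": images[0]}},
]
# With vision disabled, image_url parts are filtered out and only text parts remain.
print(len(expected))  # 3
```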
@@ -569,20 +556,21 @@ def gather_raw_query_files(
 
 
  def generate_chatml_messages_with_context(
+ # Context
  user_message: str,
- system_message: str = None,
+ query_files: str = None,
+ query_images=None,
+ context_message="",
+ generated_asset_results: Dict[str, Dict] = {},
+ program_execution_context: List[str] = [],
  chat_history: list[ChatMessageModel] = [],
+ system_message: str = None,
+ # Model Config
  model_name="gpt-4o-mini",
+ model_type="",
  max_prompt_size=None,
  tokenizer_name=None,
- query_images=None,
  vision_enabled=False,
- model_type="",
- context_message="",
- query_files: str = None,
- generated_files: List[FileAttachment] = None,
- generated_asset_results: Dict[str, Dict] = {},
- program_execution_context: List[str] = [],
  ):
  """Generate chat messages with appropriate context from previous conversation to send to the chat model"""
  # Set max prompt size from user config or based on pre-configured for model and machine specs
@@ -604,18 +592,10 @@ def generate_chatml_messages_with_context(
  role = "user" if chat.by == "you" else "assistant"
 
  # Legacy code to handle excalidraw diagrams prior to Dec 2024
- if chat.by == "khoj" and "excalidraw" in chat.intent.type or "":
+ if chat.by == "khoj" and chat.intent and "excalidraw" in chat.intent.type:
  chat_message = (chat.intent.inferred_queries or [])[0]
 
- if chat.queryFiles:
- raw_query_files = chat.queryFiles
- query_files_dict = dict()
- for file in raw_query_files:
- query_files_dict[file["name"]] = file["content"]
-
- message_attached_files = gather_raw_query_files(query_files_dict)
- chatml_messages.append(ChatMessage(content=message_attached_files, role=role))
-
+ # Add search and action context
  if not is_none_or_empty(chat.onlineContext):
  message_context += [
  {
@@ -654,11 +634,12 @@
 
  if not is_none_or_empty(message_context):
  reconstructed_context_message = ChatMessage(content=message_context, role="user")
- chatml_messages.insert(0, reconstructed_context_message)
+ chatml_messages.append(reconstructed_context_message)
 
+ # Add generated assets
  if not is_none_or_empty(chat.images) and role == "assistant":
  generated_assets["image"] = {
- "query": (chat.intent.inferred_queries or [user_message])[0],
+ "description": (chat.intent.inferred_queries or [user_message])[0],
  }
 
  if not is_none_or_empty(chat.mermaidjsDiagram) and role == "assistant":
@@ -674,8 +655,17 @@
  )
  )
 
+ # Add user query with attached file, images or khoj response
+ if chat.queryFiles:
+ raw_query_files = chat.queryFiles
+ query_files_dict = dict()
+ for file in raw_query_files:
+ query_files_dict[file["name"]] = file["content"]
+
+ message_attached_files = gather_raw_query_files(query_files_dict)
+
  message_content = construct_structured_message(
- chat_message, chat.images if role == "user" else [], model_type, vision_enabled
+ chat_message, chat.images if role == "user" else [], model_type, vision_enabled, message_attached_files
  )
 
  reconstructed_message = ChatMessage(
@@ -683,19 +673,32 @@
  role=role,
  additional_kwargs={"message_type": chat.intent.type if chat.intent else None},
  )
- chatml_messages.insert(0, reconstructed_message)
+ chatml_messages.append(reconstructed_message)
 
  if len(chatml_messages) >= 3 * lookback_turns:
  break
 
  messages: list[ChatMessage] = []
 
+ if not is_none_or_empty(system_message):
+ messages.append(ChatMessage(content=system_message, role="system"))
+
+ if len(chatml_messages) > 0:
+ messages += chatml_messages
+
+ if program_execution_context:
+ program_context_text = "\n".join(program_execution_context)
+ context_message += f"{prompts.additional_program_context.format(context=program_context_text)}\n"
+
+ if not is_none_or_empty(context_message):
+ messages.append(ChatMessage(content=context_message, role="user"))
+
  if not is_none_or_empty(generated_asset_results):
  messages.append(
  ChatMessage(
- content=f"{prompts.generated_assets_context.format(generated_assets=yaml_dump(generated_asset_results))}\n\n",
+ content=prompts.generated_assets_context.format(generated_assets=yaml_dump(generated_asset_results)),
  role="user",
- )
+ ),
  )
 
  if not is_none_or_empty(user_message):
@@ -708,23 +711,6 @@
  )
  )
 
- if generated_files:
- message_attached_files = gather_raw_query_files({file.name: file.content for file in generated_files})
- messages.append(ChatMessage(content=message_attached_files, role="assistant"))
-
- if program_execution_context:
- program_context_text = "\n".join(program_execution_context)
- context_message += f"{prompts.additional_program_context.format(context=program_context_text)}\n"
-
- if not is_none_or_empty(context_message):
- messages.append(ChatMessage(content=context_message, role="user"))
-
- if len(chatml_messages) > 0:
- messages += chatml_messages
-
- if not is_none_or_empty(system_message):
- messages.append(ChatMessage(content=system_message, role="system"))
-
  # Normalize message content to list of chatml dictionaries
  for message in messages:
  if isinstance(message.content, str):
@@ -733,8 +719,8 @@
  # Truncate oldest messages from conversation history until under max supported prompt size by model
  messages = truncate_messages(messages, max_prompt_size, model_name, tokenizer_name)
 
- # Return message in chronological order
- return messages[::-1]
+ # Return messages in chronological order
+ return messages
 
 
  def get_encoder(
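Reviewer's note: taken together, the hunks above flip message assembly from newest-first-plus-final-reversal to a straightforward oldest-first append. A compressed sketch of the new ordering, using plain tuples in place of langchain's ChatMessage:

```python
# Placeholder inputs (illustrative only)
system_prompt = "You are Khoj, a personal assistant."
history_messages = [("user", "Hi"), ("assistant", "Hello!")]
context_message = "Search results: ..."
generated_assets_note = "Previously generated image: ..."
user_message = "Summarize my notes"

messages = []
messages.append(("system", system_prompt))        # 1. system message first
messages.extend(history_messages)                 # 2. prior turns, oldest first
messages.append(("user", context_message))        # 3. search/program context
messages.append(("user", generated_assets_note))  # 4. generated asset summaries
messages.append(("user", user_message))           # 5. current user query
print([role for role, _ in messages])
# No trailing [::-1]: the list is already chronological.
```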
@@ -805,7 +791,9 @@ def count_tokens(
 
  def count_total_tokens(messages: list[ChatMessage], encoder, system_message: Optional[ChatMessage]) -> Tuple[int, int]:
  """Count total tokens in messages including system message"""
- system_message_tokens = count_tokens(system_message.content, encoder) if system_message else 0
+ system_message_tokens = (
+ sum([count_tokens(message.content, encoder) for message in system_message]) if system_message else 0
+ )
  message_tokens = sum([count_tokens(message.content, encoder) for message in messages])
  # Reserves 4 tokens to demarcate each message (e.g <|im_start|>user, <|im_end|>, <|endoftext|> etc.)
  total_tokens = message_tokens + system_message_tokens + 4 * len(messages)
@@ -822,11 +810,14 @@ def truncate_messages(
  encoder = get_encoder(model_name, tokenizer_name)
 
  # Extract system message from messages
- system_message = None
- for idx, message in enumerate(messages):
+ system_message = []
+ non_system_messages = []
+ for message in messages:
  if message.role == "system":
- system_message = messages.pop(idx)
- break
+ system_message.append(message)
+ else:
+ non_system_messages.append(message)
+ messages = non_system_messages
 
  # Drop older messages until under max supported prompt size by model
  total_tokens, system_message_tokens = count_total_tokens(messages, encoder, system_message)
@@ -834,20 +825,20 @@
  while total_tokens > max_prompt_size and (len(messages) > 1 or len(messages[0].content) > 1):
  # If the last message has more than one content part, pop the oldest content part.
  # For tool calls, the whole message should dropped, assistant's tool call content being truncated annoys AI APIs.
- if len(messages[-1].content) > 1 and messages[-1].additional_kwargs.get("message_type") != "tool_call":
+ if len(messages[0].content) > 1 and messages[0].additional_kwargs.get("message_type") != "tool_call":
  # The oldest content part is earlier in content list. So pop from the front.
- messages[-1].content.pop(0)
+ messages[0].content.pop(0)
  # Otherwise, pop the last message if it has only one content part or is a tool call.
  else:
  # The oldest message is the last one. So pop from the back.
- dropped_message = messages.pop()
+ dropped_message = messages.pop(0)
  # Drop tool result pair of tool call, if tool call message has been removed
  if (
  dropped_message.additional_kwargs.get("message_type") == "tool_call"
  and messages
- and messages[-1].additional_kwargs.get("message_type") == "tool_result"
+ and messages[0].additional_kwargs.get("message_type") == "tool_result"
  ):
- messages.pop()
+ messages.pop(0)
 
  total_tokens, _ = count_total_tokens(messages, encoder, system_message)
 
@@ -886,11 +877,7 @@ def truncate_messages(
  f"Truncate current message to fit within max prompt size of {max_prompt_size} supported by {model_name} model:\n {truncated_snippet}"
  )
 
- if system_message:
- # Default system message role is system.
- # Fallback to system message role of user for models that do not support this role like gemma-2 and openai's o1 model series.
- system_message.role = "user" if "gemma-2" in model_name or model_name.startswith("o1") else "system"
- return messages + [system_message] if system_message else messages
+ return system_message + messages if system_message else messages
 
 
  def reciprocal_conversation_to_chatml(message_pair):
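Reviewer's note: because messages now arrive oldest-first, truncation pops from the front of the list rather than the back, and system messages ride along as a separate list that is prepended on return. A toy illustration of the drop order:

```python
# Toy stand-in for the token budget check in truncate_messages
messages = ["oldest turn", "middle turn", "newest turn"]
max_messages = 2  # pretend budget

while len(messages) > max_messages:
    dropped = messages.pop(0)  # oldest message is now at the front
    print(f"Dropped: {dropped}")

print(messages)  # ['middle turn', 'newest turn']
```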
khoj/processor/image/generate.py
@@ -1,6 +1,7 @@
  import base64
  import io
  import logging
+ import os
  import time
  from typing import Any, Callable, Dict, List, Optional
 
@@ -21,11 +22,12 @@ from khoj.database.adapters import ConversationAdapters
  from khoj.database.models import (
  Agent,
  ChatMessageModel,
+ Intent,
  KhojUser,
  TextToImageModelConfig,
  )
  from khoj.processor.conversation.google.utils import _is_retryable_error
- from khoj.routers.helpers import ChatEvent, generate_better_image_prompt
+ from khoj.routers.helpers import ChatEvent, ImageShape, generate_better_image_prompt
  from khoj.routers.storage import upload_generated_image_to_bucket
  from khoj.utils import state
  from khoj.utils.helpers import convert_image_to_webp, timer
@@ -60,14 +62,17 @@ async def text_to_image(
  return
 
  text2image_model = text_to_image_config.model_name
- chat_history_str = ""
+ image_chat_history: List[ChatMessageModel] = []
+ default_intent = Intent(type="remember")
  for chat in chat_history[-4:]:
  if chat.by == "you":
- chat_history_str += f"Q: {chat.message}\n"
+ image_chat_history += [ChatMessageModel(by=chat.by, message=chat.message, intent=default_intent)]
+ elif chat.by == "khoj" and chat.images and chat.intent and chat.intent.inferred_queries:
+ image_chat_history += [
+ ChatMessageModel(by=chat.by, message=chat.intent.inferred_queries[0], intent=default_intent)
+ ]
  elif chat.by == "khoj" and chat.intent and chat.intent.type in ["remember", "reminder"]:
- chat_history_str += f"A: {chat.message}\n"
- elif chat.by == "khoj" and chat.images:
- chat_history_str += f"A: Improved Prompt: {chat.intent.inferred_queries[0]}\n"
+ image_chat_history += [ChatMessageModel(by=chat.by, message=chat.message, intent=default_intent)]
 
  if send_status_func:
  async for event in send_status_func("**Enhancing the Painting Prompt**"):
@@ -75,9 +80,9 @@
 
  # Generate a better image prompt
  # Use the user's message, chat history, and other context
- image_prompt = await generate_better_image_prompt(
+ image_prompt_response = await generate_better_image_prompt(
  message,
- chat_history_str,
+ image_chat_history,
  location_data=location_data,
  note_references=references,
  online_results=online_results,
@@ -88,6 +93,8 @@
  query_files=query_files,
  tracer=tracer,
  )
+ image_prompt = image_prompt_response["description"]
+ image_shape = image_prompt_response["shape"]
 
  if send_status_func:
  async for event in send_status_func(f"**Painting to Imagine**:\n{image_prompt}"):
@@ -97,13 +104,19 @@
  with timer(f"Generate image with {text_to_image_config.model_type}", logger):
  try:
  if text_to_image_config.model_type == TextToImageModelConfig.ModelType.OPENAI:
- webp_image_bytes = generate_image_with_openai(image_prompt, text_to_image_config, text2image_model)
+ webp_image_bytes = generate_image_with_openai(
+ image_prompt, text_to_image_config, text2image_model, image_shape
+ )
  elif text_to_image_config.model_type == TextToImageModelConfig.ModelType.STABILITYAI:
  webp_image_bytes = generate_image_with_stability(image_prompt, text_to_image_config, text2image_model)
  elif text_to_image_config.model_type == TextToImageModelConfig.ModelType.REPLICATE:
- webp_image_bytes = generate_image_with_replicate(image_prompt, text_to_image_config, text2image_model)
+ webp_image_bytes = generate_image_with_replicate(
+ image_prompt, text_to_image_config, text2image_model, image_shape
+ )
  elif text_to_image_config.model_type == TextToImageModelConfig.ModelType.GOOGLE:
- webp_image_bytes = generate_image_with_google(image_prompt, text_to_image_config, text2image_model)
+ webp_image_bytes = generate_image_with_google(
+ image_prompt, text_to_image_config, text2image_model, image_shape
+ )
  except openai.OpenAIError or openai.BadRequestError or openai.APIConnectionError as e:
  if "content_policy_violation" in e.message:
  logger.error(f"Image Generation blocked by OpenAI: {e}")
@@ -154,7 +167,10 @@ async def text_to_image(
  reraise=True,
  )
  def generate_image_with_openai(
- improved_image_prompt: str, text_to_image_config: TextToImageModelConfig, text2image_model: str
+ improved_image_prompt: str,
+ text_to_image_config: TextToImageModelConfig,
+ text2image_model: str,
+ shape: ImageShape = ImageShape.SQUARE,
  ):
  "Generate image using OpenAI (compatible) API"
 
@@ -170,12 +186,21 @@ def generate_image_with_openai(
  elif state.openai_client:
  openai_client = state.openai_client
 
+ # Convert shape to size for OpenAI
+ if shape == ImageShape.PORTRAIT:
+ size = "1024x1536"
+ elif shape == ImageShape.LANDSCAPE:
+ size = "1536x1024"
+ else: # Square
+ size = "1024x1024"
+
  # Generate image using OpenAI API
  OPENAI_IMAGE_GEN_STYLE = "vivid"
  response = openai_client.images.generate(
  prompt=improved_image_prompt,
  model=text2image_model,
  style=OPENAI_IMAGE_GEN_STYLE,
+ size=size,
  response_format="b64_json",
  )
 
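Reviewer's note: the same shape enum is mapped differently per provider. Consolidated below from the hunks in this file; the values are copied from the diff, but the dicts themselves are just a summary, not Khoj code.

```python
# OpenAI takes explicit pixel dimensions
SHAPE_TO_OPENAI_SIZE = {
    "square": "1024x1024",
    "portrait": "1024x1536",
    "landscape": "1536x1024",
}
# Replicate and Google both take aspect ratios instead
SHAPE_TO_ASPECT_RATIO = {
    "square": "1:1",
    "portrait": "3:4",
    "landscape": "4:3",
}
print(SHAPE_TO_OPENAI_SIZE["landscape"], SHAPE_TO_ASPECT_RATIO["landscape"])
# 1536x1024 4:3
```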
@@ -222,10 +247,22 @@ def generate_image_with_stability(
  reraise=True,
  )
  def generate_image_with_replicate(
- improved_image_prompt: str, text_to_image_config: TextToImageModelConfig, text2image_model: str
+ improved_image_prompt: str,
+ text_to_image_config: TextToImageModelConfig,
+ text2image_model: str,
+ shape: ImageShape = ImageShape.SQUARE,
  ):
  "Generate image using Replicate API"
 
+ # Convert shape to aspect ratio for Replicate
+ # Replicate supports only 1:1, 3:4, and 4:3 aspect ratios
+ if shape == ImageShape.PORTRAIT:
+ aspect_ratio = "3:4"
+ elif shape == ImageShape.LANDSCAPE:
+ aspect_ratio = "4:3"
+ else: # Square
+ aspect_ratio = "1:1"
+
  # Create image generation task on Replicate
  replicate_create_prediction_url = f"https://api.replicate.com/v1/models/{text2image_model}/predictions"
  headers = {
@@ -236,11 +273,16 @@ def generate_image_with_replicate(
  "input": {
  "prompt": improved_image_prompt,
  "num_outputs": 1,
- "aspect_ratio": "1:1",
+ "aspect_ratio": aspect_ratio,
  "output_format": "webp",
  "output_quality": 100,
  }
  }
+
+ seed = int(os.getenv("KHOJ_LLM_SEED")) if os.getenv("KHOJ_LLM_SEED") else None
+ if seed:
+ json["input"]["seed"] = seed
+
  create_prediction = requests.post(replicate_create_prediction_url, headers=headers, json=json).json()
 
  # Get status of image generation task
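Reviewer's note: the new seed block makes Replicate generations reproducible when the operator sets `KHOJ_LLM_SEED`. A self-contained sketch of the same pattern; the payload contents are illustrative.

```python
import os

os.environ["KHOJ_LLM_SEED"] = "42"  # normally set by the server operator

payload = {"input": {"prompt": "a lighthouse at dusk", "num_outputs": 1}}
# Mirror of the diff's logic: forward the seed only when the env var is set
seed = int(os.getenv("KHOJ_LLM_SEED")) if os.getenv("KHOJ_LLM_SEED") else None
if seed:
    payload["input"]["seed"] = seed
print(payload["input"]["seed"])  # 42
```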
@@ -276,7 +318,10 @@ def generate_image_with_replicate(
  reraise=True,
  )
  def generate_image_with_google(
- improved_image_prompt: str, text_to_image_config: TextToImageModelConfig, text2image_model: str
+ improved_image_prompt: str,
+ text_to_image_config: TextToImageModelConfig,
+ text2image_model: str,
+ shape: ImageShape = ImageShape.SQUARE,
  ):
  """Generate image using Google's AI over API"""
 
@@ -284,6 +329,14 @@ def generate_image_with_google(
  api_key = text_to_image_config.api_key or text_to_image_config.ai_model_api.api_key
  client = genai.Client(api_key=api_key)
 
+ # Convert shape to aspect ratio for Google
+ if shape == ImageShape.PORTRAIT:
+ aspect_ratio = "3:4"
+ elif shape == ImageShape.LANDSCAPE:
+ aspect_ratio = "4:3"
+ else: # Square
+ aspect_ratio = "1:1"
+
  # Configure image generation settings
  config = gtypes.GenerateImagesConfig(
  number_of_images=1,
@@ -291,6 +344,7 @@
  person_generation=gtypes.PersonGeneration.ALLOW_ADULT,
  include_rai_reason=True,
  output_mime_type="image/png",
+ aspect_ratio=aspect_ratio,
  )
 
  # Call the Gemini API to generate the image
khoj/processor/tools/run_code.py
@@ -156,10 +156,11 @@ async def generate_python_code(
 
  response = await send_message_to_model_wrapper(
  code_generation_prompt,
- query_images=query_images,
  query_files=query_files,
- user=user,
+ query_images=query_images,
+ fast_model=False,
  agent_chat_model=agent_chat_model,
+ user=user,
  tracer=tracer,
  )