khoj 1.41.1.dev37__py3-none-any.whl → 1.41.1.dev39__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- khoj/interface/compiled/404/index.html +1 -1
- khoj/interface/compiled/_next/static/chunks/app/agents/layout-e00fb81dca656a10.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/share/chat/layout-e8e5db7830bf3f47.js +1 -0
- khoj/interface/compiled/agents/index.html +1 -1
- khoj/interface/compiled/agents/index.txt +1 -1
- khoj/interface/compiled/automations/index.html +1 -1
- khoj/interface/compiled/automations/index.txt +1 -1
- khoj/interface/compiled/chat/index.html +1 -1
- khoj/interface/compiled/chat/index.txt +1 -1
- khoj/interface/compiled/index.html +1 -1
- khoj/interface/compiled/index.txt +1 -1
- khoj/interface/compiled/search/index.html +1 -1
- khoj/interface/compiled/search/index.txt +1 -1
- khoj/interface/compiled/settings/index.html +1 -1
- khoj/interface/compiled/settings/index.txt +1 -1
- khoj/interface/compiled/share/chat/index.html +1 -1
- khoj/interface/compiled/share/chat/index.txt +1 -1
- khoj/processor/conversation/anthropic/utils.py +79 -72
- khoj/processor/conversation/google/utils.py +86 -77
- khoj/processor/conversation/openai/utils.py +156 -120
- khoj/utils/helpers.py +26 -0
- {khoj-1.41.1.dev37.dist-info → khoj-1.41.1.dev39.dist-info}/METADATA +1 -1
- {khoj-1.41.1.dev37.dist-info → khoj-1.41.1.dev39.dist-info}/RECORD +28 -28
- khoj/interface/compiled/_next/static/chunks/app/agents/layout-e49165209d2e406c.js +0 -1
- khoj/interface/compiled/_next/static/chunks/app/share/chat/layout-6fb51c5c80f8ec67.js +0 -1
- /khoj/interface/compiled/_next/static/{v77HARG2K4O2DRocPvOst → lAg9p8vAsLV6wpRoMm4qC}/_buildManifest.js +0 -0
- /khoj/interface/compiled/_next/static/{v77HARG2K4O2DRocPvOst → lAg9p8vAsLV6wpRoMm4qC}/_ssgManifest.js +0 -0
- {khoj-1.41.1.dev37.dist-info → khoj-1.41.1.dev39.dist-info}/WHEEL +0 -0
- {khoj-1.41.1.dev37.dist-info → khoj-1.41.1.dev39.dist-info}/entry_points.txt +0 -0
- {khoj-1.41.1.dev37.dist-info → khoj-1.41.1.dev39.dist-info}/licenses/LICENSE +0 -0
khoj/processor/conversation/google/utils.py
CHANGED
@@ -73,6 +73,9 @@ def _is_retryable_error(exception: BaseException) -> bool:
     # client errors
     if isinstance(exception, httpx.TimeoutException) or isinstance(exception, httpx.NetworkError):
         return True
+    # validation errors
+    if isinstance(exception, ValueError):
+        return True
     return False


@@ -84,8 +87,8 @@ def _is_retryable_error(exception: BaseException) -> bool:
     reraise=True,
 )
 def gemini_completion_with_backoff(
-    messages,
-    system_prompt,
+    messages: list[ChatMessage],
+    system_prompt: str,
     model_name: str,
     temperature=1.0,
     api_key=None,

@@ -144,6 +147,11 @@ def gemini_completion_with_backoff(
         model_name, input_tokens, output_tokens, thought_tokens=thought_tokens, usage=tracer.get("usage")
     )

+    # Validate the response. If empty, raise an error to retry.
+    if is_none_or_empty(response_text):
+        logger.warning(f"No response by {model_name}\nLast Message by {messages[-1].role}: {messages[-1].content}.")
+        raise ValueError(f"Empty or no response by {model_name} over API. Retry if needed.")
+
     # Save conversation trace
     tracer["chat_model"] = model_name
     tracer["temperature"] = temperature

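The recurring pattern in these hunks is a retry-on-empty-response guard: the completion helpers now raise a ValueError when the model returns nothing, and the tenacity retry predicate treats ValueError as retryable. A minimal, self-contained sketch of that interplay (toy function and names, not the khoj code itself):

    import logging

    from tenacity import (
        before_sleep_log,
        retry,
        retry_if_exception,
        stop_after_attempt,
        wait_exponential,
    )

    logger = logging.getLogger(__name__)
    attempts = {"count": 0}


    def _is_retryable_error(exception: BaseException) -> bool:
        # Validation errors (e.g. an empty model response) are retryable.
        return isinstance(exception, ValueError)


    @retry(
        retry=retry_if_exception(_is_retryable_error),
        wait=wait_exponential(multiplier=1, min=0.1, max=1),
        stop=stop_after_attempt(3),
        before_sleep=before_sleep_log(logger, logging.WARNING),
        reraise=True,
    )
    def completion() -> str:
        # Stand-in for an LLM call that returns nothing on its first attempt.
        attempts["count"] += 1
        response_text = "" if attempts["count"] < 2 else "ok"
        if not response_text:
            raise ValueError("Empty or no response by model over API. Retry if needed.")
        return response_text


    print(completion())  # retries once, then prints "ok"
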
@@ -157,89 +165,90 @@ def gemini_completion_with_backoff(
     retry=retry_if_exception(_is_retryable_error),
     wait=wait_exponential(multiplier=1, min=4, max=10),
     stop=stop_after_attempt(3),
-    before_sleep=before_sleep_log(logger, logging.
-    reraise=
+    before_sleep=before_sleep_log(logger, logging.WARNING),
+    reraise=False,
 )
 async def gemini_chat_completion_with_backoff(
-    messages,
-    model_name,
-    temperature,
-    api_key,
-    api_base_url,
-    system_prompt,
+    messages: list[ChatMessage],
+    model_name: str,
+    temperature: float,
+    api_key: str,
+    api_base_url: str,
+    system_prompt: str,
     model_kwargs=None,
     deepthought=False,
     tracer: dict = {},
 ) -> AsyncGenerator[str, None]:
-    [removed lines not captured in this diff view]
-        gemini_clients[api_key] = client
-
-    formatted_messages, system_prompt = format_messages_for_gemini(messages, system_prompt)
-
-    thinking_config = None
-    if deepthought and model_name.startswith("gemini-2-5"):
-        thinking_config = gtypes.ThinkingConfig(thinking_budget=MAX_REASONING_TOKENS_GEMINI)
-
-    seed = int(os.getenv("KHOJ_LLM_SEED")) if os.getenv("KHOJ_LLM_SEED") else None
-    config = gtypes.GenerateContentConfig(
-        system_instruction=system_prompt,
-        temperature=temperature,
-        thinking_config=thinking_config,
-        max_output_tokens=MAX_OUTPUT_TOKENS_GEMINI,
-        stop_sequences=["Notes:\n["],
-        safety_settings=SAFETY_SETTINGS,
-        seed=seed,
-        http_options=gtypes.HttpOptions(async_client_args={"timeout": httpx.Timeout(30.0, read=60.0)}),
-    )
+    client = gemini_clients.get(api_key)
+    if not client:
+        client = get_gemini_client(api_key, api_base_url)
+        gemini_clients[api_key] = client

-
-    final_chunk = None
-    response_started = False
-    start_time = perf_counter()
-    chat_stream: AsyncIterator[gtypes.GenerateContentResponse] = await client.aio.models.generate_content_stream(
-        model=model_name, config=config, contents=formatted_messages
-    )
-    async for chunk in chat_stream:
-        # Log the time taken to start response
-        if not response_started:
-            response_started = True
-            logger.info(f"First response took: {perf_counter() - start_time:.3f} seconds")
-        # Keep track of the last chunk for usage data
-        final_chunk = chunk
-        # Handle streamed response chunk
-        message, stopped = handle_gemini_response(chunk.candidates, chunk.prompt_feedback)
-        message = message or chunk.text
-        aggregated_response += message
-        yield message
-        if stopped:
-            raise ValueError(message)
-
-    # Log the time taken to stream the entire response
-    logger.info(f"Chat streaming took: {perf_counter() - start_time:.3f} seconds")
-
-    # Calculate cost of chat
-    input_tokens = final_chunk.usage_metadata.prompt_token_count or 0 if final_chunk else 0
-    output_tokens = final_chunk.usage_metadata.candidates_token_count or 0 if final_chunk else 0
-    thought_tokens = final_chunk.usage_metadata.thoughts_token_count or 0 if final_chunk else 0
-    tracer["usage"] = get_chat_usage_metrics(
-        model_name, input_tokens, output_tokens, thought_tokens=thought_tokens, usage=tracer.get("usage")
-    )
+    formatted_messages, system_prompt = format_messages_for_gemini(messages, system_prompt)

-    [removed lines not captured in this diff view]
+    thinking_config = None
+    if deepthought and model_name.startswith("gemini-2-5"):
+        thinking_config = gtypes.ThinkingConfig(thinking_budget=MAX_REASONING_TOKENS_GEMINI)
+
+    seed = int(os.getenv("KHOJ_LLM_SEED")) if os.getenv("KHOJ_LLM_SEED") else None
+    config = gtypes.GenerateContentConfig(
+        system_instruction=system_prompt,
+        temperature=temperature,
+        thinking_config=thinking_config,
+        max_output_tokens=MAX_OUTPUT_TOKENS_GEMINI,
+        stop_sequences=["Notes:\n["],
+        safety_settings=SAFETY_SETTINGS,
+        seed=seed,
+        http_options=gtypes.HttpOptions(async_client_args={"timeout": httpx.Timeout(30.0, read=60.0)}),
+    )
+
+    aggregated_response = ""
+    final_chunk = None
+    response_started = False
+    start_time = perf_counter()
+    chat_stream: AsyncIterator[gtypes.GenerateContentResponse] = await client.aio.models.generate_content_stream(
+        model=model_name, config=config, contents=formatted_messages
+    )
+    async for chunk in chat_stream:
+        # Log the time taken to start response
+        if not response_started:
+            response_started = True
+            logger.info(f"First response took: {perf_counter() - start_time:.3f} seconds")
+        # Keep track of the last chunk for usage data
+        final_chunk = chunk
+        # Handle streamed response chunk
+        stop_message, stopped = handle_gemini_response(chunk.candidates, chunk.prompt_feedback)
+        message = stop_message or chunk.text
+        aggregated_response += message
+        yield message
+        if stopped:
+            logger.warning(
+                f"LLM Response Prevented for {model_name}: {stop_message}.\n"
+                + f"Last Message by {messages[-1].role}: {messages[-1].content}"
+            )
+            break
+
+    # Calculate cost of chat
+    input_tokens = final_chunk.usage_metadata.prompt_token_count or 0 if final_chunk else 0
+    output_tokens = final_chunk.usage_metadata.candidates_token_count or 0 if final_chunk else 0
+    thought_tokens = final_chunk.usage_metadata.thoughts_token_count or 0 if final_chunk else 0
+    tracer["usage"] = get_chat_usage_metrics(
+        model_name, input_tokens, output_tokens, thought_tokens=thought_tokens, usage=tracer.get("usage")
+    )
+
+    # Validate the response. If empty, raise an error to retry.
+    if is_none_or_empty(aggregated_response):
+        logger.warning(f"No response by {model_name}\nLast Message by {messages[-1].role}: {messages[-1].content}.")
+        raise ValueError(f"Empty or no response by {model_name} over API. Retry if needed.")
+
+    # Log the time taken to stream the entire response
+    logger.info(f"Chat streaming took: {perf_counter() - start_time:.3f} seconds")
+
+    # Save conversation trace
+    tracer["chat_model"] = model_name
+    tracer["temperature"] = temperature
+    if is_promptrace_enabled():
+        commit_conversation_trace(messages, aggregated_response, tracer)


 def handle_gemini_response(

khoj/processor/conversation/openai/utils.py
CHANGED
@@ -1,5 +1,6 @@
 import logging
 import os
+from copy import deepcopy
 from functools import partial
 from time import perf_counter
 from typing import AsyncGenerator, Dict, Generator, List, Literal, Optional, Union

@@ -7,6 +8,7 @@ from urllib.parse import urlparse

 import httpx
 import openai
+from langchain_core.messages.chat import ChatMessage
 from openai.lib.streaming.chat import (
     ChatCompletionStream,
     ChatCompletionStreamEvent,

@@ -32,9 +34,11 @@ from khoj.processor.conversation.utils import (
     commit_conversation_trace,
 )
 from khoj.utils.helpers import (
+    convert_image_data_uri,
     get_chat_usage_metrics,
     get_openai_async_client,
     get_openai_client,
+    is_none_or_empty,
     is_promptrace_enabled,
 )

@@ -51,6 +55,7 @@ openai_async_clients: Dict[str, openai.AsyncOpenAI] = {}
         | retry_if_exception_type(openai._exceptions.APIConnectionError)
         | retry_if_exception_type(openai._exceptions.RateLimitError)
         | retry_if_exception_type(openai._exceptions.APIStatusError)
+        | retry_if_exception_type(ValueError)
     ),
     wait=wait_random_exponential(min=1, max=10),
     stop=stop_after_attempt(3),

@@ -58,7 +63,7 @@ openai_async_clients: Dict[str, openai.AsyncOpenAI] = {}
     reraise=True,
 )
 def completion_with_backoff(
-    messages,
+    messages: List[ChatMessage],
     model_name: str,
     temperature=0.8,
     openai_api_key=None,

@@ -74,7 +79,7 @@ def completion_with_backoff(
         openai_clients[client_key] = client

     stream_processor = default_stream_processor
-    formatted_messages =
+    formatted_messages = format_message_for_api(messages, api_base_url)

     # Tune reasoning models arguments
     if is_openai_reasoning_model(model_name, api_base_url):

@@ -133,6 +138,11 @@ def completion_with_backoff(
         model_name, input_tokens, output_tokens, usage=tracer.get("usage"), cost=cost
     )

+    # Validate the response. If empty, raise an error to retry.
+    if is_none_or_empty(aggregated_response):
+        logger.warning(f"No response by {model_name}\nLast Message by {messages[-1].role}: {messages[-1].content}.")
+        raise ValueError(f"Empty or no response by {model_name} over API. Retry if needed.")
+
     # Save conversation trace
     tracer["chat_model"] = model_name
     tracer["temperature"] = temperature

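The hunks above make completion_with_backoff retry on the new empty-response ValueError: the exception type is added to its retry conditions and the function raises it when the aggregated response comes back empty. tenacity retry predicates compose with the | operator, as in this small illustrative sketch (toy exceptions stand in for the OpenAI error types):

    from tenacity import retry, retry_if_exception_type, stop_after_attempt

    calls = {"n": 0}


    @retry(
        retry=(retry_if_exception_type(TimeoutError) | retry_if_exception_type(ValueError)),
        stop=stop_after_attempt(3),
        reraise=True,
    )
    def flaky_completion() -> str:
        # First attempt fails like an API error, second like an empty response, third succeeds.
        calls["n"] += 1
        if calls["n"] == 1:
            raise TimeoutError("simulated connection error")
        if calls["n"] == 2:
            raise ValueError("simulated empty response")
        return "ok"


    print(flaky_completion(), calls["n"])  # prints: ok 3
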
@@ -149,14 +159,15 @@ def completion_with_backoff(
         | retry_if_exception_type(openai._exceptions.APIConnectionError)
         | retry_if_exception_type(openai._exceptions.RateLimitError)
         | retry_if_exception_type(openai._exceptions.APIStatusError)
+        | retry_if_exception_type(ValueError)
     ),
     wait=wait_exponential(multiplier=1, min=4, max=10),
     stop=stop_after_attempt(3),
-    before_sleep=before_sleep_log(logger, logging.
-    reraise=
+    before_sleep=before_sleep_log(logger, logging.WARNING),
+    reraise=False,
 )
 async def chat_completion_with_backoff(
-    messages,
+    messages: list[ChatMessage],
     model_name: str,
     temperature,
     openai_api_key=None,

@@ -165,120 +176,122 @@ async def chat_completion_with_backoff(
     model_kwargs: dict = {},
     tracer: dict = {},
 ) -> AsyncGenerator[ResponseWithThought, None]:
-    [removed lines not captured in this diff view]
-    if final_chunk and
-    [removed lines not captured in this diff view]
+    client_key = f"{openai_api_key}--{api_base_url}"
+    client = openai_async_clients.get(client_key)
+    if not client:
+        client = get_openai_async_client(openai_api_key, api_base_url)
+        openai_async_clients[client_key] = client
+
+    stream_processor = adefault_stream_processor
+    formatted_messages = format_message_for_api(messages, api_base_url)
+
+    # Configure thinking for openai reasoning models
+    if is_openai_reasoning_model(model_name, api_base_url):
+        temperature = 1
+        reasoning_effort = "medium" if deepthought else "low"
+        model_kwargs["reasoning_effort"] = reasoning_effort
+        model_kwargs.pop("stop", None)  # Remove unsupported stop param for reasoning models
+
+        # Get the first system message and add the string `Formatting re-enabled` to it.
+        # See https://platform.openai.com/docs/guides/reasoning-best-practices
+        if len(formatted_messages) > 0:
+            system_messages = [
+                (i, message) for i, message in enumerate(formatted_messages) if message["role"] == "system"
+            ]
+            if len(system_messages) > 0:
+                first_system_message_index, first_system_message = system_messages[0]
+                first_system_message_content = first_system_message["content"]
+                formatted_messages[first_system_message_index][
+                    "content"
+                ] = f"{first_system_message_content}\nFormatting re-enabled"
+    elif is_twitter_reasoning_model(model_name, api_base_url):
+        stream_processor = adeepseek_stream_processor
+        reasoning_effort = "high" if deepthought else "low"
+        model_kwargs["reasoning_effort"] = reasoning_effort
+    elif model_name.startswith("deepseek-reasoner"):
+        stream_processor = adeepseek_stream_processor
+        # Two successive messages cannot be from the same role. Should merge any back-to-back messages from the same role.
+        # The first message should always be a user message (except system message).
+        updated_messages: List[dict] = []
+        for i, message in enumerate(formatted_messages):
+            if i > 0 and message["role"] == formatted_messages[i - 1]["role"]:
+                updated_messages[-1]["content"] += " " + message["content"]
+            elif i == 1 and formatted_messages[i - 1]["role"] == "system" and message["role"] == "assistant":
+                updated_messages[-1]["content"] += " " + message["content"]
+            else:
+                updated_messages.append(message)
+        formatted_messages = updated_messages
+    elif is_qwen_reasoning_model(model_name, api_base_url):
+        stream_processor = partial(ain_stream_thought_processor, thought_tag="think")
+        # Reasoning is enabled by default. Disable when deepthought is False.
+        # See https://qwenlm.github.io/blog/qwen3/#advanced-usages
+        if not deepthought and len(formatted_messages) > 0:
+            formatted_messages[-1]["content"] = formatted_messages[-1]["content"] + " /no_think"
+
+    stream = True
+    read_timeout = 300 if is_local_api(api_base_url) else 60
+    model_kwargs["stream_options"] = {"include_usage": True}
+    if os.getenv("KHOJ_LLM_SEED"):
+        model_kwargs["seed"] = int(os.getenv("KHOJ_LLM_SEED"))
+
+    aggregated_response = ""
+    final_chunk = None
+    response_started = False
+    start_time = perf_counter()
+    chat_stream: openai.AsyncStream[ChatCompletionChunk] = await client.chat.completions.create(
+        messages=formatted_messages,  # type: ignore
+        model=model_name,
+        stream=stream,
+        temperature=temperature,
+        timeout=httpx.Timeout(30, read=read_timeout),
+        **model_kwargs,
+    )
+    async for chunk in stream_processor(chat_stream):
+        # Log the time taken to start response
+        if not response_started:
+            response_started = True
+            logger.info(f"First response took: {perf_counter() - start_time:.3f} seconds")
+        # Keep track of the last chunk for usage data
+        final_chunk = chunk
+        # Skip empty chunks
+        if len(chunk.choices) == 0:
+            continue
+        # Handle streamed response chunk
+        response_chunk: ResponseWithThought = None
+        response_delta = chunk.choices[0].delta
+        if response_delta.content:
+            response_chunk = ResponseWithThought(response=response_delta.content)
+            aggregated_response += response_chunk.response
+        elif response_delta.thought:
+            response_chunk = ResponseWithThought(thought=response_delta.thought)
+        if response_chunk:
+            yield response_chunk
+
+    # Calculate cost of chat after stream finishes
+    input_tokens, output_tokens, cost = 0, 0, 0
+    if final_chunk and hasattr(final_chunk, "usage") and final_chunk.usage:
+        input_tokens = final_chunk.usage.prompt_tokens
+        output_tokens = final_chunk.usage.completion_tokens
+        # Estimated costs returned by DeepInfra API
+        if final_chunk.usage.model_extra and "estimated_cost" in final_chunk.usage.model_extra:
+            cost = final_chunk.usage.model_extra.get("estimated_cost", 0)
+    tracer["usage"] = get_chat_usage_metrics(
+        model_name, input_tokens, output_tokens, usage=tracer.get("usage"), cost=cost
+    )
+
+    # Validate the response. If empty, raise an error to retry.
+    if is_none_or_empty(aggregated_response):
+        logger.warning(f"No response by {model_name}\nLast Message by {messages[-1].role}: {messages[-1].content}.")
+        raise ValueError(f"Empty or no response by {model_name} over API. Retry if needed.")
+
+    # Log the time taken to stream the entire response
+    logger.info(f"Chat streaming took: {perf_counter() - start_time:.3f} seconds")
+
+    # Save conversation trace
+    tracer["chat_model"] = model_name
+    tracer["temperature"] = temperature
+    if is_promptrace_enabled():
+        commit_conversation_trace(messages, aggregated_response, tracer)


 def get_openai_api_json_support(model_name: str, api_base_url: str = None) -> JsonSupport:

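Note that the streaming helpers switch their retry decorators to reraise=False while the non-streaming ones keep reraise=True. In tenacity, reraise=False means that once all attempts are exhausted the caller receives a tenacity.RetryError wrapping the last exception rather than the exception itself. A short sketch of that behaviour (illustrative function only):

    from tenacity import RetryError, retry, retry_if_exception_type, stop_after_attempt


    @retry(retry=retry_if_exception_type(ValueError), stop=stop_after_attempt(2), reraise=False)
    def always_empty() -> str:
        # Stand-in for a model call that never returns content.
        raise ValueError("Empty or no response by model over API. Retry if needed.")


    try:
        always_empty()
    except RetryError as error:
        # With reraise=False the original ValueError is wrapped, not re-raised.
        print(type(error.last_attempt.exception()).__name__)  # prints: ValueError
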
@@ -293,11 +306,34 @@ def get_openai_api_json_support(model_name: str, api_base_url: str = None) -> JsonSupport:
     return JsonSupport.SCHEMA


+def format_message_for_api(messages: List[ChatMessage], api_base_url: str) -> List[dict]:
+    """
+    Format messages to send to chat model served over OpenAI (compatible) API.
+    """
+    formatted_messages = []
+    for message in deepcopy(messages):
+        # Convert images to PNG format if message to be sent to non OpenAI API
+        if isinstance(message.content, list) and not is_openai_api(api_base_url):
+            for part in message.content:
+                if part.get("type") == "image_url":
+                    part["image_url"]["url"] = convert_image_data_uri(part["image_url"]["url"], target_format="png")
+        formatted_messages.append({"role": message.role, "content": message.content})
+
+    return formatted_messages
+
+
+def is_openai_api(api_base_url: str = None) -> bool:
+    """
+    Check if the model is served over the official OpenAI API
+    """
+    return api_base_url is None or api_base_url.startswith("https://api.openai.com/v1")
+
+
 def is_openai_reasoning_model(model_name: str, api_base_url: str = None) -> bool:
     """
     Check if the model is an OpenAI reasoning model
     """
-    return model_name.startswith("o") and (api_base_url
+    return model_name.startswith("o") and is_openai_api(api_base_url)


 def is_twitter_reasoning_model(model_name: str, api_base_url: str = None) -> bool:

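The new format_message_for_api helper re-encodes image parts to PNG only when the request is not going to the official OpenAI endpoint, delegating the conversion to the convert_image_data_uri helper added to khoj/utils/helpers.py below. A rough standalone sketch of that conversion path, assuming Pillow is installed (all names are local to this example):

    import base64
    import io

    from PIL import Image


    def to_png_data_uri(image_data_uri: str) -> str:
        # Decode the incoming data URI, re-encode the image as PNG, return a new data URI.
        header, base64_data = image_data_uri.split(",", 1)
        source_format = header.split(";")[0].split("/")[1]
        if source_format.lower() == "png":
            return image_data_uri
        image = Image.open(io.BytesIO(base64.b64decode(base64_data)))
        output = io.BytesIO()
        image.save(output, "PNG")
        return "data:image/png;base64," + base64.b64encode(output.getvalue()).decode("utf-8")


    # Build a tiny JPEG image, wrap it in a data URI, and convert it to PNG.
    buffer = io.BytesIO()
    Image.new("RGB", (1, 1), "red").save(buffer, "JPEG")
    jpeg_uri = "data:image/jpeg;base64," + base64.b64encode(buffer.getvalue()).decode("utf-8")
    print(to_png_data_uri(jpeg_uri)[:22])  # prints: data:image/png;base64,
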
khoj/utils/helpers.py
CHANGED
@@ -555,6 +555,32 @@ def convert_image_to_webp(image_bytes):
     return webp_image_bytes


+def convert_image_data_uri(image_data_uri: str, target_format: str = "png") -> str:
+    """
+    Convert image (in data URI) to target format.
+
+    Target format can be png, jpg, webp etc.
+    Returns the converted image as a data URI.
+    """
+    base64_data = image_data_uri.split(",", 1)[1]
+    image_type = image_data_uri.split(";")[0].split(":")[1].split("/")[1]
+    if image_type.lower() == target_format.lower():
+        return image_data_uri
+
+    image_bytes = base64.b64decode(base64_data)
+    image_io = io.BytesIO(image_bytes)
+    with Image.open(image_io) as original_image:
+        output_image_io = io.BytesIO()
+        original_image.save(output_image_io, target_format.upper())
+
+        # Encode the image back to base64
+        output_image_bytes = output_image_io.getvalue()
+        output_image_io.close()
+        output_base64_data = base64.b64encode(output_image_bytes).decode("utf-8")
+        output_data_uri = f"data:image/{target_format};base64,{output_base64_data}"
+        return output_data_uri
+
+
 def truncate_code_context(original_code_results: dict[str, Any], max_chars=10000) -> dict[str, Any]:
     """
     Truncate large output files and drop image file data from code results.