khoj 1.21.6.dev14__py3-none-any.whl → 1.21.7.dev1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (70)
  1. khoj/database/adapters/__init__.py +12 -0
  2. khoj/database/migrations/0056_chatmodeloptions_vision_enabled.py +17 -0
  3. khoj/database/migrations/0057_merge_20240816_1409.py +13 -0
  4. khoj/database/migrations/0060_merge_20240905_1828.py +14 -0
  5. khoj/database/models/__init__.py +1 -0
  6. khoj/interface/compiled/404/index.html +1 -1
  7. khoj/interface/compiled/_next/static/chunks/{3062-a42d847c919a9ea4.js → 3062-9be9a4e34f82ed3a.js} +1 -1
  8. khoj/interface/compiled/_next/static/chunks/3678-0732dd9d2f472171.js +25 -0
  9. khoj/interface/compiled/_next/static/chunks/8423-ee6746f47901db2f.js +1 -0
  10. khoj/interface/compiled/_next/static/chunks/9001-3b27af6d5f21df44.js +21 -0
  11. khoj/interface/compiled/_next/static/chunks/9162-0be016519a18568b.js +11 -0
  12. khoj/interface/compiled/_next/static/chunks/{9178-d23cb0dbee40a775.js → 9178-3a0baad1c172d515.js} +1 -1
  13. khoj/interface/compiled/_next/static/chunks/{9693-91b03052c5cabded.js → 9984-e410179c6fac7cf1.js} +1 -1
  14. khoj/interface/compiled/_next/static/chunks/app/agents/{page-3c01900e7b5c7e50.js → page-462502107217be82.js} +1 -1
  15. khoj/interface/compiled/_next/static/chunks/app/automations/{page-6ea3381528603372.js → page-e30a75db8719f439.js} +1 -1
  16. khoj/interface/compiled/_next/static/chunks/app/chat/page-ed970e05064ff12c.js +1 -0
  17. khoj/interface/compiled/_next/static/chunks/app/factchecker/page-693fe53982bf33e1.js +1 -0
  18. khoj/interface/compiled/_next/static/chunks/app/page-c26f689e39b400ba.js +1 -0
  19. khoj/interface/compiled/_next/static/chunks/app/search/{page-fa15807b1ad7e30b.js → page-0798bb43c2e368bf.js} +1 -1
  20. khoj/interface/compiled/_next/static/chunks/app/settings/{page-1a2acc46cdabaf4a.js → page-f518555f8e2fd794.js} +1 -1
  21. khoj/interface/compiled/_next/static/chunks/app/share/chat/page-ad9d2e9787eed215.js +1 -0
  22. khoj/interface/compiled/_next/static/chunks/{webpack-0f6d4805ea01afda.js → webpack-95ea8d2b149d6bad.js} +1 -1
  23. khoj/interface/compiled/_next/static/css/2a860030cf7c384b.css +1 -0
  24. khoj/interface/compiled/_next/static/css/4cae6c0e5c72fb2d.css +1 -0
  25. khoj/interface/compiled/_next/static/css/5a400c87d295e68a.css +1 -0
  26. khoj/interface/compiled/_next/static/css/76db8c247950117c.css +25 -0
  27. khoj/interface/compiled/_next/static/css/c808691c459e3887.css +1 -0
  28. khoj/interface/compiled/agents/index.html +1 -1
  29. khoj/interface/compiled/agents/index.txt +2 -2
  30. khoj/interface/compiled/automations/index.html +1 -1
  31. khoj/interface/compiled/automations/index.txt +3 -3
  32. khoj/interface/compiled/chat/index.html +1 -1
  33. khoj/interface/compiled/chat/index.txt +2 -2
  34. khoj/interface/compiled/factchecker/index.html +1 -1
  35. khoj/interface/compiled/factchecker/index.txt +2 -2
  36. khoj/interface/compiled/index.html +1 -1
  37. khoj/interface/compiled/index.txt +2 -2
  38. khoj/interface/compiled/search/index.html +1 -1
  39. khoj/interface/compiled/search/index.txt +2 -2
  40. khoj/interface/compiled/settings/index.html +1 -1
  41. khoj/interface/compiled/settings/index.txt +3 -3
  42. khoj/interface/compiled/share/chat/index.html +1 -1
  43. khoj/interface/compiled/share/chat/index.txt +2 -2
  44. khoj/processor/conversation/openai/gpt.py +4 -0
  45. khoj/processor/conversation/utils.py +31 -13
  46. khoj/processor/tools/online_search.py +6 -2
  47. khoj/routers/api_chat.py +41 -8
  48. khoj/routers/helpers.py +73 -21
  49. khoj/routers/storage.py +28 -0
  50. khoj/utils/helpers.py +15 -0
  51. {khoj-1.21.6.dev14.dist-info → khoj-1.21.7.dev1.dist-info}/METADATA +1 -1
  52. {khoj-1.21.6.dev14.dist-info → khoj-1.21.7.dev1.dist-info}/RECORD +57 -54
  53. khoj/interface/compiled/_next/static/chunks/3678-8c0e55c3b5d83a22.js +0 -25
  54. khoj/interface/compiled/_next/static/chunks/8423-132ea64eac83fd43.js +0 -1
  55. khoj/interface/compiled/_next/static/chunks/9001-acbca3e19b1a5ddf.js +0 -21
  56. khoj/interface/compiled/_next/static/chunks/9162-4a6d0d0dc5e27618.js +0 -11
  57. khoj/interface/compiled/_next/static/chunks/app/chat/page-c2ebc47a09abc8ae.js +0 -1
  58. khoj/interface/compiled/_next/static/chunks/app/factchecker/page-6ca723a9ff0dfd70.js +0 -1
  59. khoj/interface/compiled/_next/static/chunks/app/page-d403fc59c9c3f8cc.js +0 -1
  60. khoj/interface/compiled/_next/static/chunks/app/share/chat/page-0ae8f5b868af65c1.js +0 -1
  61. khoj/interface/compiled/_next/static/css/2bfe35fbe2c97a56.css +0 -1
  62. khoj/interface/compiled/_next/static/css/9d5b867ec04494a6.css +0 -25
  63. khoj/interface/compiled/_next/static/css/a22d83f18a32957e.css +0 -1
  64. khoj/interface/compiled/_next/static/css/a3530ec58b0b660f.css +0 -1
  65. khoj/interface/compiled/_next/static/css/b81e909d403fb2df.css +0 -1
  66. /khoj/interface/compiled/_next/static/{OHjya9xQJWrEMTXUfPYon → ZZXQatJ9SszXKA3rhPWXF}/_buildManifest.js +0 -0
  67. /khoj/interface/compiled/_next/static/{OHjya9xQJWrEMTXUfPYon → ZZXQatJ9SszXKA3rhPWXF}/_ssgManifest.js +0 -0
  68. {khoj-1.21.6.dev14.dist-info → khoj-1.21.7.dev1.dist-info}/WHEEL +0 -0
  69. {khoj-1.21.6.dev14.dist-info → khoj-1.21.7.dev1.dist-info}/entry_points.txt +0 -0
  70. {khoj-1.21.6.dev14.dist-info → khoj-1.21.7.dev1.dist-info}/licenses/LICENSE +0 -0
khoj/routers/api_chat.py CHANGED
@@ -1,4 +1,5 @@
 import asyncio
+import base64
 import json
 import logging
 import time
@@ -46,11 +47,13 @@ from khoj.routers.helpers import (
     update_telemetry_state,
     validate_conversation_config,
 )
+from khoj.routers.storage import upload_image_to_bucket
 from khoj.utils import state
 from khoj.utils.helpers import (
     AsyncIteratorWrapper,
     ConversationCommand,
     command_descriptions,
+    convert_image_to_webp,
     get_device,
     is_none_or_empty,
 )
@@ -517,7 +520,11 @@ async def set_conversation_title(
     )


-@api_chat.get("")
+class ImageUploadObject(BaseModel):
+    image: str
+
+
+@api_chat.post("")
 @requires(["authenticated"])
 async def chat(
     request: Request,
@@ -532,6 +539,7 @@ async def chat(
     region: Optional[str] = None,
     country: Optional[str] = None,
     timezone: Optional[str] = None,
+    image: Optional[ImageUploadObject] = None,
     rate_limiter_per_minute=Depends(
         ApiUserRateLimiter(requests=60, subscribed_requests=60, window=60, slug="chat_minute")
     ),
@@ -539,7 +547,7 @@ async def chat(
         ApiUserRateLimiter(requests=600, subscribed_requests=600, window=60 * 60 * 24, slug="chat_day")
     ),
 ):
-    async def event_generator(q: str):
+    async def event_generator(q: str, image: ImageUploadObject):
         start_time = time.perf_counter()
         ttft = None
         chat_metadata: dict = {}
@@ -550,6 +558,17 @@ async def chat(
         q = unquote(q)
         nonlocal conversation_id

+        uploaded_image_url = None
+        if image:
+            decoded_string = unquote(image.image)
+            base64_data = decoded_string.split(",", 1)[1]
+            image_bytes = base64.b64decode(base64_data)
+            webp_image_bytes = convert_image_to_webp(image_bytes)
+            try:
+                uploaded_image_url = upload_image_to_bucket(webp_image_bytes, request.user.object.id)
+            except:
+                uploaded_image_url = None
+
         async def send_event(event_type: ChatEvent, data: str | dict):
             nonlocal connection_alive, ttft
             if not connection_alive or await request.is_disconnected():
@@ -637,7 +656,7 @@ async def chat(

         if conversation_commands == [ConversationCommand.Default] or is_automated_task:
             conversation_commands = await aget_relevant_information_sources(
-                q, meta_log, is_automated_task, subscribed=subscribed
+                q, meta_log, is_automated_task, subscribed=subscribed, uploaded_image_url=uploaded_image_url
             )
             conversation_commands_str = ", ".join([cmd.value for cmd in conversation_commands])
             async for result in send_event(
@@ -645,7 +664,7 @@ async def chat(
             ):
                 yield result

-        mode = await aget_relevant_output_modes(q, meta_log, is_automated_task)
+        mode = await aget_relevant_output_modes(q, meta_log, is_automated_task, uploaded_image_url)
         async for result in send_event(ChatEvent.STATUS, f"**Decided Response Mode:** {mode.value}"):
             yield result
         if mode not in conversation_commands:
@@ -693,7 +712,9 @@ async def chat(
                     ):
                         yield result

-                    response = await extract_relevant_summary(q, contextual_data, subscribed=subscribed)
+                    response = await extract_relevant_summary(
+                        q, contextual_data, subscribed=subscribed, uploaded_image_url=uploaded_image_url
+                    )
                     response_log = str(response)
                     async for result in send_llm_response(response_log):
                         yield result
@@ -711,6 +732,7 @@ async def chat(
                 intent_type="summarize",
                 client_application=request.user.client_app,
                 conversation_id=conversation_id,
+                uploaded_image_url=uploaded_image_url,
             )
             return

@@ -753,6 +775,7 @@ async def chat(
                 conversation_id=conversation_id,
                 inferred_queries=[query_to_run],
                 automation_id=automation.id,
+                uploaded_image_url=uploaded_image_url,
             )
             async for result in send_llm_response(llm_response):
                 yield result
@@ -807,6 +830,7 @@ async def chat(
                     subscribed,
                     partial(send_event, ChatEvent.STATUS),
                     custom_filters,
+                    uploaded_image_url=uploaded_image_url,
                 ):
                     if isinstance(result, dict) and ChatEvent.STATUS in result:
                         yield result[ChatEvent.STATUS]
@@ -823,7 +847,13 @@ async def chat(
         if ConversationCommand.Webpage in conversation_commands:
             try:
                 async for result in read_webpages(
-                    defiltered_query, meta_log, location, user, subscribed, partial(send_event, ChatEvent.STATUS)
+                    defiltered_query,
+                    meta_log,
+                    location,
+                    user,
+                    subscribed,
+                    partial(send_event, ChatEvent.STATUS),
+                    uploaded_image_url=uploaded_image_url,
                 ):
                     if isinstance(result, dict) and ChatEvent.STATUS in result:
                         yield result[ChatEvent.STATUS]
@@ -869,6 +899,7 @@ async def chat(
                 online_results=online_results,
                 subscribed=subscribed,
                 send_status_func=partial(send_event, ChatEvent.STATUS),
+                uploaded_image_url=uploaded_image_url,
             ):
                 if isinstance(result, dict) and ChatEvent.STATUS in result:
                     yield result[ChatEvent.STATUS]
@@ -898,6 +929,7 @@ async def chat(
                 conversation_id=conversation_id,
                 compiled_references=compiled_references,
                 online_results=online_results,
+                uploaded_image_url=uploaded_image_url,
             )
             content_obj = {
                 "intentType": intent_type,
@@ -924,6 +956,7 @@ async def chat(
             conversation_id,
             location,
             user_name,
+            uploaded_image_url,
         )

         # Send Response
@@ -949,9 +982,9 @@ async def chat(

     ## Stream Text Response
     if stream:
-        return StreamingResponse(event_generator(q), media_type="text/plain")
+        return StreamingResponse(event_generator(q, image=image), media_type="text/plain")
     ## Non-Streaming Text Response
     else:
-        response_iterator = event_generator(q)
+        response_iterator = event_generator(q, image=image)
         response_data = await read_chat_stream(response_iterator)
         return Response(content=json.dumps(response_data), media_type="application/json", status_code=200)
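With the hunks above, `/api/chat` switches from GET to POST and accepts an optional JSON body matching `ImageUploadObject`: an `image` field holding a (possibly URL-encoded) data URL whose base64 payload is decoded, converted to WebP, and uploaded before the chat pipeline runs. The sketch below shows how a client might call the new endpoint; the host, port, token handling, file name, and use of `requests` are illustrative assumptions, while the route, the `q`/`stream` parameters, and the body shape come from the diff above.

```python
import base64

import requests  # any HTTP client works; requests is only an assumption here

KHOJ_URL = "http://localhost:42110"  # hypothetical local server
TOKEN = "your-khoj-api-token"        # hypothetical credential

# The server splits the value on the first "," and base64-decodes the rest,
# so the image is sent as a data URL.
with open("diagram.png", "rb") as f:
    data_url = "data:image/png;base64," + base64.b64encode(f.read()).decode()

response = requests.post(
    f"{KHOJ_URL}/api/chat",
    params={"q": "What does this diagram show?", "stream": False},
    json={"image": data_url},  # body shape of ImageUploadObject
    headers={"Authorization": f"Bearer {TOKEN}"},
)
print(response.json())
```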
khoj/routers/helpers.py CHANGED
@@ -97,6 +97,7 @@ from khoj.utils.helpers import (
     LRU,
     ConversationCommand,
     ImageIntentType,
+    convert_image_to_webp,
     is_none_or_empty,
     is_valid_url,
     log_telemetry,
@@ -252,7 +253,9 @@ async def acreate_title_from_query(query: str) -> str:
     return response.strip()


-async def aget_relevant_information_sources(query: str, conversation_history: dict, is_task: bool, subscribed: bool):
+async def aget_relevant_information_sources(
+    query: str, conversation_history: dict, is_task: bool, subscribed: bool, uploaded_image_url: str = None
+):
     """
     Given a query, determine which of the available tools the agent should use in order to answer appropriately.
     """
@@ -266,6 +269,9 @@ async def aget_relevant_information_sources(query: str, conversation_history: di

     chat_history = construct_chat_history(conversation_history)

+    if uploaded_image_url:
+        query = f"[placeholder for image attached to this message]\n{query}"
+
     relevant_tools_prompt = prompts.pick_relevant_information_collection_tools.format(
         query=query,
         tools=tool_options_str,
@@ -274,7 +280,9 @@ async def aget_relevant_information_sources(query: str, conversation_history: di

     with timer("Chat actor: Infer information sources to refer", logger):
         response = await send_message_to_model_wrapper(
-            relevant_tools_prompt, response_type="json_object", subscribed=subscribed
+            relevant_tools_prompt,
+            response_type="json_object",
+            subscribed=subscribed,
         )

     try:
@@ -302,7 +310,9 @@ async def aget_relevant_information_sources(query: str, conversation_history: di
         return [ConversationCommand.Default]


-async def aget_relevant_output_modes(query: str, conversation_history: dict, is_task: bool = False):
+async def aget_relevant_output_modes(
+    query: str, conversation_history: dict, is_task: bool = False, uploaded_image_url: str = None
+):
     """
     Given a query, determine which of the available tools the agent should use in order to answer appropriately.
     """
@@ -319,6 +329,9 @@ async def aget_relevant_output_modes(query: str, conversation_history: dict, is_

     chat_history = construct_chat_history(conversation_history)

+    if uploaded_image_url:
+        query = f"[placeholder for image attached to this message]\n{query}"
+
     relevant_mode_prompt = prompts.pick_relevant_output_mode.format(
         query=query,
         modes=mode_options_str,
@@ -347,7 +360,7 @@ async def aget_relevant_output_modes(query: str, conversation_history: dict, is_


 async def infer_webpage_urls(
-    q: str, conversation_history: dict, location_data: LocationData, user: KhojUser
+    q: str, conversation_history: dict, location_data: LocationData, user: KhojUser, uploaded_image_url: str = None
 ) -> List[str]:
     """
     Infer webpage links from the given query
@@ -366,7 +379,9 @@ async def infer_webpage_urls(
     )

     with timer("Chat actor: Infer webpage urls to read", logger):
-        response = await send_message_to_model_wrapper(online_queries_prompt, response_type="json_object")
+        response = await send_message_to_model_wrapper(
+            online_queries_prompt, uploaded_image_url=uploaded_image_url, response_type="json_object"
+        )

     # Validate that the response is a non-empty, JSON-serializable list of URLs
     try:
@@ -381,7 +396,7 @@ async def infer_webpage_urls(


 async def generate_online_subqueries(
-    q: str, conversation_history: dict, location_data: LocationData, user: KhojUser
+    q: str, conversation_history: dict, location_data: LocationData, user: KhojUser, uploaded_image_url: str = None
 ) -> List[str]:
     """
     Generate subqueries from the given query
@@ -400,7 +415,9 @@ async def generate_online_subqueries(
     )

     with timer("Chat actor: Generate online search subqueries", logger):
-        response = await send_message_to_model_wrapper(online_queries_prompt, response_type="json_object")
+        response = await send_message_to_model_wrapper(
+            online_queries_prompt, uploaded_image_url=uploaded_image_url, response_type="json_object"
+        )

     # Validate that the response is a non-empty, JSON-serializable list
     try:
@@ -419,7 +436,7 @@ async def generate_online_subqueries(
         return [q]


-async def schedule_query(q: str, conversation_history: dict) -> Tuple[str, ...]:
+async def schedule_query(q: str, conversation_history: dict, uploaded_image_url: str = None) -> Tuple[str, ...]:
     """
     Schedule the date, time to run the query. Assume the server timezone is UTC.
     """
@@ -430,7 +447,9 @@ async def schedule_query(q: str, conversation_history: dict) -> Tuple[str, ...]:
         chat_history=chat_history,
     )

-    raw_response = await send_message_to_model_wrapper(crontime_prompt, response_type="json_object")
+    raw_response = await send_message_to_model_wrapper(
+        crontime_prompt, uploaded_image_url=uploaded_image_url, response_type="json_object"
+    )

     # Validate that the response is a non-empty, JSON-serializable list
     try:
@@ -468,7 +487,9 @@ async def extract_relevant_info(q: str, corpus: str, subscribed: bool) -> Union[
     return response.strip()


-async def extract_relevant_summary(q: str, corpus: str, subscribed: bool = False) -> Union[str, None]:
+async def extract_relevant_summary(
+    q: str, corpus: str, subscribed: bool = False, uploaded_image_url: str = None
+) -> Union[str, None]:
     """
     Extract relevant information for a given query from the target corpus
     """
@@ -489,6 +510,7 @@ async def extract_relevant_summary(q: str, corpus: str, subscribed: bool = False
             prompts.system_prompt_extract_relevant_summary,
             chat_model_option=chat_model,
             subscribed=subscribed,
+            uploaded_image_url=uploaded_image_url,
         )
     return response.strip()

@@ -501,6 +523,7 @@ async def generate_better_image_prompt(
     online_results: Optional[dict] = None,
     model_type: Optional[str] = None,
     subscribed: bool = False,
+    uploaded_image_url: Optional[str] = None,
 ) -> str:
     """
     Generate a better image prompt from the given query
@@ -549,7 +572,7 @@ async def generate_better_image_prompt(

     with timer("Chat actor: Generate contextual image prompt", logger):
         response = await send_message_to_model_wrapper(
-            image_prompt, chat_model_option=chat_model, subscribed=subscribed
+            image_prompt, chat_model_option=chat_model, subscribed=subscribed, uploaded_image_url=uploaded_image_url
         )
         response = response.strip()
         if response.startswith(('"', "'")) and response.endswith(('"', "'")):
@@ -564,11 +587,19 @@ async def send_message_to_model_wrapper(
     response_type: str = "text",
     chat_model_option: ChatModelOptions = None,
     subscribed: bool = False,
+    uploaded_image_url: str = None,
 ):
     conversation_config: ChatModelOptions = (
         chat_model_option or await ConversationAdapters.aget_default_conversation_config()
     )

+    vision_available = conversation_config.vision_enabled
+    if not vision_available and uploaded_image_url:
+        vision_enabled_config = ConversationAdapters.get_vision_enabled_config()
+        if vision_enabled_config:
+            conversation_config = vision_enabled_config
+            vision_available = True
+
     chat_model = conversation_config.chat_model
     max_tokens = (
         conversation_config.subscribed_max_prompt_size
@@ -576,6 +607,7 @@ async def send_message_to_model_wrapper(
         else conversation_config.max_prompt_size
     )
     tokenizer = conversation_config.tokenizer
+    vision_available = conversation_config.vision_enabled

     if conversation_config.model_type == "offline":
         if state.offline_chat_processor_config is None or state.offline_chat_processor_config.loaded_model is None:
@@ -589,6 +621,7 @@ async def send_message_to_model_wrapper(
             loaded_model=loaded_model,
             tokenizer_name=tokenizer,
             max_prompt_size=max_tokens,
+            vision_enabled=vision_available,
         )

         return send_message_to_model_offline(
@@ -609,6 +642,8 @@ async def send_message_to_model_wrapper(
             model_name=chat_model,
             max_prompt_size=max_tokens,
             tokenizer_name=tokenizer,
+            vision_enabled=vision_available,
+            uploaded_image_url=uploaded_image_url,
         )

         openai_response = send_message_to_model(
@@ -628,6 +663,7 @@ async def send_message_to_model_wrapper(
             model_name=chat_model,
             max_prompt_size=max_tokens,
             tokenizer_name=tokenizer,
+            vision_enabled=vision_available,
         )

         return anthropic_send_message_to_model(
@@ -651,6 +687,7 @@ def send_message_to_model_wrapper_sync(

     chat_model = conversation_config.chat_model
     max_tokens = conversation_config.max_prompt_size
+    vision_available = conversation_config.vision_enabled

     if conversation_config.model_type == "offline":
         if state.offline_chat_processor_config is None or state.offline_chat_processor_config.loaded_model is None:
@@ -658,7 +695,11 @@

         loaded_model = state.offline_chat_processor_config.loaded_model
         truncated_messages = generate_chatml_messages_with_context(
-            user_message=message, system_message=system_message, model_name=chat_model, loaded_model=loaded_model
+            user_message=message,
+            system_message=system_message,
+            model_name=chat_model,
+            loaded_model=loaded_model,
+            vision_enabled=vision_available,
         )

         return send_message_to_model_offline(
@@ -672,7 +713,10 @@
     elif conversation_config.model_type == "openai":
         api_key = conversation_config.openai_config.api_key
         truncated_messages = generate_chatml_messages_with_context(
-            user_message=message, system_message=system_message, model_name=chat_model
+            user_message=message,
+            system_message=system_message,
+            model_name=chat_model,
+            vision_enabled=vision_available,
         )

         openai_response = send_message_to_model(
@@ -688,6 +732,7 @@
             system_message=system_message,
             model_name=chat_model,
             max_prompt_size=max_tokens,
+            vision_enabled=vision_available,
         )

         return anthropic_send_message_to_model(
@@ -712,6 +757,7 @@ def generate_chat_response(
     conversation_id: int = None,
     location_data: LocationData = None,
     user_name: Optional[str] = None,
+    uploaded_image_url: Optional[str] = None,
 ) -> Tuple[Union[ThreadedGenerator, Iterator[str]], Dict[str, str]]:
     # Initialize Variables
     chat_response = None
@@ -719,7 +765,6 @@

     metadata = {}
     agent = AgentAdapters.get_conversation_agent_by_id(conversation.agent.id) if conversation.agent else None
-
     try:
         partial_completion = partial(
             save_to_conversation_log,
@@ -731,9 +776,17 @@
             inferred_queries=inferred_queries,
             client_application=client_application,
             conversation_id=conversation_id,
+            uploaded_image_url=uploaded_image_url,
         )

         conversation_config = ConversationAdapters.get_valid_conversation_config(user, conversation)
+        vision_available = conversation_config.vision_enabled
+        if not vision_available and uploaded_image_url:
+            vision_enabled_config = ConversationAdapters.get_vision_enabled_config()
+            if vision_enabled_config:
+                conversation_config = vision_enabled_config
+                vision_available = True
+
         if conversation_config.model_type == "offline":
             loaded_model = state.offline_chat_processor_config.loaded_model
             chat_response = converse_offline(
@@ -759,6 +812,7 @@
             chat_response = converse(
                 compiled_references,
                 q,
+                image_url=uploaded_image_url,
                 online_results=online_results,
                 conversation_log=meta_log,
                 model=chat_model,
@@ -771,6 +825,7 @@
                 location_data=location_data,
                 user_name=user_name,
                 agent=agent,
+                vision_available=vision_available,
             )

         elif conversation_config.model_type == "anthropic":
@@ -809,6 +864,7 @@ async def text_to_image(
     online_results: Dict[str, Any],
     subscribed: bool = False,
     send_status_func: Optional[Callable] = None,
+    uploaded_image_url: Optional[str] = None,
 ):
     status_code = 200
     image = None
@@ -845,6 +901,7 @@
             online_results=online_results,
             model_type=text_to_image_config.model_type,
             subscribed=subscribed,
+            uploaded_image_url=uploaded_image_url,
         )

     if send_status_func:
@@ -908,13 +965,7 @@

     with timer("Convert image to webp", logger):
         # Convert png to webp for faster loading
-        image_io = io.BytesIO(decoded_image)
-        png_image = Image.open(image_io)
-        webp_image_io = io.BytesIO()
-        png_image.save(webp_image_io, "WEBP")
-        webp_image_bytes = webp_image_io.getvalue()
-        webp_image_io.close()
-        image_io.close()
+        webp_image_bytes = convert_image_to_webp(decoded_image)

     with timer("Upload image to S3", logger):
         image_url = upload_image(webp_image_bytes, user.uuid)
@@ -1095,6 +1146,7 @@ def should_notify(original_query: str, executed_query: str, ai_response: str) ->

     with timer("Chat actor: Decide to notify user of automation response", logger):
         try:
+            # TODO Replace with async call so we don't have to maintain a sync version
             response = send_message_to_model_wrapper_sync(to_notify_or_not)
             should_notify_result = "no" not in response.lower()
             logger.info(f'Decided to {"not " if not should_notify_result else ""}notify user of automation response.')
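A pattern repeats across these hunks: keep the configured chat model unless an image is attached and that model is not vision-capable, in which case fall back to a vision-enabled config via `ConversationAdapters.get_vision_enabled_config()`. The sketch below restates that selection logic in isolation; the `ChatConfig` dataclass, `pick_config` helper, and model names are illustrative stand-ins, not khoj APIs.

```python
from dataclasses import dataclass
from typing import Optional, Tuple


@dataclass
class ChatConfig:
    # Illustrative stand-in for khoj's ChatModelOptions
    chat_model: str
    vision_enabled: bool


def pick_config(
    default: ChatConfig, vision_fallback: Optional[ChatConfig], uploaded_image_url: Optional[str]
) -> Tuple[ChatConfig, bool]:
    """Keep the default model unless an image is attached and the default cannot see."""
    vision_available = default.vision_enabled
    if not vision_available and uploaded_image_url and vision_fallback:
        return vision_fallback, True
    return default, vision_available


# The text-only default is swapped out only when an image URL is present.
text_only = ChatConfig("llama-3", vision_enabled=False)
vision = ChatConfig("gpt-4o", vision_enabled=True)
assert pick_config(text_only, vision, None)[0] is text_only
assert pick_config(text_only, vision, "https://example.com/img.webp")[0] is vision
```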
khoj/routers/storage.py CHANGED
@@ -33,3 +33,31 @@ def upload_image(image: bytes, user_id: uuid.UUID):
     except Exception as e:
         logger.error(f"Failed to upload image to S3: {e}")
         return None
+
+
+AWS_USER_UPLOADED_IMAGES_BUCKET_NAME = os.getenv("AWS_USER_UPLOADED_IMAGES_BUCKET_NAME")
+
+
+def upload_image_to_bucket(image: bytes, user_id: uuid.UUID):
+    """Upload the image to the S3 bucket"""
+    if not aws_enabled:
+        logger.info("AWS is not enabled. Skipping image upload")
+        return None
+
+    image_key = f"{user_id}/{uuid.uuid4()}.webp"
+    if not AWS_USER_UPLOADED_IMAGES_BUCKET_NAME:
+        logger.error("AWS_USER_UPLOADED_IMAGES_BUCKET_NAME is not set")
+        return None
+
+    try:
+        s3_client.put_object(
+            Bucket=AWS_USER_UPLOADED_IMAGES_BUCKET_NAME,
+            Key=image_key,
+            Body=image,
+            ACL="public-read",
+            ContentType="image/webp",
+        )
+        return f"https://{AWS_USER_UPLOADED_IMAGES_BUCKET_NAME}/{image_key}"
+    except Exception as e:
+        logger.error(f"Failed to upload image to S3: {e}")
+        return None
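The new uploader reads its bucket name from the environment at import time and reuses the module's existing `s3_client`/`aws_enabled` setup. A rough usage sketch follows, assuming boto3 credentials are already configured in the environment; the bucket name and file name shown are placeholders.

```python
import os
import uuid

# Hypothetical bucket name; the variable is read when the module is imported,
# so it must be set before importing khoj.routers.storage.
os.environ.setdefault("AWS_USER_UPLOADED_IMAGES_BUCKET_NAME", "my-khoj-user-uploads")

from khoj.routers.storage import upload_image_to_bucket

with open("photo.webp", "rb") as f:
    url = upload_image_to_bucket(f.read(), user_id=uuid.uuid4())

# Returns a public https URL on success, or None when AWS is disabled,
# the bucket variable is unset, or the upload raises.
print(url)
```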
khoj/utils/helpers.py CHANGED
@@ -1,6 +1,7 @@
 from __future__ import annotations # to avoid quoting type hints

 import datetime
+import io
 import logging
 import os
 import platform
@@ -22,6 +23,7 @@ import requests
 import torch
 from asgiref.sync import sync_to_async
 from magika import Magika
+from PIL import Image

 from khoj.utils import constants

@@ -416,3 +418,16 @@ def is_internet_connected():
         return response.status_code == 200
     except:
         return False
+
+
+def convert_image_to_webp(image_bytes):
+    """Convert image bytes to webp format for faster loading"""
+    image_io = io.BytesIO(image_bytes)
+    with Image.open(image_io) as original_image:
+        webp_image_io = io.BytesIO()
+        original_image.save(webp_image_io, "WEBP")
+
+        # Encode the WebP image back to base64
+        webp_image_bytes = webp_image_io.getvalue()
+        webp_image_io.close()
+        return webp_image_bytes
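A quick way to exercise the new helper in isolation; this is a minimal sketch assuming Pillow with WebP support is installed, using a generated in-memory PNG instead of a real upload.

```python
import io

from PIL import Image

from khoj.utils.helpers import convert_image_to_webp

# Build a small in-memory PNG to stand in for an uploaded image.
png_io = io.BytesIO()
Image.new("RGB", (64, 64), color="purple").save(png_io, "PNG")
png_bytes = png_io.getvalue()

webp_bytes = convert_image_to_webp(png_bytes)

# The helper returns raw WebP bytes, so the result opens directly as WebP.
assert Image.open(io.BytesIO(webp_bytes)).format == "WEBP"
print(f"PNG {len(png_bytes)} bytes -> WebP {len(webp_bytes)} bytes")
```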
{khoj-1.21.6.dev14.dist-info → khoj-1.21.7.dev1.dist-info}/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: khoj
-Version: 1.21.6.dev14
+Version: 1.21.7.dev1
 Summary: Your Second Brain
 Project-URL: Homepage, https://khoj.dev
 Project-URL: Documentation, https://docs.khoj.dev