khoj 1.26.4.dev2__py3-none-any.whl → 1.26.5.dev16__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (72)
  1. khoj/interface/compiled/404/index.html +1 -1
  2. khoj/interface/compiled/_next/static/chunks/1210.132a7e1910006bbb.js +1 -0
  3. khoj/interface/compiled/_next/static/chunks/1459.690bf20e7d7b7090.js +1 -0
  4. khoj/interface/compiled/_next/static/chunks/1603-bfc0b26e32ad88e3.js +1 -0
  5. khoj/interface/compiled/_next/static/chunks/2697-37579bcc7593dd5c.js +1 -0
  6. khoj/interface/compiled/_next/static/chunks/3423-aad88d6c1f029135.js +1 -0
  7. khoj/interface/compiled/_next/static/chunks/6327-18f0b45cc5a13afb.js +3 -0
  8. khoj/interface/compiled/_next/static/chunks/{4086-2c74808ba38a5a0f.js → 8840-b8d7b9f0923c6651.js} +1 -1
  9. khoj/interface/compiled/_next/static/chunks/9417-759984ad62caa3dc.js +1 -0
  10. khoj/interface/compiled/_next/static/chunks/9479-fcce773453e472c4.js +1 -0
  11. khoj/interface/compiled/_next/static/chunks/94ca1967.5584df65931cfe83.js +1 -0
  12. khoj/interface/compiled/_next/static/chunks/964ecbae.ea4eab2a3a835ffe.js +1 -0
  13. khoj/interface/compiled/_next/static/chunks/app/agents/{page-88aa3042711107b7.js → page-997bf85681256672.js} +1 -1
  14. khoj/interface/compiled/_next/static/chunks/app/automations/{page-5480731341f34450.js → page-1688dead2f21270d.js} +1 -1
  15. khoj/interface/compiled/_next/static/chunks/app/chat/page-027e61d082025c50.js +1 -0
  16. khoj/interface/compiled/_next/static/chunks/app/factchecker/{page-e7b34316ec6f44de.js → page-f544113d240423e9.js} +1 -1
  17. khoj/interface/compiled/_next/static/chunks/app/{page-10a5aad6e04f3cf8.js → page-88139ac728fe3533.js} +1 -1
  18. khoj/interface/compiled/_next/static/chunks/app/search/{page-d56541c746fded7d.js → page-3ada11cda5050eeb.js} +1 -1
  19. khoj/interface/compiled/_next/static/chunks/app/settings/{page-e044a999468a7c5d.js → page-fa11cafaec7ab39f.js} +1 -1
  20. khoj/interface/compiled/_next/static/chunks/app/share/chat/page-511186e77624eaec.js +1 -0
  21. khoj/interface/compiled/_next/static/chunks/webpack-c48e5093123a4a56.js +1 -0
  22. khoj/interface/compiled/_next/static/css/{c808691c459e3887.css → 3cf13271869a4aeb.css} +1 -1
  23. khoj/interface/compiled/_next/static/css/4cae6c0e5c72fb2d.css +1 -0
  24. khoj/interface/compiled/_next/static/css/825406e5ebee86d3.css +25 -0
  25. khoj/interface/compiled/_next/static/css/f84cf008d5ff4161.css +1 -0
  26. khoj/interface/compiled/agents/index.html +1 -1
  27. khoj/interface/compiled/agents/index.txt +2 -2
  28. khoj/interface/compiled/automations/index.html +1 -1
  29. khoj/interface/compiled/automations/index.txt +2 -2
  30. khoj/interface/compiled/chat/index.html +1 -1
  31. khoj/interface/compiled/chat/index.txt +2 -2
  32. khoj/interface/compiled/factchecker/index.html +1 -1
  33. khoj/interface/compiled/factchecker/index.txt +2 -2
  34. khoj/interface/compiled/index.html +1 -1
  35. khoj/interface/compiled/index.txt +2 -2
  36. khoj/interface/compiled/search/index.html +1 -1
  37. khoj/interface/compiled/search/index.txt +2 -2
  38. khoj/interface/compiled/settings/index.html +1 -1
  39. khoj/interface/compiled/settings/index.txt +2 -2
  40. khoj/interface/compiled/share/chat/index.html +1 -1
  41. khoj/interface/compiled/share/chat/index.txt +2 -2
  42. khoj/processor/conversation/google/gemini_chat.py +28 -13
  43. khoj/processor/conversation/google/utils.py +34 -12
  44. khoj/processor/conversation/openai/gpt.py +4 -4
  45. khoj/processor/conversation/prompts.py +144 -0
  46. khoj/processor/conversation/utils.py +22 -13
  47. khoj/processor/image/generate.py +5 -5
  48. khoj/processor/tools/online_search.py +4 -4
  49. khoj/routers/api.py +4 -2
  50. khoj/routers/api_chat.py +85 -46
  51. khoj/routers/helpers.py +225 -29
  52. khoj/routers/web_client.py +0 -11
  53. khoj/utils/helpers.py +7 -3
  54. {khoj-1.26.4.dev2.dist-info → khoj-1.26.5.dev16.dist-info}/METADATA +1 -1
  55. {khoj-1.26.4.dev2.dist-info → khoj-1.26.5.dev16.dist-info}/RECORD +60 -56
  56. khoj/interface/compiled/_next/static/chunks/1603-fa3ee48860b9dc5c.js +0 -1
  57. khoj/interface/compiled/_next/static/chunks/2697-a38d01981ad3bdf8.js +0 -1
  58. khoj/interface/compiled/_next/static/chunks/4051-2cf66369d6ca0f1d.js +0 -3
  59. khoj/interface/compiled/_next/static/chunks/9178-899fe9a6b754ecfe.js +0 -1
  60. khoj/interface/compiled/_next/static/chunks/9417-46ed3aaa639c85ef.js +0 -1
  61. khoj/interface/compiled/_next/static/chunks/9479-ea776e73f549090c.js +0 -1
  62. khoj/interface/compiled/_next/static/chunks/app/chat/page-702057ccbcf27881.js +0 -1
  63. khoj/interface/compiled/_next/static/chunks/app/share/chat/page-fbbd66a4d4633438.js +0 -1
  64. khoj/interface/compiled/_next/static/chunks/webpack-2651a68f46ac3cb7.js +0 -1
  65. khoj/interface/compiled/_next/static/css/2de69f0be774c768.css +0 -1
  66. khoj/interface/compiled/_next/static/css/3e1f1fdd70775091.css +0 -1
  67. khoj/interface/compiled/_next/static/css/b9a6bf04305d98d7.css +0 -25
  68. /khoj/interface/compiled/_next/static/{wyjqS7cuSX-u62BTNYqhU → 1dVlkdPTTw5b6saGVG4b3}/_buildManifest.js +0 -0
  69. /khoj/interface/compiled/_next/static/{wyjqS7cuSX-u62BTNYqhU → 1dVlkdPTTw5b6saGVG4b3}/_ssgManifest.js +0 -0
  70. {khoj-1.26.4.dev2.dist-info → khoj-1.26.5.dev16.dist-info}/WHEEL +0 -0
  71. {khoj-1.26.4.dev2.dist-info → khoj-1.26.5.dev16.dist-info}/entry_points.txt +0 -0
  72. {khoj-1.26.4.dev2.dist-info → khoj-1.26.5.dev16.dist-info}/licenses/LICENSE +0 -0
khoj/processor/conversation/prompts.py CHANGED
@@ -176,6 +176,150 @@ Improved Prompt:
 """.strip()
 )

+## Diagram Generation
+## --
+
+improve_diagram_description_prompt = PromptTemplate.from_template(
+"""
+you are an architect working with a novice artist using a diagramming tool.
+{personality_context}
+
+you need to convert the user's query to a description format that the novice artist can use very well. you are allowed to use primitives like
+- text
+- rectangle
+- diamond
+- ellipse
+- line
+- arrow
+- frame
+
+use these primitives to describe what sort of diagram the drawer should create. the artist must recreate the diagram every time, so include all relevant prior information in your description.
+
+use simple, concise language.
+
+Today's Date: {current_date}
+User's Location: {location}
+
+User's Notes:
+{references}
+
+Online References:
+{online_results}
+
+Conversation Log:
+{chat_history}
+
+Query: {query}
+
+
+""".strip()
+)
+
+excalidraw_diagram_generation_prompt = PromptTemplate.from_template(
+"""
+You are a program manager with the ability to describe diagrams to compose in professional, fine detail.
+{personality_context}
+
+You need to create a declarative description of the diagram and relevant components, using this base schema. Use the `label` property to specify the text to be rendered in the respective elements. Always use light colors for the `backgroundColor` property, like white, or light blue, green, red. "type", "x", "y", "id", are required properties for all elements.
+
+{{
+type: string,
+x: number,
+y: number,
+strokeColor: string,
+backgroundColor: string,
+width: number,
+height: number,
+id: string,
+label: {{
+text: string,
+}}
+}}
+
+Valid types:
+- text
+- rectangle
+- diamond
+- ellipse
+- line
+- arrow
+
+For arrows and lines, you can use the `points` property to specify the start and end points of the arrow. You may also use the `label` property to specify the text to be rendered. You may use the `start` and `end` properties to connect the linear elements to other elements. The start and end point can either be the ID to map to an existing object, or the `type` to create a new object. Mapping to an existing object is useful if you want to connect it to multiple objects. Lines and arrows can only start and end at rectangle, text, diamond, or ellipse elements.
+
+{{
+type: "arrow",
+id: string,
+x: number,
+y: number,
+width: number,
+height: number,
+strokeColor: string,
+start: {{
+id: string,
+type: string,
+}},
+end: {{
+id: string,
+type: string,
+}},
+label: {{
+text: string,
+}}
+points: [
+[number, number],
+[number, number],
+]
+}}
+
+For text, you must use the `text` property to specify the text to be rendered. You may also use `fontSize` property to specify the font size of the text. Only use the `text` element for titles, subtitles, and overviews. For labels, use the `label` property in the respective elements.
+
+{{
+type: "text",
+id: string,
+x: number,
+y: number,
+fontSize: number,
+text: string,
+}}
+
+For frames, use the `children` property to specify the elements that are inside the frame by their ids.
+
+{{
+type: "frame",
+id: string,
+x: number,
+y: number,
+width: number,
+height: number,
+name: string,
+children: [
+string
+]
+}}
+
+Here's an example of a valid diagram:
+
+Design Description: Create a diagram describing a circular development process with 3 stages: design, implementation and feedback. The design stage is connected to the implementation stage and the implementation stage is connected to the feedback stage and the feedback stage is connected to the design stage. Each stage should be labeled with the stage name.
+
+Response:
+
+[
+{{"type":"text","x":-150,"y":50,"width":300,"height":40,"id":"title_text","text":"Circular Development Process","fontSize":24}},
+{{"type":"ellipse","x":-169,"y":113,"width":188,"height":202,"id":"design_ellipse", "label": {{"text": "Design"}}}},
+{{"type":"ellipse","x":62,"y":394,"width":186,"height":188,"id":"implement_ellipse", "label": {{"text": "Implement"}}}},
+{{"type":"ellipse","x":-348,"y":430,"width":184,"height":170,"id":"feedback_ellipse", "label": {{"text": "Feedback"}}}},
+{{"type":"arrow","x":21,"y":273,"id":"design_to_implement_arrow","points":[[0,0],[86,105]],"start":{{"id":"design_ellipse"}}, "end":{{"id":"implement_ellipse"}}}},
+{{"type":"arrow","x":50,"y":519,"id":"implement_to_feedback_arrow","points":[[0,0],[-198,-6]],"start":{{"id":"implement_ellipse"}}, "end":{{"id":"feedback_ellipse"}}}},
+{{"type":"arrow","x":-228,"y":417,"id":"feedback_to_design_arrow","points":[[0,0],[85,-123]],"start":{{"id":"feedback_ellipse"}}, "end":{{"id":"design_ellipse"}}}},
+]
+
+Create a detailed diagram from the provided context and user prompt below. Return a valid JSON object:
+
+Diagram Description: {query}
+
+""".strip()
+)
+
 ## Online Search Conversation
 ## --
 online_search_conversation = PromptTemplate.from_template(
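
Both new prompts are ordinary langchain PromptTemplates, so they render by filling every placeholder; the doubled braces in the Excalidraw prompt escape literal JSON braces. A minimal rendering sketch (all values illustrative, and the langchain import path is an assumption based on how khoj's other prompts are built):

    from langchain.prompts import PromptTemplate  # assumed import path

    # Fill the new improve_diagram_description_prompt with sample values
    rendered = improve_diagram_description_prompt.format(
        personality_context="",
        current_date="2024-10-01",
        location="San Francisco, United States",
        references="",
        online_results="",
        chat_history="",
        query="Draw the stages of our release pipeline",
    )
    print(rendered)  # a plain string, ready to send as the LLM instruction
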
khoj/processor/conversation/utils.py CHANGED
@@ -109,7 +109,7 @@ def save_to_conversation_log(
     client_application: ClientApplication = None,
     conversation_id: str = None,
     automation_id: str = None,
-    uploaded_image_url: str = None,
+    query_images: List[str] = None,
 ):
     user_message_time = user_message_time or datetime.now().strftime("%Y-%m-%d %H:%M:%S")
     updated_conversation = message_to_log(
@@ -117,7 +117,7 @@ def save_to_conversation_log(
         chat_response=chat_response,
         user_message_metadata={
             "created": user_message_time,
-            "uploadedImageData": uploaded_image_url,
+            "images": query_images,
         },
         khoj_message_metadata={
             "context": compiled_references,
@@ -145,10 +145,18 @@ Khoj: "{inferred_queries if ("text-to-image" in intent_type) else chat_response}
 )


-# Format user and system messages to chatml format
-def construct_structured_message(message, image_url, model_type, vision_enabled):
-    if image_url and vision_enabled and model_type == ChatModelOptions.ModelType.OPENAI:
-        return [{"type": "text", "text": message}, {"type": "image_url", "image_url": {"url": image_url}}]
+def construct_structured_message(message: str, images: list[str], model_type: str, vision_enabled: bool):
+    """
+    Format messages into appropriate multimedia format for supported chat model types
+    """
+    if not images or not vision_enabled:
+        return message
+
+    if model_type in [ChatModelOptions.ModelType.OPENAI, ChatModelOptions.ModelType.GOOGLE]:
+        return [
+            {"type": "text", "text": message},
+            *[{"type": "image_url", "image_url": {"url": image}} for image in images],
+        ]
     return message


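The rewrite generalizes the old single-image OpenAI path: for a vision-enabled OpenAI or Google model it now emits one text part plus one image_url part per image, and falls back to plain text otherwise. A worked example, derived directly from the function above (URLs are placeholders):

    content = construct_structured_message(
        message="What is in these photos?",
        images=["https://example.com/a.webp", "https://example.com/b.webp"],
        model_type=ChatModelOptions.ModelType.OPENAI,
        vision_enabled=True,
    )
    # content == [
    #     {"type": "text", "text": "What is in these photos?"},
    #     {"type": "image_url", "image_url": {"url": "https://example.com/a.webp"}},
    #     {"type": "image_url", "image_url": {"url": "https://example.com/b.webp"}},
    # ]
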
@@ -160,7 +168,7 @@ def generate_chatml_messages_with_context(
     loaded_model: Optional[Llama] = None,
     max_prompt_size=None,
     tokenizer_name=None,
-    uploaded_image_url=None,
+    query_images=None,
     vision_enabled=False,
     model_type="",
 ):
@@ -181,11 +189,12 @@ def generate_chatml_messages_with_context(
         message_notes = f'\n\n Notes:\n{chat.get("context")}' if chat.get("context") else "\n"
         role = "user" if chat["by"] == "you" else "assistant"

-        message_content = chat["message"] + message_notes
+        if chat["by"] == "khoj" and "excalidraw" in chat["intent"].get("type"):
+            message_content = chat.get("intent").get("inferred-queries")[0] + message_notes
+        else:
+            message_content = chat["message"] + message_notes

-        message_content = construct_structured_message(
-            message_content, chat.get("uploadedImageData"), model_type, vision_enabled
-        )
+        message_content = construct_structured_message(message_content, chat.get("images"), model_type, vision_enabled)

         reconstructed_message = ChatMessage(content=message_content, role=role)
@@ -198,7 +207,7 @@ def generate_chatml_messages_with_context(
     if not is_none_or_empty(user_message):
         messages.append(
             ChatMessage(
-                content=construct_structured_message(user_message, uploaded_image_url, model_type, vision_enabled),
+                content=construct_structured_message(user_message, query_images, model_type, vision_enabled),
                 role="user",
             )
         )
@@ -222,7 +231,6 @@ def truncate_messages(
     tokenizer_name=None,
 ) -> list[ChatMessage]:
     """Truncate messages to fit within max prompt size supported by model"""
-
     default_tokenizer = "gpt-4o"

     try:
@@ -252,6 +260,7 @@ def truncate_messages(
             system_message = messages.pop(idx)
             break

+    # TODO: Handle truncation of multi-part message.content, i.e when message.content is a list[dict] rather than a string
     system_message_tokens = (
         len(encoder.encode(system_message.content)) if system_message and type(system_message.content) == str else 0
     )
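
The new TODO exists because encoder.encode only accepts strings, so a multi-part list[dict] message currently counts as zero tokens. One way the count could be extended, assuming tiktoken (the library behind the gpt-4o default tokenizer here) and the part shape built by construct_structured_message; this is a sketch, not the shipped code:

    import tiktoken

    encoder = tiktoken.encoding_for_model("gpt-4o")

    def count_content_tokens(content) -> int:
        # Plain string content: count it as truncate_messages does today
        if isinstance(content, str):
            return len(encoder.encode(content))
        # Multi-part content: sum the text parts; image parts are skipped here,
        # although vision models do bill tokens for images too
        return sum(len(encoder.encode(part.get("text", ""))) for part in content if part.get("type") == "text")
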
khoj/processor/image/generate.py CHANGED
@@ -26,7 +26,7 @@ async def text_to_image(
     references: List[Dict[str, Any]],
     online_results: Dict[str, Any],
     send_status_func: Optional[Callable] = None,
-    uploaded_image_url: Optional[str] = None,
+    query_images: Optional[List[str]] = None,
     agent: Agent = None,
 ):
     status_code = 200
@@ -65,7 +65,7 @@ async def text_to_image(
         note_references=references,
         online_results=online_results,
         model_type=text_to_image_config.model_type,
-        uploaded_image_url=uploaded_image_url,
+        query_images=query_images,
         user=user,
         agent=agent,
     )
@@ -87,18 +87,18 @@ async def text_to_image(
         if "content_policy_violation" in e.message:
             logger.error(f"Image Generation blocked by OpenAI: {e}")
             status_code = e.status_code  # type: ignore
-            message = f"Image generation blocked by OpenAI: {e.message}"  # type: ignore
+            message = f"Image generation blocked by OpenAI due to policy violation"  # type: ignore
             yield image_url or image, status_code, message, intent_type.value
             return
         else:
             logger.error(f"Image Generation failed with {e}", exc_info=True)
-            message = f"Image generation failed with OpenAI error: {e.message}"  # type: ignore
+            message = f"Image generation failed using OpenAI"  # type: ignore
             status_code = e.status_code  # type: ignore
             yield image_url or image, status_code, message, intent_type.value
             return
     except requests.RequestException as e:
         logger.error(f"Image Generation failed with {e}", exc_info=True)
-        message = f"Image generation using {text2image_model} via {text_to_image_config.model_type} failed with error: {e}"
+        message = f"Image generation using {text2image_model} via {text_to_image_config.model_type} failed due to a network error."
         status_code = 502
         yield image_url or image, status_code, message, intent_type.value
         return
khoj/processor/tools/online_search.py CHANGED
@@ -62,7 +62,7 @@ async def search_online(
     user: KhojUser,
     send_status_func: Optional[Callable] = None,
     custom_filters: List[str] = [],
-    uploaded_image_url: str = None,
+    query_images: List[str] = None,
     agent: Agent = None,
 ):
     query += " ".join(custom_filters)
@@ -73,7 +73,7 @@ async def search_online(

     # Breakdown the query into subqueries to get the correct answer
     subqueries = await generate_online_subqueries(
-        query, conversation_history, location, user, uploaded_image_url=uploaded_image_url, agent=agent
+        query, conversation_history, location, user, query_images=query_images, agent=agent
     )
     response_dict = {}

@@ -151,7 +151,7 @@ async def read_webpages(
     location: LocationData,
     user: KhojUser,
     send_status_func: Optional[Callable] = None,
-    uploaded_image_url: str = None,
+    query_images: List[str] = None,
     agent: Agent = None,
 ):
     "Infer web pages to read from the query and extract relevant information from them"
@@ -159,7 +159,7 @@ async def read_webpages(
     if send_status_func:
         async for event in send_status_func(f"**Inferring web pages to read**"):
             yield {ChatEvent.STATUS: event}
-    urls = await infer_webpage_urls(query, conversation_history, location, user, uploaded_image_url)
+    urls = await infer_webpage_urls(query, conversation_history, location, user, query_images)

     logger.info(f"Reading web pages at: {urls}")
     if send_status_func:
khoj/routers/api.py CHANGED
@@ -347,7 +347,7 @@ async def extract_references_and_questions(
     conversation_commands: List[ConversationCommand] = [ConversationCommand.Default],
     location_data: LocationData = None,
     send_status_func: Optional[Callable] = None,
-    uploaded_image_url: Optional[str] = None,
+    query_images: Optional[List[str]] = None,
     agent: Agent = None,
 ):
     user = request.user.object if request.user.is_authenticated else None
@@ -438,7 +438,7 @@ async def extract_references_and_questions(
                 conversation_log=meta_log,
                 location_data=location_data,
                 user=user,
-                uploaded_image_url=uploaded_image_url,
+                query_images=query_images,
                 vision_enabled=vision_enabled,
                 personality_context=personality_context,
             )
@@ -459,12 +459,14 @@ async def extract_references_and_questions(
             chat_model = conversation_config.chat_model
             inferred_queries = extract_questions_gemini(
                 defiltered_query,
+                query_images=query_images,
                 model=chat_model,
                 api_key=api_key,
                 conversation_log=meta_log,
                 location_data=location_data,
                 max_tokens=conversation_config.max_prompt_size,
                 user=user,
+                vision_enabled=vision_enabled,
                 personality_context=personality_context,
             )

khoj/routers/api_chat.py CHANGED
@@ -30,8 +30,10 @@ from khoj.processor.speech.text_to_speech import generate_text_to_speech
 from khoj.processor.tools.online_search import read_webpages, search_online
 from khoj.routers.api import extract_references_and_questions
 from khoj.routers.helpers import (
+    ApiImageRateLimiter,
     ApiUserRateLimiter,
     ChatEvent,
+    ChatRequestBody,
     CommonQueryParams,
     ConversationCommandRateLimiter,
     agenerate_chat_response,
@@ -40,6 +42,7 @@ from khoj.routers.helpers import (
     construct_automation_created_message,
     create_automation,
     extract_relevant_summary,
+    generate_excalidraw_diagram,
     get_conversation_command,
     is_query_empty,
     is_ready_to_chat,
@@ -523,22 +526,6 @@ async def set_conversation_title(
     )


-class ChatRequestBody(BaseModel):
-    q: str
-    n: Optional[int] = 7
-    d: Optional[float] = None
-    stream: Optional[bool] = False
-    title: Optional[str] = None
-    conversation_id: Optional[str] = None
-    city: Optional[str] = None
-    region: Optional[str] = None
-    country: Optional[str] = None
-    country_code: Optional[str] = None
-    timezone: Optional[str] = None
-    image: Optional[str] = None
-    create_new: Optional[bool] = False
-
-
 @api_chat.post("")
 @requires(["authenticated"])
 async def chat(
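
ChatRequestBody itself is not expanded in this diff; it moved into khoj/routers/helpers.py (see the import hunk above) and, judging by the body.images access below, the single image field became a list. A plausible reconstruction of the relocated model, not the verbatim source:

    from typing import List, Optional
    from pydantic import BaseModel

    class ChatRequestBody(BaseModel):
        q: str
        n: Optional[int] = 7
        d: Optional[float] = None
        stream: Optional[bool] = False
        title: Optional[str] = None
        conversation_id: Optional[str] = None
        city: Optional[str] = None
        region: Optional[str] = None
        country: Optional[str] = None
        country_code: Optional[str] = None
        timezone: Optional[str] = None
        images: Optional[List[str]] = None  # assumed: replaces image: Optional[str]
        create_new: Optional[bool] = False
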
@@ -551,6 +538,7 @@ async def chat(
     rate_limiter_per_day=Depends(
         ApiUserRateLimiter(requests=600, subscribed_requests=6000, window=60 * 60 * 24, slug="chat_day")
     ),
+    image_rate_limiter=Depends(ApiImageRateLimiter(max_images=10, max_combined_size_mb=20)),
 ):
     # Access the parameters from the body
     q = body.q
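
ApiImageRateLimiter is defined in khoj/routers/helpers.py (+225 -29), which this diff does not expand. From its constructor arguments and FastAPI's callable-dependency pattern, a hypothetical sketch of its shape; the real implementation may differ:

    from fastapi import HTTPException

    class ApiImageRateLimiter:
        def __init__(self, max_images: int = 10, max_combined_size_mb: float = 20):
            self.max_images = max_images
            self.max_combined_size_mb = max_combined_size_mb

        def __call__(self, body: ChatRequestBody):
            if not body.images:
                return
            if len(body.images) > self.max_images:
                raise HTTPException(status_code=429, detail=f"Too many images; max is {self.max_images}.")
            # Base64 decodes to roughly 3/4 of its encoded length
            combined_mb = sum(len(image) * 3 / 4 for image in body.images) / (1024 * 1024)
            if combined_mb > self.max_combined_size_mb:
                raise HTTPException(status_code=429, detail=f"Images exceed {self.max_combined_size_mb} MB combined.")
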
@@ -564,9 +552,9 @@ async def chat(
     country = body.country or get_country_name_from_timezone(body.timezone)
     country_code = body.country_code or get_country_code_from_timezone(body.timezone)
     timezone = body.timezone
-    image = body.image
+    raw_images = body.images

-    async def event_generator(q: str, image: str):
+    async def event_generator(q: str, images: list[str]):
         start_time = time.perf_counter()
         ttft = None
         chat_metadata: dict = {}
@@ -576,16 +564,16 @@ async def chat(
         q = unquote(q)
         nonlocal conversation_id

-        uploaded_image_url = None
-        if image:
-            decoded_string = unquote(image)
-            base64_data = decoded_string.split(",", 1)[1]
-            image_bytes = base64.b64decode(base64_data)
-            webp_image_bytes = convert_image_to_webp(image_bytes)
-            try:
-                uploaded_image_url = upload_image_to_bucket(webp_image_bytes, request.user.object.id)
-            except:
-                uploaded_image_url = None
+        uploaded_images: list[str] = []
+        if images:
+            for image in images:
+                decoded_string = unquote(image)
+                base64_data = decoded_string.split(",", 1)[1]
+                image_bytes = base64.b64decode(base64_data)
+                webp_image_bytes = convert_image_to_webp(image_bytes)
+                uploaded_image = upload_image_to_bucket(webp_image_bytes, request.user.object.id)
+                if uploaded_image:
+                    uploaded_images.append(uploaded_image)

         async def send_event(event_type: ChatEvent, data: str | dict):
             nonlocal connection_alive, ttft
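
Client side, each entry in images is a percent-encoded data URL; the server splits off the base64 payload, converts it to webp, and uploads it to the bucket. A hypothetical request against a local khoj server (default port 42110; authentication omitted for brevity):

    import base64
    import requests

    with open("dog.png", "rb") as f:
        data_url = "data:image/png;base64," + base64.b64encode(f.read()).decode()

    resp = requests.post(
        "http://localhost:42110/api/chat",
        # auth cookie/token omitted; the endpoint requires an authenticated user
        json={"q": "What breed is this dog?", "images": [data_url], "stream": False},
    )
    print(resp.json())
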
@@ -692,7 +680,7 @@ async def chat(
            meta_log,
            is_automated_task,
            user=user,
-            uploaded_image_url=uploaded_image_url,
+            query_images=uploaded_images,
            agent=agent,
        )
        conversation_commands_str = ", ".join([cmd.value for cmd in conversation_commands])
@@ -701,7 +689,7 @@ async def chat(
        ):
            yield result

-        mode = await aget_relevant_output_modes(q, meta_log, is_automated_task, user, uploaded_image_url, agent)
+        mode = await aget_relevant_output_modes(q, meta_log, is_automated_task, user, uploaded_images, agent)
        async for result in send_event(ChatEvent.STATUS, f"**Decided Response Mode:** {mode.value}"):
            yield result
        if mode not in conversation_commands:
@@ -764,7 +752,7 @@ async def chat(
                q,
                contextual_data,
                conversation_history=meta_log,
-                uploaded_image_url=uploaded_image_url,
+                query_images=uploaded_images,
                user=user,
                agent=agent,
            )
@@ -785,7 +773,7 @@ async def chat(
                intent_type="summarize",
                client_application=request.user.client_app,
                conversation_id=conversation_id,
-                uploaded_image_url=uploaded_image_url,
+                query_images=uploaded_images,
            )
            return

@@ -828,7 +816,7 @@ async def chat(
                conversation_id=conversation_id,
                inferred_queries=[query_to_run],
                automation_id=automation.id,
-                uploaded_image_url=uploaded_image_url,
+                query_images=uploaded_images,
            )
            async for result in send_llm_response(llm_response):
                yield result
@@ -848,7 +836,7 @@ async def chat(
            conversation_commands,
            location,
            partial(send_event, ChatEvent.STATUS),
-            uploaded_image_url=uploaded_image_url,
+            query_images=uploaded_images,
            agent=agent,
        ):
            if isinstance(result, dict) and ChatEvent.STATUS in result:
@@ -892,7 +880,7 @@ async def chat(
                user,
                partial(send_event, ChatEvent.STATUS),
                custom_filters,
-                uploaded_image_url=uploaded_image_url,
+                query_images=uploaded_images,
                agent=agent,
            ):
                if isinstance(result, dict) and ChatEvent.STATUS in result:
@@ -916,7 +904,7 @@ async def chat(
                location,
                user,
                partial(send_event, ChatEvent.STATUS),
-                uploaded_image_url=uploaded_image_url,
+                query_images=uploaded_images,
                agent=agent,
            ):
                if isinstance(result, dict) and ChatEvent.STATUS in result:
@@ -966,20 +954,20 @@ async def chat(
                references=compiled_references,
                online_results=online_results,
                send_status_func=partial(send_event, ChatEvent.STATUS),
-                uploaded_image_url=uploaded_image_url,
+                query_images=uploaded_images,
                agent=agent,
            ):
                if isinstance(result, dict) and ChatEvent.STATUS in result:
                    yield result[ChatEvent.STATUS]
                else:
-                    image, status_code, improved_image_prompt, intent_type = result
+                    generated_image, status_code, improved_image_prompt, intent_type = result

-            if image is None or status_code != 200:
+            if generated_image is None or status_code != 200:
                content_obj = {
                    "content-type": "application/json",
                    "intentType": intent_type,
                    "detail": improved_image_prompt,
-                    "image": image,
+                    "image": None,
                }
                async for result in send_llm_response(json.dumps(content_obj)):
                    yield result
@@ -987,7 +975,7 @@ async def chat(

            await sync_to_async(save_to_conversation_log)(
                q,
-                image,
+                generated_image,
                user,
                meta_log,
                user_message_time,
@@ -997,13 +985,64 @@ async def chat(
                conversation_id=conversation_id,
                compiled_references=compiled_references,
                online_results=online_results,
-                uploaded_image_url=uploaded_image_url,
+                query_images=uploaded_images,
            )
            content_obj = {
                "intentType": intent_type,
                "inferredQueries": [improved_image_prompt],
-                "image": image,
+                "image": generated_image,
+            }
+            async for result in send_llm_response(json.dumps(content_obj)):
+                yield result
+            return
+
+        if ConversationCommand.Diagram in conversation_commands:
+            async for result in send_event(ChatEvent.STATUS, f"Creating diagram"):
+                yield result
+
+            intent_type = "excalidraw"
+            inferred_queries = []
+            diagram_description = ""
+
+            async for result in generate_excalidraw_diagram(
+                q=defiltered_query,
+                conversation_history=meta_log,
+                location_data=location,
+                note_references=compiled_references,
+                online_results=online_results,
+                query_images=uploaded_images,
+                user=user,
+                agent=agent,
+                send_status_func=partial(send_event, ChatEvent.STATUS),
+            ):
+                if isinstance(result, dict) and ChatEvent.STATUS in result:
+                    yield result[ChatEvent.STATUS]
+                else:
+                    better_diagram_description_prompt, excalidraw_diagram_description = result
+                    inferred_queries.append(better_diagram_description_prompt)
+                    diagram_description = excalidraw_diagram_description
+
+            content_obj = {
+                "intentType": intent_type,
+                "inferredQueries": inferred_queries,
+                "image": diagram_description,
            }
+
+            await sync_to_async(save_to_conversation_log)(
+                q,
+                excalidraw_diagram_description,
+                user,
+                meta_log,
+                user_message_time,
+                intent_type="excalidraw",
+                inferred_queries=[better_diagram_description_prompt],
+                client_application=request.user.client_app,
+                conversation_id=conversation_id,
+                compiled_references=compiled_references,
+                online_results=online_results,
+                query_images=uploaded_images,
+            )
+
            async for result in send_llm_response(json.dumps(content_obj)):
                yield result
            return
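
Note the diagram branch reuses the image field of the response payload to carry the Excalidraw element list, so clients must dispatch on intentType rather than assume a URL. Illustrative handling of a non-streaming payload, continuing the request sketch above (the exact envelope assembled by read_chat_stream is not shown in this diff):

    response_data = resp.json()
    if response_data.get("intentType") == "excalidraw":
        elements = response_data["image"]  # Excalidraw elements, not an image URL
    elif response_data.get("intentType") == "text-to-image":
        image_url = response_data["image"]
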
@@ -1024,7 +1063,7 @@ async def chat(
                conversation_id,
                location,
                user_name,
-                uploaded_image_url,
+                uploaded_images,
            )

            # Send Response
@@ -1050,9 +1089,9 @@ async def chat(

    ## Stream Text Response
    if stream:
-        return StreamingResponse(event_generator(q, image=image), media_type="text/plain")
+        return StreamingResponse(event_generator(q, images=raw_images), media_type="text/plain")
    ## Non-Streaming Text Response
    else:
-        response_iterator = event_generator(q, image=image)
+        response_iterator = event_generator(q, images=raw_images)
        response_data = await read_chat_stream(response_iterator)
        return Response(content=json.dumps(response_data), media_type="application/json", status_code=200)
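
With stream set to true the endpoint instead returns a text/plain StreamingResponse that interleaves status events with the response body. A minimal consumer sketch; the event framing within the stream is an assumption, not specified in this diff:

    import requests

    with requests.post(
        "http://localhost:42110/api/chat",
        json={"q": "Draw a diagram of my release pipeline", "stream": True},
        stream=True,
    ) as resp:
        for chunk in resp.iter_content(chunk_size=None, decode_unicode=True):
            print(chunk, end="", flush=True)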