khoj 2.0.0b10__py3-none-any.whl → 2.0.0b11.dev15__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- khoj/configure.py +74 -15
- khoj/interface/compiled/404/index.html +2 -2
- khoj/interface/compiled/_next/static/chunks/app/agents/layout-4e2a134ec26aa606.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/agents/{page-0006674668eb5a4d.js → page-9a4610474cd59a71.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/app/automations/{page-4c465cde2d14cb52.js → page-f7bb9d777b7745d4.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/app/chat/layout-ad4d1792ab1a4108.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/chat/{page-4408125f66c165cf.js → page-8e1c4f2af3c9429e.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/app/{page-85b9b416898738f7.js → page-2b3056cba8aa96ce.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/app/search/layout-c02531d586972d7d.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/search/{page-883b7d8d2e3abe3e.js → page-4885df3cd175c957.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/app/settings/{page-95e994ddac31473f.js → page-8be3b35178abf2ec.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/app/share/chat/layout-e8e5db7830bf3f47.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/share/chat/{page-c062269e6906ef22.js → page-4a4b0c0f4749c2b2.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/{webpack-c375c47fee5a4dda.js → webpack-2d7431816511b8a5.js} +1 -1
- khoj/interface/compiled/_next/static/css/{a0c2fd63bb396f04.css → 23b26df423cd8a9c.css} +1 -1
- khoj/interface/compiled/_next/static/css/37a73b87f02df402.css +1 -0
- khoj/interface/compiled/_next/static/css/821d0d60b0b6871d.css +1 -0
- khoj/interface/compiled/agents/index.html +2 -2
- khoj/interface/compiled/agents/index.txt +2 -2
- khoj/interface/compiled/automations/index.html +2 -2
- khoj/interface/compiled/automations/index.txt +3 -3
- khoj/interface/compiled/chat/index.html +2 -2
- khoj/interface/compiled/chat/index.txt +2 -2
- khoj/interface/compiled/index.html +2 -2
- khoj/interface/compiled/index.txt +2 -2
- khoj/interface/compiled/search/index.html +2 -2
- khoj/interface/compiled/search/index.txt +2 -2
- khoj/interface/compiled/settings/index.html +2 -2
- khoj/interface/compiled/settings/index.txt +4 -4
- khoj/interface/compiled/share/chat/index.html +2 -2
- khoj/interface/compiled/share/chat/index.txt +2 -2
- khoj/interface/web/error.html +149 -0
- khoj/processor/conversation/google/utils.py +71 -5
- khoj/processor/conversation/openai/utils.py +54 -39
- khoj/processor/conversation/utils.py +1 -0
- khoj/processor/operator/__init__.py +1 -1
- khoj/routers/api_agents.py +1 -1
- khoj/routers/api_chat.py +95 -20
- khoj/routers/helpers.py +4 -4
- khoj/routers/research.py +1 -1
- khoj/routers/web_client.py +5 -0
- {khoj-2.0.0b10.dist-info → khoj-2.0.0b11.dev15.dist-info}/METADATA +1 -1
- {khoj-2.0.0b10.dist-info → khoj-2.0.0b11.dev15.dist-info}/RECORD +55 -54
- khoj/interface/compiled/_next/static/chunks/app/agents/layout-e49165209d2e406c.js +0 -1
- khoj/interface/compiled/_next/static/chunks/app/chat/layout-d5ae861e1ade9d08.js +0 -1
- khoj/interface/compiled/_next/static/chunks/app/search/layout-f5881c7ae3ba0795.js +0 -1
- khoj/interface/compiled/_next/static/chunks/app/share/chat/layout-64a53f8ec4afa6b3.js +0 -1
- khoj/interface/compiled/_next/static/css/ee66643a6a5bf71c.css +0 -1
- khoj/interface/compiled/_next/static/css/fbacbdfd5e7f3f0e.css +0 -1
- /khoj/interface/compiled/_next/static/chunks/{1327-3b1a41af530fa8ee.js → 1327-1a9107b9a2a04a98.js} +0 -0
- /khoj/interface/compiled/_next/static/chunks/{1915-fbfe167c84ad60c5.js → 1915-5c6508f6ebb62a30.js} +0 -0
- /khoj/interface/compiled/_next/static/chunks/{2117-e78b6902ad6f75ec.js → 2117-080746c8e170c81a.js} +0 -0
- /khoj/interface/compiled/_next/static/chunks/{2939-4d4084c5b888b960.js → 2939-4af3fd24b8ffc9ad.js} +0 -0
- /khoj/interface/compiled/_next/static/chunks/{4447-d6cf93724d57e34b.js → 4447-cd95608f8e93e711.js} +0 -0
- /khoj/interface/compiled/_next/static/chunks/{8667-4b7790573b08c50d.js → 8667-50b03a89e82e0ba7.js} +0 -0
- /khoj/interface/compiled/_next/static/chunks/{9139-ce1ae935dac9c871.js → 9139-8ac4d9feb10f8869.js} +0 -0
- /khoj/interface/compiled/_next/static/{Ieo_9KsHXi-opl1-yfWnK → nqIeU27JxQkTS-5OXP3OU}/_buildManifest.js +0 -0
- /khoj/interface/compiled/_next/static/{Ieo_9KsHXi-opl1-yfWnK → nqIeU27JxQkTS-5OXP3OU}/_ssgManifest.js +0 -0
- {khoj-2.0.0b10.dist-info → khoj-2.0.0b11.dev15.dist-info}/WHEEL +0 -0
- {khoj-2.0.0b10.dist-info → khoj-2.0.0b11.dev15.dist-info}/entry_points.txt +0 -0
- {khoj-2.0.0b10.dist-info → khoj-2.0.0b11.dev15.dist-info}/licenses/LICENSE +0 -0

khoj/processor/conversation/google/utils.py CHANGED

@@ -2,6 +2,7 @@ import json
 import logging
 import os
 import random
+import re
 from copy import deepcopy
 from time import perf_counter
 from typing import Any, AsyncGenerator, AsyncIterator, Dict, List
@@ -13,6 +14,7 @@ from google.genai import types as gtypes
 from langchain_core.messages.chat import ChatMessage
 from pydantic import BaseModel
 from tenacity import (
+    RetryCallState,
     before_sleep_log,
     retry,
     retry_if_exception,
@@ -73,7 +75,7 @@ SAFETY_SETTINGS = [
 def _is_retryable_error(exception: BaseException) -> bool:
     """Check if the exception is a retryable error"""
     # server errors
-    if isinstance(exception, gerrors.APIError):
+    if isinstance(exception, (gerrors.APIError, gerrors.ClientError)):
         return exception.code in [429, 502, 503, 504]
     # client errors
     if (
@@ -88,9 +90,48 @@ def _is_retryable_error(exception: BaseException) -> bool:
     return False
 
 
+def _extract_retry_delay(exception: BaseException) -> float:
+    """Extract retry delay from Gemini error response, return in seconds"""
+    if (
+        isinstance(exception, (gerrors.ClientError, gerrors.APIError))
+        and hasattr(exception, "details")
+        and isinstance(exception.details, dict)
+    ):
+        # Look for retryDelay key, value pair. E.g "retryDelay": "54s"
+        if delay_str := exception.details.get("retryDelay"):
+            delay_seconds_match = re.search(r"(\d+)s", delay_str)
+            if delay_seconds_match:
+                delay_seconds = float(delay_seconds_match.group(1))
+                return delay_seconds
+    return None
+
+
+def _wait_with_gemini_delay(min_wait=4, max_wait=120, multiplier=1, fallback_wait=None):
+    """Custom wait strategy that respects Gemini's retryDelay if present"""
+
+    def wait_func(retry_state: RetryCallState) -> float:
+        # Use backoff time if last exception suggests a retry delay
+        if retry_state.outcome and retry_state.outcome.failed:
+            exception = retry_state.outcome.exception()
+            gemini_delay = _extract_retry_delay(exception)
+            if gemini_delay:
+                # Use the Gemini-suggested delay, but cap it at max_wait
+                suggested_delay = min(gemini_delay, max_wait)
+                logger.info(f"Using Gemini suggested retry delay: {suggested_delay} seconds")
+                return suggested_delay
+        # Else use fallback backoff if provided
+        if fallback_wait:
+            return fallback_wait(retry_state)
+        # Else use exponential backoff with provided parameters
+        else:
+            return wait_exponential(multiplier=multiplier, min=min_wait, max=max_wait)(retry_state)
+
+    return wait_func
+
+
 @retry(
     retry=retry_if_exception(_is_retryable_error),
-    wait=wait_random_exponential(min=1, max=10),
+    wait=_wait_with_gemini_delay(min_wait=1, max_wait=10, fallback_wait=wait_random_exponential(min=1, max=10)),
     stop=stop_after_attempt(2),
     before_sleep=before_sleep_log(logger, logging.DEBUG),
     reraise=True,
@@ -169,7 +210,14 @@ def gemini_completion_with_backoff(
         )
     except gerrors.ClientError as e:
         response = None
-
+        # Handle 429 rate limit errors directly
+        if e.code == 429:
+            response_text = f"My brain is exhausted. Can you please try again in a bit?"
+            # Log the full error details for debugging
+            logger.error(f"Gemini ClientError: {e.code} {e.status}. Details: {e.details}")
+        # Handle other errors
+        else:
+            response_text, _ = handle_gemini_response(e.args)
         # Respond with reason for stopping
         logger.warning(
             f"LLM Response Prevented for {model_name}: {response_text}.\n"
@@ -206,7 +254,7 @@ def gemini_completion_with_backoff(
 
 @retry(
     retry=retry_if_exception(_is_retryable_error),
-    wait=
+    wait=_wait_with_gemini_delay(multiplier=1, min_wait=4, max_wait=10),
     stop=stop_after_attempt(3),
     before_sleep=before_sleep_log(logger, logging.WARNING),
     reraise=False,
@@ -310,6 +358,13 @@ def handle_gemini_response(
     candidates: list[gtypes.Candidate], prompt_feedback: gtypes.GenerateContentResponsePromptFeedback = None
 ):
     """Check if Gemini response was blocked and return an explanatory error message."""
+
+    # Ensure we have a proper list of candidates
+    if not isinstance(candidates, list):
+        message = f"\nUnexpected response format. Try again."
+        stopped = True
+        return message, stopped
+
     # Check if the response was blocked due to safety concerns with the prompt
     if len(candidates) == 0 and prompt_feedback:
         message = f"\nI'd prefer to not respond to that due to **{prompt_feedback.block_reason.name}** issues with your query."
@@ -428,7 +483,18 @@ def format_messages_for_gemini(
     if len(messages) == 1:
         messages[0].role = "user"
 
-
+    # Ensure messages are properly formatted for Content creation
+    valid_messages = []
+    for message in messages:
+        try:
+            # Try create Content object to validate the structure before adding to valid messages
+            gtypes.Content(role=message.role, parts=message.content)
+            valid_messages.append(message)
+        except Exception as e:
+            logger.warning(f"Dropping message with invalid content structure: {e}. Message: {message}")
+            continue
+
+    formatted_messages = [gtypes.Content(role=message.role, parts=message.content) for message in valid_messages]
     return formatted_messages, system_prompt
 
 
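
The Gemini changes above replace tenacity's stock wait strategy with a custom wait callable that honors the `retryDelay` hint in Gemini's 429 error payload before falling back to exponential backoff. The self-contained sketch below illustrates the same pattern outside khoj; `FakeRateLimitError`, `extract_retry_delay`, `wait_with_server_delay`, and `call_flaky_api` are hypothetical names used only for illustration, not part of the khoj or google-genai APIs.

```python
# Sketch: a tenacity wait callable that prefers a server-suggested retry delay,
# capped at max_wait, and otherwise falls back to exponential backoff.
import re
from tenacity import RetryCallState, retry, retry_if_exception_type, stop_after_attempt, wait_exponential


class FakeRateLimitError(Exception):
    """Stand-in for an API error carrying a structured 'retryDelay' hint."""

    def __init__(self, details: dict):
        super().__init__(details)
        self.details = details


def extract_retry_delay(exc: BaseException) -> float | None:
    """Pull a delay in seconds out of an error payload like {"retryDelay": "2s"}."""
    details = getattr(exc, "details", None)
    if isinstance(details, dict) and (delay := details.get("retryDelay")):
        if match := re.search(r"(\d+)s", delay):
            return float(match.group(1))
    return None


def wait_with_server_delay(min_wait: float = 1, max_wait: float = 10):
    """Return a tenacity wait callable: honor the server hint if present, else back off."""
    fallback = wait_exponential(multiplier=1, min=min_wait, max=max_wait)

    def wait_func(retry_state: RetryCallState) -> float:
        if retry_state.outcome and retry_state.outcome.failed:
            delay = extract_retry_delay(retry_state.outcome.exception())
            if delay:
                return min(delay, max_wait)
        return fallback(retry_state)

    return wait_func


@retry(
    retry=retry_if_exception_type(FakeRateLimitError),
    wait=wait_with_server_delay(min_wait=1, max_wait=10),
    stop=stop_after_attempt(3),
    reraise=True,
)
def call_flaky_api(attempts: list) -> str:
    # Fail the first two attempts with a rate limit error suggesting a 2s retry delay.
    attempts.append(1)
    if len(attempts) < 3:
        raise FakeRateLimitError({"retryDelay": "2s"})
    return "ok"


if __name__ == "__main__":
    print(call_flaky_api([]))  # sleeps ~2s between attempts, then prints "ok"
```

Running the sketch shows the decorated call sleeping for the server-suggested delay between attempts instead of the default exponential schedule.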

khoj/processor/conversation/openai/utils.py CHANGED

@@ -100,6 +100,7 @@ def completion_with_backoff(
         reasoning_effort = "high" if deepthought else "low"
         model_kwargs["reasoning_effort"] = reasoning_effort
     elif model_name.startswith("deepseek-reasoner"):
+        stream_processor = in_stream_thought_processor
         # Two successive messages cannot be from the same role. Should merge any back-to-back messages from the same role.
         # The first message should always be a user message (except system message).
         updated_messages: List[dict] = []
@@ -111,8 +112,8 @@ def completion_with_backoff(
             else:
                 updated_messages.append(message)
         formatted_messages = updated_messages
-    elif
-        stream_processor =
+    elif is_qwen_style_reasoning_model(model_name, api_base_url):
+        stream_processor = in_stream_thought_processor
         # Reasoning is enabled by default. Disable when deepthought is False.
         # See https://qwenlm.github.io/blog/qwen3/#advanced-usages
         if not deepthought:
@@ -144,6 +145,14 @@ def completion_with_backoff(
             elif chunk.type == "tool_calls.function.arguments.done":
                 tool_calls += [ToolCall(name=chunk.name, args=json.loads(chunk.arguments), id=None)]
         if tool_calls:
+            # If there are tool calls, aggregate thoughts and responses into thoughts
+            if thoughts and aggregated_response:
+                # wrap each line of thought in italics
+                thoughts = "\n".join([f"*{line.strip()}*" for line in thoughts.splitlines() if line.strip()])
+                thoughts = f"{thoughts}\n\n{aggregated_response}"
+            else:
+                thoughts = thoughts or aggregated_response
+            # Json dump tool calls into aggregated response
             tool_calls = [
                 ToolCall(name=chunk.name, args=chunk.args, id=tool_id) for chunk, tool_id in zip(tool_calls, tool_ids)
             ]
@@ -158,6 +167,25 @@ def completion_with_backoff(
             **model_kwargs,
         )
         aggregated_response = chunk.choices[0].message.content
+        if hasattr(chunk.choices[0].message, "reasoning_content"):
+            thoughts = chunk.choices[0].message.reasoning_content
+        else:
+            thoughts = chunk.choices[0].message.model_extra.get("reasoning_content", "")
+        raw_tool_calls = chunk.choices[0].message.tool_calls
+        if raw_tool_calls:
+            tool_calls = [
+                ToolCall(name=tool.function.name, args=tool.function.parsed_arguments, id=tool.id)
+                for tool in raw_tool_calls
+            ]
+            # If there are tool calls, aggregate thoughts and responses into thoughts
+            if thoughts and aggregated_response:
+                # wrap each line of thought in italics
+                thoughts = "\n".join([f"*{line.strip()}*" for line in thoughts.splitlines() if line.strip()])
+                thoughts = f"{thoughts}\n\n{aggregated_response}"
+            else:
+                thoughts = thoughts or aggregated_response
+            # Json dump tool calls into aggregated response
+            aggregated_response = json.dumps([tool_call.__dict__ for tool_call in tool_calls])
 
     # Calculate cost of chat
     input_tokens = chunk.usage.prompt_tokens if hasattr(chunk, "usage") and chunk.usage else 0
@@ -216,7 +244,7 @@ async def chat_completion_with_backoff(
         openai_async_clients[client_key] = client
 
     stream = not is_non_streaming_model(model_name, api_base_url)
-    stream_processor =
+    stream_processor = astream_thought_processor
    if stream:
         model_kwargs["stream_options"] = {"include_usage": True}
     else:
@@ -244,13 +272,13 @@ async def chat_completion_with_backoff(
                 "content"
             ] = f"{first_system_message_content}\nFormatting re-enabled"
     elif is_twitter_reasoning_model(model_name, api_base_url):
-        stream_processor = adeepseek_stream_processor
         reasoning_effort = "high" if deepthought else "low"
         model_kwargs["reasoning_effort"] = reasoning_effort
     elif model_name.startswith("deepseek-reasoner") or "deepseek-r1" in model_name:
-        # Official Deepseek reasoner model
-        #
-
+        # Official Deepseek reasoner model and some inference APIs like vLLM return structured thinking output.
+        # Others like DeepInfra return it in response stream.
+        # Using the instream thought processor handles both cases, structured thoughts and in response thoughts.
+        stream_processor = ain_stream_thought_processor
         # Two successive messages cannot be from the same role. Should merge any back-to-back messages from the same role.
         # The first message should always be a user message (except system message).
         updated_messages: List[dict] = []
@@ -266,8 +294,8 @@ async def chat_completion_with_backoff(
             else:
                 updated_messages.append(message)
         formatted_messages = updated_messages
-    elif
-        stream_processor =
+    elif is_qwen_style_reasoning_model(model_name, api_base_url):
+        stream_processor = ain_stream_thought_processor
         # Reasoning is enabled by default. Disable when deepthought is False.
         # See https://qwenlm.github.io/blog/qwen3/#advanced-usages
         if not deepthought:
@@ -492,11 +520,12 @@ def is_twitter_reasoning_model(model_name: str, api_base_url: str = None) -> boo
     )
 
 
-def
+def is_qwen_style_reasoning_model(model_name: str, api_base_url: str = None) -> bool:
     """
-    Check if the model is a Qwen reasoning model
+    Check if the model is a Qwen style reasoning model
     """
-
+    qwen_style_reason_model = ["qwen3", "smollm3"]
+    return any(prefix in model_name.lower() for prefix in qwen_style_reason_model) and api_base_url is not None
 
 
 def is_local_api(api_base_url: str) -> bool:
@@ -543,39 +572,17 @@ def default_stream_processor(
     chat_stream: ChatCompletionStream,
 ) -> Generator[ChatCompletionStreamWithThoughtEvent, None, None]:
     """
-
+    Generator of chunks from the standard openai chat completions stream.
     """
     for chunk in chat_stream:
         yield chunk
 
 
-async def
+async def astream_thought_processor(
     chat_stream: openai.AsyncStream[ChatCompletionChunk],
 ) -> AsyncGenerator[ChatCompletionWithThoughtsChunk, None]:
     """
-    Async generator
-    """
-    async for chunk in chat_stream:
-        try:
-            # Validate the chunk has the required fields before processing
-            chunk_data = chunk.model_dump()
-
-            # Skip chunks that don't have the required object field or have invalid values
-            if not chunk_data.get("object") or chunk_data.get("object") != "chat.completion.chunk":
-                logger.warning(f"Skipping invalid chunk with object field: {chunk_data.get('object', 'missing')}")
-                continue
-
-            yield ChatCompletionWithThoughtsChunk.model_validate(chunk_data)
-        except Exception as e:
-            logger.warning(f"Error processing chunk: {e}. Skipping malformed chunk.")
-            continue
-
-
-async def adeepseek_stream_processor(
-    chat_stream: openai.AsyncStream[ChatCompletionChunk],
-) -> AsyncGenerator[ChatCompletionWithThoughtsChunk, None]:
-    """
-    Async generator to cast and return chunks from the deepseek chat completions stream.
+    Async generator of chunks from standard openai chat completions stream with thoughts/reasoning.
     """
     async for chunk in chat_stream:
         try:
@@ -588,12 +595,19 @@ async def adeepseek_stream_processor(
                 continue
 
             tchunk = ChatCompletionWithThoughtsChunk.model_validate(chunk_data)
+
+            # Handlle deepseek style response with thoughts. Used by AI APIs like vLLM, sgLang, DeepSeek, LiteLLM.
             if (
                 len(tchunk.choices) > 0
                 and hasattr(tchunk.choices[0].delta, "reasoning_content")
                 and tchunk.choices[0].delta.reasoning_content
            ):
                 tchunk.choices[0].delta.thought = chunk.choices[0].delta.reasoning_content
+
+            # Handlle llama.cpp server style response with thoughts.
+            elif len(tchunk.choices) > 0 and tchunk.choices[0].delta.model_extra.get("reasoning_content"):
+                tchunk.choices[0].delta.thought = tchunk.choices[0].delta.model_extra.get("reasoning_content")
+
             yield tchunk
         except Exception as e:
             logger.warning(f"Error processing chunk: {e}. Skipping malformed chunk.")
@@ -702,7 +716,7 @@ async def ain_stream_thought_processor(
     chat_stream: openai.AsyncStream[ChatCompletionChunk], thought_tag="think"
 ) -> AsyncGenerator[ChatCompletionWithThoughtsChunk, None]:
     """
-    Async generator for chat completion with thought chunks.
+    Async generator for chat completion with structured and inline thought chunks.
     Assumes <thought_tag>...</thought_tag> can only appear once at the start.
     Handles partial tags across streamed chunks.
     """
@@ -712,7 +726,7 @@ async def ain_stream_thought_processor(
     # Modes and transitions: detect_start > thought (optional) > message
     mode = "detect_start"
 
-    async for chunk in
+    async for chunk in astream_thought_processor(chat_stream):
         if len(chunk.choices) == 0:
             continue
         if mode == "message":
@@ -829,6 +843,7 @@ def to_openai_tools(tools: List[ToolDefinition]) -> List[Dict] | None:
                 "name": tool.name,
                 "description": tool.description,
                 "parameters": clean_response_schema(tool.schema),
+                "strict": True,
             },
         }
         for tool in tools
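
The OpenAI utils changes route reasoning models through `ain_stream_thought_processor`, which strips a single `<think>...</think>` block from the start of the streamed text even when the tags are split across chunks. The sketch below shows that tag-splitting idea on plain text chunks, independent of the OpenAI client types; `split_thoughts` is an illustrative stand-in, not the khoj implementation.

```python
# Sketch: split a leading <think>...</think> block out of a stream of text chunks,
# handling tags that arrive partially across chunk boundaries.
from typing import Iterable, Iterator, Tuple


def split_thoughts(chunks: Iterable[str], tag: str = "think") -> Iterator[Tuple[str, str]]:
    """Yield ("thought", text) and ("message", text) pieces from a text stream.

    Assumes at most one <tag>...</tag> block, only at the very start of the stream.
    """
    start_tag, end_tag = f"<{tag}>", f"</{tag}>"
    buffer = ""
    mode = "detect_start"  # detect_start -> thought (optional) -> message

    for chunk in chunks:
        if mode == "message":
            yield ("message", chunk)
            continue

        buffer += chunk
        if mode == "detect_start":
            if len(buffer) < len(start_tag) and start_tag.startswith(buffer):
                continue  # could still become the start tag; wait for more text
            if buffer.startswith(start_tag):
                mode = "thought"
                buffer = buffer[len(start_tag):]
            else:
                mode = "message"
                yield ("message", buffer)
                buffer = ""
                continue

        if mode == "thought":
            if end_tag in buffer:
                thought, rest = buffer.split(end_tag, 1)
                if thought:
                    yield ("thought", thought)
                mode = "message"
                if rest:
                    yield ("message", rest)
                buffer = ""
            else:
                # Hold back a possible partial closing tag at the end of the buffer.
                safe = len(buffer) - (len(end_tag) - 1)
                if safe > 0:
                    yield ("thought", buffer[:safe])
                    buffer = buffer[safe:]

    if buffer:
        # Stream ended mid-thought or with an unmatched partial tag; flush what is left.
        yield (("thought" if mode == "thought" else "message"), buffer)


if __name__ == "__main__":
    for kind, text in split_thoughts(["<thi", "nk>plan the answer</thi", "nk>Here is the answer."]):
        print(kind, repr(text))
```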

khoj/processor/operator/__init__.py CHANGED

@@ -44,7 +44,7 @@ async def operate_environment(
     query_files: str = None,  # TODO: Handle query files
     cancellation_event: Optional[asyncio.Event] = None,
     interrupt_queue: Optional[asyncio.Queue] = None,
-    abort_message: Optional[str] =
+    abort_message: Optional[str] = ChatEvent.END_EVENT.value,
     tracer: dict = {},
 ):
     response, user_input_message = None, None

khoj/routers/api_agents.py CHANGED

khoj/routers/api_chat.py CHANGED
@@ -4,6 +4,7 @@ import json
 import logging
 import time
 import uuid
+from dataclasses import dataclass
 from datetime import datetime
 from functools import partial
 from typing import Any, Dict, List, Optional
@@ -703,7 +704,6 @@ async def event_generator(
     train_of_thought = []
     cancellation_event = asyncio.Event()
     child_interrupt_queue: asyncio.Queue = asyncio.Queue(maxsize=10)
-    event_delimiter = "␃🔚␗"
 
     tracer: dict = {
         "mid": turn_id,
@@ -790,7 +790,7 @@ async def event_generator(
 
         # Check if any interrupt query is received
         if interrupt_query := get_message_from_queue(parent_interrupt_queue):
-            if interrupt_query ==
+            if interrupt_query == ChatEvent.END_EVENT.value:
                 cancellation_event.set()
                 logger.debug(f"Chat cancelled by user {user} via interrupt queue.")
             else:
@@ -871,7 +871,7 @@ async def event_generator(
             )
         finally:
             if not cancellation_event.is_set():
-                yield
+                yield ChatEvent.END_EVENT.value
             # Cancel the disconnect monitor task if it is still running
             if cancellation_event.is_set() or event_type == ChatEvent.END_RESPONSE:
                 await cancel_disconnect_monitor()
@@ -1043,7 +1043,7 @@ async def event_generator(
                 tracer=tracer,
                 cancellation_event=cancellation_event,
                 interrupt_queue=child_interrupt_queue,
-                abort_message=
+                abort_message=ChatEvent.END_EVENT.value,
             ):
                 if isinstance(research_result, ResearchIteration):
                     if research_result.summarizedResult:
@@ -1397,6 +1397,7 @@ async def event_generator(
         )
 
         full_response = ""
+        message_start = True
         async for item in llm_response:
             # Should not happen with async generator. Skip.
             if item is None or not isinstance(item, ResponseWithThought):
@@ -1410,10 +1411,11 @@ async def event_generator(
                 async for result in send_event(ChatEvent.THOUGHT, item.thought):
                     yield result
                 continue
-
             # Start sending response
-
-
+            elif message_start:
+                message_start = False
+                async for result in send_event(ChatEvent.START_LLM_RESPONSE, ""):
+                    yield result
 
             try:
                 async for result in send_event(ChatEvent.MESSAGE, message):
@@ -1423,6 +1425,13 @@ async def event_generator(
                 logger.warning(f"Error during streaming. Stopping send: {e}")
                 break
 
+            # Check if the user has disconnected
+            if cancellation_event.is_set():
+                logger.debug(f"Stopping LLM response to user {user} on {common.client} client.")
+                # Cancel the disconnect monitor task if it is still running
+                await cancel_disconnect_monitor()
+                return
+
         # Save conversation once finish streaming
         asyncio.create_task(
             save_to_conversation_log(
@@ -1448,16 +1457,16 @@ async def event_generator(
         )
 
         # Signal end of LLM response after the loop finishes
-
-
-
-
-
-
-
-
-
-
+        async for result in send_event(ChatEvent.END_LLM_RESPONSE, ""):
+            yield result
+
+        # Send Usage Metadata once llm interactions are complete
+        if tracer.get("usage"):
+            async for event in send_event(ChatEvent.USAGE, tracer.get("usage")):
+                yield event
+        async for result in send_event(ChatEvent.END_RESPONSE, ""):
+            yield result
+        logger.debug("Finished streaming response")
 
         # Cancel the disconnect monitor task if it is still running
         await cancel_disconnect_monitor()
@@ -1509,8 +1518,7 @@ async def chat_ws(
             if data.get("type") == "interrupt":
                 if current_task and not current_task.done():
                     # Send interrupt signal to the ongoing task
-
-                    await interrupt_queue.put(data.get("query") or abort_message)
+                    await interrupt_queue.put(data.get("query") or ChatEvent.END_EVENT.value)
                     logger.info(
                         f"Interrupt signal sent to ongoing task for user {websocket.scope['user'].object.id} with query: {data.get('query')}"
                     )
@@ -1572,6 +1580,37 @@ async def process_chat_request(
     interrupt_queue: asyncio.Queue,
 ):
     """Process a single chat request with interrupt support"""
+
+    # Server-side message buffering for better streaming performance
+    @dataclass
+    class MessageBuffer:
+        """Buffer for managing streamed chat messages with timing control."""
+
+        content: str = ""
+        timeout: Optional[asyncio.Task] = None
+        last_flush: float = 0.0
+
+        def __post_init__(self):
+            """Initialize last_flush with current time if not provided."""
+            if self.last_flush == 0.0:
+                self.last_flush = time.perf_counter()
+
+    message_buffer = MessageBuffer()
+    BUFFER_FLUSH_INTERVAL = 0.1  # 100ms buffer interval
+    BUFFER_MAX_SIZE = 512  # Flush if buffer reaches this size
+
+    async def flush_message_buffer():
+        """Flush the accumulated message buffer to the client"""
+        nonlocal message_buffer
+        if message_buffer.content:
+            buffered_content = message_buffer.content
+            message_buffer.content = ""
+            message_buffer.last_flush = time.perf_counter()
+            if message_buffer.timeout:
+                message_buffer.timeout.cancel()
+                message_buffer.timeout = None
+            yield buffered_content
+
     try:
         # Since we are using websockets, we can ignore the stream parameter and always stream
         response_iterator = event_generator(
@@ -1583,7 +1622,43 @@ async def process_chat_request(
             interrupt_queue,
         )
         async for event in response_iterator:
-
+            if event.startswith("{") and event.endswith("}"):
+                evt_json = json.loads(event)
+                if evt_json["type"] == ChatEvent.END_LLM_RESPONSE.value:
+                    # Flush remaining buffer content on end llm response event
+                    chunks = "".join([chunk async for chunk in flush_message_buffer()])
+                    await websocket.send_text(chunks)
+                    await websocket.send_text(ChatEvent.END_EVENT.value)
+                await websocket.send_text(event)
+                await websocket.send_text(ChatEvent.END_EVENT.value)
+            elif event != ChatEvent.END_EVENT.value:
+                # Buffer MESSAGE events for better streaming performance
+                message_buffer.content += str(event)
+
+                # Flush if buffer is too large or enough time has passed
+                current_time = time.perf_counter()
+                should_flush_time = (current_time - message_buffer.last_flush) >= BUFFER_FLUSH_INTERVAL
+                should_flush_size = len(message_buffer.content) >= BUFFER_MAX_SIZE
+
+                if should_flush_size or should_flush_time:
+                    chunks = "".join([chunk async for chunk in flush_message_buffer()])
+                    await websocket.send_text(chunks)
+                    await websocket.send_text(ChatEvent.END_EVENT.value)
+                else:
+                    # Cancel any previous timeout tasks to reset the flush timer
+                    if message_buffer.timeout:
+                        message_buffer.timeout.cancel()
+
+                    async def delayed_flush():
+                        """Flush message buffer if no new messages arrive within debounce interval."""
+                        await asyncio.sleep(BUFFER_FLUSH_INTERVAL)
+                        # Check if there's still content to flush
+                        chunks = "".join([chunk async for chunk in flush_message_buffer()])
+                        await websocket.send_text(chunks)
+                        await websocket.send_text(ChatEvent.END_EVENT.value)
+
+                    # Flush buffer if no new messages arrive within debounce interval
+                    message_buffer.timeout = asyncio.create_task(delayed_flush())
     except asyncio.CancelledError:
         logger.debug(f"Chat request cancelled for user {websocket.scope['user'].object.id}")
         raise
khoj/routers/helpers.py CHANGED

@@ -2099,7 +2099,8 @@ class WebSocketConnectionManager:
             user=user, slug__startswith=self.connection_slug_prefix
         ).acount()
 
-
+        # Restrict max active connections per user in production
+        return active_connections < max_connections or state.anonymous_mode or in_debug_mode()
 
     async def register_connection(self, user: KhojUser, connection_id: str) -> None:
         """Register a new WebSocket connection."""
@@ -2616,7 +2617,6 @@ class MessageProcessor:
 
 async def read_chat_stream(response_iterator: AsyncGenerator[str, None]) -> Dict[str, Any]:
     processor = MessageProcessor()
-    event_delimiter = "␃🔚␗"
     buffer = ""
 
     async for chunk in response_iterator:
@@ -2624,9 +2624,9 @@ async def read_chat_stream(response_iterator: AsyncGenerator[str, None]) -> Dict
         buffer += chunk
 
         # Once the buffer contains a complete event
-        while
+        while ChatEvent.END_EVENT.value in buffer:
             # Extract the event from the buffer
-            event, buffer = buffer.split(
+            event, buffer = buffer.split(ChatEvent.END_EVENT.value, 1)
             # Process the event
             if event:
                 processor.process_message_chunk(event)
khoj/routers/research.py CHANGED

@@ -224,7 +224,7 @@ async def research(
     query_files: str = None,
     cancellation_event: Optional[asyncio.Event] = None,
     interrupt_queue: Optional[asyncio.Queue] = None,
-    abort_message: str =
+    abort_message: str = ChatEvent.END_EVENT.value,
 ):
     max_document_searches = 7
     max_online_searches = 3
khoj/routers/web_client.py CHANGED

@@ -139,3 +139,8 @@ def automations_config_page(
 @web_client.get("/.well-known/assetlinks.json", response_class=FileResponse)
 def assetlinks(request: Request):
     return FileResponse(constants.assetlinks_file_path)
+
+
+@web_client.get("/server/error", response_class=HTMLResponse)
+def server_error_page(request: Request):
+    return templates.TemplateResponse("error.html", context={"request": request})