mito-ai 0.1.33__py3-none-any.whl → 0.1.49__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (146)
  1. mito_ai/__init__.py +49 -9
  2. mito_ai/_version.py +1 -1
  3. mito_ai/anthropic_client.py +142 -67
  4. mito_ai/{app_builder → app_deploy}/__init__.py +1 -1
  5. mito_ai/app_deploy/app_deploy_utils.py +44 -0
  6. mito_ai/app_deploy/handlers.py +345 -0
  7. mito_ai/{app_builder → app_deploy}/models.py +35 -22
  8. mito_ai/app_manager/__init__.py +4 -0
  9. mito_ai/app_manager/handlers.py +167 -0
  10. mito_ai/app_manager/models.py +71 -0
  11. mito_ai/app_manager/utils.py +24 -0
  12. mito_ai/auth/README.md +18 -0
  13. mito_ai/auth/__init__.py +6 -0
  14. mito_ai/auth/handlers.py +96 -0
  15. mito_ai/auth/urls.py +13 -0
  16. mito_ai/chat_history/handlers.py +63 -0
  17. mito_ai/chat_history/urls.py +32 -0
  18. mito_ai/completions/completion_handlers/agent_execution_handler.py +1 -1
  19. mito_ai/completions/completion_handlers/chat_completion_handler.py +4 -4
  20. mito_ai/completions/completion_handlers/utils.py +99 -37
  21. mito_ai/completions/handlers.py +57 -20
  22. mito_ai/completions/message_history.py +9 -1
  23. mito_ai/completions/models.py +31 -7
  24. mito_ai/completions/prompt_builders/agent_execution_prompt.py +21 -2
  25. mito_ai/completions/prompt_builders/agent_smart_debug_prompt.py +8 -0
  26. mito_ai/completions/prompt_builders/agent_system_message.py +115 -42
  27. mito_ai/completions/prompt_builders/chat_name_prompt.py +6 -6
  28. mito_ai/completions/prompt_builders/chat_prompt.py +18 -11
  29. mito_ai/completions/prompt_builders/chat_system_message.py +4 -0
  30. mito_ai/completions/prompt_builders/prompt_constants.py +23 -4
  31. mito_ai/completions/prompt_builders/utils.py +72 -10
  32. mito_ai/completions/providers.py +81 -47
  33. mito_ai/constants.py +25 -24
  34. mito_ai/file_uploads/__init__.py +3 -0
  35. mito_ai/file_uploads/handlers.py +248 -0
  36. mito_ai/file_uploads/urls.py +21 -0
  37. mito_ai/gemini_client.py +44 -48
  38. mito_ai/log/handlers.py +10 -3
  39. mito_ai/log/urls.py +3 -3
  40. mito_ai/openai_client.py +30 -44
  41. mito_ai/path_utils.py +70 -0
  42. mito_ai/streamlit_conversion/agent_utils.py +37 -0
  43. mito_ai/streamlit_conversion/prompts/prompt_constants.py +172 -0
  44. mito_ai/streamlit_conversion/prompts/prompt_utils.py +10 -0
  45. mito_ai/streamlit_conversion/prompts/streamlit_app_creation_prompt.py +46 -0
  46. mito_ai/streamlit_conversion/prompts/streamlit_error_correction_prompt.py +28 -0
  47. mito_ai/streamlit_conversion/prompts/streamlit_finish_todo_prompt.py +45 -0
  48. mito_ai/streamlit_conversion/prompts/streamlit_system_prompt.py +56 -0
  49. mito_ai/streamlit_conversion/prompts/update_existing_app_prompt.py +50 -0
  50. mito_ai/streamlit_conversion/search_replace_utils.py +94 -0
  51. mito_ai/streamlit_conversion/streamlit_agent_handler.py +144 -0
  52. mito_ai/streamlit_conversion/streamlit_utils.py +85 -0
  53. mito_ai/streamlit_conversion/validate_streamlit_app.py +105 -0
  54. mito_ai/streamlit_preview/__init__.py +6 -0
  55. mito_ai/streamlit_preview/handlers.py +111 -0
  56. mito_ai/streamlit_preview/manager.py +152 -0
  57. mito_ai/streamlit_preview/urls.py +22 -0
  58. mito_ai/streamlit_preview/utils.py +29 -0
  59. mito_ai/tests/chat_history/test_chat_history.py +211 -0
  60. mito_ai/tests/completions/completion_handlers_utils_test.py +190 -0
  61. mito_ai/tests/deploy_app/test_app_deploy_utils.py +89 -0
  62. mito_ai/tests/file_uploads/__init__.py +2 -0
  63. mito_ai/tests/file_uploads/test_handlers.py +282 -0
  64. mito_ai/tests/message_history/test_generate_short_chat_name.py +0 -4
  65. mito_ai/tests/message_history/test_message_history_utils.py +103 -23
  66. mito_ai/tests/open_ai_utils_test.py +18 -22
  67. mito_ai/tests/providers/test_anthropic_client.py +447 -0
  68. mito_ai/tests/providers/test_azure.py +2 -6
  69. mito_ai/tests/providers/test_capabilities.py +120 -0
  70. mito_ai/tests/{test_gemini_client.py → providers/test_gemini_client.py} +40 -36
  71. mito_ai/tests/providers/test_mito_server_utils.py +448 -0
  72. mito_ai/tests/providers/test_model_resolution.py +130 -0
  73. mito_ai/tests/providers/test_openai_client.py +57 -0
  74. mito_ai/tests/providers/test_provider_completion_exception.py +66 -0
  75. mito_ai/tests/providers/test_provider_limits.py +42 -0
  76. mito_ai/tests/providers/test_providers.py +382 -0
  77. mito_ai/tests/providers/test_retry_logic.py +389 -0
  78. mito_ai/tests/providers/test_stream_mito_server_utils.py +140 -0
  79. mito_ai/tests/providers/utils.py +85 -0
  80. mito_ai/tests/streamlit_conversion/__init__.py +3 -0
  81. mito_ai/tests/streamlit_conversion/test_apply_search_replace.py +240 -0
  82. mito_ai/tests/streamlit_conversion/test_streamlit_agent_handler.py +246 -0
  83. mito_ai/tests/streamlit_conversion/test_streamlit_utils.py +193 -0
  84. mito_ai/tests/streamlit_conversion/test_validate_streamlit_app.py +112 -0
  85. mito_ai/tests/streamlit_preview/test_streamlit_preview_handler.py +118 -0
  86. mito_ai/tests/streamlit_preview/test_streamlit_preview_manager.py +292 -0
  87. mito_ai/tests/test_constants.py +31 -3
  88. mito_ai/tests/test_telemetry.py +12 -0
  89. mito_ai/tests/user/__init__.py +2 -0
  90. mito_ai/tests/user/test_user.py +120 -0
  91. mito_ai/tests/utils/test_anthropic_utils.py +6 -6
  92. mito_ai/user/handlers.py +45 -0
  93. mito_ai/user/urls.py +21 -0
  94. mito_ai/utils/anthropic_utils.py +55 -121
  95. mito_ai/utils/create.py +17 -1
  96. mito_ai/utils/error_classes.py +42 -0
  97. mito_ai/utils/gemini_utils.py +39 -94
  98. mito_ai/utils/message_history_utils.py +7 -4
  99. mito_ai/utils/mito_server_utils.py +242 -0
  100. mito_ai/utils/open_ai_utils.py +38 -155
  101. mito_ai/utils/provider_utils.py +49 -0
  102. mito_ai/utils/server_limits.py +1 -1
  103. mito_ai/utils/telemetry_utils.py +137 -5
  104. {mito_ai-0.1.33.data → mito_ai-0.1.49.data}/data/share/jupyter/labextensions/mito_ai/build_log.json +102 -100
  105. {mito_ai-0.1.33.data → mito_ai-0.1.49.data}/data/share/jupyter/labextensions/mito_ai/package.json +4 -2
  106. {mito_ai-0.1.33.data → mito_ai-0.1.49.data}/data/share/jupyter/labextensions/mito_ai/schemas/mito_ai/package.json.orig +3 -1
  107. {mito_ai-0.1.33.data → mito_ai-0.1.49.data}/data/share/jupyter/labextensions/mito_ai/schemas/mito_ai/toolbar-buttons.json +2 -2
  108. mito_ai-0.1.33.data/data/share/jupyter/labextensions/mito_ai/static/lib_index_js.281f4b9af60d620c6fb1.js → mito_ai-0.1.49.data/data/share/jupyter/labextensions/mito_ai/static/lib_index_js.8f1845da6bf2b128c049.js +15948 -8403
  109. mito_ai-0.1.49.data/data/share/jupyter/labextensions/mito_ai/static/lib_index_js.8f1845da6bf2b128c049.js.map +1 -0
  110. mito_ai-0.1.49.data/data/share/jupyter/labextensions/mito_ai/static/node_modules_process_browser_js.4b128e94d31a81ebd209.js +198 -0
  111. mito_ai-0.1.49.data/data/share/jupyter/labextensions/mito_ai/static/node_modules_process_browser_js.4b128e94d31a81ebd209.js.map +1 -0
  112. mito_ai-0.1.33.data/data/share/jupyter/labextensions/mito_ai/static/remoteEntry.4f1d00fd0c58fcc05d8d.js → mito_ai-0.1.49.data/data/share/jupyter/labextensions/mito_ai/static/remoteEntry.8b24b5b3b93f95205b56.js +58 -33
  113. mito_ai-0.1.49.data/data/share/jupyter/labextensions/mito_ai/static/remoteEntry.8b24b5b3b93f95205b56.js.map +1 -0
  114. mito_ai-0.1.33.data/data/share/jupyter/labextensions/mito_ai/static/style_index_js.06083e515de4862df010.js → mito_ai-0.1.49.data/data/share/jupyter/labextensions/mito_ai/static/style_index_js.5876024bb17dbd6a3ee6.js +10 -2
  115. mito_ai-0.1.49.data/data/share/jupyter/labextensions/mito_ai/static/style_index_js.5876024bb17dbd6a3ee6.js.map +1 -0
  116. mito_ai-0.1.49.data/data/share/jupyter/labextensions/mito_ai/static/vendors-node_modules_aws-amplify_auth_dist_esm_providers_cognito_apis_signOut_mjs-node_module-75790d.688c25857e7b81b1740f.js +533 -0
  117. mito_ai-0.1.49.data/data/share/jupyter/labextensions/mito_ai/static/vendors-node_modules_aws-amplify_auth_dist_esm_providers_cognito_apis_signOut_mjs-node_module-75790d.688c25857e7b81b1740f.js.map +1 -0
  118. mito_ai-0.1.49.data/data/share/jupyter/labextensions/mito_ai/static/vendors-node_modules_aws-amplify_auth_dist_esm_providers_cognito_tokenProvider_tokenProvider_-72f1c8.a917210f057fcfe224ad.js +6941 -0
  119. mito_ai-0.1.49.data/data/share/jupyter/labextensions/mito_ai/static/vendors-node_modules_aws-amplify_auth_dist_esm_providers_cognito_tokenProvider_tokenProvider_-72f1c8.a917210f057fcfe224ad.js.map +1 -0
  120. mito_ai-0.1.49.data/data/share/jupyter/labextensions/mito_ai/static/vendors-node_modules_aws-amplify_dist_esm_index_mjs.6bac1a8c4cc93f15f6b7.js +1021 -0
  121. mito_ai-0.1.49.data/data/share/jupyter/labextensions/mito_ai/static/vendors-node_modules_aws-amplify_dist_esm_index_mjs.6bac1a8c4cc93f15f6b7.js.map +1 -0
  122. mito_ai-0.1.49.data/data/share/jupyter/labextensions/mito_ai/static/vendors-node_modules_aws-amplify_ui-react_dist_esm_index_mjs.4fcecd65bef9e9847609.js +59698 -0
  123. mito_ai-0.1.49.data/data/share/jupyter/labextensions/mito_ai/static/vendors-node_modules_aws-amplify_ui-react_dist_esm_index_mjs.4fcecd65bef9e9847609.js.map +1 -0
  124. mito_ai-0.1.49.data/data/share/jupyter/labextensions/mito_ai/static/vendors-node_modules_react-dom_client_js-node_modules_aws-amplify_ui-react_dist_styles_css.b43d4249e4d3dac9ad7b.js +7440 -0
  125. mito_ai-0.1.49.data/data/share/jupyter/labextensions/mito_ai/static/vendors-node_modules_react-dom_client_js-node_modules_aws-amplify_ui-react_dist_styles_css.b43d4249e4d3dac9ad7b.js.map +1 -0
  126. mito_ai-0.1.33.data/data/share/jupyter/labextensions/mito_ai/static/vendors-node_modules_semver_index_js.9795f79265ddb416864b.js → mito_ai-0.1.49.data/data/share/jupyter/labextensions/mito_ai/static/vendors-node_modules_semver_index_js.3f6754ac5116d47de76b.js +2 -240
  127. mito_ai-0.1.49.data/data/share/jupyter/labextensions/mito_ai/static/vendors-node_modules_semver_index_js.3f6754ac5116d47de76b.js.map +1 -0
  128. {mito_ai-0.1.33.dist-info → mito_ai-0.1.49.dist-info}/METADATA +5 -2
  129. mito_ai-0.1.49.dist-info/RECORD +205 -0
  130. mito_ai/app_builder/handlers.py +0 -218
  131. mito_ai/tests/providers_test.py +0 -438
  132. mito_ai/tests/test_anthropic_client.py +0 -270
  133. mito_ai-0.1.33.data/data/share/jupyter/labextensions/mito_ai/static/lib_index_js.281f4b9af60d620c6fb1.js.map +0 -1
  134. mito_ai-0.1.33.data/data/share/jupyter/labextensions/mito_ai/static/remoteEntry.4f1d00fd0c58fcc05d8d.js.map +0 -1
  135. mito_ai-0.1.33.data/data/share/jupyter/labextensions/mito_ai/static/style_index_js.06083e515de4862df010.js.map +0 -1
  136. mito_ai-0.1.33.data/data/share/jupyter/labextensions/mito_ai/static/vendors-node_modules_html2canvas_dist_html2canvas_js.ea47e8c8c906197f8d19.js +0 -7842
  137. mito_ai-0.1.33.data/data/share/jupyter/labextensions/mito_ai/static/vendors-node_modules_html2canvas_dist_html2canvas_js.ea47e8c8c906197f8d19.js.map +0 -1
  138. mito_ai-0.1.33.data/data/share/jupyter/labextensions/mito_ai/static/vendors-node_modules_semver_index_js.9795f79265ddb416864b.js.map +0 -1
  139. mito_ai-0.1.33.dist-info/RECORD +0 -134
  140. {mito_ai-0.1.33.data → mito_ai-0.1.49.data}/data/etc/jupyter/jupyter_server_config.d/mito_ai.json +0 -0
  141. {mito_ai-0.1.33.data → mito_ai-0.1.49.data}/data/share/jupyter/labextensions/mito_ai/static/style.js +0 -0
  142. {mito_ai-0.1.33.data → mito_ai-0.1.49.data}/data/share/jupyter/labextensions/mito_ai/static/vendors-node_modules_vscode-diff_dist_index_js.ea55f1f9346638aafbcf.js +0 -0
  143. {mito_ai-0.1.33.data → mito_ai-0.1.49.data}/data/share/jupyter/labextensions/mito_ai/static/vendors-node_modules_vscode-diff_dist_index_js.ea55f1f9346638aafbcf.js.map +0 -0
  144. {mito_ai-0.1.33.dist-info → mito_ai-0.1.49.dist-info}/WHEEL +0 -0
  145. {mito_ai-0.1.33.dist-info → mito_ai-0.1.49.dist-info}/entry_points.txt +0 -0
  146. {mito_ai-0.1.33.dist-info → mito_ai-0.1.49.dist-info}/licenses/LICENSE +0 -0
@@ -5,14 +5,17 @@ import asyncio
5
5
  import json
6
6
  import time
7
7
  from typing import Any, Dict, List, Optional, Callable, Union, AsyncGenerator, Tuple
8
- from tornado.httpclient import AsyncHTTPClient
8
+ from mito_ai.utils.mito_server_utils import get_response_from_mito_server, stream_response_from_mito_server
9
9
  from mito_ai.completions.models import AgentResponse, CompletionReply, CompletionStreamChunk, CompletionItem, MessageType
10
- from .utils import _create_http_client
11
10
  from mito_ai.constants import MITO_GEMINI_URL
11
+ from mito_ai.utils.provider_utils import does_message_require_fast_model
12
+ from mito_ai.utils.utils import _create_http_client
12
13
 
13
14
  timeout = 30
14
15
  max_retries = 1
15
16
 
17
+ FAST_GEMINI_MODEL = "gemini-2.0-flash-lite"
18
+
16
19
  def _prepare_gemini_request_data_and_headers(
17
20
  model: str,
18
21
  contents: List[Dict[str, Any]],
@@ -62,116 +65,58 @@ async def get_gemini_completion_from_mito_server(
62
65
  response_format_info: Optional[Any] = None
63
66
  ) -> str:
64
67
  data, headers = _prepare_gemini_request_data_and_headers(model, contents, message_type, config, response_format_info, stream=False)
65
- http_client, http_client_timeout = _create_http_client(timeout, max_retries)
66
- start_time = time.time()
67
- try:
68
- res = await http_client.fetch(
69
- MITO_GEMINI_URL,
70
- method="POST",
71
- headers=headers,
72
- body=json.dumps(data),
73
- request_timeout=http_client_timeout
74
- )
75
- print(f"Gemini request completed in {time.time() - start_time:.2f} seconds")
76
- except Exception as e:
77
- print(f"Gemini request failed after {time.time() - start_time:.2f} seconds with error: {str(e)}")
78
- raise
79
- finally:
80
- http_client.close()
81
-
82
- # The response is a string
83
- return res.body.decode("utf-8")
68
+ return await get_response_from_mito_server(
69
+ MITO_GEMINI_URL,
70
+ headers,
71
+ data,
72
+ timeout,
73
+ max_retries,
74
+ message_type,
75
+ provider_name="Gemini"
76
+ )
84
77
 
85
78
async def stream_gemini_completion_from_mito_server(
    model: str,
    contents: List[Dict[str, Any]],
    message_type: MessageType,
    message_id: str,
    reply_fn: Callable[[Union[CompletionReply, CompletionStreamChunk]], None]
) -> AsyncGenerator[str, None]:
    """
    Stream a Gemini completion through the Mito server.

    Builds the Gemini request payload, then delegates the HTTP streaming to
    the shared stream_response_from_mito_server helper, supplying a
    Gemini-specific chunk processor.

    Args:
        model: Gemini model identifier to request.
        contents: Gemini-format message contents.
        message_type: Message type, forwarded for quota accounting.
        message_id: Parent message id echoed on each streamed chunk.
        reply_fn: Callback invoked with each CompletionStreamChunk.

    Yields:
        Raw (unprocessed) chunk strings from the server stream.
    """
    data, headers = _prepare_gemini_request_data_and_headers(model, contents, message_type, stream=True)

    # Gemini chunks arrive as quoted, backslash-escaped strings: strip the
    # surrounding quotes and undo the escapes before display.
    # NOTE(review): encode()/decode('unicode_escape') round-trips through
    # latin-1, so non-ASCII characters in chunks may be mangled — confirm
    # the server-side encoding.
    def gemini_chunk_processor(chunk: str) -> str:
        clean_chunk = chunk.strip('"')
        return clean_chunk.encode().decode('unicode_escape')

    # Use the unified streaming function with Gemini's chunk processor
    async for chunk in stream_response_from_mito_server(
        url=MITO_GEMINI_URL,
        headers=headers,
        data=data,
        timeout=timeout,
        max_retries=max_retries,
        message_type=message_type,
        reply_fn=reply_fn,
        message_id=message_id,
        chunk_processor=gemini_chunk_processor,
        provider_name="Gemini",
    ):
        yield chunk
164
106
 
165
107
  def get_gemini_completion_function_params(
108
+ message_type: MessageType,
166
109
  model: str,
167
110
  contents: list[dict[str, Any]],
168
- message_type: MessageType,
169
111
  response_format_info: Optional[Any] = None,
170
112
  ) -> Dict[str, Any]:
171
113
  """
172
114
  Build the provider_data dict for Gemini completions, mirroring the OpenAI/Anthropic approach.
173
115
  Only includes fields needed for the Gemini API.
174
116
  """
117
+ message_requires_fast_model = does_message_require_fast_model(message_type)
118
+ model = FAST_GEMINI_MODEL if message_requires_fast_model else model
119
+
175
120
  provider_data: Dict[str, Any] = {
176
121
  "model": model,
177
122
  "contents": contents,
@@ -3,12 +3,14 @@
3
3
 
4
4
  import re
5
5
  from typing import List
6
+ from mito_ai.constants import MESSAGE_HISTORY_TRIM_THRESHOLD
6
7
  from openai.types.chat import ChatCompletionMessageParam
7
8
  from mito_ai.completions.prompt_builders.prompt_constants import (
8
9
  ACTIVE_CELL_ID_SECTION_HEADING,
9
10
  ACTIVE_CELL_OUTPUT_SECTION_HEADING,
10
11
  GET_CELL_OUTPUT_TOOL_RESPONSE_SECTION_HEADING,
11
12
  FILES_SECTION_HEADING,
13
+ STREAMLIT_APP_STATUS_SECTION_HEADING,
12
14
  VARIABLES_SECTION_HEADING,
13
15
  JUPYTER_NOTEBOOK_SECTION_HEADING,
14
16
  CONTENT_REMOVED_PLACEHOLDER
@@ -30,7 +32,8 @@ def trim_sections_from_message_content(content: str) -> str:
30
32
  JUPYTER_NOTEBOOK_SECTION_HEADING,
31
33
  GET_CELL_OUTPUT_TOOL_RESPONSE_SECTION_HEADING,
32
34
  ACTIVE_CELL_OUTPUT_SECTION_HEADING,
33
- ACTIVE_CELL_ID_SECTION_HEADING
35
+ ACTIVE_CELL_ID_SECTION_HEADING,
36
+ STREAMLIT_APP_STATUS_SECTION_HEADING
34
37
  ]
35
38
 
36
39
  for heading in section_headings:
@@ -43,18 +46,18 @@ def trim_sections_from_message_content(content: str) -> str:
43
46
  return content
44
47
 
45
48
 
46
- def trim_old_messages(messages: List[ChatCompletionMessageParam], keep_recent: int = 3) -> List[ChatCompletionMessageParam]:
49
+ def trim_old_messages(messages: List[ChatCompletionMessageParam]) -> List[ChatCompletionMessageParam]:
47
50
  """
48
51
  Trims metadata sections from messages that are older than the specified number of recent messages.
49
52
  We do this in order to reduce the token count of the messages, which helps us stay under the token limit for the LLM.
50
53
  """
51
- if len(messages) <= keep_recent:
54
+ if len(messages) <= MESSAGE_HISTORY_TRIM_THRESHOLD:
52
55
  return messages
53
56
 
54
57
  # Process all messages except the keep_recent most recent ones.
55
58
  # Only trim user messages, which is where this metadata lives.
56
59
  # We want to not edit the system messages, as they contain important information / examples.
57
- for i in range(len(messages) - keep_recent):
60
+ for i in range(len(messages) - MESSAGE_HISTORY_TRIM_THRESHOLD):
58
61
  content = messages[i].get("content")
59
62
 
60
63
  is_user_message = messages[i].get("role") == "user"
@@ -0,0 +1,242 @@
1
+ # Copyright (c) Saga Inc.
2
+ # Distributed under the terms of the GNU Affero General Public License v3.0 License.
3
+
4
+ import asyncio
5
+ import json
6
+ import time
7
+ from typing import Any, Dict, Optional, Callable, Union, AsyncGenerator
8
+ from mito_ai.completions.models import MessageType, CompletionReply, CompletionStreamChunk, CompletionItem
9
+ from mito_ai.utils.server_limits import check_mito_server_quota, update_mito_server_quota
10
+ from tornado.httpclient import HTTPResponse
11
+ from mito_ai.constants import MITO_GEMINI_URL
12
+ from mito_ai.utils.utils import _create_http_client
13
+
14
+ MITO_ERROR_MARKER = "MITO_ERROR_MARKER:"
15
+
16
class ProviderCompletionException(Exception):
    """Custom exception for Mito server errors that converts well to CompletionError."""

    def __init__(self, error_message: str, provider_name: str = "LLM Provider", error_type: str = "LLMProviderError"):
        # Retain the structured pieces so callers can map this onto a
        # CompletionError without re-parsing the message text.
        self.error_message = error_message
        self.provider_name = provider_name
        self.error_type = error_type

        # Pre-build the user-facing title and hint shown in the UI.
        title = f"{provider_name} Error: {error_message}"
        self.user_friendly_title = title
        self.user_friendly_hint = f"There was a problem with {provider_name}. Try switching to a different model and trying again."

        # Exception.args[0] mirrors the title for generic fallback handling.
        super().__init__(title)

    def __str__(self) -> str:
        # Same text as the title: "<provider> Error: <message>".
        return self.user_friendly_title
33
+
34
+
35
async def get_response_from_mito_server(
    url: str,
    headers: Dict[str, str],
    data: Dict[str, Any],
    timeout: int,
    max_retries: int,
    message_type: MessageType,
    provider_name: str = "Mito Server"
) -> str:
    """
    POST a completion request to the Mito server and return the completion text.

    Args:
        url: Mito server endpoint to POST to.
        headers: HTTP request headers.
        data: JSON-serializable request payload.
        timeout: Per-request timeout in seconds.
        max_retries: Maximum retries for the HTTP client.
        message_type: Message type, used for quota checking/accounting.
        provider_name: Human-readable provider name used in error messages.

    Returns:
        The "completion" string from the server's JSON response.

    Raises:
        ProviderCompletionException: When the server returns an error payload,
            a response without a "completion" field, or an unparseable body.
        Exception: Network/HTTP errors bubble up so callers' retry logic
            can handle them.
    """
    # First check the mito server quota. If the user has reached the limit,
    # this raises before we make any network request.
    check_mito_server_quota(message_type)

    http_client, http_client_timeout = _create_http_client(timeout, max_retries)
    start_time = time.time()

    try:
        res = await http_client.fetch(
            url,
            method="POST",
            headers=headers,
            body=json.dumps(data),
            request_timeout=http_client_timeout
        )
        print(f"Mito server request completed in {time.time() - start_time:.2f} seconds")

        # Parse and validate response
        try:
            content = json.loads(res.body.decode("utf-8"))

            if "completion" in content:
                return content["completion"]  # type: ignore
            elif "error" in content:
                # Server returned an explicit error payload
                raise ProviderCompletionException(content['error'], provider_name=provider_name)
            else:
                # Invalid response format: neither "completion" nor "error"
                raise ProviderCompletionException(f"No completion found in response: {content}", provider_name=provider_name)
        except ProviderCompletionException:
            # Re-raise ProviderCompletionException as-is
            raise
        except Exception as e:
            # Chain the original decode/parse error so the root cause is
            # preserved in tracebacks.
            raise ProviderCompletionException(f"Error parsing response: {str(e)}", provider_name=provider_name) from e

    finally:
        # Always record usage against the quota, even on failure; quota
        # bookkeeping errors must never mask the real result/exception.
        try:
            update_mito_server_quota(message_type)
        except Exception:
            pass

        http_client.close()
93
+
94
+
95
async def stream_response_from_mito_server(
    url: str,
    headers: Dict[str, str],
    data: Dict[str, Any],
    timeout: int,
    max_retries: int,
    message_type: MessageType,
    reply_fn: Callable[[Union[CompletionReply, CompletionStreamChunk]], None],
    message_id: str,
    chunk_processor: Optional[Callable[[str], str]] = None,
    provider_name: str = "Mito Server",
) -> AsyncGenerator[str, None]:
    """
    Stream responses from the Mito server.

    This is a unified streaming function that can be used by all providers (OpenAI, Anthropic, Gemini).

    Args:
        url: The Mito server URL to stream from
        headers: Request headers
        data: Request data
        timeout: Request timeout in seconds
        max_retries: Maximum number of retries
        message_type: The message type for quota tracking
        reply_fn: Function called with a CompletionStreamChunk for each chunk
            and a final done=True chunk on completion
        message_id: The message ID to track the request
        chunk_processor: Optional function to process chunks before sending to
            the frontend (e.g., for Gemini's special processing)
        provider_name: Name of the provider for error messages

    Yields:
        Raw (unprocessed) chunks of text from the streaming response

    Raises:
        ProviderCompletionException: When a chunk starts with MITO_ERROR_MARKER
    """
    # Check the mito server quota; raises if the user is over the limit
    check_mito_server_quota(message_type)

    # Create HTTP client with appropriate timeout settings
    http_client, http_client_timeout = _create_http_client(timeout, max_retries)

    # Set up streaming infrastructure
    start_time = time.time()
    chunk_queue: asyncio.Queue[str] = asyncio.Queue()
    fetch_complete = False

    # Tornado calls this per received chunk; decode and hand the text to the
    # async consumer via the queue without blocking the IO loop.
    def chunk_callback(chunk: bytes) -> None:
        try:
            chunk_str = chunk.decode('utf-8')
            asyncio.create_task(chunk_queue.put(chunk_str))
        except Exception as e:
            print(f"Error processing {provider_name} streaming chunk: {str(e)}")

    # Execute the streaming request
    fetch_future = None
    try:
        fetch_future = http_client.fetch(
            url,
            method="POST",
            headers=headers,
            body=json.dumps(data),
            request_timeout=http_client_timeout,
            streaming_callback=chunk_callback
        )

        # Create a task to wait for the fetch to complete; it flips
        # fetch_complete so the consumer loop below knows when to stop.
        async def wait_for_fetch() -> None:
            try:
                await fetch_future
                nonlocal fetch_complete
                fetch_complete = True
                print(f"{provider_name} fetch completed")
            except Exception as e:
                print(f"Error in {provider_name} fetch: {str(e)}")
                raise

        # Start the task to wait for fetch completion.
        # NOTE(review): fetch_task is never awaited or cancelled, so a failure
        # inside wait_for_fetch only surfaces via the error-path await on
        # fetch_future below — confirm this is intentional.
        fetch_task = asyncio.create_task(wait_for_fetch())

        # Yield chunks as they arrive
        while not (fetch_complete and chunk_queue.empty()):
            try:
                # Wait for a chunk with a timeout to prevent deadlocks
                chunk = await asyncio.wait_for(chunk_queue.get(), timeout=0.1)

                # Process chunk if processor is provided
                processed_chunk = chunk
                if chunk_processor:
                    processed_chunk = chunk_processor(chunk)

                # Check if this chunk contains an error marker
                if processed_chunk.startswith(MITO_ERROR_MARKER):
                    error_message = processed_chunk[len(MITO_ERROR_MARKER):]
                    print(f"Detected error in {provider_name} stream: {error_message}")
                    raise ProviderCompletionException(error_message, provider_name=provider_name)

                if reply_fn is not None and message_id is not None:
                    # Send the processed chunk directly to the frontend
                    reply_fn(CompletionStreamChunk(
                        parent_id=message_id,
                        chunk=CompletionItem(
                            content=processed_chunk,
                            isIncomplete=True,
                            token=message_id,
                        ),
                        done=False,
                    ))

                # The generator yields the raw chunk, not the processed one
                yield chunk
            except asyncio.TimeoutError:
                # No chunk available within timeout, check if fetch is complete
                if fetch_complete and chunk_queue.empty():
                    break

                # Otherwise continue waiting for chunks
                continue

        print(f"\n{provider_name} stream completed in {time.time() - start_time:.2f} seconds")

        if reply_fn is not None and message_id is not None:
            # Send a final chunk to indicate completion
            reply_fn(CompletionStreamChunk(
                parent_id=message_id,
                chunk=CompletionItem(
                    content="",
                    isIncomplete=False,
                    token=message_id,
                ),
                done=True,
            ))
    except Exception as e:
        print(f"\n{provider_name} stream failed after {time.time() - start_time:.2f} seconds with error: {str(e)}")
        # If an exception occurred, ensure the fetch future is awaited to properly clean up
        if fetch_future:
            try:
                await fetch_future
            except Exception:
                pass
        raise
    finally:
        # Clean up resources
        try:
            # We always update the quota, even if there is an error
            update_mito_server_quota(message_type)
        except Exception as e:
            pass

        http_client.close()
241
+
242
+