mito-ai 0.1.37__py3-none-any.whl → 0.1.39__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of mito-ai might be problematic.

Files changed (56)
  1. mito_ai/__init__.py +17 -1
  2. mito_ai/_version.py +1 -1
  3. mito_ai/app_builder/handlers.py +43 -38
  4. mito_ai/app_builder/models.py +1 -1
  5. mito_ai/completions/handlers.py +1 -1
  6. mito_ai/completions/prompt_builders/agent_system_message.py +18 -45
  7. mito_ai/completions/prompt_builders/chat_name_prompt.py +6 -6
  8. mito_ai/log/handlers.py +10 -3
  9. mito_ai/log/urls.py +3 -3
  10. mito_ai/openai_client.py +1 -1
  11. mito_ai/streamlit_conversion/agent_utils.py +116 -0
  12. mito_ai/streamlit_conversion/prompts/prompt_constants.py +59 -0
  13. mito_ai/streamlit_conversion/prompts/prompt_utils.py +10 -0
  14. mito_ai/streamlit_conversion/prompts/streamlit_app_creation_prompt.py +45 -0
  15. mito_ai/streamlit_conversion/prompts/streamlit_error_correction_prompt.py +28 -0
  16. mito_ai/streamlit_conversion/prompts/streamlit_finish_todo_prompt.py +44 -0
  17. mito_ai/streamlit_conversion/streamlit_agent_handler.py +90 -44
  18. mito_ai/streamlit_conversion/streamlit_system_prompt.py +30 -17
  19. mito_ai/streamlit_conversion/streamlit_utils.py +48 -8
  20. mito_ai/streamlit_conversion/validate_streamlit_app.py +116 -0
  21. mito_ai/streamlit_preview/__init__.py +7 -0
  22. mito_ai/streamlit_preview/handlers.py +164 -0
  23. mito_ai/streamlit_preview/manager.py +159 -0
  24. mito_ai/streamlit_preview/urls.py +22 -0
  25. mito_ai/tests/streamlit_conversion/test_streamlit_agent_handler.py +166 -78
  26. mito_ai/tests/streamlit_conversion/test_streamlit_utils.py +4 -5
  27. mito_ai/tests/streamlit_conversion/test_validate_streamlit_app.py +119 -0
  28. mito_ai/tests/streamlit_preview/test_streamlit_preview_manager.py +302 -0
  29. mito_ai/tests/utils/test_anthropic_utils.py +2 -2
  30. mito_ai/utils/anthropic_utils.py +4 -4
  31. mito_ai/utils/open_ai_utils.py +0 -4
  32. mito_ai/utils/telemetry_utils.py +28 -1
  33. {mito_ai-0.1.37.data → mito_ai-0.1.39.data}/data/share/jupyter/labextensions/mito_ai/build_log.json +1 -1
  34. {mito_ai-0.1.37.data → mito_ai-0.1.39.data}/data/share/jupyter/labextensions/mito_ai/package.json +2 -2
  35. {mito_ai-0.1.37.data → mito_ai-0.1.39.data}/data/share/jupyter/labextensions/mito_ai/schemas/mito_ai/package.json.orig +1 -1
  36. {mito_ai-0.1.37.data → mito_ai-0.1.39.data}/data/share/jupyter/labextensions/mito_ai/schemas/mito_ai/toolbar-buttons.json +6 -1
  37. mito_ai-0.1.37.data/data/share/jupyter/labextensions/mito_ai/static/lib_index_js.831f63b48760c7119b9b.js → mito_ai-0.1.39.data/data/share/jupyter/labextensions/mito_ai/static/lib_index_js.16b532b655cd2906e04a.js +799 -116
  38. mito_ai-0.1.39.data/data/share/jupyter/labextensions/mito_ai/static/lib_index_js.16b532b655cd2906e04a.js.map +1 -0
  39. mito_ai-0.1.37.data/data/share/jupyter/labextensions/mito_ai/static/remoteEntry.93ecc9bc0edba61535cc.js → mito_ai-0.1.39.data/data/share/jupyter/labextensions/mito_ai/static/remoteEntry.606207904e6aaa42b1bf.js +5 -5
  40. mito_ai-0.1.37.data/data/share/jupyter/labextensions/mito_ai/static/remoteEntry.93ecc9bc0edba61535cc.js.map → mito_ai-0.1.39.data/data/share/jupyter/labextensions/mito_ai/static/remoteEntry.606207904e6aaa42b1bf.js.map +1 -1
  41. {mito_ai-0.1.37.dist-info → mito_ai-0.1.39.dist-info}/METADATA +4 -1
  42. {mito_ai-0.1.37.dist-info → mito_ai-0.1.39.dist-info}/RECORD +53 -42
  43. mito_ai/streamlit_conversion/validate_and_run_streamlit_code.py +0 -207
  44. mito_ai/tests/streamlit_conversion/test_validate_and_run_streamlit_code.py +0 -418
  45. mito_ai-0.1.37.data/data/share/jupyter/labextensions/mito_ai/static/lib_index_js.831f63b48760c7119b9b.js.map +0 -1
  46. {mito_ai-0.1.37.data → mito_ai-0.1.39.data}/data/etc/jupyter/jupyter_server_config.d/mito_ai.json +0 -0
  47. {mito_ai-0.1.37.data → mito_ai-0.1.39.data}/data/share/jupyter/labextensions/mito_ai/static/style.js +0 -0
  48. {mito_ai-0.1.37.data → mito_ai-0.1.39.data}/data/share/jupyter/labextensions/mito_ai/static/style_index_js.5876024bb17dbd6a3ee6.js +0 -0
  49. {mito_ai-0.1.37.data → mito_ai-0.1.39.data}/data/share/jupyter/labextensions/mito_ai/static/style_index_js.5876024bb17dbd6a3ee6.js.map +0 -0
  50. {mito_ai-0.1.37.data → mito_ai-0.1.39.data}/data/share/jupyter/labextensions/mito_ai/static/vendors-node_modules_semver_index_js.9795f79265ddb416864b.js +0 -0
  51. {mito_ai-0.1.37.data → mito_ai-0.1.39.data}/data/share/jupyter/labextensions/mito_ai/static/vendors-node_modules_semver_index_js.9795f79265ddb416864b.js.map +0 -0
  52. {mito_ai-0.1.37.data → mito_ai-0.1.39.data}/data/share/jupyter/labextensions/mito_ai/static/vendors-node_modules_vscode-diff_dist_index_js.ea55f1f9346638aafbcf.js +0 -0
  53. {mito_ai-0.1.37.data → mito_ai-0.1.39.data}/data/share/jupyter/labextensions/mito_ai/static/vendors-node_modules_vscode-diff_dist_index_js.ea55f1f9346638aafbcf.js.map +0 -0
  54. {mito_ai-0.1.37.dist-info → mito_ai-0.1.39.dist-info}/WHEEL +0 -0
  55. {mito_ai-0.1.37.dist-info → mito_ai-0.1.39.dist-info}/entry_points.txt +0 -0
  56. {mito_ai-0.1.37.dist-info → mito_ai-0.1.39.dist-info}/licenses/LICENSE +0 -0
mito_ai/__init__.py CHANGED
@@ -6,12 +6,22 @@ from jupyter_server.utils import url_path_join
  from mito_ai.completions.handlers import CompletionHandler
  from mito_ai.completions.providers import OpenAIProvider
  from mito_ai.app_builder.handlers import AppBuilderHandler
+ from mito_ai.streamlit_preview.handlers import StreamlitPreviewHandler
  from mito_ai.log.urls import get_log_urls
  from mito_ai.version_check import VersionCheckHandler
  from mito_ai.db.urls import get_db_urls
  from mito_ai.settings.urls import get_settings_urls
  from mito_ai.rules.urls import get_rules_urls
  from mito_ai.auth.urls import get_auth_urls
+ from mito_ai.streamlit_preview.urls import get_streamlit_preview_urls
+
+ # Sometimes matplotlib figures do not show up in the notebook with this warning:
+ # UserWarning: FigureCanvasAgg is non-interactive, and thus cannot be shown
+ # I believe that streamlit is reconfiguring the matplotlib settings and this is happening as a result.
+ # For now, we just set the backend to inline, so that the figures show up again
+ import os
+ os.environ['MPLBACKEND'] = 'inline'
+
  try:
  from _version import __version__
  except ImportError:
@@ -58,6 +68,11 @@ def _load_jupyter_server_extension(server_app) -> None: # type: ignore
  AppBuilderHandler,
  {}
  ),
+ (
+ url_path_join(base_url, "mito-ai", "streamlit-preview"),
+ StreamlitPreviewHandler,
+ {}
+ ),
  (
  url_path_join(base_url, "mito-ai", "version-check"),
  VersionCheckHandler,
@@ -69,8 +84,9 @@ def _load_jupyter_server_extension(server_app) -> None: # type: ignore
  handlers.extend(get_db_urls(base_url)) # type: ignore
  handlers.extend(get_settings_urls(base_url)) # type: ignore
  handlers.extend(get_rules_urls(base_url)) # type: ignore
- handlers.extend(get_log_urls(base_url)) # type: ignore
+ handlers.extend(get_log_urls(base_url, open_ai_provider.key_type)) # type: ignore
  handlers.extend(get_auth_urls(base_url)) # type: ignore
+ handlers.extend(get_streamlit_preview_urls(base_url)) # type: ignore

  web_app.add_handlers(host_pattern, handlers)
  server_app.log.info("Loaded the mito_ai server extension")
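The new routes are wired in through the same `get_*_urls` convention used elsewhere in the extension: each helper returns `(url_pattern, handler_class, handler_kwargs)` tuples that are appended to `handlers` before `web_app.add_handlers` is called. The added `mito_ai/streamlit_preview/urls.py` (+22 lines) is not shown in this section, so the sketch below is only a guess at its shape, modeled on `get_log_urls` further down; the route name and body are assumptions, not the package's actual code.

```python
# Hypothetical sketch of mito_ai/streamlit_preview/urls.py, assuming it follows
# the same (url_pattern, handler_class, handler_kwargs) convention as get_log_urls.
from typing import Any, List, Tuple

from jupyter_server.utils import url_path_join

from mito_ai.streamlit_preview.handlers import StreamlitPreviewHandler


def get_streamlit_preview_urls(base_url: str) -> List[Tuple[str, Any, dict]]:
    """Return the URL patterns served by the Streamlit preview handlers."""
    BASE_URL = base_url + "/mito-ai"
    return [
        # Route name is a placeholder; the real patterns live in the file not shown here.
        (url_path_join(BASE_URL, "streamlit-preview", "example-route"), StreamlitPreviewHandler, {}),
    ]
```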
mito_ai/_version.py CHANGED
@@ -1,4 +1,4 @@
  # This file is auto-generated by Hatchling. As such, do not:
  # - modify
  # - track in version control e.g. be sure to add to .gitignore
- __version__ = VERSION = '0.1.37'
+ __version__ = VERSION = '0.1.39'
mito_ai/app_builder/handlers.py CHANGED
@@ -4,7 +4,7 @@
  import os
  import time
  import logging
- from typing import Any, Union
+ from typing import Any, Union, Optional
  import zipfile
  import tempfile
  from mito_ai.utils.create import initialize_user
@@ -13,6 +13,7 @@ from mito_ai.utils.websocket_base import BaseWebSocketHandler
  from mito_ai.app_builder.models import (
  BuildAppReply,
  AppBuilderError,
+ BuildAppRequest,
  ErrorMessage,
  MessageType
  )
@@ -74,7 +75,8 @@ class AppBuilderHandler(BaseWebSocketHandler):

  if message_type == MessageType.BUILD_APP.value:
  # Handle build app request
- await self._handle_build_app(parsed_message)
+ build_app_request = BuildAppRequest(**parsed_message)
+ await self._handle_build_app(build_app_request)
  else:
  self.log.error(f"Unknown message type: {message_type}")
  error = AppBuilderError(
@@ -98,25 +100,24 @@
  latency_ms = round((time.time() - start) * 1000)
  self.log.info(f"App builder handler processed in {latency_ms} ms.")

- async def _handle_build_app(self, message: dict) -> None:
+ async def _handle_build_app(self, message: BuildAppRequest) -> None:
  """Handle a build app request.

  Args:
  message: The parsed message.
  """
- message_id = message.get('message_id', '') # Default to empty string if not present
- notebook_path = message.get('notebook_path')
- app_path = message.get('app_path')
- jwt_token = message.get('jwt_token', '') # Extract JWT token from request, default to empty string
+ message_id = message.message_id
+ notebook_path = message.notebook_path
+ jwt_token = message.jwt_token

  if not message_id:
  self.log.error("Missing message_id in request")
  return

- if not app_path:
+ if not notebook_path:
  error = AppBuilderError(
  error_type="InvalidRequest",
- title="Missing 'path' parameter"
+ title="Missing 'notebook_path' parameter"
  )
  self.reply(BuildAppReply(
  parent_id=message_id,
@@ -126,32 +127,36 @@
  return

  # Validate JWT token if provided
- if jwt_token and jwt_token != 'placeholder-jwt-token':
- self.log.info(f"Validating JWT token: {jwt_token[:20]}...")
- is_valid = self._validate_jwt_token(jwt_token)
- if not is_valid:
- self.log.error("JWT token validation failed")
- error = AppBuilderError(
- error_type="Unauthorized",
- title="Invalid authentication token",
- hint="Please sign in again to deploy your app."
- )
- self.reply(BuildAppReply(
- parent_id=message_id,
- url="",
- error=error
- ))
- return
- else:
- self.log.info("JWT token validation successful")
+ token_preview = jwt_token[:20] if jwt_token else "No token provided"
+ self.log.info(f"Validating JWT token: {token_preview}...")
+ is_valid = self._validate_jwt_token(jwt_token) if jwt_token else False
+ if not is_valid or not jwt_token:
+ self.log.error("JWT token validation failed")
+ error = AppBuilderError(
+ error_type="Unauthorized",
+ title="Invalid authentication token",
+ hint="Please sign in again to deploy your app."
+ )
+ self.reply(BuildAppReply(
+ parent_id=message_id,
+ url="",
+ error=error
+ ))
+ return
  else:
- self.log.warning("No JWT token provided or using placeholder token")
-
+ self.log.info("JWT token validation successful")
+
  try:
+ notebook_path = str(notebook_path) if notebook_path else ""
+
+ app_directory = os.path.dirname(notebook_path)
+ app_path = os.path.join(app_directory, "app.py")

- success_flag, result_message = await streamlit_handler(str(notebook_path) if notebook_path else "", app_path)
- if not success_flag:
- raise Exception(result_message)
+ if not os.path.exists(app_path):
+ success_flag, app_path_result, result_message = await streamlit_handler(notebook_path)
+ if not success_flag or app_path_result is None:
+ raise Exception(result_message)
+ app_path = app_path_result

  deploy_url = await self._deploy_app(app_path, jwt_token)

@@ -271,15 +276,15 @@
  except requests.exceptions.RequestException as e:
  self.log.error(f"Error during API request: {e}")
  if hasattr(e, 'response') and e.response is not None:
- try:
- error_detail = e.response.json()
- self.log.error(f"Server error details: {error_detail}")
- except:
- self.log.error(f"Server response: {e.response.text}")
- raise Exception(f"Deployment failed: {str(e)}")
+ error_detail = e.response.json()
+ self.log.error(f"Server error details: {error_detail}")
+ if 'error' in error_detail:
+ raise Exception(error_detail['error'])
+ raise
  except Exception as e:
  self.log.error(f"Error during deployment: {str(e)}")
  raise
+ raise RuntimeError("Unexpected error in _deploy_app")

  async def _upload_app_to_s3(self, app_path: str, presigned_url: str) -> requests.Response:
  """Upload the app to S3 using the presigned URL."""
mito_ai/app_builder/models.py CHANGED
@@ -65,7 +65,7 @@ class BuildAppRequest:
  message_id: str

  # Path to the app file.
- path: str
+ notebook_path: str

  # JWT token for authorization.
  jwt_token: Optional[str] = None
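Taken together with the handler changes above, the build request no longer carries an app path; the handler derives `app.py` from `notebook_path` and only regenerates the app when that file is missing. A minimal sketch of the derivation, with a made-up path:

```python
import os

notebook_path = "/home/user/project/analysis.ipynb"  # example value, not from the package
app_directory = os.path.dirname(notebook_path)
app_path = os.path.join(app_directory, "app.py")     # -> /home/user/project/app.py

# Per the diff above, streamlit_handler(notebook_path) is only awaited when this
# app.py does not already exist; otherwise the existing file is deployed as-is.
print(os.path.exists(app_path))
```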
mito_ai/completions/handlers.py CHANGED
@@ -46,7 +46,7 @@ from mito_ai.completions.completion_handlers.agent_execution_handler import get_
  from mito_ai.completions.completion_handlers.agent_auto_error_fixup_handler import get_agent_auto_error_fixup_completion
  from mito_ai.utils.telemetry_utils import identify

- FALLBACK_MODEL = "gpt-4.1" # Default model to use for safety
+ FALLBACK_MODEL = "gpt-5" # Default model to use for safety

  # The GlobalMessageHistory is responsible for updating the message histories stored in the .mito/ai-chats directory.
  # We create one GlobalMessageHistory per backend server instance instead of one per websocket connection so that the
mito_ai/completions/prompt_builders/agent_system_message.py CHANGED
@@ -52,9 +52,6 @@ Format:
  code_summary: str
  cell_type: 'code' | 'markdown'
  }}
- get_cell_output_cell_id: None,
- next_steps: None,
- analysis_assumptions: None
  }}

  Important information:
@@ -64,7 +61,7 @@ Important information:
  4. The code_summary must be a very short phrase (1–5 words maximum) that begins with a verb ending in "-ing" (e.g., "Loading data", "Filtering rows", "Calculating average", "Plotting revenue"). Avoid full sentences or explanations—this should read like a quick commit message or code label, not a description.
  5. Important: Only use the CELL_UPDATE tool if you want to add/modify a notebook cell in response to the user's request. If the user is just sending you a friendly greeting or asking you a question about yourself, you SHOULD NOT USE A CELL_UPDATE tool because it does not require modifying the notebook. Instead, just use the FINISHED_TASK response.
  6. The assumptions is an optional list of critical assumptions that you made about the data or analysis approach. The assumptions you list here will be displayed to the user so that they can confirm or correct the assumptions. For example: ["NaN values in the impressions column represent 0 impressions", "Only crashes with pedestrian or cyclist fatalities are considered fatal crashes", "Intervention priority combines both volume and severity to identify maximum impact opportunities"].
- 7. Only include important data and analytical assumptions that if incorrect would fundamentally change your analysis conclusions. These should be data handling decisions, methodological choices, and definitional boundaries. Do not include: obvious statements ("Each record is counted once"), result interpretation guidance ("Gaps in the plot represent zero values"), display choices ("Data is sorted for clarity"), internal reasoning ("Bar chart is better than line plot"), or environment assumptions ("Library X is installed"). Prioritize quality over quantity - include only the most critical assumptions or None if there are no critical assumptions made in this step that have not already be shared with the user. If you ever doubt whether an assumption is critical enough to be shared with the user as an assumption, don't include it. Most messages should not include an assumption.
+ 7. Only include important data and analytical assumptions that if incorrect would fundamentally change your analysis conclusions. These should be data handling decisions, methodological choices, and definitional boundaries. Do not include: obvious statements ("Each record is counted once"), result interpretation guidance ("Gaps in the plot represent zero values"), display choices ("Data is sorted for clarity"), internal reasoning ("Bar chart is better than line plot"), or environment assumptions ("Library X is installed"). Prioritize quality over quantity - include only the most critical assumptions or omit the field entirely if there are no critical assumptions made in this step that have not already be shared with the user. If you ever doubt whether an assumption is critical enough to be shared with the user as an assumption, don't include it. Most messages should not include an assumption.
  8. Do not include the same assumption or variations of the same assumption multiple times in the same conversation. Once you have presented the assumption to the user, they will already have the opportunity to confirm or correct it so do not include it again.

  #### Cell Addition:
@@ -81,9 +78,6 @@ Format:
  code_summary: str
  cell_type: 'code' | 'markdown'
  }}
- get_cell_output_cell_id: None,
- next_steps: None,
- analysis_assumptions: None
  }}

  Important information:
@@ -93,7 +87,7 @@ Important information:
  4. code_summary must be a very short phrase (1–5 words maximum) that begins with a verb ending in "-ing" (e.g., "Loading data", "Filtering rows", "Calculating average", "Plotting revenue"). Avoid full sentences or explanations—this should read like a quick commit message or code label, not a description.
  5. The cell_type should only be 'markdown' if there is no code to add. There may be times where the code has comments. These are still code cells and should have the cell_type 'code'. Any cells that are labeled 'markdown' will be converted to markdown cells by the user.
  6. The assumptions is an optional list of critical assumptions that you made about the data or analysis approach. The assumptions you list here will be displayed to the user so that they can confirm or correct the assumptions. For example: ["NaN values in the impressions column represent 0 impressions", "Only crashes with pedestrian or cyclist fatalities are considered fatal crashes", "Intervention priority combines both volume and severity to identify maximum impact opportunities"].
- 7. Only include important data and analytical assumptions that if incorrect would fundamentally change your analysis conclusions. These should be data handling decisions, methodological choices, and definitional boundaries. Do not include: obvious statements ("Each record is counted once"), result interpretation guidance ("Gaps in the plot represent zero values"), display choices ("Data is sorted for clarity"), internal reasoning ("Bar chart is better than line plot"), or environment assumptions ("Library X is installed"). Prioritize quality over quantity - include only the most critical assumptions or None if there are no critical assumptions made in this step that have not already be shared with the user. If you ever doubt whether an assumption is critical enough to be shared with the user as an assumption, don't include it. Most messages should not include an assumption.
+ 7. Only include important data and analytical assumptions that if incorrect would fundamentally change your analysis conclusions. These should be data handling decisions, methodological choices, and definitional boundaries. Do not include: obvious statements ("Each record is counted once"), result interpretation guidance ("Gaps in the plot represent zero values"), display choices ("Data is sorted for clarity"), internal reasoning ("Bar chart is better than line plot"), or environment assumptions ("Library X is installed"). Prioritize quality over quantity - include only the most critical assumptions or omit the field entirely if there are no critical assumptions made in this step that have not already be shared with the user. If you ever doubt whether an assumption is critical enough to be shared with the user as an assumption, don't include it. Most messages should not include an assumption.
  8. Do not include the same assumption or variations of the same assumption multiple times in the same conversation. Once you have presented the assumption to the user, they will already have the opportunity to confirm or correct it so do not include it again.

  <Cell Modification Example>
@@ -133,17 +127,14 @@ Convert the transaction_date column to datetime and then multiply the total_pric
  Output:
  {{
  type: 'cell_update',
- cell_type: 'code',
+ message: "I'll convert the transaction_date column to datetime and multiply total_price by the multiplier.",
  cell_update: {{
- type: 'modification'
+ type: 'modification',
  id: 'c68fdf19-db8c-46dd-926f-d90ad35bb3bc',
  code: "import pandas as pd\\nsales_df = pd.read_csv('./sales.csv')\\nloan_multiplier = 1.5\\nsales_df['transaction_date'] = pd.to_datetime(sales_df['transaction_date'])\\nsales_df['total_price'] = sales_df['total_price'] * sales_multiplier",
  code_summary: "Converting the transaction_date column",
  cell_type: 'code'
- }},
- get_cell_output_cell_id: None,
- next_steps: None,
- analysis_assumptions: None
+ }}
  }}

  </Cell Modification Example>
@@ -184,17 +175,14 @@ Graph the total_price for each sale
  Output:
  {{
  type: 'cell_update',
- message: "I'll create a graph with using matplotlib with sale `index` on the x axis and `total_price` on the y axis.",
+ message: "I'll create a graph using matplotlib with sale index on the x axis and total_price on the y axis.",
  cell_update: {{
- type: 'add'
- index: 2
- code: "import matplotlib.pyplot as plt\n\nplt.bar(sales_df.index, sales_df['total_price'])\nplt.title('Total Price per Sale')\nplt.xlabel('Transaction Number')\nplt.ylabel('Sales Price ($)')\nplt.show()"
+ type: 'new',
+ index: 2,
+ code: "import matplotlib.pyplot as plt\n\nplt.bar(sales_df.index, sales_df['total_price'])\nplt.title('Total Price per Sale')\nplt.xlabel('Transaction Number')\nplt.ylabel('Sales Price ($)')\nplt.show()",
  code_summary: "Plotting total_price",
- code_summary: "Plotting total_price"
- }},
- get_cell_output_cell_id: None,
- next_steps: None,
- analysis_assumptions: None
+ cell_type: 'code'
+ }}
  }}

  </Cell Addition Example>
@@ -208,10 +196,7 @@ When you want to get a base64 encoded version of a cell's output, respond with t
  {{
  type: 'get_cell_output',
  message: str,
- get_cell_output_cell_id: str,
- cell_update: None,
- next_steps: Optional[List[str]],
- analysis_assumptions: Optional[List[str]]
+ get_cell_output_cell_id: str
  }}

  Important information:
@@ -228,10 +213,7 @@ When you have completed the user's task, respond with a message in this format:
  {{
  type: 'finished_task',
  message: str,
- get_cell_output_cell_id: None,
- cell_update: None,
- next_steps: Optional[List[str]],
- analysis_assumptions: None
+ next_steps: Optional[List[str]]
  }}

  Important information:
@@ -249,8 +231,6 @@ Important information:
  {{
  type: 'finished_task',
  message: "Revenue analysis complete: total sales reached $2.3M with 34% growth in Q4[MITO_CITATION:abc123:2-3], while premium products generated 67% of profit margins[MITO_CITATION:xyz456:5]. The customer segmentation workflow identified three distinct buying patterns driving conversion rates[MITO_CITATION:def456:8-12].",
- get_cell_output_cell_id: None,
- cell_update: None,
  next_steps: ["Graph sales by product category", "Identify seasonal patterns in data", "Find the top 3 performing products"]
  }}

@@ -263,11 +243,7 @@ User message: "Hi"
  Output:
  {{
  type: 'finished_task',
- message: "Hey there! I'm Mito AI. How can I help you today?",
- get_cell_output_cell_id: None,
- cell_update: None,
- next_steps: None,
- analysis_assumptions: None
+ message: "Hey there! I'm Mito AI. How can I help you today?"
  }}

  </Finished Task Example 2>
@@ -327,12 +303,11 @@ Output:
  type: 'cell_update',
  message: "I'll calculate two new variables all_time_high_date and all_time_high_price.",
  cell_update: {{
- type: 'add'
- index: 2
- code: "all_time_high_row_idx = tesla_stock_prices_df['closing_price'].idxmax()\nall_time_high_date = tesla_stock_prices_df.at[all_time_high_row_idx, 'Date']\nall_time_high_price = tesla_stock_prices_df.at[all_time_high_row_idx, 'closing_price']"
+ type: 'new',
+ index: 2,
+ code: "all_time_high_row_idx = tesla_stock_prices_df['closing_price'].idxmax()\nall_time_high_date = tesla_stock_prices_df.at[all_time_high_row_idx, 'Date']\nall_time_high_price = tesla_stock_prices_df.at[all_time_high_row_idx, 'closing_price']",
  code_summary: "Calculating all time high"
- }},
- get_cell_output_cell_id: None
+ }}
  }}

  ### User Message 2
@@ -379,8 +354,6 @@ Output:
  {{
  type: 'finished_task',
  message: "The all time high tesla stock closing price was $265.91 [MITO_CITATION:9c0d5fda-2b16-4f52-a1c5-a48892f3e2e8:2] on 2025-03-16 [MITO_CITATION:9c0d5fda-2b16-4f52-a1c5-a48892f3e2e8:1]",
- get_cell_output_cell_id: None,
- cell_update: None,
  next_steps: ["Create a visualization of Tesla's stock price over time", "Calculate the percentage change from the lowest to highest price", "Analyze the volatility of Tesla's stock"]
  }}

mito_ai/completions/prompt_builders/chat_name_prompt.py CHANGED
@@ -3,13 +3,13 @@

  def create_chat_name_prompt(user_message: str, assistant_message: str) -> str:
  prompt = f"""Create a short name for the chat thread based on the first user message
- and the first LLM response. Reply ONLY with the short title (max 40 chars). Don't add any extra text.
-
- Don't include that its a Python project in the chat.
+ and the first LLM response. Reply ONLY with the short title (max 40 chars). Don't add any extra text.
+
+ Don't include that its a Python project in the chat.

- User Message: {user_message}
+ User Message: {user_message}

- Assistant Message: {assistant_message}
- """
+ Assistant Message: {assistant_message}
+ """

  return prompt
mito_ai/log/handlers.py CHANGED
@@ -3,16 +3,22 @@

  from dataclasses import dataclass
  import json
- from typing import Any, Final
+ from typing import Any, Final, Literal
  import tornado
  import os
  from jupyter_server.base.handlers import APIHandler
- from mito_ai.utils.telemetry_utils import log
+ from mito_ai.utils.telemetry_utils import MITO_SERVER_KEY, USER_KEY, log


  class LogHandler(APIHandler):
  """Handler for logging"""

+ def initialize(self, key_type: Literal['mito_server_key', 'user_key']) -> None:
+ """Initialize the log handler"""
+
+ # The key_type is required so that we know if we can log pro users
+ self.key_type = key_type
+
  @tornado.web.authenticated
  def put(self) -> None:
  """Log an event"""
@@ -26,6 +32,7 @@ class LogHandler(APIHandler):
  log_event = data['log_event']
  params = data.get('params', {})

- log(log_event, params)
+ key_type = MITO_SERVER_KEY if self.key_type == "mito_server_key" else USER_KEY
+ log(log_event, params, key_type=key_type)

mito_ai/log/urls.py CHANGED
@@ -5,7 +5,7 @@ from typing import Any, List, Tuple
  from jupyter_server.utils import url_path_join
  from mito_ai.log.handlers import LogHandler

- def get_log_urls(base_url: str) -> List[Tuple[str, Any, dict]]:
+ def get_log_urls(base_url: str, key_type: str) -> List[Tuple[str, Any, dict]]:
  """Get all log related URL patterns.

  Args:
@@ -15,7 +15,7 @@ def get_log_urls(base_url: str) -> List[Tuple[str, Any, dict]]:
  List of (url_pattern, handler_class, handler_kwargs) tuples
  """
  BASE_URL = base_url + "/mito-ai"
-
+
  return [
- (url_path_join(BASE_URL, "log"), LogHandler, {}),
+ (url_path_join(BASE_URL, "log"), LogHandler, {"key_type": key_type}),
  ]
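With this change, `LogHandler` receives `key_type` through Tornado handler kwargs (the third element of the tuple above) via its `initialize` method, and forwards it to `log(...)` so events can be attributed to the Mito server key or a user key. A hedged sketch of a client call against this endpoint follows; the server URL and token are placeholders, and the payload keys come from the handler's `put` method shown above:

```python
import requests

# Placeholders: adjust the Jupyter server URL, base_url prefix, and token for your environment.
JUPYTER_URL = "http://localhost:8888"
JUPYTER_TOKEN = "<your-jupyter-server-token>"

resp = requests.put(
    f"{JUPYTER_URL}/mito-ai/log",                        # route registered by get_log_urls above
    headers={"Authorization": f"token {JUPYTER_TOKEN}"},
    json={
        "log_event": "example_event",                    # hypothetical event name
        "params": {"source": "docs-example"},            # optional; the handler defaults to {}
    },
)
resp.raise_for_status()
```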
mito_ai/openai_client.py CHANGED
@@ -35,7 +35,7 @@ from mito_ai.utils.telemetry_utils import (
  USER_KEY,
  )

- OPENAI_MODEL_FALLBACK = "gpt-4.1"
+ OPENAI_MODEL_FALLBACK = "gpt-5"

  class OpenAIClient(LoggingConfigurable):
  """Provide AI feature through OpenAI services."""
mito_ai/streamlit_conversion/agent_utils.py ADDED
@@ -0,0 +1,116 @@
+ # Copyright (c) Saga Inc.
+ # Distributed under the terms of the GNU Affero General Public License v3.0 License.
+
+ from typing import List
+ import re
+ from unidiff import PatchSet
+ from mito_ai.streamlit_conversion.prompts.prompt_constants import MITO_TODO_PLACEHOLDER
+
+ def extract_todo_placeholders(agent_response: str) -> List[str]:
+ """Extract TODO placeholders from the agent's response"""
+ return [line.strip() for line in agent_response.split('\n') if MITO_TODO_PLACEHOLDER in line]
+
+
+ def apply_patch_to_text(text: str, diff: str) -> str:
+ """
+ Apply a *unified-diff* (git-style) patch to the given text and return
+ the updated contents.
+
+ Parameters
+ ----------
+ text : str
+ The original file contents.
+ diff : str
+ A unified diff that transforms *text* into the desired output.
+ The diff must reference exactly one file (the Streamlit app).
+
+ Returns
+ -------
+ str
+ The patched contents.
+
+ Raises
+ ------
+ ValueError
+ If the patch cannot be applied or references more than one file.
+ """
+ # Nothing to do
+ if not diff.strip():
+ return text
+
+ # Parse the patch
+ patch = PatchSet(diff.splitlines(keepends=True))
+
+ # We expect a single-file patch (what the prompt asks the model to emit)
+ if len(patch) != 1:
+ raise ValueError(
+ f"Expected a patch for exactly one file, got {len(patch)} files."
+ )
+
+ file_patch = patch[0]
+
+ original_lines = text.splitlines(keepends=True)
+ result_lines: List[str] = []
+
+ cursor = 0  # index in original_lines (0-based)
+
+ for hunk in file_patch:
+ # Copy unchanged lines before this hunk
+ while cursor < hunk.source_start - 1:
+ result_lines.append(original_lines[cursor])
+ cursor += 1
+
+ # Apply hunk line-by-line
+ for line in hunk:
+ if line.is_context:
+ result_lines.append(original_lines[cursor])
+ cursor += 1
+ elif line.is_removed:
+ cursor += 1  # Skip this line from the original
+ elif line.is_added:
+ # Ensure added line ends with newline for consistency
+ val = line.value
+ if not val.endswith("\n"):
+ val += "\n"
+ result_lines.append(val)
+
+ # Copy any remaining lines after the last hunk
+ result_lines.extend(original_lines[cursor:])
+
+ return "".join(result_lines)
+
+
+ def fix_diff_headers(diff: str) -> str:
+ """
+ The AI is generally not very good at counting the number of lines in the diff. If the hunk header has
+ an incorrect count, then the patch will fail. So instead we just calculate the counts ourselves, its deterministic.
+ """
+ lines = diff.split('\n')
+
+ for i, line in enumerate(lines):
+ if line.startswith('@@'):
+ # Extract the starting line numbers
+ match = re.match(r'@@ -(\d+),\d+ \+(\d+),\d+ @@', line)
+ if match:
+ old_start = match.group(1)
+ new_start = match.group(2)
+
+ # Count lines in this hunk
+ old_count = 0
+ new_count = 0
+
+ for j in range(i + 1, len(lines)):
+ next_line = lines[j]
+ if next_line.startswith('@@') or next_line.startswith('---'):
+ break
+ if next_line.startswith(' ') or next_line.startswith('-'):
+ old_count += 1
+ if next_line.startswith(' ') or next_line.startswith('+'):
+ new_count += 1
+
+ # Replace the header with correct counts
+ lines[i] = f"@@ -{old_start},{old_count} +{new_start},{new_count} @@"
+
+ return '\n'.join(lines)
+
+
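A short usage sketch of the three helpers above (the app text and diff are invented for illustration): the model's hunk headers are normalized with `fix_diff_headers`, the patch is applied with `apply_patch_to_text`, and any unresolved placeholders can then be collected with `extract_todo_placeholders`.

```python
from mito_ai.streamlit_conversion.agent_utils import (
    apply_patch_to_text,
    extract_todo_placeholders,
    fix_diff_headers,
)

app_text = (
    "import streamlit as st\n"
    "# MITO_TODO_PLACEHOLDER: render the data table\n"
    "st.title('Sales')\n"
)

# A single-file diff in the format the prompts request; the ",1" counts are the
# placeholder values that fix_diff_headers recomputes before the patch is applied.
model_diff = (
    "--- a/app.py\n"
    "+++ b/app.py\n"
    "@@ -2,1 +2,1 @@\n"
    "-# MITO_TODO_PLACEHOLDER: render the data table\n"
    "+st.dataframe(sales_df)\n"
)

patched = apply_patch_to_text(app_text, fix_diff_headers(model_diff))
print(patched)
print(extract_todo_placeholders(patched))  # [] once every placeholder is resolved
```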
mito_ai/streamlit_conversion/prompts/prompt_constants.py ADDED
@@ -0,0 +1,59 @@
+ # Copyright (c) Saga Inc.
+ # Distributed under the terms of the GNU Affero General Public License v3.0 License.
+
+ MITO_TODO_PLACEHOLDER = "# MITO_TODO_PLACEHOLDER"
+
+ unified_diff_instrucrions = f"""
+ RESPONSE FORMAT: Return the changes you want to make to the streamlit app as a **unified diff (git-style patch)**:
+ - Begin with a ````unified_diff` header and a ```` end header.
+ - Then, include the standard header lines `--- a/app.py` and `+++ b/app.py`.
+ - Show only the modified hunks; each hunk must start with an `@@` header with line numbers.
+ - Within each hunk:
+ * Unchanged context lines start with a single space.
+ * Removed lines start with `-`.
+ * Added lines start with `+`.
+ - If there are **no changes**, return an empty string.
+ - Do not include the line numbers in your response.
+
+ **IMPORTANT: For the hunk header, use `@@ -START_LINE,1 +START_LINE,1 @@` where we always use 1 as the count value. In a later step, the system will automatically calculate the correct counts.**
+
+ <Example Response>
+
+ In the example below, assume that the line of code `data_list = [` is on line 57 of the existing streamlit app.
+
+ ```unified_diff
+ --- a/app.py
+ +++ b/app.py
+ @@ -57,1 +57,1 @@
+ data_list = [
+ {{'id': 1, 'name': 'Item A', 'category': 'Type 1', 'value': 100}},
+ {{'id': 2, 'name': 'Item B', 'category': 'Type 2', 'value': 200}},
+ - {MITO_TODO_PLACEHOLDER}: Add remaining entries from notebook
+ + {{'id': 3, 'name': 'Item C', 'category': 'Type 3', 'value': 300}},
+ + {{'id': 4, 'name': 'Item D', 'category': 'Type 4', 'value': 400}},
+ + {{'id': 5, 'name': 'Item E', 'category': 'Type 5', 'value': 500}},
+ + {{'id': 6, 'name': 'Item F', 'category': 'Type 6', 'value': 600}},
+ + {{'id': 7, 'name': 'Item G', 'category': 'Type 7', 'value': 700}},
+ + {{'id': 8, 'name': 'Item H', 'category': 'Type 8', 'value': 800}},
+ + {{'id': 9, 'name': 'Item I', 'category': 'Type 9', 'value': 900}},
+ + {{'id': 10, 'name': 'Item J', 'category': 'Type 10', 'value': 1000}},
+ + {{'id': 11, 'name': 'Item K', 'category': 'Type 11', 'value': 1100}},
+ + {{'id': 12, 'name': 'Item L', 'category': 'Type 12', 'value': 1200}},
+ + {{'id': 13, 'name': 'Item M', 'category': 'Type 13', 'value': 1300}},
+ + {{'id': 14, 'name': 'Item N', 'category': 'Type 14', 'value': 1400}},
+ + {{'id': 15, 'name': 'Item O', 'category': 'Type 15', 'value': 1500}},
+ + {{'id': 16, 'name': 'Item P', 'category': 'Type 16', 'value': 1600}},
+ + {{'id': 17, 'name': 'Item Q', 'category': 'Type 17', 'value': 1700}},
+ + {{'id': 18, 'name': 'Item R', 'category': 'Type 18', 'value': 1800}},
+ + {{'id': 19, 'name': 'Item S', 'category': 'Type 19', 'value': 1900}},
+ + {{'id': 20, 'name': 'Item T', 'category': 'Type 20', 'value': 2000}},
+ + {{'id': 21, 'name': 'Item U', 'category': 'Type 21', 'value': 2100}},
+ + {{'id': 22, 'name': 'Item V', 'category': 'Type 22', 'value': 2200}},
+ + {{'id': 23, 'name': 'Item W', 'category': 'Type 23', 'value': 2300}},
+ + {{'id': 24, 'name': 'Item X', 'category': 'Type 24', 'value': 2400}},
+ + {{'id': 25, 'name': 'Item Y', 'category': 'Type 25', 'value': 2500}}
+ ```
+ </Example Response>
+
+ Your response must consist **only** of valid unified-diff block.
+ """
mito_ai/streamlit_conversion/prompts/prompt_utils.py ADDED
@@ -0,0 +1,10 @@
+ # Copyright (c) Saga Inc.
+ # Distributed under the terms of the GNU Affero General Public License v3.0 License.
+
+ def add_line_numbers_to_code(code: str) -> str:
+ """Add line numbers to the code"""
+ code_with_line_numbers = ""
+ for i, line in enumerate(code.split('\n'), 1):
+ code_with_line_numbers += f"{i:3d}: {line}\n"
+
+ return code_with_line_numbers
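For reference, a quick example of the output produced by the helper above: each line is prefixed with a width-3, right-aligned line number and a colon.

```python
from mito_ai.streamlit_conversion.prompts.prompt_utils import add_line_numbers_to_code

print(add_line_numbers_to_code("import streamlit as st\nst.title('Hello')"))
#   1: import streamlit as st
#   2: st.title('Hello')
```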