mito-ai 0.1.38__py3-none-any.whl → 0.1.40__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mito-ai might be problematic. Click here for more details.

Files changed (47) hide show
  1. mito_ai/__init__.py +8 -0
  2. mito_ai/_version.py +1 -1
  3. mito_ai/app_builder/handlers.py +16 -11
  4. mito_ai/completions/handlers.py +1 -1
  5. mito_ai/completions/prompt_builders/agent_system_message.py +18 -45
  6. mito_ai/completions/prompt_builders/chat_name_prompt.py +6 -6
  7. mito_ai/openai_client.py +1 -1
  8. mito_ai/streamlit_conversion/agent_utils.py +116 -0
  9. mito_ai/streamlit_conversion/prompts/prompt_constants.py +59 -0
  10. mito_ai/streamlit_conversion/prompts/prompt_utils.py +10 -0
  11. mito_ai/streamlit_conversion/prompts/streamlit_app_creation_prompt.py +45 -0
  12. mito_ai/streamlit_conversion/prompts/streamlit_error_correction_prompt.py +28 -0
  13. mito_ai/streamlit_conversion/prompts/streamlit_finish_todo_prompt.py +44 -0
  14. mito_ai/streamlit_conversion/streamlit_agent_handler.py +72 -42
  15. mito_ai/streamlit_conversion/streamlit_system_prompt.py +19 -17
  16. mito_ai/streamlit_conversion/streamlit_utils.py +43 -5
  17. mito_ai/streamlit_conversion/validate_streamlit_app.py +116 -0
  18. mito_ai/streamlit_preview/handlers.py +7 -4
  19. mito_ai/tests/streamlit_conversion/test_streamlit_agent_handler.py +153 -66
  20. mito_ai/tests/streamlit_conversion/test_validate_streamlit_app.py +119 -0
  21. mito_ai/tests/utils/test_anthropic_utils.py +2 -2
  22. mito_ai/utils/anthropic_utils.py +4 -4
  23. mito_ai/utils/open_ai_utils.py +0 -4
  24. {mito_ai-0.1.38.data → mito_ai-0.1.40.data}/data/share/jupyter/labextensions/mito_ai/build_log.json +1 -1
  25. {mito_ai-0.1.38.data → mito_ai-0.1.40.data}/data/share/jupyter/labextensions/mito_ai/package.json +2 -2
  26. {mito_ai-0.1.38.data → mito_ai-0.1.40.data}/data/share/jupyter/labextensions/mito_ai/schemas/mito_ai/package.json.orig +1 -1
  27. mito_ai-0.1.38.data/data/share/jupyter/labextensions/mito_ai/static/lib_index_js.5d1d7c234e2dc7c9d97b.js → mito_ai-0.1.40.data/data/share/jupyter/labextensions/mito_ai/static/lib_index_js.55d9f8ca386d87856d2d.js +411 -78
  28. mito_ai-0.1.40.data/data/share/jupyter/labextensions/mito_ai/static/lib_index_js.55d9f8ca386d87856d2d.js.map +1 -0
  29. mito_ai-0.1.38.data/data/share/jupyter/labextensions/mito_ai/static/remoteEntry.bcce4ea34631acf6dbbe.js → mito_ai-0.1.40.data/data/share/jupyter/labextensions/mito_ai/static/remoteEntry.264103d9addd1e166113.js +3 -3
  30. mito_ai-0.1.38.data/data/share/jupyter/labextensions/mito_ai/static/remoteEntry.bcce4ea34631acf6dbbe.js.map → mito_ai-0.1.40.data/data/share/jupyter/labextensions/mito_ai/static/remoteEntry.264103d9addd1e166113.js.map +1 -1
  31. {mito_ai-0.1.38.dist-info → mito_ai-0.1.40.dist-info}/METADATA +4 -1
  32. {mito_ai-0.1.38.dist-info → mito_ai-0.1.40.dist-info}/RECORD +44 -38
  33. mito_ai/streamlit_conversion/validate_and_run_streamlit_code.py +0 -208
  34. mito_ai/tests/streamlit_conversion/test_validate_and_run_streamlit_code.py +0 -418
  35. mito_ai-0.1.38.data/data/share/jupyter/labextensions/mito_ai/static/lib_index_js.5d1d7c234e2dc7c9d97b.js.map +0 -1
  36. {mito_ai-0.1.38.data → mito_ai-0.1.40.data}/data/etc/jupyter/jupyter_server_config.d/mito_ai.json +0 -0
  37. {mito_ai-0.1.38.data → mito_ai-0.1.40.data}/data/share/jupyter/labextensions/mito_ai/schemas/mito_ai/toolbar-buttons.json +0 -0
  38. {mito_ai-0.1.38.data → mito_ai-0.1.40.data}/data/share/jupyter/labextensions/mito_ai/static/style.js +0 -0
  39. {mito_ai-0.1.38.data → mito_ai-0.1.40.data}/data/share/jupyter/labextensions/mito_ai/static/style_index_js.5876024bb17dbd6a3ee6.js +0 -0
  40. {mito_ai-0.1.38.data → mito_ai-0.1.40.data}/data/share/jupyter/labextensions/mito_ai/static/style_index_js.5876024bb17dbd6a3ee6.js.map +0 -0
  41. {mito_ai-0.1.38.data → mito_ai-0.1.40.data}/data/share/jupyter/labextensions/mito_ai/static/vendors-node_modules_semver_index_js.9795f79265ddb416864b.js +0 -0
  42. {mito_ai-0.1.38.data → mito_ai-0.1.40.data}/data/share/jupyter/labextensions/mito_ai/static/vendors-node_modules_semver_index_js.9795f79265ddb416864b.js.map +0 -0
  43. {mito_ai-0.1.38.data → mito_ai-0.1.40.data}/data/share/jupyter/labextensions/mito_ai/static/vendors-node_modules_vscode-diff_dist_index_js.ea55f1f9346638aafbcf.js +0 -0
  44. {mito_ai-0.1.38.data → mito_ai-0.1.40.data}/data/share/jupyter/labextensions/mito_ai/static/vendors-node_modules_vscode-diff_dist_index_js.ea55f1f9346638aafbcf.js.map +0 -0
  45. {mito_ai-0.1.38.dist-info → mito_ai-0.1.40.dist-info}/WHEEL +0 -0
  46. {mito_ai-0.1.38.dist-info → mito_ai-0.1.40.dist-info}/entry_points.txt +0 -0
  47. {mito_ai-0.1.38.dist-info → mito_ai-0.1.40.dist-info}/licenses/LICENSE +0 -0
mito_ai/__init__.py CHANGED
@@ -14,6 +14,14 @@ from mito_ai.settings.urls import get_settings_urls
14
14
  from mito_ai.rules.urls import get_rules_urls
15
15
  from mito_ai.auth.urls import get_auth_urls
16
16
  from mito_ai.streamlit_preview.urls import get_streamlit_preview_urls
17
+
18
+ # Sometimes matplotlib figures do not show up in the notebook with this warning:
19
+ # UserWarning: FigureCanvasAgg is non-interactive, and thus cannot be shown
20
+ # I believe that streamlit is reconfiguring the matplotlib settings and this is happening as a result.
21
+ # For now, we just set the backend to inline, so that the figures show up again
22
+ import os
23
+ os.environ['MPLBACKEND'] = 'inline'
24
+
17
25
  try:
18
26
  from _version import __version__
19
27
  except ImportError:
mito_ai/_version.py CHANGED
@@ -1,4 +1,4 @@
1
1
  # This file is auto-generated by Hatchling. As such, do not:
2
2
  # - modify
3
3
  # - track in version control e.g. be sure to add to .gitignore
4
- __version__ = VERSION = '0.1.38'
4
+ __version__ = VERSION = '0.1.40'
@@ -4,7 +4,7 @@
4
4
  import os
5
5
  import time
6
6
  import logging
7
- from typing import Any, Union
7
+ from typing import Any, Union, Optional
8
8
  import zipfile
9
9
  import tempfile
10
10
  from mito_ai.utils.create import initialize_user
@@ -147,11 +147,16 @@ class AppBuilderHandler(BaseWebSocketHandler):
147
147
  self.log.info("JWT token validation successful")
148
148
 
149
149
  try:
150
-
151
150
  notebook_path = str(notebook_path) if notebook_path else ""
152
- success_flag, app_path, result_message = await streamlit_handler(notebook_path)
153
- if not success_flag or app_path is None:
154
- raise Exception(result_message)
151
+
152
+ app_directory = os.path.dirname(notebook_path)
153
+ app_path = os.path.join(app_directory, "app.py")
154
+
155
+ if not os.path.exists(app_path):
156
+ success_flag, app_path_result, result_message = await streamlit_handler(notebook_path)
157
+ if not success_flag or app_path_result is None:
158
+ raise Exception(result_message)
159
+ app_path = app_path_result
155
160
 
156
161
  deploy_url = await self._deploy_app(app_path, jwt_token)
157
162
 
@@ -271,15 +276,15 @@ class AppBuilderHandler(BaseWebSocketHandler):
271
276
  except requests.exceptions.RequestException as e:
272
277
  self.log.error(f"Error during API request: {e}")
273
278
  if hasattr(e, 'response') and e.response is not None:
274
- try:
275
- error_detail = e.response.json()
276
- self.log.error(f"Server error details: {error_detail}")
277
- except:
278
- self.log.error(f"Server response: {e.response.text}")
279
- raise Exception(f"Deployment failed: {str(e)}")
279
+ error_detail = e.response.json()
280
+ self.log.error(f"Server error details: {error_detail}")
281
+ if 'error' in error_detail:
282
+ raise Exception(error_detail['error'])
283
+ raise
280
284
  except Exception as e:
281
285
  self.log.error(f"Error during deployment: {str(e)}")
282
286
  raise
287
+ raise RuntimeError("Unexpected error in _deploy_app")
283
288
 
284
289
  async def _upload_app_to_s3(self, app_path: str, presigned_url: str) -> requests.Response:
285
290
  """Upload the app to S3 using the presigned URL."""
@@ -46,7 +46,7 @@ from mito_ai.completions.completion_handlers.agent_execution_handler import get_
46
46
  from mito_ai.completions.completion_handlers.agent_auto_error_fixup_handler import get_agent_auto_error_fixup_completion
47
47
  from mito_ai.utils.telemetry_utils import identify
48
48
 
49
- FALLBACK_MODEL = "gpt-4.1" # Default model to use for safety
49
+ FALLBACK_MODEL = "gpt-5" # Default model to use for safety
50
50
 
51
51
  # The GlobalMessageHistory is responsible for updating the message histories stored in the .mito/ai-chats directory.
52
52
  # We create one GlobalMessageHistory per backend server instance instead of one per websocket connection so that the
@@ -52,9 +52,6 @@ Format:
52
52
  code_summary: str
53
53
  cell_type: 'code' | 'markdown'
54
54
  }}
55
- get_cell_output_cell_id: None,
56
- next_steps: None,
57
- analysis_assumptions: None
58
55
  }}
59
56
 
60
57
  Important information:
@@ -64,7 +61,7 @@ Important information:
64
61
  4. The code_summary must be a very short phrase (1–5 words maximum) that begins with a verb ending in "-ing" (e.g., "Loading data", "Filtering rows", "Calculating average", "Plotting revenue"). Avoid full sentences or explanations—this should read like a quick commit message or code label, not a description.
65
62
  5. Important: Only use the CELL_UPDATE tool if you want to add/modify a notebook cell in response to the user's request. If the user is just sending you a friendly greeting or asking you a question about yourself, you SHOULD NOT USE A CELL_UPDATE tool because it does not require modifying the notebook. Instead, just use the FINISHED_TASK response.
66
63
  6. The assumptions is an optional list of critical assumptions that you made about the data or analysis approach. The assumptions you list here will be displayed to the user so that they can confirm or correct the assumptions. For example: ["NaN values in the impressions column represent 0 impressions", "Only crashes with pedestrian or cyclist fatalities are considered fatal crashes", "Intervention priority combines both volume and severity to identify maximum impact opportunities"].
67
- 7. Only include important data and analytical assumptions that if incorrect would fundamentally change your analysis conclusions. These should be data handling decisions, methodological choices, and definitional boundaries. Do not include: obvious statements ("Each record is counted once"), result interpretation guidance ("Gaps in the plot represent zero values"), display choices ("Data is sorted for clarity"), internal reasoning ("Bar chart is better than line plot"), or environment assumptions ("Library X is installed"). Prioritize quality over quantity - include only the most critical assumptions or None if there are no critical assumptions made in this step that have not already be shared with the user. If you ever doubt whether an assumption is critical enough to be shared with the user as an assumption, don't include it. Most messages should not include an assumption.
64
+ 7. Only include important data and analytical assumptions that if incorrect would fundamentally change your analysis conclusions. These should be data handling decisions, methodological choices, and definitional boundaries. Do not include: obvious statements ("Each record is counted once"), result interpretation guidance ("Gaps in the plot represent zero values"), display choices ("Data is sorted for clarity"), internal reasoning ("Bar chart is better than line plot"), or environment assumptions ("Library X is installed"). Prioritize quality over quantity - include only the most critical assumptions or omit the field entirely if there are no critical assumptions made in this step that have not already be shared with the user. If you ever doubt whether an assumption is critical enough to be shared with the user as an assumption, don't include it. Most messages should not include an assumption.
68
65
  8. Do not include the same assumption or variations of the same assumption multiple times in the same conversation. Once you have presented the assumption to the user, they will already have the opportunity to confirm or correct it so do not include it again.
69
66
 
70
67
  #### Cell Addition:
@@ -81,9 +78,6 @@ Format:
81
78
  code_summary: str
82
79
  cell_type: 'code' | 'markdown'
83
80
  }}
84
- get_cell_output_cell_id: None,
85
- next_steps: None,
86
- analysis_assumptions: None
87
81
  }}
88
82
 
89
83
  Important information:
@@ -93,7 +87,7 @@ Important information:
93
87
  4. code_summary must be a very short phrase (1–5 words maximum) that begins with a verb ending in "-ing" (e.g., "Loading data", "Filtering rows", "Calculating average", "Plotting revenue"). Avoid full sentences or explanations—this should read like a quick commit message or code label, not a description.
94
88
  5. The cell_type should only be 'markdown' if there is no code to add. There may be times where the code has comments. These are still code cells and should have the cell_type 'code'. Any cells that are labeled 'markdown' will be converted to markdown cells by the user.
95
89
  6. The assumptions is an optional list of critical assumptions that you made about the data or analysis approach. The assumptions you list here will be displayed to the user so that they can confirm or correct the assumptions. For example: ["NaN values in the impressions column represent 0 impressions", "Only crashes with pedestrian or cyclist fatalities are considered fatal crashes", "Intervention priority combines both volume and severity to identify maximum impact opportunities"].
96
- 7. Only include important data and analytical assumptions that if incorrect would fundamentally change your analysis conclusions. These should be data handling decisions, methodological choices, and definitional boundaries. Do not include: obvious statements ("Each record is counted once"), result interpretation guidance ("Gaps in the plot represent zero values"), display choices ("Data is sorted for clarity"), internal reasoning ("Bar chart is better than line plot"), or environment assumptions ("Library X is installed"). Prioritize quality over quantity - include only the most critical assumptions or None if there are no critical assumptions made in this step that have not already be shared with the user. If you ever doubt whether an assumption is critical enough to be shared with the user as an assumption, don't include it. Most messages should not include an assumption.
90
+ 7. Only include important data and analytical assumptions that if incorrect would fundamentally change your analysis conclusions. These should be data handling decisions, methodological choices, and definitional boundaries. Do not include: obvious statements ("Each record is counted once"), result interpretation guidance ("Gaps in the plot represent zero values"), display choices ("Data is sorted for clarity"), internal reasoning ("Bar chart is better than line plot"), or environment assumptions ("Library X is installed"). Prioritize quality over quantity - include only the most critical assumptions or omit the field entirely if there are no critical assumptions made in this step that have not already be shared with the user. If you ever doubt whether an assumption is critical enough to be shared with the user as an assumption, don't include it. Most messages should not include an assumption.
97
91
  8. Do not include the same assumption or variations of the same assumption multiple times in the same conversation. Once you have presented the assumption to the user, they will already have the opportunity to confirm or correct it so do not include it again.
98
92
 
99
93
  <Cell Modification Example>
@@ -133,17 +127,14 @@ Convert the transaction_date column to datetime and then multiply the total_pric
133
127
  Output:
134
128
  {{
135
129
  type: 'cell_update',
136
- cell_type: 'code',
130
+ message: "I'll convert the transaction_date column to datetime and multiply total_price by the multiplier.",
137
131
  cell_update: {{
138
- type: 'modification'
132
+ type: 'modification',
139
133
  id: 'c68fdf19-db8c-46dd-926f-d90ad35bb3bc',
140
134
  code: "import pandas as pd\\nsales_df = pd.read_csv('./sales.csv')\\nloan_multiplier = 1.5\\nsales_df['transaction_date'] = pd.to_datetime(sales_df['transaction_date'])\\nsales_df['total_price'] = sales_df['total_price'] * sales_multiplier",
141
135
  code_summary: "Converting the transaction_date column",
142
136
  cell_type: 'code'
143
- }},
144
- get_cell_output_cell_id: None,
145
- next_steps: None,
146
- analysis_assumptions: None
137
+ }}
147
138
  }}
148
139
 
149
140
  </Cell Modification Example>
@@ -184,17 +175,14 @@ Graph the total_price for each sale
184
175
  Output:
185
176
  {{
186
177
  type: 'cell_update',
187
- message: "I'll create a graph with using matplotlib with sale `index` on the x axis and `total_price` on the y axis.",
178
+ message: "I'll create a graph using matplotlib with sale index on the x axis and total_price on the y axis.",
188
179
  cell_update: {{
189
- type: 'add'
190
- index: 2
191
- code: "import matplotlib.pyplot as plt\n\nplt.bar(sales_df.index, sales_df['total_price'])\nplt.title('Total Price per Sale')\nplt.xlabel('Transaction Number')\nplt.ylabel('Sales Price ($)')\nplt.show()"
180
+ type: 'new',
181
+ index: 2,
182
+ code: "import matplotlib.pyplot as plt\n\nplt.bar(sales_df.index, sales_df['total_price'])\nplt.title('Total Price per Sale')\nplt.xlabel('Transaction Number')\nplt.ylabel('Sales Price ($)')\nplt.show()",
192
183
  code_summary: "Plotting total_price",
193
- code_summary: "Plotting total_price"
194
- }},
195
- get_cell_output_cell_id: None,
196
- next_steps: None,
197
- analysis_assumptions: None
184
+ cell_type: 'code'
185
+ }}
198
186
  }}
199
187
 
200
188
  </Cell Addition Example>
@@ -208,10 +196,7 @@ When you want to get a base64 encoded version of a cell's output, respond with t
208
196
  {{
209
197
  type: 'get_cell_output',
210
198
  message: str,
211
- get_cell_output_cell_id: str,
212
- cell_update: None,
213
- next_steps: Optional[List[str]],
214
- analysis_assumptions: Optional[List[str]]
199
+ get_cell_output_cell_id: str
215
200
  }}
216
201
 
217
202
  Important information:
@@ -228,10 +213,7 @@ When you have completed the user's task, respond with a message in this format:
228
213
  {{
229
214
  type: 'finished_task',
230
215
  message: str,
231
- get_cell_output_cell_id: None,
232
- cell_update: None,
233
- next_steps: Optional[List[str]],
234
- analysis_assumptions: None
216
+ next_steps: Optional[List[str]]
235
217
  }}
236
218
 
237
219
  Important information:
@@ -249,8 +231,6 @@ Important information:
249
231
  {{
250
232
  type: 'finished_task',
251
233
  message: "Revenue analysis complete: total sales reached $2.3M with 34% growth in Q4[MITO_CITATION:abc123:2-3], while premium products generated 67% of profit margins[MITO_CITATION:xyz456:5]. The customer segmentation workflow identified three distinct buying patterns driving conversion rates[MITO_CITATION:def456:8-12].",
252
- get_cell_output_cell_id: None,
253
- cell_update: None,
254
234
  next_steps: ["Graph sales by product category", "Identify seasonal patterns in data", "Find the top 3 performing products"]
255
235
  }}
256
236
 
@@ -263,11 +243,7 @@ User message: "Hi"
263
243
  Output:
264
244
  {{
265
245
  type: 'finished_task',
266
- message: "Hey there! I'm Mito AI. How can I help you today?",
267
- get_cell_output_cell_id: None,
268
- cell_update: None,
269
- next_steps: None,
270
- analysis_assumptions: None
246
+ message: "Hey there! I'm Mito AI. How can I help you today?"
271
247
  }}
272
248
 
273
249
  </Finished Task Example 2>
@@ -327,12 +303,11 @@ Output:
327
303
  type: 'cell_update',
328
304
  message: "I'll calculate two new variables all_time_high_date and all_time_high_price.",
329
305
  cell_update: {{
330
- type: 'add'
331
- index: 2
332
- code: "all_time_high_row_idx = tesla_stock_prices_df['closing_price'].idxmax()\nall_time_high_date = tesla_stock_prices_df.at[all_time_high_row_idx, 'Date']\nall_time_high_price = tesla_stock_prices_df.at[all_time_high_row_idx, 'closing_price']"
306
+ type: 'new',
307
+ index: 2,
308
+ code: "all_time_high_row_idx = tesla_stock_prices_df['closing_price'].idxmax()\nall_time_high_date = tesla_stock_prices_df.at[all_time_high_row_idx, 'Date']\nall_time_high_price = tesla_stock_prices_df.at[all_time_high_row_idx, 'closing_price']",
333
309
  code_summary: "Calculating all time high"
334
- }},
335
- get_cell_output_cell_id: None
310
+ }}
336
311
  }}
337
312
 
338
313
  ### User Message 2
@@ -379,8 +354,6 @@ Output:
379
354
  {{
380
355
  type: 'finished_task',
381
356
  message: "The all time high tesla stock closing price was $265.91 [MITO_CITATION:9c0d5fda-2b16-4f52-a1c5-a48892f3e2e8:2] on 2025-03-16 [MITO_CITATION:9c0d5fda-2b16-4f52-a1c5-a48892f3e2e8:1]",
382
- get_cell_output_cell_id: None,
383
- cell_update: None,
384
357
  next_steps: ["Create a visualization of Tesla's stock price over time", "Calculate the percentage change from the lowest to highest price", "Analyze the volatility of Tesla's stock"]
385
358
  }}
386
359
 
@@ -3,13 +3,13 @@
3
3
 
4
4
  def create_chat_name_prompt(user_message: str, assistant_message: str) -> str:
5
5
  prompt = f"""Create a short name for the chat thread based on the first user message
6
- and the first LLM response. Reply ONLY with the short title (max 40 chars). Don't add any extra text.
7
-
8
- Don't include that its a Python project in the chat.
6
+ and the first LLM response. Reply ONLY with the short title (max 40 chars). Don't add any extra text.
7
+
8
+ Don't include that its a Python project in the chat.
9
9
 
10
- User Message: {user_message}
10
+ User Message: {user_message}
11
11
 
12
- Assistant Message: {assistant_message}
13
- """
12
+ Assistant Message: {assistant_message}
13
+ """
14
14
 
15
15
  return prompt
mito_ai/openai_client.py CHANGED
@@ -35,7 +35,7 @@ from mito_ai.utils.telemetry_utils import (
35
35
  USER_KEY,
36
36
  )
37
37
 
38
- OPENAI_MODEL_FALLBACK = "gpt-4.1"
38
+ OPENAI_MODEL_FALLBACK = "gpt-5"
39
39
 
40
40
  class OpenAIClient(LoggingConfigurable):
41
41
  """Provide AI feature through OpenAI services."""
@@ -0,0 +1,116 @@
1
+ # Copyright (c) Saga Inc.
2
+ # Distributed under the terms of the GNU Affero General Public License v3.0 License.
3
+
4
+ from typing import List
5
+ import re
6
+ from unidiff import PatchSet
7
+ from mito_ai.streamlit_conversion.prompts.prompt_constants import MITO_TODO_PLACEHOLDER
8
+
9
def extract_todo_placeholders(agent_response: str) -> List[str]:
    """Collect every line of the agent's response that carries the TODO marker.

    The response is split on newline characters; each line containing
    MITO_TODO_PLACEHOLDER is returned with surrounding whitespace stripped,
    in the order it appears.
    """
    todo_lines: List[str] = []
    for raw_line in agent_response.split('\n'):
        if MITO_TODO_PLACEHOLDER in raw_line:
            todo_lines.append(raw_line.strip())
    return todo_lines
12
+
13
+
14
def apply_patch_to_text(text: str, diff: str) -> str:
    """
    Apply a *unified-diff* (git-style) patch to the given text and return
    the updated contents.

    Context lines are copied from *text* by position; the content of a
    context line in the diff is not compared with the original line.

    Parameters
    ----------
    text : str
        The original file contents.
    diff : str
        A unified diff that transforms *text* into the desired output.
        The diff must reference exactly one file (the Streamlit app).
        An empty or whitespace-only diff leaves *text* unchanged.

    Returns
    -------
    str
        The patched contents.

    Raises
    ------
    ValueError
        If the patch references more than one file, or if a hunk refers to
        lines beyond the end of *text*.
    """
    # Nothing to do
    if not diff.strip():
        return text

    # Parse the patch
    patch = PatchSet(diff.splitlines(keepends=True))

    # We expect a single-file patch (what the prompt asks the model to emit)
    if len(patch) != 1:
        raise ValueError(
            f"Expected a patch for exactly one file, got {len(patch)} files."
        )

    file_patch = patch[0]

    original_lines = text.splitlines(keepends=True)
    total_lines = len(original_lines)
    result_lines: List[str] = []

    cursor = 0  # index in original_lines (0-based)

    for hunk in file_patch:
        hunk_start = hunk.source_start - 1  # hunk headers are 1-based

        # Copy unchanged lines before this hunk
        if cursor < hunk_start:
            if hunk_start > total_lines:
                # Previously surfaced as a bare IndexError from list indexing.
                raise ValueError(
                    f"Hunk starts at line {hunk.source_start}, but the text "
                    f"only has {total_lines} lines."
                )
            result_lines.extend(original_lines[cursor:hunk_start])
            cursor = hunk_start

        # Apply hunk line-by-line
        for line in hunk:
            if line.is_context:
                if cursor >= total_lines:
                    raise ValueError(
                        f"Hunk starting at line {hunk.source_start} has a "
                        f"context line beyond the end of the text "
                        f"({total_lines} lines)."
                    )
                # Keep the original's own line rather than the diff's copy
                result_lines.append(original_lines[cursor])
                cursor += 1
            elif line.is_removed:
                # Skip this line from the original. A removal past the end of
                # the text is tolerated, exactly as before.
                cursor += 1
            elif line.is_added:
                # Ensure added line ends with newline for consistency
                val = line.value
                if not val.endswith("\n"):
                    val += "\n"
                result_lines.append(val)

    # Copy any remaining lines after the last hunk
    result_lines.extend(original_lines[cursor:])

    return "".join(result_lines)
81
+
82
+
83
def fix_diff_headers(diff: str) -> str:
    """
    Recompute the line counts in every hunk header of a unified diff.

    The AI is generally not very good at counting the number of lines in the
    diff. If the hunk header has an incorrect count, then the patch will fail.
    So instead we just calculate the counts ourselves; it's deterministic.

    Parameters
    ----------
    diff : str
        Unified diff text. Hunk headers may be written as
        `@@ -START,COUNT +START,COUNT @@` or with the counts omitted
        (`@@ -START +START @@`).

    Returns
    -------
    str
        The diff with each recognised hunk header rewritten as
        `@@ -START,OLD_COUNT +START,NEW_COUNT @@`. Start line numbers are kept;
        anything after the closing `@@` of a rewritten header is dropped.
        Headers that do not match the expected shape are left untouched.
    """
    lines = diff.split('\n')

    # Counts are optional in the unified format, so accept both forms.
    header_pattern = re.compile(r'@@ -(\d+)(?:,\d+)? \+(\d+)(?:,\d+)? @@')

    for i, line in enumerate(lines):
        if not line.startswith('@@'):
            continue

        # Extract the starting line numbers
        match = header_pattern.match(line)
        if not match:
            continue
        old_start, new_start = match.groups()

        # Count lines in this hunk
        old_count = 0
        new_count = 0

        for j in range(i + 1, len(lines)):
            body_line = lines[j]
            if body_line.startswith('@@'):
                break

            # A new file header is a `--- ` line immediately followed by a
            # `+++ ` line. A lone line starting with `---` is a removed line
            # whose own text begins with `--` and must still be counted.
            starts_file_header = (
                body_line.startswith('--- ')
                and j + 1 < len(lines)
                and lines[j + 1].startswith('+++ ')
            )
            if starts_file_header:
                break

            if body_line.startswith((' ', '-')):
                old_count += 1
            if body_line.startswith((' ', '+')):
                new_count += 1

        # Replace the header with correct counts
        lines[i] = f"@@ -{old_start},{old_count} +{new_start},{new_count} @@"

    return '\n'.join(lines)
115
+
116
+
@@ -0,0 +1,59 @@
1
+ # Copyright (c) Saga Inc.
2
+ # Distributed under the terms of the GNU Affero General Public License v3.0 License.
3
+
4
+ MITO_TODO_PLACEHOLDER = "# MITO_TODO_PLACEHOLDER"
5
+
6
+ unified_diff_instrucrions = f"""
7
+ RESPONSE FORMAT: Return the changes you want to make to the streamlit app as a **unified diff (git-style patch)**:
8
+ - Begin with a ````unified_diff` header and a ```` end header.
9
+ - Then, include the standard header lines `--- a/app.py` and `+++ b/app.py`.
10
+ - Show only the modified hunks; each hunk must start with an `@@` header with line numbers.
11
+ - Within each hunk:
12
+ * Unchanged context lines start with a single space.
13
+ * Removed lines start with `-`.
14
+ * Added lines start with `+`.
15
+ - If there are **no changes**, return an empty string.
16
+ - Do not include the line numbers in your response.
17
+
18
+ **IMPORTANT: For the hunk header, use `@@ -START_LINE,1 +START_LINE,1 @@` where we always use 1 as the count value. In a later step, the system will automatically calculate the correct counts.**
19
+
20
+ <Example Response>
21
+
22
+ In the example below, assume that the line of code `data_list = [` is on line 57 of the existing streamlit app.
23
+
24
+ ```unified_diff
25
+ --- a/app.py
26
+ +++ b/app.py
27
+ @@ -57,1 +57,1 @@
28
+ data_list = [
29
+ {{'id': 1, 'name': 'Item A', 'category': 'Type 1', 'value': 100}},
30
+ {{'id': 2, 'name': 'Item B', 'category': 'Type 2', 'value': 200}},
31
+ - {MITO_TODO_PLACEHOLDER}: Add remaining entries from notebook
32
+ + {{'id': 3, 'name': 'Item C', 'category': 'Type 3', 'value': 300}},
33
+ + {{'id': 4, 'name': 'Item D', 'category': 'Type 4', 'value': 400}},
34
+ + {{'id': 5, 'name': 'Item E', 'category': 'Type 5', 'value': 500}},
35
+ + {{'id': 6, 'name': 'Item F', 'category': 'Type 6', 'value': 600}},
36
+ + {{'id': 7, 'name': 'Item G', 'category': 'Type 7', 'value': 700}},
37
+ + {{'id': 8, 'name': 'Item H', 'category': 'Type 8', 'value': 800}},
38
+ + {{'id': 9, 'name': 'Item I', 'category': 'Type 9', 'value': 900}},
39
+ + {{'id': 10, 'name': 'Item J', 'category': 'Type 10', 'value': 1000}},
40
+ + {{'id': 11, 'name': 'Item K', 'category': 'Type 11', 'value': 1100}},
41
+ + {{'id': 12, 'name': 'Item L', 'category': 'Type 12', 'value': 1200}},
42
+ + {{'id': 13, 'name': 'Item M', 'category': 'Type 13', 'value': 1300}},
43
+ + {{'id': 14, 'name': 'Item N', 'category': 'Type 14', 'value': 1400}},
44
+ + {{'id': 15, 'name': 'Item O', 'category': 'Type 15', 'value': 1500}},
45
+ + {{'id': 16, 'name': 'Item P', 'category': 'Type 16', 'value': 1600}},
46
+ + {{'id': 17, 'name': 'Item Q', 'category': 'Type 17', 'value': 1700}},
47
+ + {{'id': 18, 'name': 'Item R', 'category': 'Type 18', 'value': 1800}},
48
+ + {{'id': 19, 'name': 'Item S', 'category': 'Type 19', 'value': 1900}},
49
+ + {{'id': 20, 'name': 'Item T', 'category': 'Type 20', 'value': 2000}},
50
+ + {{'id': 21, 'name': 'Item U', 'category': 'Type 21', 'value': 2100}},
51
+ + {{'id': 22, 'name': 'Item V', 'category': 'Type 22', 'value': 2200}},
52
+ + {{'id': 23, 'name': 'Item W', 'category': 'Type 23', 'value': 2300}},
53
+ + {{'id': 24, 'name': 'Item X', 'category': 'Type 24', 'value': 2400}},
54
+ + {{'id': 25, 'name': 'Item Y', 'category': 'Type 25', 'value': 2500}}
55
+ ```
56
+ </Example Response>
57
+
58
+ Your response must consist **only** of valid unified-diff block.
59
+ """
@@ -0,0 +1,10 @@
1
+ # Copyright (c) Saga Inc.
2
+ # Distributed under the terms of the GNU Affero General Public License v3.0 License.
3
+
4
def add_line_numbers_to_code(code: str) -> str:
    """Prefix every line of *code* with its 1-based line number.

    The code is split on newline characters and each line is rendered as
    `NNN: <line>` followed by a newline, with the number right-aligned to a
    minimum width of three characters. An empty string therefore produces a
    single numbered empty line.
    """
    # Build the result in one join instead of repeated string concatenation.
    return "".join(
        f"{i:3d}: {line}\n" for i, line in enumerate(code.split('\n'), 1)
    )
@@ -0,0 +1,45 @@
1
+ # Copyright (c) Saga Inc.
2
+ # Distributed under the terms of the GNU Affero General Public License v3.0 License.
3
+
4
+ from mito_ai.streamlit_conversion.prompts.prompt_constants import MITO_TODO_PLACEHOLDER
5
+
6
+ def get_streamlit_app_creation_prompt(notebook: dict) -> str:
7
+ """
8
+ This prompt is used to create a streamlit app from a notebook.
9
+ """
10
+ return f"""Convert the following Jupyter notebook into a Streamlit application.
11
+
12
+ GOAL: Create a complete, runnable Streamlit app that accurately represents the notebook. It must completely convert the notebook.
13
+
14
+ TODO PLACEHOLDER RULES:
15
+ If you decide to leave any TODOs, you must mark them with {MITO_TODO_PLACEHOLDER}. You should use {MITO_TODO_PLACEHOLDER} instead of comments like the following:
16
+ - # ... (include all mappings from the notebook)
17
+ - # ... (include all violation codes from the notebook)
18
+ - # Fill in the rest of the code here
19
+ - # TODO: Add more code here
20
+ - # TODO: Add the visualization code here
21
+
22
+ For each TODO, use this exact format:
23
+ {MITO_TODO_PLACEHOLDER}: <specific description of what needs to be added>
24
+
25
+ IMPORTANT:
26
+ - The app must still be RUNNABLE even with placeholders
27
+ - Include enough sample data to show the structure
28
+ - Do NOT use placeholders for small/medium content - include it directly
29
+ - Do NOT use placeholders for file paths, imports, or core logic
30
+ - Only use placeholders when absolutely necessary. Add all of the content directly as much as possible.
31
+
32
+ <Example>
33
+ If the notebook has a list of dictionaries with 50 entries, you would write:
34
+
35
+ data = [
36
+ {{'id': 1, 'name': 'Item A', 'category': 'Type 1', 'value': 100}},
37
+ {{'id': 2, 'name': 'Item B', 'category': 'Type 2', 'value': 200}},
38
+ {MITO_TODO_PLACEHOLDER}: Add remaining entries from the data list
39
+ ]
40
+ </Example>
41
+
42
+ Notebook to convert:
43
+
44
+ {notebook}
45
+ """
@@ -0,0 +1,28 @@
1
+ # Copyright (c) Saga Inc.
2
+ # Distributed under the terms of the GNU Affero General Public License v3.0 License.
3
+
4
+ from mito_ai.streamlit_conversion.prompts.prompt_constants import unified_diff_instrucrions
5
+ from mito_ai.streamlit_conversion.prompts.prompt_utils import add_line_numbers_to_code
6
+
7
+ def get_streamlit_error_correction_prompt(error: str, streamlit_app_code: str) -> str:
8
+
9
+ existing_streamlit_app_code_with_line_numbers = add_line_numbers_to_code(streamlit_app_code)
10
+
11
+ return f"""You've created a Streamlit app, but it has an error in it when you try to run it.
12
+
13
+ Your job is to fix the error now. Only fix the specific error that you are instructed to fix now. Do not fix other error that that you anticipate. You will be asked to fix other errors later.
14
+
15
+ {unified_diff_instrucrions}
16
+
17
+ ===============================================
18
+
19
+ EXISTING STREAMLIT APP:
20
+ {existing_streamlit_app_code_with_line_numbers}
21
+
22
+ ===============================================
23
+
24
+ Please create a unified diff that corrects this error. Please keep your fix concise:
25
+ {error}
26
+
27
+ """
28
+
@@ -0,0 +1,44 @@
1
+ # Copyright (c) Saga Inc.
2
+ # Distributed under the terms of the GNU Affero General Public License v3.0 License.
3
+
4
+ from mito_ai.streamlit_conversion.prompts.prompt_constants import MITO_TODO_PLACEHOLDER, unified_diff_instrucrions
5
+ from mito_ai.streamlit_conversion.prompts.prompt_utils import add_line_numbers_to_code
6
+
7
+ def get_finish_todo_prompt(notebook: dict, existing_streamlit_app_code: str, todo_placeholder: str) -> str:
8
+
9
+ existing_streamlit_app_code_with_line_numbers = add_line_numbers_to_code(existing_streamlit_app_code)
10
+
11
+ return f"""You've already created the first draft of a Streamlit app representation of a Jupyter notebook, but you left yourself some TODOs marked as `{MITO_TODO_PLACEHOLDER}`.
12
+
13
+ **CRITICAL COMPLETION REQUIREMENT:**
14
+ You have ONE and ONLY ONE opportunity to complete this TODO. If you do not finish the entire task completely, the application will be broken and unusable. This is your final chance to get it right.
15
+
16
+ **COMPLETION RULES:**
17
+ 1. **NEVER leave partial work** - If the TODO asks for a list with 100 items, provide ALL 100 items
18
+ 2. **NEVER use placeholders** - This is your only opportunity to fulfill this TODO, so do not leave yourself another TODO.
19
+ 3. **NEVER assume "good enough"** - Complete the task to 100% satisfaction
20
+ 4. **If the task seems large, that's exactly why it needs to be done now** - This is your only chance
21
+
22
+ **HOW TO DETERMINE IF TASK IS COMPLETE:**
23
+ - If building a list/dictionary: Include ALL items that should be in the final data structure
24
+ - If creating functions: Implement ALL required functionality
25
+ - If converting a visualization: Copy over ALL of the visualization code from the notebook, including all styling and formatting.
26
+
27
+ {unified_diff_instrucrions}
28
+
29
+ ===============================================
30
+
31
+ Input Notebook that you are converting into the Streamlit app:
32
+ {notebook}
33
+
34
+ ===============================================
35
+
36
+ EXISTING STREAMLIT APP:
37
+ {existing_streamlit_app_code_with_line_numbers}
38
+
39
+ ===============================================
40
+
41
+ Please make the changes for this TODO. Only focus on this one TODO right now. You will be asked to fix others later:
42
+ {todo_placeholder}
43
+
44
+ """