mito-ai 0.1.37__py3-none-any.whl → 0.1.39__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of mito-ai might be problematic.

Files changed (56)
  1. mito_ai/__init__.py +17 -1
  2. mito_ai/_version.py +1 -1
  3. mito_ai/app_builder/handlers.py +43 -38
  4. mito_ai/app_builder/models.py +1 -1
  5. mito_ai/completions/handlers.py +1 -1
  6. mito_ai/completions/prompt_builders/agent_system_message.py +18 -45
  7. mito_ai/completions/prompt_builders/chat_name_prompt.py +6 -6
  8. mito_ai/log/handlers.py +10 -3
  9. mito_ai/log/urls.py +3 -3
  10. mito_ai/openai_client.py +1 -1
  11. mito_ai/streamlit_conversion/agent_utils.py +116 -0
  12. mito_ai/streamlit_conversion/prompts/prompt_constants.py +59 -0
  13. mito_ai/streamlit_conversion/prompts/prompt_utils.py +10 -0
  14. mito_ai/streamlit_conversion/prompts/streamlit_app_creation_prompt.py +45 -0
  15. mito_ai/streamlit_conversion/prompts/streamlit_error_correction_prompt.py +28 -0
  16. mito_ai/streamlit_conversion/prompts/streamlit_finish_todo_prompt.py +44 -0
  17. mito_ai/streamlit_conversion/streamlit_agent_handler.py +90 -44
  18. mito_ai/streamlit_conversion/streamlit_system_prompt.py +30 -17
  19. mito_ai/streamlit_conversion/streamlit_utils.py +48 -8
  20. mito_ai/streamlit_conversion/validate_streamlit_app.py +116 -0
  21. mito_ai/streamlit_preview/__init__.py +7 -0
  22. mito_ai/streamlit_preview/handlers.py +164 -0
  23. mito_ai/streamlit_preview/manager.py +159 -0
  24. mito_ai/streamlit_preview/urls.py +22 -0
  25. mito_ai/tests/streamlit_conversion/test_streamlit_agent_handler.py +166 -78
  26. mito_ai/tests/streamlit_conversion/test_streamlit_utils.py +4 -5
  27. mito_ai/tests/streamlit_conversion/test_validate_streamlit_app.py +119 -0
  28. mito_ai/tests/streamlit_preview/test_streamlit_preview_manager.py +302 -0
  29. mito_ai/tests/utils/test_anthropic_utils.py +2 -2
  30. mito_ai/utils/anthropic_utils.py +4 -4
  31. mito_ai/utils/open_ai_utils.py +0 -4
  32. mito_ai/utils/telemetry_utils.py +28 -1
  33. {mito_ai-0.1.37.data → mito_ai-0.1.39.data}/data/share/jupyter/labextensions/mito_ai/build_log.json +1 -1
  34. {mito_ai-0.1.37.data → mito_ai-0.1.39.data}/data/share/jupyter/labextensions/mito_ai/package.json +2 -2
  35. {mito_ai-0.1.37.data → mito_ai-0.1.39.data}/data/share/jupyter/labextensions/mito_ai/schemas/mito_ai/package.json.orig +1 -1
  36. {mito_ai-0.1.37.data → mito_ai-0.1.39.data}/data/share/jupyter/labextensions/mito_ai/schemas/mito_ai/toolbar-buttons.json +6 -1
  37. mito_ai-0.1.37.data/data/share/jupyter/labextensions/mito_ai/static/lib_index_js.831f63b48760c7119b9b.js → mito_ai-0.1.39.data/data/share/jupyter/labextensions/mito_ai/static/lib_index_js.16b532b655cd2906e04a.js +799 -116
  38. mito_ai-0.1.39.data/data/share/jupyter/labextensions/mito_ai/static/lib_index_js.16b532b655cd2906e04a.js.map +1 -0
  39. mito_ai-0.1.37.data/data/share/jupyter/labextensions/mito_ai/static/remoteEntry.93ecc9bc0edba61535cc.js → mito_ai-0.1.39.data/data/share/jupyter/labextensions/mito_ai/static/remoteEntry.606207904e6aaa42b1bf.js +5 -5
  40. mito_ai-0.1.37.data/data/share/jupyter/labextensions/mito_ai/static/remoteEntry.93ecc9bc0edba61535cc.js.map → mito_ai-0.1.39.data/data/share/jupyter/labextensions/mito_ai/static/remoteEntry.606207904e6aaa42b1bf.js.map +1 -1
  41. {mito_ai-0.1.37.dist-info → mito_ai-0.1.39.dist-info}/METADATA +4 -1
  42. {mito_ai-0.1.37.dist-info → mito_ai-0.1.39.dist-info}/RECORD +53 -42
  43. mito_ai/streamlit_conversion/validate_and_run_streamlit_code.py +0 -207
  44. mito_ai/tests/streamlit_conversion/test_validate_and_run_streamlit_code.py +0 -418
  45. mito_ai-0.1.37.data/data/share/jupyter/labextensions/mito_ai/static/lib_index_js.831f63b48760c7119b9b.js.map +0 -1
  46. {mito_ai-0.1.37.data → mito_ai-0.1.39.data}/data/etc/jupyter/jupyter_server_config.d/mito_ai.json +0 -0
  47. {mito_ai-0.1.37.data → mito_ai-0.1.39.data}/data/share/jupyter/labextensions/mito_ai/static/style.js +0 -0
  48. {mito_ai-0.1.37.data → mito_ai-0.1.39.data}/data/share/jupyter/labextensions/mito_ai/static/style_index_js.5876024bb17dbd6a3ee6.js +0 -0
  49. {mito_ai-0.1.37.data → mito_ai-0.1.39.data}/data/share/jupyter/labextensions/mito_ai/static/style_index_js.5876024bb17dbd6a3ee6.js.map +0 -0
  50. {mito_ai-0.1.37.data → mito_ai-0.1.39.data}/data/share/jupyter/labextensions/mito_ai/static/vendors-node_modules_semver_index_js.9795f79265ddb416864b.js +0 -0
  51. {mito_ai-0.1.37.data → mito_ai-0.1.39.data}/data/share/jupyter/labextensions/mito_ai/static/vendors-node_modules_semver_index_js.9795f79265ddb416864b.js.map +0 -0
  52. {mito_ai-0.1.37.data → mito_ai-0.1.39.data}/data/share/jupyter/labextensions/mito_ai/static/vendors-node_modules_vscode-diff_dist_index_js.ea55f1f9346638aafbcf.js +0 -0
  53. {mito_ai-0.1.37.data → mito_ai-0.1.39.data}/data/share/jupyter/labextensions/mito_ai/static/vendors-node_modules_vscode-diff_dist_index_js.ea55f1f9346638aafbcf.js.map +0 -0
  54. {mito_ai-0.1.37.dist-info → mito_ai-0.1.39.dist-info}/WHEEL +0 -0
  55. {mito_ai-0.1.37.dist-info → mito_ai-0.1.39.dist-info}/entry_points.txt +0 -0
  56. {mito_ai-0.1.37.dist-info → mito_ai-0.1.39.dist-info}/licenses/LICENSE +0 -0
mito_ai/__init__.py CHANGED
@@ -6,12 +6,22 @@ from jupyter_server.utils import url_path_join
  from mito_ai.completions.handlers import CompletionHandler
  from mito_ai.completions.providers import OpenAIProvider
  from mito_ai.app_builder.handlers import AppBuilderHandler
+ from mito_ai.streamlit_preview.handlers import StreamlitPreviewHandler
  from mito_ai.log.urls import get_log_urls
  from mito_ai.version_check import VersionCheckHandler
  from mito_ai.db.urls import get_db_urls
  from mito_ai.settings.urls import get_settings_urls
  from mito_ai.rules.urls import get_rules_urls
  from mito_ai.auth.urls import get_auth_urls
+ from mito_ai.streamlit_preview.urls import get_streamlit_preview_urls
+
+ # Sometimes matplotlib figures do not show up in the notebook with this warning:
+ # UserWarning: FigureCanvasAgg is non-interactive, and thus cannot be shown
+ # I believe that streamlit is reconfiguring the matplotlib settings and this is happening as a result.
+ # For now, we just set the backend to inline, so that the figures show up again
+ import os
+ os.environ['MPLBACKEND'] = 'inline'
+
  try:
  from _version import __version__
  except ImportError:
@@ -58,6 +68,11 @@ def _load_jupyter_server_extension(server_app) -> None: # type: ignore
  AppBuilderHandler,
  {}
  ),
+ (
+ url_path_join(base_url, "mito-ai", "streamlit-preview"),
+ StreamlitPreviewHandler,
+ {}
+ ),
  (
  url_path_join(base_url, "mito-ai", "version-check"),
  VersionCheckHandler,
@@ -69,8 +84,9 @@ def _load_jupyter_server_extension(server_app) -> None: # type: ignore
  handlers.extend(get_db_urls(base_url)) # type: ignore
  handlers.extend(get_settings_urls(base_url)) # type: ignore
  handlers.extend(get_rules_urls(base_url)) # type: ignore
- handlers.extend(get_log_urls(base_url)) # type: ignore
+ handlers.extend(get_log_urls(base_url, open_ai_provider.key_type)) # type: ignore
  handlers.extend(get_auth_urls(base_url)) # type: ignore
+ handlers.extend(get_streamlit_preview_urls(base_url)) # type: ignore

  web_app.add_handlers(host_pattern, handlers)
  server_app.log.info("Loaded the mito_ai server extension")
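The new routes are wired in through the same `get_*_urls` convention used elsewhere in the extension: each helper returns `(url_pattern, handler_class, handler_kwargs)` tuples that are appended to `handlers` before `web_app.add_handlers` is called. The added `mito_ai/streamlit_preview/urls.py` (+22 lines) is not shown in this section, so the sketch below is only a guess at its shape, modeled on `get_log_urls` further down; the route name and body are assumptions, not the package's actual code.

```python
# Hypothetical sketch of mito_ai/streamlit_preview/urls.py, assuming it follows
# the same (url_pattern, handler_class, handler_kwargs) convention as get_log_urls.
from typing import Any, List, Tuple

from jupyter_server.utils import url_path_join

from mito_ai.streamlit_preview.handlers import StreamlitPreviewHandler


def get_streamlit_preview_urls(base_url: str) -> List[Tuple[str, Any, dict]]:
    """Return the URL patterns served by the Streamlit preview handlers."""
    BASE_URL = base_url + "/mito-ai"
    return [
        # Route name is a placeholder; the real patterns live in the file not shown here.
        (url_path_join(BASE_URL, "streamlit-preview", "example-route"), StreamlitPreviewHandler, {}),
    ]
```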
mito_ai/_version.py CHANGED
@@ -1,4 +1,4 @@
  # This file is auto-generated by Hatchling. As such, do not:
  # - modify
  # - track in version control e.g. be sure to add to .gitignore
- __version__ = VERSION = '0.1.37'
+ __version__ = VERSION = '0.1.39'
mito_ai/app_builder/handlers.py CHANGED
@@ -4,7 +4,7 @@
  import os
  import time
  import logging
- from typing import Any, Union
+ from typing import Any, Union, Optional
  import zipfile
  import tempfile
  from mito_ai.utils.create import initialize_user
@@ -13,6 +13,7 @@ from mito_ai.utils.websocket_base import BaseWebSocketHandler
  from mito_ai.app_builder.models import (
  BuildAppReply,
  AppBuilderError,
+ BuildAppRequest,
  ErrorMessage,
  MessageType
  )
@@ -74,7 +75,8 @@ class AppBuilderHandler(BaseWebSocketHandler):

  if message_type == MessageType.BUILD_APP.value:
  # Handle build app request
- await self._handle_build_app(parsed_message)
+ build_app_request = BuildAppRequest(**parsed_message)
+ await self._handle_build_app(build_app_request)
  else:
  self.log.error(f"Unknown message type: {message_type}")
  error = AppBuilderError(
@@ -98,25 +100,24 @@
  latency_ms = round((time.time() - start) * 1000)
  self.log.info(f"App builder handler processed in {latency_ms} ms.")

- async def _handle_build_app(self, message: dict) -> None:
+ async def _handle_build_app(self, message: BuildAppRequest) -> None:
  """Handle a build app request.

  Args:
  message: The parsed message.
  """
- message_id = message.get('message_id', '') # Default to empty string if not present
- notebook_path = message.get('notebook_path')
- app_path = message.get('app_path')
- jwt_token = message.get('jwt_token', '') # Extract JWT token from request, default to empty string
+ message_id = message.message_id
+ notebook_path = message.notebook_path
+ jwt_token = message.jwt_token

  if not message_id:
  self.log.error("Missing message_id in request")
  return

- if not app_path:
+ if not notebook_path:
  error = AppBuilderError(
  error_type="InvalidRequest",
- title="Missing 'path' parameter"
+ title="Missing 'notebook_path' parameter"
  )
  self.reply(BuildAppReply(
  parent_id=message_id,
@@ -126,32 +127,36 @@
  return

  # Validate JWT token if provided
- if jwt_token and jwt_token != 'placeholder-jwt-token':
- self.log.info(f"Validating JWT token: {jwt_token[:20]}...")
- is_valid = self._validate_jwt_token(jwt_token)
- if not is_valid:
- self.log.error("JWT token validation failed")
- error = AppBuilderError(
- error_type="Unauthorized",
- title="Invalid authentication token",
- hint="Please sign in again to deploy your app."
- )
- self.reply(BuildAppReply(
- parent_id=message_id,
- url="",
- error=error
- ))
- return
- else:
- self.log.info("JWT token validation successful")
+ token_preview = jwt_token[:20] if jwt_token else "No token provided"
+ self.log.info(f"Validating JWT token: {token_preview}...")
+ is_valid = self._validate_jwt_token(jwt_token) if jwt_token else False
+ if not is_valid or not jwt_token:
+ self.log.error("JWT token validation failed")
+ error = AppBuilderError(
+ error_type="Unauthorized",
+ title="Invalid authentication token",
+ hint="Please sign in again to deploy your app."
+ )
+ self.reply(BuildAppReply(
+ parent_id=message_id,
+ url="",
+ error=error
+ ))
+ return
  else:
- self.log.warning("No JWT token provided or using placeholder token")
-
+ self.log.info("JWT token validation successful")
+
  try:
+ notebook_path = str(notebook_path) if notebook_path else ""
+
+ app_directory = os.path.dirname(notebook_path)
+ app_path = os.path.join(app_directory, "app.py")

- success_flag, result_message = await streamlit_handler(str(notebook_path) if notebook_path else "", app_path)
- if not success_flag:
- raise Exception(result_message)
+ if not os.path.exists(app_path):
+ success_flag, app_path_result, result_message = await streamlit_handler(notebook_path)
+ if not success_flag or app_path_result is None:
+ raise Exception(result_message)
+ app_path = app_path_result

  deploy_url = await self._deploy_app(app_path, jwt_token)

@@ -271,15 +276,15 @@
  except requests.exceptions.RequestException as e:
  self.log.error(f"Error during API request: {e}")
  if hasattr(e, 'response') and e.response is not None:
- try:
- error_detail = e.response.json()
- self.log.error(f"Server error details: {error_detail}")
- except:
- self.log.error(f"Server response: {e.response.text}")
- raise Exception(f"Deployment failed: {str(e)}")
+ error_detail = e.response.json()
+ self.log.error(f"Server error details: {error_detail}")
+ if 'error' in error_detail:
+ raise Exception(error_detail['error'])
+ raise
  except Exception as e:
  self.log.error(f"Error during deployment: {str(e)}")
  raise
+ raise RuntimeError("Unexpected error in _deploy_app")

  async def _upload_app_to_s3(self, app_path: str, presigned_url: str) -> requests.Response:
  """Upload the app to S3 using the presigned URL."""
mito_ai/app_builder/models.py CHANGED
@@ -65,7 +65,7 @@ class BuildAppRequest:
  message_id: str

  # Path to the app file.
- path: str
+ notebook_path: str

  # JWT token for authorization.
  jwt_token: Optional[str] = None
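Taken together with the handler changes above, the build request no longer carries an app path; the handler derives `app.py` from `notebook_path` and only regenerates the app when that file is missing. A minimal sketch of the derivation, with a made-up path:

```python
import os

notebook_path = "/home/user/project/analysis.ipynb"  # example value, not from the package
app_directory = os.path.dirname(notebook_path)
app_path = os.path.join(app_directory, "app.py")     # -> /home/user/project/app.py

# Per the diff above, streamlit_handler(notebook_path) is only awaited when this
# app.py does not already exist; otherwise the existing file is deployed as-is.
print(os.path.exists(app_path))
```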
mito_ai/completions/handlers.py CHANGED
@@ -46,7 +46,7 @@ from mito_ai.completions.completion_handlers.agent_execution_handler import get_
  from mito_ai.completions.completion_handlers.agent_auto_error_fixup_handler import get_agent_auto_error_fixup_completion
  from mito_ai.utils.telemetry_utils import identify

- FALLBACK_MODEL = "gpt-4.1" # Default model to use for safety
+ FALLBACK_MODEL = "gpt-5" # Default model to use for safety

  # The GlobalMessageHistory is responsible for updating the message histories stored in the .mito/ai-chats directory.
  # We create one GlobalMessageHistory per backend server instance instead of one per websocket connection so that the
mito_ai/completions/prompt_builders/agent_system_message.py CHANGED
@@ -52,9 +52,6 @@ Format:
  code_summary: str
  cell_type: 'code' | 'markdown'
  }}
- get_cell_output_cell_id: None,
- next_steps: None,
- analysis_assumptions: None
  }}

  Important information:
@@ -64,7 +61,7 @@ Important information:
  4. The code_summary must be a very short phrase (1–5 words maximum) that begins with a verb ending in "-ing" (e.g., "Loading data", "Filtering rows", "Calculating average", "Plotting revenue"). Avoid full sentences or explanations—this should read like a quick commit message or code label, not a description.
  5. Important: Only use the CELL_UPDATE tool if you want to add/modify a notebook cell in response to the user's request. If the user is just sending you a friendly greeting or asking you a question about yourself, you SHOULD NOT USE A CELL_UPDATE tool because it does not require modifying the notebook. Instead, just use the FINISHED_TASK response.
  6. The assumptions is an optional list of critical assumptions that you made about the data or analysis approach. The assumptions you list here will be displayed to the user so that they can confirm or correct the assumptions. For example: ["NaN values in the impressions column represent 0 impressions", "Only crashes with pedestrian or cyclist fatalities are considered fatal crashes", "Intervention priority combines both volume and severity to identify maximum impact opportunities"].
- 7. Only include important data and analytical assumptions that if incorrect would fundamentally change your analysis conclusions. These should be data handling decisions, methodological choices, and definitional boundaries. Do not include: obvious statements ("Each record is counted once"), result interpretation guidance ("Gaps in the plot represent zero values"), display choices ("Data is sorted for clarity"), internal reasoning ("Bar chart is better than line plot"), or environment assumptions ("Library X is installed"). Prioritize quality over quantity - include only the most critical assumptions or None if there are no critical assumptions made in this step that have not already be shared with the user. If you ever doubt whether an assumption is critical enough to be shared with the user as an assumption, don't include it. Most messages should not include an assumption.
+ 7. Only include important data and analytical assumptions that if incorrect would fundamentally change your analysis conclusions. These should be data handling decisions, methodological choices, and definitional boundaries. Do not include: obvious statements ("Each record is counted once"), result interpretation guidance ("Gaps in the plot represent zero values"), display choices ("Data is sorted for clarity"), internal reasoning ("Bar chart is better than line plot"), or environment assumptions ("Library X is installed"). Prioritize quality over quantity - include only the most critical assumptions or omit the field entirely if there are no critical assumptions made in this step that have not already be shared with the user. If you ever doubt whether an assumption is critical enough to be shared with the user as an assumption, don't include it. Most messages should not include an assumption.
  8. Do not include the same assumption or variations of the same assumption multiple times in the same conversation. Once you have presented the assumption to the user, they will already have the opportunity to confirm or correct it so do not include it again.

  #### Cell Addition:
@@ -81,9 +78,6 @@ Format:
  code_summary: str
  cell_type: 'code' | 'markdown'
  }}
- get_cell_output_cell_id: None,
- next_steps: None,
- analysis_assumptions: None
  }}

  Important information:
@@ -93,7 +87,7 @@ Important information:
  4. code_summary must be a very short phrase (1–5 words maximum) that begins with a verb ending in "-ing" (e.g., "Loading data", "Filtering rows", "Calculating average", "Plotting revenue"). Avoid full sentences or explanations—this should read like a quick commit message or code label, not a description.
  5. The cell_type should only be 'markdown' if there is no code to add. There may be times where the code has comments. These are still code cells and should have the cell_type 'code'. Any cells that are labeled 'markdown' will be converted to markdown cells by the user.
  6. The assumptions is an optional list of critical assumptions that you made about the data or analysis approach. The assumptions you list here will be displayed to the user so that they can confirm or correct the assumptions. For example: ["NaN values in the impressions column represent 0 impressions", "Only crashes with pedestrian or cyclist fatalities are considered fatal crashes", "Intervention priority combines both volume and severity to identify maximum impact opportunities"].
- 7. Only include important data and analytical assumptions that if incorrect would fundamentally change your analysis conclusions. These should be data handling decisions, methodological choices, and definitional boundaries. Do not include: obvious statements ("Each record is counted once"), result interpretation guidance ("Gaps in the plot represent zero values"), display choices ("Data is sorted for clarity"), internal reasoning ("Bar chart is better than line plot"), or environment assumptions ("Library X is installed"). Prioritize quality over quantity - include only the most critical assumptions or None if there are no critical assumptions made in this step that have not already be shared with the user. If you ever doubt whether an assumption is critical enough to be shared with the user as an assumption, don't include it. Most messages should not include an assumption.
+ 7. Only include important data and analytical assumptions that if incorrect would fundamentally change your analysis conclusions. These should be data handling decisions, methodological choices, and definitional boundaries. Do not include: obvious statements ("Each record is counted once"), result interpretation guidance ("Gaps in the plot represent zero values"), display choices ("Data is sorted for clarity"), internal reasoning ("Bar chart is better than line plot"), or environment assumptions ("Library X is installed"). Prioritize quality over quantity - include only the most critical assumptions or omit the field entirely if there are no critical assumptions made in this step that have not already be shared with the user. If you ever doubt whether an assumption is critical enough to be shared with the user as an assumption, don't include it. Most messages should not include an assumption.
  8. Do not include the same assumption or variations of the same assumption multiple times in the same conversation. Once you have presented the assumption to the user, they will already have the opportunity to confirm or correct it so do not include it again.

  <Cell Modification Example>
@@ -133,17 +127,14 @@ Convert the transaction_date column to datetime and then multiply the total_pric
  Output:
  {{
  type: 'cell_update',
- cell_type: 'code',
+ message: "I'll convert the transaction_date column to datetime and multiply total_price by the multiplier.",
  cell_update: {{
- type: 'modification'
+ type: 'modification',
  id: 'c68fdf19-db8c-46dd-926f-d90ad35bb3bc',
  code: "import pandas as pd\\nsales_df = pd.read_csv('./sales.csv')\\nloan_multiplier = 1.5\\nsales_df['transaction_date'] = pd.to_datetime(sales_df['transaction_date'])\\nsales_df['total_price'] = sales_df['total_price'] * sales_multiplier",
  code_summary: "Converting the transaction_date column",
  cell_type: 'code'
- }},
- get_cell_output_cell_id: None,
- next_steps: None,
- analysis_assumptions: None
+ }}
  }}

  </Cell Modification Example>
@@ -184,17 +175,14 @@ Graph the total_price for each sale
  Output:
  {{
  type: 'cell_update',
- message: "I'll create a graph with using matplotlib with sale `index` on the x axis and `total_price` on the y axis.",
+ message: "I'll create a graph using matplotlib with sale index on the x axis and total_price on the y axis.",
  cell_update: {{
- type: 'add'
- index: 2
- code: "import matplotlib.pyplot as plt\n\nplt.bar(sales_df.index, sales_df['total_price'])\nplt.title('Total Price per Sale')\nplt.xlabel('Transaction Number')\nplt.ylabel('Sales Price ($)')\nplt.show()"
+ type: 'new',
+ index: 2,
+ code: "import matplotlib.pyplot as plt\n\nplt.bar(sales_df.index, sales_df['total_price'])\nplt.title('Total Price per Sale')\nplt.xlabel('Transaction Number')\nplt.ylabel('Sales Price ($)')\nplt.show()",
  code_summary: "Plotting total_price",
- code_summary: "Plotting total_price"
- }},
- get_cell_output_cell_id: None,
- next_steps: None,
- analysis_assumptions: None
+ cell_type: 'code'
+ }}
  }}

  </Cell Addition Example>
@@ -208,10 +196,7 @@ When you want to get a base64 encoded version of a cell's output, respond with t
  {{
  type: 'get_cell_output',
  message: str,
- get_cell_output_cell_id: str,
- cell_update: None,
- next_steps: Optional[List[str]],
- analysis_assumptions: Optional[List[str]]
+ get_cell_output_cell_id: str
  }}

  Important information:
@@ -228,10 +213,7 @@ When you have completed the user's task, respond with a message in this format:
  {{
  type: 'finished_task',
  message: str,
- get_cell_output_cell_id: None,
- cell_update: None,
- next_steps: Optional[List[str]],
- analysis_assumptions: None
+ next_steps: Optional[List[str]]
  }}

  Important information:
@@ -249,8 +231,6 @@ Important information:
  {{
  type: 'finished_task',
  message: "Revenue analysis complete: total sales reached $2.3M with 34% growth in Q4[MITO_CITATION:abc123:2-3], while premium products generated 67% of profit margins[MITO_CITATION:xyz456:5]. The customer segmentation workflow identified three distinct buying patterns driving conversion rates[MITO_CITATION:def456:8-12].",
- get_cell_output_cell_id: None,
- cell_update: None,
  next_steps: ["Graph sales by product category", "Identify seasonal patterns in data", "Find the top 3 performing products"]
  }}

@@ -263,11 +243,7 @@ User message: "Hi"
  Output:
  {{
  type: 'finished_task',
- message: "Hey there! I'm Mito AI. How can I help you today?",
- get_cell_output_cell_id: None,
- cell_update: None,
- next_steps: None,
- analysis_assumptions: None
+ message: "Hey there! I'm Mito AI. How can I help you today?"
  }}

  </Finished Task Example 2>
@@ -327,12 +303,11 @@ Output:
  type: 'cell_update',
  message: "I'll calculate two new variables all_time_high_date and all_time_high_price.",
  cell_update: {{
- type: 'add'
- index: 2
- code: "all_time_high_row_idx = tesla_stock_prices_df['closing_price'].idxmax()\nall_time_high_date = tesla_stock_prices_df.at[all_time_high_row_idx, 'Date']\nall_time_high_price = tesla_stock_prices_df.at[all_time_high_row_idx, 'closing_price']"
+ type: 'new',
+ index: 2,
+ code: "all_time_high_row_idx = tesla_stock_prices_df['closing_price'].idxmax()\nall_time_high_date = tesla_stock_prices_df.at[all_time_high_row_idx, 'Date']\nall_time_high_price = tesla_stock_prices_df.at[all_time_high_row_idx, 'closing_price']",
  code_summary: "Calculating all time high"
- }},
- get_cell_output_cell_id: None
+ }}
  }}

  ### User Message 2
@@ -379,8 +354,6 @@ Output:
  {{
  type: 'finished_task',
  message: "The all time high tesla stock closing price was $265.91 [MITO_CITATION:9c0d5fda-2b16-4f52-a1c5-a48892f3e2e8:2] on 2025-03-16 [MITO_CITATION:9c0d5fda-2b16-4f52-a1c5-a48892f3e2e8:1]",
- get_cell_output_cell_id: None,
- cell_update: None,
  next_steps: ["Create a visualization of Tesla's stock price over time", "Calculate the percentage change from the lowest to highest price", "Analyze the volatility of Tesla's stock"]
  }}

mito_ai/completions/prompt_builders/chat_name_prompt.py CHANGED
@@ -3,13 +3,13 @@

  def create_chat_name_prompt(user_message: str, assistant_message: str) -> str:
  prompt = f"""Create a short name for the chat thread based on the first user message
- and the first LLM response. Reply ONLY with the short title (max 40 chars). Don't add any extra text.
-
- Don't include that its a Python project in the chat.
+ and the first LLM response. Reply ONLY with the short title (max 40 chars). Don't add any extra text.
+
+ Don't include that its a Python project in the chat.

- User Message: {user_message}
+ User Message: {user_message}

- Assistant Message: {assistant_message}
- """
+ Assistant Message: {assistant_message}
+ """

  return prompt
mito_ai/log/handlers.py CHANGED
@@ -3,16 +3,22 @@

  from dataclasses import dataclass
  import json
- from typing import Any, Final
+ from typing import Any, Final, Literal
  import tornado
  import os
  from jupyter_server.base.handlers import APIHandler
- from mito_ai.utils.telemetry_utils import log
+ from mito_ai.utils.telemetry_utils import MITO_SERVER_KEY, USER_KEY, log


  class LogHandler(APIHandler):
  """Handler for logging"""

+ def initialize(self, key_type: Literal['mito_server_key', 'user_key']) -> None:
+ """Initialize the log handler"""
+
+ # The key_type is required so that we know if we can log pro users
+ self.key_type = key_type
+
  @tornado.web.authenticated
  def put(self) -> None:
  """Log an event"""
@@ -26,6 +32,7 @@ class LogHandler(APIHandler):
  log_event = data['log_event']
  params = data.get('params', {})

- log(log_event, params)
+ key_type = MITO_SERVER_KEY if self.key_type == "mito_server_key" else USER_KEY
+ log(log_event, params, key_type=key_type)

mito_ai/log/urls.py CHANGED
@@ -5,7 +5,7 @@ from typing import Any, List, Tuple
  from jupyter_server.utils import url_path_join
  from mito_ai.log.handlers import LogHandler

- def get_log_urls(base_url: str) -> List[Tuple[str, Any, dict]]:
+ def get_log_urls(base_url: str, key_type: str) -> List[Tuple[str, Any, dict]]:
  """Get all log related URL patterns.

  Args:
@@ -15,7 +15,7 @@ def get_log_urls(base_url: str) -> List[Tuple[str, Any, dict]]:
  List of (url_pattern, handler_class, handler_kwargs) tuples
  """
  BASE_URL = base_url + "/mito-ai"
-
+
  return [
- (url_path_join(BASE_URL, "log"), LogHandler, {}),
+ (url_path_join(BASE_URL, "log"), LogHandler, {"key_type": key_type}),
  ]
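With this change, `LogHandler` receives `key_type` through Tornado handler kwargs (the third element of the tuple above) via its `initialize` method, and forwards it to `log(...)` so events can be attributed to the Mito server key or a user key. A hedged sketch of a client call against this endpoint follows; the server URL and token are placeholders, and the payload keys come from the handler's `put` method shown above:

```python
import requests

# Placeholders: adjust the Jupyter server URL, base_url prefix, and token for your environment.
JUPYTER_URL = "http://localhost:8888"
JUPYTER_TOKEN = "<your-jupyter-server-token>"

resp = requests.put(
    f"{JUPYTER_URL}/mito-ai/log",                        # route registered by get_log_urls above
    headers={"Authorization": f"token {JUPYTER_TOKEN}"},
    json={
        "log_event": "example_event",                    # hypothetical event name
        "params": {"source": "docs-example"},            # optional; the handler defaults to {}
    },
)
resp.raise_for_status()
```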
mito_ai/openai_client.py CHANGED
@@ -35,7 +35,7 @@ from mito_ai.utils.telemetry_utils import (
  USER_KEY,
  )

- OPENAI_MODEL_FALLBACK = "gpt-4.1"
+ OPENAI_MODEL_FALLBACK = "gpt-5"

  class OpenAIClient(LoggingConfigurable):
  """Provide AI feature through OpenAI services."""
mito_ai/streamlit_conversion/agent_utils.py ADDED
@@ -0,0 +1,116 @@
+ # Copyright (c) Saga Inc.
+ # Distributed under the terms of the GNU Affero General Public License v3.0 License.
+
+ from typing import List
+ import re
+ from unidiff import PatchSet
+ from mito_ai.streamlit_conversion.prompts.prompt_constants import MITO_TODO_PLACEHOLDER
+
+ def extract_todo_placeholders(agent_response: str) -> List[str]:
+ """Extract TODO placeholders from the agent's response"""
+ return [line.strip() for line in agent_response.split('\n') if MITO_TODO_PLACEHOLDER in line]
+
+
+ def apply_patch_to_text(text: str, diff: str) -> str:
+ """
+ Apply a *unified-diff* (git-style) patch to the given text and return
+ the updated contents.
+
+ Parameters
+ ----------
+ text : str
+ The original file contents.
+ diff : str
+ A unified diff that transforms *text* into the desired output.
+ The diff must reference exactly one file (the Streamlit app).
+
+ Returns
+ -------
+ str
+ The patched contents.
+
+ Raises
+ ------
+ ValueError
+ If the patch cannot be applied or references more than one file.
+ """
+ # Nothing to do
+ if not diff.strip():
+ return text
+
+ # Parse the patch
+ patch = PatchSet(diff.splitlines(keepends=True))
+
+ # We expect a single-file patch (what the prompt asks the model to emit)
+ if len(patch) != 1:
+ raise ValueError(
+ f"Expected a patch for exactly one file, got {len(patch)} files."
+ )
+
+ file_patch = patch[0]
+
+ original_lines = text.splitlines(keepends=True)
+ result_lines: List[str] = []
+
+ cursor = 0  # index in original_lines (0-based)
+
+ for hunk in file_patch:
+ # Copy unchanged lines before this hunk
+ while cursor < hunk.source_start - 1:
+ result_lines.append(original_lines[cursor])
+ cursor += 1
+
+ # Apply hunk line-by-line
+ for line in hunk:
+ if line.is_context:
+ result_lines.append(original_lines[cursor])
+ cursor += 1
+ elif line.is_removed:
+ cursor += 1  # Skip this line from the original
+ elif line.is_added:
+ # Ensure added line ends with newline for consistency
+ val = line.value
+ if not val.endswith("\n"):
+ val += "\n"
+ result_lines.append(val)
+
+ # Copy any remaining lines after the last hunk
+ result_lines.extend(original_lines[cursor:])
+
+ return "".join(result_lines)
+
+
+ def fix_diff_headers(diff: str) -> str:
+ """
+ The AI is generally not very good at counting the number of lines in the diff. If the hunk header has
+ an incorrect count, then the patch will fail. So instead we just calculate the counts ourselves, its deterministic.
+ """
+ lines = diff.split('\n')
+
+ for i, line in enumerate(lines):
+ if line.startswith('@@'):
+ # Extract the starting line numbers
+ match = re.match(r'@@ -(\d+),\d+ \+(\d+),\d+ @@', line)
+ if match:
+ old_start = match.group(1)
+ new_start = match.group(2)
+
+ # Count lines in this hunk
+ old_count = 0
+ new_count = 0
+
+ for j in range(i + 1, len(lines)):
+ next_line = lines[j]
+ if next_line.startswith('@@') or next_line.startswith('---'):
+ break
+ if next_line.startswith(' ') or next_line.startswith('-'):
+ old_count += 1
+ if next_line.startswith(' ') or next_line.startswith('+'):
+ new_count += 1
+
+ # Replace the header with correct counts
+ lines[i] = f"@@ -{old_start},{old_count} +{new_start},{new_count} @@"
+
+ return '\n'.join(lines)
+
+
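A short usage sketch of the three helpers above (the app text and diff are invented for illustration): the model's hunk headers are normalized with `fix_diff_headers`, the patch is applied with `apply_patch_to_text`, and any unresolved placeholders can then be collected with `extract_todo_placeholders`.

```python
from mito_ai.streamlit_conversion.agent_utils import (
    apply_patch_to_text,
    extract_todo_placeholders,
    fix_diff_headers,
)

app_text = (
    "import streamlit as st\n"
    "# MITO_TODO_PLACEHOLDER: render the data table\n"
    "st.title('Sales')\n"
)

# A single-file diff in the format the prompts request; the ",1" counts are the
# placeholder values that fix_diff_headers recomputes before the patch is applied.
model_diff = (
    "--- a/app.py\n"
    "+++ b/app.py\n"
    "@@ -2,1 +2,1 @@\n"
    "-# MITO_TODO_PLACEHOLDER: render the data table\n"
    "+st.dataframe(sales_df)\n"
)

patched = apply_patch_to_text(app_text, fix_diff_headers(model_diff))
print(patched)
print(extract_todo_placeholders(patched))  # [] once every placeholder is resolved
```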
mito_ai/streamlit_conversion/prompts/prompt_constants.py ADDED
@@ -0,0 +1,59 @@
+ # Copyright (c) Saga Inc.
+ # Distributed under the terms of the GNU Affero General Public License v3.0 License.
+
+ MITO_TODO_PLACEHOLDER = "# MITO_TODO_PLACEHOLDER"
+
+ unified_diff_instrucrions = f"""
+ RESPONSE FORMAT: Return the changes you want to make to the streamlit app as a **unified diff (git-style patch)**:
+ - Begin with a ````unified_diff` header and a ```` end header.
+ - Then, include the standard header lines `--- a/app.py` and `+++ b/app.py`.
+ - Show only the modified hunks; each hunk must start with an `@@` header with line numbers.
+ - Within each hunk:
+ * Unchanged context lines start with a single space.
+ * Removed lines start with `-`.
+ * Added lines start with `+`.
+ - If there are **no changes**, return an empty string.
+ - Do not include the line numbers in your response.
+
+ **IMPORTANT: For the hunk header, use `@@ -START_LINE,1 +START_LINE,1 @@` where we always use 1 as the count value. In a later step, the system will automatically calculate the correct counts.**
+
+ <Example Response>
+
+ In the example below, assume that the line of code `data_list = [` is on line 57 of the existing streamlit app.
+
+ ```unified_diff
+ --- a/app.py
+ +++ b/app.py
+ @@ -57,1 +57,1 @@
+ data_list = [
+ {{'id': 1, 'name': 'Item A', 'category': 'Type 1', 'value': 100}},
+ {{'id': 2, 'name': 'Item B', 'category': 'Type 2', 'value': 200}},
+ - {MITO_TODO_PLACEHOLDER}: Add remaining entries from notebook
+ + {{'id': 3, 'name': 'Item C', 'category': 'Type 3', 'value': 300}},
+ + {{'id': 4, 'name': 'Item D', 'category': 'Type 4', 'value': 400}},
+ + {{'id': 5, 'name': 'Item E', 'category': 'Type 5', 'value': 500}},
+ + {{'id': 6, 'name': 'Item F', 'category': 'Type 6', 'value': 600}},
+ + {{'id': 7, 'name': 'Item G', 'category': 'Type 7', 'value': 700}},
+ + {{'id': 8, 'name': 'Item H', 'category': 'Type 8', 'value': 800}},
+ + {{'id': 9, 'name': 'Item I', 'category': 'Type 9', 'value': 900}},
+ + {{'id': 10, 'name': 'Item J', 'category': 'Type 10', 'value': 1000}},
+ + {{'id': 11, 'name': 'Item K', 'category': 'Type 11', 'value': 1100}},
+ + {{'id': 12, 'name': 'Item L', 'category': 'Type 12', 'value': 1200}},
+ + {{'id': 13, 'name': 'Item M', 'category': 'Type 13', 'value': 1300}},
+ + {{'id': 14, 'name': 'Item N', 'category': 'Type 14', 'value': 1400}},
+ + {{'id': 15, 'name': 'Item O', 'category': 'Type 15', 'value': 1500}},
+ + {{'id': 16, 'name': 'Item P', 'category': 'Type 16', 'value': 1600}},
+ + {{'id': 17, 'name': 'Item Q', 'category': 'Type 17', 'value': 1700}},
+ + {{'id': 18, 'name': 'Item R', 'category': 'Type 18', 'value': 1800}},
+ + {{'id': 19, 'name': 'Item S', 'category': 'Type 19', 'value': 1900}},
+ + {{'id': 20, 'name': 'Item T', 'category': 'Type 20', 'value': 2000}},
+ + {{'id': 21, 'name': 'Item U', 'category': 'Type 21', 'value': 2100}},
+ + {{'id': 22, 'name': 'Item V', 'category': 'Type 22', 'value': 2200}},
+ + {{'id': 23, 'name': 'Item W', 'category': 'Type 23', 'value': 2300}},
+ + {{'id': 24, 'name': 'Item X', 'category': 'Type 24', 'value': 2400}},
+ + {{'id': 25, 'name': 'Item Y', 'category': 'Type 25', 'value': 2500}}
+ ```
+ </Example Response>
+
+ Your response must consist **only** of valid unified-diff block.
+ """
mito_ai/streamlit_conversion/prompts/prompt_utils.py ADDED
@@ -0,0 +1,10 @@
+ # Copyright (c) Saga Inc.
+ # Distributed under the terms of the GNU Affero General Public License v3.0 License.
+
+ def add_line_numbers_to_code(code: str) -> str:
+ """Add line numbers to the code"""
+ code_with_line_numbers = ""
+ for i, line in enumerate(code.split('\n'), 1):
+ code_with_line_numbers += f"{i:3d}: {line}\n"
+
+ return code_with_line_numbers
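For reference, a quick example of the output produced by the helper above: each line is prefixed with a width-3, right-aligned line number and a colon.

```python
from mito_ai.streamlit_conversion.prompts.prompt_utils import add_line_numbers_to_code

print(add_line_numbers_to_code("import streamlit as st\nst.title('Hello')"))
#   1: import streamlit as st
#   2: st.title('Hello')
```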