mito-ai 0.1.50__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (205) hide show
  1. mito_ai/__init__.py +114 -0
  2. mito_ai/_version.py +4 -0
  3. mito_ai/anthropic_client.py +334 -0
  4. mito_ai/app_deploy/__init__.py +6 -0
  5. mito_ai/app_deploy/app_deploy_utils.py +44 -0
  6. mito_ai/app_deploy/handlers.py +345 -0
  7. mito_ai/app_deploy/models.py +98 -0
  8. mito_ai/app_manager/__init__.py +4 -0
  9. mito_ai/app_manager/handlers.py +167 -0
  10. mito_ai/app_manager/models.py +71 -0
  11. mito_ai/app_manager/utils.py +24 -0
  12. mito_ai/auth/README.md +18 -0
  13. mito_ai/auth/__init__.py +6 -0
  14. mito_ai/auth/handlers.py +96 -0
  15. mito_ai/auth/urls.py +13 -0
  16. mito_ai/chat_history/handlers.py +63 -0
  17. mito_ai/chat_history/urls.py +32 -0
  18. mito_ai/completions/completion_handlers/__init__.py +3 -0
  19. mito_ai/completions/completion_handlers/agent_auto_error_fixup_handler.py +59 -0
  20. mito_ai/completions/completion_handlers/agent_execution_handler.py +66 -0
  21. mito_ai/completions/completion_handlers/chat_completion_handler.py +141 -0
  22. mito_ai/completions/completion_handlers/code_explain_handler.py +113 -0
  23. mito_ai/completions/completion_handlers/completion_handler.py +42 -0
  24. mito_ai/completions/completion_handlers/inline_completer_handler.py +48 -0
  25. mito_ai/completions/completion_handlers/smart_debug_handler.py +160 -0
  26. mito_ai/completions/completion_handlers/utils.py +147 -0
  27. mito_ai/completions/handlers.py +415 -0
  28. mito_ai/completions/message_history.py +401 -0
  29. mito_ai/completions/models.py +404 -0
  30. mito_ai/completions/prompt_builders/__init__.py +3 -0
  31. mito_ai/completions/prompt_builders/agent_execution_prompt.py +57 -0
  32. mito_ai/completions/prompt_builders/agent_smart_debug_prompt.py +160 -0
  33. mito_ai/completions/prompt_builders/agent_system_message.py +472 -0
  34. mito_ai/completions/prompt_builders/chat_name_prompt.py +15 -0
  35. mito_ai/completions/prompt_builders/chat_prompt.py +116 -0
  36. mito_ai/completions/prompt_builders/chat_system_message.py +92 -0
  37. mito_ai/completions/prompt_builders/explain_code_prompt.py +32 -0
  38. mito_ai/completions/prompt_builders/inline_completer_prompt.py +197 -0
  39. mito_ai/completions/prompt_builders/prompt_constants.py +170 -0
  40. mito_ai/completions/prompt_builders/smart_debug_prompt.py +199 -0
  41. mito_ai/completions/prompt_builders/utils.py +84 -0
  42. mito_ai/completions/providers.py +284 -0
  43. mito_ai/constants.py +63 -0
  44. mito_ai/db/__init__.py +3 -0
  45. mito_ai/db/crawlers/__init__.py +6 -0
  46. mito_ai/db/crawlers/base_crawler.py +61 -0
  47. mito_ai/db/crawlers/constants.py +43 -0
  48. mito_ai/db/crawlers/snowflake.py +71 -0
  49. mito_ai/db/handlers.py +168 -0
  50. mito_ai/db/models.py +31 -0
  51. mito_ai/db/urls.py +34 -0
  52. mito_ai/db/utils.py +185 -0
  53. mito_ai/docker/mssql/compose.yml +37 -0
  54. mito_ai/docker/mssql/init/setup.sql +21 -0
  55. mito_ai/docker/mysql/compose.yml +18 -0
  56. mito_ai/docker/mysql/init/setup.sql +13 -0
  57. mito_ai/docker/oracle/compose.yml +17 -0
  58. mito_ai/docker/oracle/init/setup.sql +20 -0
  59. mito_ai/docker/postgres/compose.yml +17 -0
  60. mito_ai/docker/postgres/init/setup.sql +13 -0
  61. mito_ai/enterprise/__init__.py +3 -0
  62. mito_ai/enterprise/utils.py +15 -0
  63. mito_ai/file_uploads/__init__.py +3 -0
  64. mito_ai/file_uploads/handlers.py +248 -0
  65. mito_ai/file_uploads/urls.py +21 -0
  66. mito_ai/gemini_client.py +232 -0
  67. mito_ai/log/handlers.py +38 -0
  68. mito_ai/log/urls.py +21 -0
  69. mito_ai/logger.py +37 -0
  70. mito_ai/openai_client.py +382 -0
  71. mito_ai/path_utils.py +70 -0
  72. mito_ai/rules/handlers.py +44 -0
  73. mito_ai/rules/urls.py +22 -0
  74. mito_ai/rules/utils.py +56 -0
  75. mito_ai/settings/handlers.py +41 -0
  76. mito_ai/settings/urls.py +20 -0
  77. mito_ai/settings/utils.py +42 -0
  78. mito_ai/streamlit_conversion/agent_utils.py +37 -0
  79. mito_ai/streamlit_conversion/prompts/prompt_constants.py +172 -0
  80. mito_ai/streamlit_conversion/prompts/prompt_utils.py +10 -0
  81. mito_ai/streamlit_conversion/prompts/streamlit_app_creation_prompt.py +46 -0
  82. mito_ai/streamlit_conversion/prompts/streamlit_error_correction_prompt.py +28 -0
  83. mito_ai/streamlit_conversion/prompts/streamlit_finish_todo_prompt.py +45 -0
  84. mito_ai/streamlit_conversion/prompts/streamlit_system_prompt.py +56 -0
  85. mito_ai/streamlit_conversion/prompts/update_existing_app_prompt.py +50 -0
  86. mito_ai/streamlit_conversion/search_replace_utils.py +94 -0
  87. mito_ai/streamlit_conversion/streamlit_agent_handler.py +144 -0
  88. mito_ai/streamlit_conversion/streamlit_utils.py +85 -0
  89. mito_ai/streamlit_conversion/validate_streamlit_app.py +105 -0
  90. mito_ai/streamlit_preview/__init__.py +6 -0
  91. mito_ai/streamlit_preview/handlers.py +111 -0
  92. mito_ai/streamlit_preview/manager.py +152 -0
  93. mito_ai/streamlit_preview/urls.py +22 -0
  94. mito_ai/streamlit_preview/utils.py +29 -0
  95. mito_ai/tests/__init__.py +3 -0
  96. mito_ai/tests/chat_history/test_chat_history.py +211 -0
  97. mito_ai/tests/completions/completion_handlers_utils_test.py +190 -0
  98. mito_ai/tests/conftest.py +53 -0
  99. mito_ai/tests/create_agent_system_message_prompt_test.py +22 -0
  100. mito_ai/tests/data/prompt_lg.py +69 -0
  101. mito_ai/tests/data/prompt_sm.py +6 -0
  102. mito_ai/tests/data/prompt_xl.py +13 -0
  103. mito_ai/tests/data/stock_data.sqlite3 +0 -0
  104. mito_ai/tests/db/conftest.py +39 -0
  105. mito_ai/tests/db/connections_test.py +102 -0
  106. mito_ai/tests/db/mssql_test.py +29 -0
  107. mito_ai/tests/db/mysql_test.py +29 -0
  108. mito_ai/tests/db/oracle_test.py +29 -0
  109. mito_ai/tests/db/postgres_test.py +29 -0
  110. mito_ai/tests/db/schema_test.py +93 -0
  111. mito_ai/tests/db/sqlite_test.py +31 -0
  112. mito_ai/tests/db/test_db_constants.py +61 -0
  113. mito_ai/tests/deploy_app/test_app_deploy_utils.py +89 -0
  114. mito_ai/tests/file_uploads/__init__.py +2 -0
  115. mito_ai/tests/file_uploads/test_handlers.py +282 -0
  116. mito_ai/tests/message_history/test_generate_short_chat_name.py +120 -0
  117. mito_ai/tests/message_history/test_message_history_utils.py +469 -0
  118. mito_ai/tests/open_ai_utils_test.py +152 -0
  119. mito_ai/tests/performance_test.py +329 -0
  120. mito_ai/tests/providers/test_anthropic_client.py +447 -0
  121. mito_ai/tests/providers/test_azure.py +631 -0
  122. mito_ai/tests/providers/test_capabilities.py +120 -0
  123. mito_ai/tests/providers/test_gemini_client.py +195 -0
  124. mito_ai/tests/providers/test_mito_server_utils.py +448 -0
  125. mito_ai/tests/providers/test_model_resolution.py +130 -0
  126. mito_ai/tests/providers/test_openai_client.py +57 -0
  127. mito_ai/tests/providers/test_provider_completion_exception.py +66 -0
  128. mito_ai/tests/providers/test_provider_limits.py +42 -0
  129. mito_ai/tests/providers/test_providers.py +382 -0
  130. mito_ai/tests/providers/test_retry_logic.py +389 -0
  131. mito_ai/tests/providers/test_stream_mito_server_utils.py +140 -0
  132. mito_ai/tests/providers/utils.py +85 -0
  133. mito_ai/tests/rules/conftest.py +26 -0
  134. mito_ai/tests/rules/rules_test.py +117 -0
  135. mito_ai/tests/server_limits_test.py +406 -0
  136. mito_ai/tests/settings/conftest.py +26 -0
  137. mito_ai/tests/settings/settings_test.py +70 -0
  138. mito_ai/tests/settings/test_settings_constants.py +9 -0
  139. mito_ai/tests/streamlit_conversion/__init__.py +3 -0
  140. mito_ai/tests/streamlit_conversion/test_apply_search_replace.py +240 -0
  141. mito_ai/tests/streamlit_conversion/test_streamlit_agent_handler.py +246 -0
  142. mito_ai/tests/streamlit_conversion/test_streamlit_utils.py +193 -0
  143. mito_ai/tests/streamlit_conversion/test_validate_streamlit_app.py +112 -0
  144. mito_ai/tests/streamlit_preview/test_streamlit_preview_handler.py +118 -0
  145. mito_ai/tests/streamlit_preview/test_streamlit_preview_manager.py +292 -0
  146. mito_ai/tests/test_constants.py +47 -0
  147. mito_ai/tests/test_telemetry.py +12 -0
  148. mito_ai/tests/user/__init__.py +2 -0
  149. mito_ai/tests/user/test_user.py +120 -0
  150. mito_ai/tests/utils/__init__.py +3 -0
  151. mito_ai/tests/utils/test_anthropic_utils.py +162 -0
  152. mito_ai/tests/utils/test_gemini_utils.py +98 -0
  153. mito_ai/tests/version_check_test.py +169 -0
  154. mito_ai/user/handlers.py +45 -0
  155. mito_ai/user/urls.py +21 -0
  156. mito_ai/utils/__init__.py +3 -0
  157. mito_ai/utils/anthropic_utils.py +168 -0
  158. mito_ai/utils/create.py +94 -0
  159. mito_ai/utils/db.py +74 -0
  160. mito_ai/utils/error_classes.py +42 -0
  161. mito_ai/utils/gemini_utils.py +133 -0
  162. mito_ai/utils/message_history_utils.py +87 -0
  163. mito_ai/utils/mito_server_utils.py +242 -0
  164. mito_ai/utils/open_ai_utils.py +200 -0
  165. mito_ai/utils/provider_utils.py +49 -0
  166. mito_ai/utils/schema.py +86 -0
  167. mito_ai/utils/server_limits.py +152 -0
  168. mito_ai/utils/telemetry_utils.py +480 -0
  169. mito_ai/utils/utils.py +89 -0
  170. mito_ai/utils/version_utils.py +94 -0
  171. mito_ai/utils/websocket_base.py +88 -0
  172. mito_ai/version_check.py +60 -0
  173. mito_ai-0.1.50.data/data/etc/jupyter/jupyter_server_config.d/mito_ai.json +7 -0
  174. mito_ai-0.1.50.data/data/share/jupyter/labextensions/mito_ai/build_log.json +728 -0
  175. mito_ai-0.1.50.data/data/share/jupyter/labextensions/mito_ai/package.json +243 -0
  176. mito_ai-0.1.50.data/data/share/jupyter/labextensions/mito_ai/schemas/mito_ai/package.json.orig +238 -0
  177. mito_ai-0.1.50.data/data/share/jupyter/labextensions/mito_ai/schemas/mito_ai/toolbar-buttons.json +37 -0
  178. mito_ai-0.1.50.data/data/share/jupyter/labextensions/mito_ai/static/lib_index_js.8f1845da6bf2b128c049.js +21602 -0
  179. mito_ai-0.1.50.data/data/share/jupyter/labextensions/mito_ai/static/lib_index_js.8f1845da6bf2b128c049.js.map +1 -0
  180. mito_ai-0.1.50.data/data/share/jupyter/labextensions/mito_ai/static/node_modules_process_browser_js.4b128e94d31a81ebd209.js +198 -0
  181. mito_ai-0.1.50.data/data/share/jupyter/labextensions/mito_ai/static/node_modules_process_browser_js.4b128e94d31a81ebd209.js.map +1 -0
  182. mito_ai-0.1.50.data/data/share/jupyter/labextensions/mito_ai/static/remoteEntry.78d3ccb73e7ca1da3aae.js +619 -0
  183. mito_ai-0.1.50.data/data/share/jupyter/labextensions/mito_ai/static/remoteEntry.78d3ccb73e7ca1da3aae.js.map +1 -0
  184. mito_ai-0.1.50.data/data/share/jupyter/labextensions/mito_ai/static/style.js +4 -0
  185. mito_ai-0.1.50.data/data/share/jupyter/labextensions/mito_ai/static/style_index_js.5876024bb17dbd6a3ee6.js +712 -0
  186. mito_ai-0.1.50.data/data/share/jupyter/labextensions/mito_ai/static/style_index_js.5876024bb17dbd6a3ee6.js.map +1 -0
  187. mito_ai-0.1.50.data/data/share/jupyter/labextensions/mito_ai/static/vendors-node_modules_aws-amplify_auth_dist_esm_providers_cognito_apis_signOut_mjs-node_module-75790d.688c25857e7b81b1740f.js +533 -0
  188. mito_ai-0.1.50.data/data/share/jupyter/labextensions/mito_ai/static/vendors-node_modules_aws-amplify_auth_dist_esm_providers_cognito_apis_signOut_mjs-node_module-75790d.688c25857e7b81b1740f.js.map +1 -0
  189. mito_ai-0.1.50.data/data/share/jupyter/labextensions/mito_ai/static/vendors-node_modules_aws-amplify_auth_dist_esm_providers_cognito_tokenProvider_tokenProvider_-72f1c8.a917210f057fcfe224ad.js +6941 -0
  190. mito_ai-0.1.50.data/data/share/jupyter/labextensions/mito_ai/static/vendors-node_modules_aws-amplify_auth_dist_esm_providers_cognito_tokenProvider_tokenProvider_-72f1c8.a917210f057fcfe224ad.js.map +1 -0
  191. mito_ai-0.1.50.data/data/share/jupyter/labextensions/mito_ai/static/vendors-node_modules_aws-amplify_dist_esm_index_mjs.6bac1a8c4cc93f15f6b7.js +1021 -0
  192. mito_ai-0.1.50.data/data/share/jupyter/labextensions/mito_ai/static/vendors-node_modules_aws-amplify_dist_esm_index_mjs.6bac1a8c4cc93f15f6b7.js.map +1 -0
  193. mito_ai-0.1.50.data/data/share/jupyter/labextensions/mito_ai/static/vendors-node_modules_aws-amplify_ui-react_dist_esm_index_mjs.4fcecd65bef9e9847609.js +59698 -0
  194. mito_ai-0.1.50.data/data/share/jupyter/labextensions/mito_ai/static/vendors-node_modules_aws-amplify_ui-react_dist_esm_index_mjs.4fcecd65bef9e9847609.js.map +1 -0
  195. mito_ai-0.1.50.data/data/share/jupyter/labextensions/mito_ai/static/vendors-node_modules_react-dom_client_js-node_modules_aws-amplify_ui-react_dist_styles_css.b43d4249e4d3dac9ad7b.js +7440 -0
  196. mito_ai-0.1.50.data/data/share/jupyter/labextensions/mito_ai/static/vendors-node_modules_react-dom_client_js-node_modules_aws-amplify_ui-react_dist_styles_css.b43d4249e4d3dac9ad7b.js.map +1 -0
  197. mito_ai-0.1.50.data/data/share/jupyter/labextensions/mito_ai/static/vendors-node_modules_semver_index_js.3f6754ac5116d47de76b.js +2792 -0
  198. mito_ai-0.1.50.data/data/share/jupyter/labextensions/mito_ai/static/vendors-node_modules_semver_index_js.3f6754ac5116d47de76b.js.map +1 -0
  199. mito_ai-0.1.50.data/data/share/jupyter/labextensions/mito_ai/static/vendors-node_modules_vscode-diff_dist_index_js.ea55f1f9346638aafbcf.js +4859 -0
  200. mito_ai-0.1.50.data/data/share/jupyter/labextensions/mito_ai/static/vendors-node_modules_vscode-diff_dist_index_js.ea55f1f9346638aafbcf.js.map +1 -0
  201. mito_ai-0.1.50.dist-info/METADATA +221 -0
  202. mito_ai-0.1.50.dist-info/RECORD +205 -0
  203. mito_ai-0.1.50.dist-info/WHEEL +4 -0
  204. mito_ai-0.1.50.dist-info/entry_points.txt +2 -0
  205. mito_ai-0.1.50.dist-info/licenses/LICENSE +3 -0
@@ -0,0 +1,92 @@
1
+ # Copyright (c) Saga Inc.
2
+ # Distributed under the terms of the GNU Affero General Public License v3.0 License.
3
+
4
+ from mito_ai.completions.prompt_builders.prompt_constants import (
5
+ CHAT_CODE_FORMATTING_RULES,
6
+ CITATION_RULES,
7
+ ACTIVE_CELL_ID_SECTION_HEADING,
8
+ CODE_SECTION_HEADING,
9
+ get_database_rules
10
+ )
11
+
12
def create_chat_system_message_prompt() -> str:
    """
    Build the system message used for Chat mode.

    The message is assembled from a fixed persona/instruction text plus the
    shared prompt pieces imported from prompt_constants: the citation rules,
    the section headings used inside the worked examples, the database rules
    (computed at call time by get_database_rules()) and the chat code
    formatting rules.

    Returns:
        The complete system prompt as a single string.
    """
    return f"""You are Mito Data Copilot, an AI assistant for Jupyter. You're a great python programmer, a seasoned data scientist and a subject matter expert.

The user is going to ask you for help writing code, debugging code, explaining code, or drawing conclusions from their data/graphs. It is your job to help them accomplish their goal. 

The user will give you a set of variables, existing code, and a task to complete.

There are three possible types of responses you might give:
1. Code Update: If the task requires modifying or extending the existing code, respond with the updated active code cell and a short explanation of the changes made. 
2. Explanation/Analysis: If the task does not require a code update, it might instead require you to provide an explanation of existing code or data, provide an analysis of the data or chart.
3. Friendly Response: If the user is just asking a question, saying hi, or you're just chatting, respond with a friendly response and do not return any code.

Other useful information:
1. The user has two types of modes that they can collaborate with you in: Chat Mode (this mode) and agent mode. Chat mode gives the user more control over the edits made to the notebook and only edits the active cell. Agent mode gives you more autonomy over completing the user's task across multiple messages. In agent mode, you can edit or create new cells, see the entire notebook, automatically run the code you write, and more.
2. If the user asks you to generate a dashboard, app, or streamlit app for them, you should tell them that they must use Agent mode to complete the task. You are not able to automatically switch the user to agent mode, but they can switch to it themselves by using the Chat/Agent mode toggle in the bottom left corner of the AI taskpane.

====
{CITATION_RULES}

<Example 1>
{ACTIVE_CELL_ID_SECTION_HEADING}
'7b3a9e2c-5d14-4c83-b2f9-d67891e4a5f2'

{CODE_SECTION_HEADING}
```python
sales_df = pd.read_csv('sales_data.csv')
monthly_revenue = sales_df.groupby('month')['revenue'].sum()
top_month = monthly_revenue.idxmax()
peak_revenue = monthly_revenue.max()
growth_rate = (monthly_revenue.iloc[-1] / monthly_revenue.iloc[0] - 1) * 100
```

Your task: What are the key revenue insights from this sales data?

Output:
Peak monthly revenue reached $847,392 in March[MITO_CITATION:7b3a9e2c-5d14-4c83-b2f9-d67891e4a5f2:2-3], representing a 23.8% year-over-year growth rate[MITO_CITATION:7b3a9e2c-5d14-4c83-b2f9-d67891e4a5f2:4]. The revenue aggregation analysis[MITO_CITATION:7b3a9e2c-5d14-4c83-b2f9-d67891e4a5f2:1-2] reveals strong seasonal performance patterns.

</Example 1>

Notice in the example above:
- Citations support specific facts and numbers, not vague summaries
- Single line citations reference specific calculations (e.g., :4 for growth rate)
- Multiline citations reference broader analysis blocks (e.g., :1-2 for the groupby operation)
- Language is information-dense with concrete metrics
- All line numbers are 0-indexed

<Example 2>

{ACTIVE_CELL_ID_SECTION_HEADING}
'1a2b3c4d-5e6f-7g8h-9i0j-k1l2m3n4o5p6'

{CODE_SECTION_HEADING}
```python
```

Your task: Hello

Output:
Hey there! I'm Mito AI. How can I help you today?

</Example 2>

Notice in the example above that the user is just sending a friendly message, so we respond with a friendly message and do not return any code.

===
{get_database_rules()}

====
{CHAT_CODE_FORMATTING_RULES}

IMPORTANT RULES:
- Do not recreate variables that already exist
- Keep as much of the original code as possible
- When updating an existing code cell, return the full code cell with the update applied. Do not only return part of the code cell with a comment like "# Updated code starts here", etc.
- Only update code in the active cell. Do not update other code in the notebook.
- Write code that preserves the intent of the original code shared with you and the task to complete.
- Make the solution as simple as possible.
- Reuse as much of the existing code as possible.
- Do not add temporary comments like '# Fixed the typo here' or '# Added this line to fix the error'
- Whenever writing Python code, it should be a python code block starting with ```python and ending with ```
"""
@@ -0,0 +1,32 @@
1
+ # Copyright (c) Saga Inc.
2
+ # Distributed under the terms of the GNU Affero General Public License v3.0 License.
3
+
4
+ from mito_ai.completions.prompt_builders.prompt_constants import CODE_SECTION_HEADING
5
+
6
def create_explain_code_prompt(active_cell_code: str) -> str:
    """
    Build the user prompt asking the model to explain the active cell.

    The prompt contains one worked example followed by the caller's code
    under the shared CODE_SECTION_HEADING, and ends with an open "Output:"
    marker for the model to continue from.

    Args:
        active_cell_code: Source of the active code cell, inserted verbatim
            inside a python code fence.

    Returns:
        The formatted prompt string.
    """
    return f"""Explain the code in the active code cell to me like I have a basic understanding of Python. Don't explain each line, but instead explain the overall logic of the code.

<Example>

{CODE_SECTION_HEADING}

```python
def multiply(x, y):
    return x * y
```

Output:

This code creates a function called `multiply` that takes two arguments `x` and `y`, and returns the product of `x` and `y`.

</Example>

{CODE_SECTION_HEADING}

```python
{active_cell_code}
```

Output: 
"""
@@ -0,0 +1,197 @@
1
+ # Copyright (c) Saga Inc.
2
+ # Distributed under the terms of the GNU Affero General Public License v3.0 License.
3
+
4
+ from typing import List
5
+ from mito_ai.completions.prompt_builders.prompt_constants import (
6
+ FILES_SECTION_HEADING,
7
+ VARIABLES_SECTION_HEADING,
8
+ CODE_SECTION_HEADING
9
+ )
10
+
11
+
12
def create_inline_prompt(
    prefix: str,
    suffix: str,
    variables: List[str],
    files: List[str]
) -> str:
    """
    Build the prompt used for inline (cursor-position) code completion.

    The prompt consists of formatting rules, six worked examples, and then
    the user's actual context: the files, the defined variables, and the
    active cell code with a ``<cursor>`` marker placed between ``prefix``
    and ``suffix``.

    Args:
        prefix: Cell text before the cursor.
        suffix: Cell text after the cursor.
        variables: One entry per defined variable; entries are joined with
            newlines under the variables heading.
        files: File names; each is rendered as ``file_name: <name>`` on its
            own line under the files heading.

    Returns:
        The formatted prompt string.
    """
    variables_str = '\n'.join(f"{variable}" for variable in variables)
    files_str = '\n'.join(f"file_name: {file}" for file in files)

    # NOTE: literal braces in the examples are doubled because this is an f-string.
    prompt = f"""You are a coding assistant that lives inside of JupyterLab. Your job is to help the user write code.

You're given the current code cell, the user's cursor position, and the variables defined in the notebook. The user's cursor is signified by the symbol <cursor>.

CRITICAL FORMATTING RULES:
1. Include a new line character at the start of your response if you want the code you are writing to be added on the line after the cursor. For example, if the cursor is at the end of a comment, you should start your response with a newline character so that the code you write is not added to the comment.
2. If you are finishing a line of code that the user started, return the full line of code with no newline character at the start or end.
3. Your response must preserve correct Python indentation and spacing. For example, if you're completing a line of indented code, you must preserve the indentation.

Your job is to complete the code that matches the user's intent. Write the minimal code to achieve the user's intent. Don't expand upon the user's intent.

<Example 1>
{FILES_SECTION_HEADING}
file_name: sales.csv

{VARIABLES_SECTION_HEADING}
{{
    'loan_multiplier': 1.5,
    'sales_df': pd.DataFrame({{
        'transaction_date': ['2024-01-02', '2024-01-02', '2024-01-02', '2024-01-02', '2024-01-03'],
        'price_per_unit': [10, 9.99, 13.99, 21.00, 100],
        'units_sold': [1, 2, 1, 4, 5],
        'total_price': [10, 19.98, 13.99, 84.00, 500]
    }})
}}

{CODE_SECTION_HEADING}
```python
import pandas as pd
sales_df = pd.read_csv('./sales.csv')

# Multiply the total_price column by the loan_multiplier<cursor>
```

Output:
```python

sales_df['total_price'] = sales_df['total_price'] * loan_multiplier
```
</Example 1>

IMPORTANT: Notice in Example 1 that the output starts with a newline because the cursor was at the end of a comment. This newline is REQUIRED to maintain proper Python formatting.

<Example 2>
{FILES_SECTION_HEADING}


{VARIABLES_SECTION_HEADING}
{{
    df: pd.DataFrame({{
        'age': [20, 25, 22, 23, 29],
        'name': ['Nawaz', 'Aaron', 'Charlie', 'Tamir', 'Eve'],
    }})
}}

{CODE_SECTION_HEADING}
```python
df['age'] = df[<cursor>['age'] > 23]
```

Output:
```python
df['age'] = df[df['age'] > 23]
```
</Example 2>

IMPORTANT: Notice in Example 2 that the output does NOT start with a newline because the cursor is in the middle of existing code.

<Example 3>
{FILES_SECTION_HEADING}
file_name: voters.csv

{VARIABLES_SECTION_HEADING}
{{}}

{CODE_SECTION_HEADING}
```python
voters = pd.read_csv('./voters.csv')

# Create a variable for pennsylvania voters, ohio voters, california voters, and texas voters
pa_voters = voters[voters['state'] == 'PA']
ohio_voters<cursor>
```

Output:
```python
ohio_voters = voters[voters['state'] == 'OH']
ca_voters = voters[voters['state'] == 'CA']
tx_voters = voters[voters['state'] == 'TX']
```
</Example 3>

IMPORTANT: Notice in Example 3 that output does not start with a newline character because it wants to continue the line of code that the user started. Also notice the output contains three lines of code because that is the minimal code to achieve the user's intent.

<Example 4>
{FILES_SECTION_HEADING}
file_name: july_2025.xlsx
file_name: august_2025.xlsx

{VARIABLES_SECTION_HEADING}
{{}}

{CODE_SECTION_HEADING}
```python
# Display the first 5 rows of the dataframe
df.head()
<cursor>
```

Output:
```python
```
</Example 4>

IMPORTANT: Notice in Example 4 that the output is empty because the user's intent is already complete.

<Example 5>
{FILES_SECTION_HEADING}


{VARIABLES_SECTION_HEADING}
{{}}

{CODE_SECTION_HEADING}
```python
def even_and_odd():
    for i in range(10):
        if i % 2 == 0:
            print(f"Even: {{i}}")
        else:
            pri<cursor>
```

Output:
```python
            print(f"Odd: {{i}}")
```
</Example 5>

IMPORTANT: Notice in Example 5 that the output is indented several times because the code must be executed as part of the else block.

<Example 6>
{FILES_SECTION_HEADING}


{VARIABLES_SECTION_HEADING}
{{}}

{CODE_SECTION_HEADING}
```python
days_in_week <cursor>
```

Output:
```python
days_in_week = 7
```
</Example 6>

IMPORTANT: Notice in Example 6 that in order to finish the variable declaration, the output continues the existing line of code and does not start with a new line character.

Your Task:

{FILES_SECTION_HEADING}
{files_str}

{VARIABLES_SECTION_HEADING}
{variables_str}

{CODE_SECTION_HEADING}
```python
{prefix}<cursor>{suffix}
```

Output:
"""
    return prompt
@@ -0,0 +1,170 @@
1
+ # Copyright (c) Saga Inc.
2
+ # Distributed under the terms of the GNU Affero General Public License v3.0 License.
3
+
4
+ """
5
+ This module contains constants used in prompts across the codebase.
6
+ These constants ensure consistency between prompt building and message trimming.
7
+ """
8
+
9
+ import os
10
+ import json
11
+ from typing import Final
12
+ from mito_ai.utils.schema import MITO_FOLDER
13
+
14
# Section headings used in prompts.
# Per this module's docstring, these strings are shared so that prompt
# building and message trimming stay consistent; change them with care.
FILES_SECTION_HEADING = "Files in the current directory:"
VARIABLES_SECTION_HEADING = "Defined Variables:"
CODE_SECTION_HEADING = "Code in the active code cell:"
ACTIVE_CELL_ID_SECTION_HEADING = "The ID of the active code cell:"
ACTIVE_CELL_OUTPUT_SECTION_HEADING = "Output of the active code cell:"
GET_CELL_OUTPUT_TOOL_RESPONSE_SECTION_HEADING = "Output of the code cell you just applied the CELL_UPDATE to:"
JUPYTER_NOTEBOOK_SECTION_HEADING = "Jupyter Notebook:"
STREAMLIT_APP_STATUS_SECTION_HEADING = "Streamlit App Status:"

# Placeholder text used when trimming content from messages
CONTENT_REMOVED_PLACEHOLDER = "Content removed to save space"

# Instructions given to the model on how to cite notebook code. Citations use
# the inline marker [MITO_CITATION:cell_id:line_number] (or a first-last line
# range), with 0-indexed line numbers, as spelled out in the text below.
CITATION_RULES = """RULES FOR CITING YOUR WORK

It is important that the user is able to verify any insights that you share with them about their data. To make this easy for the user, you must cite the lines of code that you are drawing the insight from. To provide a citation, use one of the following formats inline in your response:

Single line citation:
[MITO_CITATION:cell_id:line_number]

Multiline citation (for citing a range of lines):
[MITO_CITATION:cell_id:first_line-last_line]

Citation Rules:

1. Every fact or statement derived from the user's notebook must include a citation. 
2. When choosing the citation, select the code that will most help the user validate the fact or statement that you shared with them.
3. Place the citation immediately after the statement it supports. Do not explain the citation with phrases like "See", "Derived from", etc. Just provide the citation object.
4. For the "line_number" field, use the line number within the cell that is most relevant to the citation. Important: The cell line number should be 0-indexed and should not skip comments.
5. For multiline citations, use the "first_line-last_line" format when the insight spans multiple lines of code. Both line numbers should be 0-indexed.
6. If you cannot find relevant information in the notebook to answer a question, clearly state this and do not provide a citation.
7. You ONLY need to provide a citation when sharing an insight from the data in the message part of the response. If all you are doing is writing/updating code, then there is no need to provide a citation.
8. Do not include the citation in the code block as a comment. ONLY include the citation in the message field of your response.
"""
48
+
49
def get_active_cell_output_str(has_active_cell_output: bool) -> str:
    """
    Used to tell the AI about the output of the active code cell.
    We use this in the chat prompt.

    Args:
        has_active_cell_output: Whether an output image for the active cell
            accompanies the message.

    Returns:
        The output section heading followed by a one-line note about the
        attached image, or an empty string when there is no output.
    """
    if not has_active_cell_output:
        return ""

    return f"{ACTIVE_CELL_OUTPUT_SECTION_HEADING}\nAttached is an image of the output of the active code cell for your context."
58
+
59
def cell_update_output_str(has_cell_update_output: bool) -> str:
    """
    Used to respond to the GET_CELL_OUTPUT tool, telling the agent the output of the cell it requested

    Args:
        has_cell_update_output: Whether an output image for the requested
            cell accompanies the message.

    Returns:
        The tool-response section heading followed by a one-line note about
        the attached image, or an empty string when there is no output.
    """
    if not has_cell_update_output:
        return ""

    return f"{GET_CELL_OUTPUT_TOOL_RESPONSE_SECTION_HEADING}\nAttached is an image of code cell output that you requested."
67
+
68
def redact_sensitive_info(connections: dict) -> dict:
    """
    Redacts sensitive information from connections data.

    Every field value of every connection is replaced with the string
    'redacted'; connection names and field names are kept so the structure
    of the configuration remains visible. The input is not modified.

    Args:
        connections: Mapping of connection name -> mapping of field -> value.

    Returns:
        A new dict with the same connection names and field names, where
        every value is 'redacted'. A connection entry that is not itself a
        dict is replaced wholesale by 'redacted' so none of its content is
        exposed.
    """
    redacted: dict = {}
    for conn_name, conn_data in connections.items():
        if isinstance(conn_data, dict):
            # fromkeys keeps the field names (in order) and masks every value.
            redacted[conn_name] = dict.fromkeys(conn_data, 'redacted')
        else:
            redacted[conn_name] = 'redacted'
    return redacted
79
+
80
def get_database_rules() -> str:
    """
    Reads the user's database configurations,
    and returns the rules for the AI to follow.

    Looks for ``db/connections.json`` and ``db/schemas.json`` under
    MITO_FOLDER. The connections are passed through redact_sensitive_info
    before being embedded in the prompt.

    Returns:
        The database rules prompt section, or an empty string when
        ``connections.json`` does not exist. If only ``schemas.json`` is
        missing, the schema is rendered as ``None`` in the prompt.
    """
    # Get the db configuration from the user's mito folder
    db_dir = os.path.join(MITO_FOLDER, 'db')
    connections_path = os.path.join(db_dir, 'connections.json')
    schemas_path = os.path.join(db_dir, 'schemas.json')

    # Read as UTF-8 explicitly so the result does not depend on the
    # platform's locale encoding.
    try:
        with open(connections_path, 'r', encoding='utf-8') as f:
            connections = json.load(f)
    except FileNotFoundError:
        # No db configuration, so there are no rules to add
        return ""

    sanitized_connections = redact_sensitive_info(connections)

    try:
        with open(schemas_path, 'r', encoding='utf-8') as f:
            schemas = json.load(f)
    except FileNotFoundError:
        schemas = None

    # There is a db configuration, so return the rules
    return f"""DATABASE RULES:
If the user has requested data that you believe is stored in the database:
- Use the provided schema.
- Only use SQLAlchemy to query the database.
- Do not use a with statement when creating the SQLAlchemy engine. Instead, initialize it once so it can be reused for multiple queries.
- Always return the results of the query in a pandas DataFrame, unless instructed otherwise.
- Every schema has a unique connection ID. This ID can be used to find the connection details in the connections.json file.
- Do not use the connection ID to query the database. It is only for matching the schema to the correct connection.
- When using the connection ID, do not include any comments about it in your code.
- Connection details are stored in a JSON file located at: `{connections_path}`
- Here is the sanitized contents of the connections.json file:

{sanitized_connections}

- Do not hard-code connection credentials into your code. Instead, load the connections.json file and access connection fields dynamically like so:

```
connections[connection_name]["username"]
```

- The user may colloquially ask for a "list of x", always assume they want a pandas DataFrame.
- When working with dataframes created from an SQL query, ALWAYS use lowercase column names.
- If you think the requested data is stored in the database, but you are unsure, then ask the user for clarification.

## Additional MSSQL Rules

- When connecting to a Microsoft SQL Server (MSSQL) database, use the following format:

```
import urllib.parse

encoded_password = urllib.parse.quote_plus(password)
conn_str = f"mssql+pyodbc://username:encoded_password@host:port/database?driver=ODBC+Driver+18+for+SQL+Server&TrustServerCertificate=yes"
```

- Always URL-encode passwords for MSSQL connections to handle special characters properly.
- Include the port number in MSSQL connection strings.
- Use "ODBC+Driver+18+for+SQL+Server" (with plus signs) in the driver parameter.
- Always include "TrustServerCertificate=yes" for MSSQL connections to avoid SSL certificate issues.

## Additional Oracle Rules

- When connecting to an Oracle database, use the following format:
```
conn_str = f"oracle+oracledb://username:password@host:port?service_name=service_name"
```

Here is the schema:
{schemas}
"""
163
+
164
+
165
# Rules telling the model that its code fully replaces the active cell, so it
# must always return the whole cell rather than a fragment.
CHAT_CODE_FORMATTING_RULES = """CRITICAL CODE UPDATE RULES:
- COMPLETE REPLACEMENT: Your code will COMPLETELY REPLACE the entire contents of the active code cell.
- INCLUDE ALL CODE: You MUST return the COMPLETE, FULL contents of the entire code cell - including ALL existing code that should remain plus your modifications.
- NEVER PARTIAL CODE: NEVER return only a portion, snippet, or subset of the code cell. Partial responses will break the user's notebook by deleting important code.
- PRESERVE EXISTING CODE: Always preserve imports, variable definitions, and other code that the user needs, even if you're only modifying one small part.
"""
@@ -0,0 +1,199 @@
1
+ # Copyright (c) Saga Inc.
2
+ # Distributed under the terms of the GNU Affero General Public License v3.0 License.
3
+
4
+ from typing import List
5
+ from mito_ai.completions.prompt_builders.prompt_constants import (
6
+ ACTIVE_CELL_ID_SECTION_HEADING,
7
+ FILES_SECTION_HEADING,
8
+ VARIABLES_SECTION_HEADING,
9
+ CODE_SECTION_HEADING
10
+ )
11
+
12
+
13
+ def create_error_prompt(
14
+ error_message: str,
15
+ active_cell_code: str,
16
+ active_cell_id: str,
17
+ variables: List[str],
18
+ files: List[str]
19
+ ) -> str:
20
+ variables_str = '\n'.join([f"{variable}" for variable in variables])
21
+ files_str = '\n'.join([f"{file}" for file in files])
22
+ return f"""Help me debug this code in JupyterLab. Analyze the error and provide a solution that maintains the original intent.
23
+
24
+ <Example 1>
25
+ {FILES_SECTION_HEADING}
26
+ file_name: sales.csv
27
+
28
+ {VARIABLES_SECTION_HEADING}
29
+ {{
30
+ 'revenue_multiplier': 1.5,
31
+ 'sales_df': pd.DataFrame({{
32
+ 'transaction_date': ['2024-01-02', '2024-01-02', '2024-01-02', '2024-01-02', '2024-01-03'],
33
+ 'price_per_unit': [10, 9.99, 13.99, 21.00, 100],
34
+ 'units_sold': [1, 2, 1, 4, 5],
35
+ 'total_price': [10, 19.98, 13.99, 84.00, 500]
36
+ }})
37
+ }}
38
+
39
+ {ACTIVE_CELL_ID_SECTION_HEADING}
40
+ '9e38c62b-38f8-457d-bb8d-28bfc52edf2c'
41
+
42
+ {CODE_SECTION_HEADING}
43
+ ```python
44
+ import pandas as pd
45
+ sales_df = pd.read_csv('./sales.csv')
46
+ revenue_multiplier = 1.5
47
+ sales_df['total_revenue'] = sales_df['price'] * revenue_multiplier
48
+ ```
49
+
50
+ Error Traceback:
51
+ Cell In[24], line 4
52
+ 1 import pandas as pd
53
+ 2 sales_df = pd.read_csv('./sales.csv')
54
+ 3 revenue_multiplier = 1.5
55
+ ----> 4 sales_df['total_revenue'] = sales_df['price'] * revenue_multiplier
56
+
57
+ KeyError: 'price'
58
+
59
+
60
+ ERROR ANALYSIS:
61
+ Runtime error: Attempted to access non-existent DataFrame column
62
+
63
+ INTENT ANALYSIS:
64
+ User is trying to calculate total revenue by applying a multiplier to transaction prices. Based on the defined variables, the column that the user is trying to access is likely `total_price` because that would allow them to calculate the total revenue for each transaction.
65
+
66
+ SOLUTION:
67
+ ```python
68
+ import pandas as pd
69
+ sales_df = pd.read_csv('./sales.csv')
70
+ revenue_multiplier = 1.5
71
+ sales_df['total_revenue'] = sales_df['total_price'] * revenue_multiplier
72
+ ```
73
+
74
+ The DataFrame contains 'total_price' rather than 'price'. Updated column reference to match existing data structure.
75
+ </Example 1>
76
+
77
+ <Example 2>
78
+ {FILES_SECTION_HEADING}
79
+
80
+
81
+ {VARIABLES_SECTION_HEADING}
82
+ {{
83
+ 'df': pd.DataFrame({{
84
+ 'order_id': [1, 2, 3, 4],
85
+ 'date': ['Mar 7, 2025', 'Sep 24, 2024', '25 June, 2024', 'June 29, 2024'],
86
+ 'amount': [100, 150, 299, 99]
87
+ }})
88
+ }}
89
+
90
+ {ACTIVE_CELL_ID_SECTION_HEADING}
91
+ 'c68fdf19-db8c-46dd-926f-d90ad35bb3bc'
92
+
93
+ {CODE_SECTION_HEADING}
94
+ ```python
95
+ df['date'] = pd.to_datetime(df['date'])
96
+ ```
97
+
98
+ Error Traceback:
99
+ Cell In[27], line 1
100
+ ----> 1 df['date'] = pd.to_datetime(df['date'])
101
+
102
+ ValueError: time data "25 June, 2024" doesn't match format "%b %d, %Y", at position 2. You might want to try:
103
+ - passing `format` if your strings have a consistent format;
104
+ - passing `format='ISO8601'` if your strings are all ISO8601 but not necessarily in exactly the same format;
105
+ - passing `format='mixed'`, and the format will be inferred for each element individually. You might want to use `dayfirst` alongside this.
106
+
107
+ ERROR ANALYSIS:
108
+ This is a ValueError caused by applying the wrong format to a specific date string. Because it was triggered at position 2, the first date string must have successfully converted. By looking at the defined variables, I can see that the first date string is in the format "Mar 7, 2025", but the third date string is in the format "25 June, 2024". Those dates are not in the same format, so the conversion failed.
109
+
110
+ INTENT ANALYSIS:
111
+ User is trying to convert the date column to a datetime object even though the dates are not in the same starting format.
112
+
113
+ SOLUTION:
114
+ ```python
115
+ def parse_date(date_str):
116
+ formats = ['%b %d, %Y', '%d %B, %Y']
117
+
118
+ for fmt in formats:
119
+ try:
120
+ return pd.to_datetime(date_str, format=fmt)
121
+ except ValueError:
122
+ # Try the next format
123
+ continue
124
+
125
+ # If no format worked, return Not a Time
126
+ return pd.NaT
127
+
128
+ df['date'] = df['date'].apply(lambda x: parse_date(x))
129
+ ```
130
+
131
+ Since the dates are not in a consistent format, we need to first figure out which format to use for each date string and then use that format to convert the date.
132
+
133
+ The best way to do this is with a function. We can call this function `parse_date`.
134
+ </Example 2>
135
+
136
+
137
+ Guidelines for Solutions:
138
+
139
+ Error Analysis:
140
+
141
+ - Identify error type (Syntax, Runtime, Logic).
142
+ - Use the defined variables and code in the active cell to understand the error.
143
+ - Consider kernel state and execution order
144
+
145
+ Intent Preservation:
146
+
147
+ - Try to understand the user's intent using the defined variables and code in the active cell.
148
+
149
+ Solution Requirements:
150
+
151
+ - Return the full code cell with the error fixed and a short explanation of the error.
152
+ - Only update code in the active cell. Do not update other code in the notebook.
153
+ - Propose a solution that fixes the error and does not change the user's intent.
154
+ - Make the solution as simple as possible.
155
+ - Reuse as much of the existing code as possible.
156
+ - Do not add temporary comments like '# Fixed the typo here' or '# Added this line to fix the error'
157
+ - The code in the SOLUTION section should be a python code block starting with ```python and ending with ```
158
+ - If you encounter a ModuleNotFoundError, you can install the package by adding the following line to the top of the code cell: `!pip install <package_name> --quiet`.
159
+
160
+ Here is your task.
161
+
162
+ {FILES_SECTION_HEADING}
163
+ {files_str}
164
+
165
+ {VARIABLES_SECTION_HEADING}
166
+ {variables_str}
167
+
168
+ {ACTIVE_CELL_ID_SECTION_HEADING}
169
+ {active_cell_id}
170
+
171
+ {CODE_SECTION_HEADING}
172
+ ```python
173
+ {active_cell_code}
174
+ ```
175
+
176
+ Error Traceback:
177
+ {error_message}
178
+
179
+ ERROR ANALYSIS:
180
+
181
+ INTENT ANALYSIS:
182
+
183
+ SOLUTION:
184
+ """
185
+
186
+
187
def remove_inner_thoughts_from_message(message: str) -> str:
    """Strip the model's pre-solution reasoning from a smart debug reply.

    The smart debug prompt has the model think to itself before it returns
    the solution. Those inner thoughts do not need to be saved, so they are
    removed before the message is stored in the chat history.

    Args:
        message: The raw reply text.

    Returns:
        The whitespace-stripped text that follows the first ``SOLUTION:``
        marker. If the message is empty or contains no marker, it is
        returned unchanged.
    """
    SOLUTION_STRING = "SOLUTION:"

    # Partition on the FIRST marker only. Splitting on every occurrence and
    # taking element [1] would silently drop everything after a second
    # "SOLUTION:" that happens to appear inside the solution text itself.
    _, marker, solution = message.partition(SOLUTION_STRING)
    if not marker:
        # Covers both the empty message and a message with no marker.
        return message

    return solution.strip()
199
+