mito-ai 0.1.50__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mito_ai/__init__.py +114 -0
- mito_ai/_version.py +4 -0
- mito_ai/anthropic_client.py +334 -0
- mito_ai/app_deploy/__init__.py +6 -0
- mito_ai/app_deploy/app_deploy_utils.py +44 -0
- mito_ai/app_deploy/handlers.py +345 -0
- mito_ai/app_deploy/models.py +98 -0
- mito_ai/app_manager/__init__.py +4 -0
- mito_ai/app_manager/handlers.py +167 -0
- mito_ai/app_manager/models.py +71 -0
- mito_ai/app_manager/utils.py +24 -0
- mito_ai/auth/README.md +18 -0
- mito_ai/auth/__init__.py +6 -0
- mito_ai/auth/handlers.py +96 -0
- mito_ai/auth/urls.py +13 -0
- mito_ai/chat_history/handlers.py +63 -0
- mito_ai/chat_history/urls.py +32 -0
- mito_ai/completions/completion_handlers/__init__.py +3 -0
- mito_ai/completions/completion_handlers/agent_auto_error_fixup_handler.py +59 -0
- mito_ai/completions/completion_handlers/agent_execution_handler.py +66 -0
- mito_ai/completions/completion_handlers/chat_completion_handler.py +141 -0
- mito_ai/completions/completion_handlers/code_explain_handler.py +113 -0
- mito_ai/completions/completion_handlers/completion_handler.py +42 -0
- mito_ai/completions/completion_handlers/inline_completer_handler.py +48 -0
- mito_ai/completions/completion_handlers/smart_debug_handler.py +160 -0
- mito_ai/completions/completion_handlers/utils.py +147 -0
- mito_ai/completions/handlers.py +415 -0
- mito_ai/completions/message_history.py +401 -0
- mito_ai/completions/models.py +404 -0
- mito_ai/completions/prompt_builders/__init__.py +3 -0
- mito_ai/completions/prompt_builders/agent_execution_prompt.py +57 -0
- mito_ai/completions/prompt_builders/agent_smart_debug_prompt.py +160 -0
- mito_ai/completions/prompt_builders/agent_system_message.py +472 -0
- mito_ai/completions/prompt_builders/chat_name_prompt.py +15 -0
- mito_ai/completions/prompt_builders/chat_prompt.py +116 -0
- mito_ai/completions/prompt_builders/chat_system_message.py +92 -0
- mito_ai/completions/prompt_builders/explain_code_prompt.py +32 -0
- mito_ai/completions/prompt_builders/inline_completer_prompt.py +197 -0
- mito_ai/completions/prompt_builders/prompt_constants.py +170 -0
- mito_ai/completions/prompt_builders/smart_debug_prompt.py +199 -0
- mito_ai/completions/prompt_builders/utils.py +84 -0
- mito_ai/completions/providers.py +284 -0
- mito_ai/constants.py +63 -0
- mito_ai/db/__init__.py +3 -0
- mito_ai/db/crawlers/__init__.py +6 -0
- mito_ai/db/crawlers/base_crawler.py +61 -0
- mito_ai/db/crawlers/constants.py +43 -0
- mito_ai/db/crawlers/snowflake.py +71 -0
- mito_ai/db/handlers.py +168 -0
- mito_ai/db/models.py +31 -0
- mito_ai/db/urls.py +34 -0
- mito_ai/db/utils.py +185 -0
- mito_ai/docker/mssql/compose.yml +37 -0
- mito_ai/docker/mssql/init/setup.sql +21 -0
- mito_ai/docker/mysql/compose.yml +18 -0
- mito_ai/docker/mysql/init/setup.sql +13 -0
- mito_ai/docker/oracle/compose.yml +17 -0
- mito_ai/docker/oracle/init/setup.sql +20 -0
- mito_ai/docker/postgres/compose.yml +17 -0
- mito_ai/docker/postgres/init/setup.sql +13 -0
- mito_ai/enterprise/__init__.py +3 -0
- mito_ai/enterprise/utils.py +15 -0
- mito_ai/file_uploads/__init__.py +3 -0
- mito_ai/file_uploads/handlers.py +248 -0
- mito_ai/file_uploads/urls.py +21 -0
- mito_ai/gemini_client.py +232 -0
- mito_ai/log/handlers.py +38 -0
- mito_ai/log/urls.py +21 -0
- mito_ai/logger.py +37 -0
- mito_ai/openai_client.py +382 -0
- mito_ai/path_utils.py +70 -0
- mito_ai/rules/handlers.py +44 -0
- mito_ai/rules/urls.py +22 -0
- mito_ai/rules/utils.py +56 -0
- mito_ai/settings/handlers.py +41 -0
- mito_ai/settings/urls.py +20 -0
- mito_ai/settings/utils.py +42 -0
- mito_ai/streamlit_conversion/agent_utils.py +37 -0
- mito_ai/streamlit_conversion/prompts/prompt_constants.py +172 -0
- mito_ai/streamlit_conversion/prompts/prompt_utils.py +10 -0
- mito_ai/streamlit_conversion/prompts/streamlit_app_creation_prompt.py +46 -0
- mito_ai/streamlit_conversion/prompts/streamlit_error_correction_prompt.py +28 -0
- mito_ai/streamlit_conversion/prompts/streamlit_finish_todo_prompt.py +45 -0
- mito_ai/streamlit_conversion/prompts/streamlit_system_prompt.py +56 -0
- mito_ai/streamlit_conversion/prompts/update_existing_app_prompt.py +50 -0
- mito_ai/streamlit_conversion/search_replace_utils.py +94 -0
- mito_ai/streamlit_conversion/streamlit_agent_handler.py +144 -0
- mito_ai/streamlit_conversion/streamlit_utils.py +85 -0
- mito_ai/streamlit_conversion/validate_streamlit_app.py +105 -0
- mito_ai/streamlit_preview/__init__.py +6 -0
- mito_ai/streamlit_preview/handlers.py +111 -0
- mito_ai/streamlit_preview/manager.py +152 -0
- mito_ai/streamlit_preview/urls.py +22 -0
- mito_ai/streamlit_preview/utils.py +29 -0
- mito_ai/tests/__init__.py +3 -0
- mito_ai/tests/chat_history/test_chat_history.py +211 -0
- mito_ai/tests/completions/completion_handlers_utils_test.py +190 -0
- mito_ai/tests/conftest.py +53 -0
- mito_ai/tests/create_agent_system_message_prompt_test.py +22 -0
- mito_ai/tests/data/prompt_lg.py +69 -0
- mito_ai/tests/data/prompt_sm.py +6 -0
- mito_ai/tests/data/prompt_xl.py +13 -0
- mito_ai/tests/data/stock_data.sqlite3 +0 -0
- mito_ai/tests/db/conftest.py +39 -0
- mito_ai/tests/db/connections_test.py +102 -0
- mito_ai/tests/db/mssql_test.py +29 -0
- mito_ai/tests/db/mysql_test.py +29 -0
- mito_ai/tests/db/oracle_test.py +29 -0
- mito_ai/tests/db/postgres_test.py +29 -0
- mito_ai/tests/db/schema_test.py +93 -0
- mito_ai/tests/db/sqlite_test.py +31 -0
- mito_ai/tests/db/test_db_constants.py +61 -0
- mito_ai/tests/deploy_app/test_app_deploy_utils.py +89 -0
- mito_ai/tests/file_uploads/__init__.py +2 -0
- mito_ai/tests/file_uploads/test_handlers.py +282 -0
- mito_ai/tests/message_history/test_generate_short_chat_name.py +120 -0
- mito_ai/tests/message_history/test_message_history_utils.py +469 -0
- mito_ai/tests/open_ai_utils_test.py +152 -0
- mito_ai/tests/performance_test.py +329 -0
- mito_ai/tests/providers/test_anthropic_client.py +447 -0
- mito_ai/tests/providers/test_azure.py +631 -0
- mito_ai/tests/providers/test_capabilities.py +120 -0
- mito_ai/tests/providers/test_gemini_client.py +195 -0
- mito_ai/tests/providers/test_mito_server_utils.py +448 -0
- mito_ai/tests/providers/test_model_resolution.py +130 -0
- mito_ai/tests/providers/test_openai_client.py +57 -0
- mito_ai/tests/providers/test_provider_completion_exception.py +66 -0
- mito_ai/tests/providers/test_provider_limits.py +42 -0
- mito_ai/tests/providers/test_providers.py +382 -0
- mito_ai/tests/providers/test_retry_logic.py +389 -0
- mito_ai/tests/providers/test_stream_mito_server_utils.py +140 -0
- mito_ai/tests/providers/utils.py +85 -0
- mito_ai/tests/rules/conftest.py +26 -0
- mito_ai/tests/rules/rules_test.py +117 -0
- mito_ai/tests/server_limits_test.py +406 -0
- mito_ai/tests/settings/conftest.py +26 -0
- mito_ai/tests/settings/settings_test.py +70 -0
- mito_ai/tests/settings/test_settings_constants.py +9 -0
- mito_ai/tests/streamlit_conversion/__init__.py +3 -0
- mito_ai/tests/streamlit_conversion/test_apply_search_replace.py +240 -0
- mito_ai/tests/streamlit_conversion/test_streamlit_agent_handler.py +246 -0
- mito_ai/tests/streamlit_conversion/test_streamlit_utils.py +193 -0
- mito_ai/tests/streamlit_conversion/test_validate_streamlit_app.py +112 -0
- mito_ai/tests/streamlit_preview/test_streamlit_preview_handler.py +118 -0
- mito_ai/tests/streamlit_preview/test_streamlit_preview_manager.py +292 -0
- mito_ai/tests/test_constants.py +47 -0
- mito_ai/tests/test_telemetry.py +12 -0
- mito_ai/tests/user/__init__.py +2 -0
- mito_ai/tests/user/test_user.py +120 -0
- mito_ai/tests/utils/__init__.py +3 -0
- mito_ai/tests/utils/test_anthropic_utils.py +162 -0
- mito_ai/tests/utils/test_gemini_utils.py +98 -0
- mito_ai/tests/version_check_test.py +169 -0
- mito_ai/user/handlers.py +45 -0
- mito_ai/user/urls.py +21 -0
- mito_ai/utils/__init__.py +3 -0
- mito_ai/utils/anthropic_utils.py +168 -0
- mito_ai/utils/create.py +94 -0
- mito_ai/utils/db.py +74 -0
- mito_ai/utils/error_classes.py +42 -0
- mito_ai/utils/gemini_utils.py +133 -0
- mito_ai/utils/message_history_utils.py +87 -0
- mito_ai/utils/mito_server_utils.py +242 -0
- mito_ai/utils/open_ai_utils.py +200 -0
- mito_ai/utils/provider_utils.py +49 -0
- mito_ai/utils/schema.py +86 -0
- mito_ai/utils/server_limits.py +152 -0
- mito_ai/utils/telemetry_utils.py +480 -0
- mito_ai/utils/utils.py +89 -0
- mito_ai/utils/version_utils.py +94 -0
- mito_ai/utils/websocket_base.py +88 -0
- mito_ai/version_check.py +60 -0
- mito_ai-0.1.50.data/data/etc/jupyter/jupyter_server_config.d/mito_ai.json +7 -0
- mito_ai-0.1.50.data/data/share/jupyter/labextensions/mito_ai/build_log.json +728 -0
- mito_ai-0.1.50.data/data/share/jupyter/labextensions/mito_ai/package.json +243 -0
- mito_ai-0.1.50.data/data/share/jupyter/labextensions/mito_ai/schemas/mito_ai/package.json.orig +238 -0
- mito_ai-0.1.50.data/data/share/jupyter/labextensions/mito_ai/schemas/mito_ai/toolbar-buttons.json +37 -0
- mito_ai-0.1.50.data/data/share/jupyter/labextensions/mito_ai/static/lib_index_js.8f1845da6bf2b128c049.js +21602 -0
- mito_ai-0.1.50.data/data/share/jupyter/labextensions/mito_ai/static/lib_index_js.8f1845da6bf2b128c049.js.map +1 -0
- mito_ai-0.1.50.data/data/share/jupyter/labextensions/mito_ai/static/node_modules_process_browser_js.4b128e94d31a81ebd209.js +198 -0
- mito_ai-0.1.50.data/data/share/jupyter/labextensions/mito_ai/static/node_modules_process_browser_js.4b128e94d31a81ebd209.js.map +1 -0
- mito_ai-0.1.50.data/data/share/jupyter/labextensions/mito_ai/static/remoteEntry.78d3ccb73e7ca1da3aae.js +619 -0
- mito_ai-0.1.50.data/data/share/jupyter/labextensions/mito_ai/static/remoteEntry.78d3ccb73e7ca1da3aae.js.map +1 -0
- mito_ai-0.1.50.data/data/share/jupyter/labextensions/mito_ai/static/style.js +4 -0
- mito_ai-0.1.50.data/data/share/jupyter/labextensions/mito_ai/static/style_index_js.5876024bb17dbd6a3ee6.js +712 -0
- mito_ai-0.1.50.data/data/share/jupyter/labextensions/mito_ai/static/style_index_js.5876024bb17dbd6a3ee6.js.map +1 -0
- mito_ai-0.1.50.data/data/share/jupyter/labextensions/mito_ai/static/vendors-node_modules_aws-amplify_auth_dist_esm_providers_cognito_apis_signOut_mjs-node_module-75790d.688c25857e7b81b1740f.js +533 -0
- mito_ai-0.1.50.data/data/share/jupyter/labextensions/mito_ai/static/vendors-node_modules_aws-amplify_auth_dist_esm_providers_cognito_apis_signOut_mjs-node_module-75790d.688c25857e7b81b1740f.js.map +1 -0
- mito_ai-0.1.50.data/data/share/jupyter/labextensions/mito_ai/static/vendors-node_modules_aws-amplify_auth_dist_esm_providers_cognito_tokenProvider_tokenProvider_-72f1c8.a917210f057fcfe224ad.js +6941 -0
- mito_ai-0.1.50.data/data/share/jupyter/labextensions/mito_ai/static/vendors-node_modules_aws-amplify_auth_dist_esm_providers_cognito_tokenProvider_tokenProvider_-72f1c8.a917210f057fcfe224ad.js.map +1 -0
- mito_ai-0.1.50.data/data/share/jupyter/labextensions/mito_ai/static/vendors-node_modules_aws-amplify_dist_esm_index_mjs.6bac1a8c4cc93f15f6b7.js +1021 -0
- mito_ai-0.1.50.data/data/share/jupyter/labextensions/mito_ai/static/vendors-node_modules_aws-amplify_dist_esm_index_mjs.6bac1a8c4cc93f15f6b7.js.map +1 -0
- mito_ai-0.1.50.data/data/share/jupyter/labextensions/mito_ai/static/vendors-node_modules_aws-amplify_ui-react_dist_esm_index_mjs.4fcecd65bef9e9847609.js +59698 -0
- mito_ai-0.1.50.data/data/share/jupyter/labextensions/mito_ai/static/vendors-node_modules_aws-amplify_ui-react_dist_esm_index_mjs.4fcecd65bef9e9847609.js.map +1 -0
- mito_ai-0.1.50.data/data/share/jupyter/labextensions/mito_ai/static/vendors-node_modules_react-dom_client_js-node_modules_aws-amplify_ui-react_dist_styles_css.b43d4249e4d3dac9ad7b.js +7440 -0
- mito_ai-0.1.50.data/data/share/jupyter/labextensions/mito_ai/static/vendors-node_modules_react-dom_client_js-node_modules_aws-amplify_ui-react_dist_styles_css.b43d4249e4d3dac9ad7b.js.map +1 -0
- mito_ai-0.1.50.data/data/share/jupyter/labextensions/mito_ai/static/vendors-node_modules_semver_index_js.3f6754ac5116d47de76b.js +2792 -0
- mito_ai-0.1.50.data/data/share/jupyter/labextensions/mito_ai/static/vendors-node_modules_semver_index_js.3f6754ac5116d47de76b.js.map +1 -0
- mito_ai-0.1.50.data/data/share/jupyter/labextensions/mito_ai/static/vendors-node_modules_vscode-diff_dist_index_js.ea55f1f9346638aafbcf.js +4859 -0
- mito_ai-0.1.50.data/data/share/jupyter/labextensions/mito_ai/static/vendors-node_modules_vscode-diff_dist_index_js.ea55f1f9346638aafbcf.js.map +1 -0
- mito_ai-0.1.50.dist-info/METADATA +221 -0
- mito_ai-0.1.50.dist-info/RECORD +205 -0
- mito_ai-0.1.50.dist-info/WHEEL +4 -0
- mito_ai-0.1.50.dist-info/entry_points.txt +2 -0
- mito_ai-0.1.50.dist-info/licenses/LICENSE +3 -0
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
# Copyright (c) Saga Inc.
|
|
2
|
+
# Distributed under the terms of the GNU Affero General Public License v3.0 License.
|
|
3
|
+
|
|
4
|
+
from mito_ai.completions.prompt_builders.prompt_constants import (
|
|
5
|
+
CHAT_CODE_FORMATTING_RULES,
|
|
6
|
+
CITATION_RULES,
|
|
7
|
+
ACTIVE_CELL_ID_SECTION_HEADING,
|
|
8
|
+
CODE_SECTION_HEADING,
|
|
9
|
+
get_database_rules
|
|
10
|
+
)
|
|
11
|
+
|
|
12
|
+
def create_chat_system_message_prompt() -> str:
    """
    Build the system message used for chat mode.

    The message is one f-string. It describes the assistant's role and the
    three kinds of responses it may give, then interpolates CITATION_RULES,
    the active-cell-ID and code section headings (inside two worked examples),
    the result of get_database_rules(), and CHAT_CODE_FORMATTING_RULES, and
    finishes with a list of rules for updating code.

    Returns:
        The fully formatted system prompt.
    """
    # get_database_rules() is evaluated every time this function is called,
    # because it is interpolated inside the f-string below.
    return f"""You are Mito Data Copilot, an AI assistant for Jupyter. You're a great python programmer, a seasoned data scientist and a subject matter expert.

The user is going to ask you for help writing code, debugging code, explaining code, or drawing conclusions from their data/graphs. It is your job to help them accomplish their goal.

The user will give you a set of variables, existing code, and a task to complete.

There are three possible types of responses you might give:
1. Code Update: If the task requires modifying or extending the existing code, respond with the updated active code cell and a short explanation of the changes made.
2. Explanation/Analysis: If the task does not require a code update, it might instead require you to provide an explanation of existing code or data, provide an analysis of the the data or chart.
3. Friendly Response: If the user is just asking a question, saying hi, or you're just chatting, respond with a friendly response and do not return any code.

Other useful information:
1. The user has two types of modes that they can collaborate with you in: Chat Mode (this mode) and agent mode. Chat mode gives the user more control over the edits made to the notebook and only edits the active cell. Agent mode gives you more autonomy over completing the user's task across mulitple messages. In agent mode, you can edit or create new cells, see the entire notebook, automatically run the code you write, and more.
2. If the user asks you to generate a dashboard, app, or streamlit app for them, you should tell them that they must use Agent mode to complete the task. You are not able to automatically switch the user to agent mode, but they can switch to it themselves by using the Chat/Agent mode toggle in the bottom left corner of the Ai taskpane.

====
{CITATION_RULES}

<Example 1>
{ACTIVE_CELL_ID_SECTION_HEADING}
'7b3a9e2c-5d14-4c83-b2f9-d67891e4a5f2'

{CODE_SECTION_HEADING}
```python
sales_df = pd.read_csv('sales_data.csv')
monthly_revenue = sales_df.groupby('month')['revenue'].sum()
top_month = monthly_revenue.idxmax()
peak_revenue = monthly_revenue.max()
growth_rate = (monthly_revenue.iloc[-1] / monthly_revenue.iloc[0] - 1) * 100
```

Your task: What are the key revenue insights from this sales data?

Output:
Peak monthly revenue reached $847,392 in March[MITO_CITATION:7b3a9e2c-5d14-4c83-b2f9-d67891e4a5f2:2-3], representing a 23.8% year-over-year growth rate[MITO_CITATION:7b3a9e2c-5d14-4c83-b2f9-d67891e4a5f2:4]. The revenue aggregation analysis[MITO_CITATION:7b3a9e2c-5d14-4c83-b2f9-d67891e4a5f2:1-2] reveals strong seasonal performance patterns.

</Example 1>

Notice in the example above:
- Citations support specific facts and numbers, not vague summaries
- Single line citations reference specific calculations (e.g., :4 for growth rate)
- Multiline citations reference broader analysis blocks (e.g., :1-2 for the groupby operation)
- Language is information-dense with concrete metrics
- All line numbers are 0-indexed

<Example 2>

{ACTIVE_CELL_ID_SECTION_HEADING}
'1a2b3c4d-5e6f-7g8h-9i0j-k1l2m3n4o5p6'

{CODE_SECTION_HEADING}
```python
```

Your task: Hello

Output:
Hey there! I'm Mito AI. How can I help you today?

</Example 2>

Notice in the example above that the user is just sending a friendly message, so we respond with a friendly message and do not return any code.

===
{get_database_rules()}

====
{CHAT_CODE_FORMATTING_RULES}

IMPORTANT RULES:
- Do not recreate variables that already exist
- Keep as much of the original code as possible
- When updating an existing code cell, return the full code cell with the update applied. Do not only return part of the code cell with a comment like "# Updated code starts here", etc.
- Only update code in the active cell. Do not update other code in the notebook.
- Write code that preserves the intent of the original code shared with you and the task to complete.
- Make the solution as simple as possible.
- Reuse as much of the existing code as possible.
- Do not add temporary comments like '# Fixed the typo here' or '# Added this line to fix the error'
- Whenever writing Python code, it should be a python code block starting with ```python and ending with ```
"""
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
# Copyright (c) Saga Inc.
|
|
2
|
+
# Distributed under the terms of the GNU Affero General Public License v3.0 License.
|
|
3
|
+
|
|
4
|
+
from mito_ai.completions.prompt_builders.prompt_constants import CODE_SECTION_HEADING
|
|
5
|
+
|
|
6
|
+
def create_explain_code_prompt(active_cell_code: str) -> str:
    """
    Build the prompt that asks the model to explain the active cell's code.

    Args:
        active_cell_code: Source of the active code cell. It is inserted as-is
            into a python code fence at the end of the prompt.

    Returns:
        The prompt text: an instruction, one worked example, then the user's
        code under CODE_SECTION_HEADING followed by an "Output:" cue.
    """
    prompt = f"""Explain the code in the active code cell to me like I have a basic understanding of Python. Don't explain each line, but instead explain the overall logic of the code.

<Example>

{CODE_SECTION_HEADING}

```python
def multiply(x, y):
    return x * y
```

Output:

This code creates a function called `multiply` that takes two arguments `x` and `y`, and returns the product of `x` and `y`.

</Example>

{CODE_SECTION_HEADING}

```python
{active_cell_code}
```

Output:
"""
    return prompt
|
|
@@ -0,0 +1,197 @@
|
|
|
1
|
+
# Copyright (c) Saga Inc.
|
|
2
|
+
# Distributed under the terms of the GNU Affero General Public License v3.0 License.
|
|
3
|
+
|
|
4
|
+
from typing import List
|
|
5
|
+
from mito_ai.completions.prompt_builders.prompt_constants import (
|
|
6
|
+
FILES_SECTION_HEADING,
|
|
7
|
+
VARIABLES_SECTION_HEADING,
|
|
8
|
+
CODE_SECTION_HEADING
|
|
9
|
+
)
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def create_inline_prompt(
|
|
13
|
+
prefix: str,
|
|
14
|
+
suffix: str,
|
|
15
|
+
variables: List[str],
|
|
16
|
+
files: List[str]
|
|
17
|
+
) -> str:
|
|
18
|
+
variables_str = '\n'.join([f"{variable}" for variable in variables])
|
|
19
|
+
files_str = '\n'.join([f"file_name: {file}" for file in files])
|
|
20
|
+
|
|
21
|
+
prompt = f"""You are a coding assistant that lives inside of JupyterLab. Your job is to help the user write code.
|
|
22
|
+
|
|
23
|
+
You're given the current code cell, the user's cursor position, and the variables defined in the notebook. The user's cursor is signified by the symbol <cursor>.
|
|
24
|
+
|
|
25
|
+
CRITICAL FORMATTING RULES:
|
|
26
|
+
1. Include a new line character at the start of your response if you want the code you are writing to be added on the line after the cursor. For example, if the cursor is at the end of a comment, you should start your response with a newline character so that the code you write is not added to the comment.
|
|
27
|
+
2. If you are finishing a line of code that the user started, return the full line of code with no newline character at the start or end.
|
|
28
|
+
3. Your response must preserve correct Python indentation and spacing. For example, if you're completing a line of indented code, you must preserve the indentation.
|
|
29
|
+
|
|
30
|
+
Your job is to complete the code that matches the user's intent. Write the minimal code to achieve the user's intent. Don't expand upon the user's intent.
|
|
31
|
+
|
|
32
|
+
<Example 1>
|
|
33
|
+
{FILES_SECTION_HEADING}
|
|
34
|
+
file_name: sales.csv
|
|
35
|
+
|
|
36
|
+
{VARIABLES_SECTION_HEADING}
|
|
37
|
+
{{
|
|
38
|
+
'loan_multiplier': 1.5,
|
|
39
|
+
'sales_df': pd.DataFrame({{
|
|
40
|
+
'transaction_date': ['2024-01-02', '2024-01-02', '2024-01-02', '2024-01-02', '2024-01-03'],
|
|
41
|
+
'price_per_unit': [10, 9.99, 13.99, 21.00, 100],
|
|
42
|
+
'units_sold': [1, 2, 1, 4, 5],
|
|
43
|
+
'total_price': [10, 19.98, 13.99, 84.00, 500]
|
|
44
|
+
}})
|
|
45
|
+
}}
|
|
46
|
+
|
|
47
|
+
{CODE_SECTION_HEADING}
|
|
48
|
+
```python
|
|
49
|
+
import pandas as pd
|
|
50
|
+
sales_df = pd.read_csv('./sales.csv')
|
|
51
|
+
|
|
52
|
+
# Multiply the total_price column by the loan_multiplier<cursor>
|
|
53
|
+
```
|
|
54
|
+
|
|
55
|
+
Output:
|
|
56
|
+
```python
|
|
57
|
+
|
|
58
|
+
sales_df['total_price'] = sales_df['total_price'] * loan_multiplier
|
|
59
|
+
```
|
|
60
|
+
</Example 1>
|
|
61
|
+
|
|
62
|
+
IMPORTANT: Notice in Example 1 that the output starts with a newline because the cursor was at the end of a comment. This newline is REQUIRED to maintain proper Python formatting.
|
|
63
|
+
|
|
64
|
+
<Example 2>
|
|
65
|
+
{FILES_SECTION_HEADING}
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
{VARIABLES_SECTION_HEADING}
|
|
69
|
+
{{
|
|
70
|
+
df: pd.DataFrame({{
|
|
71
|
+
'age': [20, 25, 22, 23, 29],
|
|
72
|
+
'name': ['Nawaz', 'Aaron', 'Charlie', 'Tamir', 'Eve'],
|
|
73
|
+
}})
|
|
74
|
+
}}
|
|
75
|
+
|
|
76
|
+
{CODE_SECTION_HEADING}
|
|
77
|
+
```python
|
|
78
|
+
df['age'] = df[<cursor>['age'] > 23]
|
|
79
|
+
```
|
|
80
|
+
|
|
81
|
+
Output:
|
|
82
|
+
```python
|
|
83
|
+
df['age'] = df[df['age'] > 23]
|
|
84
|
+
```
|
|
85
|
+
</Example 2>
|
|
86
|
+
|
|
87
|
+
IMPORTANT: Notice in Example 2 that the output does NOT start with a newline because the cursor is in the middle of existing code.
|
|
88
|
+
|
|
89
|
+
<Example 3>
|
|
90
|
+
{FILES_SECTION_HEADING}
|
|
91
|
+
file_name: voters.csv
|
|
92
|
+
|
|
93
|
+
{VARIABLES_SECTION_HEADING}
|
|
94
|
+
{{}}
|
|
95
|
+
|
|
96
|
+
{CODE_SECTION_HEADING}
|
|
97
|
+
```python
|
|
98
|
+
voters = pd.read_csv('./voters.csv')
|
|
99
|
+
|
|
100
|
+
# Create a variable for pennsylvania voters, ohio voters, california voters, and texas voters
|
|
101
|
+
pa_voters = voters[voters['state'] == 'PA']
|
|
102
|
+
ohio_voters<cursor>
|
|
103
|
+
```
|
|
104
|
+
|
|
105
|
+
Output:
|
|
106
|
+
```python
|
|
107
|
+
ohio_voters = voters[voters['state'] == 'OH']
|
|
108
|
+
ca_voters = voters[voters['state'] == 'CA']
|
|
109
|
+
tx_voters = voters[voters['state'] == 'TX']
|
|
110
|
+
```
|
|
111
|
+
</Example 3>
|
|
112
|
+
|
|
113
|
+
IMPORTANT: Notice in Example 3 that output does not start with a newline character because it wasnts to continue the line of code that the user started. Also notice the output contains three lines of code because that is the minimal code to achieve the user's intent.
|
|
114
|
+
|
|
115
|
+
<Example 4>
|
|
116
|
+
{FILES_SECTION_HEADING}
|
|
117
|
+
file_name: july_2025.xlsx
|
|
118
|
+
file_name: august_2025.xlsx
|
|
119
|
+
|
|
120
|
+
{VARIABLES_SECTION_HEADING}
|
|
121
|
+
{{}}
|
|
122
|
+
|
|
123
|
+
{CODE_SECTION_HEADING}
|
|
124
|
+
```python
|
|
125
|
+
# Display the first 5 rows of the dataframe
|
|
126
|
+
df.head()
|
|
127
|
+
<cursor>
|
|
128
|
+
```
|
|
129
|
+
|
|
130
|
+
Output:
|
|
131
|
+
```python
|
|
132
|
+
```
|
|
133
|
+
</Example 4>
|
|
134
|
+
|
|
135
|
+
IMPORTANT: Notice in Example 4 that the output is empty becuase the user's intent is already complete.
|
|
136
|
+
|
|
137
|
+
<Example 5>
|
|
138
|
+
{FILES_SECTION_HEADING}
|
|
139
|
+
|
|
140
|
+
|
|
141
|
+
{VARIABLES_SECTION_HEADING}
|
|
142
|
+
{{}}
|
|
143
|
+
|
|
144
|
+
{CODE_SECTION_HEADING}
|
|
145
|
+
```python
|
|
146
|
+
def even_and_odd():
|
|
147
|
+
for i in range(10):
|
|
148
|
+
if i % 2 == 0:
|
|
149
|
+
print(f"Even: {{i}}")
|
|
150
|
+
else:
|
|
151
|
+
pri<cursor>
|
|
152
|
+
```
|
|
153
|
+
|
|
154
|
+
Output:
|
|
155
|
+
```python
|
|
156
|
+
print(f"Odd: {{i}}")
|
|
157
|
+
```
|
|
158
|
+
</Example 5>
|
|
159
|
+
|
|
160
|
+
IMPORTANT: Notice in Example 5 that the output is indented several times because the code must be executed as part of the else block.
|
|
161
|
+
|
|
162
|
+
<Example 6>
|
|
163
|
+
{FILES_SECTION_HEADING}
|
|
164
|
+
|
|
165
|
+
|
|
166
|
+
{VARIABLES_SECTION_HEADING}
|
|
167
|
+
{{}}
|
|
168
|
+
|
|
169
|
+
{CODE_SECTION_HEADING}
|
|
170
|
+
```python
|
|
171
|
+
days_in_week <cursor>
|
|
172
|
+
```
|
|
173
|
+
|
|
174
|
+
Output:
|
|
175
|
+
```python
|
|
176
|
+
days_in_week = 7
|
|
177
|
+
```
|
|
178
|
+
</Example 6>
|
|
179
|
+
|
|
180
|
+
IMPORTANT: Notice in Example 6 that inorder to finish the variable declaration, the output continues the existing line of code and does not start with a new line character.
|
|
181
|
+
|
|
182
|
+
Your Task:
|
|
183
|
+
|
|
184
|
+
{FILES_SECTION_HEADING}
|
|
185
|
+
{files_str}
|
|
186
|
+
|
|
187
|
+
{VARIABLES_SECTION_HEADING}
|
|
188
|
+
{variables_str}
|
|
189
|
+
|
|
190
|
+
{CODE_SECTION_HEADING}
|
|
191
|
+
```python
|
|
192
|
+
{prefix}<cursor>{suffix}
|
|
193
|
+
```
|
|
194
|
+
|
|
195
|
+
Output:
|
|
196
|
+
"""
|
|
197
|
+
return prompt
|
|
@@ -0,0 +1,170 @@
|
|
|
1
|
+
# Copyright (c) Saga Inc.
|
|
2
|
+
# Distributed under the terms of the GNU Affero General Public License v3.0 License.
|
|
3
|
+
|
|
4
|
+
"""
|
|
5
|
+
This module contains constants used in prompts across the codebase.
|
|
6
|
+
These constants ensure consistency between prompt building and message trimming.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
import os
|
|
10
|
+
import json
|
|
11
|
+
from typing import Final
|
|
12
|
+
from mito_ai.utils.schema import MITO_FOLDER
|
|
13
|
+
|
|
14
|
+
# Section headings used in prompts
# Each value is the literal heading line that prompt builders place above the
# corresponding section of a prompt.
FILES_SECTION_HEADING: Final[str] = "Files in the current directory:"
VARIABLES_SECTION_HEADING: Final[str] = "Defined Variables:"
CODE_SECTION_HEADING: Final[str] = "Code in the active code cell:"
ACTIVE_CELL_ID_SECTION_HEADING: Final[str] = "The ID of the active code cell:"
ACTIVE_CELL_OUTPUT_SECTION_HEADING: Final[str] = "Output of the active code cell:"
GET_CELL_OUTPUT_TOOL_RESPONSE_SECTION_HEADING: Final[str] = "Output of the code cell you just applied the CELL_UPDATE to:"
JUPYTER_NOTEBOOK_SECTION_HEADING: Final[str] = "Jupyter Notebook:"
STREAMLIT_APP_STATUS_SECTION_HEADING: Final[str] = "Streamlit App Status:"

# Placeholder text used when trimming content from messages
CONTENT_REMOVED_PLACEHOLDER: Final[str] = "Content removed to save space"
|
|
26
|
+
|
|
27
|
+
CITATION_RULES = """RULES FOR CITING YOUR WORK
|
|
28
|
+
|
|
29
|
+
It is important that the user is able to verify any insights that you share with them about their data. To make this easy for the user, you must cite the lines of code that you are drawing the insight from. To provide a citation, use one of the following formats inline in your response:
|
|
30
|
+
|
|
31
|
+
Single line citation:
|
|
32
|
+
[MITO_CITATION:cell_id:line_number]
|
|
33
|
+
|
|
34
|
+
Multiline citation (for citing a range of lines):
|
|
35
|
+
[MITO_CITATION:cell_id:first_line-last_line]
|
|
36
|
+
|
|
37
|
+
Citation Rules:
|
|
38
|
+
|
|
39
|
+
1. Every fact or statement derived from the user's notebook must include a citation.
|
|
40
|
+
2. When choosing the citation, select the code that will most help the user validate the fact or statement that you shared with them.
|
|
41
|
+
3. Place the citation immediately after the statement it supports. Do not explain the citation with phrases like "See", "Derived from", etc. Just provide the citation object.
|
|
42
|
+
4. For the "line_number" field, use the line number within the cell that is most relevant to the citation. Important: The cell line number should be 0-indexed and should not skip comments.
|
|
43
|
+
5. For multiline citations, use the "first_line-last_line" format when the insight spans multiple lines of code. Both line numbers should be 0-indexed.
|
|
44
|
+
6. If you cannot find relevant information in the notebook to answer a question, clearly state this and do not provide a citation.
|
|
45
|
+
7. You ONLY need to provide a citation when sharing an insight from the data in the message part of the response. If all you are doing is writing/updating code, then there is no need to provide a citation.
|
|
46
|
+
8. Do not include the citation in the code block as a comment. ONLY include the citation in the message field of your response.
|
|
47
|
+
"""
|
|
48
|
+
|
|
49
|
+
def get_active_cell_output_str(has_active_cell_output: bool) -> str:
    """
    Build the prompt section that tells the AI about the active cell's output.

    Used in the chat prompt.

    Args:
        has_active_cell_output: Whether an output image accompanies the message.

    Returns:
        The active-cell-output heading followed by a one-line note that an
        image is attached, or an empty string when there is no output.
    """
    # Nothing to announce when the cell produced no output.
    if not has_active_cell_output:
        return ""

    # Runtime prompt text: reproduced exactly as written, spelling included.
    return f"{ACTIVE_CELL_OUTPUT_SECTION_HEADING}\nAttatched is an image of the output of the active code cell for your context."
|
|
58
|
+
|
|
59
|
+
def cell_update_output_str(has_cell_update_output: bool) -> str:
    """
    Build the reply to the GET_CELL_OUTPUT tool.

    Args:
        has_cell_update_output: Whether an output image of the requested cell
            accompanies the message.

    Returns:
        The tool-response heading followed by a one-line note that an image of
        the requested output is attached, or an empty string when there is none.
    """
    # No output available: contribute nothing to the message.
    if not has_cell_update_output:
        return ""

    # Runtime prompt text: reproduced exactly as written, spelling included.
    return f"{GET_CELL_OUTPUT_TOOL_RESPONSE_SECTION_HEADING}\nAttatched is an image of code cell output that you requested."
|
|
67
|
+
|
|
68
|
+
def redact_sensitive_info(connections: dict) -> dict:
    """
    Return a masked copy of the connections data.

    Every field of every connection is replaced with the string 'redacted';
    connection names and field names are kept. The input is not modified:
    each connection's mapping is shallow-copied before being overwritten.

    Args:
        connections: Mapping of connection name to that connection's fields.

    Returns:
        A new dict with the same connection names and field names, where
        every field value is 'redacted'.
    """
    masked = {}
    for name, details in connections.items():
        entry = details.copy()
        # Overwrite every existing field in one step; no keys are added or removed.
        entry.update({field: 'redacted' for field in entry})
        masked[name] = entry
    return masked
|
|
79
|
+
|
|
80
|
+
def get_database_rules() -> str:
    """
    Reads the user's database configurations,
    and returns the rules for the AI to follow.

    The configuration is read from `db/connections.json` and `db/schemas.json`
    inside MITO_FOLDER. Connection values are redacted before they are placed
    in the prompt.

    Returns:
        The database rules prompt section, or an empty string when
        connections.json does not exist. If only schemas.json is missing, the
        schema is rendered as `None`.
    """

    # Get the db configuration from the user's mito folder
    connections_path: Final[str] = os.path.join(MITO_FOLDER, 'db', 'connections.json')
    schemas_path: Final[str] = os.path.join(MITO_FOLDER, 'db', 'schemas.json')

    # JSON files are UTF-8; be explicit so the platform default encoding is
    # never used to decode them.
    try:
        with open(connections_path, 'r', encoding='utf-8') as f:
            connections = json.load(f)
    except FileNotFoundError:
        # No db configuration, so there are no rules to return
        return ""

    sanitized_connections = redact_sensitive_info(connections)

    try:
        with open(schemas_path, 'r', encoding='utf-8') as f:
            schemas = json.load(f)
    except FileNotFoundError:
        schemas = None

    # There is a db configuration, so return the rules
    return f"""DATABASE RULES:
If the user has requested data that you believe is stored in the database:
- Use the provided schema.
- Only use SQLAlchemy to query the database.
- Do not use a with statement when creating the SQLAlchemy engine. Instead, initialize it once so it can be reused for multiple queries.
- Always return the results of the query in a pandas DataFrame, unless instructed otherwise.
- Every schema has a unique connection ID. This ID can be used to find the connection details in the connections.json file.
- Do not use the connection ID to query the database. It is only for matching the schema to the correct connection.
- When using the connection ID, do not include any comments about it in your code.
- Connection details are stored in a JSON file located at: `{connections_path}`
- Here is the sanitized contents of the connections.json file:

{sanitized_connections}

- Do not hard-code connection credentials into your code. Instead, load the connections.json file and access connection fields dynamically like so:

```
connections[connection_name]["username"]
```

- The user may colloquially ask for a "list of x", always assume they want a pandas DataFrame.
- When working with dataframes created from an SQL query, ALWAYS use lowercase column names.
- If you think the requested data is stored in the database, but you are unsure, then ask the user for clarification.

## Additional MSSQL Rules

- When connecting to a Microsoft SQL Server (MSSQL) database, use the following format:

```
import urllib.parse

encoded_password = urllib.parse.quote_plus(password)
conn_str = f"mssql+pyodbc://username:encoded_password@host:port/database?driver=ODBC+Driver+18+for+SQL+Server&TrustServerCertificate=yes"
```

- Always URL-encode passwords for MSSQL connections to handle special characters properly.
- Include the port number in MSSQL connection strings.
- Use "ODBC+Driver+18+for+SQL+Server" (with plus signs) in the driver parameter.
- Always include "TrustServerCertificate=yes" for MSSQL connections to avoid SSL certificate issues.

## Additional Oracle Rules

- When connecting to an Oracle database, use the following format:
```
conn_str = f"oracle+oracledb://username:password@host:port?service_name=service_name"
```

Here is the schema:
{schemas}
"""
|
|
163
|
+
|
|
164
|
+
|
|
165
|
+
CHAT_CODE_FORMATTING_RULES = """CRITICAL CODE UPDATE RULES:
|
|
166
|
+
- COMPLETE REPLACEMENT: Your code will COMPLETELY REPLACE the entire contents of the active code cell.
|
|
167
|
+
- INCLUDE ALL CODE: You MUST return the COMPLETE, FULL contents of the entire code cell - including ALL existing code that should remain plus your modifications.
|
|
168
|
+
- NEVER PARTIAL CODE: NEVER return only a portion, snippet, or subset of the code cell. Partial responses will break the user's notebook by deleting important code.
|
|
169
|
+
- PRESERVE EXISTING CODE: Always preserve imports, variable definitions, and other code that the user needs, even if you're only modifying one small part.
|
|
170
|
+
"""
|
|
@@ -0,0 +1,199 @@
|
|
|
1
|
+
# Copyright (c) Saga Inc.
|
|
2
|
+
# Distributed under the terms of the GNU Affero General Public License v3.0 License.
|
|
3
|
+
|
|
4
|
+
from typing import List
|
|
5
|
+
from mito_ai.completions.prompt_builders.prompt_constants import (
|
|
6
|
+
ACTIVE_CELL_ID_SECTION_HEADING,
|
|
7
|
+
FILES_SECTION_HEADING,
|
|
8
|
+
VARIABLES_SECTION_HEADING,
|
|
9
|
+
CODE_SECTION_HEADING
|
|
10
|
+
)
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def create_error_prompt(
    error_message: str,
    active_cell_code: str,
    active_cell_id: str,
    variables: List[str],
    files: List[str]
) -> str:
    """
    Build the prompt that asks the AI to debug the active code cell.

    The prompt contains two worked examples, a list of solution guidelines,
    and then the actual task. It ends with empty ERROR ANALYSIS,
    INTENT ANALYSIS and SOLUTION headings for the model to fill in.

    Args:
        error_message: Error traceback text, placed under "Error Traceback:".
        active_cell_code: Code of the active cell, placed in a python code fence.
        active_cell_id: ID of the active cell.
        variables: Defined variables, rendered one per line.
        files: Available files, rendered one per line.

    Returns:
        The complete prompt string.
    """
    variables_str = '\n'.join([f"{variable}" for variable in variables])
    files_str = '\n'.join([f"{file}" for file in files])
    return f"""Help me debug this code in JupyterLab. Analyze the error and provide a solution that maintains the original intent.

<Example 1>
{FILES_SECTION_HEADING}
file_name: sales.csv

{VARIABLES_SECTION_HEADING}
{{
    'revenue_multiplier': 1.5,
    'sales_df': pd.DataFrame({{
        'transaction_date': ['2024-01-02', '2024-01-02', '2024-01-02', '2024-01-02', '2024-01-03'],
        'price_per_unit': [10, 9.99, 13.99, 21.00, 100],
        'units_sold': [1, 2, 1, 4, 5],
        'total_price': [10, 19.98, 13.99, 84.00, 500]
    }})
}}

{ACTIVE_CELL_ID_SECTION_HEADING}
'9e38c62b-38f8-457d-bb8d-28bfc52edf2c'

{CODE_SECTION_HEADING}
```python
import pandas as pd
sales_df = pd.read_csv('./sales.csv')
revenue_multiplier = 1.5
sales_df['total_revenue'] = sales_df['price'] * revenue_multiplier
```

Error Traceback:
Cell In[24], line 4
      1 import pandas as pd
      2 sales_df = pd.read_csv('./sales.csv')
      3 revenue_multiplier = 1.5
----> 4 sales_df['total_revenue'] = sales_df['price'] * revenue_multiplier

KeyError: 'price'


ERROR ANALYSIS:
Runtime error: Attempted to access non-existent DataFrame column

INTENT ANALYSIS:
User is trying to calculate total revenue by applying a multiplier to transaction prices. Based on the defined variables, the column that the user is tring to access is likely `total_price` because that would allow them to calculate the total revenue for each transaction.

SOLUTION:
```python
import pandas as pd
sales_df = pd.read_csv('./sales.csv')
revenue_multiplier = 1.5
sales_df['total_revenue'] = sales_df['total_price'] * revenue_multiplier
```

The DataFrame contains 'total_price' rather than 'price'. Updated column reference to match existing data structure.
</Example 1>

<Example 2>
{FILES_SECTION_HEADING}


{VARIABLES_SECTION_HEADING}
{{
    'df': pd.DataFrame({{
        'order_id': [1, 2, 3, 4],
        'date': ['Mar 7, 2025', 'Sep 24, 2024', '25 June, 2024', 'June 29, 2024'],
        'amount': [100, 150, 299, 99]
    }})
}}

{ACTIVE_CELL_ID_SECTION_HEADING}
'c68fdf19-db8c-46dd-926f-d90ad35bb3bc'

{CODE_SECTION_HEADING}
```python
df['date'] = pd.to_datetime(df['date'])
```

Error Traceback:
Cell In[27], line 1
----> 1 df['date'] = pd.to_datetime(df['date'])

ValueError: time data "25 June, 2024" doesn't match format "%b %d, %Y", at position 2. You might want to try:
- passing `format` if your strings have a consistent format;
- passing `format='ISO8601'` if your strings are all ISO8601 but not necessarily in exactly the same format;
- passing `format='mixed'`, and the format will be inferred for each element individually. You might want to use `dayfirst` alongside this.

ERROR ANALYSIS:
This is a ValueError caused by applying the wrong format to a specific date string. Because it was triggered at position 2, the first date string must have successfully converted. By looking at the defined variables, I can see that first date string is in the format "Mar 7, 2025", but the third date string is in the format "25 June, 2024". Those dates are not in the same format, so the conversion failed.

INTENT ANALYSIS:
User is trying to convert the date column to a datetime object even though the dates are not in the same starting format.

SOLUTION:
```python
def parse_date(date_str):
    formats = ['%b %d, %Y', '%d %B, %Y']

    for fmt in formats:
        try:
            return pd.to_datetime(date_str, format=fmt)
        except ValueError:
            # Try the next format
            continue

    # If no format worked, return Not a Time
    return pd.NaT

df['date'] = df['date'].apply(lambda x: parse_date(x))
```

Since the dates are not in a consistent format, we need to first figure out which format to use for each date string and then use that format to convert the date.

The best way to do this is with a function. We can call this function `parse_date`.
</Example 2>


Guidelines for Solutions:

Error Analysis:

- Identify error type (Syntax, Runtime, Logic).
- Use the defined variables and code in the active cell to understand the error.
- Consider kernel state and execution order

Intent Preservation:

- Try to understand the user's intent using the defined variables and code in the active cell.

Solution Requirements:

- Return the full code cell with the error fixed and a short explanation of the error.
- Only update code in the active cell. Do not update other code in the notebook.
- Propose a solution that fixes the error and does not change the user's intent.
- Make the solution as simple as possible.
- Reuse as much of the existing code as possible.
- Do not add temporary comments like '# Fixed the typo here' or '# Added this line to fix the error'
- The code in the SOLUTION section should be a python code block starting with ```python and ending with ```
- If you encounter a ModuleNotFoundError, you can install the package by adding the the following line to the top of the code cell: `!pip install <package_name> --quiet`.

Here is your task.

{FILES_SECTION_HEADING}
{files_str}

{VARIABLES_SECTION_HEADING}
{variables_str}

{ACTIVE_CELL_ID_SECTION_HEADING}
{active_cell_id}

{CODE_SECTION_HEADING}
```python
{active_cell_code}
```

Error Traceback:
{error_message}

ERROR ANALYSIS:

INTENT ANALYSIS:

SOLUTION:
"""
|
|
185
|
+
|
|
186
|
+
|
|
187
|
+
def remove_inner_thoughts_from_message(message: str) -> str:
    """
    Strip the AI's inner thoughts from a smart debug response.

    The smart debug prompt thinks to itself before returning the solution.
    We don't need to save the inner thoughts, so we remove them before saving
    the message in the chat history.

    Args:
        message: The raw response text.

    Returns:
        Everything after the first "SOLUTION:" marker, with surrounding
        whitespace stripped. A message without the marker (including the
        empty string) is returned unchanged.
    """
    SOLUTION_STRING = "SOLUTION:"

    # Split on the FIRST marker only. The solution text may itself contain
    # "SOLUTION:" and must not be truncated at a later occurrence.
    _, marker, solution = message.partition(SOLUTION_STRING)
    if not marker:
        return message

    return solution.strip()
|
|
199
|
+
|