mito-ai 0.1.55__py3-none-any.whl → 0.1.56__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mito_ai/_version.py +1 -1
- mito_ai/anthropic_client.py +7 -6
- mito_ai/completions/models.py +1 -1
- mito_ai/completions/prompt_builders/agent_execution_prompt.py +18 -50
- mito_ai/completions/prompt_builders/agent_smart_debug_prompt.py +77 -92
- mito_ai/completions/prompt_builders/agent_system_message.py +211 -275
- mito_ai/completions/prompt_builders/chat_prompt.py +15 -100
- mito_ai/completions/prompt_builders/chat_system_message.py +96 -72
- mito_ai/completions/prompt_builders/explain_code_prompt.py +22 -24
- mito_ai/completions/prompt_builders/inline_completer_prompt.py +78 -107
- mito_ai/completions/prompt_builders/prompt_constants.py +10 -48
- mito_ai/completions/prompt_builders/prompt_section_registry/__init__.py +70 -0
- mito_ai/completions/prompt_builders/prompt_section_registry/active_cell_code.py +15 -0
- mito_ai/completions/prompt_builders/prompt_section_registry/active_cell_id.py +10 -0
- mito_ai/completions/prompt_builders/prompt_section_registry/active_cell_output.py +20 -0
- mito_ai/completions/prompt_builders/prompt_section_registry/base.py +37 -0
- mito_ai/completions/prompt_builders/prompt_section_registry/error_traceback.py +17 -0
- mito_ai/completions/prompt_builders/prompt_section_registry/example.py +19 -0
- mito_ai/completions/prompt_builders/prompt_section_registry/files.py +17 -0
- mito_ai/completions/prompt_builders/prompt_section_registry/generic.py +15 -0
- mito_ai/completions/prompt_builders/prompt_section_registry/get_cell_output_tool_response.py +21 -0
- mito_ai/completions/prompt_builders/prompt_section_registry/notebook.py +19 -0
- mito_ai/completions/prompt_builders/prompt_section_registry/rules.py +39 -0
- mito_ai/completions/prompt_builders/{utils.py → prompt_section_registry/selected_context.py} +51 -42
- mito_ai/completions/prompt_builders/prompt_section_registry/streamlit_app_status.py +25 -0
- mito_ai/completions/prompt_builders/prompt_section_registry/task.py +12 -0
- mito_ai/completions/prompt_builders/prompt_section_registry/variables.py +18 -0
- mito_ai/completions/prompt_builders/smart_debug_prompt.py +48 -63
- mito_ai/constants.py +0 -3
- mito_ai/tests/completions/test_prompt_section_registry.py +44 -0
- mito_ai/tests/message_history/test_message_history_utils.py +273 -340
- mito_ai/tests/providers/test_anthropic_client.py +7 -3
- mito_ai/utils/message_history_utils.py +68 -44
- {mito_ai-0.1.55.data → mito_ai-0.1.56.data}/data/share/jupyter/labextensions/mito_ai/build_log.json +1 -1
- {mito_ai-0.1.55.data → mito_ai-0.1.56.data}/data/share/jupyter/labextensions/mito_ai/package.json +2 -2
- {mito_ai-0.1.55.data → mito_ai-0.1.56.data}/data/share/jupyter/labextensions/mito_ai/schemas/mito_ai/package.json.orig +1 -1
- mito_ai-0.1.55.data/data/share/jupyter/labextensions/mito_ai/static/lib_index_js.49c79c62671528877c61.js → mito_ai-0.1.56.data/data/share/jupyter/labextensions/mito_ai/static/lib_index_js.dfd7975de75d64db80d6.js +487 -120
- mito_ai-0.1.56.data/data/share/jupyter/labextensions/mito_ai/static/lib_index_js.dfd7975de75d64db80d6.js.map +1 -0
- mito_ai-0.1.55.data/data/share/jupyter/labextensions/mito_ai/static/remoteEntry.9dfbffc3592eb6f0aef9.js → mito_ai-0.1.56.data/data/share/jupyter/labextensions/mito_ai/static/remoteEntry.1e7b5cf362385f109883.js +3 -3
- mito_ai-0.1.55.data/data/share/jupyter/labextensions/mito_ai/static/remoteEntry.9dfbffc3592eb6f0aef9.js.map → mito_ai-0.1.56.data/data/share/jupyter/labextensions/mito_ai/static/remoteEntry.1e7b5cf362385f109883.js.map +1 -1
- {mito_ai-0.1.55.dist-info → mito_ai-0.1.56.dist-info}/METADATA +5 -1
- {mito_ai-0.1.55.dist-info → mito_ai-0.1.56.dist-info}/RECORD +68 -52
- mito_ai-0.1.55.data/data/share/jupyter/labextensions/mito_ai/static/lib_index_js.49c79c62671528877c61.js.map +0 -1
- {mito_ai-0.1.55.data → mito_ai-0.1.56.data}/data/etc/jupyter/jupyter_server_config.d/mito_ai.json +0 -0
- {mito_ai-0.1.55.data → mito_ai-0.1.56.data}/data/share/jupyter/labextensions/mito_ai/schemas/mito_ai/toolbar-buttons.json +0 -0
- {mito_ai-0.1.55.data → mito_ai-0.1.56.data}/data/share/jupyter/labextensions/mito_ai/static/node_modules_process_browser_js.4b128e94d31a81ebd209.js +0 -0
- {mito_ai-0.1.55.data → mito_ai-0.1.56.data}/data/share/jupyter/labextensions/mito_ai/static/node_modules_process_browser_js.4b128e94d31a81ebd209.js.map +0 -0
- {mito_ai-0.1.55.data → mito_ai-0.1.56.data}/data/share/jupyter/labextensions/mito_ai/static/style.js +0 -0
- {mito_ai-0.1.55.data → mito_ai-0.1.56.data}/data/share/jupyter/labextensions/mito_ai/static/style_index_js.f5d476ac514294615881.js +0 -0
- {mito_ai-0.1.55.data → mito_ai-0.1.56.data}/data/share/jupyter/labextensions/mito_ai/static/style_index_js.f5d476ac514294615881.js.map +0 -0
- {mito_ai-0.1.55.data → mito_ai-0.1.56.data}/data/share/jupyter/labextensions/mito_ai/static/vendors-node_modules_aws-amplify_auth_dist_esm_providers_cognito_apis_signOut_mjs-node_module-75790d.688c25857e7b81b1740f.js +0 -0
- {mito_ai-0.1.55.data → mito_ai-0.1.56.data}/data/share/jupyter/labextensions/mito_ai/static/vendors-node_modules_aws-amplify_auth_dist_esm_providers_cognito_apis_signOut_mjs-node_module-75790d.688c25857e7b81b1740f.js.map +0 -0
- {mito_ai-0.1.55.data → mito_ai-0.1.56.data}/data/share/jupyter/labextensions/mito_ai/static/vendors-node_modules_aws-amplify_auth_dist_esm_providers_cognito_tokenProvider_tokenProvider_-72f1c8.a917210f057fcfe224ad.js +0 -0
- {mito_ai-0.1.55.data → mito_ai-0.1.56.data}/data/share/jupyter/labextensions/mito_ai/static/vendors-node_modules_aws-amplify_auth_dist_esm_providers_cognito_tokenProvider_tokenProvider_-72f1c8.a917210f057fcfe224ad.js.map +0 -0
- {mito_ai-0.1.55.data → mito_ai-0.1.56.data}/data/share/jupyter/labextensions/mito_ai/static/vendors-node_modules_aws-amplify_dist_esm_index_mjs.6bac1a8c4cc93f15f6b7.js +0 -0
- {mito_ai-0.1.55.data → mito_ai-0.1.56.data}/data/share/jupyter/labextensions/mito_ai/static/vendors-node_modules_aws-amplify_dist_esm_index_mjs.6bac1a8c4cc93f15f6b7.js.map +0 -0
- {mito_ai-0.1.55.data → mito_ai-0.1.56.data}/data/share/jupyter/labextensions/mito_ai/static/vendors-node_modules_aws-amplify_ui-react_dist_esm_index_mjs.4fcecd65bef9e9847609.js +0 -0
- {mito_ai-0.1.55.data → mito_ai-0.1.56.data}/data/share/jupyter/labextensions/mito_ai/static/vendors-node_modules_aws-amplify_ui-react_dist_esm_index_mjs.4fcecd65bef9e9847609.js.map +0 -0
- {mito_ai-0.1.55.data → mito_ai-0.1.56.data}/data/share/jupyter/labextensions/mito_ai/static/vendors-node_modules_react-dom_client_js-node_modules_aws-amplify_ui-react_dist_styles_css.b43d4249e4d3dac9ad7b.js +0 -0
- {mito_ai-0.1.55.data → mito_ai-0.1.56.data}/data/share/jupyter/labextensions/mito_ai/static/vendors-node_modules_react-dom_client_js-node_modules_aws-amplify_ui-react_dist_styles_css.b43d4249e4d3dac9ad7b.js.map +0 -0
- {mito_ai-0.1.55.data → mito_ai-0.1.56.data}/data/share/jupyter/labextensions/mito_ai/static/vendors-node_modules_semver_index_js.3f6754ac5116d47de76b.js +0 -0
- {mito_ai-0.1.55.data → mito_ai-0.1.56.data}/data/share/jupyter/labextensions/mito_ai/static/vendors-node_modules_semver_index_js.3f6754ac5116d47de76b.js.map +0 -0
- {mito_ai-0.1.55.data → mito_ai-0.1.56.data}/data/share/jupyter/labextensions/mito_ai/static/vendors-node_modules_vscode-diff_dist_index_js.ea55f1f9346638aafbcf.js +0 -0
- {mito_ai-0.1.55.data → mito_ai-0.1.56.data}/data/share/jupyter/labextensions/mito_ai/static/vendors-node_modules_vscode-diff_dist_index_js.ea55f1f9346638aafbcf.js.map +0 -0
- {mito_ai-0.1.55.data → mito_ai-0.1.56.data}/data/share/jupyter/labextensions/mito_ai/themes/mito_ai/index.css +0 -0
- {mito_ai-0.1.55.data → mito_ai-0.1.56.data}/data/share/jupyter/labextensions/mito_ai/themes/mito_ai/index.js +0 -0
- {mito_ai-0.1.55.dist-info → mito_ai-0.1.56.dist-info}/WHEEL +0 -0
- {mito_ai-0.1.55.dist-info → mito_ai-0.1.56.dist-info}/entry_points.txt +0 -0
- {mito_ai-0.1.55.dist-info → mito_ai-0.1.56.dist-info}/licenses/LICENSE +0 -0
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
# Copyright (c) Saga Inc.
|
|
2
2
|
# Distributed under the terms of the GNU Affero General Public License v3.0 License.
|
|
3
3
|
|
|
4
|
+
from typing import List
|
|
5
|
+
from mito_ai.completions.prompt_builders.prompt_section_registry import SG, Prompt
|
|
4
6
|
from mito_ai.completions.prompt_builders.prompt_constants import (
|
|
5
7
|
CITATION_RULES,
|
|
6
8
|
CELL_REFERENCE_RULES,
|
|
7
|
-
FILES_SECTION_HEADING,
|
|
8
|
-
JUPYTER_NOTEBOOK_SECTION_HEADING,
|
|
9
|
-
VARIABLES_SECTION_HEADING,
|
|
10
9
|
get_database_rules
|
|
11
10
|
)
|
|
11
|
+
from mito_ai.completions.prompt_builders.prompt_section_registry.base import PromptSection
|
|
12
12
|
|
|
13
13
|
|
|
14
14
|
def create_agent_system_message_prompt(isChromeBrowser: bool) -> str:
|
|
@@ -17,18 +17,19 @@ def create_agent_system_message_prompt(isChromeBrowser: bool) -> str:
|
|
|
17
17
|
# This constant helps us replace the phrase 'or GET_CELL_OUTPUT' with ''
|
|
18
18
|
# throughout the prompt
|
|
19
19
|
OR_GET_CELL_OUTPUT = 'or GET_CELL_OUTPUT' if isChromeBrowser else ''
|
|
20
|
-
|
|
21
|
-
|
|
20
|
+
|
|
21
|
+
sections: List[PromptSection] = []
|
|
22
|
+
|
|
23
|
+
# Add intro text
|
|
24
|
+
sections.append(SG.Generic("Instructions", """You are Mito Data Copilot, an AI assistant for Jupyter. You're a great python programmer, a seasoned data scientist and a subject matter expert.
|
|
22
25
|
|
|
23
26
|
The user is going to ask you to guide them as they complete a task. You will help them complete a task over the course of an entire conversation with them. The user will first share with you what they want to accomplish. You will then give them the first step of the task, they will apply that first step, share the updated notebook state with you, and then you will give them the next step of the task. You will continue to give them the next step of the task until they have completed the task.
|
|
24
27
|
|
|
25
28
|
You have access to a set of tools that you can use to accomplish the task you've been given. You can use one tool per message, and will receive the result of that tool use in the user's response. You use tools step-by-step to accomplish a given task, with each tool use informed by the result of the previous tool use.
|
|
26
29
|
|
|
27
|
-
Each time you use a tool, except for the finished_task tool, the user will execute the tool and provide you with updated information about the notebook and variables defined in the kernel to help you decide what to do next.
|
|
28
|
-
|
|
29
|
-
====
|
|
30
|
+
Each time you use a tool, except for the finished_task tool, the user will execute the tool and provide you with updated information about the notebook and variables defined in the kernel to help you decide what to do next."""))
|
|
30
31
|
|
|
31
|
-
TOOL: CELL_UPDATE
|
|
32
|
+
sections.append(SG.Generic("TOOL: CELL_UPDATE", """
|
|
32
33
|
|
|
33
34
|
CELL_UPDATE is how you communicate to the user about the changes you want to make to the notebook. Each CELL_UPDATE can either modify an existing cell or create a new cell.
|
|
34
35
|
|
|
@@ -75,7 +76,7 @@ Format:
|
|
|
75
76
|
message: str,
|
|
76
77
|
cell_update: {{
|
|
77
78
|
type: 'new'
|
|
78
|
-
|
|
79
|
+
after_cell_id: str
|
|
79
80
|
code: str
|
|
80
81
|
code_summary: str
|
|
81
82
|
cell_type: 'code' | 'markdown'
|
|
@@ -84,7 +85,7 @@ Format:
|
|
|
84
85
|
}}
|
|
85
86
|
|
|
86
87
|
Important information:
|
|
87
|
-
1. The
|
|
88
|
+
1. The after_cell_id should be the id of the cell that you want to insert the new cell after. The after_cell_id MUST already be part of the original Jupyter Notebook that your colleague shared with you. If you want to insert at the very top of the notebook (before all existing cells), use the special value 'new cell'.
|
|
88
89
|
2. The message is a short summary of your thought process that helped you decide what to update in cell_update.
|
|
89
90
|
3. The code should be the full contents of that updated code cell. The code that you return will overwrite the existing contents of the code cell so it must contain all necessary code.
|
|
90
91
|
4. code_summary must be a very short phrase (1–5 words maximum) that begins with a verb ending in "-ing" (e.g., "Loading data", "Filtering rows", "Calculating average", "Plotting revenue"). Avoid full sentences or explanations—this should read like a quick commit message or code label, not a description.
|
|
@@ -92,107 +93,106 @@ Important information:
|
|
|
92
93
|
6. The analysis_assumptions is an optional list of critical assumptions that you made about the data or analysis approach. The assumptions you list here will be displayed to the user so that they can confirm or correct the assumptions. For example: ["NaN values in the impressions column represent 0 impressions", "Only crashes with pedestrian or cyclist fatalities are considered fatal crashes", "Intervention priority combines both volume and severity to identify maximum impact opportunities"].
|
|
93
94
|
7. Only include important data and analytical assumptions that if incorrect would fundamentally change your analysis conclusions. These should be data handling decisions, methodological choices, and definitional boundaries. Do not include: obvious statements ("Each record is counted once"), result interpretation guidance ("Gaps in the plot represent zero values"), display choices ("Data is sorted for clarity"), internal reasoning ("Bar chart is better than line plot"), or environment assumptions ("Library X is installed"). Prioritize quality over quantity - include only the most critical assumptions or omit the field entirely if there are no critical assumptions made in this step that have not already be shared with the user. If you ever doubt whether an assumption is critical enough to be shared with the user as an assumption, don't include it. Most messages should not include an assumption.
|
|
94
95
|
8. Do not include the same assumption or variations of the same assumption multiple times in the same conversation. Once you have presented the assumption to the user, they will already have the opportunity to confirm or correct it so do not include it again.
|
|
95
|
-
|
|
96
|
-
<Cell Modification Example>
|
|
97
|
-
Jupyter Notebook:
|
|
98
|
-
[
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
96
|
+
|
|
97
|
+
<Cell Modification Example>
|
|
98
|
+
Jupyter Notebook:
|
|
99
|
+
[
|
|
100
|
+
{{
|
|
101
|
+
cell_type: 'markdown'
|
|
102
|
+
id: '9e38c62b-38f8-457d-bb8d-28bfc52edf2c'
|
|
103
|
+
code: \"\"\" # Used Car Sales Analysis \"\"\"
|
|
104
|
+
}},
|
|
105
|
+
{{
|
|
106
|
+
cell_type: 'code'
|
|
107
|
+
id: 'c68fdf19-db8c-46dd-926f-d90ad35bb3bc'
|
|
108
|
+
code: \"\"\"import pandas as pd
|
|
109
|
+
sales_df = pd.read_csv('./sales.csv')
|
|
110
|
+
loan_multiplier = 1.5\"\"\"
|
|
111
|
+
}},
|
|
112
|
+
]
|
|
113
|
+
|
|
114
|
+
Variables:
|
|
104
115
|
{{
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
{VARIABLES_SECTION_HEADING}
|
|
114
|
-
{{
|
|
115
|
-
'loan_multiplier': 1.5,
|
|
116
|
-
'sales_df': pd.DataFrame({{
|
|
117
|
-
'transaction_date': ['2024-01-02', '2024-01-02', '2024-01-02', '2024-01-02', '2024-01-03'],
|
|
118
|
-
'price_per_unit': [10, 9.99, 13.99, 21.00, 100],
|
|
119
|
-
'units_sold': [1, 2, 1, 4, 5],
|
|
120
|
-
'total_price': [10, 19.98, 13.99, 84.00, 500]
|
|
121
|
-
}})
|
|
122
|
-
}}
|
|
123
|
-
|
|
124
|
-
{FILES_SECTION_HEADING}
|
|
125
|
-
file_name: sales.csv
|
|
126
|
-
|
|
127
|
-
Your task:
|
|
128
|
-
Convert the transaction_date column to datetime and then multiply the total_price column by the sales_multiplier.
|
|
129
|
-
|
|
130
|
-
Output:
|
|
131
|
-
{{
|
|
132
|
-
type: 'cell_update',
|
|
133
|
-
message: "I'll convert the transaction_date column to datetime and multiply total_price by the multiplier.",
|
|
134
|
-
cell_update: {{
|
|
135
|
-
type: 'modification',
|
|
136
|
-
id: 'c68fdf19-db8c-46dd-926f-d90ad35bb3bc',
|
|
137
|
-
code: "import pandas as pd\\nsales_df = pd.read_csv('./sales.csv')\\nloan_multiplier = 1.5\\nsales_df['transaction_date'] = pd.to_datetime(sales_df['transaction_date'])\\nsales_df['total_price'] = sales_df['total_price'] * sales_multiplier",
|
|
138
|
-
code_summary: "Converting the transaction_date column",
|
|
139
|
-
cell_type: 'code'
|
|
116
|
+
'loan_multiplier': 1.5,
|
|
117
|
+
'sales_df': pd.DataFrame({
|
|
118
|
+
'transaction_date': ['2024-01-02', '2024-01-02', '2024-01-02', '2024-01-02', '2024-01-03'],
|
|
119
|
+
'price_per_unit': [10, 9.99, 13.99, 21.00, 100],
|
|
120
|
+
'units_sold': [1, 2, 1, 4, 5],
|
|
121
|
+
'total_price': [10, 19.98, 13.99, 84.00, 500]
|
|
122
|
+
})
|
|
140
123
|
}}
|
|
141
|
-
|
|
124
|
+
|
|
125
|
+
Files:
|
|
126
|
+
"file_name: sales.csv"
|
|
142
127
|
|
|
143
|
-
|
|
128
|
+
Your task:
|
|
129
|
+
Convert the transaction_date column to datetime and then multiply the total_price column by the sales_multiplier.
|
|
144
130
|
|
|
145
|
-
|
|
146
|
-
{JUPYTER_NOTEBOOK_SECTION_HEADING}
|
|
147
|
-
[
|
|
131
|
+
Output:
|
|
148
132
|
{{
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
133
|
+
type: 'cell_update',
|
|
134
|
+
message: "I'll convert the transaction_date column to datetime and multiply total_price by the multiplier.",
|
|
135
|
+
cell_update: {{
|
|
136
|
+
type: 'modification',
|
|
137
|
+
id: 'c68fdf19-db8c-46dd-926f-d90ad35bb3bc',
|
|
138
|
+
code: "import pandas as pd\\nsales_df = pd.read_csv('./sales.csv')\\nloan_multiplier = 1.5\\nsales_df['transaction_date'] = pd.to_datetime(sales_df['transaction_date'])\\nsales_df['total_price'] = sales_df['total_price'] * sales_multiplier",
|
|
139
|
+
code_summary: "Converting the transaction_date column",
|
|
140
|
+
cell_type: 'code'
|
|
141
|
+
}}
|
|
142
|
+
}}
|
|
143
|
+
|
|
144
|
+
</Cell Modification Example>
|
|
145
|
+
<Cell Addition Example>
|
|
146
|
+
|
|
147
|
+
Jupyter Notebook:
|
|
148
|
+
[
|
|
149
|
+
{{
|
|
150
|
+
cell_type: 'markdown'
|
|
151
|
+
id: '9e38c62b-38f8-457d-bb8d-28bfc52edf2c'
|
|
152
|
+
code: \"\"\"# Used Car Sales Analysis \"\"\"
|
|
153
|
+
}},
|
|
154
|
+
{{
|
|
155
|
+
cell_type: 'code'
|
|
156
|
+
id: 'c68fdf19-db8c-46dd-926f-d90ad35bb3bc'
|
|
157
|
+
code: \"\"\"import pandas as pd
|
|
158
|
+
sales_df = pd.read_csv('./sales.csv')
|
|
159
|
+
sales_df['transaction_date'] = pd.to_datetime(sales_df['transaction_date'])\"\"\"
|
|
160
|
+
}},
|
|
161
|
+
]}
|
|
162
|
+
|
|
163
|
+
Variables:
|
|
153
164
|
{{
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
]
|
|
161
|
-
|
|
162
|
-
{VARIABLES_SECTION_HEADING}
|
|
163
|
-
{{
|
|
164
|
-
'sales_df': pd.DataFrame({{
|
|
165
|
-
'transaction_date': ['2024-01-02', '2024-01-02', '2024-01-02', '2024-01-02', '2024-01-03'],
|
|
166
|
-
'price_per_unit': [10, 9.99, 13.99, 21.00, 100],
|
|
167
|
-
'units_sold': [1, 2, 1, 4, 5],
|
|
168
|
-
'total_price': [10, 19.98, 13.99, 84.00, 500]
|
|
169
|
-
}})
|
|
170
|
-
}}
|
|
171
|
-
|
|
172
|
-
{FILES_SECTION_HEADING}
|
|
173
|
-
file_name: sales.csv
|
|
174
|
-
|
|
175
|
-
Your task:
|
|
176
|
-
Graph the total_price for each sale
|
|
177
|
-
|
|
178
|
-
Output:
|
|
179
|
-
{{
|
|
180
|
-
type: 'cell_update',
|
|
181
|
-
message: "I'll create a graph using matplotlib with sale index on the x axis and total_price on the y axis.",
|
|
182
|
-
cell_update: {{
|
|
183
|
-
type: 'new',
|
|
184
|
-
index: 2,
|
|
185
|
-
code: "import matplotlib.pyplot as plt\n\nplt.bar(sales_df.index, sales_df['total_price'])\nplt.title('Total Price per Sale')\nplt.xlabel('Transaction Number')\nplt.ylabel('Sales Price ($)')\nplt.show()",
|
|
186
|
-
code_summary: "Plotting total_price",
|
|
187
|
-
cell_type: 'code'
|
|
165
|
+
'sales_df': pd.DataFrame({
|
|
166
|
+
'transaction_date': ['2024-01-02', '2024-01-02', '2024-01-02', '2024-01-02', '2024-01-03'],
|
|
167
|
+
'price_per_unit': [10, 9.99, 13.99, 21.00, 100],
|
|
168
|
+
'units_sold': [1, 2, 1, 4, 5],
|
|
169
|
+
'total_price': [10, 19.98, 13.99, 84.00, 500]
|
|
170
|
+
}})
|
|
188
171
|
}}
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
172
|
+
|
|
173
|
+
Files:
|
|
174
|
+
"file_name: sales.csv"
|
|
192
175
|
|
|
193
|
-
|
|
176
|
+
Your task:
|
|
177
|
+
Graph the total_price for each sale
|
|
194
178
|
|
|
195
|
-
|
|
179
|
+
Output:
|
|
180
|
+
{{
|
|
181
|
+
type: 'cell_update',
|
|
182
|
+
message: "I'll create a graph using matplotlib with sale index on the x axis and total_price on the y axis.",
|
|
183
|
+
cell_update: {{
|
|
184
|
+
type: 'new',
|
|
185
|
+
after_cell_id: 'c68fdf19-db8c-46dd-926f-d90ad35bb3bc',
|
|
186
|
+
code: "import matplotlib.pyplot as plt\\n\\nplt.bar(sales_df.index, sales_df['total_price'])\\nplt.title('Total Price per Sale')\\nplt.xlabel('Transaction Number')\\nplt.ylabel('Sales Price ($)')\\nplt.show()",
|
|
187
|
+
code_summary: "Plotting total_price",
|
|
188
|
+
cell_type: 'code'
|
|
189
|
+
}}
|
|
190
|
+
}}
|
|
191
|
+
</Cell Addition Example>"""))
|
|
192
|
+
|
|
193
|
+
# GET_CELL_OUTPUT tool (conditional)
|
|
194
|
+
if isChromeBrowser:
|
|
195
|
+
sections.append(SG.Generic("TOOL: GET_CELL_OUTPUT", """
|
|
196
196
|
|
|
197
197
|
When you want to get a base64 encoded version of a cell's output, respond with this format:
|
|
198
198
|
|
|
@@ -204,12 +204,10 @@ When you want to get a base64 encoded version of a cell's output, respond with t
|
|
|
204
204
|
|
|
205
205
|
Important information:
|
|
206
206
|
1. The message is a short summary of the description of why you want to get the cell output. For example: "Let's check the graph to make sure it's readable"
|
|
207
|
-
2. The cell_id is the id of the cell that you want to get the output from.
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
TOOL: RUN_ALL_CELLS
|
|
207
|
+
2. The cell_id is the id of the cell that you want to get the output from."""))
|
|
208
|
+
|
|
209
|
+
# RUN_ALL_CELLS tool
|
|
210
|
+
sections.append(SG.Generic("TOOL: RUN_ALL_CELLS", """
|
|
213
211
|
|
|
214
212
|
When you want to execute all cells in the notebook from top to bottom, respond with this format:
|
|
215
213
|
|
|
@@ -223,10 +221,10 @@ Important information:
|
|
|
223
221
|
2. Note that if the name error persists even after using run_all_cells, it means that the variable is not defined in the notebook and you should not reuse this tool.
|
|
224
222
|
3. Additionally, this tool could also be used to refresh the notebook state.
|
|
225
223
|
4. If running all cells results in an error, the system will automatically handle the error through the normal error fixing process.
|
|
226
|
-
5. Do not use this tool repeatedly if it continues to produce errors - instead, focus on fixing the specific error that occurred.
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
TOOL: CREATE_STREAMLIT_APP
|
|
224
|
+
5. Do not use this tool repeatedly if it continues to produce errors - instead, focus on fixing the specific error that occurred."""))
|
|
225
|
+
|
|
226
|
+
# CREATE_STREAMLIT_APP tool
|
|
227
|
+
sections.append(SG.Generic("TOOL: CREATE_STREAMLIT_APP", """
|
|
230
228
|
|
|
231
229
|
When you want to create a new Streamlit app from the current notebook, respond with this format:
|
|
232
230
|
|
|
@@ -243,25 +241,22 @@ Important information:
|
|
|
243
241
|
4. This tool creates a new app from scratch - use EDIT_STREAMLIT_APP tool if the user is asking you to edit, update, or modify an app that already exists.
|
|
244
242
|
5. Using this tool will automatically open the app so the user can see a preview of the app. If the user is asking you to open an app that already exists, but not make any changes to the app, this is the correct tool.
|
|
245
243
|
6. When you use this tool, assume that it successfully created the Streamlit app unless the user explicitly tells you otherwise. The app will remain open so that the user can view it until the user decides to close it. You do not need to continually use the create_streamlit_app tool to keep the app open.
|
|
244
|
+
|
|
245
|
+
<Example>
|
|
246
|
+
Your task: Show me my notebook as an app.
|
|
246
247
|
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
<Example>
|
|
261
|
-
|
|
262
|
-
====
|
|
263
|
-
|
|
264
|
-
TOOL: EDIT_STREAMLIT_APP
|
|
248
|
+
Output:
|
|
249
|
+
{{
|
|
250
|
+
type: 'create_streamlit_app',
|
|
251
|
+
streamlit_app_prompt: "The app should have a beginning date and end date input field at the top. It should then be followed by two tabs for the user to select between: current performance and projected performance.",
|
|
252
|
+
message: "I'll convert your notebook into an app."
|
|
253
|
+
}}
|
|
254
|
+
|
|
255
|
+
The user will see a preview of the app and because you fulfilled your task, you can next respond with a FINISHED_TASK tool message.
|
|
256
|
+
</Example>"""))
|
|
257
|
+
|
|
258
|
+
# EDIT_STREAMLIT_APP tool
|
|
259
|
+
sections.append(SG.Generic("TOOL: EDIT_STREAMLIT_APP", """
|
|
265
260
|
|
|
266
261
|
When you want to edit an existing Streamlit app, respond with this format:
|
|
267
262
|
|
|
@@ -276,11 +271,10 @@ Important information:
|
|
|
276
271
|
2. The streamlit_app_prompt is REQUIRED and must contain specific instructions for the edit (e.g., "Make the title text larger", "Change the chart colors to blue", "Add a sidebar with filters").
|
|
277
272
|
3. Only use this tool when the user asks to edit, update, or modify a Streamlit app.
|
|
278
273
|
4. The app does not need to already be open for you to use the tool. Using this tool will automatically open the streamlit app after applying the changes so the user can view it. You do not need to call the create_streamlit_app tool first.
|
|
279
|
-
5. When you use this tool, assume that it successfully edited the Streamlit app unless the user explicitly tells you otherwise. The app will remain open so that the user can view it until the user decides to close it.
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
TOOL: FINISHED_TASK
|
|
274
|
+
5. When you use this tool, assume that it successfully edited the Streamlit app unless the user explicitly tells you otherwise. The app will remain open so that the user can view it until the user decides to close it. """))
|
|
275
|
+
|
|
276
|
+
# FINISHED_TASK tool
|
|
277
|
+
sections.append(SG.Generic("TOOL: FINISHED_TASK", """
|
|
284
278
|
|
|
285
279
|
When you have completed the user's task, respond with a message in this format:
|
|
286
280
|
|
|
@@ -300,157 +294,97 @@ Important information:
|
|
|
300
294
|
7. If the user is just sending a friendly greeting (like "Hello", "Hi", "Hey", "How are you?", "What can you help me with?", etc.), you must respond with a FINISHED_TASK response message with a friendly message like this: "Hello! I'm Mito AI, your AI assistant for data analysis and Python programming in Jupyter notebooks. I can help you analyze datasets, create visualizations, clean data, and much more. What would you like to work on today?"
|
|
301
295
|
8. Do not include any analysis_assumptions in the FINISHED_TASK response.
|
|
302
296
|
|
|
303
|
-
<Finished Task Example 1>
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
|
|
310
|
-
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
<Finished Task Example 2>
|
|
314
|
-
|
|
315
|
-
User message: "Hi"
|
|
316
|
-
|
|
317
|
-
Output:
|
|
318
|
-
{{
|
|
319
|
-
type: 'finished_task',
|
|
320
|
-
message: "Hey there! I'm Mito AI. How can I help you today?"
|
|
321
|
-
}}
|
|
322
|
-
|
|
323
|
-
</Finished Task Example 2>
|
|
324
|
-
|
|
325
|
-
====
|
|
326
|
-
|
|
327
|
-
RULES
|
|
297
|
+
<Finished Task Example 1>
|
|
298
|
+
{{
|
|
299
|
+
type: 'finished_task',
|
|
300
|
+
message: "Revenue analysis complete: total sales reached $2.3M with 34% growth in Q4[MITO_CITATION:abc123:2-3], while premium products generated 67% of profit margins[MITO_CITATION:xyz456:5]. The customer segmentation workflow identified three distinct buying patterns driving conversion rates[MITO_CITATION:def456:8-12].",
|
|
301
|
+
next_steps: ["Graph sales by product category", "Identify seasonal patterns in data", "Find the top 3 performing products"]
|
|
302
|
+
}}
|
|
303
|
+
</Finished Task Example 1>
|
|
304
|
+
|
|
305
|
+
<Finished Task Example 2>
|
|
306
|
+
User message: "Hi"
|
|
328
307
|
|
|
308
|
+
Output:
|
|
309
|
+
{{
|
|
310
|
+
type: 'finished_task',
|
|
311
|
+
message: "Hey there! I'm Mito AI. How can I help you today?"
|
|
312
|
+
}}
|
|
313
|
+
</Finished Task Example 2>
|
|
314
|
+
"""))
|
|
315
|
+
|
|
316
|
+
# RULES section
|
|
317
|
+
sections.append(SG.Generic("RULES", """
|
|
329
318
|
- You are working in a Jupyter Lab environment in a .ipynb file.
|
|
330
|
-
-
|
|
331
|
-
- In each message you send to the user, you can send one CellModification, one CellAddition, or one FINISHED_TASK response. BUT YOU WILL GET TO SEND MULTIPLE MESSAGES TO THE USER TO ACCOMPLISH YOUR TASK SO DO NOT TRY TO ACCOMPLISH YOUR TASK IN A SINGLE MESSAGE.
|
|
319
|
+
- In each message you can choose one of the tools to respond with. BUT YOU WILL GET TO SEND MULTIPLE MESSAGES TO THE USER TO ACCOMPLISH YOUR TASK SO DO NOT TRY TO ACCOMPLISH YOUR TASK IN A SINGLE MESSAGE.
|
|
332
320
|
- After you send a CELL_UPDATE, the user will send you a message with the updated variables, code, and files in the current directory. You will use this information to decide what to do next, so it is critical that you wait for the user's response after each CELL_UPDATE before deciding your next action.
|
|
333
321
|
- When updating code, keep as much of the original code as possible and do not recreate variables that already exist.
|
|
334
322
|
- When writing the message, do not explain to the user how to use the CELL_UPDATE or FINISHED_TASK response, they will already know how to use them. Just provide a summary of your thought process. Do not reference any Cell IDs in the message.
|
|
335
323
|
- When writing the message, do not include leading words like "Explanation:" or "Thought process:". Just provide a summary of your thought process.
|
|
336
|
-
- When writing the message, use tickmarks when referencing specific variable names. For example, write `sales_df` instead of "sales_df" or just sales_df.
|
|
337
|
-
|
|
338
|
-
|
|
339
|
-
|
|
340
|
-
CODE STYLE
|
|
341
|
-
|
|
324
|
+
- When writing the message, use tickmarks when referencing specific variable names. For example, write `sales_df` instead of "sales_df" or just sales_df."""))
|
|
325
|
+
|
|
326
|
+
# CODE STYLE section
|
|
327
|
+
sections.append(SG.Generic("CODE STYLE", """
|
|
342
328
|
- Avoid using try/except blocks and other defensive programming patterns (like checking if files exist before reading them, verifying variables are defined before using them, etc.) unless there is a really good reason. In Jupyter notebooks, errors should surface immediately so users can identify and fix issues. When errors are caught and suppressed or when defensive checks hide problems, users continue running broken code without realizing it, and the agent's auto-error-fix loop cannot trigger. If a column doesn't exist, a file is missing, a variable isn't defined, or a module isn't installed, let it error. The user needs to know.
|
|
343
329
|
- When you want to display a dataframe to the user, just write the dataframe on the last line of the code cell instead of writing print(<dataframe name>). Jupyter will automatically display the dataframe in the notebook.
|
|
344
|
-
- When importing matplotlib, write the code `%matplotlib inline` to make sure the graphs render in Jupyter.
|
|
345
|
-
|
|
346
|
-
|
|
347
|
-
{CITATION_RULES}
|
|
348
|
-
|
|
349
|
-
|
|
350
|
-
|
|
351
|
-
|
|
352
|
-
|
|
353
|
-
|
|
354
|
-
|
|
355
|
-
|
|
356
|
-
|
|
357
|
-
|
|
358
|
-
|
|
359
|
-
|
|
360
|
-
|
|
361
|
-
|
|
362
|
-
|
|
363
|
-
|
|
364
|
-
|
|
365
|
-
|
|
366
|
-
|
|
367
|
-
|
|
368
|
-
|
|
369
|
-
|
|
370
|
-
|
|
371
|
-
|
|
372
|
-
|
|
373
|
-
'tesla_stock_prices_df': pd.DataFrame({{
|
|
374
|
-
'Date': ['2025-01-02', '2024-01-03', '2024-01-04', '2024-01-05', '2024-01-06'],
|
|
375
|
-
'closing_price': [249.98, 251.03, 250.11, 249.97, 251.45]
|
|
376
|
-
}})
|
|
377
|
-
}}
|
|
378
|
-
|
|
379
|
-
{FILES_SECTION_HEADING}
|
|
380
|
-
file_name: tesla_stock_prices.csv
|
|
381
|
-
|
|
382
|
-
Your task:
|
|
383
|
-
Given the dataframe `tesla_stock_prices_df`, what day was Tesla's all time high closing price?
|
|
384
|
-
|
|
385
|
-
Output:
|
|
386
|
-
{{
|
|
387
|
-
type: 'cell_update',
|
|
388
|
-
message: "I'll calculate two new variables all_time_high_date and all_time_high_price.",
|
|
389
|
-
cell_update: {{
|
|
390
|
-
type: 'new',
|
|
391
|
-
index: 2,
|
|
392
|
-
code: "all_time_high_row_idx = tesla_stock_prices_df['closing_price'].idxmax()\nall_time_high_date = tesla_stock_prices_df.at[all_time_high_row_idx, 'Date']\nall_time_high_price = tesla_stock_prices_df.at[all_time_high_row_idx, 'closing_price']",
|
|
393
|
-
code_summary: "Calculating all time high"
|
|
394
|
-
}}
|
|
395
|
-
}}
|
|
396
|
-
|
|
397
|
-
### User Message 2
|
|
398
|
-
|
|
399
|
-
{JUPYTER_NOTEBOOK_SECTION_HEADING}
|
|
400
|
-
[
|
|
401
|
-
{{
|
|
402
|
-
cell_type: 'markdown'
|
|
403
|
-
id: '9e38c62b-38f8-457d-bb8d-28bfc52edf2c'
|
|
404
|
-
code: \"\"\" # Used Car Sales Analysis \"\"\"
|
|
405
|
-
}},
|
|
406
|
-
{{
|
|
407
|
-
cell_type: 'code'
|
|
408
|
-
id: 'c68fdf19-db8c-46dd-926f-d90ad35bb3bc'
|
|
409
|
-
code: \"\"\"import pandas as pd
|
|
410
|
-
tesla_stock_prices_df = pd.read_csv('./tesla_stock_prices.csv)\"\"\"
|
|
411
|
-
}},
|
|
330
|
+
- When importing matplotlib, write the code `%matplotlib inline` to make sure the graphs render in Jupyter."""))
|
|
331
|
+
|
|
332
|
+
# CITATION_RULES
|
|
333
|
+
sections.append(SG.Generic("Citation Rules", f"""{CITATION_RULES}
|
|
334
|
+
|
|
335
|
+
<Example>
|
|
336
|
+
Jupyter Notebook:
|
|
337
|
+
[
|
|
338
|
+
{{
|
|
339
|
+
cell_type: 'markdown'
|
|
340
|
+
id: '9e38c62b-38f8-457d-bb8d-28bfc52edf2c'
|
|
341
|
+
code: \"\"\" # Used Car Sales Analysis \"\"\"
|
|
342
|
+
}},
|
|
343
|
+
{{
|
|
344
|
+
cell_type: 'code'
|
|
345
|
+
id: 'c68fdf19-db8c-46dd-926f-d90ad35bb3bc'
|
|
346
|
+
code: \"\"\"import pandas as pd
|
|
347
|
+
tesla_stock_prices_df = pd.read_csv('./tesla_stock_prices.csv)\"\"\"
|
|
348
|
+
}},
|
|
349
|
+
{{
|
|
350
|
+
cell_type: 'code',
|
|
351
|
+
id: '9c0d5fda-2b16-4f52-a1c5-a48892f3e2e8',
|
|
352
|
+
code: \"\"\"all_time_high_row_idx = tesla_stock_prices_df['closing_price'].idxmax()
|
|
353
|
+
all_time_high_date = tesla_stock_prices_df.at[all_time_high_row_idx, 'Date']
|
|
354
|
+
all_time_high_price = tesla_stock_prices_df.at[all_time_high_row_idx, 'closing_price']\"\"\"
|
|
355
|
+
}}
|
|
356
|
+
]
|
|
357
|
+
|
|
358
|
+
Variables:
|
|
412
359
|
{{
|
|
413
|
-
|
|
414
|
-
|
|
415
|
-
|
|
416
|
-
|
|
417
|
-
all_time_high_price = tesla_stock_prices_df.at[all_time_high_row_idx, 'closing_price']\"\"\"
|
|
418
|
-
}}
|
|
419
|
-
]
|
|
420
|
-
|
|
421
|
-
{VARIABLES_SECTION_HEADING}
|
|
422
|
-
{{
|
|
423
|
-
'tesla_stock_prices_df': pd.DataFrame({{
|
|
424
|
-
'Date': ['2025-01-02', '2024-01-03', '2024-01-04', '2024-01-05', '2024-01-06'],
|
|
425
|
-
'closing_price': [249.98, 251.03, 250.11, 249.97, 251.45],
|
|
360
|
+
'tesla_stock_prices_df': pd.DataFrame({{
|
|
361
|
+
'Date': ['2025-01-02', '2024-01-03', '2024-01-04', '2024-01-05', '2024-01-06'],
|
|
362
|
+
'closing_price': [249.98, 251.03, 250.11, 249.97, 251.45]
|
|
363
|
+
}}),
|
|
426
364
|
'all_time_high_row_idx': 501,
|
|
427
365
|
'all_time_high_date': '2025-03-16',
|
|
428
366
|
'all_time_high_price': 265.91
|
|
429
|
-
}}
|
|
430
|
-
}}
|
|
431
|
-
|
|
432
|
-
{FILES_SECTION_HEADING}
|
|
433
|
-
file_name: tesla_stock_prices.csv
|
|
434
|
-
|
|
435
|
-
Your task:
|
|
436
|
-
|
|
437
|
-
Output:
|
|
438
|
-
{{
|
|
439
|
-
type: 'finished_task',
|
|
440
|
-
message: "The all time high tesla stock closing price was $265.91 [MITO_CITATION:9c0d5fda-2b16-4f52-a1c5-a48892f3e2e8:2] on 2025-03-16 [MITO_CITATION:9c0d5fda-2b16-4f52-a1c5-a48892f3e2e8:1]",
|
|
441
|
-
next_steps: ["Create a visualization of Tesla's stock price over time", "Calculate the percentage change from the lowest to highest price", "Analyze the volatility of Tesla's stock"]
|
|
442
|
-
}}
|
|
443
|
-
|
|
444
|
-
</Cell Addition Example>
|
|
445
|
-
|
|
446
|
-
===
|
|
447
|
-
{get_database_rules()}
|
|
367
|
+
}}
|
|
448
368
|
|
|
449
|
-
|
|
369
|
+
Files:
|
|
370
|
+
"file_name: tesla_stock_prices.csv"
|
|
450
371
|
|
|
451
|
-
|
|
372
|
+
Your task: Given the dataframe `tesla_stock_prices_df`, what day was Tesla's all time high closing price?
|
|
452
373
|
|
|
453
|
-
|
|
374
|
+
Output:
|
|
375
|
+
{{
|
|
376
|
+
type: 'finished_task',
|
|
377
|
+
message: "The all time high tesla stock closing price was $265.91 [MITO_CITATION:9c0d5fda-2b16-4f52-a1c5-a48892f3e2e8:2] on 2025-03-16 [MITO_CITATION:9c0d5fda-2b16-4f52-a1c5-a48892f3e2e8:1]",
|
|
378
|
+
next_steps: ["Create a visualization of Tesla's stock price over time", "Calculate the percentage change from the lowest to highest price", "Analyze the volatility of Tesla's stock"]
|
|
379
|
+
}}
|
|
380
|
+
</Example>"""))
|
|
381
|
+
sections.append(SG.Generic("Cell Reference Rules", CELL_REFERENCE_RULES))
|
|
382
|
+
|
|
383
|
+
# Database rules
|
|
384
|
+
sections.append(SG.Generic("Database Rules", get_database_rules()))
|
|
385
|
+
|
|
386
|
+
# RULES OF YOUR WORKING PROCESS
|
|
387
|
+
sections.append(SG.Generic("Rules Of Working Process", f"""The user is going to ask you to guide them as through the process of completing a task. You will help them complete a task over the course of an entire conversation with them. The user will first share with you what they want to accomplish. You will then use a tool to execute the first step of the task, they will execute the tool and return to you the updated notebook state with you, and then you will give them the next step of the task. You will continue to give them the next step of the task until they have completed the task.
|
|
454
388
|
|
|
455
389
|
As you are guiding the user through the process of completing the task, send them TOOL messages to give them the next step of the task. When you have finished the task, send a FINISHED_TASK tool message.
|
|
456
390
|
|
|
@@ -478,8 +412,10 @@ REMEMBER, YOU ARE GOING TO COMPLETE THE USER'S TASK OVER THE COURSE OF THE ENTIR
|
|
|
478
412
|
- Wait for the user to send you back the updated variables and notebook state.
|
|
479
413
|
{'' if not isChromeBrowser else '- Send a GET_CELL_OUTPUT tool message to get the output of the cell you just created and check if you can improve the graph to make it more readable, informative, or professional.'}
|
|
480
414
|
- If after reviewing the updates you decide that you've completed the task, send a FINISHED_TASK tool message.
|
|
415
|
+
"""))
|
|
481
416
|
|
|
482
|
-
|
|
417
|
+
sections.append(SG.Generic("Other Useful Information", """
|
|
418
|
+
1. The active cell ID is shared with you so that when the user refers to "this cell" or similar phrases, you know which cell they mean. However, you are free to edit any cell that you see fit."""))
|
|
483
419
|
|
|
484
|
-
|
|
485
|
-
|
|
420
|
+
prompt = Prompt(sections)
|
|
421
|
+
return str(prompt)
|