jupyter-agent 2025.6.104__py3-none-any.whl → 2025.7.100__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- jupyter_agent/bot_actions.py +270 -0
- jupyter_agent/bot_agents/__init__.py +0 -42
- jupyter_agent/bot_agents/base.py +89 -45
- jupyter_agent/bot_agents/master_planner.py +1 -0
- jupyter_agent/bot_agents/output_task_result.py +6 -7
- jupyter_agent/bot_agents/prepare_next_cell.py +52 -0
- jupyter_agent/bot_agents/request_user_supply.py +186 -0
- jupyter_agent/bot_agents/task_code_executor.py +3 -2
- jupyter_agent/bot_agents/task_planner_v3.py +16 -13
- jupyter_agent/bot_agents/task_reasoner.py +3 -2
- jupyter_agent/bot_agents/task_structrue_reasoner.py +22 -12
- jupyter_agent/bot_agents/task_structrue_summarier.py +22 -18
- jupyter_agent/bot_agents/task_summarier.py +3 -2
- jupyter_agent/bot_agents/task_verifier.py +2 -1
- jupyter_agent/bot_agents/task_verify_summarier.py +6 -6
- jupyter_agent/bot_chat.py +2 -2
- jupyter_agent/bot_contexts.py +37 -29
- jupyter_agent/bot_evaluation.py +262 -143
- jupyter_agent/bot_evaluators/__init__.py +0 -0
- jupyter_agent/bot_evaluators/base.py +42 -0
- jupyter_agent/bot_evaluators/dummy_flow.py +20 -0
- jupyter_agent/bot_evaluators/dummy_global.py +20 -0
- jupyter_agent/bot_evaluators/dummy_task.py +20 -0
- jupyter_agent/bot_evaluators/flow_global_planning.py +88 -0
- jupyter_agent/bot_evaluators/flow_task_executor.py +152 -0
- jupyter_agent/bot_flows/__init__.py +0 -4
- jupyter_agent/bot_flows/base.py +120 -41
- jupyter_agent/bot_flows/master_planner.py +15 -4
- jupyter_agent/bot_flows/task_executor_v3.py +57 -38
- jupyter_agent/bot_magics.py +119 -69
- jupyter_agent/bot_outputs.py +37 -43
- jupyter_agent/utils.py +20 -31
- {jupyter_agent-2025.6.104.dist-info → jupyter_agent-2025.7.100.dist-info}/METADATA +56 -4
- jupyter_agent-2025.7.100.dist-info/RECORD +41 -0
- jupyter_agent/bot_agents/task_planner_v1.py +0 -158
- jupyter_agent/bot_agents/task_planner_v2.py +0 -172
- jupyter_agent/bot_flows/task_executor_v1.py +0 -86
- jupyter_agent/bot_flows/task_executor_v2.py +0 -84
- jupyter_agent-2025.6.104.dist-info/RECORD +0 -35
- {jupyter_agent-2025.6.104.dist-info → jupyter_agent-2025.7.100.dist-info}/WHEEL +0 -0
- {jupyter_agent-2025.6.104.dist-info → jupyter_agent-2025.7.100.dist-info}/entry_points.txt +0 -0
- {jupyter_agent-2025.6.104.dist-info → jupyter_agent-2025.7.100.dist-info}/licenses/LICENSE +0 -0
- {jupyter_agent-2025.6.104.dist-info → jupyter_agent-2025.7.100.dist-info}/top_level.txt +0 -0
jupyter_agent/bot_magics.py
CHANGED
@@ -15,11 +15,14 @@ from IPython.display import Markdown
|
|
15
15
|
from IPython.core.magic import Magics, magics_class, cell_magic
|
16
16
|
from traitlets import Unicode, Int, Bool
|
17
17
|
from traitlets.config.configurable import Configurable
|
18
|
-
from .bot_contexts import NotebookContext
|
19
|
-
from .bot_agents import AgentFactory
|
20
|
-
from .bot_agents.
|
21
|
-
from .
|
22
|
-
from .
|
18
|
+
from .bot_contexts import NotebookContext
|
19
|
+
from .bot_agents.base import AgentModelType, AgentFactory
|
20
|
+
from .bot_agents.request_user_supply import RequestUserSupplyAgent
|
21
|
+
from .bot_evaluators.base import EvaluatorFactory
|
22
|
+
from .bot_flows import MasterPlannerFlow, TaskExecutorFlowV3
|
23
|
+
from .bot_outputs import _D, _I, _W, _E, _F, _M, _B, _O, reset_output, set_logging_level, flush_output
|
24
|
+
from .bot_actions import close_action_dispatcher
|
25
|
+
from .utils import get_env_capbilities
|
23
26
|
|
24
27
|
|
25
28
|
@magics_class
|
@@ -37,6 +40,9 @@ class BotMagics(Magics, Configurable):
|
|
37
40
|
coding_api_url = Unicode(None, allow_none=True, help="Coding API URL").tag(config=True)
|
38
41
|
coding_api_key = Unicode("API_KEY", help="Coding API Key").tag(config=True)
|
39
42
|
coding_model_name = Unicode("", help="Coding Model Name").tag(config=True)
|
43
|
+
evaluating_api_url = Unicode(None, allow_none=True, help="Evaluating API URL").tag(config=True)
|
44
|
+
evaluating_api_key = Unicode("API_KEY", help="Evaluating API Key").tag(config=True)
|
45
|
+
evaluating_model_name = Unicode("", help="Evaluating Model Name").tag(config=True)
|
40
46
|
reasoning_api_url = Unicode(None, allow_none=True, help="Reasoning API URL").tag(config=True)
|
41
47
|
reasoning_api_key = Unicode("API_KEY", help="Reasoning API Key").tag(config=True)
|
42
48
|
reasoning_model_name = Unicode("", help="Reasoning Model Name").tag(config=True)
|
@@ -44,11 +50,16 @@ class BotMagics(Magics, Configurable):
|
|
44
50
|
display_think = Bool(True, help="Display chatthink response").tag(config=True)
|
45
51
|
display_response = Bool(False, help="Display chat full response").tag(config=True)
|
46
52
|
support_save_meta = Bool(False, help="Support save metadata to cell").tag(config=True)
|
53
|
+
support_user_confirm = Bool(False, help="Support user confirm").tag(config=True)
|
54
|
+
support_user_supply_info = Bool(False, help="Support user supply info").tag(config=True)
|
55
|
+
support_set_cell_content = Bool(False, help="Support set cell content").tag(config=True)
|
56
|
+
enable_evaluating = Bool(False, help="Enable evaluating task").tag(config=True)
|
57
|
+
enable_supply_mocking = Bool(False, help="Enable supply mocking").tag(config=True)
|
47
58
|
notebook_path = Unicode(None, allow_none=True, help="Path to Notebook file").tag(config=True)
|
48
59
|
default_task_flow = Unicode("v3", allow_none=True, help="Default task flow").tag(config=True)
|
49
|
-
default_max_tries = Int(
|
60
|
+
default_max_tries = Int(2, help="Default max tries for task execution").tag(config=True)
|
50
61
|
default_step_mode = Bool(False, help="Default step mode for task execution").tag(config=True)
|
51
|
-
default_auto_confirm = Bool(
|
62
|
+
default_auto_confirm = Bool(True, help="Default auto confirm for task execution").tag(config=True)
|
52
63
|
|
53
64
|
def parse_args(self, line):
|
54
65
|
"""解析命令行参数"""
|
@@ -59,41 +70,48 @@ class BotMagics(Magics, Configurable):
|
|
59
70
|
parser.add_argument("-f", "--flow", type=str, default=self.default_task_flow, help="Flow name")
|
60
71
|
parser.add_argument("-m", "--max-tries", type=int, default=self.default_max_tries, help="Max tries")
|
61
72
|
parser.add_argument(
|
62
|
-
"-
|
73
|
+
"-t",
|
63
74
|
"--step-mode",
|
64
75
|
action="store_true",
|
76
|
+
dest="step_mode",
|
65
77
|
default=self.default_step_mode,
|
66
78
|
help="Run in single step mode",
|
67
79
|
)
|
68
80
|
parser.add_argument(
|
69
|
-
"-
|
81
|
+
"-T",
|
82
|
+
"--not-step-mode",
|
83
|
+
action="store_false",
|
84
|
+
dest="step_mode",
|
85
|
+
default=self.default_step_mode,
|
86
|
+
help="Run in multi step mode",
|
87
|
+
)
|
88
|
+
parser.add_argument(
|
89
|
+
"-y",
|
70
90
|
"--auto-confirm",
|
71
91
|
action="store_true",
|
92
|
+
dest="auto_confirm",
|
72
93
|
default=self.default_auto_confirm,
|
73
94
|
help="Run without confirm",
|
74
95
|
)
|
96
|
+
parser.add_argument(
|
97
|
+
"-Y",
|
98
|
+
"--not-auto-confirm",
|
99
|
+
action="store_false",
|
100
|
+
dest="auto_confirm",
|
101
|
+
default=self.default_auto_confirm,
|
102
|
+
help="Run with confirm",
|
103
|
+
)
|
75
104
|
options, _ = parser.parse_known_args(shlex.split(line.strip()))
|
76
|
-
|
77
105
|
return options
|
78
106
|
|
79
|
-
def ensure_notebook_path(self):
|
80
|
-
if self.notebook_path:
|
81
|
-
return self.notebook_path
|
82
|
-
result = self.shell and self.shell.run_cell("globals().get('__vsc_ipynb_file__')")
|
83
|
-
if result and result.success and result.result:
|
84
|
-
self.notebook_path = result.result
|
85
|
-
return self.notebook_path
|
86
|
-
try:
|
87
|
-
self.notebook_path = str(ipynbname.path())
|
88
|
-
return self.notebook_path
|
89
|
-
except Exception as e:
|
90
|
-
_F(f"Failed to get notebook path: {e}")
|
91
|
-
return None
|
92
|
-
|
93
107
|
@cell_magic
|
94
108
|
def bot(self, line, cell):
|
95
109
|
"""Jupyter cell magic: %%bot"""
|
96
110
|
try:
|
111
|
+
reset_output(stage="Logging", logging_level=self.logging_level)
|
112
|
+
_I("Cell magic %%bot executing ...")
|
113
|
+
_D(f"Cell magic called with line: {line}")
|
114
|
+
_D(f"Cell magic called with cell: {repr(cell)[:50]} ...")
|
97
115
|
if not self.ensure_notebook_path():
|
98
116
|
_O(
|
99
117
|
Markdown(
|
@@ -103,11 +121,6 @@ class BotMagics(Magics, Configurable):
|
|
103
121
|
)
|
104
122
|
)
|
105
123
|
return
|
106
|
-
AgentCellContext.SUPPORT_SAVE_META = self.support_save_meta
|
107
|
-
reset_output(stage="Logging", logging_level=self.logging_level)
|
108
|
-
_I("Cell magic %%bot executing ...")
|
109
|
-
_D(f"Cell magic called with line: {line}")
|
110
|
-
_D(f"Cell magic called with cell: {repr(cell)[:50]} ...")
|
111
124
|
if not cell.strip():
|
112
125
|
_O(
|
113
126
|
Markdown(
|
@@ -120,51 +133,23 @@ class BotMagics(Magics, Configurable):
|
|
120
133
|
"%%bot {}\n\n# {}".format(line.strip(), time.strftime("%Y-%m-%d %H:%M:%S")), replace=True
|
121
134
|
)
|
122
135
|
return
|
136
|
+
get_env_capbilities().save_metadata = self.support_save_meta
|
137
|
+
get_env_capbilities().user_confirm = self.support_user_confirm
|
138
|
+
get_env_capbilities().user_supply_info = self.support_user_supply_info
|
139
|
+
get_env_capbilities().set_cell_content = self.support_set_cell_content
|
140
|
+
RequestUserSupplyAgent.MOCK_USER_SUPPLY = self.enable_supply_mocking
|
123
141
|
options = self.parse_args(line)
|
124
|
-
_D(f"Cell magic called with options: {options}")
|
125
142
|
set_logging_level(options.logging_level)
|
143
|
+
_D(f"Cell magic called with options: {options}")
|
126
144
|
nb_context = NotebookContext(line, cell, notebook_path=self.notebook_path)
|
127
|
-
agent_factory =
|
128
|
-
|
129
|
-
display_think=self.display_think,
|
130
|
-
display_message=self.display_message,
|
131
|
-
display_response=self.display_response,
|
132
|
-
)
|
133
|
-
agent_factory.config_model(
|
134
|
-
AgentModelType.DEFAULT,
|
135
|
-
self.default_api_url,
|
136
|
-
self.default_api_key,
|
137
|
-
self.default_model_name,
|
138
|
-
)
|
139
|
-
agent_factory.config_model(
|
140
|
-
AgentModelType.PLANNER,
|
141
|
-
self.planner_api_url,
|
142
|
-
self.planner_api_key,
|
143
|
-
self.planner_model_name,
|
144
|
-
)
|
145
|
-
agent_factory.config_model(
|
146
|
-
AgentModelType.CODING,
|
147
|
-
self.coding_api_url,
|
148
|
-
self.coding_api_key,
|
149
|
-
self.coding_model_name,
|
150
|
-
)
|
151
|
-
agent_factory.config_model(
|
152
|
-
AgentModelType.REASONING,
|
153
|
-
self.reasoning_api_url,
|
154
|
-
self.reasoning_api_key,
|
155
|
-
self.reasoning_model_name,
|
156
|
-
)
|
145
|
+
agent_factory = self.get_agent_factory(nb_context)
|
146
|
+
evaluator_factory = self.get_evaluator_factory(nb_context)
|
157
147
|
if options.planning:
|
158
|
-
flow = MasterPlannerFlow(nb_context, agent_factory)
|
148
|
+
flow = MasterPlannerFlow(nb_context, agent_factory, evaluator_factory)
|
149
|
+
elif options.flow == "v3":
|
150
|
+
flow = TaskExecutorFlowV3(nb_context, agent_factory, evaluator_factory)
|
159
151
|
else:
|
160
|
-
|
161
|
-
flow = TaskExecutorFlowV1(nb_context, agent_factory)
|
162
|
-
elif options.flow == "v2":
|
163
|
-
flow = TaskExecutorFlowV2(nb_context, agent_factory)
|
164
|
-
elif options.flow == "v3":
|
165
|
-
flow = TaskExecutorFlowV3(nb_context, agent_factory)
|
166
|
-
else:
|
167
|
-
raise ValueError(f"Unknown flow: {options.flow}")
|
152
|
+
raise ValueError(f"Unknown flow: {options.flow}")
|
168
153
|
flow(
|
169
154
|
options.stage,
|
170
155
|
options.max_tries,
|
@@ -173,6 +158,71 @@ class BotMagics(Magics, Configurable):
|
|
173
158
|
)
|
174
159
|
except Exception as e:
|
175
160
|
traceback.print_exc()
|
161
|
+
finally:
|
162
|
+
close_action_dispatcher()
|
163
|
+
flush_output()
|
164
|
+
|
165
|
+
def ensure_notebook_path(self):
|
166
|
+
if self.notebook_path:
|
167
|
+
return self.notebook_path
|
168
|
+
result = self.shell and self.shell.run_cell(
|
169
|
+
"globals().get('__vsc_ipynb_file__') or globals().get('__evaluation_ipynb_file__')"
|
170
|
+
)
|
171
|
+
if result and result.success and result.result:
|
172
|
+
self.notebook_path = result.result
|
173
|
+
return self.notebook_path
|
174
|
+
try:
|
175
|
+
self.notebook_path = str(ipynbname.path())
|
176
|
+
return self.notebook_path
|
177
|
+
except Exception as e:
|
178
|
+
_F(f"Failed to get notebook path: {e}")
|
179
|
+
return None
|
180
|
+
|
181
|
+
def get_agent_factory(self, nb_context):
|
182
|
+
agent_factory = AgentFactory(
|
183
|
+
nb_context,
|
184
|
+
display_think=self.display_think,
|
185
|
+
display_message=self.display_message,
|
186
|
+
display_response=self.display_response,
|
187
|
+
)
|
188
|
+
agent_factory.config_model(
|
189
|
+
AgentModelType.DEFAULT, self.default_api_url, self.default_api_key, self.default_model_name
|
190
|
+
)
|
191
|
+
agent_factory.config_model(
|
192
|
+
AgentModelType.PLANNER, self.planner_api_url, self.planner_api_key, self.planner_model_name
|
193
|
+
)
|
194
|
+
agent_factory.config_model(
|
195
|
+
AgentModelType.CODING, self.coding_api_url, self.coding_api_key, self.coding_model_name
|
196
|
+
)
|
197
|
+
agent_factory.config_model(
|
198
|
+
AgentModelType.EVALUATING, self.evaluating_api_url, self.evaluating_api_key, self.evaluating_model_name
|
199
|
+
)
|
200
|
+
agent_factory.config_model(
|
201
|
+
AgentModelType.REASONING, self.reasoning_api_url, self.reasoning_api_key, self.reasoning_model_name
|
202
|
+
)
|
203
|
+
return agent_factory
|
204
|
+
|
205
|
+
def get_evaluator_factory(self, nb_context):
|
206
|
+
if self.enable_evaluating:
|
207
|
+
evaluator_factory = EvaluatorFactory(nb_context)
|
208
|
+
evaluator_factory.config_model(
|
209
|
+
AgentModelType.DEFAULT, self.default_api_url, self.default_api_key, self.default_model_name
|
210
|
+
)
|
211
|
+
evaluator_factory.config_model(
|
212
|
+
AgentModelType.PLANNER, self.planner_api_url, self.planner_api_key, self.planner_model_name
|
213
|
+
)
|
214
|
+
evaluator_factory.config_model(
|
215
|
+
AgentModelType.CODING, self.coding_api_url, self.coding_api_key, self.coding_model_name
|
216
|
+
)
|
217
|
+
evaluator_factory.config_model(
|
218
|
+
AgentModelType.EVALUATING, self.evaluating_api_url, self.evaluating_api_key, self.evaluating_model_name
|
219
|
+
)
|
220
|
+
evaluator_factory.config_model(
|
221
|
+
AgentModelType.REASONING, self.reasoning_api_url, self.reasoning_api_key, self.reasoning_model_name
|
222
|
+
)
|
223
|
+
else:
|
224
|
+
evaluator_factory = None
|
225
|
+
return evaluator_factory
|
176
226
|
|
177
227
|
|
178
228
|
def load_ipython_extension(ipython):
|
jupyter_agent/bot_outputs.py
CHANGED
@@ -11,9 +11,11 @@ import datetime
|
|
11
11
|
import jinja2
|
12
12
|
|
13
13
|
from enum import Enum
|
14
|
-
from typing import Optional, Dict, Any
|
14
|
+
from typing import Optional, Dict, List, Tuple, Any, Type
|
15
15
|
from pydantic import BaseModel, Field
|
16
16
|
from IPython.display import display, Markdown
|
17
|
+
from .bot_evaluation import BaseEvaluationRecord
|
18
|
+
from .bot_actions import ActionBase
|
17
19
|
from .utils import no_indent, no_wrap
|
18
20
|
|
19
21
|
STAGE_SWITCHER_SCRIPT = no_wrap(
|
@@ -173,38 +175,6 @@ LOGGING_LEVELS = {
|
|
173
175
|
}
|
174
176
|
|
175
177
|
|
176
|
-
class BaseEvalutionRecord(BaseModel):
|
177
|
-
timestamp: float = 0
|
178
|
-
notebook_name: str = ""
|
179
|
-
eval_type: str = "BASE"
|
180
|
-
cell_index: int = -1
|
181
|
-
execution_duration: float = 0.0
|
182
|
-
is_success: bool = False
|
183
|
-
correct_score: float = 0.0
|
184
|
-
|
185
|
-
|
186
|
-
class StageEvalutionRecord(BaseEvalutionRecord):
|
187
|
-
eval_type: str = "STAGE"
|
188
|
-
flow: str = ""
|
189
|
-
stage: str = ""
|
190
|
-
coding_score: float = 0.0
|
191
|
-
important_score: float = 0.0
|
192
|
-
user_supply_score: float = 0.0
|
193
|
-
|
194
|
-
|
195
|
-
class FlowEvalutionRecord(BaseEvalutionRecord):
|
196
|
-
eval_type: str = "FLOW"
|
197
|
-
flow: str = ""
|
198
|
-
stage_count: int = 0
|
199
|
-
planning_score: float = 0.0
|
200
|
-
|
201
|
-
|
202
|
-
class NotebookEvalutionRecord(BaseEvalutionRecord):
|
203
|
-
eval_type: str = "NOTEBOOK"
|
204
|
-
flow_count: int = 0
|
205
|
-
planning_score: float = 0.0
|
206
|
-
|
207
|
-
|
208
178
|
class AgentOutput:
|
209
179
|
"""
|
210
180
|
AgentOutput 是一个用于在 Jupyter Notebook 中显示 Agent 输出的类。
|
@@ -221,6 +191,7 @@ class AgentOutput:
|
|
221
191
|
)
|
222
192
|
self.template = self.jinja_env.from_string(AGENT_OUTPUT_TEMPLEATE)
|
223
193
|
self.handler = None
|
194
|
+
self._is_dirty = True
|
224
195
|
self._latest_display_tm = 0
|
225
196
|
self._contents = {}
|
226
197
|
self._active_stage = None
|
@@ -228,6 +199,7 @@ class AgentOutput:
|
|
228
199
|
self._agent_data = {}
|
229
200
|
self._logging_records = []
|
230
201
|
self._evaluation_records = []
|
202
|
+
self._action_records = []
|
231
203
|
|
232
204
|
@property
|
233
205
|
def content(self):
|
@@ -263,11 +235,16 @@ class AgentOutput:
|
|
263
235
|
)
|
264
236
|
if self._evaluation_records:
|
265
237
|
metadata["jupyter-agent-evaluation-records"] = [record.model_dump() for record in self._evaluation_records]
|
238
|
+
if self._action_records:
|
239
|
+
metadata["jupyter-agent-action-records"] = [record.model_dump() for record in self._action_records]
|
266
240
|
return metadata
|
267
241
|
|
268
242
|
def display(self, stage=None, force=False, wait=True):
|
269
|
-
if stage is not None:
|
243
|
+
if stage is not None and stage != self._active_stage:
|
270
244
|
self._active_stage = stage
|
245
|
+
self._is_dirty = True
|
246
|
+
if not self._is_dirty and not force:
|
247
|
+
return
|
271
248
|
if not force and time.time() - self._latest_display_tm < 1:
|
272
249
|
if wait:
|
273
250
|
time.sleep(1 - (time.time() - self._latest_display_tm))
|
@@ -278,6 +255,7 @@ class AgentOutput:
|
|
278
255
|
else:
|
279
256
|
self.handler.update(Markdown(self.content), metadata=self.metadata)
|
280
257
|
self._latest_display_tm = time.time()
|
258
|
+
self._is_dirty = False
|
281
259
|
|
282
260
|
def clear(self, stage=None, clear_metadata=False):
|
283
261
|
if stage is None:
|
@@ -286,6 +264,7 @@ class AgentOutput:
|
|
286
264
|
self._contents[stage] = []
|
287
265
|
if clear_metadata:
|
288
266
|
self._agent_data = {}
|
267
|
+
self._is_dirty = True
|
289
268
|
self.display(force=False, wait=False)
|
290
269
|
|
291
270
|
def output_block(
|
@@ -305,6 +284,7 @@ class AgentOutput:
|
|
305
284
|
"code_language": code_language,
|
306
285
|
}
|
307
286
|
)
|
287
|
+
self._is_dirty = True
|
308
288
|
self.display(stage, force=False, wait=False)
|
309
289
|
|
310
290
|
def output_text(self, content, stage=None, code_language="python"):
|
@@ -320,6 +300,7 @@ class AgentOutput:
|
|
320
300
|
self._contents[stage][-1]["content"] += "\n" + content
|
321
301
|
else:
|
322
302
|
self._contents[stage].append({"type": "text", "content": content, "code_language": code_language})
|
303
|
+
self._is_dirty = True
|
323
304
|
self.display(stage, force=False, wait=False)
|
324
305
|
|
325
306
|
def output_markdown(self, content, stage=None):
|
@@ -328,12 +309,14 @@ class AgentOutput:
|
|
328
309
|
if stage not in self._contents:
|
329
310
|
self._contents[stage] = []
|
330
311
|
self._contents[stage].append({"type": "markdown", "content": content})
|
312
|
+
self._is_dirty = True
|
331
313
|
self.display(stage, force=False, wait=False)
|
332
314
|
|
333
315
|
def output_agent_data(self, **kwargs):
|
334
316
|
self.log(f"output agent data {kwargs}", level="DEBUG")
|
335
317
|
self._agent_data.update(kwargs)
|
336
|
-
self._agent_data_timestamp =
|
318
|
+
self._agent_data_timestamp = time.time()
|
319
|
+
self._is_dirty = True
|
337
320
|
self.display(force=False, wait=False)
|
338
321
|
|
339
322
|
def log(self, msg, level="INFO"):
|
@@ -355,11 +338,12 @@ class AgentOutput:
|
|
355
338
|
"content": content,
|
356
339
|
}
|
357
340
|
)
|
341
|
+
self._is_dirty = True
|
358
342
|
self.display(force=False, wait=False)
|
359
343
|
|
360
|
-
def log_evaluation(self, record:
|
344
|
+
def log_evaluation(self, record: BaseEvaluationRecord):
|
361
345
|
assert isinstance(
|
362
|
-
record,
|
346
|
+
record, BaseEvaluationRecord
|
363
347
|
), "record must be an instance of BaseEvalutionRecord or its subclass"
|
364
348
|
if record.timestamp == 0:
|
365
349
|
record.timestamp = time.time()
|
@@ -369,6 +353,16 @@ class AgentOutput:
|
|
369
353
|
f"success: {record.is_success} correct: {record.correct_score:.2f}",
|
370
354
|
level="INFO",
|
371
355
|
)
|
356
|
+
self._is_dirty = True
|
357
|
+
self.display(force=False, wait=False)
|
358
|
+
|
359
|
+
def log_action(self, record: ActionBase):
|
360
|
+
assert isinstance(record, ActionBase), "record must be an instance of BaseActionRecord or its subclass"
|
361
|
+
if record.timestamp == 0:
|
362
|
+
record.timestamp = time.time()
|
363
|
+
self._action_records.append(record)
|
364
|
+
self.log(f"Action: {record.action} from {record.source}", level="INFO")
|
365
|
+
self._is_dirty = True
|
372
366
|
self.display(force=False, wait=False)
|
373
367
|
|
374
368
|
|
@@ -414,14 +408,14 @@ def output_agent_data(**kwargs):
|
|
414
408
|
get_output().output_agent_data(**kwargs)
|
415
409
|
|
416
410
|
|
417
|
-
def output_evaluation(record:
|
418
|
-
"""
|
419
|
-
输出评估记录到 AgentOutput 中。
|
420
|
-
:param record: 评估记录对象,必须是 BaseEvalutionRecord 的子类。
|
421
|
-
"""
|
411
|
+
def output_evaluation(record: BaseEvaluationRecord):
|
422
412
|
get_output().log_evaluation(record)
|
423
413
|
|
424
414
|
|
415
|
+
def output_action(record: ActionBase):
|
416
|
+
get_output().log_action(record)
|
417
|
+
|
418
|
+
|
425
419
|
def clear_output(stage=None, clear_metadata=False):
|
426
420
|
get_output().clear(stage, clear_metadata)
|
427
421
|
|
@@ -534,6 +528,6 @@ _A = output_agent_data
|
|
534
528
|
_L = log
|
535
529
|
_D = lambda msg: log(msg, level="DEBUG")
|
536
530
|
_I = lambda msg: log(msg, level="INFO")
|
537
|
-
_W = lambda msg: log(msg, level="
|
531
|
+
_W = lambda msg: log(msg, level="WARN")
|
538
532
|
_E = lambda msg: log(msg, level="ERROR")
|
539
533
|
_F = lambda msg: log(msg, level="FATAL")
|
jupyter_agent/utils.py
CHANGED
@@ -91,37 +91,6 @@ class TeeOutputCapture(capture_output):
|
|
91
91
|
return CapturedIO(stdout, stderr, outputs)
|
92
92
|
|
93
93
|
|
94
|
-
class RequestUserPrompt(BaseModel):
|
95
|
-
prompt: str = Field(
|
96
|
-
description="需要用户补充详细信息的Prompt",
|
97
|
-
examples=["请补充与...相关的详细的信息", "请确认...是否...", "请提供..."],
|
98
|
-
)
|
99
|
-
example: Optional[str] = Field(None, description="示例", examples=["..."])
|
100
|
-
|
101
|
-
|
102
|
-
class UserPromptResponse(BaseModel):
|
103
|
-
prompt: str = Field(description="需要用户补充详细信息的Prompt", examples=["..."])
|
104
|
-
response: str = Field(description="用户补充的详细信息", examples=["..."])
|
105
|
-
|
106
|
-
|
107
|
-
def request_user_response(prompts: list[RequestUserPrompt]) -> list[UserPromptResponse]:
|
108
|
-
responses = []
|
109
|
-
for prompt in prompts:
|
110
|
-
response = input(f"{prompt.prompt} (例如: {prompt.example})")
|
111
|
-
responses.append(UserPromptResponse(prompt=prompt.prompt, response=response))
|
112
|
-
return responses
|
113
|
-
|
114
|
-
|
115
|
-
def format_user_prompts(prompts: list[RequestUserPrompt], title="用户补充详细信息") -> str:
|
116
|
-
result = "```markdown\n"
|
117
|
-
result += f"### {title}\n\n"
|
118
|
-
result += "\n".join(
|
119
|
-
[f"- **Issue**: {prompt.prompt} (例如: {prompt.example})\n- **Reply**: " for prompt in prompts]
|
120
|
-
)
|
121
|
-
result += "\n```\n"
|
122
|
-
return result
|
123
|
-
|
124
|
-
|
125
94
|
def no_indent(text: str) -> str:
|
126
95
|
return re.sub(r"^\s+", "", text, flags=re.MULTILINE)
|
127
96
|
|
@@ -136,3 +105,23 @@ def no_newline(text: str) -> str:
|
|
136
105
|
|
137
106
|
def no_space(text: str) -> str:
|
138
107
|
return re.sub(r"\s+", "", text, flags=re.MULTILINE)
|
108
|
+
|
109
|
+
|
110
|
+
class EnvironmentCapbilities(BaseModel):
|
111
|
+
save_metadata: bool = False
|
112
|
+
user_confirm: bool = False
|
113
|
+
user_supply_info: bool = False
|
114
|
+
set_cell_content: bool = False
|
115
|
+
|
116
|
+
|
117
|
+
__env_capbilities = EnvironmentCapbilities()
|
118
|
+
|
119
|
+
|
120
|
+
def get_env_capbilities() -> EnvironmentCapbilities:
|
121
|
+
return __env_capbilities
|
122
|
+
|
123
|
+
|
124
|
+
def set_env_capbilities(env_capbilities: EnvironmentCapbilities):
|
125
|
+
global __env_capbilities
|
126
|
+
|
127
|
+
__env_capbilities = env_capbilities
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: jupyter-agent
|
3
|
-
Version: 2025.
|
3
|
+
Version: 2025.7.100
|
4
4
|
Summary: 调用LLM实现Jupyter代码的自动生成、执行、调试等功能
|
5
5
|
Author: viewstar000
|
6
6
|
License: MIT
|
@@ -10,6 +10,7 @@ Classifier: Operating System :: OS Independent
|
|
10
10
|
Requires-Python: >=3.12
|
11
11
|
Description-Content-Type: text/markdown
|
12
12
|
License-File: LICENSE
|
13
|
+
Requires-Dist: bottle
|
13
14
|
Requires-Dist: ipynbname
|
14
15
|
Requires-Dist: ipython
|
15
16
|
Requires-Dist: jinja2
|
@@ -95,15 +96,26 @@ pip install /path/to/jupyter-agent/dist/jupyter_agent-xxxx-py3-none-any.whl
|
|
95
96
|
# 设置当前Notebook的路径,当无法自动获取时需要手工指定,以Vscode中的Notebook为例
|
96
97
|
%config BotMagics.notebook_path = globals()["__vsc_ipynb_file__"]
|
97
98
|
|
98
|
-
#
|
99
|
+
# 是否默认开启单步模式,每执行一个步骤都退出执行循环,需要用户手动执行下一个步骤,默认为False
|
100
|
+
%config BotMagics.default_step_mode = False
|
101
|
+
# 是否默认开启自动确认,若关闭自动确认,每执行一个步骤都需要用户手动确认,默认为True
|
102
|
+
%config BotMagics.default_auto_confirm = True
|
103
|
+
|
104
|
+
# 设置运行环境是否保存任务数据到Metadata,默认为False,仅在Vscode中安装jupyter-agent-extension后或在评估模式下支持
|
99
105
|
%config BotMagics.support_save_meta = True
|
106
|
+
# 设置运行环境是否设置单元格内容,默认为False,仅在Vscode中安装jupyter-agent-extension后或在评估模式下支持
|
107
|
+
%config BotMagics.support_set_cell_content = True
|
100
108
|
|
101
109
|
# 设置日志级别,可选值为DEBUG、INFO、WARN、ERROR、FATAL,默认为INFO
|
102
110
|
%config BotMagics.logging_level = 'DEBUG'
|
103
111
|
|
112
|
+
# 开启自动评估功能,默认为False,调用LLM对当前结果进行打分,目前仅实现了对子任务的整体打分
|
113
|
+
%config BotMagics.enable_evaluating = True
|
114
|
+
# 开启模拟用户补充信息功能,默认为False,调用LLM模拟对Agent的提问进行补充,用于自动评估
|
115
|
+
%config BotMagics.enable_supply_mocking = True
|
116
|
+
|
104
117
|
# 设置是否显示思考过程,默认为True
|
105
118
|
%config BotMagics.display_think = True
|
106
|
-
|
107
119
|
# 设置是否显示发送给LLM的消息和LLM的回答,默认为False
|
108
120
|
%config BotMagics.display_message = True
|
109
121
|
%config BotMagics.display_response = True
|
@@ -151,6 +163,20 @@ pip install /path/to/jupyter-agent/dist/jupyter_agent-xxxx-py3-none-any.whl
|
|
151
163
|
|
152
164
|
更详细用法可参考[示例Notebook](https://github.com/viewstar000/jupyter-agent/blob/main/examples/data_loader.ipynb)
|
153
165
|
|
166
|
+
### 评估模式
|
167
|
+
|
168
|
+
工具提供了`bot_eval`命令用于在评估模式下执行notebook。在评估模式下,工具会顺序执行所有单元格,直到全局目标完成。
|
169
|
+
|
170
|
+
```bash
|
171
|
+
bot_eval [-o output_eval.ipynb] [-e output_eval.jsonl] input.ipynb
|
172
|
+
```
|
173
|
+
|
174
|
+
例如
|
175
|
+
|
176
|
+
```bash
|
177
|
+
bot_eval examples/data_loader_eval.ipynb
|
178
|
+
```
|
179
|
+
|
154
180
|
## 贡献
|
155
181
|
|
156
182
|
欢迎提交 issue 或 pull request 参与贡献。
|
@@ -237,12 +263,24 @@ Advanced Configuration:
|
|
237
263
|
# Set the current notebook path, when it is not automatically obtained, it needs to be manually specified, for example, in Vscode Notebook
|
238
264
|
%config BotMagics.notebook_path = globals()["__vsc_ipynb_file__"]
|
239
265
|
|
240
|
-
#
|
266
|
+
# Whether to enable single-step mode by default: the execution loop exits after each step and the user must run the next step manually. Default is False
|
267
|
+
%config BotMagics.default_step_mode = False
|
268
|
+
# Whether to enable automatic confirmation by default: if disabled, every step must be confirmed manually by the user. Default is True
|
269
|
+
%config BotMagics.default_auto_confirm = True
|
270
|
+
|
271
|
+
# Set whether to save task data to Metadata, only Vscode installed with jupyter-agent-extension or evaluation mode supports this.
|
241
272
|
%config BotMagics.support_save_meta = True
|
273
|
+
# Set whether to set cell content, only Vscode installed with jupyter-agent-extension or evaluation mode supports this.
|
274
|
+
%config BotMagics.support_set_cell_content = True
|
242
275
|
|
243
276
|
# Set the log level, available values are DEBUG、INFO、WARN、ERROR、FATAL, default is INFO
|
244
277
|
%config BotMagics.logging_level = 'DEBUG'
|
245
278
|
|
279
|
+
# Enable automatic evaluation, default is False, call LLM to evaluate the overall result of the subtask
|
280
|
+
%config BotMagics.enable_evaluating = True
|
281
|
+
# Enable mocking of user-supplied information, default is False; calls the LLM to simulate the user's replies to the agent's questions, used for automatic evaluation
|
282
|
+
%config BotMagics.enable_supply_mocking = True
|
283
|
+
|
246
284
|
# Set whether to display thinking process, default is True
|
247
285
|
%config BotMagics.display_think = True
|
248
286
|
|
@@ -290,6 +328,20 @@ After generating code for a subtask, the tool will call the corresponding agent
|
|
290
328
|
|
291
329
|
For more details, please refer to [example notebook](https://github.com/viewstar000/jupyter-agent/blob/main/examples/data_loader.ipynb)
|
292
330
|
|
331
|
+
### Evaluation mode
|
332
|
+
|
333
|
+
Use `bot_eval` command to evaluate the code generated by the agent in evaluation mode. The evaluation mode will execute all cells in order and stop when the global goal is completed.
|
334
|
+
|
335
|
+
```bash
|
336
|
+
bot_eval [-o output_eval.ipynb] [-e output_eval.jsonl] input.ipynb
|
337
|
+
```
|
338
|
+
|
339
|
+
For example
|
340
|
+
|
341
|
+
```bash
|
342
|
+
bot_eval examples/data_loader_eval.ipynb
|
343
|
+
```
|
344
|
+
|
293
345
|
## Contributing
|
294
346
|
|
295
347
|
Welcome to submit issues or pull requests to participate in contributions.
|
@@ -0,0 +1,41 @@
|
|
1
|
+
jupyter_agent/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
2
|
+
jupyter_agent/bot_actions.py,sha256=Zq9_nfh4SJdMxkjqcTyQzS0RY4RwofaRkGq_4aexO2o,8453
|
3
|
+
jupyter_agent/bot_chat.py,sha256=4zjkHtyOabT1bvGO-n4ZTMw0XREU_XDlgfLCI5gpxsw,8834
|
4
|
+
jupyter_agent/bot_contexts.py,sha256=gs3hVIj81jasQYiJjdoAloWx8S1Xpa4cXr8XzcefUus,19475
|
5
|
+
jupyter_agent/bot_evaluation.py,sha256=t4SH6Gq4BmSyyRMozyQ2623XNGmgtCi9CTNRvOqzuRM,14266
|
6
|
+
jupyter_agent/bot_magics.py,sha256=Sh2CGs_esZqaHWDDLKjJSDlUYcI4PdF2aFPcibcf43Y,11027
|
7
|
+
jupyter_agent/bot_outputs.py,sha256=QDzReXLqZsU7RAPR4F9JEotxAtIe9YA3ZklCJ9U_jVg,16239
|
8
|
+
jupyter_agent/utils.py,sha256=8XKXXZB1EgCwIJEqYJigA8C84FzVTc2xdcF-y5kO3kY,3634
|
9
|
+
jupyter_agent/bot_agents/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
10
|
+
jupyter_agent/bot_agents/base.py,sha256=pAwW_KQZE9LwyxM91umzIlWalgFUKGJOpby8JGfvLQo,12430
|
11
|
+
jupyter_agent/bot_agents/master_planner.py,sha256=twDEc0KMCyNkcsD_0nilbPteZqFwUT38QDfeYUhOzzE,1330
|
12
|
+
jupyter_agent/bot_agents/output_task_result.py,sha256=4DeAmDzjUI_0yfb26f5sLIwa74aJRzEQXbMRSyYMv1g,761
|
13
|
+
jupyter_agent/bot_agents/prepare_next_cell.py,sha256=_4-kYQxAs9PK4a6T5vbsdSixNQ4l-R8qzQNjs8e_09c,1891
|
14
|
+
jupyter_agent/bot_agents/request_user_supply.py,sha256=Bkw08hhkUUVnirBijv1mJ0hQW2VpzdXoC3TToQolQos,6106
|
15
|
+
jupyter_agent/bot_agents/task_code_executor.py,sha256=V138uj39_lLGuljEDrxzd-jRf1k4nAQkA_gF2_Jhpgw,2243
|
16
|
+
jupyter_agent/bot_agents/task_coder.py,sha256=7fXq9nk1yH3F_mJfCMZBktHmxGfgmpuChMQbpEuL0w4,1783
|
17
|
+
jupyter_agent/bot_agents/task_debuger.py,sha256=77pa_Awgvzxm3XkFA1oZsGr8SPJkjApKMtkmoySShmI,1367
|
18
|
+
jupyter_agent/bot_agents/task_planner_v3.py,sha256=Mlves3v3KL7MAJ8hPPMxUsKdB2v6vuOXlVZ6XtNMbbo,8713
|
19
|
+
jupyter_agent/bot_agents/task_reasoner.py,sha256=4oP5DzAkfEGh6LtpX4OH6aMgAPDiRvbSVclxrhx0v20,1465
|
20
|
+
jupyter_agent/bot_agents/task_structrue_reasoner.py,sha256=lNt508g4ileRjG9_NETdSrQqVb7tjdu8qHajKcZzB6E,3947
|
21
|
+
jupyter_agent/bot_agents/task_structrue_summarier.py,sha256=fnNiXQMiEPHyowqOP6Ht_OnxV_1h_WTLKfcM2IYEt24,4053
|
22
|
+
jupyter_agent/bot_agents/task_summarier.py,sha256=Q9b11gdWvwnYLsIjwSpMkZQur1CqFdd_uKb322o8u-M,1787
|
23
|
+
jupyter_agent/bot_agents/task_verifier.py,sha256=kGtz8BkSB097RwdgY3FcXpSbVRcikFeTXiokheza0t8,2522
|
24
|
+
jupyter_agent/bot_agents/task_verify_summarier.py,sha256=XIxRuW8T1DchHLy3PlGWWUMVC8hcTEyjhQ5tnELWNZk,4943
|
25
|
+
jupyter_agent/bot_evaluators/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
26
|
+
jupyter_agent/bot_evaluators/base.py,sha256=zGxW469lq2Ab1mOaTtVQcWQmJrKNAAaF8X7OPbnUY04,1375
|
27
|
+
jupyter_agent/bot_evaluators/dummy_flow.py,sha256=W0BWJKMgXIilZY0i8eP_SNVgqTUd1CT_uqMBs5aygVA,473
|
28
|
+
jupyter_agent/bot_evaluators/dummy_global.py,sha256=yZ8fo2xfVN8gZTpBfs8EJ4dcv2t4ls6aXxn3Mo7bNSk,483
|
29
|
+
jupyter_agent/bot_evaluators/dummy_task.py,sha256=owh6g6ItPXXYjesplzNMxVcKAU_kktWtuJhqRzZ05V4,475
|
30
|
+
jupyter_agent/bot_evaluators/flow_global_planning.py,sha256=kOLd0dCoqrMi6zbe5chXrwxmdahtt8QqX3UnAZgk3AQ,2419
|
31
|
+
jupyter_agent/bot_evaluators/flow_task_executor.py,sha256=gzHlKkP9K5fICYgUY5BKAzjwqn3xScxklohqoUCJaZk,4450
|
32
|
+
jupyter_agent/bot_flows/__init__.py,sha256=Xe7EbC6bt04Nc4Yr0e--FVvBJCxkZCZkwYL9oahMBtI,338
|
33
|
+
jupyter_agent/bot_flows/base.py,sha256=F-iXu59IfnOXWze3e2myvzdBlyk8xzlrqHA8GTfu4vo,14916
|
34
|
+
jupyter_agent/bot_flows/master_planner.py,sha256=F1AunpfNwFqEn4z8uzNEq7d_5_cNHRhMlO1P7uWcYf0,980
|
35
|
+
jupyter_agent/bot_flows/task_executor_v3.py,sha256=uCuwcG8ZfPIR7V5AX7UfFkYVbJ6MxPscixjtLwBYWtE,4878
|
36
|
+
jupyter_agent-2025.7.100.dist-info/licenses/LICENSE,sha256=nWMmSIg7OepTIDX_OPP0-T9ImeCBBoog7eJxm5awtcM,1068
|
37
|
+
jupyter_agent-2025.7.100.dist-info/METADATA,sha256=zaOVnSQK797gupNKlYbwOCV9Y_A3R4_6yk12WRDjqYI,12560
|
38
|
+
jupyter_agent-2025.7.100.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
39
|
+
jupyter_agent-2025.7.100.dist-info/entry_points.txt,sha256=063AB86wSrC_V-iiEEqxTlR4uz-T7VH_YagIpmKFQC0,63
|
40
|
+
jupyter_agent-2025.7.100.dist-info/top_level.txt,sha256=c3USTBZ7DZGuvLKlEW-QfGIx0tzn98iCEn3bpdYnDtE,14
|
41
|
+
jupyter_agent-2025.7.100.dist-info/RECORD,,
|