jupyter-agent 2025.6.104__py3-none-any.whl → 2025.7.100__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43) hide show
  1. jupyter_agent/bot_actions.py +270 -0
  2. jupyter_agent/bot_agents/__init__.py +0 -42
  3. jupyter_agent/bot_agents/base.py +89 -45
  4. jupyter_agent/bot_agents/master_planner.py +1 -0
  5. jupyter_agent/bot_agents/output_task_result.py +6 -7
  6. jupyter_agent/bot_agents/prepare_next_cell.py +52 -0
  7. jupyter_agent/bot_agents/request_user_supply.py +186 -0
  8. jupyter_agent/bot_agents/task_code_executor.py +3 -2
  9. jupyter_agent/bot_agents/task_planner_v3.py +16 -13
  10. jupyter_agent/bot_agents/task_reasoner.py +3 -2
  11. jupyter_agent/bot_agents/task_structrue_reasoner.py +22 -12
  12. jupyter_agent/bot_agents/task_structrue_summarier.py +22 -18
  13. jupyter_agent/bot_agents/task_summarier.py +3 -2
  14. jupyter_agent/bot_agents/task_verifier.py +2 -1
  15. jupyter_agent/bot_agents/task_verify_summarier.py +6 -6
  16. jupyter_agent/bot_chat.py +2 -2
  17. jupyter_agent/bot_contexts.py +37 -29
  18. jupyter_agent/bot_evaluation.py +262 -143
  19. jupyter_agent/bot_evaluators/__init__.py +0 -0
  20. jupyter_agent/bot_evaluators/base.py +42 -0
  21. jupyter_agent/bot_evaluators/dummy_flow.py +20 -0
  22. jupyter_agent/bot_evaluators/dummy_global.py +20 -0
  23. jupyter_agent/bot_evaluators/dummy_task.py +20 -0
  24. jupyter_agent/bot_evaluators/flow_global_planning.py +88 -0
  25. jupyter_agent/bot_evaluators/flow_task_executor.py +152 -0
  26. jupyter_agent/bot_flows/__init__.py +0 -4
  27. jupyter_agent/bot_flows/base.py +120 -41
  28. jupyter_agent/bot_flows/master_planner.py +15 -4
  29. jupyter_agent/bot_flows/task_executor_v3.py +57 -38
  30. jupyter_agent/bot_magics.py +119 -69
  31. jupyter_agent/bot_outputs.py +37 -43
  32. jupyter_agent/utils.py +20 -31
  33. {jupyter_agent-2025.6.104.dist-info → jupyter_agent-2025.7.100.dist-info}/METADATA +56 -4
  34. jupyter_agent-2025.7.100.dist-info/RECORD +41 -0
  35. jupyter_agent/bot_agents/task_planner_v1.py +0 -158
  36. jupyter_agent/bot_agents/task_planner_v2.py +0 -172
  37. jupyter_agent/bot_flows/task_executor_v1.py +0 -86
  38. jupyter_agent/bot_flows/task_executor_v2.py +0 -84
  39. jupyter_agent-2025.6.104.dist-info/RECORD +0 -35
  40. {jupyter_agent-2025.6.104.dist-info → jupyter_agent-2025.7.100.dist-info}/WHEEL +0 -0
  41. {jupyter_agent-2025.6.104.dist-info → jupyter_agent-2025.7.100.dist-info}/entry_points.txt +0 -0
  42. {jupyter_agent-2025.6.104.dist-info → jupyter_agent-2025.7.100.dist-info}/licenses/LICENSE +0 -0
  43. {jupyter_agent-2025.6.104.dist-info → jupyter_agent-2025.7.100.dist-info}/top_level.txt +0 -0
@@ -15,11 +15,14 @@ from IPython.display import Markdown
15
15
  from IPython.core.magic import Magics, magics_class, cell_magic
16
16
  from traitlets import Unicode, Int, Bool
17
17
  from traitlets.config.configurable import Configurable
18
- from .bot_contexts import NotebookContext, AgentCellContext
19
- from .bot_agents import AgentFactory
20
- from .bot_agents.base import AgentModelType
21
- from .bot_flows import MasterPlannerFlow, TaskExecutorFlowV1, TaskExecutorFlowV2, TaskExecutorFlowV3
22
- from .bot_outputs import _D, _I, _W, _E, _F, _M, _B, _O, reset_output, set_logging_level
18
+ from .bot_contexts import NotebookContext
19
+ from .bot_agents.base import AgentModelType, AgentFactory
20
+ from .bot_agents.request_user_supply import RequestUserSupplyAgent
21
+ from .bot_evaluators.base import EvaluatorFactory
22
+ from .bot_flows import MasterPlannerFlow, TaskExecutorFlowV3
23
+ from .bot_outputs import _D, _I, _W, _E, _F, _M, _B, _O, reset_output, set_logging_level, flush_output
24
+ from .bot_actions import close_action_dispatcher
25
+ from .utils import get_env_capbilities
23
26
 
24
27
 
25
28
  @magics_class
@@ -37,6 +40,9 @@ class BotMagics(Magics, Configurable):
37
40
  coding_api_url = Unicode(None, allow_none=True, help="Coding API URL").tag(config=True)
38
41
  coding_api_key = Unicode("API_KEY", help="Coding API Key").tag(config=True)
39
42
  coding_model_name = Unicode("", help="Coding Model Name").tag(config=True)
43
+ evaluating_api_url = Unicode(None, allow_none=True, help="Evaluating API URL").tag(config=True)
44
+ evaluating_api_key = Unicode("API_KEY", help="Evaluating API Key").tag(config=True)
45
+ evaluating_model_name = Unicode("", help="Evaluating Model Name").tag(config=True)
40
46
  reasoning_api_url = Unicode(None, allow_none=True, help="Reasoning API URL").tag(config=True)
41
47
  reasoning_api_key = Unicode("API_KEY", help="Reasoning API Key").tag(config=True)
42
48
  reasoning_model_name = Unicode("", help="Reasoning Model Name").tag(config=True)
@@ -44,11 +50,16 @@ class BotMagics(Magics, Configurable):
44
50
  display_think = Bool(True, help="Display chatthink response").tag(config=True)
45
51
  display_response = Bool(False, help="Display chat full response").tag(config=True)
46
52
  support_save_meta = Bool(False, help="Support save metadata to cell").tag(config=True)
53
+ support_user_confirm = Bool(False, help="Support user confirm").tag(config=True)
54
+ support_user_supply_info = Bool(False, help="Support user supply info").tag(config=True)
55
+ support_set_cell_content = Bool(False, help="Support set cell content").tag(config=True)
56
+ enable_evaluating = Bool(False, help="Enable evaluating task").tag(config=True)
57
+ enable_supply_mocking = Bool(False, help="Enable supply mocking").tag(config=True)
47
58
  notebook_path = Unicode(None, allow_none=True, help="Path to Notebook file").tag(config=True)
48
59
  default_task_flow = Unicode("v3", allow_none=True, help="Default task flow").tag(config=True)
49
- default_max_tries = Int(3, help="Default max tries for task execution").tag(config=True)
60
+ default_max_tries = Int(2, help="Default max tries for task execution").tag(config=True)
50
61
  default_step_mode = Bool(False, help="Default step mode for task execution").tag(config=True)
51
- default_auto_confirm = Bool(False, help="Default auto confirm for task execution").tag(config=True)
62
+ default_auto_confirm = Bool(True, help="Default auto confirm for task execution").tag(config=True)
52
63
 
53
64
  def parse_args(self, line):
54
65
  """解析命令行参数"""
@@ -59,41 +70,48 @@ class BotMagics(Magics, Configurable):
59
70
  parser.add_argument("-f", "--flow", type=str, default=self.default_task_flow, help="Flow name")
60
71
  parser.add_argument("-m", "--max-tries", type=int, default=self.default_max_tries, help="Max tries")
61
72
  parser.add_argument(
62
- "-S",
73
+ "-t",
63
74
  "--step-mode",
64
75
  action="store_true",
76
+ dest="step_mode",
65
77
  default=self.default_step_mode,
66
78
  help="Run in single step mode",
67
79
  )
68
80
  parser.add_argument(
69
- "-Y",
81
+ "-T",
82
+ "--not-step-mode",
83
+ action="store_false",
84
+ dest="step_mode",
85
+ default=self.default_step_mode,
86
+ help="Run in multi step mode",
87
+ )
88
+ parser.add_argument(
89
+ "-y",
70
90
  "--auto-confirm",
71
91
  action="store_true",
92
+ dest="auto_confirm",
72
93
  default=self.default_auto_confirm,
73
94
  help="Run without confirm",
74
95
  )
96
+ parser.add_argument(
97
+ "-Y",
98
+ "--not-auto-confirm",
99
+ action="store_false",
100
+ dest="auto_confirm",
101
+ default=self.default_auto_confirm,
102
+ help="Run with confirm",
103
+ )
75
104
  options, _ = parser.parse_known_args(shlex.split(line.strip()))
76
-
77
105
  return options
78
106
 
79
- def ensure_notebook_path(self):
80
- if self.notebook_path:
81
- return self.notebook_path
82
- result = self.shell and self.shell.run_cell("globals().get('__vsc_ipynb_file__')")
83
- if result and result.success and result.result:
84
- self.notebook_path = result.result
85
- return self.notebook_path
86
- try:
87
- self.notebook_path = str(ipynbname.path())
88
- return self.notebook_path
89
- except Exception as e:
90
- _F(f"Failed to get notebook path: {e}")
91
- return None
92
-
93
107
  @cell_magic
94
108
  def bot(self, line, cell):
95
109
  """Jupyter cell magic: %%bot"""
96
110
  try:
111
+ reset_output(stage="Logging", logging_level=self.logging_level)
112
+ _I("Cell magic %%bot executing ...")
113
+ _D(f"Cell magic called with line: {line}")
114
+ _D(f"Cell magic called with cell: {repr(cell)[:50]} ...")
97
115
  if not self.ensure_notebook_path():
98
116
  _O(
99
117
  Markdown(
@@ -103,11 +121,6 @@ class BotMagics(Magics, Configurable):
103
121
  )
104
122
  )
105
123
  return
106
- AgentCellContext.SUPPORT_SAVE_META = self.support_save_meta
107
- reset_output(stage="Logging", logging_level=self.logging_level)
108
- _I("Cell magic %%bot executing ...")
109
- _D(f"Cell magic called with line: {line}")
110
- _D(f"Cell magic called with cell: {repr(cell)[:50]} ...")
111
124
  if not cell.strip():
112
125
  _O(
113
126
  Markdown(
@@ -120,51 +133,23 @@ class BotMagics(Magics, Configurable):
120
133
  "%%bot {}\n\n# {}".format(line.strip(), time.strftime("%Y-%m-%d %H:%M:%S")), replace=True
121
134
  )
122
135
  return
136
+ get_env_capbilities().save_metadata = self.support_save_meta
137
+ get_env_capbilities().user_confirm = self.support_user_confirm
138
+ get_env_capbilities().user_supply_info = self.support_user_supply_info
139
+ get_env_capbilities().set_cell_content = self.support_set_cell_content
140
+ RequestUserSupplyAgent.MOCK_USER_SUPPLY = self.enable_supply_mocking
123
141
  options = self.parse_args(line)
124
- _D(f"Cell magic called with options: {options}")
125
142
  set_logging_level(options.logging_level)
143
+ _D(f"Cell magic called with options: {options}")
126
144
  nb_context = NotebookContext(line, cell, notebook_path=self.notebook_path)
127
- agent_factory = AgentFactory(
128
- nb_context,
129
- display_think=self.display_think,
130
- display_message=self.display_message,
131
- display_response=self.display_response,
132
- )
133
- agent_factory.config_model(
134
- AgentModelType.DEFAULT,
135
- self.default_api_url,
136
- self.default_api_key,
137
- self.default_model_name,
138
- )
139
- agent_factory.config_model(
140
- AgentModelType.PLANNER,
141
- self.planner_api_url,
142
- self.planner_api_key,
143
- self.planner_model_name,
144
- )
145
- agent_factory.config_model(
146
- AgentModelType.CODING,
147
- self.coding_api_url,
148
- self.coding_api_key,
149
- self.coding_model_name,
150
- )
151
- agent_factory.config_model(
152
- AgentModelType.REASONING,
153
- self.reasoning_api_url,
154
- self.reasoning_api_key,
155
- self.reasoning_model_name,
156
- )
145
+ agent_factory = self.get_agent_factory(nb_context)
146
+ evaluator_factory = self.get_evaluator_factory(nb_context)
157
147
  if options.planning:
158
- flow = MasterPlannerFlow(nb_context, agent_factory)
148
+ flow = MasterPlannerFlow(nb_context, agent_factory, evaluator_factory)
149
+ elif options.flow == "v3":
150
+ flow = TaskExecutorFlowV3(nb_context, agent_factory, evaluator_factory)
159
151
  else:
160
- if options.flow == "v1":
161
- flow = TaskExecutorFlowV1(nb_context, agent_factory)
162
- elif options.flow == "v2":
163
- flow = TaskExecutorFlowV2(nb_context, agent_factory)
164
- elif options.flow == "v3":
165
- flow = TaskExecutorFlowV3(nb_context, agent_factory)
166
- else:
167
- raise ValueError(f"Unknown flow: {options.flow}")
152
+ raise ValueError(f"Unknown flow: {options.flow}")
168
153
  flow(
169
154
  options.stage,
170
155
  options.max_tries,
@@ -173,6 +158,71 @@ class BotMagics(Magics, Configurable):
173
158
  )
174
159
  except Exception as e:
175
160
  traceback.print_exc()
161
+ finally:
162
+ close_action_dispatcher()
163
+ flush_output()
164
+
165
+ def ensure_notebook_path(self):
166
+ if self.notebook_path:
167
+ return self.notebook_path
168
+ result = self.shell and self.shell.run_cell(
169
+ "globals().get('__vsc_ipynb_file__') or globals().get('__evaluation_ipynb_file__')"
170
+ )
171
+ if result and result.success and result.result:
172
+ self.notebook_path = result.result
173
+ return self.notebook_path
174
+ try:
175
+ self.notebook_path = str(ipynbname.path())
176
+ return self.notebook_path
177
+ except Exception as e:
178
+ _F(f"Failed to get notebook path: {e}")
179
+ return None
180
+
181
+ def get_agent_factory(self, nb_context):
182
+ agent_factory = AgentFactory(
183
+ nb_context,
184
+ display_think=self.display_think,
185
+ display_message=self.display_message,
186
+ display_response=self.display_response,
187
+ )
188
+ agent_factory.config_model(
189
+ AgentModelType.DEFAULT, self.default_api_url, self.default_api_key, self.default_model_name
190
+ )
191
+ agent_factory.config_model(
192
+ AgentModelType.PLANNER, self.planner_api_url, self.planner_api_key, self.planner_model_name
193
+ )
194
+ agent_factory.config_model(
195
+ AgentModelType.CODING, self.coding_api_url, self.coding_api_key, self.coding_model_name
196
+ )
197
+ agent_factory.config_model(
198
+ AgentModelType.EVALUATING, self.evaluating_api_url, self.evaluating_api_key, self.evaluating_model_name
199
+ )
200
+ agent_factory.config_model(
201
+ AgentModelType.REASONING, self.reasoning_api_url, self.reasoning_api_key, self.reasoning_model_name
202
+ )
203
+ return agent_factory
204
+
205
+ def get_evaluator_factory(self, nb_context):
206
+ if self.enable_evaluating:
207
+ evaluator_factory = EvaluatorFactory(nb_context)
208
+ evaluator_factory.config_model(
209
+ AgentModelType.DEFAULT, self.default_api_url, self.default_api_key, self.default_model_name
210
+ )
211
+ evaluator_factory.config_model(
212
+ AgentModelType.PLANNER, self.planner_api_url, self.planner_api_key, self.planner_model_name
213
+ )
214
+ evaluator_factory.config_model(
215
+ AgentModelType.CODING, self.coding_api_url, self.coding_api_key, self.coding_model_name
216
+ )
217
+ evaluator_factory.config_model(
218
+ AgentModelType.EVALUATING, self.evaluating_api_url, self.evaluating_api_key, self.evaluating_model_name
219
+ )
220
+ evaluator_factory.config_model(
221
+ AgentModelType.REASONING, self.reasoning_api_url, self.reasoning_api_key, self.reasoning_model_name
222
+ )
223
+ else:
224
+ evaluator_factory = None
225
+ return evaluator_factory
176
226
 
177
227
 
178
228
  def load_ipython_extension(ipython):
@@ -11,9 +11,11 @@ import datetime
11
11
  import jinja2
12
12
 
13
13
  from enum import Enum
14
- from typing import Optional, Dict, Any
14
+ from typing import Optional, Dict, List, Tuple, Any, Type
15
15
  from pydantic import BaseModel, Field
16
16
  from IPython.display import display, Markdown
17
+ from .bot_evaluation import BaseEvaluationRecord
18
+ from .bot_actions import ActionBase
17
19
  from .utils import no_indent, no_wrap
18
20
 
19
21
  STAGE_SWITCHER_SCRIPT = no_wrap(
@@ -173,38 +175,6 @@ LOGGING_LEVELS = {
173
175
  }
174
176
 
175
177
 
176
- class BaseEvalutionRecord(BaseModel):
177
- timestamp: float = 0
178
- notebook_name: str = ""
179
- eval_type: str = "BASE"
180
- cell_index: int = -1
181
- execution_duration: float = 0.0
182
- is_success: bool = False
183
- correct_score: float = 0.0
184
-
185
-
186
- class StageEvalutionRecord(BaseEvalutionRecord):
187
- eval_type: str = "STAGE"
188
- flow: str = ""
189
- stage: str = ""
190
- coding_score: float = 0.0
191
- important_score: float = 0.0
192
- user_supply_score: float = 0.0
193
-
194
-
195
- class FlowEvalutionRecord(BaseEvalutionRecord):
196
- eval_type: str = "FLOW"
197
- flow: str = ""
198
- stage_count: int = 0
199
- planning_score: float = 0.0
200
-
201
-
202
- class NotebookEvalutionRecord(BaseEvalutionRecord):
203
- eval_type: str = "NOTEBOOK"
204
- flow_count: int = 0
205
- planning_score: float = 0.0
206
-
207
-
208
178
  class AgentOutput:
209
179
  """
210
180
  AgentOutput 是一个用于在 Jupyter Notebook 中显示 Agent 输出的类。
@@ -221,6 +191,7 @@ class AgentOutput:
221
191
  )
222
192
  self.template = self.jinja_env.from_string(AGENT_OUTPUT_TEMPLEATE)
223
193
  self.handler = None
194
+ self._is_dirty = True
224
195
  self._latest_display_tm = 0
225
196
  self._contents = {}
226
197
  self._active_stage = None
@@ -228,6 +199,7 @@ class AgentOutput:
228
199
  self._agent_data = {}
229
200
  self._logging_records = []
230
201
  self._evaluation_records = []
202
+ self._action_records = []
231
203
 
232
204
  @property
233
205
  def content(self):
@@ -263,11 +235,16 @@ class AgentOutput:
263
235
  )
264
236
  if self._evaluation_records:
265
237
  metadata["jupyter-agent-evaluation-records"] = [record.model_dump() for record in self._evaluation_records]
238
+ if self._action_records:
239
+ metadata["jupyter-agent-action-records"] = [record.model_dump() for record in self._action_records]
266
240
  return metadata
267
241
 
268
242
  def display(self, stage=None, force=False, wait=True):
269
- if stage is not None:
243
+ if stage is not None and stage != self._active_stage:
270
244
  self._active_stage = stage
245
+ self._is_dirty = True
246
+ if not self._is_dirty and not force:
247
+ return
271
248
  if not force and time.time() - self._latest_display_tm < 1:
272
249
  if wait:
273
250
  time.sleep(1 - (time.time() - self._latest_display_tm))
@@ -278,6 +255,7 @@ class AgentOutput:
278
255
  else:
279
256
  self.handler.update(Markdown(self.content), metadata=self.metadata)
280
257
  self._latest_display_tm = time.time()
258
+ self._is_dirty = False
281
259
 
282
260
  def clear(self, stage=None, clear_metadata=False):
283
261
  if stage is None:
@@ -286,6 +264,7 @@ class AgentOutput:
286
264
  self._contents[stage] = []
287
265
  if clear_metadata:
288
266
  self._agent_data = {}
267
+ self._is_dirty = True
289
268
  self.display(force=False, wait=False)
290
269
 
291
270
  def output_block(
@@ -305,6 +284,7 @@ class AgentOutput:
305
284
  "code_language": code_language,
306
285
  }
307
286
  )
287
+ self._is_dirty = True
308
288
  self.display(stage, force=False, wait=False)
309
289
 
310
290
  def output_text(self, content, stage=None, code_language="python"):
@@ -320,6 +300,7 @@ class AgentOutput:
320
300
  self._contents[stage][-1]["content"] += "\n" + content
321
301
  else:
322
302
  self._contents[stage].append({"type": "text", "content": content, "code_language": code_language})
303
+ self._is_dirty = True
323
304
  self.display(stage, force=False, wait=False)
324
305
 
325
306
  def output_markdown(self, content, stage=None):
@@ -328,12 +309,14 @@ class AgentOutput:
328
309
  if stage not in self._contents:
329
310
  self._contents[stage] = []
330
311
  self._contents[stage].append({"type": "markdown", "content": content})
312
+ self._is_dirty = True
331
313
  self.display(stage, force=False, wait=False)
332
314
 
333
315
  def output_agent_data(self, **kwargs):
334
316
  self.log(f"output agent data {kwargs}", level="DEBUG")
335
317
  self._agent_data.update(kwargs)
336
- self._agent_data_timestamp = int(time.time() * 1000)
318
+ self._agent_data_timestamp = time.time()
319
+ self._is_dirty = True
337
320
  self.display(force=False, wait=False)
338
321
 
339
322
  def log(self, msg, level="INFO"):
@@ -355,11 +338,12 @@ class AgentOutput:
355
338
  "content": content,
356
339
  }
357
340
  )
341
+ self._is_dirty = True
358
342
  self.display(force=False, wait=False)
359
343
 
360
- def log_evaluation(self, record: BaseEvalutionRecord):
344
+ def log_evaluation(self, record: BaseEvaluationRecord):
361
345
  assert isinstance(
362
- record, BaseEvalutionRecord
346
+ record, BaseEvaluationRecord
363
347
  ), "record must be an instance of BaseEvalutionRecord or its subclass"
364
348
  if record.timestamp == 0:
365
349
  record.timestamp = time.time()
@@ -369,6 +353,16 @@ class AgentOutput:
369
353
  f"success: {record.is_success} correct: {record.correct_score:.2f}",
370
354
  level="INFO",
371
355
  )
356
+ self._is_dirty = True
357
+ self.display(force=False, wait=False)
358
+
359
+ def log_action(self, record: ActionBase):
360
+ assert isinstance(record, ActionBase), "record must be an instance of BaseActionRecord or its subclass"
361
+ if record.timestamp == 0:
362
+ record.timestamp = time.time()
363
+ self._action_records.append(record)
364
+ self.log(f"Action: {record.action} from {record.source}", level="INFO")
365
+ self._is_dirty = True
372
366
  self.display(force=False, wait=False)
373
367
 
374
368
 
@@ -414,14 +408,14 @@ def output_agent_data(**kwargs):
414
408
  get_output().output_agent_data(**kwargs)
415
409
 
416
410
 
417
- def output_evaluation(record: BaseEvalutionRecord):
418
- """
419
- 输出评估记录到 AgentOutput 中。
420
- :param record: 评估记录对象,必须是 BaseEvalutionRecord 的子类。
421
- """
411
+ def output_evaluation(record: BaseEvaluationRecord):
422
412
  get_output().log_evaluation(record)
423
413
 
424
414
 
415
+ def output_action(record: ActionBase):
416
+ get_output().log_action(record)
417
+
418
+
425
419
  def clear_output(stage=None, clear_metadata=False):
426
420
  get_output().clear(stage, clear_metadata)
427
421
 
@@ -534,6 +528,6 @@ _A = output_agent_data
534
528
  _L = log
535
529
  _D = lambda msg: log(msg, level="DEBUG")
536
530
  _I = lambda msg: log(msg, level="INFO")
537
- _W = lambda msg: log(msg, level="WARNING")
531
+ _W = lambda msg: log(msg, level="WARN")
538
532
  _E = lambda msg: log(msg, level="ERROR")
539
533
  _F = lambda msg: log(msg, level="FATAL")
jupyter_agent/utils.py CHANGED
@@ -91,37 +91,6 @@ class TeeOutputCapture(capture_output):
91
91
  return CapturedIO(stdout, stderr, outputs)
92
92
 
93
93
 
94
- class RequestUserPrompt(BaseModel):
95
- prompt: str = Field(
96
- description="需要用户补充详细信息的Prompt",
97
- examples=["请补充与...相关的详细的信息", "请确认...是否...", "请提供..."],
98
- )
99
- example: Optional[str] = Field(None, description="示例", examples=["..."])
100
-
101
-
102
- class UserPromptResponse(BaseModel):
103
- prompt: str = Field(description="需要用户补充详细信息的Prompt", examples=["..."])
104
- response: str = Field(description="用户补充的详细信息", examples=["..."])
105
-
106
-
107
- def request_user_response(prompts: list[RequestUserPrompt]) -> list[UserPromptResponse]:
108
- responses = []
109
- for prompt in prompts:
110
- response = input(f"{prompt.prompt} (例如: {prompt.example})")
111
- responses.append(UserPromptResponse(prompt=prompt.prompt, response=response))
112
- return responses
113
-
114
-
115
- def format_user_prompts(prompts: list[RequestUserPrompt], title="用户补充详细信息") -> str:
116
- result = "```markdown\n"
117
- result += f"### {title}\n\n"
118
- result += "\n".join(
119
- [f"- **Issue**: {prompt.prompt} (例如: {prompt.example})\n- **Reply**: " for prompt in prompts]
120
- )
121
- result += "\n```\n"
122
- return result
123
-
124
-
125
94
  def no_indent(text: str) -> str:
126
95
  return re.sub(r"^\s+", "", text, flags=re.MULTILINE)
127
96
 
@@ -136,3 +105,23 @@ def no_newline(text: str) -> str:
136
105
 
137
106
  def no_space(text: str) -> str:
138
107
  return re.sub(r"\s+", "", text, flags=re.MULTILINE)
108
+
109
+
110
+ class EnvironmentCapbilities(BaseModel):
111
+ save_metadata: bool = False
112
+ user_confirm: bool = False
113
+ user_supply_info: bool = False
114
+ set_cell_content: bool = False
115
+
116
+
117
+ __env_capbilities = EnvironmentCapbilities()
118
+
119
+
120
+ def get_env_capbilities() -> EnvironmentCapbilities:
121
+ return __env_capbilities
122
+
123
+
124
+ def set_env_capbilities(env_capbilities: EnvironmentCapbilities):
125
+ global __env_capbilities
126
+
127
+ __env_capbilities = env_capbilities
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: jupyter-agent
3
- Version: 2025.6.104
3
+ Version: 2025.7.100
4
4
  Summary: 调用LLM实现Jupyter代码的自动生成、执行、调试等功能
5
5
  Author: viewstar000
6
6
  License: MIT
@@ -10,6 +10,7 @@ Classifier: Operating System :: OS Independent
10
10
  Requires-Python: >=3.12
11
11
  Description-Content-Type: text/markdown
12
12
  License-File: LICENSE
13
+ Requires-Dist: bottle
13
14
  Requires-Dist: ipynbname
14
15
  Requires-Dist: ipython
15
16
  Requires-Dist: jinja2
@@ -95,15 +96,26 @@ pip install /path/to/jupyter-agent/dist/jupyter_agent-xxxx-py3-none-any.whl
95
96
  # 设置当前Notebook的路径,当无法自动获取时需要手工指定,以Vscode中的Notebook为例
96
97
  %config BotMagics.notebook_path = globals()["__vsc_ipynb_file__"]
97
98
 
98
- # 设置是否保存任务数据到Metadata,只有Vscode中安装了jupyter-agent-extension后才支持
99
+ # 是否默认开启单步模式,每执行一个步骤都退出执行循环,需要用户手动执行下一个步骤,默认为False
100
+ %config BotMagics.default_step_mode = False
101
+ # 是否默认开启自动确认,若关闭自动确认,每执行一个步骤都需要用户手动确认,默认为True
102
+ %config BotMagics.default_auto_confirm = True
103
+
104
+ # 设置运行环境是否保存任务数据到Metadata,默认为False,仅在Vscode中安装jupyter-agent-extension后或在评估模式下支持
99
105
  %config BotMagics.support_save_meta = True
106
+ # 设置运行环境是否设置单元格内容,默认为False,仅在Vscode中安装jupyter-agent-extension后或在评估模式下支持
107
+ %config BotMagics.support_set_cell_content = True
100
108
 
101
109
  # 设置日志级别,可选值为DEBUG、INFO、WARN、ERROR、FATAL,默认为INFO
102
110
  %config BotMagics.logging_level = 'DEBUG'
103
111
 
112
+ # 开启自动评估功能,默认为False,调用LLM对当前结果进行打分,目前仅实现了对子任务的整体打分
113
+ %config BotMagics.enable_evaluating = True
114
+ # 开启模拟用户补充信息功能,默认为False,调用LLM模拟对Agent的提问进行补充,用于自动评估
115
+ %config BotMagics.enable_supply_mocking = True
116
+
104
117
  # 设置是否显示思考过程,默认为True
105
118
  %config BotMagics.display_think = True
106
-
107
119
  # 设置是否显示发送给出LLM的消息和LLM的回答,默认为False
108
120
  %config BotMagics.display_message = True
109
121
  %config BotMagics.display_response = True
@@ -151,6 +163,20 @@ pip install /path/to/jupyter-agent/dist/jupyter_agent-xxxx-py3-none-any.whl
151
163
 
152
164
  更详细用法可参考[示例Notebook](https://github.com/viewstar000/jupyter-agent/blob/main/examples/data_loader.ipynb)
153
165
 
166
+ ### 评估模式
167
+
168
+ 工具提供了`bot_eval`命令用于在评估模式下执行notebook。在评估模式下,工具会顺序执行所有单元格,直到全局目标完成。
169
+
170
+ ```bash
171
+ bot_eval [-o output_eval.ipynb] [-e output_eval.jsonl] input.ipynb
172
+ ```
173
+
174
+ 例如
175
+
176
+ ```bash
177
+ bot_eval examples/data_loader_eval.ipynb
178
+ ```
179
+
154
180
  ## 贡献
155
181
 
156
182
  欢迎提交 issue 或 pull request 参与贡献。
@@ -237,12 +263,24 @@ Advanced Configuration:
237
263
  # Set the current notebook path, when it is not automatically obtained, it needs to be manually specified, for example, in Vscode Notebook
238
264
  %config BotMagics.notebook_path = globals()["__vsc_ipynb_file__"]
239
265
 
240
- # Set whether to save task data to Metadata, only Vscode installed with jupyter-agent-extension supports
266
+ # Whether to enable single-step mode by default: the execution loop exits after each step and the user must manually run the next step. Default is False.
267
+ %config BotMagics.default_step_mode = False
268
+ # Whether to enable auto-confirm by default: when auto-confirm is disabled, every step must be confirmed manually by the user. Default is True.
269
+ %config BotMagics.default_auto_confirm = True
270
+
271
+ # Set whether the runtime environment saves task data to Metadata (default is False); supported only in Vscode with jupyter-agent-extension installed, or in evaluation mode.
241
272
  %config BotMagics.support_save_meta = True
273
+ # Set whether the runtime environment can set cell content (default is False); supported only in Vscode with jupyter-agent-extension installed, or in evaluation mode.
274
+ %config BotMagics.support_set_cell_content = True
242
275
 
243
276
  # Set the log level, available values are DEBUG、INFO、WARN、ERROR、FATAL, default is INFO
244
277
  %config BotMagics.logging_level = 'DEBUG'
245
278
 
279
+ # Enable automatic evaluation (default is False): calls the LLM to score the current result; currently only overall scoring of a subtask is implemented
280
+ %config BotMagics.enable_evaluating = True
281
+ # Enable mocking of user-supplied information (default is False): calls the LLM to simulate the user's answers to the agent's questions; used for automatic evaluation
282
+ %config BotMagics.enable_supply_mocking = True
283
+
246
284
  # Set whether to display thinking process, default is True
247
285
  %config BotMagics.display_think = True
248
286
 
@@ -290,6 +328,20 @@ After generating code for a subtask, the tool will call the corresponding agent
290
328
 
291
329
  For more details, please refer to [example notebook](https://github.com/viewstar000/jupyter-agent/blob/main/examples/data_loader.ipynb)
292
330
 
331
+ ### Evaluation mode
332
+
333
+ Use the `bot_eval` command to execute a notebook in evaluation mode. In evaluation mode, the tool executes all cells in order until the global goal is completed.
334
+
335
+ ```bash
336
+ bot_eval [-o output_eval.ipynb] [-e output_eval.jsonl] input.ipynb
337
+ ```
338
+
339
+ For example
340
+
341
+ ```bash
342
+ bot_eval examples/data_loader_eval.ipynb
343
+ ```
344
+
293
345
  ## Contributing
294
346
 
295
347
  Welcome to submit issues or pull requests to participate in contributions.
@@ -0,0 +1,41 @@
1
+ jupyter_agent/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
+ jupyter_agent/bot_actions.py,sha256=Zq9_nfh4SJdMxkjqcTyQzS0RY4RwofaRkGq_4aexO2o,8453
3
+ jupyter_agent/bot_chat.py,sha256=4zjkHtyOabT1bvGO-n4ZTMw0XREU_XDlgfLCI5gpxsw,8834
4
+ jupyter_agent/bot_contexts.py,sha256=gs3hVIj81jasQYiJjdoAloWx8S1Xpa4cXr8XzcefUus,19475
5
+ jupyter_agent/bot_evaluation.py,sha256=t4SH6Gq4BmSyyRMozyQ2623XNGmgtCi9CTNRvOqzuRM,14266
6
+ jupyter_agent/bot_magics.py,sha256=Sh2CGs_esZqaHWDDLKjJSDlUYcI4PdF2aFPcibcf43Y,11027
7
+ jupyter_agent/bot_outputs.py,sha256=QDzReXLqZsU7RAPR4F9JEotxAtIe9YA3ZklCJ9U_jVg,16239
8
+ jupyter_agent/utils.py,sha256=8XKXXZB1EgCwIJEqYJigA8C84FzVTc2xdcF-y5kO3kY,3634
9
+ jupyter_agent/bot_agents/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
10
+ jupyter_agent/bot_agents/base.py,sha256=pAwW_KQZE9LwyxM91umzIlWalgFUKGJOpby8JGfvLQo,12430
11
+ jupyter_agent/bot_agents/master_planner.py,sha256=twDEc0KMCyNkcsD_0nilbPteZqFwUT38QDfeYUhOzzE,1330
12
+ jupyter_agent/bot_agents/output_task_result.py,sha256=4DeAmDzjUI_0yfb26f5sLIwa74aJRzEQXbMRSyYMv1g,761
13
+ jupyter_agent/bot_agents/prepare_next_cell.py,sha256=_4-kYQxAs9PK4a6T5vbsdSixNQ4l-R8qzQNjs8e_09c,1891
14
+ jupyter_agent/bot_agents/request_user_supply.py,sha256=Bkw08hhkUUVnirBijv1mJ0hQW2VpzdXoC3TToQolQos,6106
15
+ jupyter_agent/bot_agents/task_code_executor.py,sha256=V138uj39_lLGuljEDrxzd-jRf1k4nAQkA_gF2_Jhpgw,2243
16
+ jupyter_agent/bot_agents/task_coder.py,sha256=7fXq9nk1yH3F_mJfCMZBktHmxGfgmpuChMQbpEuL0w4,1783
17
+ jupyter_agent/bot_agents/task_debuger.py,sha256=77pa_Awgvzxm3XkFA1oZsGr8SPJkjApKMtkmoySShmI,1367
18
+ jupyter_agent/bot_agents/task_planner_v3.py,sha256=Mlves3v3KL7MAJ8hPPMxUsKdB2v6vuOXlVZ6XtNMbbo,8713
19
+ jupyter_agent/bot_agents/task_reasoner.py,sha256=4oP5DzAkfEGh6LtpX4OH6aMgAPDiRvbSVclxrhx0v20,1465
20
+ jupyter_agent/bot_agents/task_structrue_reasoner.py,sha256=lNt508g4ileRjG9_NETdSrQqVb7tjdu8qHajKcZzB6E,3947
21
+ jupyter_agent/bot_agents/task_structrue_summarier.py,sha256=fnNiXQMiEPHyowqOP6Ht_OnxV_1h_WTLKfcM2IYEt24,4053
22
+ jupyter_agent/bot_agents/task_summarier.py,sha256=Q9b11gdWvwnYLsIjwSpMkZQur1CqFdd_uKb322o8u-M,1787
23
+ jupyter_agent/bot_agents/task_verifier.py,sha256=kGtz8BkSB097RwdgY3FcXpSbVRcikFeTXiokheza0t8,2522
24
+ jupyter_agent/bot_agents/task_verify_summarier.py,sha256=XIxRuW8T1DchHLy3PlGWWUMVC8hcTEyjhQ5tnELWNZk,4943
25
+ jupyter_agent/bot_evaluators/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
26
+ jupyter_agent/bot_evaluators/base.py,sha256=zGxW469lq2Ab1mOaTtVQcWQmJrKNAAaF8X7OPbnUY04,1375
27
+ jupyter_agent/bot_evaluators/dummy_flow.py,sha256=W0BWJKMgXIilZY0i8eP_SNVgqTUd1CT_uqMBs5aygVA,473
28
+ jupyter_agent/bot_evaluators/dummy_global.py,sha256=yZ8fo2xfVN8gZTpBfs8EJ4dcv2t4ls6aXxn3Mo7bNSk,483
29
+ jupyter_agent/bot_evaluators/dummy_task.py,sha256=owh6g6ItPXXYjesplzNMxVcKAU_kktWtuJhqRzZ05V4,475
30
+ jupyter_agent/bot_evaluators/flow_global_planning.py,sha256=kOLd0dCoqrMi6zbe5chXrwxmdahtt8QqX3UnAZgk3AQ,2419
31
+ jupyter_agent/bot_evaluators/flow_task_executor.py,sha256=gzHlKkP9K5fICYgUY5BKAzjwqn3xScxklohqoUCJaZk,4450
32
+ jupyter_agent/bot_flows/__init__.py,sha256=Xe7EbC6bt04Nc4Yr0e--FVvBJCxkZCZkwYL9oahMBtI,338
33
+ jupyter_agent/bot_flows/base.py,sha256=F-iXu59IfnOXWze3e2myvzdBlyk8xzlrqHA8GTfu4vo,14916
34
+ jupyter_agent/bot_flows/master_planner.py,sha256=F1AunpfNwFqEn4z8uzNEq7d_5_cNHRhMlO1P7uWcYf0,980
35
+ jupyter_agent/bot_flows/task_executor_v3.py,sha256=uCuwcG8ZfPIR7V5AX7UfFkYVbJ6MxPscixjtLwBYWtE,4878
36
+ jupyter_agent-2025.7.100.dist-info/licenses/LICENSE,sha256=nWMmSIg7OepTIDX_OPP0-T9ImeCBBoog7eJxm5awtcM,1068
37
+ jupyter_agent-2025.7.100.dist-info/METADATA,sha256=zaOVnSQK797gupNKlYbwOCV9Y_A3R4_6yk12WRDjqYI,12560
38
+ jupyter_agent-2025.7.100.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
39
+ jupyter_agent-2025.7.100.dist-info/entry_points.txt,sha256=063AB86wSrC_V-iiEEqxTlR4uz-T7VH_YagIpmKFQC0,63
40
+ jupyter_agent-2025.7.100.dist-info/top_level.txt,sha256=c3USTBZ7DZGuvLKlEW-QfGIx0tzn98iCEn3bpdYnDtE,14
41
+ jupyter_agent-2025.7.100.dist-info/RECORD,,