jupyter-agent 2025.6.103__py3-none-any.whl → 2025.6.105__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41)
  1. jupyter_agent/bot_actions.py +270 -0
  2. jupyter_agent/bot_agents/__init__.py +0 -42
  3. jupyter_agent/bot_agents/base.py +85 -45
  4. jupyter_agent/bot_agents/master_planner.py +2 -0
  5. jupyter_agent/bot_agents/output_task_result.py +6 -7
  6. jupyter_agent/bot_agents/request_user_supply.py +186 -0
  7. jupyter_agent/bot_agents/task_planner_v3.py +12 -13
  8. jupyter_agent/bot_agents/task_reasoner.py +2 -2
  9. jupyter_agent/bot_agents/task_structrue_reasoner.py +19 -12
  10. jupyter_agent/bot_agents/task_structrue_summarier.py +19 -18
  11. jupyter_agent/bot_agents/task_summarier.py +2 -2
  12. jupyter_agent/bot_agents/task_verifier.py +1 -1
  13. jupyter_agent/bot_agents/task_verify_summarier.py +5 -6
  14. jupyter_agent/bot_chat.py +2 -2
  15. jupyter_agent/bot_contexts.py +28 -23
  16. jupyter_agent/bot_evaluation.py +325 -0
  17. jupyter_agent/bot_evaluators/__init__.py +0 -0
  18. jupyter_agent/bot_evaluators/base.py +42 -0
  19. jupyter_agent/bot_evaluators/dummy_flow.py +20 -0
  20. jupyter_agent/bot_evaluators/dummy_global.py +20 -0
  21. jupyter_agent/bot_evaluators/dummy_task.py +20 -0
  22. jupyter_agent/bot_evaluators/flow_global_planning.py +88 -0
  23. jupyter_agent/bot_evaluators/flow_task_executor.py +152 -0
  24. jupyter_agent/bot_flows/__init__.py +0 -4
  25. jupyter_agent/bot_flows/base.py +114 -10
  26. jupyter_agent/bot_flows/master_planner.py +7 -2
  27. jupyter_agent/bot_flows/task_executor_v3.py +45 -20
  28. jupyter_agent/bot_magics.py +108 -53
  29. jupyter_agent/bot_outputs.py +56 -3
  30. jupyter_agent/utils.py +20 -31
  31. {jupyter_agent-2025.6.103.dist-info → jupyter_agent-2025.6.105.dist-info}/METADATA +39 -8
  32. jupyter_agent-2025.6.105.dist-info/RECORD +40 -0
  33. jupyter_agent-2025.6.105.dist-info/entry_points.txt +2 -0
  34. jupyter_agent/bot_agents/task_planner_v1.py +0 -158
  35. jupyter_agent/bot_agents/task_planner_v2.py +0 -172
  36. jupyter_agent/bot_flows/task_executor_v1.py +0 -86
  37. jupyter_agent/bot_flows/task_executor_v2.py +0 -84
  38. jupyter_agent-2025.6.103.dist-info/RECORD +0 -33
  39. {jupyter_agent-2025.6.103.dist-info → jupyter_agent-2025.6.105.dist-info}/WHEEL +0 -0
  40. {jupyter_agent-2025.6.103.dist-info → jupyter_agent-2025.6.105.dist-info}/licenses/LICENSE +0 -0
  41. {jupyter_agent-2025.6.103.dist-info → jupyter_agent-2025.6.105.dist-info}/top_level.txt +0 -0
@@ -5,6 +5,7 @@ This software is released under the MIT License.
5
5
  https://opensource.org/licenses/MIT
6
6
  """
7
7
 
8
+ import time
8
9
  import traceback
9
10
 
10
11
  from pydantic import BaseModel
@@ -12,11 +13,16 @@ from enum import Enum
12
13
  from typing import List, Dict, Optional, Type
13
14
  from IPython.display import Markdown
14
15
  from ..bot_agents.base import BaseAgent
15
- from ..bot_outputs import _D, _I, _W, _E, _F, _M, _B, set_stage, flush_output
16
+ from ..bot_evaluators.dummy_global import DummyGlobalEvaluator
17
+ from ..bot_evaluators.flow_task_executor import FlowTaskExecEvaluator
18
+ from ..bot_outputs import _D, _I, _W, _E, _F, _M, _B
19
+ from ..bot_outputs import set_stage, flush_output, output_evaluation
20
+ from ..bot_evaluation import FlowEvaluationRecord, StageEvaluationRecord, NotebookEvaluationRecord
16
21
 
17
22
  TASK_AGENT_STATE_ERROR = "_AGENT_STATE_ERROR_32534526_"
18
23
  TASK_STAGE_START = "start"
19
24
  TASK_STAGE_COMPLETED = "completed"
25
+ TASK_STAGE_GLOBAL_FINISHED = "global_finished"
20
26
 
21
27
 
22
28
  class TaskAction(str, Enum):
@@ -47,11 +53,14 @@ class BaseTaskFlow:
47
53
 
48
54
  STAGE_TRANSITIONS: List[StageTransition] = []
49
55
  START_STAGE = TASK_STAGE_START
50
- STOP_STAGES = [TASK_STAGE_COMPLETED]
56
+ STOP_STAGES = [TASK_STAGE_COMPLETED, TASK_STAGE_GLOBAL_FINISHED]
57
+ FLOW_EVALUATOR = FlowTaskExecEvaluator
58
+ GLOBAL_EVALUATOR = DummyGlobalEvaluator
51
59
 
52
- def __init__(self, notebook_context, agent_factory):
60
+ def __init__(self, notebook_context, agent_factory, evaluator_factory=None):
53
61
  self.notebook_context = notebook_context
54
62
  self.agent_factory = agent_factory
63
+ self.evaluator_factory = evaluator_factory
55
64
  self.stage_transitions = {}
56
65
  self.prepare_stage_transitions()
57
66
 
@@ -146,11 +155,15 @@ class BaseTaskFlow:
146
155
  ns = self._get_next_stage_trans(stage, state, action)
147
156
  return ns.stage
148
157
 
149
- def __call__(self, stage, max_tries=3, stage_continue=True, stage_confirm=True):
158
+ def __call__(self, stage, max_tries=5, stage_continue=True, stage_confirm=True):
150
159
 
151
160
  n_tries = 0
161
+ flow_duration = 0.0
162
+ stage_count = 0
163
+ # Initialize the task stage
152
164
  stage = stage or self.START_STAGE
153
165
  while n_tries <= max_tries:
166
+ stage_st = time.time()
154
167
  try:
155
168
  stage_name = stage.value if isinstance(stage, Enum) else stage
156
169
  stage_name = stage_name.replace(".", "-").capitalize()
@@ -163,6 +176,46 @@ class BaseTaskFlow:
163
176
  _M(f"```python\n{traceback.format_exc()}\n```")
164
177
  state = TASK_AGENT_STATE_ERROR
165
178
  failed = True
179
+ stage_count += 1
180
+ stage_duration = time.time() - stage_st
181
+ flow_duration += stage_duration
182
+ _M(f"Stage `{stage}` completed in {stage_duration:.2f} seconds with state `{state}` and failed `{failed}`")
183
+ if (
184
+ self.evaluator_factory is not None
185
+ and not failed
186
+ and hasattr(agent, "EVALUATORS")
187
+ and state in agent.EVALUATORS
188
+ ):
189
+ # If the agent has evaluators, run them
190
+ evaluator = self.evaluator_factory(agent.EVALUATORS[state])
191
+ try:
192
+ _M(f"**Evaluating** stage `{stage}` with evaluator `{type(evaluator).__name__}` ...")
193
+ evaluation_result = evaluator()
194
+ evaluation_result.timestamp = evaluation_result.timestamp or time.time()
195
+ evaluation_result.evaluator = evaluation_result.evaluator or type(evaluator).__name__
196
+ evaluation_result.cell_index = self.task.cell_idx
197
+ evaluation_result.flow = type(self).__name__
198
+ evaluation_result.stage = str(stage)
199
+ evaluation_result.agent = type(agent).__name__
200
+ evaluation_result.execution_duration = stage_duration
201
+ evaluation_result.is_success = not failed
202
+ output_evaluation(evaluation_result)
203
+ except Exception as e:
204
+ _M(f"**Error** during task evaluation stage `{stage}`: `{type(e)}`: `{e}`")
205
+ _M(f"```python\n{traceback.format_exc()}\n```")
206
+ else:
207
+ output_evaluation(
208
+ StageEvaluationRecord(
209
+ timestamp=time.time(),
210
+ evaluator="default",
211
+ cell_index=self.task.cell_idx,
212
+ flow=type(self).__name__,
213
+ stage=str(stage),
214
+ agent=type(agent).__name__,
215
+ execution_duration=stage_duration,
216
+ is_success=not failed,
217
+ )
218
+ )
166
219
 
167
220
  if state != TASK_AGENT_STATE_ERROR:
168
221
  # Agent did not fail, check if we have reached the final stage
@@ -171,14 +224,18 @@ class BaseTaskFlow:
171
224
  self.task.update_cell()
172
225
  if next_stage in self.STOP_STAGES:
173
226
  _M(f"Task execution **Stopped** at stage `{next_stage}`")
227
+ stage = next_stage
174
228
  break
175
229
 
176
230
  if failed:
177
231
  # Agent failed
178
232
  n_tries += 1
233
+ if n_tries > max_tries:
234
+ _M(f"**Max flow tries reached** during task execution stage `{stage}`, **Stop!**")
235
+ break
179
236
 
180
- if failed or stage_confirm:
181
- # Agent failed or we need to confirm
237
+ if stage_confirm:
238
+ # We need to confirm
182
239
  message = self.get_prompt_message(stage, state, failed)
183
240
  _M("**Confirm**: " + message)
184
241
  flush_output()
@@ -188,15 +245,13 @@ class BaseTaskFlow:
188
245
  self.task.update_cell()
189
246
  if action == TaskAction.STOP:
190
247
  _M(f"Task execution **Stopped**, and set next stage to `{next_stage}`")
191
- break
192
- elif n_tries > max_tries:
193
- _M(f"**Max tries reached** during task execution stage `{stage}`, **Stop!**")
248
+ stage = next_stage
194
249
  break
195
250
  else:
196
251
  _M(f"**Action**: `{action}` transits stage to `{next_stage}`")
197
252
  stage = next_stage
198
253
  else:
199
- # Agent succeeded, transit to the next stage without confirmation
254
+ # transit to the next stage without confirmation
200
255
  next_stage = self.get_next_stage(stage, state, TaskAction.CONTINUE)
201
256
  self.task.agent_stage = next_stage
202
257
  self.task.update_cell()
@@ -205,5 +260,54 @@ class BaseTaskFlow:
205
260
 
206
261
  if not stage_continue:
207
262
  break
263
+ # Finalize the task execution
264
+ stage_name = stage.value if isinstance(stage, Enum) else stage
265
+ if stage_name == TASK_STAGE_GLOBAL_FINISHED:
266
+ _M("Task execution **finished** globally.")
267
+ if self.evaluator_factory is not None and hasattr(self, "GLOBAL_EVALUATOR") and self.GLOBAL_EVALUATOR:
268
+ evaluator = self.evaluator_factory(self.GLOBAL_EVALUATOR)
269
+ _M(f"**Evaluating** notebook with evaluator `{type(evaluator).__name__}` ...")
270
+ evaluation_result = evaluator()
271
+ evaluation_result.timestamp = evaluation_result.timestamp or time.time()
272
+ evaluation_result.evaluator = evaluation_result.evaluator or type(evaluator).__name__
273
+ evaluation_result.cell_index = self.task.cell_idx
274
+ evaluation_result.is_success = True
275
+ output_evaluation(evaluation_result)
276
+ else:
277
+ output_evaluation(
278
+ NotebookEvaluationRecord(
279
+ timestamp=time.time(),
280
+ evaluator="default",
281
+ cell_index=self.task.cell_idx,
282
+ is_success=True,
283
+ )
284
+ )
285
+ elif stage_name == TASK_STAGE_COMPLETED:
286
+ _M(f"Task execution **completed** in {flow_duration:.2f} seconds with {stage_count} stages.")
287
+ if self.evaluator_factory is not None and hasattr(self, "FLOW_EVALUATOR") and self.FLOW_EVALUATOR:
288
+ evaluator = self.evaluator_factory(self.FLOW_EVALUATOR)
289
+ _M(f"**Evaluating** flow `{type(self).__name__}` with evaluator `{type(evaluator).__name__}` ...")
290
+ evaluation_result = evaluator()
291
+ evaluation_result.timestamp = evaluation_result.timestamp or time.time()
292
+ evaluation_result.evaluator = evaluation_result.evaluator or type(evaluator).__name__
293
+ evaluation_result.cell_index = self.task.cell_idx
294
+ evaluation_result.flow = type(self).__name__
295
+ evaluation_result.stage_count = stage_count
296
+ evaluation_result.execution_duration = flow_duration
297
+ evaluation_result.is_success = True
298
+ output_evaluation(evaluation_result)
299
+ else:
300
+ # If no evaluator, just output the evaluation record
301
+ output_evaluation(
302
+ FlowEvaluationRecord(
303
+ timestamp=time.time(),
304
+ evaluator="default",
305
+ cell_index=self.task.cell_idx,
306
+ flow=type(self).__name__,
307
+ stage_count=stage_count,
308
+ execution_duration=flow_duration,
309
+ is_success=True,
310
+ )
311
+ )
208
312
  flush_output()
209
313
  return stage
@@ -6,11 +6,16 @@ https://opensource.org/licenses/MIT
6
6
  """
7
7
 
8
8
  from .base import BaseTaskFlow, StageTransition, TASK_STAGE_START, TASK_STAGE_COMPLETED
9
+ from ..bot_evaluators.flow_global_planning import FlowGlobalPlanningEvaluator
10
+ from ..bot_agents.master_planner import MasterPlannerAgent
11
+ from ..bot_agents.output_task_result import OutputTaskResult
9
12
 
10
13
 
11
14
  class MasterPlannerFlow(BaseTaskFlow):
12
15
 
13
16
  STAGE_TRANSITIONS = [
14
- StageTransition(stage=TASK_STAGE_START, agent="MasterPlannerAgent", next_stage=TASK_STAGE_START)
17
+ StageTransition(stage=TASK_STAGE_START, agent=MasterPlannerAgent, next_stage=TASK_STAGE_COMPLETED),
18
+ StageTransition(stage=TASK_STAGE_COMPLETED, agent=OutputTaskResult, next_stage=TASK_STAGE_COMPLETED),
15
19
  ]
16
- STOP_STAGES = [TASK_STAGE_START]
20
+ STOP_STAGES = [TASK_STAGE_COMPLETED]
21
+ FLOW_EVALUATOR = FlowGlobalPlanningEvaluator
@@ -6,35 +6,43 @@ https://opensource.org/licenses/MIT
6
6
  """
7
7
 
8
8
  from enum import Enum
9
- from .base import BaseTaskFlow, StageTransition, StageNext, TaskAction
10
- from ..bot_agents import (
11
- TaskPlannerAgentV3,
12
- TaskCodingAgent,
13
- CodeDebugerAgent,
14
- CodeExecutor,
15
- TaskStructureSummaryAgent,
16
- TaskStructureReasoningAgent,
17
- OutputTaskResult,
9
+ from .base import (
10
+ BaseTaskFlow,
11
+ StageTransition,
12
+ StageNext,
13
+ TaskAction,
14
+ TASK_STAGE_COMPLETED,
15
+ TASK_STAGE_GLOBAL_FINISHED,
18
16
  )
19
- from ..bot_agents.task_planner_v3 import TaskPlannerState
17
+ from ..bot_agents.task_planner_v3 import TaskPlannerAgentV3, TaskPlannerState
18
+ from ..bot_agents.task_coder import TaskCodingAgent
19
+ from ..bot_agents.task_debuger import CodeDebugerAgent
20
+ from ..bot_agents.task_code_executor import CodeExecutor
21
+ from ..bot_agents.task_structrue_summarier import TaskStructureSummaryAgent, TaskStructureSummaryState
22
+ from ..bot_agents.task_structrue_reasoner import TaskStructureReasoningAgent, TaskStructureReasonState
23
+ from ..bot_agents.output_task_result import OutputTaskResult
24
+ from ..bot_agents.request_user_supply import RequestAboveUserSupplyAgent, RequestBelowUserSupplyAgent
20
25
 
21
26
 
22
27
  class TaskStage(str, Enum):
23
28
  PLANNING = "planning"
24
29
  PLANNING_PAUSED = "planning_paused"
30
+ REQUEST_INFO_ABOVE = "request_info_above"
31
+ REQUEST_INFO_BELOW = "request_info_below"
25
32
  CODING = "coding"
26
33
  EXECUTING = "executing"
27
34
  DEBUGGING = "debugging"
28
35
  REASONING = "reasoning"
29
36
  SUMMARY = "summary"
30
- COMPLETED = "completed"
31
37
  OUTPUT_RESULT = "output_result"
38
+ COMPLETED = TASK_STAGE_COMPLETED
39
+ GLOBAL_FINISHED = TASK_STAGE_GLOBAL_FINISHED
32
40
 
33
41
 
34
42
  class TaskExecutorFlowV3(BaseTaskFlow):
35
43
 
36
44
  START_STAGE = TaskStage.PLANNING
37
- STOP_STAGES = [TaskStage.COMPLETED, TaskStage.PLANNING_PAUSED]
45
+ STOP_STAGES = [TaskStage.COMPLETED, TaskStage.PLANNING_PAUSED, TaskStage.GLOBAL_FINISHED]
38
46
  STAGE_TRANSITIONS = [
39
47
  StageTransition[TaskStage, TaskPlannerState](
40
48
  stage=TaskStage.PLANNING,
@@ -42,10 +50,13 @@ class TaskExecutorFlowV3(BaseTaskFlow):
42
50
  states={
43
51
  TaskPlannerState.CODING_PLANNED: TaskStage.CODING,
44
52
  TaskPlannerState.REASONING_PLANNED: TaskStage.REASONING,
45
- TaskPlannerState.REQUEST_INFO: TaskStage.PLANNING_PAUSED,
46
- TaskPlannerState.GLOBAL_FINISHED: TaskStage.COMPLETED,
53
+ TaskPlannerState.REQUEST_INFO: TaskStage.REQUEST_INFO_ABOVE,
54
+ TaskPlannerState.GLOBAL_FINISHED: TaskStage.GLOBAL_FINISHED,
47
55
  },
48
56
  ),
57
+ StageTransition[TaskStage, None](
58
+ stage=TaskStage.REQUEST_INFO_ABOVE, agent=RequestAboveUserSupplyAgent, next_stage=TaskStage.PLANNING_PAUSED
59
+ ),
49
60
  StageTransition[TaskStage, TaskPlannerState](
50
61
  stage=TaskStage.PLANNING_PAUSED,
51
62
  agent=TaskPlannerAgentV3,
@@ -67,17 +78,28 @@ class TaskExecutorFlowV3(BaseTaskFlow):
67
78
  StageTransition[TaskStage, None](
68
79
  stage=TaskStage.DEBUGGING, agent=CodeDebugerAgent, next_stage=TaskStage.EXECUTING
69
80
  ),
70
- StageTransition[TaskStage, None](
71
- stage=TaskStage.REASONING, agent=TaskStructureReasoningAgent, next_stage=TaskStage.COMPLETED
81
+ StageTransition[TaskStage, TaskStructureReasonState](
82
+ stage=TaskStage.REASONING,
83
+ agent=TaskStructureReasoningAgent,
84
+ states={
85
+ TaskStructureReasonState.DONE: TaskStage.COMPLETED,
86
+ TaskStructureReasonState.REQUEST_INFO: TaskStage.REQUEST_INFO_BELOW,
87
+ },
72
88
  ),
73
- StageTransition[TaskStage, None](
89
+ StageTransition[TaskStage, TaskStructureSummaryState](
74
90
  stage=TaskStage.SUMMARY,
75
91
  agent=TaskStructureSummaryAgent,
76
- next_stage={
77
- TaskAction.DEFAULT: StageNext(stage=TaskStage.COMPLETED),
78
- TaskAction.STOP: StageNext(stage=TaskStage.EXECUTING),
92
+ states={
93
+ TaskStructureSummaryState.DONE: {
94
+ TaskAction.DEFAULT: StageNext(stage=TaskStage.COMPLETED),
95
+ TaskAction.STOP: StageNext(stage=TaskStage.EXECUTING),
96
+ },
97
+ TaskStructureSummaryState.REQUEST_INFO: TaskStage.REQUEST_INFO_BELOW,
79
98
  },
80
99
  ),
100
+ StageTransition[TaskStage, None](
101
+ stage=TaskStage.REQUEST_INFO_BELOW, agent=RequestBelowUserSupplyAgent, next_stage=TaskStage.COMPLETED
102
+ ),
81
103
  StageTransition[TaskStage, bool](
82
104
  stage=TaskStage.COMPLETED,
83
105
  agent=CodeExecutor,
@@ -86,4 +108,7 @@ class TaskExecutorFlowV3(BaseTaskFlow):
86
108
  StageTransition[TaskStage, None](
87
109
  stage=TaskStage.OUTPUT_RESULT, agent=OutputTaskResult, next_stage=TaskStage.COMPLETED
88
110
  ),
111
+ StageTransition[TaskStage, None](
112
+ stage=TaskStage.GLOBAL_FINISHED, agent=OutputTaskResult, next_stage=TaskStage.GLOBAL_FINISHED
113
+ ),
89
114
  ]
@@ -15,11 +15,14 @@ from IPython.display import Markdown
15
15
  from IPython.core.magic import Magics, magics_class, cell_magic
16
16
  from traitlets import Unicode, Int, Bool
17
17
  from traitlets.config.configurable import Configurable
18
- from .bot_contexts import NotebookContext, AgentCellContext
19
- from .bot_agents import AgentFactory
20
- from .bot_agents.base import AgentModelType
21
- from .bot_flows import MasterPlannerFlow, TaskExecutorFlowV1, TaskExecutorFlowV2, TaskExecutorFlowV3
22
- from .bot_outputs import _D, _I, _W, _E, _F, _M, _B, _O, reset_output, set_logging_level
18
+ from .bot_contexts import NotebookContext
19
+ from .bot_agents.base import AgentModelType, AgentFactory
20
+ from .bot_agents.request_user_supply import RequestUserSupplyAgent
21
+ from .bot_evaluators.base import EvaluatorFactory
22
+ from .bot_flows import MasterPlannerFlow, TaskExecutorFlowV3
23
+ from .bot_outputs import _D, _I, _W, _E, _F, _M, _B, _O, reset_output, set_logging_level, flush_output
24
+ from .bot_actions import close_action_dispatcher
25
+ from .utils import get_env_capbilities
23
26
 
24
27
 
25
28
  @magics_class
@@ -43,9 +46,17 @@ class BotMagics(Magics, Configurable):
43
46
  display_message = Bool(False, help="Display chat message").tag(config=True)
44
47
  display_think = Bool(True, help="Display chatthink response").tag(config=True)
45
48
  display_response = Bool(False, help="Display chat full response").tag(config=True)
49
+ support_save_meta = Bool(False, help="Support save metadata to cell").tag(config=True)
50
+ support_user_confirm = Bool(False, help="Support user confirm").tag(config=True)
51
+ support_user_supply_info = Bool(False, help="Support user supply info").tag(config=True)
52
+ support_set_cell_content = Bool(False, help="Support set cell content").tag(config=True)
53
+ enable_evaluating = Bool(False, help="Enable evaluating task").tag(config=True)
54
+ enable_supply_mocking = Bool(False, help="Enable supply mocking").tag(config=True)
46
55
  notebook_path = Unicode(None, allow_none=True, help="Path to Notebook file").tag(config=True)
47
56
  default_task_flow = Unicode("v3", allow_none=True, help="Default task flow").tag(config=True)
48
- support_save_meta = Bool(False, help="Support save metadata to cell").tag(config=True)
57
+ default_max_tries = Int(3, help="Default max tries for task execution").tag(config=True)
58
+ default_step_mode = Bool(False, help="Default step mode for task execution").tag(config=True)
59
+ default_auto_confirm = Bool(False, help="Default auto confirm for task execution").tag(config=True)
49
60
 
50
61
  def parse_args(self, line):
51
62
  """解析命令行参数"""
@@ -54,31 +65,32 @@ class BotMagics(Magics, Configurable):
54
65
  parser.add_argument("-P", "--planning", action="store_true", default=False, help="Run in planning mode")
55
66
  parser.add_argument("-s", "--stage", type=str, default=None, help="Task stage")
56
67
  parser.add_argument("-f", "--flow", type=str, default=self.default_task_flow, help="Flow name")
57
- parser.add_argument("-m", "--max-tries", type=int, default=3, help="Max tries")
58
- parser.add_argument("-S", "--step-mode", action="store_true", default=False, help="Run in single step mode")
59
- parser.add_argument("-Y", "--auto-confirm", action="store_true", default=False, help="Run without confirm")
68
+ parser.add_argument("-m", "--max-tries", type=int, default=self.default_max_tries, help="Max tries")
69
+ parser.add_argument(
70
+ "-S",
71
+ "--step-mode",
72
+ action="store_true",
73
+ default=self.default_step_mode,
74
+ help="Run in single step mode",
75
+ )
76
+ parser.add_argument(
77
+ "-Y",
78
+ "--auto-confirm",
79
+ action="store_true",
80
+ default=self.default_auto_confirm,
81
+ help="Run without confirm",
82
+ )
60
83
  options, _ = parser.parse_known_args(shlex.split(line.strip()))
61
-
62
84
  return options
63
85
 
64
- def ensure_notebook_path(self):
65
- if self.notebook_path:
66
- return self.notebook_path
67
- result = self.shell and self.shell.run_cell("globals().get('__vsc_ipynb_file__')")
68
- if result and result.success and result.result:
69
- self.notebook_path = result.result
70
- return self.notebook_path
71
- try:
72
- self.notebook_path = str(ipynbname.path())
73
- return self.notebook_path
74
- except Exception as e:
75
- _F(f"Failed to get notebook path: {e}")
76
- return None
77
-
78
86
  @cell_magic
79
87
  def bot(self, line, cell):
80
88
  """Jupyter cell magic: %%bot"""
81
89
  try:
90
+ reset_output(stage="Logging", logging_level=self.logging_level)
91
+ _I("Cell magic %%bot executing ...")
92
+ _D(f"Cell magic called with line: {line}")
93
+ _D(f"Cell magic called with cell: {repr(cell)[:50]} ...")
82
94
  if not self.ensure_notebook_path():
83
95
  _O(
84
96
  Markdown(
@@ -88,11 +100,6 @@ class BotMagics(Magics, Configurable):
88
100
  )
89
101
  )
90
102
  return
91
- AgentCellContext.SUPPORT_SAVE_META = self.support_save_meta
92
- reset_output(stage="Logging", logging_level=self.logging_level)
93
- _I("Cell magic %%bot executing ...")
94
- _D(f"Cell magic called with line: {line}")
95
- _D(f"Cell magic called with cell: {repr(cell)[:50]} ...")
96
103
  if not cell.strip():
97
104
  _O(
98
105
  Markdown(
@@ -105,42 +112,90 @@ class BotMagics(Magics, Configurable):
105
112
  "%%bot {}\n\n# {}".format(line.strip(), time.strftime("%Y-%m-%d %H:%M:%S")), replace=True
106
113
  )
107
114
  return
115
+ get_env_capbilities().save_metadata = self.support_save_meta
116
+ get_env_capbilities().user_confirm = self.support_user_confirm
117
+ get_env_capbilities().user_supply_info = self.support_user_supply_info
118
+ get_env_capbilities().set_cell_content = self.support_set_cell_content
119
+ RequestUserSupplyAgent.MOCK_USER_SUPPLY = self.enable_supply_mocking
108
120
  options = self.parse_args(line)
109
- _D(f"Cell magic called with options: {options}")
110
121
  set_logging_level(options.logging_level)
122
+ _D(f"Cell magic called with options: {options}")
111
123
  nb_context = NotebookContext(line, cell, notebook_path=self.notebook_path)
112
- agent_factory = AgentFactory(
113
- nb_context,
114
- display_think=self.display_think,
115
- display_message=self.display_message,
116
- display_response=self.display_response,
124
+ agent_factory = self.get_agent_factory(nb_context)
125
+ evaluator_factory = self.get_evaluator_factory(nb_context)
126
+ if options.planning:
127
+ flow = MasterPlannerFlow(nb_context, agent_factory, evaluator_factory)
128
+ elif options.flow == "v3":
129
+ flow = TaskExecutorFlowV3(nb_context, agent_factory, evaluator_factory)
130
+ else:
131
+ raise ValueError(f"Unknown flow: {options.flow}")
132
+ flow(
133
+ options.stage,
134
+ options.max_tries,
135
+ not options.step_mode,
136
+ not options.auto_confirm,
117
137
  )
118
- agent_factory.config_model(
138
+ except Exception as e:
139
+ traceback.print_exc()
140
+ finally:
141
+ close_action_dispatcher()
142
+ flush_output()
143
+
144
+ def ensure_notebook_path(self):
145
+ if self.notebook_path:
146
+ return self.notebook_path
147
+ result = self.shell and self.shell.run_cell(
148
+ "globals().get('__vsc_ipynb_file__') or globals().get('__evaluation_ipynb_file__')"
149
+ )
150
+ if result and result.success and result.result:
151
+ self.notebook_path = result.result
152
+ return self.notebook_path
153
+ try:
154
+ self.notebook_path = str(ipynbname.path())
155
+ return self.notebook_path
156
+ except Exception as e:
157
+ _F(f"Failed to get notebook path: {e}")
158
+ return None
159
+
160
+ def get_agent_factory(self, nb_context):
161
+ agent_factory = AgentFactory(
162
+ nb_context,
163
+ display_think=self.display_think,
164
+ display_message=self.display_message,
165
+ display_response=self.display_response,
166
+ )
167
+ agent_factory.config_model(
168
+ AgentModelType.DEFAULT, self.default_api_url, self.default_api_key, self.default_model_name
169
+ )
170
+ agent_factory.config_model(
171
+ AgentModelType.PLANNER, self.planner_api_url, self.planner_api_key, self.planner_model_name
172
+ )
173
+ agent_factory.config_model(
174
+ AgentModelType.CODING, self.coding_api_url, self.coding_api_key, self.coding_model_name
175
+ )
176
+ agent_factory.config_model(
177
+ AgentModelType.REASONING, self.reasoning_api_url, self.reasoning_api_key, self.reasoning_model_name
178
+ )
179
+ return agent_factory
180
+
181
+ def get_evaluator_factory(self, nb_context):
182
+ if self.enable_evaluating:
183
+ evaluator_factory = EvaluatorFactory(nb_context)
184
+ evaluator_factory.config_model(
119
185
  AgentModelType.DEFAULT, self.default_api_url, self.default_api_key, self.default_model_name
120
186
  )
121
- agent_factory.config_model(
187
+ evaluator_factory.config_model(
122
188
  AgentModelType.PLANNER, self.planner_api_url, self.planner_api_key, self.planner_model_name
123
189
  )
124
- agent_factory.config_model(
190
+ evaluator_factory.config_model(
125
191
  AgentModelType.CODING, self.coding_api_url, self.coding_api_key, self.coding_model_name
126
192
  )
127
- agent_factory.config_model(
193
+ evaluator_factory.config_model(
128
194
  AgentModelType.REASONING, self.reasoning_api_url, self.reasoning_api_key, self.reasoning_model_name
129
195
  )
130
- if options.planning:
131
- flow = MasterPlannerFlow(nb_context, agent_factory)
132
- else:
133
- if options.flow == "v1":
134
- flow = TaskExecutorFlowV1(nb_context, agent_factory)
135
- elif options.flow == "v2":
136
- flow = TaskExecutorFlowV2(nb_context, agent_factory)
137
- elif options.flow == "v3":
138
- flow = TaskExecutorFlowV3(nb_context, agent_factory)
139
- else:
140
- raise ValueError(f"Unknown flow: {options.flow}")
141
- flow(options.stage, options.max_tries, not options.step_mode, not options.auto_confirm)
142
- except Exception as e:
143
- traceback.print_exc()
196
+ else:
197
+ evaluator_factory = None
198
+ return evaluator_factory
144
199
 
145
200
 
146
201
  def load_ipython_extension(ipython):