jupyter-agent 2025.6.104__py3-none-any.whl → 2025.6.105__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41):
  1. jupyter_agent/bot_actions.py +270 -0
  2. jupyter_agent/bot_agents/__init__.py +0 -42
  3. jupyter_agent/bot_agents/base.py +85 -45
  4. jupyter_agent/bot_agents/master_planner.py +2 -0
  5. jupyter_agent/bot_agents/output_task_result.py +6 -7
  6. jupyter_agent/bot_agents/request_user_supply.py +186 -0
  7. jupyter_agent/bot_agents/task_planner_v3.py +12 -13
  8. jupyter_agent/bot_agents/task_reasoner.py +2 -2
  9. jupyter_agent/bot_agents/task_structrue_reasoner.py +19 -12
  10. jupyter_agent/bot_agents/task_structrue_summarier.py +19 -18
  11. jupyter_agent/bot_agents/task_summarier.py +2 -2
  12. jupyter_agent/bot_agents/task_verifier.py +1 -1
  13. jupyter_agent/bot_agents/task_verify_summarier.py +5 -6
  14. jupyter_agent/bot_chat.py +2 -2
  15. jupyter_agent/bot_contexts.py +28 -23
  16. jupyter_agent/bot_evaluation.py +262 -143
  17. jupyter_agent/bot_evaluators/__init__.py +0 -0
  18. jupyter_agent/bot_evaluators/base.py +42 -0
  19. jupyter_agent/bot_evaluators/dummy_flow.py +20 -0
  20. jupyter_agent/bot_evaluators/dummy_global.py +20 -0
  21. jupyter_agent/bot_evaluators/dummy_task.py +20 -0
  22. jupyter_agent/bot_evaluators/flow_global_planning.py +88 -0
  23. jupyter_agent/bot_evaluators/flow_task_executor.py +152 -0
  24. jupyter_agent/bot_flows/__init__.py +0 -4
  25. jupyter_agent/bot_flows/base.py +84 -19
  26. jupyter_agent/bot_flows/master_planner.py +6 -2
  27. jupyter_agent/bot_flows/task_executor_v3.py +31 -17
  28. jupyter_agent/bot_magics.py +88 -65
  29. jupyter_agent/bot_outputs.py +37 -43
  30. jupyter_agent/utils.py +20 -31
  31. {jupyter_agent-2025.6.104.dist-info → jupyter_agent-2025.6.105.dist-info}/METADATA +34 -4
  32. jupyter_agent-2025.6.105.dist-info/RECORD +40 -0
  33. jupyter_agent/bot_agents/task_planner_v1.py +0 -158
  34. jupyter_agent/bot_agents/task_planner_v2.py +0 -172
  35. jupyter_agent/bot_flows/task_executor_v1.py +0 -86
  36. jupyter_agent/bot_flows/task_executor_v2.py +0 -84
  37. jupyter_agent-2025.6.104.dist-info/RECORD +0 -35
  38. {jupyter_agent-2025.6.104.dist-info → jupyter_agent-2025.6.105.dist-info}/WHEEL +0 -0
  39. {jupyter_agent-2025.6.104.dist-info → jupyter_agent-2025.6.105.dist-info}/entry_points.txt +0 -0
  40. {jupyter_agent-2025.6.104.dist-info → jupyter_agent-2025.6.105.dist-info}/licenses/LICENSE +0 -0
  41. {jupyter_agent-2025.6.104.dist-info → jupyter_agent-2025.6.105.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,152 @@
1
+ """
2
+ Copyright (c) 2025 viewstar000
3
+
4
+ This software is released under the MIT License.
5
+ https://opensource.org/licenses/MIT
6
+ """
7
+
8
+ import time
9
+
10
+ from enum import Enum
11
+ from typing import Optional, List
12
+ from pydantic import BaseModel, Field
13
+ from IPython.display import Markdown
14
+ from .base import BaseEvaluator
15
+ from ..bot_outputs import _D, _I, _W, _E, _F, _A, _O, _C, _M, _B
16
+ from ..bot_evaluation import FlowEvaluationRecord
17
+
18
+
19
+ FLOW_TASK_EXEC_EVAL_PROMPT = """\
20
+ **角色定义**:
21
+
22
+ 你是一个任务规划评估专家,负责对任务规划的结果进行评估。
23
+
24
+ **任务要求**:
25
+
26
+ 请你根据任务规划的结果,评估任务规划的质量和准确性,并给出相应的评分和反馈。
27
+
28
+ {% include "TASK_OUTPUT_FORMAT" %}
29
+
30
+ ---
31
+
32
+ {% include "TASK_CONTEXTS" %}
33
+
34
+ ---
35
+
36
+ {% include "CODE_CONTEXTS" %}
37
+
38
+ ---
39
+
40
+ **当前子任务规划信息**:
41
+
42
+ ### 当前子任务规划目标:
43
+
44
+ {{ task.subject }}
45
+
46
+ {% if task.coding_prompt %}
47
+ ### 当前子任务代码需求:
48
+
49
+ {{ task.coding_prompt }}
50
+
51
+ ### 当前子任务生成的代码:
52
+
53
+ ```python
54
+ {{ task.source }}
55
+ ```
56
+
57
+ ### 当前代码执行的输出与结果:
58
+
59
+ {{ task.output }}
60
+ {% endif %}
61
+
62
+ ### 当前子任务总结要求:
63
+
64
+ {{ task.summary_prompt }}
65
+
66
+
67
+ ### 当前子任务输出的分析总结后的最终结果:
68
+
69
+ ```markdown
70
+ {{ task.result }}
71
+ ```
72
+
73
+ {% if task.important_infos %}
74
+ ### 当前子任务输出的重要信息:
75
+
76
+ ```json
77
+ {{ task.important_infos | json }}
78
+ ```
79
+ {% endif %}
80
+
81
+ {% if task.request_below_supply_infos %}
82
+ ### 当前子任务输出的请求用户补充确认的信息:
83
+
84
+ ```json
85
+ {{ task.request_below_supply_infos | json }}
86
+ ```
87
+ {% endif %}
88
+
89
+ ---
90
+
91
+ 请按要求给出当前子任务规划的评估结果:
92
+ """
93
+
94
+
95
+ class FlowTaskExecEvalResult(BaseModel):
96
+ """
97
+ 任务规划评估结果
98
+ """
99
+
100
+ is_correct: bool = Field(description="最终结果是否符合当前子任务的目标", examples=[True, False])
101
+ correct_score: float = Field(
102
+ description="最终结果符合当前子任务目标的分数,范围0-1,>=0.5表示符合目标,<0.5表示不符合目标",
103
+ examples=[0.95, 0.3],
104
+ )
105
+ planning_score: float = Field(
106
+ description="当前子任务的目标规划、代码生成、总结是否符合全局目标规划要求,范围0-1,>=0.5表示符合要求,<0.5表示不符合要求",
107
+ examples=[0.85, 0.25],
108
+ )
109
+ reasoning_score: float = Field(
110
+ description="当前子任务的推理过程是否合理,是否存在逻辑错误,是否存在与前置子任务相冲突的情况,"
111
+ "范围0-1,>=0.5表示合理、正确、无冲突,<0.5表示不合理",
112
+ examples=[0.9, 0.4],
113
+ )
114
+ coding_score: float = Field(
115
+ description="代码生成的质量评分,代码逻辑是否符合规划要求,是否存在逻辑错误,是否存在冗余、抽象不合理等情况,"
116
+ "范围0-1,>=0.5表示代码质量较高,<0.5表示代码质量较低",
117
+ examples=[0.75, 0.2],
118
+ )
119
+ important_info_score: float = Field(
120
+ description="重要信息分数,当前子任务的规划、代码生成、总结是否充分考虑了前置任务生成的重要信息,"
121
+ "以及当前子任务的重要信息是否完整、准确、无误导、无冲突,"
122
+ "范围0-1,>=0.5表示重要信息完整、准确,<0.5表示重要信息不完整或不准确",
123
+ examples=[0.9, 0.4],
124
+ )
125
+ user_supply_info_score: float = Field(
126
+ description="用户补充信息分数,当前子任务的规划、代码生成、总结是否充分考虑了用户补充的信息,"
127
+ "范围0-1,>=0.5表示充分考虑,<0.5表示未充分考虑",
128
+ examples=[0.8, 0.3],
129
+ )
130
+ feedback: Optional[str] = Field(default=None, description="评估反馈")
131
+
132
+
133
+ class FlowTaskExecEvaluator(BaseEvaluator):
134
+ """
135
+ 任务规划评估器
136
+ """
137
+
138
+ PROMPT = FLOW_TASK_EXEC_EVAL_PROMPT
139
+ OUTPUT_JSON_SCHEMA = FlowTaskExecEvalResult
140
+
141
+ def on_reply(self, reply):
142
+ reply = super().on_reply(reply)
143
+ return FlowEvaluationRecord(
144
+ timestamp=time.time(),
145
+ evaluator="flow_task_executor",
146
+ correct_score=reply.correct_score,
147
+ planning_score=reply.planning_score,
148
+ reasoning_score=reply.reasoning_score,
149
+ coding_score=reply.coding_score,
150
+ important_score=reply.important_info_score,
151
+ user_supply_score=reply.user_supply_info_score,
152
+ )
@@ -7,14 +7,10 @@ https://opensource.org/licenses/MIT
7
7
 
8
8
  from .base import BaseTaskFlow
9
9
  from .master_planner import MasterPlannerFlow
10
- from .task_executor_v1 import TaskExecutorFlowV1
11
- from .task_executor_v2 import TaskExecutorFlowV2
12
10
  from .task_executor_v3 import TaskExecutorFlowV3
13
11
 
14
12
  __all__ = [
15
13
  "BaseTaskFlow",
16
14
  "MasterPlannerFlow",
17
- "TaskExecutorFlowV1",
18
- "TaskExecutorFlowV2",
19
15
  "TaskExecutorFlowV3",
20
16
  ]
@@ -13,9 +13,11 @@ from enum import Enum
13
13
  from typing import List, Dict, Optional, Type
14
14
  from IPython.display import Markdown
15
15
  from ..bot_agents.base import BaseAgent
16
+ from ..bot_evaluators.dummy_global import DummyGlobalEvaluator
17
+ from ..bot_evaluators.flow_task_executor import FlowTaskExecEvaluator
16
18
  from ..bot_outputs import _D, _I, _W, _E, _F, _M, _B
17
19
  from ..bot_outputs import set_stage, flush_output, output_evaluation
18
- from ..bot_outputs import FlowEvalutionRecord, StageEvalutionRecord, NotebookEvalutionRecord
20
+ from ..bot_evaluation import FlowEvaluationRecord, StageEvaluationRecord, NotebookEvaluationRecord
19
21
 
20
22
  TASK_AGENT_STATE_ERROR = "_AGENT_STATE_ERROR_32534526_"
21
23
  TASK_STAGE_START = "start"
@@ -52,10 +54,13 @@ class BaseTaskFlow:
52
54
  STAGE_TRANSITIONS: List[StageTransition] = []
53
55
  START_STAGE = TASK_STAGE_START
54
56
  STOP_STAGES = [TASK_STAGE_COMPLETED, TASK_STAGE_GLOBAL_FINISHED]
57
+ FLOW_EVALUATOR = FlowTaskExecEvaluator
58
+ GLOBAL_EVALUATOR = DummyGlobalEvaluator
55
59
 
56
- def __init__(self, notebook_context, agent_factory):
60
+ def __init__(self, notebook_context, agent_factory, evaluator_factory=None):
57
61
  self.notebook_context = notebook_context
58
62
  self.agent_factory = agent_factory
63
+ self.evaluator_factory = evaluator_factory
59
64
  self.stage_transitions = {}
60
65
  self.prepare_stage_transitions()
61
66
 
@@ -175,15 +180,42 @@ class BaseTaskFlow:
175
180
  stage_duration = time.time() - stage_st
176
181
  flow_duration += stage_duration
177
182
  _M(f"Stage `{stage}` completed in {stage_duration:.2f} seconds with state `{state}` and failed `{failed}`")
178
- output_evaluation(
179
- StageEvalutionRecord(
180
- cell_index=self.task.cell_idx,
181
- flow=type(self).__name__,
182
- stage=str(stage),
183
- execution_duration=stage_duration,
184
- is_success=not failed,
183
+ if (
184
+ self.evaluator_factory is not None
185
+ and not failed
186
+ and hasattr(agent, "EVALUATORS")
187
+ and state in agent.EVALUATORS
188
+ ):
189
+ # If the agent has evaluators, run them
190
+ evaluator = self.evaluator_factory(agent.EVALUATORS[state])
191
+ try:
192
+ _M(f"**Evaluating** stage `{stage}` with evaluator `{type(evaluator).__name__}` ...")
193
+ evaluation_result = evaluator()
194
+ evaluation_result.timestamp = evaluation_result.timestamp or time.time()
195
+ evaluation_result.evaluator = evaluation_result.evaluator or type(evaluator).__name__
196
+ evaluation_result.cell_index = self.task.cell_idx
197
+ evaluation_result.flow = type(self).__name__
198
+ evaluation_result.stage = str(stage)
199
+ evaluation_result.agent = type(agent).__name__
200
+ evaluation_result.execution_duration = stage_duration
201
+ evaluation_result.is_success = not failed
202
+ output_evaluation(evaluation_result)
203
+ except Exception as e:
204
+ _M(f"**Error** during task evaluation stage `{stage}`: `{type(e)}`: `{e}`")
205
+ _M(f"```python\n{traceback.format_exc()}\n```")
206
+ else:
207
+ output_evaluation(
208
+ StageEvaluationRecord(
209
+ timestamp=time.time(),
210
+ evaluator="default",
211
+ cell_index=self.task.cell_idx,
212
+ flow=type(self).__name__,
213
+ stage=str(stage),
214
+ agent=type(agent).__name__,
215
+ execution_duration=stage_duration,
216
+ is_success=not failed,
217
+ )
185
218
  )
186
- )
187
219
 
188
220
  if state != TASK_AGENT_STATE_ERROR:
189
221
  # Agent did not fail, check if we have reached the final stage
@@ -232,17 +264,50 @@ class BaseTaskFlow:
232
264
  stage_name = stage.value if isinstance(stage, Enum) else stage
233
265
  if stage_name == TASK_STAGE_GLOBAL_FINISHED:
234
266
  _M("Task execution **finished** globally.")
235
- output_evaluation(NotebookEvalutionRecord(cell_index=self.task.cell_idx, is_success=True))
267
+ if self.evaluator_factory is not None and hasattr(self, "GLOBAL_EVALUATOR") and self.GLOBAL_EVALUATOR:
268
+ evaluator = self.evaluator_factory(self.GLOBAL_EVALUATOR)
269
+ _M(f"**Evaluating** notebook with evaluator `{type(evaluator).__name__}` ...")
270
+ evaluation_result = evaluator()
271
+ evaluation_result.timestamp = evaluation_result.timestamp or time.time()
272
+ evaluation_result.evaluator = evaluation_result.evaluator or type(evaluator).__name__
273
+ evaluation_result.cell_index = self.task.cell_idx
274
+ evaluation_result.is_success = True
275
+ output_evaluation(evaluation_result)
276
+ else:
277
+ output_evaluation(
278
+ NotebookEvaluationRecord(
279
+ timestamp=time.time(),
280
+ evaluator="default",
281
+ cell_index=self.task.cell_idx,
282
+ is_success=True,
283
+ )
284
+ )
236
285
  elif stage_name == TASK_STAGE_COMPLETED:
237
286
  _M(f"Task execution **completed** in {flow_duration:.2f} seconds with {stage_count} stages.")
238
- output_evaluation(
239
- FlowEvalutionRecord(
240
- cell_index=self.task.cell_idx,
241
- flow=type(self).__name__,
242
- stage_count=stage_count,
243
- execution_duration=flow_duration,
244
- is_success=True,
287
+ if self.evaluator_factory is not None and hasattr(self, "FLOW_EVALUATOR") and self.FLOW_EVALUATOR:
288
+ evaluator = self.evaluator_factory(self.FLOW_EVALUATOR)
289
+ _M(f"**Evaluating** flow `{type(self).__name__}` with evaluator `{type(evaluator).__name__}` ...")
290
+ evaluation_result = evaluator()
291
+ evaluation_result.timestamp = evaluation_result.timestamp or time.time()
292
+ evaluation_result.evaluator = evaluation_result.evaluator or type(evaluator).__name__
293
+ evaluation_result.cell_index = self.task.cell_idx
294
+ evaluation_result.flow = type(self).__name__
295
+ evaluation_result.stage_count = stage_count
296
+ evaluation_result.execution_duration = flow_duration
297
+ evaluation_result.is_success = True
298
+ output_evaluation(evaluation_result)
299
+ else:
300
+ # If no evaluator, just output the evaluation record
301
+ output_evaluation(
302
+ FlowEvaluationRecord(
303
+ timestamp=time.time(),
304
+ evaluator="default",
305
+ cell_index=self.task.cell_idx,
306
+ flow=type(self).__name__,
307
+ stage_count=stage_count,
308
+ execution_duration=flow_duration,
309
+ is_success=True,
310
+ )
245
311
  )
246
- )
247
312
  flush_output()
248
313
  return stage
@@ -6,12 +6,16 @@ https://opensource.org/licenses/MIT
6
6
  """
7
7
 
8
8
  from .base import BaseTaskFlow, StageTransition, TASK_STAGE_START, TASK_STAGE_COMPLETED
9
+ from ..bot_evaluators.flow_global_planning import FlowGlobalPlanningEvaluator
10
+ from ..bot_agents.master_planner import MasterPlannerAgent
11
+ from ..bot_agents.output_task_result import OutputTaskResult
9
12
 
10
13
 
11
14
  class MasterPlannerFlow(BaseTaskFlow):
12
15
 
13
16
  STAGE_TRANSITIONS = [
14
- StageTransition(stage=TASK_STAGE_START, agent="MasterPlannerAgent", next_stage=TASK_STAGE_COMPLETED),
15
- StageTransition(stage=TASK_STAGE_COMPLETED, agent="OutputTaskResult", next_stage=TASK_STAGE_COMPLETED),
17
+ StageTransition(stage=TASK_STAGE_START, agent=MasterPlannerAgent, next_stage=TASK_STAGE_COMPLETED),
18
+ StageTransition(stage=TASK_STAGE_COMPLETED, agent=OutputTaskResult, next_stage=TASK_STAGE_COMPLETED),
16
19
  ]
17
20
  STOP_STAGES = [TASK_STAGE_COMPLETED]
21
+ FLOW_EVALUATOR = FlowGlobalPlanningEvaluator
@@ -14,21 +14,21 @@ from .base import (
14
14
  TASK_STAGE_COMPLETED,
15
15
  TASK_STAGE_GLOBAL_FINISHED,
16
16
  )
17
- from ..bot_agents import (
18
- TaskPlannerAgentV3,
19
- TaskCodingAgent,
20
- CodeDebugerAgent,
21
- CodeExecutor,
22
- TaskStructureSummaryAgent,
23
- TaskStructureReasoningAgent,
24
- OutputTaskResult,
25
- )
26
- from ..bot_agents.task_planner_v3 import TaskPlannerState
17
+ from ..bot_agents.task_planner_v3 import TaskPlannerAgentV3, TaskPlannerState
18
+ from ..bot_agents.task_coder import TaskCodingAgent
19
+ from ..bot_agents.task_debuger import CodeDebugerAgent
20
+ from ..bot_agents.task_code_executor import CodeExecutor
21
+ from ..bot_agents.task_structrue_summarier import TaskStructureSummaryAgent, TaskStructureSummaryState
22
+ from ..bot_agents.task_structrue_reasoner import TaskStructureReasoningAgent, TaskStructureReasonState
23
+ from ..bot_agents.output_task_result import OutputTaskResult
24
+ from ..bot_agents.request_user_supply import RequestAboveUserSupplyAgent, RequestBelowUserSupplyAgent
27
25
 
28
26
 
29
27
  class TaskStage(str, Enum):
30
28
  PLANNING = "planning"
31
29
  PLANNING_PAUSED = "planning_paused"
30
+ REQUEST_INFO_ABOVE = "request_info_above"
31
+ REQUEST_INFO_BELOW = "request_info_below"
32
32
  CODING = "coding"
33
33
  EXECUTING = "executing"
34
34
  DEBUGGING = "debugging"
@@ -50,10 +50,13 @@ class TaskExecutorFlowV3(BaseTaskFlow):
50
50
  states={
51
51
  TaskPlannerState.CODING_PLANNED: TaskStage.CODING,
52
52
  TaskPlannerState.REASONING_PLANNED: TaskStage.REASONING,
53
- TaskPlannerState.REQUEST_INFO: TaskStage.PLANNING_PAUSED,
53
+ TaskPlannerState.REQUEST_INFO: TaskStage.REQUEST_INFO_ABOVE,
54
54
  TaskPlannerState.GLOBAL_FINISHED: TaskStage.GLOBAL_FINISHED,
55
55
  },
56
56
  ),
57
+ StageTransition[TaskStage, None](
58
+ stage=TaskStage.REQUEST_INFO_ABOVE, agent=RequestAboveUserSupplyAgent, next_stage=TaskStage.PLANNING_PAUSED
59
+ ),
57
60
  StageTransition[TaskStage, TaskPlannerState](
58
61
  stage=TaskStage.PLANNING_PAUSED,
59
62
  agent=TaskPlannerAgentV3,
@@ -75,17 +78,28 @@ class TaskExecutorFlowV3(BaseTaskFlow):
75
78
  StageTransition[TaskStage, None](
76
79
  stage=TaskStage.DEBUGGING, agent=CodeDebugerAgent, next_stage=TaskStage.EXECUTING
77
80
  ),
78
- StageTransition[TaskStage, None](
79
- stage=TaskStage.REASONING, agent=TaskStructureReasoningAgent, next_stage=TaskStage.COMPLETED
81
+ StageTransition[TaskStage, TaskStructureReasonState](
82
+ stage=TaskStage.REASONING,
83
+ agent=TaskStructureReasoningAgent,
84
+ states={
85
+ TaskStructureReasonState.DONE: TaskStage.COMPLETED,
86
+ TaskStructureReasonState.REQUEST_INFO: TaskStage.REQUEST_INFO_BELOW,
87
+ },
80
88
  ),
81
- StageTransition[TaskStage, None](
89
+ StageTransition[TaskStage, TaskStructureSummaryState](
82
90
  stage=TaskStage.SUMMARY,
83
91
  agent=TaskStructureSummaryAgent,
84
- next_stage={
85
- TaskAction.DEFAULT: StageNext(stage=TaskStage.COMPLETED),
86
- TaskAction.STOP: StageNext(stage=TaskStage.EXECUTING),
92
+ states={
93
+ TaskStructureSummaryState.DONE: {
94
+ TaskAction.DEFAULT: StageNext(stage=TaskStage.COMPLETED),
95
+ TaskAction.STOP: StageNext(stage=TaskStage.EXECUTING),
96
+ },
97
+ TaskStructureSummaryState.REQUEST_INFO: TaskStage.REQUEST_INFO_BELOW,
87
98
  },
88
99
  ),
100
+ StageTransition[TaskStage, None](
101
+ stage=TaskStage.REQUEST_INFO_BELOW, agent=RequestBelowUserSupplyAgent, next_stage=TaskStage.COMPLETED
102
+ ),
89
103
  StageTransition[TaskStage, bool](
90
104
  stage=TaskStage.COMPLETED,
91
105
  agent=CodeExecutor,
@@ -15,11 +15,14 @@ from IPython.display import Markdown
15
15
  from IPython.core.magic import Magics, magics_class, cell_magic
16
16
  from traitlets import Unicode, Int, Bool
17
17
  from traitlets.config.configurable import Configurable
18
- from .bot_contexts import NotebookContext, AgentCellContext
19
- from .bot_agents import AgentFactory
20
- from .bot_agents.base import AgentModelType
21
- from .bot_flows import MasterPlannerFlow, TaskExecutorFlowV1, TaskExecutorFlowV2, TaskExecutorFlowV3
22
- from .bot_outputs import _D, _I, _W, _E, _F, _M, _B, _O, reset_output, set_logging_level
18
+ from .bot_contexts import NotebookContext
19
+ from .bot_agents.base import AgentModelType, AgentFactory
20
+ from .bot_agents.request_user_supply import RequestUserSupplyAgent
21
+ from .bot_evaluators.base import EvaluatorFactory
22
+ from .bot_flows import MasterPlannerFlow, TaskExecutorFlowV3
23
+ from .bot_outputs import _D, _I, _W, _E, _F, _M, _B, _O, reset_output, set_logging_level, flush_output
24
+ from .bot_actions import close_action_dispatcher
25
+ from .utils import get_env_capbilities
23
26
 
24
27
 
25
28
  @magics_class
@@ -44,6 +47,11 @@ class BotMagics(Magics, Configurable):
44
47
  display_think = Bool(True, help="Display chatthink response").tag(config=True)
45
48
  display_response = Bool(False, help="Display chat full response").tag(config=True)
46
49
  support_save_meta = Bool(False, help="Support save metadata to cell").tag(config=True)
50
+ support_user_confirm = Bool(False, help="Support user confirm").tag(config=True)
51
+ support_user_supply_info = Bool(False, help="Support user supply info").tag(config=True)
52
+ support_set_cell_content = Bool(False, help="Support set cell content").tag(config=True)
53
+ enable_evaluating = Bool(False, help="Enable evaluating task").tag(config=True)
54
+ enable_supply_mocking = Bool(False, help="Enable supply mocking").tag(config=True)
47
55
  notebook_path = Unicode(None, allow_none=True, help="Path to Notebook file").tag(config=True)
48
56
  default_task_flow = Unicode("v3", allow_none=True, help="Default task flow").tag(config=True)
49
57
  default_max_tries = Int(3, help="Default max tries for task execution").tag(config=True)
@@ -73,27 +81,16 @@ class BotMagics(Magics, Configurable):
73
81
  help="Run without confirm",
74
82
  )
75
83
  options, _ = parser.parse_known_args(shlex.split(line.strip()))
76
-
77
84
  return options
78
85
 
79
- def ensure_notebook_path(self):
80
- if self.notebook_path:
81
- return self.notebook_path
82
- result = self.shell and self.shell.run_cell("globals().get('__vsc_ipynb_file__')")
83
- if result and result.success and result.result:
84
- self.notebook_path = result.result
85
- return self.notebook_path
86
- try:
87
- self.notebook_path = str(ipynbname.path())
88
- return self.notebook_path
89
- except Exception as e:
90
- _F(f"Failed to get notebook path: {e}")
91
- return None
92
-
93
86
  @cell_magic
94
87
  def bot(self, line, cell):
95
88
  """Jupyter cell magic: %%bot"""
96
89
  try:
90
+ reset_output(stage="Logging", logging_level=self.logging_level)
91
+ _I("Cell magic %%bot executing ...")
92
+ _D(f"Cell magic called with line: {line}")
93
+ _D(f"Cell magic called with cell: {repr(cell)[:50]} ...")
97
94
  if not self.ensure_notebook_path():
98
95
  _O(
99
96
  Markdown(
@@ -103,11 +100,6 @@ class BotMagics(Magics, Configurable):
103
100
  )
104
101
  )
105
102
  return
106
- AgentCellContext.SUPPORT_SAVE_META = self.support_save_meta
107
- reset_output(stage="Logging", logging_level=self.logging_level)
108
- _I("Cell magic %%bot executing ...")
109
- _D(f"Cell magic called with line: {line}")
110
- _D(f"Cell magic called with cell: {repr(cell)[:50]} ...")
111
103
  if not cell.strip():
112
104
  _O(
113
105
  Markdown(
@@ -120,51 +112,23 @@ class BotMagics(Magics, Configurable):
120
112
  "%%bot {}\n\n# {}".format(line.strip(), time.strftime("%Y-%m-%d %H:%M:%S")), replace=True
121
113
  )
122
114
  return
115
+ get_env_capbilities().save_metadata = self.support_save_meta
116
+ get_env_capbilities().user_confirm = self.support_user_confirm
117
+ get_env_capbilities().user_supply_info = self.support_user_supply_info
118
+ get_env_capbilities().set_cell_content = self.support_set_cell_content
119
+ RequestUserSupplyAgent.MOCK_USER_SUPPLY = self.enable_supply_mocking
123
120
  options = self.parse_args(line)
124
- _D(f"Cell magic called with options: {options}")
125
121
  set_logging_level(options.logging_level)
122
+ _D(f"Cell magic called with options: {options}")
126
123
  nb_context = NotebookContext(line, cell, notebook_path=self.notebook_path)
127
- agent_factory = AgentFactory(
128
- nb_context,
129
- display_think=self.display_think,
130
- display_message=self.display_message,
131
- display_response=self.display_response,
132
- )
133
- agent_factory.config_model(
134
- AgentModelType.DEFAULT,
135
- self.default_api_url,
136
- self.default_api_key,
137
- self.default_model_name,
138
- )
139
- agent_factory.config_model(
140
- AgentModelType.PLANNER,
141
- self.planner_api_url,
142
- self.planner_api_key,
143
- self.planner_model_name,
144
- )
145
- agent_factory.config_model(
146
- AgentModelType.CODING,
147
- self.coding_api_url,
148
- self.coding_api_key,
149
- self.coding_model_name,
150
- )
151
- agent_factory.config_model(
152
- AgentModelType.REASONING,
153
- self.reasoning_api_url,
154
- self.reasoning_api_key,
155
- self.reasoning_model_name,
156
- )
124
+ agent_factory = self.get_agent_factory(nb_context)
125
+ evaluator_factory = self.get_evaluator_factory(nb_context)
157
126
  if options.planning:
158
- flow = MasterPlannerFlow(nb_context, agent_factory)
127
+ flow = MasterPlannerFlow(nb_context, agent_factory, evaluator_factory)
128
+ elif options.flow == "v3":
129
+ flow = TaskExecutorFlowV3(nb_context, agent_factory, evaluator_factory)
159
130
  else:
160
- if options.flow == "v1":
161
- flow = TaskExecutorFlowV1(nb_context, agent_factory)
162
- elif options.flow == "v2":
163
- flow = TaskExecutorFlowV2(nb_context, agent_factory)
164
- elif options.flow == "v3":
165
- flow = TaskExecutorFlowV3(nb_context, agent_factory)
166
- else:
167
- raise ValueError(f"Unknown flow: {options.flow}")
131
+ raise ValueError(f"Unknown flow: {options.flow}")
168
132
  flow(
169
133
  options.stage,
170
134
  options.max_tries,
@@ -173,6 +137,65 @@ class BotMagics(Magics, Configurable):
173
137
  )
174
138
  except Exception as e:
175
139
  traceback.print_exc()
140
+ finally:
141
+ close_action_dispatcher()
142
+ flush_output()
143
+
144
+ def ensure_notebook_path(self):
145
+ if self.notebook_path:
146
+ return self.notebook_path
147
+ result = self.shell and self.shell.run_cell(
148
+ "globals().get('__vsc_ipynb_file__') or globals().get('__evaluation_ipynb_file__')"
149
+ )
150
+ if result and result.success and result.result:
151
+ self.notebook_path = result.result
152
+ return self.notebook_path
153
+ try:
154
+ self.notebook_path = str(ipynbname.path())
155
+ return self.notebook_path
156
+ except Exception as e:
157
+ _F(f"Failed to get notebook path: {e}")
158
+ return None
159
+
160
+ def get_agent_factory(self, nb_context):
161
+ agent_factory = AgentFactory(
162
+ nb_context,
163
+ display_think=self.display_think,
164
+ display_message=self.display_message,
165
+ display_response=self.display_response,
166
+ )
167
+ agent_factory.config_model(
168
+ AgentModelType.DEFAULT, self.default_api_url, self.default_api_key, self.default_model_name
169
+ )
170
+ agent_factory.config_model(
171
+ AgentModelType.PLANNER, self.planner_api_url, self.planner_api_key, self.planner_model_name
172
+ )
173
+ agent_factory.config_model(
174
+ AgentModelType.CODING, self.coding_api_url, self.coding_api_key, self.coding_model_name
175
+ )
176
+ agent_factory.config_model(
177
+ AgentModelType.REASONING, self.reasoning_api_url, self.reasoning_api_key, self.reasoning_model_name
178
+ )
179
+ return agent_factory
180
+
181
+ def get_evaluator_factory(self, nb_context):
182
+ if self.enable_evaluating:
183
+ evaluator_factory = EvaluatorFactory(nb_context)
184
+ evaluator_factory.config_model(
185
+ AgentModelType.DEFAULT, self.default_api_url, self.default_api_key, self.default_model_name
186
+ )
187
+ evaluator_factory.config_model(
188
+ AgentModelType.PLANNER, self.planner_api_url, self.planner_api_key, self.planner_model_name
189
+ )
190
+ evaluator_factory.config_model(
191
+ AgentModelType.CODING, self.coding_api_url, self.coding_api_key, self.coding_model_name
192
+ )
193
+ evaluator_factory.config_model(
194
+ AgentModelType.REASONING, self.reasoning_api_url, self.reasoning_api_key, self.reasoning_model_name
195
+ )
196
+ else:
197
+ evaluator_factory = None
198
+ return evaluator_factory
176
199
 
177
200
 
178
201
  def load_ipython_extension(ipython):