jupyter-agent 2025.6.104__py3-none-any.whl → 2025.6.105__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- jupyter_agent/bot_actions.py +270 -0
- jupyter_agent/bot_agents/__init__.py +0 -42
- jupyter_agent/bot_agents/base.py +85 -45
- jupyter_agent/bot_agents/master_planner.py +2 -0
- jupyter_agent/bot_agents/output_task_result.py +6 -7
- jupyter_agent/bot_agents/request_user_supply.py +186 -0
- jupyter_agent/bot_agents/task_planner_v3.py +12 -13
- jupyter_agent/bot_agents/task_reasoner.py +2 -2
- jupyter_agent/bot_agents/task_structrue_reasoner.py +19 -12
- jupyter_agent/bot_agents/task_structrue_summarier.py +19 -18
- jupyter_agent/bot_agents/task_summarier.py +2 -2
- jupyter_agent/bot_agents/task_verifier.py +1 -1
- jupyter_agent/bot_agents/task_verify_summarier.py +5 -6
- jupyter_agent/bot_chat.py +2 -2
- jupyter_agent/bot_contexts.py +28 -23
- jupyter_agent/bot_evaluation.py +262 -143
- jupyter_agent/bot_evaluators/__init__.py +0 -0
- jupyter_agent/bot_evaluators/base.py +42 -0
- jupyter_agent/bot_evaluators/dummy_flow.py +20 -0
- jupyter_agent/bot_evaluators/dummy_global.py +20 -0
- jupyter_agent/bot_evaluators/dummy_task.py +20 -0
- jupyter_agent/bot_evaluators/flow_global_planning.py +88 -0
- jupyter_agent/bot_evaluators/flow_task_executor.py +152 -0
- jupyter_agent/bot_flows/__init__.py +0 -4
- jupyter_agent/bot_flows/base.py +84 -19
- jupyter_agent/bot_flows/master_planner.py +6 -2
- jupyter_agent/bot_flows/task_executor_v3.py +31 -17
- jupyter_agent/bot_magics.py +88 -65
- jupyter_agent/bot_outputs.py +37 -43
- jupyter_agent/utils.py +20 -31
- {jupyter_agent-2025.6.104.dist-info → jupyter_agent-2025.6.105.dist-info}/METADATA +34 -4
- jupyter_agent-2025.6.105.dist-info/RECORD +40 -0
- jupyter_agent/bot_agents/task_planner_v1.py +0 -158
- jupyter_agent/bot_agents/task_planner_v2.py +0 -172
- jupyter_agent/bot_flows/task_executor_v1.py +0 -86
- jupyter_agent/bot_flows/task_executor_v2.py +0 -84
- jupyter_agent-2025.6.104.dist-info/RECORD +0 -35
- {jupyter_agent-2025.6.104.dist-info → jupyter_agent-2025.6.105.dist-info}/WHEEL +0 -0
- {jupyter_agent-2025.6.104.dist-info → jupyter_agent-2025.6.105.dist-info}/entry_points.txt +0 -0
- {jupyter_agent-2025.6.104.dist-info → jupyter_agent-2025.6.105.dist-info}/licenses/LICENSE +0 -0
- {jupyter_agent-2025.6.104.dist-info → jupyter_agent-2025.6.105.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,152 @@
|
|
1
|
+
"""
|
2
|
+
Copyright (c) 2025 viewstar000
|
3
|
+
|
4
|
+
This software is released under the MIT License.
|
5
|
+
https://opensource.org/licenses/MIT
|
6
|
+
"""
|
7
|
+
|
8
|
+
import time
|
9
|
+
|
10
|
+
from enum import Enum
|
11
|
+
from typing import Optional, List
|
12
|
+
from pydantic import BaseModel, Field
|
13
|
+
from IPython.display import Markdown
|
14
|
+
from .base import BaseEvaluator
|
15
|
+
from ..bot_outputs import _D, _I, _W, _E, _F, _A, _O, _C, _M, _B
|
16
|
+
from ..bot_evaluation import FlowEvaluationRecord
|
17
|
+
|
18
|
+
|
19
|
+
FLOW_TASK_EXEC_EVAL_PROMPT = """\
|
20
|
+
**角色定义**:
|
21
|
+
|
22
|
+
你是一个任务规划评估专家,负责对任务规划的结果进行评估。
|
23
|
+
|
24
|
+
**任务要求**:
|
25
|
+
|
26
|
+
请你根据任务规划的结果,评估任务规划的质量和准确性,并给出相应的评分和反馈。
|
27
|
+
|
28
|
+
{% include "TASK_OUTPUT_FORMAT" %}
|
29
|
+
|
30
|
+
---
|
31
|
+
|
32
|
+
{% include "TASK_CONTEXTS" %}
|
33
|
+
|
34
|
+
---
|
35
|
+
|
36
|
+
{% include "CODE_CONTEXTS" %}
|
37
|
+
|
38
|
+
---
|
39
|
+
|
40
|
+
**当前子任务规划信息**:
|
41
|
+
|
42
|
+
### 当前子任务规划目标:
|
43
|
+
|
44
|
+
{{ task.subject }}
|
45
|
+
|
46
|
+
{% if task.coding_prompt %}
|
47
|
+
### 当前子任务代码需求:
|
48
|
+
|
49
|
+
{{ task.coding_prompt }}
|
50
|
+
|
51
|
+
### 当前子任务生成的代码:
|
52
|
+
|
53
|
+
```python
|
54
|
+
{{ task.source }}
|
55
|
+
```
|
56
|
+
|
57
|
+
### 当前代码执行的输出与结果:
|
58
|
+
|
59
|
+
{{ task.output }}
|
60
|
+
{% endif %}
|
61
|
+
|
62
|
+
### 当前子任务总结要求:
|
63
|
+
|
64
|
+
{{ task.summary_prompt }}
|
65
|
+
|
66
|
+
|
67
|
+
### 当前子任务输出的分析总结后的最终结果:
|
68
|
+
|
69
|
+
```markdown
|
70
|
+
{{ task.result }}
|
71
|
+
```
|
72
|
+
|
73
|
+
{% if task.important_infos %}
|
74
|
+
### 当前子任务输出的重要信息:
|
75
|
+
|
76
|
+
```json
|
77
|
+
{{ task.important_infos | json }}
|
78
|
+
```
|
79
|
+
{% endif %}
|
80
|
+
|
81
|
+
{% if task.request_below_supply_infos %}
|
82
|
+
### 当前子任务输出的请求用户补充确认的信息:
|
83
|
+
|
84
|
+
```json
|
85
|
+
{{ task.request_below_supply_infos | json }}
|
86
|
+
```
|
87
|
+
{% endif %}
|
88
|
+
|
89
|
+
---
|
90
|
+
|
91
|
+
请按要求给出当前子任务规划的评估结果:
|
92
|
+
"""
|
93
|
+
|
94
|
+
|
95
|
+
# Structured reply expected from the evaluation model for one executed sub-task.
# NOTE(review): pydantic presumably copies a model's docstring into the JSON
# schema "description", and this class is assigned to OUTPUT_JSON_SCHEMA by the
# evaluator in this file, so the docstring below is treated as runtime text and
# is intentionally left untranslated. It reads "task planning evaluation result".
class FlowTaskExecEvalResult(BaseModel):
    """
    任务规划评估结果
    """

    # Whether the final result meets the goal of the current sub-task.
    is_correct: bool = Field(description="最终结果是否符合当前子任务的目标", examples=[True, False])
    # How well the final result meets the sub-task goal; the description asks
    # for a value in 0-1 where >=0.5 means "meets the goal". The range is only
    # stated in the description text, it is not enforced by a validator here.
    correct_score: float = Field(
        description="最终结果符合当前子任务目标的分数,范围0-1,>=0.5表示符合目标,<0.5表示不符合目标",
        examples=[0.95, 0.3],
    )
    # Whether the sub-task's goal planning, code generation and summary follow
    # the global plan (0-1, >=0.5 means compliant per the description).
    planning_score: float = Field(
        description="当前子任务的目标规划、代码生成、总结是否符合全局目标规划要求,范围0-1,>=0.5表示符合要求,<0.5表示不符合要求",
        examples=[0.85, 0.25],
    )
    # Soundness of the reasoning: logical errors or conflicts with earlier
    # sub-tasks lower the score (0-1, >=0.5 means sound per the description).
    reasoning_score: float = Field(
        description="当前子任务的推理过程是否合理,是否存在逻辑错误,是否存在与前置子任务相冲突的情况,"
        "范围0-1,>=0.5表示合理、正确、无冲突,<0.5表示不合理",
        examples=[0.9, 0.4],
    )
    # Quality of the generated code: matches the plan, free of logic errors,
    # redundancy or poor abstraction (0-1, >=0.5 means good per the description).
    coding_score: float = Field(
        description="代码生成的质量评分,代码逻辑是否符合规划要求,是否存在逻辑错误,是否存在冗余、抽象不合理等情况,"
        "范围0-1,>=0.5表示代码质量较高,<0.5表示代码质量较低",
        examples=[0.75, 0.2],
    )
    # Whether important information from earlier tasks was taken into account
    # and whether this sub-task's own important information is complete,
    # accurate and non-conflicting (0-1, >=0.5 means yes per the description).
    important_info_score: float = Field(
        description="重要信息分数,当前子任务的规划、代码生成、总结是否充分考虑了前置任务生成的重要信息,"
        "以及当前子任务的重要信息是否完整、准确、无误导、无冲突,"
        "范围0-1,>=0.5表示重要信息完整、准确,<0.5表示重要信息不完整或不准确",
        examples=[0.9, 0.4],
    )
    # Whether user-supplied information was fully taken into account
    # (0-1, >=0.5 means yes per the description).
    user_supply_info_score: float = Field(
        description="用户补充信息分数,当前子任务的规划、代码生成、总结是否充分考虑了用户补充的信息,"
        "范围0-1,>=0.5表示充分考虑,<0.5表示未充分考虑",
        examples=[0.8, 0.3],
    )
    # Optional free-text evaluation feedback; defaults to None when omitted.
    feedback: Optional[str] = Field(default=None, description="评估反馈")
|
131
|
+
|
132
|
+
|
133
|
+
class FlowTaskExecEvaluator(BaseEvaluator):
    """Evaluator that scores an executed sub-task.

    It is configured with ``FLOW_TASK_EXEC_EVAL_PROMPT`` as its prompt and
    ``FlowTaskExecEvalResult`` as the expected structured output, and converts
    the reply into a ``FlowEvaluationRecord``.
    """

    PROMPT = FLOW_TASK_EXEC_EVAL_PROMPT
    OUTPUT_JSON_SCHEMA = FlowTaskExecEvalResult

    def on_reply(self, reply):
        """Build a ``FlowEvaluationRecord`` from the evaluator reply.

        The reply is first passed through the base class ``on_reply``; the six
        score attributes of that result are then copied into the record.
        ``is_correct`` and ``feedback`` are not forwarded.
        """
        parsed = super().on_reply(reply)
        # Timestamp is taken right after the base-class processing, before the
        # score attributes are read.
        created_at = time.time()
        # Record field names differ from the reply for the last two scores.
        score_fields = {
            "correct_score": parsed.correct_score,
            "planning_score": parsed.planning_score,
            "reasoning_score": parsed.reasoning_score,
            "coding_score": parsed.coding_score,
            "important_score": parsed.important_info_score,
            "user_supply_score": parsed.user_supply_info_score,
        }
        return FlowEvaluationRecord(
            timestamp=created_at,
            evaluator="flow_task_executor",
            **score_fields,
        )
|
@@ -7,14 +7,10 @@ https://opensource.org/licenses/MIT
|
|
7
7
|
|
8
8
|
from .base import BaseTaskFlow
|
9
9
|
from .master_planner import MasterPlannerFlow
|
10
|
-
from .task_executor_v1 import TaskExecutorFlowV1
|
11
|
-
from .task_executor_v2 import TaskExecutorFlowV2
|
12
10
|
from .task_executor_v3 import TaskExecutorFlowV3
|
13
11
|
|
14
12
|
__all__ = [
|
15
13
|
"BaseTaskFlow",
|
16
14
|
"MasterPlannerFlow",
|
17
|
-
"TaskExecutorFlowV1",
|
18
|
-
"TaskExecutorFlowV2",
|
19
15
|
"TaskExecutorFlowV3",
|
20
16
|
]
|
jupyter_agent/bot_flows/base.py
CHANGED
@@ -13,9 +13,11 @@ from enum import Enum
|
|
13
13
|
from typing import List, Dict, Optional, Type
|
14
14
|
from IPython.display import Markdown
|
15
15
|
from ..bot_agents.base import BaseAgent
|
16
|
+
from ..bot_evaluators.dummy_global import DummyGlobalEvaluator
|
17
|
+
from ..bot_evaluators.flow_task_executor import FlowTaskExecEvaluator
|
16
18
|
from ..bot_outputs import _D, _I, _W, _E, _F, _M, _B
|
17
19
|
from ..bot_outputs import set_stage, flush_output, output_evaluation
|
18
|
-
from ..
|
20
|
+
from ..bot_evaluation import FlowEvaluationRecord, StageEvaluationRecord, NotebookEvaluationRecord
|
19
21
|
|
20
22
|
TASK_AGENT_STATE_ERROR = "_AGENT_STATE_ERROR_32534526_"
|
21
23
|
TASK_STAGE_START = "start"
|
@@ -52,10 +54,13 @@ class BaseTaskFlow:
|
|
52
54
|
STAGE_TRANSITIONS: List[StageTransition] = []
|
53
55
|
START_STAGE = TASK_STAGE_START
|
54
56
|
STOP_STAGES = [TASK_STAGE_COMPLETED, TASK_STAGE_GLOBAL_FINISHED]
|
57
|
+
FLOW_EVALUATOR = FlowTaskExecEvaluator
|
58
|
+
GLOBAL_EVALUATOR = DummyGlobalEvaluator
|
55
59
|
|
56
|
-
def __init__(self, notebook_context, agent_factory):
|
60
|
+
def __init__(self, notebook_context, agent_factory, evaluator_factory=None):
|
57
61
|
self.notebook_context = notebook_context
|
58
62
|
self.agent_factory = agent_factory
|
63
|
+
self.evaluator_factory = evaluator_factory
|
59
64
|
self.stage_transitions = {}
|
60
65
|
self.prepare_stage_transitions()
|
61
66
|
|
@@ -175,15 +180,42 @@ class BaseTaskFlow:
|
|
175
180
|
stage_duration = time.time() - stage_st
|
176
181
|
flow_duration += stage_duration
|
177
182
|
_M(f"Stage `{stage}` completed in {stage_duration:.2f} seconds with state `{state}` and failed `{failed}`")
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
|
184
|
-
|
183
|
+
if (
|
184
|
+
self.evaluator_factory is not None
|
185
|
+
and not failed
|
186
|
+
and hasattr(agent, "EVALUATORS")
|
187
|
+
and state in agent.EVALUATORS
|
188
|
+
):
|
189
|
+
# If the agent has evaluators, run them
|
190
|
+
evaluator = self.evaluator_factory(agent.EVALUATORS[state])
|
191
|
+
try:
|
192
|
+
_M(f"**Evaluating** stage `{stage}` with evaluator `{type(evaluator).__name__}` ...")
|
193
|
+
evaluation_result = evaluator()
|
194
|
+
evaluation_result.timestamp = evaluation_result.timestamp or time.time()
|
195
|
+
evaluation_result.evaluator = evaluation_result.evaluator or type(evaluator).__name__
|
196
|
+
evaluation_result.cell_index = self.task.cell_idx
|
197
|
+
evaluation_result.flow = type(self).__name__
|
198
|
+
evaluation_result.stage = str(stage)
|
199
|
+
evaluation_result.agent = type(agent).__name__
|
200
|
+
evaluation_result.execution_duration = stage_duration
|
201
|
+
evaluation_result.is_success = not failed
|
202
|
+
output_evaluation(evaluation_result)
|
203
|
+
except Exception as e:
|
204
|
+
_M(f"**Error** during task evaluation stage `{stage}`: `{type(e)}`: `{e}`")
|
205
|
+
_M(f"```python\n{traceback.format_exc()}\n```")
|
206
|
+
else:
|
207
|
+
output_evaluation(
|
208
|
+
StageEvaluationRecord(
|
209
|
+
timestamp=time.time(),
|
210
|
+
evaluator="default",
|
211
|
+
cell_index=self.task.cell_idx,
|
212
|
+
flow=type(self).__name__,
|
213
|
+
stage=str(stage),
|
214
|
+
agent=type(agent).__name__,
|
215
|
+
execution_duration=stage_duration,
|
216
|
+
is_success=not failed,
|
217
|
+
)
|
185
218
|
)
|
186
|
-
)
|
187
219
|
|
188
220
|
if state != TASK_AGENT_STATE_ERROR:
|
189
221
|
# Agent did not fail, check if we have reached the final stage
|
@@ -232,17 +264,50 @@ class BaseTaskFlow:
|
|
232
264
|
stage_name = stage.value if isinstance(stage, Enum) else stage
|
233
265
|
if stage_name == TASK_STAGE_GLOBAL_FINISHED:
|
234
266
|
_M("Task execution **finished** globally.")
|
235
|
-
|
267
|
+
if self.evaluator_factory is not None and hasattr(self, "GLOBAL_EVALUATOR") and self.GLOBAL_EVALUATOR:
|
268
|
+
evaluator = self.evaluator_factory(self.GLOBAL_EVALUATOR)
|
269
|
+
_M(f"**Evaluating** notebook with evaluator `{type(evaluator).__name__}` ...")
|
270
|
+
evaluation_result = evaluator()
|
271
|
+
evaluation_result.timestamp = evaluation_result.timestamp or time.time()
|
272
|
+
evaluation_result.evaluator = evaluation_result.evaluator or type(evaluator).__name__
|
273
|
+
evaluation_result.cell_index = self.task.cell_idx
|
274
|
+
evaluation_result.is_success = True
|
275
|
+
output_evaluation(evaluation_result)
|
276
|
+
else:
|
277
|
+
output_evaluation(
|
278
|
+
NotebookEvaluationRecord(
|
279
|
+
timestamp=time.time(),
|
280
|
+
evaluator="default",
|
281
|
+
cell_index=self.task.cell_idx,
|
282
|
+
is_success=True,
|
283
|
+
)
|
284
|
+
)
|
236
285
|
elif stage_name == TASK_STAGE_COMPLETED:
|
237
286
|
_M(f"Task execution **completed** in {flow_duration:.2f} seconds with {stage_count} stages.")
|
238
|
-
|
239
|
-
|
240
|
-
|
241
|
-
|
242
|
-
|
243
|
-
|
244
|
-
|
287
|
+
if self.evaluator_factory is not None and hasattr(self, "FLOW_EVALUATOR") and self.FLOW_EVALUATOR:
|
288
|
+
evaluator = self.evaluator_factory(self.FLOW_EVALUATOR)
|
289
|
+
_M(f"**Evaluating** flow `{type(self).__name__}` with evaluator `{type(evaluator).__name__}` ...")
|
290
|
+
evaluation_result = evaluator()
|
291
|
+
evaluation_result.timestamp = evaluation_result.timestamp or time.time()
|
292
|
+
evaluation_result.evaluator = evaluation_result.evaluator or type(evaluator).__name__
|
293
|
+
evaluation_result.cell_index = self.task.cell_idx
|
294
|
+
evaluation_result.flow = type(self).__name__
|
295
|
+
evaluation_result.stage_count = stage_count
|
296
|
+
evaluation_result.execution_duration = flow_duration
|
297
|
+
evaluation_result.is_success = True
|
298
|
+
output_evaluation(evaluation_result)
|
299
|
+
else:
|
300
|
+
# If no evaluator, just output the evaluation record
|
301
|
+
output_evaluation(
|
302
|
+
FlowEvaluationRecord(
|
303
|
+
timestamp=time.time(),
|
304
|
+
evaluator="default",
|
305
|
+
cell_index=self.task.cell_idx,
|
306
|
+
flow=type(self).__name__,
|
307
|
+
stage_count=stage_count,
|
308
|
+
execution_duration=flow_duration,
|
309
|
+
is_success=True,
|
310
|
+
)
|
245
311
|
)
|
246
|
-
)
|
247
312
|
flush_output()
|
248
313
|
return stage
|
@@ -6,12 +6,16 @@ https://opensource.org/licenses/MIT
|
|
6
6
|
"""
|
7
7
|
|
8
8
|
from .base import BaseTaskFlow, StageTransition, TASK_STAGE_START, TASK_STAGE_COMPLETED
|
9
|
+
from ..bot_evaluators.flow_global_planning import FlowGlobalPlanningEvaluator
|
10
|
+
from ..bot_agents.master_planner import MasterPlannerAgent
|
11
|
+
from ..bot_agents.output_task_result import OutputTaskResult
|
9
12
|
|
10
13
|
|
11
14
|
class MasterPlannerFlow(BaseTaskFlow):
    """Planning flow made of a planner stage and a result-output stage.

    The start stage is handled by ``MasterPlannerAgent`` and moves on to the
    completed stage, which is handled by ``OutputTaskResult``. The completed
    stage is the only stop stage, and ``FlowGlobalPlanningEvaluator`` is
    declared as the flow-level evaluator.
    """

    STAGE_TRANSITIONS = [
        StageTransition(
            stage=TASK_STAGE_START,
            agent=MasterPlannerAgent,
            next_stage=TASK_STAGE_COMPLETED,
        ),
        StageTransition(
            stage=TASK_STAGE_COMPLETED,
            agent=OutputTaskResult,
            next_stage=TASK_STAGE_COMPLETED,
        ),
    ]
    STOP_STAGES = [TASK_STAGE_COMPLETED]
    FLOW_EVALUATOR = FlowGlobalPlanningEvaluator
|
@@ -14,21 +14,21 @@ from .base import (
|
|
14
14
|
TASK_STAGE_COMPLETED,
|
15
15
|
TASK_STAGE_GLOBAL_FINISHED,
|
16
16
|
)
|
17
|
-
from ..bot_agents import
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
)
|
26
|
-
from ..bot_agents.task_planner_v3 import TaskPlannerState
|
17
|
+
from ..bot_agents.task_planner_v3 import TaskPlannerAgentV3, TaskPlannerState
|
18
|
+
from ..bot_agents.task_coder import TaskCodingAgent
|
19
|
+
from ..bot_agents.task_debuger import CodeDebugerAgent
|
20
|
+
from ..bot_agents.task_code_executor import CodeExecutor
|
21
|
+
from ..bot_agents.task_structrue_summarier import TaskStructureSummaryAgent, TaskStructureSummaryState
|
22
|
+
from ..bot_agents.task_structrue_reasoner import TaskStructureReasoningAgent, TaskStructureReasonState
|
23
|
+
from ..bot_agents.output_task_result import OutputTaskResult
|
24
|
+
from ..bot_agents.request_user_supply import RequestAboveUserSupplyAgent, RequestBelowUserSupplyAgent
|
27
25
|
|
28
26
|
|
29
27
|
class TaskStage(str, Enum):
|
30
28
|
PLANNING = "planning"
|
31
29
|
PLANNING_PAUSED = "planning_paused"
|
30
|
+
REQUEST_INFO_ABOVE = "request_info_above"
|
31
|
+
REQUEST_INFO_BELOW = "request_info_below"
|
32
32
|
CODING = "coding"
|
33
33
|
EXECUTING = "executing"
|
34
34
|
DEBUGGING = "debugging"
|
@@ -50,10 +50,13 @@ class TaskExecutorFlowV3(BaseTaskFlow):
|
|
50
50
|
states={
|
51
51
|
TaskPlannerState.CODING_PLANNED: TaskStage.CODING,
|
52
52
|
TaskPlannerState.REASONING_PLANNED: TaskStage.REASONING,
|
53
|
-
TaskPlannerState.REQUEST_INFO: TaskStage.
|
53
|
+
TaskPlannerState.REQUEST_INFO: TaskStage.REQUEST_INFO_ABOVE,
|
54
54
|
TaskPlannerState.GLOBAL_FINISHED: TaskStage.GLOBAL_FINISHED,
|
55
55
|
},
|
56
56
|
),
|
57
|
+
StageTransition[TaskStage, None](
|
58
|
+
stage=TaskStage.REQUEST_INFO_ABOVE, agent=RequestAboveUserSupplyAgent, next_stage=TaskStage.PLANNING_PAUSED
|
59
|
+
),
|
57
60
|
StageTransition[TaskStage, TaskPlannerState](
|
58
61
|
stage=TaskStage.PLANNING_PAUSED,
|
59
62
|
agent=TaskPlannerAgentV3,
|
@@ -75,17 +78,28 @@ class TaskExecutorFlowV3(BaseTaskFlow):
|
|
75
78
|
StageTransition[TaskStage, None](
|
76
79
|
stage=TaskStage.DEBUGGING, agent=CodeDebugerAgent, next_stage=TaskStage.EXECUTING
|
77
80
|
),
|
78
|
-
StageTransition[TaskStage,
|
79
|
-
stage=TaskStage.REASONING,
|
81
|
+
StageTransition[TaskStage, TaskStructureReasonState](
|
82
|
+
stage=TaskStage.REASONING,
|
83
|
+
agent=TaskStructureReasoningAgent,
|
84
|
+
states={
|
85
|
+
TaskStructureReasonState.DONE: TaskStage.COMPLETED,
|
86
|
+
TaskStructureReasonState.REQUEST_INFO: TaskStage.REQUEST_INFO_BELOW,
|
87
|
+
},
|
80
88
|
),
|
81
|
-
StageTransition[TaskStage,
|
89
|
+
StageTransition[TaskStage, TaskStructureSummaryState](
|
82
90
|
stage=TaskStage.SUMMARY,
|
83
91
|
agent=TaskStructureSummaryAgent,
|
84
|
-
|
85
|
-
|
86
|
-
|
92
|
+
states={
|
93
|
+
TaskStructureSummaryState.DONE: {
|
94
|
+
TaskAction.DEFAULT: StageNext(stage=TaskStage.COMPLETED),
|
95
|
+
TaskAction.STOP: StageNext(stage=TaskStage.EXECUTING),
|
96
|
+
},
|
97
|
+
TaskStructureSummaryState.REQUEST_INFO: TaskStage.REQUEST_INFO_BELOW,
|
87
98
|
},
|
88
99
|
),
|
100
|
+
StageTransition[TaskStage, None](
|
101
|
+
stage=TaskStage.REQUEST_INFO_BELOW, agent=RequestBelowUserSupplyAgent, next_stage=TaskStage.COMPLETED
|
102
|
+
),
|
89
103
|
StageTransition[TaskStage, bool](
|
90
104
|
stage=TaskStage.COMPLETED,
|
91
105
|
agent=CodeExecutor,
|
jupyter_agent/bot_magics.py
CHANGED
@@ -15,11 +15,14 @@ from IPython.display import Markdown
|
|
15
15
|
from IPython.core.magic import Magics, magics_class, cell_magic
|
16
16
|
from traitlets import Unicode, Int, Bool
|
17
17
|
from traitlets.config.configurable import Configurable
|
18
|
-
from .bot_contexts import NotebookContext
|
19
|
-
from .bot_agents import AgentFactory
|
20
|
-
from .bot_agents.
|
21
|
-
from .
|
22
|
-
from .
|
18
|
+
from .bot_contexts import NotebookContext
|
19
|
+
from .bot_agents.base import AgentModelType, AgentFactory
|
20
|
+
from .bot_agents.request_user_supply import RequestUserSupplyAgent
|
21
|
+
from .bot_evaluators.base import EvaluatorFactory
|
22
|
+
from .bot_flows import MasterPlannerFlow, TaskExecutorFlowV3
|
23
|
+
from .bot_outputs import _D, _I, _W, _E, _F, _M, _B, _O, reset_output, set_logging_level, flush_output
|
24
|
+
from .bot_actions import close_action_dispatcher
|
25
|
+
from .utils import get_env_capbilities
|
23
26
|
|
24
27
|
|
25
28
|
@magics_class
|
@@ -44,6 +47,11 @@ class BotMagics(Magics, Configurable):
|
|
44
47
|
display_think = Bool(True, help="Display chatthink response").tag(config=True)
|
45
48
|
display_response = Bool(False, help="Display chat full response").tag(config=True)
|
46
49
|
support_save_meta = Bool(False, help="Support save metadata to cell").tag(config=True)
|
50
|
+
support_user_confirm = Bool(False, help="Support user confirm").tag(config=True)
|
51
|
+
support_user_supply_info = Bool(False, help="Support user supply info").tag(config=True)
|
52
|
+
support_set_cell_content = Bool(False, help="Support set cell content").tag(config=True)
|
53
|
+
enable_evaluating = Bool(False, help="Enable evaluating task").tag(config=True)
|
54
|
+
enable_supply_mocking = Bool(False, help="Enable supply mocking").tag(config=True)
|
47
55
|
notebook_path = Unicode(None, allow_none=True, help="Path to Notebook file").tag(config=True)
|
48
56
|
default_task_flow = Unicode("v3", allow_none=True, help="Default task flow").tag(config=True)
|
49
57
|
default_max_tries = Int(3, help="Default max tries for task execution").tag(config=True)
|
@@ -73,27 +81,16 @@ class BotMagics(Magics, Configurable):
|
|
73
81
|
help="Run without confirm",
|
74
82
|
)
|
75
83
|
options, _ = parser.parse_known_args(shlex.split(line.strip()))
|
76
|
-
|
77
84
|
return options
|
78
85
|
|
79
|
-
def ensure_notebook_path(self):
|
80
|
-
if self.notebook_path:
|
81
|
-
return self.notebook_path
|
82
|
-
result = self.shell and self.shell.run_cell("globals().get('__vsc_ipynb_file__')")
|
83
|
-
if result and result.success and result.result:
|
84
|
-
self.notebook_path = result.result
|
85
|
-
return self.notebook_path
|
86
|
-
try:
|
87
|
-
self.notebook_path = str(ipynbname.path())
|
88
|
-
return self.notebook_path
|
89
|
-
except Exception as e:
|
90
|
-
_F(f"Failed to get notebook path: {e}")
|
91
|
-
return None
|
92
|
-
|
93
86
|
@cell_magic
|
94
87
|
def bot(self, line, cell):
|
95
88
|
"""Jupyter cell magic: %%bot"""
|
96
89
|
try:
|
90
|
+
reset_output(stage="Logging", logging_level=self.logging_level)
|
91
|
+
_I("Cell magic %%bot executing ...")
|
92
|
+
_D(f"Cell magic called with line: {line}")
|
93
|
+
_D(f"Cell magic called with cell: {repr(cell)[:50]} ...")
|
97
94
|
if not self.ensure_notebook_path():
|
98
95
|
_O(
|
99
96
|
Markdown(
|
@@ -103,11 +100,6 @@ class BotMagics(Magics, Configurable):
|
|
103
100
|
)
|
104
101
|
)
|
105
102
|
return
|
106
|
-
AgentCellContext.SUPPORT_SAVE_META = self.support_save_meta
|
107
|
-
reset_output(stage="Logging", logging_level=self.logging_level)
|
108
|
-
_I("Cell magic %%bot executing ...")
|
109
|
-
_D(f"Cell magic called with line: {line}")
|
110
|
-
_D(f"Cell magic called with cell: {repr(cell)[:50]} ...")
|
111
103
|
if not cell.strip():
|
112
104
|
_O(
|
113
105
|
Markdown(
|
@@ -120,51 +112,23 @@ class BotMagics(Magics, Configurable):
|
|
120
112
|
"%%bot {}\n\n# {}".format(line.strip(), time.strftime("%Y-%m-%d %H:%M:%S")), replace=True
|
121
113
|
)
|
122
114
|
return
|
115
|
+
get_env_capbilities().save_metadata = self.support_save_meta
|
116
|
+
get_env_capbilities().user_confirm = self.support_user_confirm
|
117
|
+
get_env_capbilities().user_supply_info = self.support_user_supply_info
|
118
|
+
get_env_capbilities().set_cell_content = self.support_set_cell_content
|
119
|
+
RequestUserSupplyAgent.MOCK_USER_SUPPLY = self.enable_supply_mocking
|
123
120
|
options = self.parse_args(line)
|
124
|
-
_D(f"Cell magic called with options: {options}")
|
125
121
|
set_logging_level(options.logging_level)
|
122
|
+
_D(f"Cell magic called with options: {options}")
|
126
123
|
nb_context = NotebookContext(line, cell, notebook_path=self.notebook_path)
|
127
|
-
agent_factory =
|
128
|
-
|
129
|
-
display_think=self.display_think,
|
130
|
-
display_message=self.display_message,
|
131
|
-
display_response=self.display_response,
|
132
|
-
)
|
133
|
-
agent_factory.config_model(
|
134
|
-
AgentModelType.DEFAULT,
|
135
|
-
self.default_api_url,
|
136
|
-
self.default_api_key,
|
137
|
-
self.default_model_name,
|
138
|
-
)
|
139
|
-
agent_factory.config_model(
|
140
|
-
AgentModelType.PLANNER,
|
141
|
-
self.planner_api_url,
|
142
|
-
self.planner_api_key,
|
143
|
-
self.planner_model_name,
|
144
|
-
)
|
145
|
-
agent_factory.config_model(
|
146
|
-
AgentModelType.CODING,
|
147
|
-
self.coding_api_url,
|
148
|
-
self.coding_api_key,
|
149
|
-
self.coding_model_name,
|
150
|
-
)
|
151
|
-
agent_factory.config_model(
|
152
|
-
AgentModelType.REASONING,
|
153
|
-
self.reasoning_api_url,
|
154
|
-
self.reasoning_api_key,
|
155
|
-
self.reasoning_model_name,
|
156
|
-
)
|
124
|
+
agent_factory = self.get_agent_factory(nb_context)
|
125
|
+
evaluator_factory = self.get_evaluator_factory(nb_context)
|
157
126
|
if options.planning:
|
158
|
-
flow = MasterPlannerFlow(nb_context, agent_factory)
|
127
|
+
flow = MasterPlannerFlow(nb_context, agent_factory, evaluator_factory)
|
128
|
+
elif options.flow == "v3":
|
129
|
+
flow = TaskExecutorFlowV3(nb_context, agent_factory, evaluator_factory)
|
159
130
|
else:
|
160
|
-
|
161
|
-
flow = TaskExecutorFlowV1(nb_context, agent_factory)
|
162
|
-
elif options.flow == "v2":
|
163
|
-
flow = TaskExecutorFlowV2(nb_context, agent_factory)
|
164
|
-
elif options.flow == "v3":
|
165
|
-
flow = TaskExecutorFlowV3(nb_context, agent_factory)
|
166
|
-
else:
|
167
|
-
raise ValueError(f"Unknown flow: {options.flow}")
|
131
|
+
raise ValueError(f"Unknown flow: {options.flow}")
|
168
132
|
flow(
|
169
133
|
options.stage,
|
170
134
|
options.max_tries,
|
@@ -173,6 +137,65 @@ class BotMagics(Magics, Configurable):
|
|
173
137
|
)
|
174
138
|
except Exception as e:
|
175
139
|
traceback.print_exc()
|
140
|
+
finally:
|
141
|
+
close_action_dispatcher()
|
142
|
+
flush_output()
|
143
|
+
|
144
|
+
def ensure_notebook_path(self):
    """Return the notebook path, resolving and caching it on first use.

    Resolution order:

    1. ``self.notebook_path`` when it is already set.
    2. The value of ``__vsc_ipynb_file__`` or ``__evaluation_ipynb_file__``
       obtained by running a lookup cell in ``self.shell`` (skipped when
       there is no shell).
    3. ``ipynbname.path()``.

    Returns ``None`` when the last fallback raises; the failure is reported
    through ``_F``.
    """
    if not self.notebook_path:
        lookup = "globals().get('__vsc_ipynb_file__') or globals().get('__evaluation_ipynb_file__')"
        outcome = self.shell.run_cell(lookup) if self.shell else None
        if outcome and outcome.success and outcome.result:
            self.notebook_path = outcome.result
        else:
            try:
                self.notebook_path = str(ipynbname.path())
            except Exception as exc:
                # Best effort: report the failure and tell the caller that no
                # path could be determined.
                _F(f"Failed to get notebook path: {exc}")
                return None
    return self.notebook_path
|
159
|
+
|
160
|
+
def _configure_models(self, factory):
    """Register the per-role model settings on *factory* and return it.

    Calls ``factory.config_model`` once for each of the DEFAULT, PLANNER,
    CODING and REASONING model types, in that order, passing the matching
    ``*_api_url``, ``*_api_key`` and ``*_model_name`` settings of this object.
    """
    role_settings = (
        (
            AgentModelType.DEFAULT,
            self.default_api_url,
            self.default_api_key,
            self.default_model_name,
        ),
        (
            AgentModelType.PLANNER,
            self.planner_api_url,
            self.planner_api_key,
            self.planner_model_name,
        ),
        (
            AgentModelType.CODING,
            self.coding_api_url,
            self.coding_api_key,
            self.coding_model_name,
        ),
        (
            AgentModelType.REASONING,
            self.reasoning_api_url,
            self.reasoning_api_key,
            self.reasoning_model_name,
        ),
    )
    for model_type, api_url, api_key, model_name in role_settings:
        factory.config_model(model_type, api_url, api_key, model_name)
    return factory

def get_agent_factory(self, nb_context):
    """Create an ``AgentFactory`` for *nb_context* with all models configured.

    The factory receives this object's ``display_think``, ``display_message``
    and ``display_response`` settings.
    """
    agent_factory = AgentFactory(
        nb_context,
        display_think=self.display_think,
        display_message=self.display_message,
        display_response=self.display_response,
    )
    # Shared with get_evaluator_factory so both factories always receive the
    # same model configuration.
    return self._configure_models(agent_factory)

def get_evaluator_factory(self, nb_context):
    """Create a configured ``EvaluatorFactory``, or ``None`` when disabled.

    Returns ``None`` when ``self.enable_evaluating`` is false; otherwise an
    ``EvaluatorFactory`` for *nb_context* with the same model configuration
    that ``get_agent_factory`` applies.
    """
    if not self.enable_evaluating:
        return None
    return self._configure_models(EvaluatorFactory(nb_context))
|
176
199
|
|
177
200
|
|
178
201
|
def load_ipython_extension(ipython):
|