jupyter-agent 2025.6.103__py3-none-any.whl → 2025.6.104__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,206 @@
1
+ """
2
+ Copyright (c) 2025 viewstar000
3
+
4
+ This software is released under the MIT License.
5
+ https://opensource.org/licenses/MIT
6
+ """
7
+
8
+ import os
9
+ import time
10
+ import json
11
+ import argparse
12
+ import nbformat
13
+
14
+ from pathlib import Path
15
+ from nbclient.client import NotebookClient
16
+ from .bot_outputs import NotebookEvalutionRecord
17
+
18
+
19
def run_notebook(
    input_path: str | Path,
    output_path: str | Path = "",
    inplace: bool = False,
    timeout: int = -1,
    startup_timeout: int = 60,
    allow_errors: bool = False,
    kernel_name: str = "",
    skip_cells_with_tag: str = "skip-execution",
    evaluation_path: str | Path = "",
) -> None:
    """Execute a notebook with nbclient and collect jupyter-agent evaluation records.

    The notebook is saved to the output path before execution, after every
    executed cell, and once more when execution ends (normally or by an
    exception), so partial results are never lost.

    Args:
        input_path: Notebook to run; the suffix is forced to ``.ipynb``.
        output_path: Where the executed notebook is written. When empty and
            ``inplace`` is false, ``<input name>_executed.ipynb`` next to the
            input is used.
        inplace: Overwrite the input notebook instead of writing a copy.
        timeout: Passed to ``NotebookClient`` as ``timeout``.
        startup_timeout: Passed to ``NotebookClient`` as ``startup_timeout``.
        allow_errors: Passed to ``NotebookClient`` as ``allow_errors``.
        kernel_name: Passed to ``NotebookClient`` as ``kernel_name``.
        skip_cells_with_tag: Passed to ``NotebookClient`` as ``skip_cells_with_tag``.
        evaluation_path: JSON-lines file that evaluation records are appended
            to; nothing is written when empty.

    Raises:
        AssertionError: If both ``inplace`` and ``output_path`` are given.
    """
    input_path = Path(input_path).with_suffix(".ipynb")

    assert not (inplace and output_path), "Cannot specify both 'inplace' and 'output_path'"
    if inplace:
        output_path = input_path
    elif not output_path:
        output_path = input_path.parent.joinpath(f"{input_path.stem}_executed.ipynb")
    output_path = os.path.abspath(output_path)

    # Open up the notebook we're going to run. Notebook files are UTF-8 JSON,
    # so do not depend on the platform's default text encoding.
    with input_path.open(encoding="utf-8") as f:
        print("Running notebook:", input_path)
        nb = nbformat.read(f, as_version=4)

    start_time = 0
    is_global_finished = False

    def append_evaluation_record(record: dict) -> None:
        """Append one record as a JSON line to the evaluation file, if configured."""
        if evaluation_path:
            with open(evaluation_path, "a", encoding="utf-8") as eval_file:
                eval_file.write(json.dumps(record) + "\n")

    def save_notebook(**kwargs):
        """Save the notebook; when called as an nbclient hook, post-process the cell first."""
        nonlocal is_global_finished

        if kwargs:
            cell_idx = kwargs.get("cell_index", 0)
            cell = kwargs.get("cell") or {}
            # The reply may be None (no reply received), so never chain .get() on it directly.
            execute_reply = kwargs.get("execute_reply") or {}
            cell_type = cell.get("cell_type")
            cell_id = cell.get("id")
            cell_exec_count = cell.get("execution_count")
            cell_meta = cell.get("metadata", {})
            cell_payloads = execute_reply.get("content", {}).get("payload", [])
            cell_outputs = cell.get("outputs", [])

            # A "set_next_input" payload with replace=True rewrites the cell's own source.
            for payload in cell_payloads:
                if payload.get("source") == "set_next_input" and payload.get("replace") is True:
                    print(f"CELL[{cell_idx}] Replacing cell with set_next_input payload")
                    nb.cells[cell_idx].source = payload.get("text", "")

            # Only outputs newer than what the cell metadata already holds are merged.
            output_agent_data_timestamp = cell_meta.get("jupyter-agent-data-timestamp", 0)
            for output in cell_outputs:
                if output["output_type"] != "display_data":
                    continue
                output_meta = output.get("metadata", {})
                if (
                    output_meta.get("jupyter-agent-data-store")
                    and output_meta.get("jupyter-agent-data-timestamp", 0) > output_agent_data_timestamp
                    and output_meta.get("jupyter-agent-data", {})
                ):
                    print(f"CELL[{cell_idx}] Found jupyter-agent-data-store outputs, save it to cell metadata")
                    output_agent_data_timestamp = output_meta.get("jupyter-agent-data-timestamp", 0)
                    target_meta = nb.cells[cell_idx].metadata
                    target_meta["jupyter-agent-data-store"] = True
                    target_meta["jupyter-agent-data-timestamp"] = output_agent_data_timestamp
                    if "jupyter-agent-data" not in target_meta:
                        target_meta["jupyter-agent-data"] = {}
                    target_meta["jupyter-agent-data"].update(output_meta["jupyter-agent-data"])
                # NOTE(review): evaluation records are handled for every display_data
                # output, independent of the data-store branch above -- confirm this
                # nesting against the upstream file.
                for record in output_meta.get("jupyter-agent-evaluation-records", []):
                    record["notebook_name"] = output_path
                    if record["eval_type"] == "NOTEBOOK":
                        record["execution_duration"] = time.time() - start_time
                        is_global_finished = True
                        del nb.cells[cell_idx + 1 :]  # Remove all cells after the notebook cell
                    print(
                        f"CELL[{cell_idx}] Evaluating record: {record['eval_type']} "
                        f"duration: {record['execution_duration']:.2f}s "
                        f"success: {record['is_success']} "
                        f"correct: {record['correct_score']:.2f}"
                    )
                    append_evaluation_record(record)
            print(f"CELL[{cell_idx}] Saving executed {cell_type} cell - {cell_id}: {cell_exec_count}")
        else:
            print(f"Saving executed notebook to: {output_path}")
        nbformat.write(nb, output_path)

    # Add metadata to the notebook. The path is embedded with !r so backslashes
    # and quotes in it still yield a valid Python string literal.
    nb.cells.insert(
        0,
        nbformat.v4.new_code_cell(
            source=(
                f"# Executed notebook: {input_path.name}\n"
                f"# Output saved to: {output_path}\n\n"
                f"__evaluation_ipynb_file__ = {output_path!r}\n"
            ),
            metadata={"tags": ["CTX_EXCLUDE"]},
        ),
    )
    save_notebook()

    # Configure nbclient to run the notebook
    client = NotebookClient(
        nb,
        timeout=timeout,
        startup_timeout=startup_timeout,
        skip_cells_with_tag=skip_cells_with_tag,
        allow_errors=allow_errors,
        kernel_name=kernel_name,
        resources={"metadata": {"path": input_path.parent.absolute()}},
        on_cell_executed=save_notebook,
    )

    # Run it
    print("Executing notebook...")
    start_time = time.time()
    try:
        try:
            client.execute()
        finally:
            # Persist whatever has been executed, even when a cell raised.
            save_notebook()
        print("Notebook execution completed.")
    finally:
        # If the notebook did not finish globally (including when execution
        # raised), append a failed NOTEBOOK evaluation record.
        if not is_global_finished:
            print("Notebook execution did not finish globally, appending evaluation records.")
            record = NotebookEvalutionRecord(
                timestamp=time.time(),
                notebook_name=output_path,
                eval_type="NOTEBOOK",
                execution_duration=time.time() - start_time,
                is_success=False,
                correct_score=0.0,
            )
            print(
                f"Global evaluation record: {record.eval_type} "
                f"duration: {record.execution_duration:.2f}s "
                f"success: {record.is_success} "
                f"correct: {record.correct_score:.2f}"
            )
            append_evaluation_record(record.model_dump())
153
+
154
+
155
def main():
    """Command-line entry point: parse arguments and run the notebook.

    Exits with an argparse usage error (status 2) when ``--inplace`` and
    ``--output_path`` are combined, instead of failing later inside
    ``run_notebook``.
    """
    parser = argparse.ArgumentParser(description="Run a Jupyter notebook.")
    parser.add_argument(
        "-o",
        "--output_path",
        type=str,
        default="",
        help="Path to save the executed notebook (default: '<input name>_executed.ipynb' next to the input)",
    )
    parser.add_argument(
        "-i", "--inplace", action="store_true", help="Run the notebook in place (overwrite input file)"
    )
    parser.add_argument(
        "-e",
        "--evaluation_path",
        type=str,
        default="",
        help="Path to save evaluation records (default: no evaluation records saved)",
    )
    parser.add_argument(
        "--timeout", type=int, default=-1, help="Execution timeout in seconds (default: -1, no timeout)"
    )
    parser.add_argument(
        "--startup_timeout", type=int, default=60, help="Kernel startup timeout in seconds (default: 60)"
    )
    parser.add_argument(
        "--allow_errors", action="store_true", help="Allow errors in the notebook execution (default: False)"
    )
    parser.add_argument(
        "--kernel_name", type=str, default="", help="Kernel name to use for execution (default: use notebook's kernel)"
    )
    parser.add_argument(
        "--skip_cells_with_tag",
        type=str,
        default="skip-execution",
        help="Tag to skip cells with (default: 'skip-execution')",
    )
    parser.add_argument("input_path", type=str, help="Path to the input notebook file")
    args = parser.parse_args()

    # Reject the conflicting combination here so the user gets a usage message
    # rather than an assertion traceback from run_notebook.
    if args.inplace and args.output_path:
        parser.error("Cannot specify both --inplace and --output_path")

    run_notebook(
        input_path=args.input_path,
        output_path=args.output_path,
        inplace=args.inplace,
        timeout=args.timeout,
        startup_timeout=args.startup_timeout,
        allow_errors=args.allow_errors,
        kernel_name=args.kernel_name,
        skip_cells_with_tag=args.skip_cells_with_tag,
        evaluation_path=args.evaluation_path,
    )


if __name__ == "__main__":
    main()
@@ -5,6 +5,7 @@ This software is released under the MIT License.
5
5
  https://opensource.org/licenses/MIT
6
6
  """
7
7
 
8
+ import time
8
9
  import traceback
9
10
 
10
11
  from pydantic import BaseModel
@@ -12,11 +13,14 @@ from enum import Enum
12
13
  from typing import List, Dict, Optional, Type
13
14
  from IPython.display import Markdown
14
15
  from ..bot_agents.base import BaseAgent
15
- from ..bot_outputs import _D, _I, _W, _E, _F, _M, _B, set_stage, flush_output
16
+ from ..bot_outputs import _D, _I, _W, _E, _F, _M, _B
17
+ from ..bot_outputs import set_stage, flush_output, output_evaluation
18
+ from ..bot_outputs import FlowEvalutionRecord, StageEvalutionRecord, NotebookEvalutionRecord
16
19
 
17
20
  TASK_AGENT_STATE_ERROR = "_AGENT_STATE_ERROR_32534526_"
18
21
  TASK_STAGE_START = "start"
19
22
  TASK_STAGE_COMPLETED = "completed"
23
+ TASK_STAGE_GLOBAL_FINISHED = "global_finished"
20
24
 
21
25
 
22
26
  class TaskAction(str, Enum):
@@ -47,7 +51,7 @@ class BaseTaskFlow:
47
51
 
48
52
  STAGE_TRANSITIONS: List[StageTransition] = []
49
53
  START_STAGE = TASK_STAGE_START
50
- STOP_STAGES = [TASK_STAGE_COMPLETED]
54
+ STOP_STAGES = [TASK_STAGE_COMPLETED, TASK_STAGE_GLOBAL_FINISHED]
51
55
 
52
56
  def __init__(self, notebook_context, agent_factory):
53
57
  self.notebook_context = notebook_context
@@ -146,11 +150,15 @@ class BaseTaskFlow:
146
150
  ns = self._get_next_stage_trans(stage, state, action)
147
151
  return ns.stage
148
152
 
149
- def __call__(self, stage, max_tries=3, stage_continue=True, stage_confirm=True):
153
+ def __call__(self, stage, max_tries=5, stage_continue=True, stage_confirm=True):
150
154
 
151
155
  n_tries = 0
156
+ flow_duration = 0.0
157
+ stage_count = 0
158
+ # Initialize the task stage
152
159
  stage = stage or self.START_STAGE
153
160
  while n_tries <= max_tries:
161
+ stage_st = time.time()
154
162
  try:
155
163
  stage_name = stage.value if isinstance(stage, Enum) else stage
156
164
  stage_name = stage_name.replace(".", "-").capitalize()
@@ -163,6 +171,19 @@ class BaseTaskFlow:
163
171
  _M(f"```python\n{traceback.format_exc()}\n```")
164
172
  state = TASK_AGENT_STATE_ERROR
165
173
  failed = True
174
+ stage_count += 1
175
+ stage_duration = time.time() - stage_st
176
+ flow_duration += stage_duration
177
+ _M(f"Stage `{stage}` completed in {stage_duration:.2f} seconds with state `{state}` and failed `{failed}`")
178
+ output_evaluation(
179
+ StageEvalutionRecord(
180
+ cell_index=self.task.cell_idx,
181
+ flow=type(self).__name__,
182
+ stage=str(stage),
183
+ execution_duration=stage_duration,
184
+ is_success=not failed,
185
+ )
186
+ )
166
187
 
167
188
  if state != TASK_AGENT_STATE_ERROR:
168
189
  # Agent did not fail, check if we have reached the final stage
@@ -171,14 +192,18 @@ class BaseTaskFlow:
171
192
  self.task.update_cell()
172
193
  if next_stage in self.STOP_STAGES:
173
194
  _M(f"Task execution **Stopped** at stage `{next_stage}`")
195
+ stage = next_stage
174
196
  break
175
197
 
176
198
  if failed:
177
199
  # Agent failed
178
200
  n_tries += 1
201
+ if n_tries > max_tries:
202
+ _M(f"**Max flow tries reached** during task execution stage `{stage}`, **Stop!**")
203
+ break
179
204
 
180
- if failed or stage_confirm:
181
- # Agent failed or we need to confirm
205
+ if stage_confirm:
206
+ # We need to confirm
182
207
  message = self.get_prompt_message(stage, state, failed)
183
208
  _M("**Confirm**: " + message)
184
209
  flush_output()
@@ -188,15 +213,13 @@ class BaseTaskFlow:
188
213
  self.task.update_cell()
189
214
  if action == TaskAction.STOP:
190
215
  _M(f"Task execution **Stopped**, and set next stage to `{next_stage}`")
191
- break
192
- elif n_tries > max_tries:
193
- _M(f"**Max tries reached** during task execution stage `{stage}`, **Stop!**")
216
+ stage = next_stage
194
217
  break
195
218
  else:
196
219
  _M(f"**Action**: `{action}` transits stage to `{next_stage}`")
197
220
  stage = next_stage
198
221
  else:
199
- # Agent succeeded, transit to the next stage without confirmation
222
+ # transit to the next stage without confirmation
200
223
  next_stage = self.get_next_stage(stage, state, TaskAction.CONTINUE)
201
224
  self.task.agent_stage = next_stage
202
225
  self.task.update_cell()
@@ -205,5 +228,21 @@ class BaseTaskFlow:
205
228
 
206
229
  if not stage_continue:
207
230
  break
231
+ # Finalize the task execution
232
+ stage_name = stage.value if isinstance(stage, Enum) else stage
233
+ if stage_name == TASK_STAGE_GLOBAL_FINISHED:
234
+ _M("Task execution **finished** globally.")
235
+ output_evaluation(NotebookEvalutionRecord(cell_index=self.task.cell_idx, is_success=True))
236
+ elif stage_name == TASK_STAGE_COMPLETED:
237
+ _M(f"Task execution **completed** in {flow_duration:.2f} seconds with {stage_count} stages.")
238
+ output_evaluation(
239
+ FlowEvalutionRecord(
240
+ cell_index=self.task.cell_idx,
241
+ flow=type(self).__name__,
242
+ stage_count=stage_count,
243
+ execution_duration=flow_duration,
244
+ is_success=True,
245
+ )
246
+ )
208
247
  flush_output()
209
248
  return stage
@@ -11,6 +11,7 @@ from .base import BaseTaskFlow, StageTransition, TASK_STAGE_START, TASK_STAGE_CO
11
11
  class MasterPlannerFlow(BaseTaskFlow):
12
12
 
13
13
  STAGE_TRANSITIONS = [
14
- StageTransition(stage=TASK_STAGE_START, agent="MasterPlannerAgent", next_stage=TASK_STAGE_START)
14
+ StageTransition(stage=TASK_STAGE_START, agent="MasterPlannerAgent", next_stage=TASK_STAGE_COMPLETED),
15
+ StageTransition(stage=TASK_STAGE_COMPLETED, agent="OutputTaskResult", next_stage=TASK_STAGE_COMPLETED),
15
16
  ]
16
- STOP_STAGES = [TASK_STAGE_START]
17
+ STOP_STAGES = [TASK_STAGE_COMPLETED]
@@ -6,7 +6,14 @@ https://opensource.org/licenses/MIT
6
6
  """
7
7
 
8
8
  from enum import Enum
9
- from .base import BaseTaskFlow, StageTransition, StageNext, TaskAction
9
+ from .base import (
10
+ BaseTaskFlow,
11
+ StageTransition,
12
+ StageNext,
13
+ TaskAction,
14
+ TASK_STAGE_COMPLETED,
15
+ TASK_STAGE_GLOBAL_FINISHED,
16
+ )
10
17
  from ..bot_agents import (
11
18
  TaskPlannerAgentV3,
12
19
  TaskCodingAgent,
@@ -27,14 +34,15 @@ class TaskStage(str, Enum):
27
34
  DEBUGGING = "debugging"
28
35
  REASONING = "reasoning"
29
36
  SUMMARY = "summary"
30
- COMPLETED = "completed"
31
37
  OUTPUT_RESULT = "output_result"
38
+ COMPLETED = TASK_STAGE_COMPLETED
39
+ GLOBAL_FINISHED = TASK_STAGE_GLOBAL_FINISHED
32
40
 
33
41
 
34
42
  class TaskExecutorFlowV3(BaseTaskFlow):
35
43
 
36
44
  START_STAGE = TaskStage.PLANNING
37
- STOP_STAGES = [TaskStage.COMPLETED, TaskStage.PLANNING_PAUSED]
45
+ STOP_STAGES = [TaskStage.COMPLETED, TaskStage.PLANNING_PAUSED, TaskStage.GLOBAL_FINISHED]
38
46
  STAGE_TRANSITIONS = [
39
47
  StageTransition[TaskStage, TaskPlannerState](
40
48
  stage=TaskStage.PLANNING,
@@ -43,7 +51,7 @@ class TaskExecutorFlowV3(BaseTaskFlow):
43
51
  TaskPlannerState.CODING_PLANNED: TaskStage.CODING,
44
52
  TaskPlannerState.REASONING_PLANNED: TaskStage.REASONING,
45
53
  TaskPlannerState.REQUEST_INFO: TaskStage.PLANNING_PAUSED,
46
- TaskPlannerState.GLOBAL_FINISHED: TaskStage.COMPLETED,
54
+ TaskPlannerState.GLOBAL_FINISHED: TaskStage.GLOBAL_FINISHED,
47
55
  },
48
56
  ),
49
57
  StageTransition[TaskStage, TaskPlannerState](
@@ -86,4 +94,7 @@ class TaskExecutorFlowV3(BaseTaskFlow):
86
94
  StageTransition[TaskStage, None](
87
95
  stage=TaskStage.OUTPUT_RESULT, agent=OutputTaskResult, next_stage=TaskStage.COMPLETED
88
96
  ),
97
+ StageTransition[TaskStage, None](
98
+ stage=TaskStage.GLOBAL_FINISHED, agent=OutputTaskResult, next_stage=TaskStage.GLOBAL_FINISHED
99
+ ),
89
100
  ]
@@ -43,9 +43,12 @@ class BotMagics(Magics, Configurable):
43
43
  display_message = Bool(False, help="Display chat message").tag(config=True)
44
44
  display_think = Bool(True, help="Display chatthink response").tag(config=True)
45
45
  display_response = Bool(False, help="Display chat full response").tag(config=True)
46
+ support_save_meta = Bool(False, help="Support save metadata to cell").tag(config=True)
46
47
  notebook_path = Unicode(None, allow_none=True, help="Path to Notebook file").tag(config=True)
47
48
  default_task_flow = Unicode("v3", allow_none=True, help="Default task flow").tag(config=True)
48
- support_save_meta = Bool(False, help="Support save metadata to cell").tag(config=True)
49
+ default_max_tries = Int(3, help="Default max tries for task execution").tag(config=True)
50
+ default_step_mode = Bool(False, help="Default step mode for task execution").tag(config=True)
51
+ default_auto_confirm = Bool(False, help="Default auto confirm for task execution").tag(config=True)
49
52
 
50
53
  def parse_args(self, line):
51
54
  """解析命令行参数"""
@@ -54,9 +57,21 @@ class BotMagics(Magics, Configurable):
54
57
  parser.add_argument("-P", "--planning", action="store_true", default=False, help="Run in planning mode")
55
58
  parser.add_argument("-s", "--stage", type=str, default=None, help="Task stage")
56
59
  parser.add_argument("-f", "--flow", type=str, default=self.default_task_flow, help="Flow name")
57
- parser.add_argument("-m", "--max-tries", type=int, default=3, help="Max tries")
58
- parser.add_argument("-S", "--step-mode", action="store_true", default=False, help="Run in single step mode")
59
- parser.add_argument("-Y", "--auto-confirm", action="store_true", default=False, help="Run without confirm")
60
+ parser.add_argument("-m", "--max-tries", type=int, default=self.default_max_tries, help="Max tries")
61
+ parser.add_argument(
62
+ "-S",
63
+ "--step-mode",
64
+ action="store_true",
65
+ default=self.default_step_mode,
66
+ help="Run in single step mode",
67
+ )
68
+ parser.add_argument(
69
+ "-Y",
70
+ "--auto-confirm",
71
+ action="store_true",
72
+ default=self.default_auto_confirm,
73
+ help="Run without confirm",
74
+ )
60
75
  options, _ = parser.parse_known_args(shlex.split(line.strip()))
61
76
 
62
77
  return options
@@ -116,16 +131,28 @@ class BotMagics(Magics, Configurable):
116
131
  display_response=self.display_response,
117
132
  )
118
133
  agent_factory.config_model(
119
- AgentModelType.DEFAULT, self.default_api_url, self.default_api_key, self.default_model_name
134
+ AgentModelType.DEFAULT,
135
+ self.default_api_url,
136
+ self.default_api_key,
137
+ self.default_model_name,
120
138
  )
121
139
  agent_factory.config_model(
122
- AgentModelType.PLANNER, self.planner_api_url, self.planner_api_key, self.planner_model_name
140
+ AgentModelType.PLANNER,
141
+ self.planner_api_url,
142
+ self.planner_api_key,
143
+ self.planner_model_name,
123
144
  )
124
145
  agent_factory.config_model(
125
- AgentModelType.CODING, self.coding_api_url, self.coding_api_key, self.coding_model_name
146
+ AgentModelType.CODING,
147
+ self.coding_api_url,
148
+ self.coding_api_key,
149
+ self.coding_model_name,
126
150
  )
127
151
  agent_factory.config_model(
128
- AgentModelType.REASONING, self.reasoning_api_url, self.reasoning_api_key, self.reasoning_model_name
152
+ AgentModelType.REASONING,
153
+ self.reasoning_api_url,
154
+ self.reasoning_api_key,
155
+ self.reasoning_model_name,
129
156
  )
130
157
  if options.planning:
131
158
  flow = MasterPlannerFlow(nb_context, agent_factory)
@@ -138,7 +165,12 @@ class BotMagics(Magics, Configurable):
138
165
  flow = TaskExecutorFlowV3(nb_context, agent_factory)
139
166
  else:
140
167
  raise ValueError(f"Unknown flow: {options.flow}")
141
- flow(options.stage, options.max_tries, not options.step_mode, not options.auto_confirm)
168
+ flow(
169
+ options.stage,
170
+ options.max_tries,
171
+ not options.step_mode,
172
+ not options.auto_confirm,
173
+ )
142
174
  except Exception as e:
143
175
  traceback.print_exc()
144
176
 
@@ -11,6 +11,8 @@ import datetime
11
11
  import jinja2
12
12
 
13
13
  from enum import Enum
14
+ from typing import Optional, Dict, Any
15
+ from pydantic import BaseModel, Field
14
16
  from IPython.display import display, Markdown
15
17
  from .utils import no_indent, no_wrap
16
18
 
@@ -171,6 +173,38 @@ LOGGING_LEVELS = {
171
173
  }
172
174
 
173
175
 
176
class BaseEvalutionRecord(BaseModel):
    # Fields shared by every evaluation record. Records are serialized with
    # model_dump() into display metadata and into the evaluation JSON-lines
    # file, so field names and order are part of the output format.
    # NOTE: "Evalution" is the spelling used across the package; keep it.

    # 0 means "not set"; AgentOutput.log_evaluation replaces it with time.time().
    timestamp: float = 0
    notebook_name: str = ""
    # Record kind; subclasses override the default ("STAGE", "FLOW", "NOTEBOOK").
    eval_type: str = "BASE"
    cell_index: int = -1
    # Duration in seconds (printed with an "s" suffix by the loggers).
    execution_duration: float = 0.0
    is_success: bool = False
    correct_score: float = 0.0
184
+
185
+
186
class StageEvalutionRecord(BaseEvalutionRecord):
    # Evaluation record for a single stage of a task flow.
    eval_type: str = "STAGE"
    # Flow class name and stage identifier, as set by the caller creating the record.
    flow: str = ""
    stage: str = ""
    # Scores default to 0.0. NOTE(review): scoring semantics are not visible here -- confirm.
    coding_score: float = 0.0
    important_score: float = 0.0
    user_supply_score: float = 0.0
193
+
194
+
195
class FlowEvalutionRecord(BaseEvalutionRecord):
    # Evaluation record for one complete task-flow run.
    eval_type: str = "FLOW"
    flow: str = ""
    # Number of stages counted during the flow run.
    stage_count: int = 0
    planning_score: float = 0.0
199
+ planning_score: float = 0.0
200
+
201
+
202
class NotebookEvalutionRecord(BaseEvalutionRecord):
    # Evaluation record for a whole notebook run.
    eval_type: str = "NOTEBOOK"
    flow_count: int = 0
    planning_score: float = 0.0
206
+
207
+
174
208
  class AgentOutput:
175
209
  """
176
210
  AgentOutput 是一个用于在 Jupyter Notebook 中显示 Agent 输出的类。
@@ -193,6 +227,7 @@ class AgentOutput:
193
227
  self._agent_data_timestamp = None
194
228
  self._agent_data = {}
195
229
  self._logging_records = []
230
+ self._evaluation_records = []
196
231
 
197
232
  @property
198
233
  def content(self):
@@ -226,6 +261,8 @@ class AgentOutput:
226
261
  "jupyter-agent-data": self._agent_data,
227
262
  }
228
263
  )
264
+ if self._evaluation_records:
265
+ metadata["jupyter-agent-evaluation-records"] = [record.model_dump() for record in self._evaluation_records]
229
266
  return metadata
230
267
 
231
268
  def display(self, stage=None, force=False, wait=True):
@@ -320,6 +357,20 @@ class AgentOutput:
320
357
  )
321
358
  self.display(force=False, wait=False)
322
359
 
360
def log_evaluation(self, record: BaseEvalutionRecord):
    """Stamp, store and report a single evaluation record.

    A record whose ``timestamp`` is still 0 is stamped with the current time.
    The record is appended to this output's evaluation records, a one-line
    summary is logged at INFO level, and the display is refreshed.

    :param record: a ``BaseEvalutionRecord`` instance (or subclass instance).
    """
    assert isinstance(
        record, BaseEvalutionRecord
    ), "record must be an instance of BaseEvalutionRecord or its subclass"

    # A zero timestamp marks a record that has not been stamped yet.
    if record.timestamp == 0:
        record.timestamp = time.time()
    self._evaluation_records.append(record)

    summary = (
        f"Evaluation: {record.eval_type}[{record.cell_index}] duration: {record.execution_duration:.2f}s "
        f"success: {record.is_success} correct: {record.correct_score:.2f}"
    )
    self.log(summary, level="INFO")
    self.display(force=False, wait=False)
373
+
323
374
 
324
375
  __agent_output = None
325
376
 
@@ -363,6 +414,14 @@ def output_agent_data(**kwargs):
363
414
  get_output().output_agent_data(**kwargs)
364
415
 
365
416
 
417
def output_evaluation(record: BaseEvalutionRecord):
    """
    Send an evaluation record to the current AgentOutput.

    :param record: evaluation record object; must be an instance of
        BaseEvalutionRecord or one of its subclasses.
    """
    agent_output = get_output()
    agent_output.log_evaluation(record)
423
+
424
+
366
425
  def clear_output(stage=None, clear_metadata=False):
367
426
  get_output().clear(stage, clear_metadata)
368
427
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: jupyter-agent
3
- Version: 2025.6.103
3
+ Version: 2025.6.104
4
4
  Summary: 调用LLM实现Jupyter代码的自动生成、执行、调试等功能
5
5
  Author: viewstar000
6
6
  License: MIT
@@ -10,14 +10,15 @@ Classifier: Operating System :: OS Independent
10
10
  Requires-Python: >=3.12
11
11
  Description-Content-Type: text/markdown
12
12
  License-File: LICENSE
13
+ Requires-Dist: ipynbname
13
14
  Requires-Dist: ipython
15
+ Requires-Dist: jinja2
16
+ Requires-Dist: nbclient
14
17
  Requires-Dist: nbformat
15
- Requires-Dist: ipynbname
16
18
  Requires-Dist: openai
17
- Requires-Dist: traitlets
18
- Requires-Dist: pyyaml
19
- Requires-Dist: jinja2
20
19
  Requires-Dist: pydantic
20
+ Requires-Dist: pyyaml
21
+ Requires-Dist: traitlets
21
22
  Dynamic: license-file
22
23
 
23
24
  # jupyter-agent
@@ -1,8 +1,9 @@
1
1
  jupyter_agent/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
2
  jupyter_agent/bot_chat.py,sha256=RkaT-Cpkp3G24F1AtbcgqjKo-LBxwMNAYHVitaPZNOg,8834
3
3
  jupyter_agent/bot_contexts.py,sha256=eFezr7ImokMz33jJJMlLLQspQBYnuE6Bc2bE1wLbXWU,19123
4
- jupyter_agent/bot_magics.py,sha256=whyGu6Jru7ZAIWXjwoq_CfNxqvlDawGTREUHX9XjHFY,7434
5
- jupyter_agent/bot_outputs.py,sha256=7_cbOBaGdoYMR6qu43nZEsB9KY6wp4zA0Dh7Jq7LMTg,14065
4
+ jupyter_agent/bot_evaluation.py,sha256=tTGUxZ5Gf-A_aFj1HZ8_rjwXpF5X4cv_YVUyOybJShc,8462
5
+ jupyter_agent/bot_magics.py,sha256=1W52-HVSixsOR-svFv_iPrllNxdgx2mEHMQ4op0UW1o,8204
6
+ jupyter_agent/bot_outputs.py,sha256=aDowlURSt9AZnlQdwQp5Gmd5fSRzY910B6A0h-gvmF0,15953
6
7
  jupyter_agent/utils.py,sha256=jbvDtVK6MfGaf5ZLoam_sq9R5TMriEG4HbMF0bHHDes,4387
7
8
  jupyter_agent/bot_agents/__init__.py,sha256=IdlBlvfaDUfp3qhkNuwUVV_CdplafZsgLezLVkZCREw,1323
8
9
  jupyter_agent/bot_agents/base.py,sha256=50XtKBVRj83zALGgw4klGLZkUlqHNhs1WIX5av9bIm4,10893
@@ -21,13 +22,14 @@ jupyter_agent/bot_agents/task_summarier.py,sha256=bvYEKW_NWRwe-kNNxR7uhJTMKMJXSy
21
22
  jupyter_agent/bot_agents/task_verifier.py,sha256=9Tlyb7hP7tBHMrh5XkRD30mYLodNum33X6v2snjm0QI,2478
22
23
  jupyter_agent/bot_agents/task_verify_summarier.py,sha256=mhpqgcBPOur0TtG8rYUT-BCAYgAiJxDgXVnCAE5Cucs,4963
23
24
  jupyter_agent/bot_flows/__init__.py,sha256=vbb3GJLu6aZdJ2ox4eaHn5cg0d4WQM6zmhIbMAlHIFo,488
24
- jupyter_agent/bot_flows/base.py,sha256=o94y9H8E9FCGz7srdyJDl7UcAj7RCEJCBjtSSREvcWY,8437
25
- jupyter_agent/bot_flows/master_planner.py,sha256=2MvAMikMsBIarTL4QNYTjAs70bxo7VAV0DbHus325Es,430
25
+ jupyter_agent/bot_flows/base.py,sha256=V8JEkjf-gmkuz0X4P8-ziHapeWPN2EEoXFTk5g8AK1Y,10210
26
+ jupyter_agent/bot_flows/master_planner.py,sha256=rmLXrg457TY91FVjOKjJOR8X7gAqP18Sl0O0c5I4pAY,551
26
27
  jupyter_agent/bot_flows/task_executor_v1.py,sha256=WzTfoOTjBpk1emvpiL3yeiudKdDf6EpVdJIugxxbqM4,2975
27
28
  jupyter_agent/bot_flows/task_executor_v2.py,sha256=IyfxhzA4TljNd6iqqUYxxzB63r9lxfe1Zbu177hytRs,2949
28
- jupyter_agent/bot_flows/task_executor_v3.py,sha256=NwJy7iAo2U3rl5iQYpACWVAkIA5DRMDdx0JadheRMMQ,3198
29
- jupyter_agent-2025.6.103.dist-info/licenses/LICENSE,sha256=nWMmSIg7OepTIDX_OPP0-T9ImeCBBoog7eJxm5awtcM,1068
30
- jupyter_agent-2025.6.103.dist-info/METADATA,sha256=ZtUJtanX1-RXpH1mMA6JH9fWCWK4YMaFU1h9e2nP_tE,9998
31
- jupyter_agent-2025.6.103.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
32
- jupyter_agent-2025.6.103.dist-info/top_level.txt,sha256=c3USTBZ7DZGuvLKlEW-QfGIx0tzn98iCEn3bpdYnDtE,14
33
- jupyter_agent-2025.6.103.dist-info/RECORD,,
29
+ jupyter_agent/bot_flows/task_executor_v3.py,sha256=Argp0T-bNBRHpjpHhFuNKclOtN2O6DJ28rY7F0GUQrI,3527
30
+ jupyter_agent-2025.6.104.dist-info/licenses/LICENSE,sha256=nWMmSIg7OepTIDX_OPP0-T9ImeCBBoog7eJxm5awtcM,1068
31
+ jupyter_agent-2025.6.104.dist-info/METADATA,sha256=cxJPbyRvUTMJgf01snK1YIvgh87ImLWNKRg3z7RoMi0,10022
32
+ jupyter_agent-2025.6.104.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
33
+ jupyter_agent-2025.6.104.dist-info/entry_points.txt,sha256=063AB86wSrC_V-iiEEqxTlR4uz-T7VH_YagIpmKFQC0,63
34
+ jupyter_agent-2025.6.104.dist-info/top_level.txt,sha256=c3USTBZ7DZGuvLKlEW-QfGIx0tzn98iCEn3bpdYnDtE,14
35
+ jupyter_agent-2025.6.104.dist-info/RECORD,,
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ bot_eval = jupyter_agent.bot_evaluation:main