jupyter-agent 2025.6.102__py3-none-any.whl → 2025.6.104__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,206 @@
1
+ """
2
+ Copyright (c) 2025 viewstar000
3
+
4
+ This software is released under the MIT License.
5
+ https://opensource.org/licenses/MIT
6
+ """
7
+
8
+ import os
9
+ import time
10
+ import json
11
+ import argparse
12
+ import nbformat
13
+
14
+ from pathlib import Path
15
+ from nbclient.client import NotebookClient
16
+ from .bot_outputs import NotebookEvalutionRecord
17
+
18
+
19
def _append_evaluation_record(evaluation_path, record):
    """Append one JSON-serialisable evaluation record as a line to ``evaluation_path``."""
    with open(evaluation_path, "a", encoding="utf-8") as eval_file:
        eval_file.write(json.dumps(record) + "\n")


def run_notebook(
    input_path: str | Path,
    output_path: str | Path = "",
    inplace: bool = False,
    timeout: int = -1,
    startup_timeout: int = 60,
    allow_errors: bool = False,
    kernel_name: str = "",
    skip_cells_with_tag: str = "skip-execution",
    evaluation_path: str | Path = "",
) -> None:
    """Execute a notebook with nbclient, saving it after every executed cell.

    A code cell that defines ``__evaluation_ipynb_file__`` is inserted at the top
    of the notebook before execution. After each cell the notebook is written to
    ``output_path``; evaluation records found in the cell's ``display_data``
    output metadata are printed and, when ``evaluation_path`` is set, appended to
    that file as JSON lines. If no record of type ``NOTEBOOK`` was seen, a failing
    ``NOTEBOOK`` record is emitted at the end, including when execution raises.

    Args:
        input_path: Notebook to run; the suffix is forced to ``.ipynb``.
        output_path: Where to save the executed notebook. Defaults to
            ``<input stem>_executed.ipynb`` next to the input.
        inplace: Overwrite the input notebook instead of writing a copy.
        timeout: Passed to ``NotebookClient`` as ``timeout``.
        startup_timeout: Passed to ``NotebookClient`` as ``startup_timeout``.
        allow_errors: Passed to ``NotebookClient`` as ``allow_errors``.
        kernel_name: Passed to ``NotebookClient`` as ``kernel_name``.
        skip_cells_with_tag: Passed to ``NotebookClient`` as ``skip_cells_with_tag``.
        evaluation_path: JSON-lines file to append evaluation records to; no
            records are written when empty.

    Raises:
        AssertionError: If both ``inplace`` and ``output_path`` are given.
    """
    input_path = Path(input_path).with_suffix(".ipynb")

    assert not (inplace and output_path), "Cannot specify both 'inplace' and 'output_path'"
    if inplace:
        output_path = input_path
    elif not output_path:
        output_path = input_path.parent.joinpath(f"{input_path.stem}_executed.ipynb")
    output_path = os.path.abspath(output_path)

    # Notebooks are UTF-8 JSON; do not depend on the platform default encoding.
    with input_path.open(encoding="utf-8") as f:
        print("Running notebook:", input_path)
        nb = nbformat.read(f, as_version=4)

    start_time = 0.0
    is_global_finished = False

    def save_notebook(**kwargs):
        """Save the notebook; when called as the per-cell hook, process that cell first.

        Called without arguments for a plain save. As ``on_cell_executed`` hook it
        reads the ``cell_index``, ``cell`` and ``execute_reply`` keyword arguments.
        """
        nonlocal is_global_finished

        if kwargs:
            cell = kwargs.get("cell", {})
            cell_idx = kwargs.get("cell_index", 0)
            cell_type = cell.get("cell_type")
            cell_id = cell.get("id")
            cell_exec_count = cell.get("execution_count")
            cell_meta = cell.get("metadata", {})
            cell_payloads = kwargs.get("execute_reply", {}).get("content", {}).get("payload", [])
            cell_outputs = cell.get("outputs", [])

            # A replacing "set_next_input" payload rewrites the cell's own source.
            for payload in cell_payloads:
                if payload.get("source") == "set_next_input" and payload.get("replace") is True:
                    print(f"CELL[{cell_idx}] Replacing cell with set_next_input payload")
                    nb.cells[cell_idx].source = payload.get("text", "")

            # Only outputs newer than the data already stored on the cell are merged.
            output_agent_data_timestamp = cell_meta.get("jupyter-agent-data-timestamp", 0)
            for output in cell_outputs:
                if output["output_type"] != "display_data":
                    continue
                output_meta = output.get("metadata", {})
                if (
                    output_meta.get("jupyter-agent-data-store")
                    and output_meta.get("jupyter-agent-data-timestamp", 0) > output_agent_data_timestamp
                    and output_meta.get("jupyter-agent-data", {})
                ):
                    print(f"CELL[{cell_idx}] Found jupyter-agent-data-store outputs, save it to cell metadata")
                    output_agent_data_timestamp = output_meta.get("jupyter-agent-data-timestamp", 0)
                    target_meta = nb.cells[cell_idx].metadata
                    target_meta["jupyter-agent-data-store"] = True
                    target_meta["jupyter-agent-data-timestamp"] = output_agent_data_timestamp
                    if "jupyter-agent-data" not in target_meta:
                        target_meta["jupyter-agent-data"] = {}
                    target_meta["jupyter-agent-data"].update(output_meta["jupyter-agent-data"])
                for record in output_meta.get("jupyter-agent-evaluation-records", []):
                    record["notebook_name"] = output_path
                    if record["eval_type"] == "NOTEBOOK":
                        record["execution_duration"] = time.time() - start_time
                        is_global_finished = True
                        del nb.cells[cell_idx + 1 :]  # Remove all cells after the notebook cell
                    print(
                        f"CELL[{cell_idx}] Evaluating record: {record['eval_type']} "
                        f"duration: {record['execution_duration']:.2f}s "
                        f"success: {record['is_success']} "
                        f"correct: {record['correct_score']:.2f}"
                    )
                    if evaluation_path:
                        _append_evaluation_record(evaluation_path, record)
            print(f"CELL[{cell_idx}] Saving executed {cell_type} cell - {cell_id}: {cell_exec_count}")
        else:
            print(f"Saving executed notebook to: {output_path}")
        nbformat.write(nb, output_path)

    # Prepend a cell that tells the notebook where it is being saved.
    # ``!r`` emits a valid Python string literal even when the path contains
    # backslashes (Windows) or quote characters.
    nb.cells.insert(
        0,
        nbformat.v4.new_code_cell(
            source=(
                f"# Executed notebook: {input_path.name}\n"
                f"# Output saved to: {output_path}\n\n"
                f"__evaluation_ipynb_file__ = {output_path!r}\n"
            ),
            metadata={"tags": ["CTX_EXCLUDE"]},
        ),
    )
    save_notebook()

    # Configure nbclient to run the notebook
    client = NotebookClient(
        nb,
        timeout=timeout,
        startup_timeout=startup_timeout,
        skip_cells_with_tag=skip_cells_with_tag,
        allow_errors=allow_errors,
        kernel_name=kernel_name,
        resources={"metadata": {"path": input_path.parent.absolute()}},
        on_cell_executed=save_notebook,
    )

    # Run it
    print("Executing notebook...")
    start_time = time.time()
    try:
        client.execute()
        save_notebook()
        print("Notebook execution completed.")
    finally:
        # Runs on success and on failure, so a crashed run still leaves a failing
        # NOTEBOOK record behind; any exception then propagates unchanged.
        if not is_global_finished:
            print("Notebook execution did not finish globally, appending evaluation records.")
            record = NotebookEvalutionRecord(
                notebook_name=output_path,
                eval_type="NOTEBOOK",
                execution_duration=time.time() - start_time,
                is_success=False,
                correct_score=0.0,
            )
            print(
                f"Global evaluation record: {record.eval_type} "
                f"duration: {record.execution_duration:.2f}s "
                f"success: {record.is_success} "
                f"correct: {record.correct_score:.2f}"
            )
            if evaluation_path:
                _append_evaluation_record(evaluation_path, record.model_dump())
154
+
155
def main(argv=None):
    """Command-line entry point: parse arguments and run the notebook.

    Args:
        argv: Argument list to parse; ``None`` makes argparse read ``sys.argv[1:]``.

    Exits with a usage error when both ``--inplace`` and ``--output_path`` are given.
    """
    parser = argparse.ArgumentParser(description="Run a Jupyter notebook.")
    parser.add_argument(
        "-o",
        "--output_path",
        type=str,
        default="",
        help="Path to save the executed notebook (default: '<input>_executed.ipynb' next to the input)",
    )
    parser.add_argument(
        "-i", "--inplace", action="store_true", help="Run the notebook in place (overwrite input file)"
    )
    parser.add_argument(
        "-e",
        "--evaluation_path",
        type=str,
        default="",
        help="Path to save evaluation records (default: no evaluation records saved)",
    )
    parser.add_argument(
        "--timeout", type=int, default=-1, help="Execution timeout in seconds (default: -1, no timeout)"
    )
    parser.add_argument(
        "--startup_timeout", type=int, default=60, help="Kernel startup timeout in seconds (default: 60)"
    )
    parser.add_argument(
        "--allow_errors", action="store_true", help="Allow errors in the notebook execution (default: False)"
    )
    parser.add_argument(
        "--kernel_name", type=str, default="", help="Kernel name to use for execution (default: use notebook's kernel)"
    )
    parser.add_argument(
        "--skip_cells_with_tag",
        type=str,
        default="skip-execution",
        help="Tag to skip cells with (default: 'skip-execution')",
    )
    parser.add_argument("input_path", type=str, help="Path to the input notebook file")
    args = parser.parse_args(argv)

    # Report the conflicting options as a normal usage error rather than letting
    # run_notebook's assertion surface as a traceback.
    if args.inplace and args.output_path:
        parser.error("Cannot specify both 'inplace' and 'output_path'")

    run_notebook(
        input_path=args.input_path,
        output_path=args.output_path,
        inplace=args.inplace,
        timeout=args.timeout,
        startup_timeout=args.startup_timeout,
        allow_errors=args.allow_errors,
        kernel_name=args.kernel_name,
        skip_cells_with_tag=args.skip_cells_with_tag,
        evaluation_path=args.evaluation_path,
    )
203
+
204
+
205
+ if __name__ == "__main__":
206
+ main()
@@ -5,6 +5,7 @@ This software is released under the MIT License.
5
5
  https://opensource.org/licenses/MIT
6
6
  """
7
7
 
8
+ import time
8
9
  import traceback
9
10
 
10
11
  from pydantic import BaseModel
@@ -12,11 +13,14 @@ from enum import Enum
12
13
  from typing import List, Dict, Optional, Type
13
14
  from IPython.display import Markdown
14
15
  from ..bot_agents.base import BaseAgent
15
- from ..bot_outputs import _D, _I, _W, _E, _F, _M, _B, set_stage, flush_output
16
+ from ..bot_outputs import _D, _I, _W, _E, _F, _M, _B
17
+ from ..bot_outputs import set_stage, flush_output, output_evaluation
18
+ from ..bot_outputs import FlowEvalutionRecord, StageEvalutionRecord, NotebookEvalutionRecord
16
19
 
17
20
  TASK_AGENT_STATE_ERROR = "_AGENT_STATE_ERROR_32534526_"
18
21
  TASK_STAGE_START = "start"
19
22
  TASK_STAGE_COMPLETED = "completed"
23
+ TASK_STAGE_GLOBAL_FINISHED = "global_finished"
20
24
 
21
25
 
22
26
  class TaskAction(str, Enum):
@@ -47,7 +51,7 @@ class BaseTaskFlow:
47
51
 
48
52
  STAGE_TRANSITIONS: List[StageTransition] = []
49
53
  START_STAGE = TASK_STAGE_START
50
- STOP_STAGES = [TASK_STAGE_COMPLETED]
54
+ STOP_STAGES = [TASK_STAGE_COMPLETED, TASK_STAGE_GLOBAL_FINISHED]
51
55
 
52
56
  def __init__(self, notebook_context, agent_factory):
53
57
  self.notebook_context = notebook_context
@@ -146,11 +150,15 @@ class BaseTaskFlow:
146
150
  ns = self._get_next_stage_trans(stage, state, action)
147
151
  return ns.stage
148
152
 
149
- def __call__(self, stage, max_tries=3, stage_continue=True, stage_confirm=True):
153
+ def __call__(self, stage, max_tries=5, stage_continue=True, stage_confirm=True):
150
154
 
151
155
  n_tries = 0
156
+ flow_duration = 0.0
157
+ stage_count = 0
158
+ # Initialize the task stage
152
159
  stage = stage or self.START_STAGE
153
160
  while n_tries <= max_tries:
161
+ stage_st = time.time()
154
162
  try:
155
163
  stage_name = stage.value if isinstance(stage, Enum) else stage
156
164
  stage_name = stage_name.replace(".", "-").capitalize()
@@ -163,6 +171,19 @@ class BaseTaskFlow:
163
171
  _M(f"```python\n{traceback.format_exc()}\n```")
164
172
  state = TASK_AGENT_STATE_ERROR
165
173
  failed = True
174
+ stage_count += 1
175
+ stage_duration = time.time() - stage_st
176
+ flow_duration += stage_duration
177
+ _M(f"Stage `{stage}` completed in {stage_duration:.2f} seconds with state `{state}` and failed `{failed}`")
178
+ output_evaluation(
179
+ StageEvalutionRecord(
180
+ cell_index=self.task.cell_idx,
181
+ flow=type(self).__name__,
182
+ stage=str(stage),
183
+ execution_duration=stage_duration,
184
+ is_success=not failed,
185
+ )
186
+ )
166
187
 
167
188
  if state != TASK_AGENT_STATE_ERROR:
168
189
  # Agent did not fail, check if we have reached the final stage
@@ -171,14 +192,18 @@ class BaseTaskFlow:
171
192
  self.task.update_cell()
172
193
  if next_stage in self.STOP_STAGES:
173
194
  _M(f"Task execution **Stopped** at stage `{next_stage}`")
195
+ stage = next_stage
174
196
  break
175
197
 
176
198
  if failed:
177
199
  # Agent failed
178
200
  n_tries += 1
201
+ if n_tries > max_tries:
202
+ _M(f"**Max flow tries reached** during task execution stage `{stage}`, **Stop!**")
203
+ break
179
204
 
180
- if failed or stage_confirm:
181
- # Agent failed or we need to confirm
205
+ if stage_confirm:
206
+ # We need to confirm
182
207
  message = self.get_prompt_message(stage, state, failed)
183
208
  _M("**Confirm**: " + message)
184
209
  flush_output()
@@ -188,15 +213,13 @@ class BaseTaskFlow:
188
213
  self.task.update_cell()
189
214
  if action == TaskAction.STOP:
190
215
  _M(f"Task execution **Stopped**, and set next stage to `{next_stage}`")
191
- break
192
- elif n_tries > max_tries:
193
- _M(f"**Max tries reached** during task execution stage `{stage}`, **Stop!**")
216
+ stage = next_stage
194
217
  break
195
218
  else:
196
219
  _M(f"**Action**: `{action}` transits stage to `{next_stage}`")
197
220
  stage = next_stage
198
221
  else:
199
- # Agent succeeded, transit to the next stage without confirmation
222
+ # transit to the next stage without confirmation
200
223
  next_stage = self.get_next_stage(stage, state, TaskAction.CONTINUE)
201
224
  self.task.agent_stage = next_stage
202
225
  self.task.update_cell()
@@ -205,5 +228,21 @@ class BaseTaskFlow:
205
228
 
206
229
  if not stage_continue:
207
230
  break
231
+ # Finalize the task execution
232
+ stage_name = stage.value if isinstance(stage, Enum) else stage
233
+ if stage_name == TASK_STAGE_GLOBAL_FINISHED:
234
+ _M("Task execution **finished** globally.")
235
+ output_evaluation(NotebookEvalutionRecord(cell_index=self.task.cell_idx, is_success=True))
236
+ elif stage_name == TASK_STAGE_COMPLETED:
237
+ _M(f"Task execution **completed** in {flow_duration:.2f} seconds with {stage_count} stages.")
238
+ output_evaluation(
239
+ FlowEvalutionRecord(
240
+ cell_index=self.task.cell_idx,
241
+ flow=type(self).__name__,
242
+ stage_count=stage_count,
243
+ execution_duration=flow_duration,
244
+ is_success=True,
245
+ )
246
+ )
208
247
  flush_output()
209
248
  return stage
@@ -11,6 +11,7 @@ from .base import BaseTaskFlow, StageTransition, TASK_STAGE_START, TASK_STAGE_CO
11
11
  class MasterPlannerFlow(BaseTaskFlow):
12
12
 
13
13
  STAGE_TRANSITIONS = [
14
- StageTransition(stage=TASK_STAGE_START, agent="MasterPlannerAgent", next_stage=TASK_STAGE_START)
14
+ StageTransition(stage=TASK_STAGE_START, agent="MasterPlannerAgent", next_stage=TASK_STAGE_COMPLETED),
15
+ StageTransition(stage=TASK_STAGE_COMPLETED, agent="OutputTaskResult", next_stage=TASK_STAGE_COMPLETED),
15
16
  ]
16
- STOP_STAGES = [TASK_STAGE_START]
17
+ STOP_STAGES = [TASK_STAGE_COMPLETED]
@@ -6,7 +6,14 @@ https://opensource.org/licenses/MIT
6
6
  """
7
7
 
8
8
  from enum import Enum
9
- from .base import BaseTaskFlow, StageTransition, StageNext, TaskAction
9
+ from .base import (
10
+ BaseTaskFlow,
11
+ StageTransition,
12
+ StageNext,
13
+ TaskAction,
14
+ TASK_STAGE_COMPLETED,
15
+ TASK_STAGE_GLOBAL_FINISHED,
16
+ )
10
17
  from ..bot_agents import (
11
18
  TaskPlannerAgentV3,
12
19
  TaskCodingAgent,
@@ -27,14 +34,15 @@ class TaskStage(str, Enum):
27
34
  DEBUGGING = "debugging"
28
35
  REASONING = "reasoning"
29
36
  SUMMARY = "summary"
30
- COMPLETED = "completed"
31
37
  OUTPUT_RESULT = "output_result"
38
+ COMPLETED = TASK_STAGE_COMPLETED
39
+ GLOBAL_FINISHED = TASK_STAGE_GLOBAL_FINISHED
32
40
 
33
41
 
34
42
  class TaskExecutorFlowV3(BaseTaskFlow):
35
43
 
36
44
  START_STAGE = TaskStage.PLANNING
37
- STOP_STAGES = [TaskStage.COMPLETED, TaskStage.PLANNING_PAUSED]
45
+ STOP_STAGES = [TaskStage.COMPLETED, TaskStage.PLANNING_PAUSED, TaskStage.GLOBAL_FINISHED]
38
46
  STAGE_TRANSITIONS = [
39
47
  StageTransition[TaskStage, TaskPlannerState](
40
48
  stage=TaskStage.PLANNING,
@@ -43,7 +51,7 @@ class TaskExecutorFlowV3(BaseTaskFlow):
43
51
  TaskPlannerState.CODING_PLANNED: TaskStage.CODING,
44
52
  TaskPlannerState.REASONING_PLANNED: TaskStage.REASONING,
45
53
  TaskPlannerState.REQUEST_INFO: TaskStage.PLANNING_PAUSED,
46
- TaskPlannerState.GLOBAL_FINISHED: TaskStage.COMPLETED,
54
+ TaskPlannerState.GLOBAL_FINISHED: TaskStage.GLOBAL_FINISHED,
47
55
  },
48
56
  ),
49
57
  StageTransition[TaskStage, TaskPlannerState](
@@ -86,4 +94,7 @@ class TaskExecutorFlowV3(BaseTaskFlow):
86
94
  StageTransition[TaskStage, None](
87
95
  stage=TaskStage.OUTPUT_RESULT, agent=OutputTaskResult, next_stage=TaskStage.COMPLETED
88
96
  ),
97
+ StageTransition[TaskStage, None](
98
+ stage=TaskStage.GLOBAL_FINISHED, agent=OutputTaskResult, next_stage=TaskStage.GLOBAL_FINISHED
99
+ ),
89
100
  ]
@@ -43,9 +43,12 @@ class BotMagics(Magics, Configurable):
43
43
  display_message = Bool(False, help="Display chat message").tag(config=True)
44
44
  display_think = Bool(True, help="Display chatthink response").tag(config=True)
45
45
  display_response = Bool(False, help="Display chat full response").tag(config=True)
46
+ support_save_meta = Bool(False, help="Support save metadata to cell").tag(config=True)
46
47
  notebook_path = Unicode(None, allow_none=True, help="Path to Notebook file").tag(config=True)
47
48
  default_task_flow = Unicode("v3", allow_none=True, help="Default task flow").tag(config=True)
48
- support_save_meta = Bool(False, help="Support save metadata to cell").tag(config=True)
49
+ default_max_tries = Int(3, help="Default max tries for task execution").tag(config=True)
50
+ default_step_mode = Bool(False, help="Default step mode for task execution").tag(config=True)
51
+ default_auto_confirm = Bool(False, help="Default auto confirm for task execution").tag(config=True)
49
52
 
50
53
  def parse_args(self, line):
51
54
  """解析命令行参数"""
@@ -54,17 +57,52 @@ class BotMagics(Magics, Configurable):
54
57
  parser.add_argument("-P", "--planning", action="store_true", default=False, help="Run in planning mode")
55
58
  parser.add_argument("-s", "--stage", type=str, default=None, help="Task stage")
56
59
  parser.add_argument("-f", "--flow", type=str, default=self.default_task_flow, help="Flow name")
57
- parser.add_argument("-m", "--max-tries", type=int, default=3, help="Max tries")
58
- parser.add_argument("-S", "--step-mode", action="store_true", default=False, help="Run in single step mode")
59
- parser.add_argument("-Y", "--auto-confirm", action="store_true", default=False, help="Run without confirm")
60
+ parser.add_argument("-m", "--max-tries", type=int, default=self.default_max_tries, help="Max tries")
61
+ parser.add_argument(
62
+ "-S",
63
+ "--step-mode",
64
+ action="store_true",
65
+ default=self.default_step_mode,
66
+ help="Run in single step mode",
67
+ )
68
+ parser.add_argument(
69
+ "-Y",
70
+ "--auto-confirm",
71
+ action="store_true",
72
+ default=self.default_auto_confirm,
73
+ help="Run without confirm",
74
+ )
60
75
  options, _ = parser.parse_known_args(shlex.split(line.strip()))
61
76
 
62
77
  return options
63
78
 
79
def ensure_notebook_path(self):
    """Return the notebook path, resolving and caching it on first use.

    Resolution order: the already configured ``self.notebook_path``; the value
    of ``__vsc_ipynb_file__`` looked up by running a snippet in the shell;
    finally ``ipynbname.path()``. Returns ``None`` (after logging the error)
    when the last fallback raises.
    """
    if not self.notebook_path:
        shell = self.shell
        # Mirrors ``shell and shell.run_cell(...)``: a missing shell skips the probe.
        probe = shell.run_cell("globals().get('__vsc_ipynb_file__')") if shell else shell
        if probe and probe.success and probe.result:
            self.notebook_path = probe.result
        else:
            try:
                self.notebook_path = str(ipynbname.path())
            except Exception as e:
                _F(f"Failed to get notebook path: {e}")
                return None
    return self.notebook_path
92
+
64
93
  @cell_magic
65
94
  def bot(self, line, cell):
66
95
  """Jupyter cell magic: %%bot"""
67
96
  try:
97
+ if not self.ensure_notebook_path():
98
+ _O(
99
+ Markdown(
100
+ "The notebook path is **empty**, we can't do anything.\n\n"
101
+ "Please set the notebook path in the configuration, and **RERUN** the cell again.\n\n"
102
+ 'For example: `%config BotMagics.notebook_path = globals()["__vsc_ipynb_file__"]`'
103
+ )
104
+ )
105
+ return
68
106
  AgentCellContext.SUPPORT_SAVE_META = self.support_save_meta
69
107
  reset_output(stage="Logging", logging_level=self.logging_level)
70
108
  _I("Cell magic %%bot executing ...")
@@ -85,8 +123,6 @@ class BotMagics(Magics, Configurable):
85
123
  options = self.parse_args(line)
86
124
  _D(f"Cell magic called with options: {options}")
87
125
  set_logging_level(options.logging_level)
88
- self.notebook_path = self.notebook_path or ipynbname.path()
89
- _D(f"Cell magic called with notebook path: {self.notebook_path}")
90
126
  nb_context = NotebookContext(line, cell, notebook_path=self.notebook_path)
91
127
  agent_factory = AgentFactory(
92
128
  nb_context,
@@ -95,16 +131,28 @@ class BotMagics(Magics, Configurable):
95
131
  display_response=self.display_response,
96
132
  )
97
133
  agent_factory.config_model(
98
- AgentModelType.DEFAULT, self.default_api_url, self.default_api_key, self.default_model_name
134
+ AgentModelType.DEFAULT,
135
+ self.default_api_url,
136
+ self.default_api_key,
137
+ self.default_model_name,
99
138
  )
100
139
  agent_factory.config_model(
101
- AgentModelType.PLANNER, self.planner_api_url, self.planner_api_key, self.planner_model_name
140
+ AgentModelType.PLANNER,
141
+ self.planner_api_url,
142
+ self.planner_api_key,
143
+ self.planner_model_name,
102
144
  )
103
145
  agent_factory.config_model(
104
- AgentModelType.CODING, self.coding_api_url, self.coding_api_key, self.coding_model_name
146
+ AgentModelType.CODING,
147
+ self.coding_api_url,
148
+ self.coding_api_key,
149
+ self.coding_model_name,
105
150
  )
106
151
  agent_factory.config_model(
107
- AgentModelType.REASONING, self.reasoning_api_url, self.reasoning_api_key, self.reasoning_model_name
152
+ AgentModelType.REASONING,
153
+ self.reasoning_api_url,
154
+ self.reasoning_api_key,
155
+ self.reasoning_model_name,
108
156
  )
109
157
  if options.planning:
110
158
  flow = MasterPlannerFlow(nb_context, agent_factory)
@@ -117,7 +165,12 @@ class BotMagics(Magics, Configurable):
117
165
  flow = TaskExecutorFlowV3(nb_context, agent_factory)
118
166
  else:
119
167
  raise ValueError(f"Unknown flow: {options.flow}")
120
- flow(options.stage, options.max_tries, not options.step_mode, not options.auto_confirm)
168
+ flow(
169
+ options.stage,
170
+ options.max_tries,
171
+ not options.step_mode,
172
+ not options.auto_confirm,
173
+ )
121
174
  except Exception as e:
122
175
  traceback.print_exc()
123
176
 
@@ -11,6 +11,8 @@ import datetime
11
11
  import jinja2
12
12
 
13
13
  from enum import Enum
14
+ from typing import Optional, Dict, Any
15
+ from pydantic import BaseModel, Field
14
16
  from IPython.display import display, Markdown
15
17
  from .utils import no_indent, no_wrap
16
18
 
@@ -171,6 +173,38 @@ LOGGING_LEVELS = {
171
173
  }
172
174
 
173
175
 
176
class BaseEvalutionRecord(BaseModel):
    """Fields shared by every evaluation record; subclasses override ``eval_type``."""

    # Unix timestamp; AgentOutput.log_evaluation fills it in when left at 0.
    timestamp: float = 0
    # Name/path of the notebook the record belongs to.
    notebook_name: str = ""
    # Record kind discriminator ("BASE" here; subclasses use their own value).
    eval_type: str = "BASE"
    # Index of the cell that produced the record; -1 when not set.
    cell_index: int = -1
    # Elapsed time in seconds.
    execution_duration: float = 0.0
    is_success: bool = False
    # NOTE(review): scoring scale is not visible here — confirm with the producer.
    correct_score: float = 0.0
184
+
185
+
186
class StageEvalutionRecord(BaseEvalutionRecord):
    """Evaluation record for a single stage of a task flow."""

    eval_type: str = "STAGE"
    # Name of the flow class that ran the stage.
    flow: str = ""
    # Stage identifier, as a string.
    stage: str = ""
    # NOTE(review): meaning and scale of the scores below are not visible here — confirm.
    coding_score: float = 0.0
    important_score: float = 0.0
    user_supply_score: float = 0.0
193
+
194
+
195
class FlowEvalutionRecord(BaseEvalutionRecord):
    """Evaluation record summarising one complete task-flow run."""

    eval_type: str = "FLOW"
    # Name of the flow class.
    flow: str = ""
    # Number of stages executed during the flow.
    stage_count: int = 0
    # NOTE(review): scoring scale is not visible here — confirm.
    planning_score: float = 0.0
200
+
201
+
202
class NotebookEvalutionRecord(BaseEvalutionRecord):
    """Evaluation record for a whole notebook run."""

    eval_type: str = "NOTEBOOK"
    # NOTE(review): presumably the number of flows executed in the notebook — confirm.
    flow_count: int = 0
    # NOTE(review): scoring scale is not visible here — confirm.
    planning_score: float = 0.0
206
+
207
+
174
208
  class AgentOutput:
175
209
  """
176
210
  AgentOutput 是一个用于在 Jupyter Notebook 中显示 Agent 输出的类。
@@ -193,6 +227,7 @@ class AgentOutput:
193
227
  self._agent_data_timestamp = None
194
228
  self._agent_data = {}
195
229
  self._logging_records = []
230
+ self._evaluation_records = []
196
231
 
197
232
  @property
198
233
  def content(self):
@@ -226,6 +261,8 @@ class AgentOutput:
226
261
  "jupyter-agent-data": self._agent_data,
227
262
  }
228
263
  )
264
+ if self._evaluation_records:
265
+ metadata["jupyter-agent-evaluation-records"] = [record.model_dump() for record in self._evaluation_records]
229
266
  return metadata
230
267
 
231
268
  def display(self, stage=None, force=False, wait=True):
@@ -320,6 +357,20 @@ class AgentOutput:
320
357
  )
321
358
  self.display(force=False, wait=False)
322
359
 
360
def log_evaluation(self, record: BaseEvalutionRecord):
    """Store an evaluation record, log a one-line summary, and refresh the display.

    A record whose ``timestamp`` is still 0 is stamped with the current time
    before it is stored.
    """
    assert isinstance(
        record, BaseEvalutionRecord
    ), "record must be an instance of BaseEvalutionRecord or its subclass"

    needs_timestamp = record.timestamp == 0
    if needs_timestamp:
        record.timestamp = time.time()

    self._evaluation_records.append(record)

    summary = (
        f"Evaluation: {record.eval_type}[{record.cell_index}] duration: {record.execution_duration:.2f}s "
        f"success: {record.is_success} correct: {record.correct_score:.2f}"
    )
    self.log(summary, level="INFO")
    self.display(force=False, wait=False)
373
+
323
374
 
324
375
  __agent_output = None
325
376
 
@@ -363,6 +414,14 @@ def output_agent_data(**kwargs):
363
414
  get_output().output_agent_data(**kwargs)
364
415
 
365
416
 
417
def output_evaluation(record: BaseEvalutionRecord):
    """
    Write an evaluation record to the current AgentOutput.

    :param record: The evaluation record; must be an instance of BaseEvalutionRecord or a subclass.
    """
    get_output().log_evaluation(record)
423
+
424
+
366
425
  def clear_output(stage=None, clear_metadata=False):
367
426
  get_output().clear(stage, clear_metadata)
368
427
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: jupyter-agent
3
- Version: 2025.6.102
3
+ Version: 2025.6.104
4
4
  Summary: 调用LLM实现Jupyter代码的自动生成、执行、调试等功能
5
5
  Author: viewstar000
6
6
  License: MIT
@@ -10,14 +10,15 @@ Classifier: Operating System :: OS Independent
10
10
  Requires-Python: >=3.12
11
11
  Description-Content-Type: text/markdown
12
12
  License-File: LICENSE
13
+ Requires-Dist: ipynbname
13
14
  Requires-Dist: ipython
15
+ Requires-Dist: jinja2
16
+ Requires-Dist: nbclient
14
17
  Requires-Dist: nbformat
15
- Requires-Dist: ipynbname
16
18
  Requires-Dist: openai
17
- Requires-Dist: traitlets
18
- Requires-Dist: pyyaml
19
- Requires-Dist: jinja2
20
19
  Requires-Dist: pydantic
20
+ Requires-Dist: pyyaml
21
+ Requires-Dist: traitlets
21
22
  Dynamic: license-file
22
23
 
23
24
  # jupyter-agent
@@ -75,6 +76,8 @@ pip install /path/to/jupyter-agent/dist/jupyter_agent-xxxx-py3-none-any.whl
75
76
 
76
77
  ### 全局配置
77
78
 
79
+ 基础配置
80
+
78
81
  ```python
79
82
  # 加载扩展的Magic命令
80
83
  %load_ext jupyter_agent.bot_magics
@@ -84,12 +87,26 @@ pip install /path/to/jupyter-agent/dist/jupyter_agent-xxxx-py3-none-any.whl
84
87
  %config BotMagics.default_api_key = 'API_KEY'
85
88
  %config BotMagics.default_model_name = 'qwen3-30b-a3b'
86
89
  %config BotMagics.coding_model_name = 'devstral-small-2505-mlx'
90
+ ```
87
91
 
88
- # 设置当前Notebook的路径,由于vscode中运行里无法自动获取到该路径,需要手工指定
92
+ 扩展配置
93
+
94
+ ```python
95
+ # 设置当前Notebook的路径,当无法自动获取时需要手工指定,以Vscode中的Notebook为例
89
96
  %config BotMagics.notebook_path = globals()["__vsc_ipynb_file__"]
90
97
 
91
98
  # 设置是否保存任务数据到Metadata,只有Vscode中安装了jupyter-agent-extension后才支持
92
99
  %config BotMagics.support_save_meta = True
100
+
101
+ # 设置日志级别,可选值为DEBUG、INFO、WARN、ERROR、FATAL,默认为INFO
102
+ %config BotMagics.logging_level = 'DEBUG'
103
+
104
+ # 设置是否显示思考过程,默认为True
105
+ %config BotMagics.display_think = True
106
+
107
+ # 设置是否显示发送给LLM的消息和LLM的回答,默认为False
108
+ %config BotMagics.display_message = True
109
+ %config BotMagics.display_response = True
93
110
  ```
94
111
 
95
112
  ### 全局任务规划
@@ -118,7 +135,7 @@ pip install /path/to/jupyter-agent/dist/jupyter_agent-xxxx-py3-none-any.whl
118
135
 
119
136
  ![docs/image-task-empty.png](https://raw.githubusercontent.com/viewstar000/jupyter-agent/refs/heads/main/docs/image-task-empty.png)
120
137
 
121
- > 注:由于cell magic命令无法直接定位当前cell,需要通过cell的内容进行匹配,因此首次执行%%bot命令时,需要在cell中额外添加一些随机字符
138
+ > **注:**由于cell magic命令无法直接定位当前cell,需要通过cell的内容进行匹配,因此首次执行%%bot命令时,需要在cell中额外添加一些随机字符
122
139
 
123
140
  接下来工具会调用相应的agent自动生成并执行相应步骤的代码,如下图:
124
141
 
@@ -130,6 +147,8 @@ pip install /path/to/jupyter-agent/dist/jupyter_agent-xxxx-py3-none-any.whl
130
147
 
131
148
  ![docs/image-task-confirm.png](https://raw.githubusercontent.com/viewstar000/jupyter-agent/refs/heads/main/docs/image-task-confirm.png)
132
149
 
150
+ > **注:**在执行`%%bot`命令前,必须确保当前Notebook已保存,否则Agent无法读取到完整的Notebook上下文。建议开启Notebook编辑器自动保存功能。
151
+
133
152
  更详细用法可参考[示例Notebook](https://github.com/viewstar000/jupyter-agent/blob/main/examples/data_loader.ipynb)
134
153
 
135
154
  ## 贡献
@@ -195,6 +214,10 @@ pip install /path/to/jupyter-agent/dist/jupyter_agent-xxxx-py3-none-any.whl
195
214
 
196
215
  After installing `jupyter-agent` and `jupyter-agent-extension`, you can use `%%bot` magic command to work on task planning, code generation and execution.
197
216
 
217
+ ### Configuration
218
+
219
+ Basic Configuration:
220
+
198
221
  First create or open a notebook in Vscode, create a new cell, enter and execute the following commands:
199
222
 
200
223
  ```python
@@ -206,12 +229,26 @@ First create or open a notebook in Vscode, create a new cell, enter and execute
206
229
  %config BotMagics.default_api_key = 'API_KEY'
207
230
  %config BotMagics.default_model_name = 'qwen3-30b-a3b'
208
231
  %config BotMagics.coding_model_name = 'devstral-small-2505-mlx'
232
+ ```
209
233
 
210
- # Set the path of the current Notebook, which cannot be automatically obtained in vscode when running, you need to manually specify
234
+ Advanced Configuration:
235
+
236
+ ```python
237
+ # Set the current notebook path manually when it cannot be detected automatically, e.g. for a notebook in VS Code
211
238
  %config BotMagics.notebook_path = globals()["__vsc_ipynb_file__"]
212
239
 
213
240
  # Set whether to save task data to Metadata; only supported in VS Code with jupyter-agent-extension installed
214
241
  %config BotMagics.support_save_meta = True
242
+
243
+ # Set the log level; available values are DEBUG, INFO, WARN, ERROR and FATAL (default: INFO)
244
+ %config BotMagics.logging_level = 'DEBUG'
245
+
246
+ # Set whether to display thinking process, default is True
247
+ %config BotMagics.display_think = True
248
+
249
+ # Set whether to display messages sent to LLM and LLM responses, default is False
250
+ %config BotMagics.display_message = True
251
+ %config BotMagics.display_response = True
215
252
  ```
216
253
 
217
254
  Now, you can use the `%%bot` command to work on task planning and code generation.
@@ -249,6 +286,8 @@ After generating code for a subtask, the tool will call the corresponding agent
249
286
 
250
287
  ![docs/image-task-confirm.png](https://raw.githubusercontent.com/viewstar000/jupyter-agent/refs/heads/main/docs/image-task-confirm.png)
251
288
 
289
+ > **Note:** Before using the `%%bot` command, make sure the current notebook has been saved; otherwise the agent cannot read the full context of the notebook. Enabling the notebook editor's auto-save feature is recommended.
290
+
252
291
  For more details, please refer to [example notebook](https://github.com/viewstar000/jupyter-agent/blob/main/examples/data_loader.ipynb)
253
292
 
254
293
  ## Contributing
@@ -1,8 +1,9 @@
1
1
  jupyter_agent/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
2
  jupyter_agent/bot_chat.py,sha256=RkaT-Cpkp3G24F1AtbcgqjKo-LBxwMNAYHVitaPZNOg,8834
3
3
  jupyter_agent/bot_contexts.py,sha256=eFezr7ImokMz33jJJMlLLQspQBYnuE6Bc2bE1wLbXWU,19123
4
- jupyter_agent/bot_magics.py,sha256=PQib9GoefCAkh2Hy1InfdoiFo-KE-9wJ4qKEj_k9zO4,6570
5
- jupyter_agent/bot_outputs.py,sha256=7_cbOBaGdoYMR6qu43nZEsB9KY6wp4zA0Dh7Jq7LMTg,14065
4
+ jupyter_agent/bot_evaluation.py,sha256=tTGUxZ5Gf-A_aFj1HZ8_rjwXpF5X4cv_YVUyOybJShc,8462
5
+ jupyter_agent/bot_magics.py,sha256=1W52-HVSixsOR-svFv_iPrllNxdgx2mEHMQ4op0UW1o,8204
6
+ jupyter_agent/bot_outputs.py,sha256=aDowlURSt9AZnlQdwQp5Gmd5fSRzY910B6A0h-gvmF0,15953
6
7
  jupyter_agent/utils.py,sha256=jbvDtVK6MfGaf5ZLoam_sq9R5TMriEG4HbMF0bHHDes,4387
7
8
  jupyter_agent/bot_agents/__init__.py,sha256=IdlBlvfaDUfp3qhkNuwUVV_CdplafZsgLezLVkZCREw,1323
8
9
  jupyter_agent/bot_agents/base.py,sha256=50XtKBVRj83zALGgw4klGLZkUlqHNhs1WIX5av9bIm4,10893
@@ -21,13 +22,14 @@ jupyter_agent/bot_agents/task_summarier.py,sha256=bvYEKW_NWRwe-kNNxR7uhJTMKMJXSy
21
22
  jupyter_agent/bot_agents/task_verifier.py,sha256=9Tlyb7hP7tBHMrh5XkRD30mYLodNum33X6v2snjm0QI,2478
22
23
  jupyter_agent/bot_agents/task_verify_summarier.py,sha256=mhpqgcBPOur0TtG8rYUT-BCAYgAiJxDgXVnCAE5Cucs,4963
23
24
  jupyter_agent/bot_flows/__init__.py,sha256=vbb3GJLu6aZdJ2ox4eaHn5cg0d4WQM6zmhIbMAlHIFo,488
24
- jupyter_agent/bot_flows/base.py,sha256=o94y9H8E9FCGz7srdyJDl7UcAj7RCEJCBjtSSREvcWY,8437
25
- jupyter_agent/bot_flows/master_planner.py,sha256=2MvAMikMsBIarTL4QNYTjAs70bxo7VAV0DbHus325Es,430
25
+ jupyter_agent/bot_flows/base.py,sha256=V8JEkjf-gmkuz0X4P8-ziHapeWPN2EEoXFTk5g8AK1Y,10210
26
+ jupyter_agent/bot_flows/master_planner.py,sha256=rmLXrg457TY91FVjOKjJOR8X7gAqP18Sl0O0c5I4pAY,551
26
27
  jupyter_agent/bot_flows/task_executor_v1.py,sha256=WzTfoOTjBpk1emvpiL3yeiudKdDf6EpVdJIugxxbqM4,2975
27
28
  jupyter_agent/bot_flows/task_executor_v2.py,sha256=IyfxhzA4TljNd6iqqUYxxzB63r9lxfe1Zbu177hytRs,2949
28
- jupyter_agent/bot_flows/task_executor_v3.py,sha256=NwJy7iAo2U3rl5iQYpACWVAkIA5DRMDdx0JadheRMMQ,3198
29
- jupyter_agent-2025.6.102.dist-info/licenses/LICENSE,sha256=nWMmSIg7OepTIDX_OPP0-T9ImeCBBoog7eJxm5awtcM,1068
30
- jupyter_agent-2025.6.102.dist-info/METADATA,sha256=ghJ-4ompPy8WtYEam-O_jHLQZQF55wEgeKXrwBwXoIQ,8647
31
- jupyter_agent-2025.6.102.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
32
- jupyter_agent-2025.6.102.dist-info/top_level.txt,sha256=c3USTBZ7DZGuvLKlEW-QfGIx0tzn98iCEn3bpdYnDtE,14
33
- jupyter_agent-2025.6.102.dist-info/RECORD,,
29
+ jupyter_agent/bot_flows/task_executor_v3.py,sha256=Argp0T-bNBRHpjpHhFuNKclOtN2O6DJ28rY7F0GUQrI,3527
30
+ jupyter_agent-2025.6.104.dist-info/licenses/LICENSE,sha256=nWMmSIg7OepTIDX_OPP0-T9ImeCBBoog7eJxm5awtcM,1068
31
+ jupyter_agent-2025.6.104.dist-info/METADATA,sha256=cxJPbyRvUTMJgf01snK1YIvgh87ImLWNKRg3z7RoMi0,10022
32
+ jupyter_agent-2025.6.104.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
33
+ jupyter_agent-2025.6.104.dist-info/entry_points.txt,sha256=063AB86wSrC_V-iiEEqxTlR4uz-T7VH_YagIpmKFQC0,63
34
+ jupyter_agent-2025.6.104.dist-info/top_level.txt,sha256=c3USTBZ7DZGuvLKlEW-QfGIx0tzn98iCEn3bpdYnDtE,14
35
+ jupyter_agent-2025.6.104.dist-info/RECORD,,
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ bot_eval = jupyter_agent.bot_evaluation:main