PyPI - jupyter-agent - Versions diffs - 2025.6.102__py3-none-any.whl → 2025.6.104__py3-none-any.whl - Mend

jupyter-agent 2025.6.102__py3-none-any.whl → 2025.6.104__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (12) hide show

jupyter_agent/bot_evaluation.py ADDED Viewed

@@ -0,0 +1,206 @@
+"""
+Copyright (c) 2025 viewstar000
+This software is released under the MIT License.
+https://opensource.org/licenses/MIT
+"""
+import os
+import time
+import json
+import argparse
+import nbformat
+from pathlib import Path
+from nbclient.client import NotebookClient
+from .bot_outputs import NotebookEvalutionRecord
+def run_notebook(
+    input_path: str | Path,
+    output_path: str | Path = "",
+    inplace: bool = False,
+    timeout: int = -1,
+    startup_timeout: int = 60,
+    allow_errors: bool = False,
+    kernel_name: str = "",
+    skip_cells_with_tag: str = "skip-execution",
+    evaluation_path: str | Path = "",
+) -> None:
+    """Run a notebook by path."""
+    input_path = Path(input_path).with_suffix(".ipynb")
+    assert not (inplace and output_path), "Cannot specify both 'inplace' and 'output_path'"
+    if inplace:
+        output_path = input_path
+    elif not output_path:
+        output_path = input_path.parent.joinpath(f"{input_path.with_suffix('').name}_executed.ipynb")
+    output_path = os.path.abspath(output_path)
+    # Open up the notebook we're going to run
+    with input_path.open() as f:
+        print("Running notebook:", input_path)
+        nb = nbformat.read(f, as_version=4)
+    start_time = 0
+    is_global_finished = False
+    def save_notebook(**kwargs):
+        """Save the executed notebook to the specified output path."""
+        nonlocal is_global_finished
+        if kwargs:
+            cell_idx = kwargs.get("cell_index", 0)
+            cell_type = kwargs.get("cell", {}).get("cell_type")
+            cell_id = kwargs.get("cell", {}).get("id")
+            cell_exec_count = kwargs.get("cell", {}).get("execution_count")
+            cell_meta = kwargs.get("cell", {}).get("metadata", {})
+            cell_payloads = kwargs.get("execute_reply", {}).get("content", {}).get("payload", [])
+            cell_outputs = kwargs.get("cell", {}).get("outputs", [])
+            for payload in cell_payloads:
+                if payload.get("source") == "set_next_input" and payload.get("replace") is True:
+                    print(f"CELL[{cell_idx}] Replacing cell with set_next_input payload")
+                    nb.cells[cell_idx].source = payload.get("text", "")
+            cell_agent_data_timestamp = cell_meta.get("jupyter-agent-data-timestamp", 0)
+            output_agent_data_timestamp = cell_agent_data_timestamp
+            for output in cell_outputs:
+                if output["output_type"] == "display_data":
+                    output_meta = output.get("metadata", {})
+                    if (
+                        output_meta.get("jupyter-agent-data-store")
+                        and output_meta.get("jupyter-agent-data-timestamp", 0) > output_agent_data_timestamp
+                        and output_meta.get("jupyter-agent-data", {})
+                    ):
+                        print(f"CELL[{cell_idx}] Found jupyter-agent-data-store outputs, save it to cell metadata")
+                        output_agent_data_timestamp = output_meta.get("jupyter-agent-data-timestamp", 0)
+                        nb.cells[cell_idx].metadata["jupyter-agent-data-store"] = True
+                        nb.cells[cell_idx].metadata["jupyter-agent-data-timestamp"] = output_agent_data_timestamp
+                        if "jupyter-agent-data" not in nb.cells[cell_idx].metadata:
+                            nb.cells[cell_idx].metadata["jupyter-agent-data"] = {}
+                        nb.cells[cell_idx].metadata["jupyter-agent-data"].update(output_meta["jupyter-agent-data"])
+                    for record in output_meta.get("jupyter-agent-evaluation-records", []):
+                        record["notebook_name"] = output_path
+                        if record["eval_type"] == "NOTEBOOK":
+                            record["execution_duration"] = time.time() - start_time
+                            is_global_finished = True
+                            del nb.cells[cell_idx + 1 :]  # Remove all cells after the notebook cell
+                        print(
+                            f"CELL[{cell_idx}] Evaluating record: {record['eval_type']} "
+                            f"duration: {record['execution_duration']:.2f}s "
+                            f"success: {record['is_success']} "
+                            f"correct: {record['correct_score']:.2f}"
+                        )
+                        if evaluation_path:
+                            with open(evaluation_path, "a") as eval_file:
+                                eval_file.write(json.dumps(record) + "\n")
+            print(f"CELL[{cell_idx}] Saving executed {cell_type} cell - {cell_id}: {cell_exec_count}")
+        else:
+            print(f"Saving executed notebook to: {output_path}")
+        nbformat.write(nb, output_path)
+    # Add metadata to the notebook
+    nb.cells.insert(
+        0,
+        nbformat.v4.new_code_cell(
+            source=(
+                f"# Executed notebook: {input_path.name}\n"
+                f"# Output saved to: {output_path}\n\n"
+                f"__evaluation_ipynb_file__ = '{output_path}'\n"
+            ),
+            metadata={"tags": ["CTX_EXCLUDE"]},
+        ),
+    )
+    save_notebook()
+    # Configure nbclient to run the notebook
+    client = NotebookClient(
+        nb,
+        timeout=timeout,
+        startup_timeout=startup_timeout,
+        skip_cells_with_tag=skip_cells_with_tag,
+        allow_errors=allow_errors,
+        kernel_name=kernel_name,
+        resources={"metadata": {"path": input_path.parent.absolute()}},
+        on_cell_executed=save_notebook,
+    )
+    # Run it
+    print("Executing notebook...")
+    start_time = time.time()
+    client.execute()
+    save_notebook()
+    print("Notebook execution completed.")
+    # If the notebook did not finish globally, append an evaluation record
+    if not is_global_finished:
+        print("Notebook execution did not finish globally, appending evaluation records.")
+        record = NotebookEvalutionRecord(
+            notebook_name=output_path,
+            eval_type="NOTEBOOK",
+            execution_duration=time.time() - start_time,
+            is_success=False,
+            correct_score=0.0,
+        )
+        print(
+            f"Global evaluation record: {record.eval_type} "
+            f"duration: {record.execution_duration:.2f}s "
+            f"success: {record.is_success} "
+            f"correct: {record.correct_score:.2f}"
+        )
+        if evaluation_path:
+            with open(evaluation_path, "a") as eval_file:
+                eval_file.write(json.dumps(record.model_dump()) + "\n")
+def main():
+    """Main function to run the notebook execution."""
+    parser = argparse.ArgumentParser(description="Run a Jupyter notebook.")
+    parser.add_argument(
+        "-o", "--output_path", type=str, default="", help="Path to save the executed notebook (default: same as input)"
+    )
+    parser.add_argument(
+        "-i", "--inplace", action="store_true", help="Run the notebook in place (overwrite input file)"
+    )
+    parser.add_argument(
+        "-e",
+        "--evaluation_path",
+        type=str,
+        default="",
+        help="Path to save evaluation records (default: no evaluation records saved)",
+    )
+    parser.add_argument(
+        "--timeout", type=int, default=-1, help="Execution timeout in seconds (default: -1, no timeout)"
+    )
+    parser.add_argument(
+        "--startup_timeout", type=int, default=60, help="Kernel startup timeout in seconds (default: 60)"
+    )
+    parser.add_argument(
+        "--allow_errors", action="store_true", help="Allow errors in the notebook execution (default: False)"
+    )
+    parser.add_argument(
+        "--kernel_name", type=str, default="", help="Kernel name to use for execution (default: use notebook's kernel)"
+    )
+    parser.add_argument(
+        "--skip_cells_with_tag",
+        type=str,
+        default="skip-execution",
+        help="Tag to skip cells with (default: 'skip-execution')",
+    )
+    parser.add_argument("input_path", type=str, help="Path to the input notebook file")
+    args = parser.parse_args()
+    run_notebook(
+        input_path=args.input_path,
+        output_path=args.output_path,
+        inplace=args.inplace,
+        timeout=args.timeout,
+        startup_timeout=args.startup_timeout,
+        allow_errors=args.allow_errors,
+        kernel_name=args.kernel_name,
+        skip_cells_with_tag=args.skip_cells_with_tag,
+        evaluation_path=args.evaluation_path,
+    )
+if __name__ == "__main__":
+    main()

jupyter_agent/bot_flows/base.py CHANGED Viewed

@@ -5,6 +5,7 @@ This software is released under the MIT License.
 https://opensource.org/licenses/MIT
 """
+import time
 import traceback
 from pydantic import BaseModel
@@ -12,11 +13,14 @@ from enum import Enum
 from typing import List, Dict, Optional, Type
 from IPython.display import Markdown
 from ..bot_agents.base import BaseAgent
-from ..bot_outputs import _D, _I, _W, _E, _F, _M, _B, set_stage, flush_output
+from ..bot_outputs import _D, _I, _W, _E, _F, _M, _B
+from ..bot_outputs import set_stage, flush_output, output_evaluation
+from ..bot_outputs import FlowEvalutionRecord, StageEvalutionRecord, NotebookEvalutionRecord
 TASK_AGENT_STATE_ERROR = "_AGENT_STATE_ERROR_32534526_"
 TASK_STAGE_START = "start"
 TASK_STAGE_COMPLETED = "completed"
+TASK_STAGE_GLOBAL_FINISHED = "global_finished"
 class TaskAction(str, Enum):
@@ -47,7 +51,7 @@ class BaseTaskFlow:
     STAGE_TRANSITIONS: List[StageTransition] = []
     START_STAGE = TASK_STAGE_START
-    STOP_STAGES = [TASK_STAGE_COMPLETED]
+    STOP_STAGES = [TASK_STAGE_COMPLETED, TASK_STAGE_GLOBAL_FINISHED]
     def __init__(self, notebook_context, agent_factory):
         self.notebook_context = notebook_context
@@ -146,11 +150,15 @@ class BaseTaskFlow:
         ns = self._get_next_stage_trans(stage, state, action)
         return ns.stage
-    def __call__(self, stage, max_tries=3, stage_continue=True, stage_confirm=True):
+    def __call__(self, stage, max_tries=5, stage_continue=True, stage_confirm=True):
         n_tries = 0
+        flow_duration = 0.0
+        stage_count = 0
+        # Initialize the task stage
         stage = stage or self.START_STAGE
         while n_tries <= max_tries:
+            stage_st = time.time()
             try:
                 stage_name = stage.value if isinstance(stage, Enum) else stage
                 stage_name = stage_name.replace(".", "-").capitalize()
@@ -163,6 +171,19 @@ class BaseTaskFlow:
                 _M(f"```python\n{traceback.format_exc()}\n```")
                 state = TASK_AGENT_STATE_ERROR
                 failed = True
+            stage_count += 1
+            stage_duration = time.time() - stage_st
+            flow_duration += stage_duration
+            _M(f"Stage `{stage}` completed in {stage_duration:.2f} seconds with state `{state}` and failed `{failed}`")
+            output_evaluation(
+                StageEvalutionRecord(
+                    cell_index=self.task.cell_idx,
+                    flow=type(self).__name__,
+                    stage=str(stage),
+                    execution_duration=stage_duration,
+                    is_success=not failed,
+                )
+            )
             if state != TASK_AGENT_STATE_ERROR:
                 # Agent did not fail, check if we have reached the final stage
@@ -171,14 +192,18 @@ class BaseTaskFlow:
                 self.task.update_cell()
                 if next_stage in self.STOP_STAGES:
                     _M(f"Task execution **Stopped** at stage `{next_stage}`")
+                    stage = next_stage
                     break
             if failed:
                 # Agent failed
                 n_tries += 1
+                if n_tries > max_tries:
+                    _M(f"**Max flow tries reached** during task execution stage `{stage}`, **Stop!**")
+                    break
-            if failed or stage_confirm:
-                # Agent failed or we need to confirm
+            if stage_confirm:
+                # We need to confirm
                 message = self.get_prompt_message(stage, state, failed)
                 _M("**Confirm**: " + message)
                 flush_output()
@@ -188,15 +213,13 @@ class BaseTaskFlow:
                 self.task.update_cell()
                 if action == TaskAction.STOP:
                     _M(f"Task execution **Stopped**, and set next stage to `{next_stage}`")
-                    break
-                elif n_tries > max_tries:
-                    _M(f"**Max tries reached** during task execution stage `{stage}`, **Stop!**")
+                    stage = next_stage
                     break
                 else:
                     _M(f"**Action**: `{action}` transits stage to `{next_stage}`")
                     stage = next_stage
             else:
-                # Agent succeeded, transit to the next stage without confirmation
+                # transit to the next stage without confirmation
                 next_stage = self.get_next_stage(stage, state, TaskAction.CONTINUE)
                 self.task.agent_stage = next_stage
                 self.task.update_cell()
@@ -205,5 +228,21 @@ class BaseTaskFlow:
             if not stage_continue:
                 break
+        # Finalize the task execution
+        stage_name = stage.value if isinstance(stage, Enum) else stage
+        if stage_name == TASK_STAGE_GLOBAL_FINISHED:
+            _M("Task execution **finished** globally.")
+            output_evaluation(NotebookEvalutionRecord(cell_index=self.task.cell_idx, is_success=True))
+        elif stage_name == TASK_STAGE_COMPLETED:
+            _M(f"Task execution **completed** in {flow_duration:.2f} seconds with {stage_count} stages.")
+            output_evaluation(
+                FlowEvalutionRecord(
+                    cell_index=self.task.cell_idx,
+                    flow=type(self).__name__,
+                    stage_count=stage_count,
+                    execution_duration=flow_duration,
+                    is_success=True,
+                )
+            )
         flush_output()
         return stage

jupyter_agent/bot_flows/master_planner.py CHANGED Viewed

@@ -11,6 +11,7 @@ from .base import BaseTaskFlow, StageTransition, TASK_STAGE_START, TASK_STAGE_CO
 class MasterPlannerFlow(BaseTaskFlow):
     STAGE_TRANSITIONS = [
-        StageTransition(stage=TASK_STAGE_START, agent="MasterPlannerAgent", next_stage=TASK_STAGE_START)
+        StageTransition(stage=TASK_STAGE_START, agent="MasterPlannerAgent", next_stage=TASK_STAGE_COMPLETED),
+        StageTransition(stage=TASK_STAGE_COMPLETED, agent="OutputTaskResult", next_stage=TASK_STAGE_COMPLETED),
     ]
-    STOP_STAGES = [TASK_STAGE_START]
+    STOP_STAGES = [TASK_STAGE_COMPLETED]

jupyter_agent/bot_flows/task_executor_v3.py CHANGED Viewed

@@ -6,7 +6,14 @@ https://opensource.org/licenses/MIT
 """
 from enum import Enum
-from .base import BaseTaskFlow, StageTransition, StageNext, TaskAction
+from .base import (
+    BaseTaskFlow,
+    StageTransition,
+    StageNext,
+    TaskAction,
+    TASK_STAGE_COMPLETED,
+    TASK_STAGE_GLOBAL_FINISHED,
+)
 from ..bot_agents import (
     TaskPlannerAgentV3,
     TaskCodingAgent,
@@ -27,14 +34,15 @@ class TaskStage(str, Enum):
     DEBUGGING = "debugging"
     REASONING = "reasoning"
     SUMMARY = "summary"
-    COMPLETED = "completed"
     OUTPUT_RESULT = "output_result"
+    COMPLETED = TASK_STAGE_COMPLETED
+    GLOBAL_FINISHED = TASK_STAGE_GLOBAL_FINISHED
 class TaskExecutorFlowV3(BaseTaskFlow):
     START_STAGE = TaskStage.PLANNING
-    STOP_STAGES = [TaskStage.COMPLETED, TaskStage.PLANNING_PAUSED]
+    STOP_STAGES = [TaskStage.COMPLETED, TaskStage.PLANNING_PAUSED, TaskStage.GLOBAL_FINISHED]
     STAGE_TRANSITIONS = [
         StageTransition[TaskStage, TaskPlannerState](
             stage=TaskStage.PLANNING,
@@ -43,7 +51,7 @@ class TaskExecutorFlowV3(BaseTaskFlow):
                 TaskPlannerState.CODING_PLANNED: TaskStage.CODING,
                 TaskPlannerState.REASONING_PLANNED: TaskStage.REASONING,
                 TaskPlannerState.REQUEST_INFO: TaskStage.PLANNING_PAUSED,
-                TaskPlannerState.GLOBAL_FINISHED: TaskStage.COMPLETED,
+                TaskPlannerState.GLOBAL_FINISHED: TaskStage.GLOBAL_FINISHED,
             },
         ),
         StageTransition[TaskStage, TaskPlannerState](
@@ -86,4 +94,7 @@ class TaskExecutorFlowV3(BaseTaskFlow):
         StageTransition[TaskStage, None](
             stage=TaskStage.OUTPUT_RESULT, agent=OutputTaskResult, next_stage=TaskStage.COMPLETED
         ),
+        StageTransition[TaskStage, None](
+            stage=TaskStage.GLOBAL_FINISHED, agent=OutputTaskResult, next_stage=TaskStage.GLOBAL_FINISHED
+        ),
     ]

jupyter_agent/bot_magics.py CHANGED Viewed

@@ -43,9 +43,12 @@ class BotMagics(Magics, Configurable):
     display_message = Bool(False, help="Display chat message").tag(config=True)
     display_think = Bool(True, help="Display chatthink response").tag(config=True)
     display_response = Bool(False, help="Display chat full response").tag(config=True)
+    support_save_meta = Bool(False, help="Support save metadata to cell").tag(config=True)
     notebook_path = Unicode(None, allow_none=True, help="Path to Notebook file").tag(config=True)
     default_task_flow = Unicode("v3", allow_none=True, help="Default task flow").tag(config=True)
-    support_save_meta = Bool(False, help="Support save metadata to cell").tag(config=True)
+    default_max_tries = Int(3, help="Default max tries for task execution").tag(config=True)
+    default_step_mode = Bool(False, help="Default step mode for task execution").tag(config=True)
+    default_auto_confirm = Bool(False, help="Default auto confirm for task execution").tag(config=True)
     def parse_args(self, line):
         """解析命令行参数"""
@@ -54,17 +57,52 @@ class BotMagics(Magics, Configurable):
         parser.add_argument("-P", "--planning", action="store_true", default=False, help="Run in planning mode")
         parser.add_argument("-s", "--stage", type=str, default=None, help="Task stage")
         parser.add_argument("-f", "--flow", type=str, default=self.default_task_flow, help="Flow name")
-        parser.add_argument("-m", "--max-tries", type=int, default=3, help="Max tries")
-        parser.add_argument("-S", "--step-mode", action="store_true", default=False, help="Run in single step mode")
-        parser.add_argument("-Y", "--auto-confirm", action="store_true", default=False, help="Run without confirm")
+        parser.add_argument("-m", "--max-tries", type=int, default=self.default_max_tries, help="Max tries")
+        parser.add_argument(
+            "-S",
+            "--step-mode",
+            action="store_true",
+            default=self.default_step_mode,
+            help="Run in single step mode",
+        )
+        parser.add_argument(
+            "-Y",
+            "--auto-confirm",
+            action="store_true",
+            default=self.default_auto_confirm,
+            help="Run without confirm",
+        )
         options, _ = parser.parse_known_args(shlex.split(line.strip()))
         return options
+    def ensure_notebook_path(self):
+        if self.notebook_path:
+            return self.notebook_path
+        result = self.shell and self.shell.run_cell("globals().get('__vsc_ipynb_file__')")
+        if result and result.success and result.result:
+            self.notebook_path = result.result
+            return self.notebook_path
+        try:
+            self.notebook_path = str(ipynbname.path())
+            return self.notebook_path
+        except Exception as e:
+            _F(f"Failed to get notebook path: {e}")
+            return None
     @cell_magic
     def bot(self, line, cell):
         """Jupyter cell magic: %%bot"""
         try:
+            if not self.ensure_notebook_path():
+                _O(
+                    Markdown(
+                        "The notebook path is **empty**, we can't do anything.\n\n"
+                        "Please set the notebook path in the configuration, and **RERUN** the cell again.\n\n"
+                        'For example: `%config BotMagics.notebook_path = globals()["__vsc_ipynb_file__"]`'
+                    )
+                )
+                return
             AgentCellContext.SUPPORT_SAVE_META = self.support_save_meta
             reset_output(stage="Logging", logging_level=self.logging_level)
             _I("Cell magic %%bot executing ...")
@@ -85,8 +123,6 @@ class BotMagics(Magics, Configurable):
             options = self.parse_args(line)
             _D(f"Cell magic called with options: {options}")
             set_logging_level(options.logging_level)
-            self.notebook_path = self.notebook_path or ipynbname.path()
-            _D(f"Cell magic called with notebook path: {self.notebook_path}")
             nb_context = NotebookContext(line, cell, notebook_path=self.notebook_path)
             agent_factory = AgentFactory(
                 nb_context,
@@ -95,16 +131,28 @@ class BotMagics(Magics, Configurable):
                 display_response=self.display_response,
             )
             agent_factory.config_model(
-                AgentModelType.DEFAULT, self.default_api_url, self.default_api_key, self.default_model_name
+                AgentModelType.DEFAULT,
+                self.default_api_url,
+                self.default_api_key,
+                self.default_model_name,
             )
             agent_factory.config_model(
-                AgentModelType.PLANNER, self.planner_api_url, self.planner_api_key, self.planner_model_name
+                AgentModelType.PLANNER,
+                self.planner_api_url,
+                self.planner_api_key,
+                self.planner_model_name,
             )
             agent_factory.config_model(
-                AgentModelType.CODING, self.coding_api_url, self.coding_api_key, self.coding_model_name
+                AgentModelType.CODING,
+                self.coding_api_url,
+                self.coding_api_key,
+                self.coding_model_name,
             )
             agent_factory.config_model(
-                AgentModelType.REASONING, self.reasoning_api_url, self.reasoning_api_key, self.reasoning_model_name
+                AgentModelType.REASONING,
+                self.reasoning_api_url,
+                self.reasoning_api_key,
+                self.reasoning_model_name,
             )
             if options.planning:
                 flow = MasterPlannerFlow(nb_context, agent_factory)
@@ -117,7 +165,12 @@ class BotMagics(Magics, Configurable):
                     flow = TaskExecutorFlowV3(nb_context, agent_factory)
                 else:
                     raise ValueError(f"Unknown flow: {options.flow}")
-            flow(options.stage, options.max_tries, not options.step_mode, not options.auto_confirm)
+            flow(
+                options.stage,
+                options.max_tries,
+                not options.step_mode,
+                not options.auto_confirm,
+            )
         except Exception as e:
             traceback.print_exc()

jupyter_agent/bot_outputs.py CHANGED Viewed

@@ -11,6 +11,8 @@ import datetime
 import jinja2
 from enum import Enum
+from typing import Optional, Dict, Any
+from pydantic import BaseModel, Field
 from IPython.display import display, Markdown
 from .utils import no_indent, no_wrap
@@ -171,6 +173,38 @@ LOGGING_LEVELS = {
 }
+class BaseEvalutionRecord(BaseModel):
+    """Fields shared by every evaluation record (stage, flow and notebook level)."""
+    # Creation time; AgentOutput.log_evaluation fills in time.time() when this is still 0.
+    timestamp: float = 0
+    # Name/path of the notebook the record belongs to.
+    notebook_name: str = ""
+    # Record kind discriminator; subclasses override the default.
+    eval_type: str = "BASE"
+    # Index of the cell the record was produced for; -1 when not set.
+    cell_index: int = -1
+    # Measured duration; logged with an "s" (seconds) suffix.
+    execution_duration: float = 0.0
+    is_success: bool = False
+    # NOTE(review): nothing in the visible code assigns a non-zero score — scale/meaning to be confirmed.
+    correct_score: float = 0.0
+class StageEvalutionRecord(BaseEvalutionRecord):
+    """Evaluation record for a single stage of a task flow (``eval_type`` "STAGE")."""
+    eval_type: str = "STAGE"
+    # Name of the flow the stage belongs to.
+    flow: str = ""
+    # Name of the stage.
+    stage: str = ""
+    # NOTE(review): the three scores below are never assigned in the visible code — meaning to be confirmed.
+    coding_score: float = 0.0
+    important_score: float = 0.0
+    user_supply_score: float = 0.0
+class FlowEvalutionRecord(BaseEvalutionRecord):
+    """Evaluation record for one complete task flow (``eval_type`` "FLOW")."""
+    eval_type: str = "FLOW"
+    # Name of the flow.
+    flow: str = ""
+    # Number of stages counted for the flow.
+    stage_count: int = 0
+    # NOTE(review): never assigned in the visible code — meaning to be confirmed.
+    planning_score: float = 0.0
+class NotebookEvalutionRecord(BaseEvalutionRecord):
+    """Evaluation record for a whole notebook run (``eval_type`` "NOTEBOOK")."""
+    eval_type: str = "NOTEBOOK"
+    # NOTE(review): the two fields below are never assigned in the visible code — meaning to be confirmed.
+    flow_count: int = 0
+    planning_score: float = 0.0
 class AgentOutput:
     """
     AgentOutput 是一个用于在 Jupyter Notebook 中显示 Agent 输出的类。
@@ -193,6 +227,7 @@ class AgentOutput:
         self._agent_data_timestamp = None
         self._agent_data = {}
         self._logging_records = []
+        self._evaluation_records = []
     @property
     def content(self):
@@ -226,6 +261,8 @@ class AgentOutput:
                     "jupyter-agent-data": self._agent_data,
                 }
             )
+        if self._evaluation_records:
+            metadata["jupyter-agent-evaluation-records"] = [record.model_dump() for record in self._evaluation_records]
         return metadata
     def display(self, stage=None, force=False, wait=True):
@@ -320,6 +357,20 @@ class AgentOutput:
             )
         self.display(force=False, wait=False)
+    def log_evaluation(self, record: BaseEvalutionRecord):
+        assert isinstance(
+            record, BaseEvalutionRecord
+        ), "record must be an instance of BaseEvalutionRecord or its subclass"
+        if record.timestamp == 0:
+            record.timestamp = time.time()
+        self._evaluation_records.append(record)
+        self.log(
+            f"Evaluation: {record.eval_type}[{record.cell_index}] duration: {record.execution_duration:.2f}s "
+            f"success: {record.is_success} correct: {record.correct_score:.2f}",
+            level="INFO",
+        )
+        self.display(force=False, wait=False)
 __agent_output = None
@@ -363,6 +414,14 @@ def output_agent_data(**kwargs):
     get_output().output_agent_data(**kwargs)
+def output_evaluation(record: BaseEvalutionRecord):
+    """
+    输出评估记录到 AgentOutput 中。
+    :param record: 评估记录对象，必须是 BaseEvalutionRecord 的子类。
+    """
+    get_output().log_evaluation(record)
 def clear_output(stage=None, clear_metadata=False):
     get_output().clear(stage, clear_metadata)

{jupyter_agent-2025.6.102.dist-info → jupyter_agent-2025.6.104.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: jupyter-agent
-Version: 2025.6.102
+Version: 2025.6.104
 Summary: 调用LLM实现Jupyter代码的自动生成、执行、调试等功能
 Author: viewstar000
 License: MIT
@@ -10,14 +10,15 @@ Classifier: Operating System :: OS Independent
 Requires-Python: >=3.12
 Description-Content-Type: text/markdown
 License-File: LICENSE
+Requires-Dist: ipynbname
 Requires-Dist: ipython
+Requires-Dist: jinja2
+Requires-Dist: nbclient
 Requires-Dist: nbformat
-Requires-Dist: ipynbname
 Requires-Dist: openai
-Requires-Dist: traitlets
-Requires-Dist: pyyaml
-Requires-Dist: jinja2
 Requires-Dist: pydantic
+Requires-Dist: pyyaml
+Requires-Dist: traitlets
 Dynamic: license-file
 # jupyter-agent
@@ -75,6 +76,8 @@ pip install /path/to/jupyter-agent/dist/jupyter_agent-xxxx-py3-none-any.whl
 ### 全局配置
+基础配置
 ```python
 # 加载扩展的Magic命令
 %load_ext jupyter_agent.bot_magics
@@ -84,12 +87,26 @@ pip install /path/to/jupyter-agent/dist/jupyter_agent-xxxx-py3-none-any.whl
 %config BotMagics.default_api_key = 'API_KEY'
 %config BotMagics.default_model_name = 'qwen3-30b-a3b'
 %config BotMagics.coding_model_name = 'devstral-small-2505-mlx'
+```
-# 设置当前Notebook的路径，由于vscode中运行里无法自动获取到该路径，需要手工指定
+扩展配置
+```python
+# 设置当前Notebook的路径，当无法自动获取时需要手工指定，以Vscode中的Notebook为例
 %config BotMagics.notebook_path = globals()["__vsc_ipynb_file__"]
 # 设置是否保存任务数据到Metadata，只有Vscode中安装了jupyter-agent-extension后才支持
 %config BotMagics.support_save_meta = True
+# 设置日志级别，可选值为DEBUG、INFO、WARN、ERROR、FATAL，默认为INFO
+%config BotMagics.logging_level = 'DEBUG'
+# 设置是否显示思考过程，默认为True
+%config BotMagics.display_think = True
+# 设置是否显示发送给LLM的消息和LLM的回答，默认为False
+%config BotMagics.display_message = True
+%config BotMagics.display_response = True
 ```
 ### 全局任务规划
@@ -118,7 +135,7 @@ pip install /path/to/jupyter-agent/dist/jupyter_agent-xxxx-py3-none-any.whl
 ![docs/image-task-empty.png](https://raw.githubusercontent.com/viewstar000/jupyter-agent/refs/heads/main/docs/image-task-empty.png)
-> 注：由于cell magic命令无法直接定位当前cell，需要通过cell的内容进行匹配，因此首次执行%%bot命令时，需要在cell中额外添加一些随机字符
+> **注：**由于cell magic命令无法直接定位当前cell，需要通过cell的内容进行匹配，因此首次执行%%bot命令时，需要在cell中额外添加一些随机字符
 接下来工具会调用相应的agent自动生成并执行相应步骤的代码，如下图：
@@ -130,6 +147,8 @@ pip install /path/to/jupyter-agent/dist/jupyter_agent-xxxx-py3-none-any.whl
 ![docs/image-task-confirm.png](https://raw.githubusercontent.com/viewstar000/jupyter-agent/refs/heads/main/docs/image-task-confirm.png)
+> **注：**在执行`%%bot`命令前，必须确保当前Notebook已保存，否则Agent无法读取到完整的Notebook上下文。建议开启Notebook编辑器自动保存功能。
 更详细用法可参考[示例Notebook](https://github.com/viewstar000/jupyter-agent/blob/main/examples/data_loader.ipynb)
 ## 贡献
@@ -195,6 +214,10 @@ pip install /path/to/jupyter-agent/dist/jupyter_agent-xxxx-py3-none-any.whl
 After installing `jupyter-agent` and `jupyter-agent-extension`, you can use `%%bot` magic command to work on task planning, code generation and execution.
+### Configuration
+Basic Configuration:
 First create or open a notebook in Vscode, create a new cell, enter and execute the following commands:
 ```python
@@ -206,12 +229,26 @@ First create or open a notebook in Vscode, create a new cell, enter and execute
 %config BotMagics.default_api_key = 'API_KEY'
 %config BotMagics.default_model_name = 'qwen3-30b-a3b'
 %config BotMagics.coding_model_name = 'devstral-small-2505-mlx'
+```
-# Set the path of the current Notebook, which cannot be automatically obtained in vscode when running, you need to manually specify
+Advanced Configuration:
+```python
+# Set the path of the current notebook. Specify it manually when it cannot be detected automatically; the example below is for a VS Code notebook
 %config BotMagics.notebook_path = globals()["__vsc_ipynb_file__"]
 # Set whether to save task data to Metadata, only Vscode installed with jupyter-agent-extension supports
 %config BotMagics.support_save_meta = True
+# Set the log level; available values are DEBUG, INFO, WARN, ERROR, FATAL (default: INFO)
+%config BotMagics.logging_level = 'DEBUG'
+# Set whether to display thinking process, default is True
+%config BotMagics.display_think = True
+# Set whether to display messages sent to LLM and LLM responses, default is False
+%config BotMagics.display_message = True
+%config BotMagics.display_response = True
 ```
 Now, you can use the `%%bot` command to work on task rules and code generation.
@@ -249,6 +286,8 @@ After generating code for a subtask, the tool will call the corresponding agent
 ![docs/image-task-confirm.png](https://raw.githubusercontent.com/viewstar000/jupyter-agent/refs/heads/main/docs/image-task-confirm.png)
+> **Note:** Before using the `%%bot` command, make sure the current notebook has been saved; otherwise the agent cannot read the full context of the notebook. Enabling the notebook editor's auto-save feature is recommended.
 For more details, please refer to [example notebook](https://github.com/viewstar000/jupyter-agent/blob/main/examples/data_loader.ipynb)
 ## Contributing

{jupyter_agent-2025.6.102.dist-info → jupyter_agent-2025.6.104.dist-info}/RECORD RENAMED Viewed

@@ -1,8 +1,9 @@
 jupyter_agent/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 jupyter_agent/bot_chat.py,sha256=RkaT-Cpkp3G24F1AtbcgqjKo-LBxwMNAYHVitaPZNOg,8834
 jupyter_agent/bot_contexts.py,sha256=eFezr7ImokMz33jJJMlLLQspQBYnuE6Bc2bE1wLbXWU,19123
-jupyter_agent/bot_magics.py,sha256=PQib9GoefCAkh2Hy1InfdoiFo-KE-9wJ4qKEj_k9zO4,6570
-jupyter_agent/bot_outputs.py,sha256=7_cbOBaGdoYMR6qu43nZEsB9KY6wp4zA0Dh7Jq7LMTg,14065
+jupyter_agent/bot_evaluation.py,sha256=tTGUxZ5Gf-A_aFj1HZ8_rjwXpF5X4cv_YVUyOybJShc,8462
+jupyter_agent/bot_magics.py,sha256=1W52-HVSixsOR-svFv_iPrllNxdgx2mEHMQ4op0UW1o,8204
+jupyter_agent/bot_outputs.py,sha256=aDowlURSt9AZnlQdwQp5Gmd5fSRzY910B6A0h-gvmF0,15953
 jupyter_agent/utils.py,sha256=jbvDtVK6MfGaf5ZLoam_sq9R5TMriEG4HbMF0bHHDes,4387
 jupyter_agent/bot_agents/__init__.py,sha256=IdlBlvfaDUfp3qhkNuwUVV_CdplafZsgLezLVkZCREw,1323
 jupyter_agent/bot_agents/base.py,sha256=50XtKBVRj83zALGgw4klGLZkUlqHNhs1WIX5av9bIm4,10893
@@ -21,13 +22,14 @@ jupyter_agent/bot_agents/task_summarier.py,sha256=bvYEKW_NWRwe-kNNxR7uhJTMKMJXSy
 jupyter_agent/bot_agents/task_verifier.py,sha256=9Tlyb7hP7tBHMrh5XkRD30mYLodNum33X6v2snjm0QI,2478
 jupyter_agent/bot_agents/task_verify_summarier.py,sha256=mhpqgcBPOur0TtG8rYUT-BCAYgAiJxDgXVnCAE5Cucs,4963
 jupyter_agent/bot_flows/__init__.py,sha256=vbb3GJLu6aZdJ2ox4eaHn5cg0d4WQM6zmhIbMAlHIFo,488
-jupyter_agent/bot_flows/base.py,sha256=o94y9H8E9FCGz7srdyJDl7UcAj7RCEJCBjtSSREvcWY,8437
-jupyter_agent/bot_flows/master_planner.py,sha256=2MvAMikMsBIarTL4QNYTjAs70bxo7VAV0DbHus325Es,430
+jupyter_agent/bot_flows/base.py,sha256=V8JEkjf-gmkuz0X4P8-ziHapeWPN2EEoXFTk5g8AK1Y,10210
+jupyter_agent/bot_flows/master_planner.py,sha256=rmLXrg457TY91FVjOKjJOR8X7gAqP18Sl0O0c5I4pAY,551
 jupyter_agent/bot_flows/task_executor_v1.py,sha256=WzTfoOTjBpk1emvpiL3yeiudKdDf6EpVdJIugxxbqM4,2975
 jupyter_agent/bot_flows/task_executor_v2.py,sha256=IyfxhzA4TljNd6iqqUYxxzB63r9lxfe1Zbu177hytRs,2949
-jupyter_agent/bot_flows/task_executor_v3.py,sha256=NwJy7iAo2U3rl5iQYpACWVAkIA5DRMDdx0JadheRMMQ,3198
-jupyter_agent-2025.6.102.dist-info/licenses/LICENSE,sha256=nWMmSIg7OepTIDX_OPP0-T9ImeCBBoog7eJxm5awtcM,1068
-jupyter_agent-2025.6.102.dist-info/METADATA,sha256=ghJ-4ompPy8WtYEam-O_jHLQZQF55wEgeKXrwBwXoIQ,8647
-jupyter_agent-2025.6.102.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-jupyter_agent-2025.6.102.dist-info/top_level.txt,sha256=c3USTBZ7DZGuvLKlEW-QfGIx0tzn98iCEn3bpdYnDtE,14
-jupyter_agent-2025.6.102.dist-info/RECORD,,
+jupyter_agent/bot_flows/task_executor_v3.py,sha256=Argp0T-bNBRHpjpHhFuNKclOtN2O6DJ28rY7F0GUQrI,3527
+jupyter_agent-2025.6.104.dist-info/licenses/LICENSE,sha256=nWMmSIg7OepTIDX_OPP0-T9ImeCBBoog7eJxm5awtcM,1068
+jupyter_agent-2025.6.104.dist-info/METADATA,sha256=cxJPbyRvUTMJgf01snK1YIvgh87ImLWNKRg3z7RoMi0,10022
+jupyter_agent-2025.6.104.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+jupyter_agent-2025.6.104.dist-info/entry_points.txt,sha256=063AB86wSrC_V-iiEEqxTlR4uz-T7VH_YagIpmKFQC0,63
+jupyter_agent-2025.6.104.dist-info/top_level.txt,sha256=c3USTBZ7DZGuvLKlEW-QfGIx0tzn98iCEn3bpdYnDtE,14
+jupyter_agent-2025.6.104.dist-info/RECORD,,

jupyter_agent-2025.6.104.dist-info/entry_points.txt ADDED Viewed

	@@ -0,0 +1,2 @@
1	+ [console_scripts]
2	+ bot_eval = jupyter_agent.bot_evaluation:main

{jupyter_agent-2025.6.102.dist-info → jupyter_agent-2025.6.104.dist-info}/WHEEL RENAMED Viewed

File without changes

{jupyter_agent-2025.6.102.dist-info → jupyter_agent-2025.6.104.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

{jupyter_agent-2025.6.102.dist-info → jupyter_agent-2025.6.104.dist-info}/top_level.txt RENAMED Viewed

File without changes

jupyter-agent 2025.6.102__py3-none-any.whl → 2025.6.104__py3-none-any.whl

jupyter-agent 2025.6.102__py3-none-any.whl → 2025.6.104__py3-none-any.whl