PyPI - jupyter-agent - Versions diffs - 2025.6.104__py3-none-any.whl → 2025.7.100__py3-none-any.whl - Mend

jupyter-agent 2025.6.104__py3-none-any.whl → 2025.7.100__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (43) hide show

jupyter_agent/bot_actions.py +270 -0
jupyter_agent/bot_agents/__init__.py +0 -42
jupyter_agent/bot_agents/base.py +89 -45
jupyter_agent/bot_agents/master_planner.py +1 -0
jupyter_agent/bot_agents/output_task_result.py +6 -7
jupyter_agent/bot_agents/prepare_next_cell.py +52 -0
jupyter_agent/bot_agents/request_user_supply.py +186 -0
jupyter_agent/bot_agents/task_code_executor.py +3 -2
jupyter_agent/bot_agents/task_planner_v3.py +16 -13
jupyter_agent/bot_agents/task_reasoner.py +3 -2
jupyter_agent/bot_agents/task_structrue_reasoner.py +22 -12
jupyter_agent/bot_agents/task_structrue_summarier.py +22 -18
jupyter_agent/bot_agents/task_summarier.py +3 -2
jupyter_agent/bot_agents/task_verifier.py +2 -1
jupyter_agent/bot_agents/task_verify_summarier.py +6 -6
jupyter_agent/bot_chat.py +2 -2
jupyter_agent/bot_contexts.py +37 -29
jupyter_agent/bot_evaluation.py +262 -143
jupyter_agent/bot_evaluators/__init__.py +0 -0
jupyter_agent/bot_evaluators/base.py +42 -0
jupyter_agent/bot_evaluators/dummy_flow.py +20 -0
jupyter_agent/bot_evaluators/dummy_global.py +20 -0
jupyter_agent/bot_evaluators/dummy_task.py +20 -0
jupyter_agent/bot_evaluators/flow_global_planning.py +88 -0
jupyter_agent/bot_evaluators/flow_task_executor.py +152 -0
jupyter_agent/bot_flows/__init__.py +0 -4
jupyter_agent/bot_flows/base.py +120 -41
jupyter_agent/bot_flows/master_planner.py +15 -4
jupyter_agent/bot_flows/task_executor_v3.py +57 -38
jupyter_agent/bot_magics.py +119 -69
jupyter_agent/bot_outputs.py +37 -43
jupyter_agent/utils.py +20 -31
{jupyter_agent-2025.6.104.dist-info → jupyter_agent-2025.7.100.dist-info}/METADATA +56 -4
jupyter_agent-2025.7.100.dist-info/RECORD +41 -0
jupyter_agent/bot_agents/task_planner_v1.py +0 -158
jupyter_agent/bot_agents/task_planner_v2.py +0 -172
jupyter_agent/bot_flows/task_executor_v1.py +0 -86
jupyter_agent/bot_flows/task_executor_v2.py +0 -84
jupyter_agent-2025.6.104.dist-info/RECORD +0 -35
{jupyter_agent-2025.6.104.dist-info → jupyter_agent-2025.7.100.dist-info}/WHEEL +0 -0
{jupyter_agent-2025.6.104.dist-info → jupyter_agent-2025.7.100.dist-info}/entry_points.txt +0 -0
{jupyter_agent-2025.6.104.dist-info → jupyter_agent-2025.7.100.dist-info}/licenses/LICENSE +0 -0
{jupyter_agent-2025.6.104.dist-info → jupyter_agent-2025.7.100.dist-info}/top_level.txt +0 -0

jupyter_agent/bot_magics.py CHANGED Viewed

@@ -15,11 +15,14 @@ from IPython.display import Markdown
 from IPython.core.magic import Magics, magics_class, cell_magic
 from traitlets import Unicode, Int, Bool
 from traitlets.config.configurable import Configurable
-from .bot_contexts import NotebookContext, AgentCellContext
-from .bot_agents import AgentFactory
-from .bot_agents.base import AgentModelType
-from .bot_flows import MasterPlannerFlow, TaskExecutorFlowV1, TaskExecutorFlowV2, TaskExecutorFlowV3
-from .bot_outputs import _D, _I, _W, _E, _F, _M, _B, _O, reset_output, set_logging_level
+from .bot_contexts import NotebookContext
+from .bot_agents.base import AgentModelType, AgentFactory
+from .bot_agents.request_user_supply import RequestUserSupplyAgent
+from .bot_evaluators.base import EvaluatorFactory
+from .bot_flows import MasterPlannerFlow, TaskExecutorFlowV3
+from .bot_outputs import _D, _I, _W, _E, _F, _M, _B, _O, reset_output, set_logging_level, flush_output
+from .bot_actions import close_action_dispatcher
+from .utils import get_env_capbilities
 @magics_class
@@ -37,6 +40,9 @@ class BotMagics(Magics, Configurable):
     coding_api_url = Unicode(None, allow_none=True, help="Coding API URL").tag(config=True)
     coding_api_key = Unicode("API_KEY", help="Coding API Key").tag(config=True)
     coding_model_name = Unicode("", help="Coding Model Name").tag(config=True)
+    evaluating_api_url = Unicode(None, allow_none=True, help="Evaluating API URL").tag(config=True)
+    evaluating_api_key = Unicode("API_KEY", help="Evaluating API Key").tag(config=True)
+    evaluating_model_name = Unicode("", help="Evaluating Model Name").tag(config=True)
     reasoning_api_url = Unicode(None, allow_none=True, help="Reasoning API URL").tag(config=True)
     reasoning_api_key = Unicode("API_KEY", help="Reasoning API Key").tag(config=True)
     reasoning_model_name = Unicode("", help="Reasoning Model Name").tag(config=True)
@@ -44,11 +50,16 @@ class BotMagics(Magics, Configurable):
     display_think = Bool(True, help="Display chatthink response").tag(config=True)
     display_response = Bool(False, help="Display chat full response").tag(config=True)
     support_save_meta = Bool(False, help="Support save metadata to cell").tag(config=True)
+    support_user_confirm = Bool(False, help="Support user confirm").tag(config=True)
+    support_user_supply_info = Bool(False, help="Support user supply info").tag(config=True)
+    support_set_cell_content = Bool(False, help="Support set cell content").tag(config=True)
+    enable_evaluating = Bool(False, help="Enable evaluating task").tag(config=True)
+    enable_supply_mocking = Bool(False, help="Enable supply mocking").tag(config=True)
     notebook_path = Unicode(None, allow_none=True, help="Path to Notebook file").tag(config=True)
     default_task_flow = Unicode("v3", allow_none=True, help="Default task flow").tag(config=True)
-    default_max_tries = Int(3, help="Default max tries for task execution").tag(config=True)
+    default_max_tries = Int(2, help="Default max tries for task execution").tag(config=True)
     default_step_mode = Bool(False, help="Default step mode for task execution").tag(config=True)
-    default_auto_confirm = Bool(False, help="Default auto confirm for task execution").tag(config=True)
+    default_auto_confirm = Bool(True, help="Default auto confirm for task execution").tag(config=True)
     def parse_args(self, line):
         """解析命令行参数"""
@@ -59,41 +70,48 @@ class BotMagics(Magics, Configurable):
         parser.add_argument("-f", "--flow", type=str, default=self.default_task_flow, help="Flow name")
         parser.add_argument("-m", "--max-tries", type=int, default=self.default_max_tries, help="Max tries")
         parser.add_argument(
-            "-S",
+            "-t",
             "--step-mode",
             action="store_true",
+            dest="step_mode",
             default=self.default_step_mode,
             help="Run in single step mode",
         )
         parser.add_argument(
-            "-Y",
+            "-T",
+            "--not-step-mode",
+            action="store_false",
+            dest="step_mode",
+            default=self.default_step_mode,
+            help="Run in multi step mode",
+        )
+        parser.add_argument(
+            "-y",
             "--auto-confirm",
             action="store_true",
+            dest="auto_confirm",
             default=self.default_auto_confirm,
             help="Run without confirm",
         )
+        parser.add_argument(
+            "-Y",
+            "--not-auto-confirm",
+            action="store_false",
+            dest="auto_confirm",
+            default=self.default_auto_confirm,
+            help="Run with confirm",
+        )
         options, _ = parser.parse_known_args(shlex.split(line.strip()))
         return options
-    def ensure_notebook_path(self):
-        if self.notebook_path:
-            return self.notebook_path
-        result = self.shell and self.shell.run_cell("globals().get('__vsc_ipynb_file__')")
-        if result and result.success and result.result:
-            self.notebook_path = result.result
-            return self.notebook_path
-        try:
-            self.notebook_path = str(ipynbname.path())
-            return self.notebook_path
-        except Exception as e:
-            _F(f"Failed to get notebook path: {e}")
-            return None
     @cell_magic
     def bot(self, line, cell):
         """Jupyter cell magic: %%bot"""
         try:
+            reset_output(stage="Logging", logging_level=self.logging_level)
+            _I("Cell magic %%bot executing ...")
+            _D(f"Cell magic called with line: {line}")
+            _D(f"Cell magic called with cell: {repr(cell)[:50]} ...")
             if not self.ensure_notebook_path():
                 _O(
                     Markdown(
@@ -103,11 +121,6 @@ class BotMagics(Magics, Configurable):
                     )
                 )
                 return
-            AgentCellContext.SUPPORT_SAVE_META = self.support_save_meta
-            reset_output(stage="Logging", logging_level=self.logging_level)
-            _I("Cell magic %%bot executing ...")
-            _D(f"Cell magic called with line: {line}")
-            _D(f"Cell magic called with cell: {repr(cell)[:50]} ...")
             if not cell.strip():
                 _O(
                     Markdown(
@@ -120,51 +133,23 @@ class BotMagics(Magics, Configurable):
                         "%%bot {}\n\n# {}".format(line.strip(), time.strftime("%Y-%m-%d %H:%M:%S")), replace=True
                     )
                 return
+            get_env_capbilities().save_metadata = self.support_save_meta
+            get_env_capbilities().user_confirm = self.support_user_confirm
+            get_env_capbilities().user_supply_info = self.support_user_supply_info
+            get_env_capbilities().set_cell_content = self.support_set_cell_content
+            RequestUserSupplyAgent.MOCK_USER_SUPPLY = self.enable_supply_mocking
             options = self.parse_args(line)
-            _D(f"Cell magic called with options: {options}")
             set_logging_level(options.logging_level)
+            _D(f"Cell magic called with options: {options}")
             nb_context = NotebookContext(line, cell, notebook_path=self.notebook_path)
-            agent_factory = AgentFactory(
-                nb_context,
-                display_think=self.display_think,
-                display_message=self.display_message,
-                display_response=self.display_response,
-            )
-            agent_factory.config_model(
-                AgentModelType.DEFAULT,
-                self.default_api_url,
-                self.default_api_key,
-                self.default_model_name,
-            )
-            agent_factory.config_model(
-                AgentModelType.PLANNER,
-                self.planner_api_url,
-                self.planner_api_key,
-                self.planner_model_name,
-            )
-            agent_factory.config_model(
-                AgentModelType.CODING,
-                self.coding_api_url,
-                self.coding_api_key,
-                self.coding_model_name,
-            )
-            agent_factory.config_model(
-                AgentModelType.REASONING,
-                self.reasoning_api_url,
-                self.reasoning_api_key,
-                self.reasoning_model_name,
-            )
+            agent_factory = self.get_agent_factory(nb_context)
+            evaluator_factory = self.get_evaluator_factory(nb_context)
             if options.planning:
-                flow = MasterPlannerFlow(nb_context, agent_factory)
+                flow = MasterPlannerFlow(nb_context, agent_factory, evaluator_factory)
+            elif options.flow == "v3":
+                flow = TaskExecutorFlowV3(nb_context, agent_factory, evaluator_factory)
             else:
-                if options.flow == "v1":
-                    flow = TaskExecutorFlowV1(nb_context, agent_factory)
-                elif options.flow == "v2":
-                    flow = TaskExecutorFlowV2(nb_context, agent_factory)
-                elif options.flow == "v3":
-                    flow = TaskExecutorFlowV3(nb_context, agent_factory)
-                else:
-                    raise ValueError(f"Unknown flow: {options.flow}")
+                raise ValueError(f"Unknown flow: {options.flow}")
             flow(
                 options.stage,
                 options.max_tries,
@@ -173,6 +158,71 @@ class BotMagics(Magics, Configurable):
             )
         except Exception as e:
             traceback.print_exc()
+        finally:
+            close_action_dispatcher()
+            flush_output()
+    def ensure_notebook_path(self):
+        if self.notebook_path:
+            return self.notebook_path
+        result = self.shell and self.shell.run_cell(
+            "globals().get('__vsc_ipynb_file__') or globals().get('__evaluation_ipynb_file__')"
+        )
+        if result and result.success and result.result:
+            self.notebook_path = result.result
+            return self.notebook_path
+        try:
+            self.notebook_path = str(ipynbname.path())
+            return self.notebook_path
+        except Exception as e:
+            _F(f"Failed to get notebook path: {e}")
+            return None
+    def get_agent_factory(self, nb_context):
+        agent_factory = AgentFactory(
+            nb_context,
+            display_think=self.display_think,
+            display_message=self.display_message,
+            display_response=self.display_response,
+        )
+        agent_factory.config_model(
+            AgentModelType.DEFAULT, self.default_api_url, self.default_api_key, self.default_model_name
+        )
+        agent_factory.config_model(
+            AgentModelType.PLANNER, self.planner_api_url, self.planner_api_key, self.planner_model_name
+        )
+        agent_factory.config_model(
+            AgentModelType.CODING, self.coding_api_url, self.coding_api_key, self.coding_model_name
+        )
+        agent_factory.config_model(
+            AgentModelType.EVALUATING, self.evaluating_api_url, self.evaluating_api_key, self.evaluating_model_name
+        )
+        agent_factory.config_model(
+            AgentModelType.REASONING, self.reasoning_api_url, self.reasoning_api_key, self.reasoning_model_name
+        )
+        return agent_factory
+    def get_evaluator_factory(self, nb_context):
+        if self.enable_evaluating:
+            evaluator_factory = EvaluatorFactory(nb_context)
+            evaluator_factory.config_model(
+                AgentModelType.DEFAULT, self.default_api_url, self.default_api_key, self.default_model_name
+            )
+            evaluator_factory.config_model(
+                AgentModelType.PLANNER, self.planner_api_url, self.planner_api_key, self.planner_model_name
+            )
+            evaluator_factory.config_model(
+                AgentModelType.CODING, self.coding_api_url, self.coding_api_key, self.coding_model_name
+            )
+            evaluator_factory.config_model(
+                AgentModelType.EVALUATING, self.evaluating_api_url, self.evaluating_api_key, self.evaluating_model_name
+            )
+            evaluator_factory.config_model(
+                AgentModelType.REASONING, self.reasoning_api_url, self.reasoning_api_key, self.reasoning_model_name
+            )
+        else:
+            evaluator_factory = None
+        return evaluator_factory
 def load_ipython_extension(ipython):

jupyter_agent/bot_outputs.py CHANGED Viewed

@@ -11,9 +11,11 @@ import datetime
 import jinja2
 from enum import Enum
-from typing import Optional, Dict, Any
+from typing import Optional, Dict, List, Tuple, Any, Type
 from pydantic import BaseModel, Field
 from IPython.display import display, Markdown
+from .bot_evaluation import BaseEvaluationRecord
+from .bot_actions import ActionBase
 from .utils import no_indent, no_wrap
 STAGE_SWITCHER_SCRIPT = no_wrap(
@@ -173,38 +175,6 @@ LOGGING_LEVELS = {
 }
-class BaseEvalutionRecord(BaseModel):
-    timestamp: float = 0
-    notebook_name: str = ""
-    eval_type: str = "BASE"
-    cell_index: int = -1
-    execution_duration: float = 0.0
-    is_success: bool = False
-    correct_score: float = 0.0
-class StageEvalutionRecord(BaseEvalutionRecord):
-    eval_type: str = "STAGE"
-    flow: str = ""
-    stage: str = ""
-    coding_score: float = 0.0
-    important_score: float = 0.0
-    user_supply_score: float = 0.0
-class FlowEvalutionRecord(BaseEvalutionRecord):
-    eval_type: str = "FLOW"
-    flow: str = ""
-    stage_count: int = 0
-    planning_score: float = 0.0
-class NotebookEvalutionRecord(BaseEvalutionRecord):
-    eval_type: str = "NOTEBOOK"
-    flow_count: int = 0
-    planning_score: float = 0.0
 class AgentOutput:
     """
     AgentOutput 是一个用于在 Jupyter Notebook 中显示 Agent 输出的类。
@@ -221,6 +191,7 @@ class AgentOutput:
         )
         self.template = self.jinja_env.from_string(AGENT_OUTPUT_TEMPLEATE)
         self.handler = None
+        self._is_dirty = True
         self._latest_display_tm = 0
         self._contents = {}
         self._active_stage = None
@@ -228,6 +199,7 @@ class AgentOutput:
         self._agent_data = {}
         self._logging_records = []
         self._evaluation_records = []
+        self._action_records = []
     @property
     def content(self):
@@ -263,11 +235,16 @@ class AgentOutput:
             )
         if self._evaluation_records:
             metadata["jupyter-agent-evaluation-records"] = [record.model_dump() for record in self._evaluation_records]
+        if self._action_records:
+            metadata["jupyter-agent-action-records"] = [record.model_dump() for record in self._action_records]
         return metadata
     def display(self, stage=None, force=False, wait=True):
-        if stage is not None:
+        if stage is not None and stage != self._active_stage:
             self._active_stage = stage
+            self._is_dirty = True
+        if not self._is_dirty and not force:
+            return
         if not force and time.time() - self._latest_display_tm < 1:
             if wait:
                 time.sleep(1 - (time.time() - self._latest_display_tm))
@@ -278,6 +255,7 @@ class AgentOutput:
         else:
             self.handler.update(Markdown(self.content), metadata=self.metadata)
         self._latest_display_tm = time.time()
+        self._is_dirty = False
     def clear(self, stage=None, clear_metadata=False):
         if stage is None:
@@ -286,6 +264,7 @@ class AgentOutput:
             self._contents[stage] = []
         if clear_metadata:
             self._agent_data = {}
+        self._is_dirty = True
         self.display(force=False, wait=False)
     def output_block(
@@ -305,6 +284,7 @@ class AgentOutput:
                 "code_language": code_language,
             }
         )
+        self._is_dirty = True
         self.display(stage, force=False, wait=False)
     def output_text(self, content, stage=None, code_language="python"):
@@ -320,6 +300,7 @@ class AgentOutput:
             self._contents[stage][-1]["content"] += "\n" + content
         else:
             self._contents[stage].append({"type": "text", "content": content, "code_language": code_language})
+        self._is_dirty = True
         self.display(stage, force=False, wait=False)
     def output_markdown(self, content, stage=None):
@@ -328,12 +309,14 @@ class AgentOutput:
         if stage not in self._contents:
             self._contents[stage] = []
         self._contents[stage].append({"type": "markdown", "content": content})
+        self._is_dirty = True
         self.display(stage, force=False, wait=False)
     def output_agent_data(self, **kwargs):
         self.log(f"output agent data {kwargs}", level="DEBUG")
         self._agent_data.update(kwargs)
-        self._agent_data_timestamp = int(time.time() * 1000)
+        self._agent_data_timestamp = time.time()
+        self._is_dirty = True
         self.display(force=False, wait=False)
     def log(self, msg, level="INFO"):
@@ -355,11 +338,12 @@ class AgentOutput:
                     "content": content,
                 }
             )
+        self._is_dirty = True
         self.display(force=False, wait=False)
-    def log_evaluation(self, record: BaseEvalutionRecord):
+    def log_evaluation(self, record: BaseEvaluationRecord):
         assert isinstance(
-            record, BaseEvalutionRecord
+            record, BaseEvaluationRecord
         ), "record must be an instance of BaseEvalutionRecord or its subclass"
         if record.timestamp == 0:
             record.timestamp = time.time()
@@ -369,6 +353,16 @@ class AgentOutput:
             f"success: {record.is_success} correct: {record.correct_score:.2f}",
             level="INFO",
         )
+        self._is_dirty = True
+        self.display(force=False, wait=False)
+    def log_action(self, record: ActionBase):
+        assert isinstance(record, ActionBase), "record must be an instance of BaseActionRecord or its subclass"
+        if record.timestamp == 0:
+            record.timestamp = time.time()
+        self._action_records.append(record)
+        self.log(f"Action: {record.action} from {record.source}", level="INFO")
+        self._is_dirty = True
         self.display(force=False, wait=False)
@@ -414,14 +408,14 @@ def output_agent_data(**kwargs):
     get_output().output_agent_data(**kwargs)
-def output_evaluation(record: BaseEvalutionRecord):
-    """
-    输出评估记录到 AgentOutput 中。
-    :param record: 评估记录对象，必须是 BaseEvalutionRecord 的子类。
-    """
+def output_evaluation(record: BaseEvaluationRecord):
     get_output().log_evaluation(record)
+def output_action(record: ActionBase):
+    get_output().log_action(record)
 def clear_output(stage=None, clear_metadata=False):
     get_output().clear(stage, clear_metadata)
@@ -534,6 +528,6 @@ _A = output_agent_data
 _L = log
 _D = lambda msg: log(msg, level="DEBUG")
 _I = lambda msg: log(msg, level="INFO")
-_W = lambda msg: log(msg, level="WARNING")
+_W = lambda msg: log(msg, level="WARN")
 _E = lambda msg: log(msg, level="ERROR")
 _F = lambda msg: log(msg, level="FATAL")

jupyter_agent/utils.py CHANGED Viewed

@@ -91,37 +91,6 @@ class TeeOutputCapture(capture_output):
         return CapturedIO(stdout, stderr, outputs)
-class RequestUserPrompt(BaseModel):
-    prompt: str = Field(
-        description="需要用户补充详细信息的Prompt",
-        examples=["请补充与...相关的详细的信息", "请确认...是否...", "请提供..."],
-    )
-    example: Optional[str] = Field(None, description="示例", examples=["..."])
-class UserPromptResponse(BaseModel):
-    prompt: str = Field(description="需要用户补充详细信息的Prompt", examples=["..."])
-    response: str = Field(description="用户补充的详细信息", examples=["..."])
-def request_user_response(prompts: list[RequestUserPrompt]) -> list[UserPromptResponse]:
-    responses = []
-    for prompt in prompts:
-        response = input(f"{prompt.prompt} (例如: {prompt.example})")
-        responses.append(UserPromptResponse(prompt=prompt.prompt, response=response))
-    return responses
-def format_user_prompts(prompts: list[RequestUserPrompt], title="用户补充详细信息") -> str:
-    result = "```markdown\n"
-    result += f"### {title}\n\n"
-    result += "\n".join(
-        [f"- **Issue**: {prompt.prompt} (例如: {prompt.example})\n- **Reply**: " for prompt in prompts]
-    )
-    result += "\n```\n"
-    return result
 def no_indent(text: str) -> str:
     return re.sub(r"^\s+", "", text, flags=re.MULTILINE)
@@ -136,3 +105,23 @@ def no_newline(text: str) -> str:
 def no_space(text: str) -> str:
     return re.sub(r"\s+", "", text, flags=re.MULTILINE)
+class EnvironmentCapbilities(BaseModel):
+    save_metadata: bool = False
+    user_confirm: bool = False
+    user_supply_info: bool = False
+    set_cell_content: bool = False
+__env_capbilities = EnvironmentCapbilities()
+def get_env_capbilities() -> EnvironmentCapbilities:
+    return __env_capbilities
+def set_env_capbilities(env_capbilities: EnvironmentCapbilities):
+    global __env_capbilities
+    __env_capbilities = env_capbilities

{jupyter_agent-2025.6.104.dist-info → jupyter_agent-2025.7.100.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: jupyter-agent
-Version: 2025.6.104
+Version: 2025.7.100
 Summary: 调用LLM实现Jupyter代码的自动生成、执行、调试等功能
 Author: viewstar000
 License: MIT
@@ -10,6 +10,7 @@ Classifier: Operating System :: OS Independent
 Requires-Python: >=3.12
 Description-Content-Type: text/markdown
 License-File: LICENSE
+Requires-Dist: bottle
 Requires-Dist: ipynbname
 Requires-Dist: ipython
 Requires-Dist: jinja2
@@ -95,15 +96,26 @@ pip install /path/to/jupyter-agent/dist/jupyter_agent-xxxx-py3-none-any.whl
 # 设置当前Notebook的路径，当无法自动获取时需要手工指定，以Vscode中的Notebook为例
 %config BotMagics.notebook_path = globals()["__vsc_ipynb_file__"]
-# 设置是否保存任务数据到Metadata，只有Vscode中安装了jupyter-agent-extension后才支持
+# 是否默认开启单步模式，每执行一个步骤都退出执行循环，需要用户手动执行下一个步骤，默认为False
+%config BotMagics.default_step_mode = False
+# 是否默认开启自动确认，若关闭自动确认，每执行一个步骤都需要用户手动确认，默认为True
+%config BotMagics.default_auto_confirm = True
+# 设置运行环境是否保存任务数据到Metadata，默认为False，仅在Vscode中安装jupyter-agent-extension后或在评估模式下支持
 %config BotMagics.support_save_meta = True
+# 设置运行环境是否设置单元格内容，默认为False，仅在Vscode中安装jupyter-agent-extension后或在评估模式下支持
+%config BotMagics.support_set_cell_content = True
 # 设置日志级别，可选值为DEBUG、INFO、WARN、ERROR、FATAL，默认为INFO
 %config BotMagics.logging_level = 'DEBUG'
+# 开启自动评估功能，默认为False，调用LLM对当前结果进行打分，目前仅实现了对子任务的整体打分
+%config BotMagics.enable_evaluating = True
+# 开启模拟用户补充信息功能，默认为False，调用LLM模拟对Agent的提问进行补充，用于自动评估
+%config BotMagics.enable_supply_mocking = True
 # 设置是否显示思考过程，默认为True
 %config BotMagics.display_think = True
 # 设置是否显示发送给出LLM的消息和LLM的回答，默认为False
 %config BotMagics.display_message = True
 %config BotMagics.display_response = True
@@ -151,6 +163,20 @@ pip install /path/to/jupyter-agent/dist/jupyter_agent-xxxx-py3-none-any.whl
 更详细用法可参考[示例Notebook](https://github.com/viewstar000/jupyter-agent/blob/main/examples/data_loader.ipynb)
+### 评估模式
+工具提供了`bot_eval`命令用于在评估模式下执行notebook。在评估模式下，工具会顺序执行所有单元格，直到全局目标完成。
+```bash
+bot_eval [-o output_eval.ipynb] [-e output_eval.jsonl] input.ipynb
+```
+例如
+```bash
+bot_eval examples/data_loader_eval.ipynb
+```
 ## 贡献
 欢迎提交 issue 或 pull request 参与贡献。
@@ -237,12 +263,24 @@ Advanced Configuration:
 # Set the current notebook path, when it is not automatically obtained, it needs to be manually specified, for example, in Vscode Notebook
 %config BotMagics.notebook_path = globals()["__vsc_ipynb_file__"]
-# Set whether to save task data to Metadata, only Vscode installed with jupyter-agent-extension supports
+# Whether to enable single step mode, each step will exit the execution loop, you need to manually execute the next step, the default is False
+%config BotMagics.default_step_mode = False
+# Whether to enable automatic confirmation, if automatic confirmation is closed, each step needs to be confirmed by the user, the default is True
+%config BotMagics.default_auto_confirm = True
+# Set whether to save task data to Metadata, only Vscode installed with jupyter-agent-extension or evaluation mode supports this.
 %config BotMagics.support_save_meta = True
+# Set whether to set cell content, only Vscode installed with jupyter-agent-extension or evaluation mode supports this.
+%config BotMagics.support_set_cell_content = True
 # Set the log level, available values are DEBUG、INFO、WARN、ERROR、FATAL, default is INFO
 %config BotMagics.logging_level = 'DEBUG'
+# Enable automatic evaluation, default is False, call LLM to evaluate the overall result of the subtask
+%config BotMagics.enable_evaluating = True
+# Enable mocking of user-supplied information, default is False; calls the LLM to simulate the user's answers to the agent's questions, used for automatic evaluation
+%config BotMagics.enable_supply_mocking = True
 # Set whether to display thinking process, default is True
 %config BotMagics.display_think = True
@@ -290,6 +328,20 @@ After generating code for a subtask, the tool will call the corresponding agent
 For more details, please refer to [example notebook](https://github.com/viewstar000/jupyter-agent/blob/main/examples/data_loader.ipynb)
+### Evaluation mode
+Use `bot_eval` command to evaluate the code generated by the agent in evaluation mode. The evaluation mode will execute all cells in order and stop when the global goal is completed.
+```bash
+bot_eval [-o output_eval.ipynb] [-e output_eval.jsonl] input.ipynb
+```
+For example
+```bash
+bot_eval examples/data_loader_eval.ipynb
+```
 ## Contributing
 Welcome to submit issues or pull requests to participate in contributions.

jupyter_agent-2025.7.100.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,41 @@
+jupyter_agent/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+jupyter_agent/bot_actions.py,sha256=Zq9_nfh4SJdMxkjqcTyQzS0RY4RwofaRkGq_4aexO2o,8453
+jupyter_agent/bot_chat.py,sha256=4zjkHtyOabT1bvGO-n4ZTMw0XREU_XDlgfLCI5gpxsw,8834
+jupyter_agent/bot_contexts.py,sha256=gs3hVIj81jasQYiJjdoAloWx8S1Xpa4cXr8XzcefUus,19475
+jupyter_agent/bot_evaluation.py,sha256=t4SH6Gq4BmSyyRMozyQ2623XNGmgtCi9CTNRvOqzuRM,14266
+jupyter_agent/bot_magics.py,sha256=Sh2CGs_esZqaHWDDLKjJSDlUYcI4PdF2aFPcibcf43Y,11027
+jupyter_agent/bot_outputs.py,sha256=QDzReXLqZsU7RAPR4F9JEotxAtIe9YA3ZklCJ9U_jVg,16239
+jupyter_agent/utils.py,sha256=8XKXXZB1EgCwIJEqYJigA8C84FzVTc2xdcF-y5kO3kY,3634
+jupyter_agent/bot_agents/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+jupyter_agent/bot_agents/base.py,sha256=pAwW_KQZE9LwyxM91umzIlWalgFUKGJOpby8JGfvLQo,12430
+jupyter_agent/bot_agents/master_planner.py,sha256=twDEc0KMCyNkcsD_0nilbPteZqFwUT38QDfeYUhOzzE,1330
+jupyter_agent/bot_agents/output_task_result.py,sha256=4DeAmDzjUI_0yfb26f5sLIwa74aJRzEQXbMRSyYMv1g,761
+jupyter_agent/bot_agents/prepare_next_cell.py,sha256=_4-kYQxAs9PK4a6T5vbsdSixNQ4l-R8qzQNjs8e_09c,1891
+jupyter_agent/bot_agents/request_user_supply.py,sha256=Bkw08hhkUUVnirBijv1mJ0hQW2VpzdXoC3TToQolQos,6106
+jupyter_agent/bot_agents/task_code_executor.py,sha256=V138uj39_lLGuljEDrxzd-jRf1k4nAQkA_gF2_Jhpgw,2243
+jupyter_agent/bot_agents/task_coder.py,sha256=7fXq9nk1yH3F_mJfCMZBktHmxGfgmpuChMQbpEuL0w4,1783
+jupyter_agent/bot_agents/task_debuger.py,sha256=77pa_Awgvzxm3XkFA1oZsGr8SPJkjApKMtkmoySShmI,1367
+jupyter_agent/bot_agents/task_planner_v3.py,sha256=Mlves3v3KL7MAJ8hPPMxUsKdB2v6vuOXlVZ6XtNMbbo,8713
+jupyter_agent/bot_agents/task_reasoner.py,sha256=4oP5DzAkfEGh6LtpX4OH6aMgAPDiRvbSVclxrhx0v20,1465
+jupyter_agent/bot_agents/task_structrue_reasoner.py,sha256=lNt508g4ileRjG9_NETdSrQqVb7tjdu8qHajKcZzB6E,3947
+jupyter_agent/bot_agents/task_structrue_summarier.py,sha256=fnNiXQMiEPHyowqOP6Ht_OnxV_1h_WTLKfcM2IYEt24,4053
+jupyter_agent/bot_agents/task_summarier.py,sha256=Q9b11gdWvwnYLsIjwSpMkZQur1CqFdd_uKb322o8u-M,1787
+jupyter_agent/bot_agents/task_verifier.py,sha256=kGtz8BkSB097RwdgY3FcXpSbVRcikFeTXiokheza0t8,2522
+jupyter_agent/bot_agents/task_verify_summarier.py,sha256=XIxRuW8T1DchHLy3PlGWWUMVC8hcTEyjhQ5tnELWNZk,4943
+jupyter_agent/bot_evaluators/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+jupyter_agent/bot_evaluators/base.py,sha256=zGxW469lq2Ab1mOaTtVQcWQmJrKNAAaF8X7OPbnUY04,1375
+jupyter_agent/bot_evaluators/dummy_flow.py,sha256=W0BWJKMgXIilZY0i8eP_SNVgqTUd1CT_uqMBs5aygVA,473
+jupyter_agent/bot_evaluators/dummy_global.py,sha256=yZ8fo2xfVN8gZTpBfs8EJ4dcv2t4ls6aXxn3Mo7bNSk,483
+jupyter_agent/bot_evaluators/dummy_task.py,sha256=owh6g6ItPXXYjesplzNMxVcKAU_kktWtuJhqRzZ05V4,475
+jupyter_agent/bot_evaluators/flow_global_planning.py,sha256=kOLd0dCoqrMi6zbe5chXrwxmdahtt8QqX3UnAZgk3AQ,2419
+jupyter_agent/bot_evaluators/flow_task_executor.py,sha256=gzHlKkP9K5fICYgUY5BKAzjwqn3xScxklohqoUCJaZk,4450
+jupyter_agent/bot_flows/__init__.py,sha256=Xe7EbC6bt04Nc4Yr0e--FVvBJCxkZCZkwYL9oahMBtI,338
+jupyter_agent/bot_flows/base.py,sha256=F-iXu59IfnOXWze3e2myvzdBlyk8xzlrqHA8GTfu4vo,14916
+jupyter_agent/bot_flows/master_planner.py,sha256=F1AunpfNwFqEn4z8uzNEq7d_5_cNHRhMlO1P7uWcYf0,980
+jupyter_agent/bot_flows/task_executor_v3.py,sha256=uCuwcG8ZfPIR7V5AX7UfFkYVbJ6MxPscixjtLwBYWtE,4878
+jupyter_agent-2025.7.100.dist-info/licenses/LICENSE,sha256=nWMmSIg7OepTIDX_OPP0-T9ImeCBBoog7eJxm5awtcM,1068
+jupyter_agent-2025.7.100.dist-info/METADATA,sha256=zaOVnSQK797gupNKlYbwOCV9Y_A3R4_6yk12WRDjqYI,12560
+jupyter_agent-2025.7.100.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+jupyter_agent-2025.7.100.dist-info/entry_points.txt,sha256=063AB86wSrC_V-iiEEqxTlR4uz-T7VH_YagIpmKFQC0,63
+jupyter_agent-2025.7.100.dist-info/top_level.txt,sha256=c3USTBZ7DZGuvLKlEW-QfGIx0tzn98iCEn3bpdYnDtE,14
+jupyter_agent-2025.7.100.dist-info/RECORD,,

jupyter-agent 2025.6.104__py3-none-any.whl → 2025.7.100__py3-none-any.whl

jupyter-agent 2025.6.104__py3-none-any.whl → 2025.7.100__py3-none-any.whl