jupyter-agent 2025.6.103__py3-none-any.whl → 2025.6.104__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- jupyter_agent/bot_evaluation.py +206 -0
- jupyter_agent/bot_flows/base.py +48 -9
- jupyter_agent/bot_flows/master_planner.py +3 -2
- jupyter_agent/bot_flows/task_executor_v3.py +15 -4
- jupyter_agent/bot_magics.py +41 -9
- jupyter_agent/bot_outputs.py +59 -0
- {jupyter_agent-2025.6.103.dist-info → jupyter_agent-2025.6.104.dist-info}/METADATA +6 -5
- {jupyter_agent-2025.6.103.dist-info → jupyter_agent-2025.6.104.dist-info}/RECORD +12 -10
- jupyter_agent-2025.6.104.dist-info/entry_points.txt +2 -0
- {jupyter_agent-2025.6.103.dist-info → jupyter_agent-2025.6.104.dist-info}/WHEEL +0 -0
- {jupyter_agent-2025.6.103.dist-info → jupyter_agent-2025.6.104.dist-info}/licenses/LICENSE +0 -0
- {jupyter_agent-2025.6.103.dist-info → jupyter_agent-2025.6.104.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,206 @@
|
|
1
|
+
"""
|
2
|
+
Copyright (c) 2025 viewstar000
|
3
|
+
|
4
|
+
This software is released under the MIT License.
|
5
|
+
https://opensource.org/licenses/MIT
|
6
|
+
"""
|
7
|
+
|
8
|
+
import os
|
9
|
+
import time
|
10
|
+
import json
|
11
|
+
import argparse
|
12
|
+
import nbformat
|
13
|
+
|
14
|
+
from pathlib import Path
|
15
|
+
from nbclient.client import NotebookClient
|
16
|
+
from .bot_outputs import NotebookEvalutionRecord
|
17
|
+
|
18
|
+
|
19
|
+
def run_notebook(
    input_path: str | Path,
    output_path: str | Path = "",
    inplace: bool = False,
    timeout: int = -1,
    startup_timeout: int = 60,
    allow_errors: bool = False,
    kernel_name: str = "",
    skip_cells_with_tag: str = "skip-execution",
    evaluation_path: str | Path = "",
) -> None:
    """Execute a notebook with nbclient, saving it after every executed cell.

    A header code cell (tagged ``CTX_EXCLUDE``) that defines
    ``__evaluation_ipynb_file__`` is inserted at the top of the notebook before
    execution.  After each cell the notebook is written to ``output_path`` and
    any evaluation records found in the cell's ``display_data`` output metadata
    are printed and, when ``evaluation_path`` is set, appended to that file as
    JSON lines.  If no ``NOTEBOOK`` record was seen by the end of the run, a
    failed ``NOTEBOOK`` record is emitted instead.

    :param input_path: Notebook to run; its suffix is forced to ``.ipynb``.
    :param output_path: Where to write the executed notebook.  Defaults to
        ``<input stem>_executed.ipynb`` next to the input notebook.
    :param inplace: Overwrite the input notebook instead of writing a copy.
    :param timeout: Forwarded to ``NotebookClient(timeout=...)``.
    :param startup_timeout: Forwarded to ``NotebookClient(startup_timeout=...)``.
    :param allow_errors: Forwarded to ``NotebookClient(allow_errors=...)``.
    :param kernel_name: Forwarded to ``NotebookClient(kernel_name=...)``.
    :param skip_cells_with_tag: Forwarded to
        ``NotebookClient(skip_cells_with_tag=...)``.
    :param evaluation_path: File that receives evaluation records as JSON
        lines; nothing is written when empty.
    :raises AssertionError: If both ``inplace`` and ``output_path`` are given.
    """
    input_path = Path(input_path).with_suffix(".ipynb")

    # Raised explicitly instead of via an `assert` statement so the check is
    # still enforced under `python -O`; the exception type is unchanged.
    if inplace and output_path:
        raise AssertionError("Cannot specify both 'inplace' and 'output_path'")
    if inplace:
        output_path = input_path
    elif not output_path:
        output_path = input_path.parent.joinpath(f"{input_path.with_suffix('').name}_executed.ipynb")
    output_path = os.path.abspath(output_path)

    # Open up the notebook we're going to run.  Notebook files are UTF-8 JSON,
    # so do not rely on the platform's default text encoding.
    with input_path.open(encoding="utf-8") as f:
        print("Running notebook:", input_path)
        nb = nbformat.read(f, as_version=4)

    start_time = 0
    is_global_finished = False

    def append_evaluation_record(record_data):
        """Append one record as a JSON line if an evaluation file was requested."""
        if evaluation_path:
            with open(evaluation_path, "a", encoding="utf-8") as eval_file:
                eval_file.write(json.dumps(record_data) + "\n")

    def save_notebook(**kwargs):
        """Save the executed notebook to the specified output path.

        Called without arguments for a plain save, or by nbclient's
        ``on_cell_executed`` hook with ``cell``, ``cell_index`` and
        ``execute_reply`` keyword arguments.
        """
        nonlocal is_global_finished

        if kwargs:
            cell = kwargs.get("cell", {})
            cell_idx = kwargs.get("cell_index", 0)
            cell_type = cell.get("cell_type")
            cell_id = cell.get("id")
            cell_exec_count = cell.get("execution_count")
            cell_meta = cell.get("metadata", {})
            cell_payloads = kwargs.get("execute_reply", {}).get("content", {}).get("payload", [])
            cell_outputs = cell.get("outputs", [])

            # A `set_next_input` payload with replace=True carries new source
            # text for the cell that has just been executed.
            for payload in cell_payloads:
                if payload.get("source") == "set_next_input" and payload.get("replace") is True:
                    print(f"CELL[{cell_idx}] Replacing cell with set_next_input payload")
                    nb.cells[cell_idx].source = payload.get("text", "")

            cell_agent_data_timestamp = cell_meta.get("jupyter-agent-data-timestamp", 0)
            output_agent_data_timestamp = cell_agent_data_timestamp
            for output in cell_outputs:
                if output["output_type"] != "display_data":
                    continue
                output_meta = output.get("metadata", {})

                # Copy agent data into the cell metadata only when the output
                # carries data newer than what has been stored so far.
                if (
                    output_meta.get("jupyter-agent-data-store")
                    and output_meta.get("jupyter-agent-data-timestamp", 0) > output_agent_data_timestamp
                    and output_meta.get("jupyter-agent-data", {})
                ):
                    print(f"CELL[{cell_idx}] Found jupyter-agent-data-store outputs, save it to cell metadata")
                    output_agent_data_timestamp = output_meta.get("jupyter-agent-data-timestamp", 0)
                    target_meta = nb.cells[cell_idx].metadata
                    target_meta["jupyter-agent-data-store"] = True
                    target_meta["jupyter-agent-data-timestamp"] = output_agent_data_timestamp
                    if "jupyter-agent-data" not in target_meta:
                        target_meta["jupyter-agent-data"] = {}
                    target_meta["jupyter-agent-data"].update(output_meta["jupyter-agent-data"])

                # NOTE(review): records are handled for every display_data
                # output, independent of the agent-data condition above;
                # confirm this nesting against the upstream file, whose
                # indentation is not recoverable from this view.
                for record in output_meta.get("jupyter-agent-evaluation-records", []):
                    record["notebook_name"] = output_path
                    if record["eval_type"] == "NOTEBOOK":
                        record["execution_duration"] = time.time() - start_time
                        is_global_finished = True
                        # Remove all cells after the notebook cell
                        # (presumably so nothing further is executed; verify
                        # against nbclient's cell iteration).
                        del nb.cells[cell_idx + 1 :]
                    print(
                        f"CELL[{cell_idx}] Evaluating record: {record['eval_type']} "
                        f"duration: {record['execution_duration']:.2f}s "
                        f"success: {record['is_success']} "
                        f"correct: {record['correct_score']:.2f}"
                    )
                    append_evaluation_record(record)
            print(f"CELL[{cell_idx}] Saving executed {cell_type} cell - {cell_id}: {cell_exec_count}")
        else:
            print(f"Saving executed notebook to: {output_path}")
        nbformat.write(nb, output_path)

    # Add metadata to the notebook.  The path is embedded with repr() so that
    # backslashes and quotes (e.g. Windows paths) still produce valid Python.
    nb.cells.insert(
        0,
        nbformat.v4.new_code_cell(
            source=(
                f"# Executed notebook: {input_path.name}\n"
                f"# Output saved to: {output_path}\n\n"
                f"__evaluation_ipynb_file__ = {output_path!r}\n"
            ),
            metadata={"tags": ["CTX_EXCLUDE"]},
        ),
    )
    save_notebook()

    # Configure nbclient to run the notebook
    client = NotebookClient(
        nb,
        timeout=timeout,
        startup_timeout=startup_timeout,
        skip_cells_with_tag=skip_cells_with_tag,
        allow_errors=allow_errors,
        kernel_name=kernel_name,
        resources={"metadata": {"path": input_path.parent.absolute()}},
        on_cell_executed=save_notebook,
    )

    # Run it
    print("Executing notebook...")
    start_time = time.time()
    client.execute()
    save_notebook()
    print("Notebook execution completed.")

    # If the notebook did not finish globally, append an evaluation record
    if not is_global_finished:
        print("Notebook execution did not finish globally, appending evaluation records.")
        record = NotebookEvalutionRecord(
            notebook_name=output_path,
            eval_type="NOTEBOOK",
            execution_duration=time.time() - start_time,
            is_success=False,
            correct_score=0.0,
        )
        print(
            f"Global evaluation record: {record.eval_type} "
            f"duration: {record.execution_duration:.2f}s "
            f"success: {record.is_success} "
            f"correct: {record.correct_score:.2f}"
        )
        append_evaluation_record(record.model_dump())
|
153
|
+
|
154
|
+
|
155
|
+
def main():
    """Parse the command-line arguments and run the requested notebook.

    Every option is forwarded unchanged to ``run_notebook``.
    """
    parser = argparse.ArgumentParser(description="Run a Jupyter notebook.")
    parser.add_argument(
        "-o",
        "--output_path",
        type=str,
        default="",
        # The help text previously claimed "same as input", but an empty value
        # makes run_notebook write to "<input>_executed.ipynb" instead.
        help="Path to save the executed notebook (default: <input>_executed.ipynb next to the input notebook)",
    )
    parser.add_argument(
        "-i", "--inplace", action="store_true", help="Run the notebook in place (overwrite input file)"
    )
    parser.add_argument(
        "-e",
        "--evaluation_path",
        type=str,
        default="",
        help="Path to save evaluation records (default: no evaluation records saved)",
    )
    parser.add_argument(
        "--timeout", type=int, default=-1, help="Execution timeout in seconds (default: -1, no timeout)"
    )
    parser.add_argument(
        "--startup_timeout", type=int, default=60, help="Kernel startup timeout in seconds (default: 60)"
    )
    parser.add_argument(
        "--allow_errors", action="store_true", help="Allow errors in the notebook execution (default: False)"
    )
    parser.add_argument(
        "--kernel_name", type=str, default="", help="Kernel name to use for execution (default: use notebook's kernel)"
    )
    parser.add_argument(
        "--skip_cells_with_tag",
        type=str,
        default="skip-execution",
        help="Tag to skip cells with (default: 'skip-execution')",
    )
    parser.add_argument("input_path", type=str, help="Path to the input notebook file")
    args = parser.parse_args()

    run_notebook(
        input_path=args.input_path,
        output_path=args.output_path,
        inplace=args.inplace,
        timeout=args.timeout,
        startup_timeout=args.startup_timeout,
        allow_errors=args.allow_errors,
        kernel_name=args.kernel_name,
        skip_cells_with_tag=args.skip_cells_with_tag,
        evaluation_path=args.evaluation_path,
    )
|
203
|
+
|
204
|
+
|
205
|
+
if __name__ == "__main__":
|
206
|
+
main()
|
jupyter_agent/bot_flows/base.py
CHANGED
@@ -5,6 +5,7 @@ This software is released under the MIT License.
|
|
5
5
|
https://opensource.org/licenses/MIT
|
6
6
|
"""
|
7
7
|
|
8
|
+
import time
|
8
9
|
import traceback
|
9
10
|
|
10
11
|
from pydantic import BaseModel
|
@@ -12,11 +13,14 @@ from enum import Enum
|
|
12
13
|
from typing import List, Dict, Optional, Type
|
13
14
|
from IPython.display import Markdown
|
14
15
|
from ..bot_agents.base import BaseAgent
|
15
|
-
from ..bot_outputs import _D, _I, _W, _E, _F, _M, _B
|
16
|
+
from ..bot_outputs import _D, _I, _W, _E, _F, _M, _B
|
17
|
+
from ..bot_outputs import set_stage, flush_output, output_evaluation
|
18
|
+
from ..bot_outputs import FlowEvalutionRecord, StageEvalutionRecord, NotebookEvalutionRecord
|
16
19
|
|
17
20
|
TASK_AGENT_STATE_ERROR = "_AGENT_STATE_ERROR_32534526_"
|
18
21
|
TASK_STAGE_START = "start"
|
19
22
|
TASK_STAGE_COMPLETED = "completed"
|
23
|
+
TASK_STAGE_GLOBAL_FINISHED = "global_finished"
|
20
24
|
|
21
25
|
|
22
26
|
class TaskAction(str, Enum):
|
@@ -47,7 +51,7 @@ class BaseTaskFlow:
|
|
47
51
|
|
48
52
|
STAGE_TRANSITIONS: List[StageTransition] = []
|
49
53
|
START_STAGE = TASK_STAGE_START
|
50
|
-
STOP_STAGES = [TASK_STAGE_COMPLETED]
|
54
|
+
STOP_STAGES = [TASK_STAGE_COMPLETED, TASK_STAGE_GLOBAL_FINISHED]
|
51
55
|
|
52
56
|
def __init__(self, notebook_context, agent_factory):
|
53
57
|
self.notebook_context = notebook_context
|
@@ -146,11 +150,15 @@ class BaseTaskFlow:
|
|
146
150
|
ns = self._get_next_stage_trans(stage, state, action)
|
147
151
|
return ns.stage
|
148
152
|
|
149
|
-
def __call__(self, stage, max_tries=
|
153
|
+
def __call__(self, stage, max_tries=5, stage_continue=True, stage_confirm=True):
|
150
154
|
|
151
155
|
n_tries = 0
|
156
|
+
flow_duration = 0.0
|
157
|
+
stage_count = 0
|
158
|
+
# Initialize the task stage
|
152
159
|
stage = stage or self.START_STAGE
|
153
160
|
while n_tries <= max_tries:
|
161
|
+
stage_st = time.time()
|
154
162
|
try:
|
155
163
|
stage_name = stage.value if isinstance(stage, Enum) else stage
|
156
164
|
stage_name = stage_name.replace(".", "-").capitalize()
|
@@ -163,6 +171,19 @@ class BaseTaskFlow:
|
|
163
171
|
_M(f"```python\n{traceback.format_exc()}\n```")
|
164
172
|
state = TASK_AGENT_STATE_ERROR
|
165
173
|
failed = True
|
174
|
+
stage_count += 1
|
175
|
+
stage_duration = time.time() - stage_st
|
176
|
+
flow_duration += stage_duration
|
177
|
+
_M(f"Stage `{stage}` completed in {stage_duration:.2f} seconds with state `{state}` and failed `{failed}`")
|
178
|
+
output_evaluation(
|
179
|
+
StageEvalutionRecord(
|
180
|
+
cell_index=self.task.cell_idx,
|
181
|
+
flow=type(self).__name__,
|
182
|
+
stage=str(stage),
|
183
|
+
execution_duration=stage_duration,
|
184
|
+
is_success=not failed,
|
185
|
+
)
|
186
|
+
)
|
166
187
|
|
167
188
|
if state != TASK_AGENT_STATE_ERROR:
|
168
189
|
# Agent did not fail, check if we have reached the final stage
|
@@ -171,14 +192,18 @@ class BaseTaskFlow:
|
|
171
192
|
self.task.update_cell()
|
172
193
|
if next_stage in self.STOP_STAGES:
|
173
194
|
_M(f"Task execution **Stopped** at stage `{next_stage}`")
|
195
|
+
stage = next_stage
|
174
196
|
break
|
175
197
|
|
176
198
|
if failed:
|
177
199
|
# Agent failed
|
178
200
|
n_tries += 1
|
201
|
+
if n_tries > max_tries:
|
202
|
+
_M(f"**Max flow tries reached** during task execution stage `{stage}`, **Stop!**")
|
203
|
+
break
|
179
204
|
|
180
|
-
if
|
181
|
-
#
|
205
|
+
if stage_confirm:
|
206
|
+
# We need to confirm
|
182
207
|
message = self.get_prompt_message(stage, state, failed)
|
183
208
|
_M("**Confirm**: " + message)
|
184
209
|
flush_output()
|
@@ -188,15 +213,13 @@ class BaseTaskFlow:
|
|
188
213
|
self.task.update_cell()
|
189
214
|
if action == TaskAction.STOP:
|
190
215
|
_M(f"Task execution **Stopped**, and set next stage to `{next_stage}`")
|
191
|
-
|
192
|
-
elif n_tries > max_tries:
|
193
|
-
_M(f"**Max tries reached** during task execution stage `{stage}`, **Stop!**")
|
216
|
+
stage = next_stage
|
194
217
|
break
|
195
218
|
else:
|
196
219
|
_M(f"**Action**: `{action}` transits stage to `{next_stage}`")
|
197
220
|
stage = next_stage
|
198
221
|
else:
|
199
|
-
#
|
222
|
+
# transit to the next stage without confirmation
|
200
223
|
next_stage = self.get_next_stage(stage, state, TaskAction.CONTINUE)
|
201
224
|
self.task.agent_stage = next_stage
|
202
225
|
self.task.update_cell()
|
@@ -205,5 +228,21 @@ class BaseTaskFlow:
|
|
205
228
|
|
206
229
|
if not stage_continue:
|
207
230
|
break
|
231
|
+
# Finalize the task execution
|
232
|
+
stage_name = stage.value if isinstance(stage, Enum) else stage
|
233
|
+
if stage_name == TASK_STAGE_GLOBAL_FINISHED:
|
234
|
+
_M("Task execution **finished** globally.")
|
235
|
+
output_evaluation(NotebookEvalutionRecord(cell_index=self.task.cell_idx, is_success=True))
|
236
|
+
elif stage_name == TASK_STAGE_COMPLETED:
|
237
|
+
_M(f"Task execution **completed** in {flow_duration:.2f} seconds with {stage_count} stages.")
|
238
|
+
output_evaluation(
|
239
|
+
FlowEvalutionRecord(
|
240
|
+
cell_index=self.task.cell_idx,
|
241
|
+
flow=type(self).__name__,
|
242
|
+
stage_count=stage_count,
|
243
|
+
execution_duration=flow_duration,
|
244
|
+
is_success=True,
|
245
|
+
)
|
246
|
+
)
|
208
247
|
flush_output()
|
209
248
|
return stage
|
@@ -11,6 +11,7 @@ from .base import BaseTaskFlow, StageTransition, TASK_STAGE_START, TASK_STAGE_CO
|
|
11
11
|
class MasterPlannerFlow(BaseTaskFlow):
|
12
12
|
|
13
13
|
STAGE_TRANSITIONS = [
|
14
|
-
StageTransition(stage=TASK_STAGE_START, agent="MasterPlannerAgent", next_stage=
|
14
|
+
StageTransition(stage=TASK_STAGE_START, agent="MasterPlannerAgent", next_stage=TASK_STAGE_COMPLETED),
|
15
|
+
StageTransition(stage=TASK_STAGE_COMPLETED, agent="OutputTaskResult", next_stage=TASK_STAGE_COMPLETED),
|
15
16
|
]
|
16
|
-
STOP_STAGES = [
|
17
|
+
STOP_STAGES = [TASK_STAGE_COMPLETED]
|
@@ -6,7 +6,14 @@ https://opensource.org/licenses/MIT
|
|
6
6
|
"""
|
7
7
|
|
8
8
|
from enum import Enum
|
9
|
-
from .base import
|
9
|
+
from .base import (
|
10
|
+
BaseTaskFlow,
|
11
|
+
StageTransition,
|
12
|
+
StageNext,
|
13
|
+
TaskAction,
|
14
|
+
TASK_STAGE_COMPLETED,
|
15
|
+
TASK_STAGE_GLOBAL_FINISHED,
|
16
|
+
)
|
10
17
|
from ..bot_agents import (
|
11
18
|
TaskPlannerAgentV3,
|
12
19
|
TaskCodingAgent,
|
@@ -27,14 +34,15 @@ class TaskStage(str, Enum):
|
|
27
34
|
DEBUGGING = "debugging"
|
28
35
|
REASONING = "reasoning"
|
29
36
|
SUMMARY = "summary"
|
30
|
-
COMPLETED = "completed"
|
31
37
|
OUTPUT_RESULT = "output_result"
|
38
|
+
COMPLETED = TASK_STAGE_COMPLETED
|
39
|
+
GLOBAL_FINISHED = TASK_STAGE_GLOBAL_FINISHED
|
32
40
|
|
33
41
|
|
34
42
|
class TaskExecutorFlowV3(BaseTaskFlow):
|
35
43
|
|
36
44
|
START_STAGE = TaskStage.PLANNING
|
37
|
-
STOP_STAGES = [TaskStage.COMPLETED, TaskStage.PLANNING_PAUSED]
|
45
|
+
STOP_STAGES = [TaskStage.COMPLETED, TaskStage.PLANNING_PAUSED, TaskStage.GLOBAL_FINISHED]
|
38
46
|
STAGE_TRANSITIONS = [
|
39
47
|
StageTransition[TaskStage, TaskPlannerState](
|
40
48
|
stage=TaskStage.PLANNING,
|
@@ -43,7 +51,7 @@ class TaskExecutorFlowV3(BaseTaskFlow):
|
|
43
51
|
TaskPlannerState.CODING_PLANNED: TaskStage.CODING,
|
44
52
|
TaskPlannerState.REASONING_PLANNED: TaskStage.REASONING,
|
45
53
|
TaskPlannerState.REQUEST_INFO: TaskStage.PLANNING_PAUSED,
|
46
|
-
TaskPlannerState.GLOBAL_FINISHED: TaskStage.
|
54
|
+
TaskPlannerState.GLOBAL_FINISHED: TaskStage.GLOBAL_FINISHED,
|
47
55
|
},
|
48
56
|
),
|
49
57
|
StageTransition[TaskStage, TaskPlannerState](
|
@@ -86,4 +94,7 @@ class TaskExecutorFlowV3(BaseTaskFlow):
|
|
86
94
|
StageTransition[TaskStage, None](
|
87
95
|
stage=TaskStage.OUTPUT_RESULT, agent=OutputTaskResult, next_stage=TaskStage.COMPLETED
|
88
96
|
),
|
97
|
+
StageTransition[TaskStage, None](
|
98
|
+
stage=TaskStage.GLOBAL_FINISHED, agent=OutputTaskResult, next_stage=TaskStage.GLOBAL_FINISHED
|
99
|
+
),
|
89
100
|
]
|
jupyter_agent/bot_magics.py
CHANGED
@@ -43,9 +43,12 @@ class BotMagics(Magics, Configurable):
|
|
43
43
|
display_message = Bool(False, help="Display chat message").tag(config=True)
|
44
44
|
display_think = Bool(True, help="Display chatthink response").tag(config=True)
|
45
45
|
display_response = Bool(False, help="Display chat full response").tag(config=True)
|
46
|
+
support_save_meta = Bool(False, help="Support save metadata to cell").tag(config=True)
|
46
47
|
notebook_path = Unicode(None, allow_none=True, help="Path to Notebook file").tag(config=True)
|
47
48
|
default_task_flow = Unicode("v3", allow_none=True, help="Default task flow").tag(config=True)
|
48
|
-
|
49
|
+
default_max_tries = Int(3, help="Default max tries for task execution").tag(config=True)
|
50
|
+
default_step_mode = Bool(False, help="Default step mode for task execution").tag(config=True)
|
51
|
+
default_auto_confirm = Bool(False, help="Default auto confirm for task execution").tag(config=True)
|
49
52
|
|
50
53
|
def parse_args(self, line):
|
51
54
|
"""解析命令行参数"""
|
@@ -54,9 +57,21 @@ class BotMagics(Magics, Configurable):
|
|
54
57
|
parser.add_argument("-P", "--planning", action="store_true", default=False, help="Run in planning mode")
|
55
58
|
parser.add_argument("-s", "--stage", type=str, default=None, help="Task stage")
|
56
59
|
parser.add_argument("-f", "--flow", type=str, default=self.default_task_flow, help="Flow name")
|
57
|
-
parser.add_argument("-m", "--max-tries", type=int, default=
|
58
|
-
parser.add_argument(
|
59
|
-
|
60
|
+
parser.add_argument("-m", "--max-tries", type=int, default=self.default_max_tries, help="Max tries")
|
61
|
+
parser.add_argument(
|
62
|
+
"-S",
|
63
|
+
"--step-mode",
|
64
|
+
action="store_true",
|
65
|
+
default=self.default_step_mode,
|
66
|
+
help="Run in single step mode",
|
67
|
+
)
|
68
|
+
parser.add_argument(
|
69
|
+
"-Y",
|
70
|
+
"--auto-confirm",
|
71
|
+
action="store_true",
|
72
|
+
default=self.default_auto_confirm,
|
73
|
+
help="Run without confirm",
|
74
|
+
)
|
60
75
|
options, _ = parser.parse_known_args(shlex.split(line.strip()))
|
61
76
|
|
62
77
|
return options
|
@@ -116,16 +131,28 @@ class BotMagics(Magics, Configurable):
|
|
116
131
|
display_response=self.display_response,
|
117
132
|
)
|
118
133
|
agent_factory.config_model(
|
119
|
-
AgentModelType.DEFAULT,
|
134
|
+
AgentModelType.DEFAULT,
|
135
|
+
self.default_api_url,
|
136
|
+
self.default_api_key,
|
137
|
+
self.default_model_name,
|
120
138
|
)
|
121
139
|
agent_factory.config_model(
|
122
|
-
AgentModelType.PLANNER,
|
140
|
+
AgentModelType.PLANNER,
|
141
|
+
self.planner_api_url,
|
142
|
+
self.planner_api_key,
|
143
|
+
self.planner_model_name,
|
123
144
|
)
|
124
145
|
agent_factory.config_model(
|
125
|
-
AgentModelType.CODING,
|
146
|
+
AgentModelType.CODING,
|
147
|
+
self.coding_api_url,
|
148
|
+
self.coding_api_key,
|
149
|
+
self.coding_model_name,
|
126
150
|
)
|
127
151
|
agent_factory.config_model(
|
128
|
-
AgentModelType.REASONING,
|
152
|
+
AgentModelType.REASONING,
|
153
|
+
self.reasoning_api_url,
|
154
|
+
self.reasoning_api_key,
|
155
|
+
self.reasoning_model_name,
|
129
156
|
)
|
130
157
|
if options.planning:
|
131
158
|
flow = MasterPlannerFlow(nb_context, agent_factory)
|
@@ -138,7 +165,12 @@ class BotMagics(Magics, Configurable):
|
|
138
165
|
flow = TaskExecutorFlowV3(nb_context, agent_factory)
|
139
166
|
else:
|
140
167
|
raise ValueError(f"Unknown flow: {options.flow}")
|
141
|
-
flow(
|
168
|
+
flow(
|
169
|
+
options.stage,
|
170
|
+
options.max_tries,
|
171
|
+
not options.step_mode,
|
172
|
+
not options.auto_confirm,
|
173
|
+
)
|
142
174
|
except Exception as e:
|
143
175
|
traceback.print_exc()
|
144
176
|
|
jupyter_agent/bot_outputs.py
CHANGED
@@ -11,6 +11,8 @@ import datetime
|
|
11
11
|
import jinja2
|
12
12
|
|
13
13
|
from enum import Enum
|
14
|
+
from typing import Optional, Dict, Any
|
15
|
+
from pydantic import BaseModel, Field
|
14
16
|
from IPython.display import display, Markdown
|
15
17
|
from .utils import no_indent, no_wrap
|
16
18
|
|
@@ -171,6 +173,38 @@ LOGGING_LEVELS = {
|
|
171
173
|
}
|
172
174
|
|
173
175
|
|
176
|
+
class BaseEvalutionRecord(BaseModel):
    """Fields shared by every evaluation record."""

    # Unix timestamp of the record; AgentOutput.log_evaluation fills it in
    # with time.time() when it is still 0.
    timestamp: float = 0
    # Notebook the record belongs to (empty until a consumer sets it).
    notebook_name: str = ""
    # Record kind; each subclass overrides the default.
    eval_type: str = "BASE"
    # Index of the notebook cell that produced the record, -1 when unset.
    cell_index: int = -1
    # Elapsed time in seconds.
    execution_duration: float = 0.0
    # Whether the evaluated unit succeeded.
    is_success: bool = False
    # Correctness score; scale is not defined here - TODO confirm with the producer.
    correct_score: float = 0.0
|
184
|
+
|
185
|
+
|
186
|
+
class StageEvalutionRecord(BaseEvalutionRecord):
    """Evaluation record for a single stage of a task flow."""

    eval_type: str = "STAGE"
    # Name of the flow the stage ran in.
    flow: str = ""
    # Name of the stage that was executed.
    stage: str = ""
    # Additional scores, all defaulting to 0.0; their exact meaning is not
    # defined in this module - TODO confirm with the evaluator that sets them.
    coding_score: float = 0.0
    important_score: float = 0.0
    user_supply_score: float = 0.0
|
193
|
+
|
194
|
+
|
195
|
+
class FlowEvalutionRecord(BaseEvalutionRecord):
    """Evaluation record summarising one complete task flow."""

    eval_type: str = "FLOW"
    # Name of the flow.
    flow: str = ""
    # Number of stages the flow executed.
    stage_count: int = 0
    # Planning score, 0.0 by default; scale not defined here - TODO confirm.
    planning_score: float = 0.0
|
200
|
+
|
201
|
+
|
202
|
+
class NotebookEvalutionRecord(BaseEvalutionRecord):
    """Evaluation record for a whole notebook run."""

    eval_type: str = "NOTEBOOK"
    # Number of flows in the notebook, 0 when not supplied.
    flow_count: int = 0
    # Planning score, 0.0 by default; scale not defined here - TODO confirm.
    planning_score: float = 0.0
|
206
|
+
|
207
|
+
|
174
208
|
class AgentOutput:
|
175
209
|
"""
|
176
210
|
AgentOutput 是一个用于在 Jupyter Notebook 中显示 Agent 输出的类。
|
@@ -193,6 +227,7 @@ class AgentOutput:
|
|
193
227
|
self._agent_data_timestamp = None
|
194
228
|
self._agent_data = {}
|
195
229
|
self._logging_records = []
|
230
|
+
self._evaluation_records = []
|
196
231
|
|
197
232
|
@property
|
198
233
|
def content(self):
|
@@ -226,6 +261,8 @@ class AgentOutput:
|
|
226
261
|
"jupyter-agent-data": self._agent_data,
|
227
262
|
}
|
228
263
|
)
|
264
|
+
if self._evaluation_records:
|
265
|
+
metadata["jupyter-agent-evaluation-records"] = [record.model_dump() for record in self._evaluation_records]
|
229
266
|
return metadata
|
230
267
|
|
231
268
|
def display(self, stage=None, force=False, wait=True):
|
@@ -320,6 +357,20 @@ class AgentOutput:
|
|
320
357
|
)
|
321
358
|
self.display(force=False, wait=False)
|
322
359
|
|
360
|
+
def log_evaluation(self, record: BaseEvalutionRecord):
    """Store an evaluation record, log a one-line summary and refresh the display.

    :param record: The record to store; its ``timestamp`` is set to the
        current time when it is still 0.
    :raises AssertionError: If ``record`` is not a ``BaseEvalutionRecord``.
    """
    assert isinstance(
        record, BaseEvalutionRecord
    ), "record must be an instance of BaseEvalutionRecord or its subclass"
    # A timestamp of 0 means the caller did not provide one.
    if record.timestamp == 0:
        record.timestamp = time.time()
    self._evaluation_records.append(record)
    summary = (
        f"Evaluation: {record.eval_type}[{record.cell_index}] duration: {record.execution_duration:.2f}s "
        f"success: {record.is_success} correct: {record.correct_score:.2f}"
    )
    self.log(summary, level="INFO")
    self.display(force=False, wait=False)
|
373
|
+
|
323
374
|
|
324
375
|
__agent_output = None
|
325
376
|
|
@@ -363,6 +414,14 @@ def output_agent_data(**kwargs):
|
|
363
414
|
get_output().output_agent_data(**kwargs)
|
364
415
|
|
365
416
|
|
417
|
+
def output_evaluation(record: BaseEvalutionRecord):
    """
    Output an evaluation record to the AgentOutput.

    :param record: Evaluation record object; must be an instance of a
        BaseEvalutionRecord subclass.
    """
    agent_output = get_output()
    agent_output.log_evaluation(record)
|
423
|
+
|
424
|
+
|
366
425
|
def clear_output(stage=None, clear_metadata=False):
|
367
426
|
get_output().clear(stage, clear_metadata)
|
368
427
|
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: jupyter-agent
|
3
|
-
Version: 2025.6.
|
3
|
+
Version: 2025.6.104
|
4
4
|
Summary: 调用LLM实现Jupyter代码的自动生成、执行、调试等功能
|
5
5
|
Author: viewstar000
|
6
6
|
License: MIT
|
@@ -10,14 +10,15 @@ Classifier: Operating System :: OS Independent
|
|
10
10
|
Requires-Python: >=3.12
|
11
11
|
Description-Content-Type: text/markdown
|
12
12
|
License-File: LICENSE
|
13
|
+
Requires-Dist: ipynbname
|
13
14
|
Requires-Dist: ipython
|
15
|
+
Requires-Dist: jinja2
|
16
|
+
Requires-Dist: nbclient
|
14
17
|
Requires-Dist: nbformat
|
15
|
-
Requires-Dist: ipynbname
|
16
18
|
Requires-Dist: openai
|
17
|
-
Requires-Dist: traitlets
|
18
|
-
Requires-Dist: pyyaml
|
19
|
-
Requires-Dist: jinja2
|
20
19
|
Requires-Dist: pydantic
|
20
|
+
Requires-Dist: pyyaml
|
21
|
+
Requires-Dist: traitlets
|
21
22
|
Dynamic: license-file
|
22
23
|
|
23
24
|
# jupyter-agent
|
@@ -1,8 +1,9 @@
|
|
1
1
|
jupyter_agent/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
2
2
|
jupyter_agent/bot_chat.py,sha256=RkaT-Cpkp3G24F1AtbcgqjKo-LBxwMNAYHVitaPZNOg,8834
|
3
3
|
jupyter_agent/bot_contexts.py,sha256=eFezr7ImokMz33jJJMlLLQspQBYnuE6Bc2bE1wLbXWU,19123
|
4
|
-
jupyter_agent/
|
5
|
-
jupyter_agent/
|
4
|
+
jupyter_agent/bot_evaluation.py,sha256=tTGUxZ5Gf-A_aFj1HZ8_rjwXpF5X4cv_YVUyOybJShc,8462
|
5
|
+
jupyter_agent/bot_magics.py,sha256=1W52-HVSixsOR-svFv_iPrllNxdgx2mEHMQ4op0UW1o,8204
|
6
|
+
jupyter_agent/bot_outputs.py,sha256=aDowlURSt9AZnlQdwQp5Gmd5fSRzY910B6A0h-gvmF0,15953
|
6
7
|
jupyter_agent/utils.py,sha256=jbvDtVK6MfGaf5ZLoam_sq9R5TMriEG4HbMF0bHHDes,4387
|
7
8
|
jupyter_agent/bot_agents/__init__.py,sha256=IdlBlvfaDUfp3qhkNuwUVV_CdplafZsgLezLVkZCREw,1323
|
8
9
|
jupyter_agent/bot_agents/base.py,sha256=50XtKBVRj83zALGgw4klGLZkUlqHNhs1WIX5av9bIm4,10893
|
@@ -21,13 +22,14 @@ jupyter_agent/bot_agents/task_summarier.py,sha256=bvYEKW_NWRwe-kNNxR7uhJTMKMJXSy
|
|
21
22
|
jupyter_agent/bot_agents/task_verifier.py,sha256=9Tlyb7hP7tBHMrh5XkRD30mYLodNum33X6v2snjm0QI,2478
|
22
23
|
jupyter_agent/bot_agents/task_verify_summarier.py,sha256=mhpqgcBPOur0TtG8rYUT-BCAYgAiJxDgXVnCAE5Cucs,4963
|
23
24
|
jupyter_agent/bot_flows/__init__.py,sha256=vbb3GJLu6aZdJ2ox4eaHn5cg0d4WQM6zmhIbMAlHIFo,488
|
24
|
-
jupyter_agent/bot_flows/base.py,sha256=
|
25
|
-
jupyter_agent/bot_flows/master_planner.py,sha256=
|
25
|
+
jupyter_agent/bot_flows/base.py,sha256=V8JEkjf-gmkuz0X4P8-ziHapeWPN2EEoXFTk5g8AK1Y,10210
|
26
|
+
jupyter_agent/bot_flows/master_planner.py,sha256=rmLXrg457TY91FVjOKjJOR8X7gAqP18Sl0O0c5I4pAY,551
|
26
27
|
jupyter_agent/bot_flows/task_executor_v1.py,sha256=WzTfoOTjBpk1emvpiL3yeiudKdDf6EpVdJIugxxbqM4,2975
|
27
28
|
jupyter_agent/bot_flows/task_executor_v2.py,sha256=IyfxhzA4TljNd6iqqUYxxzB63r9lxfe1Zbu177hytRs,2949
|
28
|
-
jupyter_agent/bot_flows/task_executor_v3.py,sha256=
|
29
|
-
jupyter_agent-2025.6.
|
30
|
-
jupyter_agent-2025.6.
|
31
|
-
jupyter_agent-2025.6.
|
32
|
-
jupyter_agent-2025.6.
|
33
|
-
jupyter_agent-2025.6.
|
29
|
+
jupyter_agent/bot_flows/task_executor_v3.py,sha256=Argp0T-bNBRHpjpHhFuNKclOtN2O6DJ28rY7F0GUQrI,3527
|
30
|
+
jupyter_agent-2025.6.104.dist-info/licenses/LICENSE,sha256=nWMmSIg7OepTIDX_OPP0-T9ImeCBBoog7eJxm5awtcM,1068
|
31
|
+
jupyter_agent-2025.6.104.dist-info/METADATA,sha256=cxJPbyRvUTMJgf01snK1YIvgh87ImLWNKRg3z7RoMi0,10022
|
32
|
+
jupyter_agent-2025.6.104.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
33
|
+
jupyter_agent-2025.6.104.dist-info/entry_points.txt,sha256=063AB86wSrC_V-iiEEqxTlR4uz-T7VH_YagIpmKFQC0,63
|
34
|
+
jupyter_agent-2025.6.104.dist-info/top_level.txt,sha256=c3USTBZ7DZGuvLKlEW-QfGIx0tzn98iCEn3bpdYnDtE,14
|
35
|
+
jupyter_agent-2025.6.104.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|