hdsp-jupyter-extension 2.0.11__py3-none-any.whl → 2.0.13__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agent_server/langchain/MULTI_AGENT_ARCHITECTURE.md +1114 -0
- agent_server/langchain/__init__.py +2 -2
- agent_server/langchain/agent.py +72 -33
- agent_server/langchain/agent_factory.py +400 -0
- agent_server/langchain/agent_prompts/__init__.py +25 -0
- agent_server/langchain/agent_prompts/athena_query_prompt.py +71 -0
- agent_server/langchain/agent_prompts/planner_prompt.py +85 -0
- agent_server/langchain/agent_prompts/python_developer_prompt.py +123 -0
- agent_server/langchain/agent_prompts/researcher_prompt.py +38 -0
- agent_server/langchain/custom_middleware.py +652 -195
- agent_server/langchain/hitl_config.py +34 -10
- agent_server/langchain/middleware/__init__.py +24 -0
- agent_server/langchain/middleware/code_history_middleware.py +412 -0
- agent_server/langchain/middleware/description_injector.py +150 -0
- agent_server/langchain/middleware/skill_middleware.py +298 -0
- agent_server/langchain/middleware/subagent_events.py +171 -0
- agent_server/langchain/middleware/subagent_middleware.py +329 -0
- agent_server/langchain/prompts.py +96 -101
- agent_server/langchain/skills/data_analysis.md +236 -0
- agent_server/langchain/skills/data_loading.md +158 -0
- agent_server/langchain/skills/inference.md +392 -0
- agent_server/langchain/skills/model_training.md +318 -0
- agent_server/langchain/skills/pyspark.md +352 -0
- agent_server/langchain/subagents/__init__.py +20 -0
- agent_server/langchain/subagents/base.py +173 -0
- agent_server/langchain/tools/__init__.py +3 -0
- agent_server/langchain/tools/jupyter_tools.py +58 -20
- agent_server/langchain/tools/lsp_tools.py +1 -1
- agent_server/langchain/tools/shared/__init__.py +26 -0
- agent_server/langchain/tools/shared/qdrant_search.py +175 -0
- agent_server/langchain/tools/tool_registry.py +219 -0
- agent_server/langchain/tools/workspace_tools.py +197 -0
- agent_server/routers/config.py +40 -1
- agent_server/routers/langchain_agent.py +818 -337
- {hdsp_jupyter_extension-2.0.11.data → hdsp_jupyter_extension-2.0.13.data}/data/share/jupyter/labextensions/hdsp-agent/build_log.json +1 -1
- {hdsp_jupyter_extension-2.0.11.data → hdsp_jupyter_extension-2.0.13.data}/data/share/jupyter/labextensions/hdsp-agent/package.json +7 -2
- hdsp_jupyter_extension-2.0.11.data/data/share/jupyter/labextensions/hdsp-agent/static/frontend_styles_index_js.2d9fb488c82498c45c2d.js → hdsp_jupyter_extension-2.0.13.data/data/share/jupyter/labextensions/hdsp-agent/static/frontend_styles_index_js.037b3c8e5d6a92b63b16.js +1108 -179
- hdsp_jupyter_extension-2.0.13.data/data/share/jupyter/labextensions/hdsp-agent/static/frontend_styles_index_js.037b3c8e5d6a92b63b16.js.map +1 -0
- jupyter_ext/labextension/static/lib_index_js.58c1e128ba0b76f41f04.js → hdsp_jupyter_extension-2.0.13.data/data/share/jupyter/labextensions/hdsp-agent/static/lib_index_js.5449ba3c7e25177d2987.js +3916 -8128
- hdsp_jupyter_extension-2.0.13.data/data/share/jupyter/labextensions/hdsp-agent/static/lib_index_js.5449ba3c7e25177d2987.js.map +1 -0
- hdsp_jupyter_extension-2.0.11.data/data/share/jupyter/labextensions/hdsp-agent/static/remoteEntry.9da31d1134a53b0c4af5.js → hdsp_jupyter_extension-2.0.13.data/data/share/jupyter/labextensions/hdsp-agent/static/remoteEntry.a8e0b064eb9b1c1ff463.js +17 -17
- hdsp_jupyter_extension-2.0.13.data/data/share/jupyter/labextensions/hdsp-agent/static/remoteEntry.a8e0b064eb9b1c1ff463.js.map +1 -0
- {hdsp_jupyter_extension-2.0.11.dist-info → hdsp_jupyter_extension-2.0.13.dist-info}/METADATA +1 -1
- {hdsp_jupyter_extension-2.0.11.dist-info → hdsp_jupyter_extension-2.0.13.dist-info}/RECORD +75 -51
- jupyter_ext/_version.py +1 -1
- jupyter_ext/handlers.py +59 -8
- jupyter_ext/labextension/build_log.json +1 -1
- jupyter_ext/labextension/package.json +7 -2
- jupyter_ext/labextension/static/{frontend_styles_index_js.2d9fb488c82498c45c2d.js → frontend_styles_index_js.037b3c8e5d6a92b63b16.js} +1108 -179
- jupyter_ext/labextension/static/frontend_styles_index_js.037b3c8e5d6a92b63b16.js.map +1 -0
- hdsp_jupyter_extension-2.0.11.data/data/share/jupyter/labextensions/hdsp-agent/static/lib_index_js.58c1e128ba0b76f41f04.js → jupyter_ext/labextension/static/lib_index_js.5449ba3c7e25177d2987.js +3916 -8128
- jupyter_ext/labextension/static/lib_index_js.5449ba3c7e25177d2987.js.map +1 -0
- jupyter_ext/labextension/static/{remoteEntry.9da31d1134a53b0c4af5.js → remoteEntry.a8e0b064eb9b1c1ff463.js} +17 -17
- jupyter_ext/labextension/static/remoteEntry.a8e0b064eb9b1c1ff463.js.map +1 -0
- hdsp_jupyter_extension-2.0.11.data/data/share/jupyter/labextensions/hdsp-agent/static/frontend_styles_index_js.2d9fb488c82498c45c2d.js.map +0 -1
- hdsp_jupyter_extension-2.0.11.data/data/share/jupyter/labextensions/hdsp-agent/static/lib_index_js.58c1e128ba0b76f41f04.js.map +0 -1
- hdsp_jupyter_extension-2.0.11.data/data/share/jupyter/labextensions/hdsp-agent/static/remoteEntry.9da31d1134a53b0c4af5.js.map +0 -1
- jupyter_ext/labextension/static/frontend_styles_index_js.2d9fb488c82498c45c2d.js.map +0 -1
- jupyter_ext/labextension/static/lib_index_js.58c1e128ba0b76f41f04.js.map +0 -1
- jupyter_ext/labextension/static/remoteEntry.9da31d1134a53b0c4af5.js.map +0 -1
- {hdsp_jupyter_extension-2.0.11.data → hdsp_jupyter_extension-2.0.13.data}/data/etc/jupyter/jupyter_server_config.d/hdsp_jupyter_extension.json +0 -0
- {hdsp_jupyter_extension-2.0.11.data → hdsp_jupyter_extension-2.0.13.data}/data/share/jupyter/labextensions/hdsp-agent/install.json +0 -0
- {hdsp_jupyter_extension-2.0.11.data → hdsp_jupyter_extension-2.0.13.data}/data/share/jupyter/labextensions/hdsp-agent/static/node_modules_emotion_use-insertion-effect-with-fallbacks_dist_emotion-use-insertion-effect-wi-3ba6b80.c095373419d05e6f141a.js +0 -0
- {hdsp_jupyter_extension-2.0.11.data → hdsp_jupyter_extension-2.0.13.data}/data/share/jupyter/labextensions/hdsp-agent/static/node_modules_emotion_use-insertion-effect-with-fallbacks_dist_emotion-use-insertion-effect-wi-3ba6b80.c095373419d05e6f141a.js.map +0 -0
- {hdsp_jupyter_extension-2.0.11.data → hdsp_jupyter_extension-2.0.13.data}/data/share/jupyter/labextensions/hdsp-agent/static/node_modules_emotion_use-insertion-effect-with-fallbacks_dist_emotion-use-insertion-effect-wi-3ba6b81.61e75fb98ecff46cf836.js +0 -0
- {hdsp_jupyter_extension-2.0.11.data → hdsp_jupyter_extension-2.0.13.data}/data/share/jupyter/labextensions/hdsp-agent/static/node_modules_emotion_use-insertion-effect-with-fallbacks_dist_emotion-use-insertion-effect-wi-3ba6b81.61e75fb98ecff46cf836.js.map +0 -0
- {hdsp_jupyter_extension-2.0.11.data → hdsp_jupyter_extension-2.0.13.data}/data/share/jupyter/labextensions/hdsp-agent/static/style.js +0 -0
- {hdsp_jupyter_extension-2.0.11.data → hdsp_jupyter_extension-2.0.13.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_babel_runtime_helpers_esm_extends_js-node_modules_emotion_serialize_dist-051195.e2553aab0c3963b83dd7.js +0 -0
- {hdsp_jupyter_extension-2.0.11.data → hdsp_jupyter_extension-2.0.13.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_babel_runtime_helpers_esm_extends_js-node_modules_emotion_serialize_dist-051195.e2553aab0c3963b83dd7.js.map +0 -0
- {hdsp_jupyter_extension-2.0.11.data → hdsp_jupyter_extension-2.0.13.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_emotion_cache_dist_emotion-cache_browser_development_esm_js.24edcc52a1c014a8a5f0.js +0 -0
- {hdsp_jupyter_extension-2.0.11.data → hdsp_jupyter_extension-2.0.13.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_emotion_cache_dist_emotion-cache_browser_development_esm_js.24edcc52a1c014a8a5f0.js.map +0 -0
- {hdsp_jupyter_extension-2.0.11.data → hdsp_jupyter_extension-2.0.13.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_emotion_react_dist_emotion-react_browser_development_esm_js.19ecf6babe00caff6b8a.js +0 -0
- {hdsp_jupyter_extension-2.0.11.data → hdsp_jupyter_extension-2.0.13.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_emotion_react_dist_emotion-react_browser_development_esm_js.19ecf6babe00caff6b8a.js.map +0 -0
- {hdsp_jupyter_extension-2.0.11.data → hdsp_jupyter_extension-2.0.13.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_emotion_styled_dist_emotion-styled_browser_development_esm_js.661fb5836f4978a7c6e1.js +0 -0
- {hdsp_jupyter_extension-2.0.11.data → hdsp_jupyter_extension-2.0.13.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_emotion_styled_dist_emotion-styled_browser_development_esm_js.661fb5836f4978a7c6e1.js.map +0 -0
- {hdsp_jupyter_extension-2.0.11.data → hdsp_jupyter_extension-2.0.13.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_mui_material_index_js.985697e0162d8d088ca2.js +0 -0
- {hdsp_jupyter_extension-2.0.11.data → hdsp_jupyter_extension-2.0.13.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_mui_material_index_js.985697e0162d8d088ca2.js.map +0 -0
- {hdsp_jupyter_extension-2.0.11.data → hdsp_jupyter_extension-2.0.13.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_mui_material_utils_createSvgIcon_js.1f5038488cdfd8b3a85d.js +0 -0
- {hdsp_jupyter_extension-2.0.11.data → hdsp_jupyter_extension-2.0.13.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_mui_material_utils_createSvgIcon_js.1f5038488cdfd8b3a85d.js.map +0 -0
- {hdsp_jupyter_extension-2.0.11.dist-info → hdsp_jupyter_extension-2.0.13.dist-info}/WHEEL +0 -0
- {hdsp_jupyter_extension-2.0.11.dist-info → hdsp_jupyter_extension-2.0.13.dist-info}/licenses/LICENSE +0 -0
agent_server/langchain/custom_middleware.py (+652 -195)

```diff
@@ -11,10 +11,13 @@ import re
 import uuid
 from typing import Any, Dict, Optional
 
+from json_repair import repair_json
 from langchain_core.messages import AIMessage, HumanMessage
 
 from agent_server.langchain.logging_utils import (
     _format_middleware_marker,
+    _pretty_json,
+    _serialize_message,
     _with_middleware_logging,
 )
 from agent_server.langchain.prompts import JSON_TOOL_SCHEMA, NON_HITL_TOOLS
@@ -75,6 +78,15 @@ def parse_json_tool_call(text) -> Optional[Dict[str, Any]]:
     except json.JSONDecodeError:
         pass
 
+    # Try json-repair for malformed JSON from LLMs
+    try:
+        repaired = repair_json(text, return_objects=True)
+        if isinstance(repaired, dict) and "tool" in repaired:
+            logger.info(f"Repaired malformed JSON tool call: {repaired.get('tool')}")
+            return repaired
+    except Exception as e:
+        logger.debug(f"json-repair failed: {e}")
+
     return None
 
 
```
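The fallback added to `parse_json_tool_call` leans on json-repair's tolerance for truncated model output. A minimal sketch of the behavior the hunk relies on, using `repair_json(..., return_objects=True)` as imported above (the malformed payload is a made-up example):

```python
from json_repair import repair_json

# Hypothetical LLM output: a tool call cut off before its closing braces
malformed = '{"tool": "jupyter_cell_tool", "arguments": {"code": "print(1)"'

repaired = repair_json(malformed, return_objects=True)
print(repaired)
# Expected: {'tool': 'jupyter_cell_tool', 'arguments': {'code': 'print(1)'}}
print(isinstance(repaired, dict) and "tool" in repaired)  # True -> the fallback returns it
```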
```diff
@@ -83,15 +95,110 @@ def normalize_tool_name(tool_name: str) -> str:
 
     Rules:
     - write_todos_tool → write_todos (TodoListMiddleware exception)
+    - task → task_tool (SubAgentMiddleware uses task_tool)
     - other tools without _tool suffix → add _tool suffix
     """
     if tool_name == "write_todos_tool":
         return "write_todos"
-    if
+    if tool_name == "task":
+        return "task_tool"
+    if not tool_name.endswith("_tool") and tool_name not in ("write_todos",):
         return f"{tool_name}_tool"
     return tool_name
 
 
+def repair_tool_call_arguments(arguments: str) -> Optional[Dict[str, Any]]:
+    """Repair malformed tool call arguments from LLMs.
+
+    Some LLMs (e.g., gpt-oss-120b) return arguments without leading '{' or
+    with other JSON formatting issues.
+
+    Args:
+        arguments: Raw arguments string from LLM
+
+    Returns:
+        Parsed dictionary or None if repair failed
+    """
+    if not arguments or not isinstance(arguments, str):
+        return None
+
+    arguments = arguments.strip()
+
+    # Try direct parse first
+    try:
+        return json.loads(arguments)
+    except json.JSONDecodeError:
+        pass
+
+    # Use json-repair for malformed arguments
+    try:
+        repaired = repair_json(arguments, return_objects=True)
+        if isinstance(repaired, dict):
+            logger.info("Repaired malformed tool arguments")
+            return repaired
+    except Exception as e:
+        logger.debug(f"json-repair failed for arguments: {e}")
+
+    return None
+
+
+def try_extract_tool_calls_from_additional_kwargs(
+    response_message,
+) -> Optional[AIMessage]:
+    """Try to extract and repair tool_calls from additional_kwargs.
+
+    Some LLMs put tool_calls in additional_kwargs but with malformed arguments.
+    This function tries to repair them and create a proper AIMessage.
+
+    Args:
+        response_message: AIMessage with potential tool_calls in additional_kwargs
+
+    Returns:
+        New AIMessage with repaired tool_calls, or None if extraction failed
+    """
+    if not response_message:
+        return None
+
+    additional_kwargs = getattr(response_message, "additional_kwargs", {})
+    raw_tool_calls = additional_kwargs.get("tool_calls", [])
+
+    if not raw_tool_calls:
+        return None
+
+    repaired_tool_calls = []
+    for tc in raw_tool_calls:
+        func = tc.get("function", {})
+        name = func.get("name")
+        arguments = func.get("arguments", "")
+        tc_id = tc.get("id", str(uuid.uuid4()))
+
+        if not name:
+            continue
+
+        # Try to repair arguments
+        args = repair_tool_call_arguments(arguments)
+        if args is not None:
+            repaired_tool_calls.append(
+                {
+                    "name": normalize_tool_name(name),
+                    "args": args,
+                    "id": tc_id,
+                    "type": "tool_call",
+                }
+            )
+
+    if repaired_tool_calls:
+        logger.info(
+            f"Extracted {len(repaired_tool_calls)} tool calls from additional_kwargs"
+        )
+        return AIMessage(
+            content=getattr(response_message, "content", "") or "",
+            tool_calls=repaired_tool_calls,
+        )
+
+    return None
+
+
 def create_tool_call_message(tool_name: str, arguments: Dict[str, Any]) -> AIMessage:
     """Create AIMessage with tool_calls from parsed JSON.
 
```
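Taken together, the three new helpers let an OpenAI-style `additional_kwargs["tool_calls"]` entry with broken argument JSON survive. A self-contained sketch of that path (the tool name `task` exercises the new normalization rule; the argument payload is hypothetical):

```python
import json
from json_repair import repair_json

# Hypothetical raw entry, as some LLMs emit it in additional_kwargs
raw_tool_call = {
    "id": "call_1",
    "function": {"name": "task", "arguments": '{"subagent": "researcher"'},  # no closing '}'
}

arguments = raw_tool_call["function"]["arguments"].strip()
try:
    args = json.loads(arguments)  # fails: unterminated object
except json.JSONDecodeError:
    args = repair_json(arguments, return_objects=True)  # repair_tool_call_arguments path

name = raw_tool_call["function"]["name"]
if name == "task":  # normalize_tool_name rule: SubAgentMiddleware expects task_tool
    name = "task_tool"

print(name, args)  # task_tool {'subagent': 'researcher'}
```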
```diff
@@ -125,7 +232,7 @@ def create_handle_empty_response_middleware(wrap_model_call):
     1. Detects empty or text-only responses (no tool_calls)
     2. Retries with JSON schema prompt to force structured output
     3. Parses JSON response and injects tool_calls into AIMessage
-    4. Falls back to synthetic
+    4. Falls back to synthetic write_todos completion if all else fails
 
     Args:
         wrap_model_call: LangChain's wrap_model_call decorator
@@ -142,65 +249,129 @@ def create_handle_empty_response_middleware(wrap_model_call):
         # Check if all todos are completed - if so, return empty response to stop agent
         # Method 1: Check state.todos
         todos = request.state.get("todos", [])
+        logger.debug(
+            "handle_empty_response: state.todos=%s",
+            json.dumps(todos, ensure_ascii=False) if todos else "[]",
+        )
         if todos:
             pending_todos = [
                 t for t in todos if t.get("status") in ("pending", "in_progress")
             ]
             if not pending_todos:
-
-
-
+                # Check if summary already exists AFTER the last REAL HumanMessage
+                # (to avoid false positives from previous task summaries)
+                # Note: Skip system-injected messages like "[SYSTEM] Tool completed..."
+                summary_exists = False
+                messages = request.messages
+
+                # Find index of last REAL HumanMessage (not system-injected)
+                last_human_idx = -1
+                for i, msg in enumerate(messages):
+                    msg_type = getattr(msg, "type", "") or type(msg).__name__
+                    if msg_type in ("human", "HumanMessage"):
+                        msg_content = getattr(msg, "content", "") or ""
+                        # Skip system-injected messages
+                        if not msg_content.startswith("[SYSTEM]"):
+                            last_human_idx = i
+
+                # Only check messages after last REAL HumanMessage for summary
+                messages_to_check = (
+                    messages[last_human_idx + 1 :]
+                    if last_human_idx >= 0
+                    else messages[-10:]
                 )
-
+                for msg in messages_to_check:
+                    content = getattr(msg, "content", "") or ""
+                    if '"summary"' in content and '"next_items"' in content:
+                        summary_exists = True
+                        break
+
+                if summary_exists:
+                    logger.info(
+                        "All %d todos completed and summary exists after last user message - stopping agent (no LLM call)",
+                        len(todos),
+                    )
+                    return AIMessage(content="", tool_calls=[])
+                else:
+                    # Allow one more LLM call for summary generation
+                    logger.info(
+                        "All %d todos completed but no summary yet after last user message - allowing LLM call for summary",
+                        len(todos),
+                    )
 
         # Method 2: Check last message if it's a write_todos ToolMessage with all completed
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+        # Note: We now allow one more LLM call for summary generation when all todos are completed
+        # This check is skipped to let the agent produce a summary
+
+        # Check if summary todo is completed
+        # IMPORTANT: Only consider summary completed if it's the LAST todo item and ALL todos are done
+        # This prevents false positives when a previous summary is completed but new tasks are added
+        all_todos_completed = all(t.get("status") == "completed" for t in todos)
+        last_todo_is_summary = (
+            len(todos) > 0
+            and "작업 요약" in todos[-1].get("content", "")
+            and "다음 단계" in todos[-1].get("content", "")
+            and todos[-1].get("status") == "completed"
+        )
+        summary_todo_completed = all_todos_completed and last_todo_is_summary
+
+        if not summary_todo_completed and any(
+            t.get("status") == "completed"
+            and "작업 요약" in t.get("content", "")
+            for t in todos
+        ):
+            logger.debug(
+                "Previous summary todo completed but new tasks exist - NOT treating as final summary"
+            )
 
-        # Check if
-        # This allows agent to naturally terminate after final_answer_tool
+        # Check if summary content exists in messages
         messages = request.messages
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+        summary_exists = False
+        for msg in messages[-15:]:
+            msg_content = getattr(msg, "content", "") or ""
+            if '"summary"' in msg_content and '"next_items"' in msg_content:
+                summary_exists = True
+                break
+            if any(
+                kw in msg_content
+                for kw in [
+                    "다음 단계 제안",
+                    "다음 단계:",
+                    "### 다음 단계",
+                    "## 다음 단계",
+                    "**다음 단계**",
+                    "모든 작업이 완료",
+                    "**작업 요약**",
+                    "### 작업 요약",
+                    "## 작업 요약",
+                ]
+            ):
+                summary_exists = True
+                break
 
         for attempt in range(max_retries + 1):
             response = handler(request)
 
+            # If summary todo is completed AND summary content exists, accept empty response
+            # This prevents infinite loop when inject_continuation_middleware returns empty AIMessage
+            response_message = _extract_ai_message(response)
+            if summary_todo_completed and summary_exists:
+                has_content_check = (
+                    bool(getattr(response_message, "content", None))
+                    if response_message
+                    else False
+                )
+                has_tool_calls_check = (
+                    bool(getattr(response_message, "tool_calls", None))
+                    if response_message
+                    else False
+                )
+                if not has_content_check and not has_tool_calls_check:
+                    logger.info(
+                        "Summary todo completed AND summary exists - accepting empty response (agent should stop)"
+                    )
+                    return response
+
             # Extract AIMessage from response
             response_message = _extract_ai_message(response)
 
```
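The "last REAL HumanMessage" scan is the linchpin of the stop condition above: continuation prompts are injected as `[SYSTEM]`-prefixed HumanMessages, so they must not reset the search window. An illustration with a hypothetical message list:

```python
from langchain_core.messages import AIMessage, HumanMessage

messages = [
    HumanMessage(content="Analyze titanic.csv"),
    AIMessage(content='{"summary": "EDA done", "next_items": []}'),  # earlier task's summary
    HumanMessage(content="Now handle the missing values"),           # last REAL human message
    HumanMessage(content="[SYSTEM] Tool completed..."),              # injected, skipped
]

last_human_idx = -1
for i, msg in enumerate(messages):
    msg_type = getattr(msg, "type", "") or type(msg).__name__
    if msg_type in ("human", "HumanMessage"):
        if not (getattr(msg, "content", "") or "").startswith("[SYSTEM]"):
            last_human_idx = i

print(last_human_idx)  # 2 -> the stale summary at index 1 is outside the search window
```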
```diff
@@ -227,6 +398,23 @@ def create_handle_empty_response_middleware(wrap_model_call):
             if has_tool_calls:
                 return response
 
+            # Try to extract and repair tool_calls from additional_kwargs
+            # Some LLMs (e.g., gpt-oss-120b) put tool_calls in additional_kwargs
+            # but with malformed arguments (missing '{', broken JSON, etc.)
+            if response_message and not has_tool_calls:
+                repaired_message = try_extract_tool_calls_from_additional_kwargs(
+                    response_message
+                )
+                if repaired_message and repaired_message.tool_calls:
+                    logger.info(
+                        "Repaired tool_calls from additional_kwargs: %d calls",
+                        len(repaired_message.tool_calls),
+                    )
+                    response = _replace_ai_message_in_response(
+                        response, repaired_message
+                    )
+                    return response
+
             # Try to parse JSON from content
             if has_content and response_message:
                 parsed = parse_json_tool_call(response_message.content)
@@ -242,6 +430,117 @@ def create_handle_empty_response_middleware(wrap_model_call):
                     response = _replace_ai_message_in_response(response, new_message)
                     return response
 
+            # Check if content is summary JSON (for summary todo)
+            # Summary JSON has "summary" and "next_items" but no "tool"
+            # IMPORTANT: Check for summary JSON pattern FIRST, regardless of current todo
+            # This handles cases where LLM outputs summary JSON mixed with other content
+            content = response_message.content
+            if isinstance(content, list):
+                content = " ".join(str(p) for p in content)
+
+            # Check if content contains summary JSON pattern
+            has_summary_pattern = ('"summary"' in content or "'summary'" in content) and (
+                '"next_items"' in content or "'next_items'" in content
+            )
+
+            if has_summary_pattern:
+                # Try to extract and repair summary JSON from mixed content
+                try:
+                    # Try to find JSON object containing summary
+                    import re
+                    json_match = re.search(r'\{[^{}]*"summary"[^{}]*"next_items"[^{}]*\}', content, re.DOTALL)
+                    if json_match:
+                        repaired_summary = repair_json(
+                            json_match.group(), return_objects=True
+                        )
+                    else:
+                        repaired_summary = repair_json(
+                            content, return_objects=True
+                        )
+
+                    if (
+                        isinstance(repaired_summary, dict)
+                        and "summary" in repaired_summary
+                        and "next_items" in repaired_summary
+                    ):
+                        # Create new message with repaired JSON content
+                        repaired_content = json.dumps(
+                            repaired_summary, ensure_ascii=False
+                        )
+                        logger.info(
+                            "Detected and repaired summary JSON in content (pattern-based detection)"
+                        )
+                        # Create message with repaired content
+                        repaired_response_message = AIMessage(
+                            content=repaired_content,
+                            tool_calls=getattr(
+                                response_message, "tool_calls", []
+                            )
+                            or [],
+                        )
+                        synthetic_message = _create_synthetic_completion(
+                            request,
+                            repaired_response_message,
+                            has_content=True,
+                        )
+                        response = _replace_ai_message_in_response(
+                            response, synthetic_message
+                        )
+                        return response
+                except Exception as e:
+                    logger.debug(f"Failed to extract summary JSON from mixed content: {e}")
+
+                # Fallback: accept as-is if repair failed but looks like summary
+                logger.info(
+                    "Detected summary JSON pattern in content - accepting and synthesizing write_todos"
+                )
+                synthetic_message = _create_synthetic_completion(
+                    request, response_message, has_content=True
+                )
+                response = _replace_ai_message_in_response(
+                    response, synthetic_message
+                )
+                return response
+
+            # Legacy: Also check if current todo is a summary todo (backward compatibility)
+            todos = request.state.get("todos", [])
+            in_progress_todos = [
+                t for t in todos if t.get("status") == "in_progress"
+            ]
+            pending_todos = [t for t in todos if t.get("status") == "pending"]
+            current_todo = (
+                in_progress_todos[0]
+                if in_progress_todos
+                else pending_todos[0]
+                if pending_todos
+                else None
+            )
+            if current_todo:
+                summary_keywords = [
+                    "작업 요약",
+                    "결과 요약",
+                    "분석 요약",
+                    "요약 및",
+                    "다음단계",
+                    "다음 단계",
+                    "next step",
+                ]
+                is_summary_todo = any(
+                    kw in current_todo.get("content", "") for kw in summary_keywords
+                )
+                if is_summary_todo and ('"summary"' in content or "'summary'" in content):
+                    # This is a summary todo with summary content - accept it
+                    logger.info(
+                        "Summary todo with summary content detected - accepting"
+                    )
+                    synthetic_message = _create_synthetic_completion(
+                        request, response_message, has_content=True
+                    )
+                    response = _replace_ai_message_in_response(
+                        response, synthetic_message
+                    )
+                    return response
+
             # Invalid response - retry with JSON schema prompt
             if response_message and attempt < max_retries:
                 reason = "text-only" if has_content else "empty"
```
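Worth noting about the pattern-based extraction: the regex requires a brace-free span between `"summary"` and `"next_items"`, so the `json_match` branch only captures flat summary objects; a summary whose `next_items` holds nested objects misses the regex and falls through to `repair_json(content, ...)` on the full content. A quick check:

```python
import re

pattern = r'\{[^{}]*"summary"[^{}]*"next_items"[^{}]*\}'

flat = 'Some prose before... {"summary": "EDA done", "next_items": []} ...and after'
print(bool(re.search(pattern, flat, re.DOTALL)))    # True  -> json_match branch

nested = '{"summary": "EDA done", "next_items": [{"subject": "Train a model"}]}'
print(bool(re.search(pattern, nested, re.DOTALL)))  # False -> repair_json(content) branch
```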
```diff
@@ -256,7 +555,7 @@ def create_handle_empty_response_middleware(wrap_model_call):
                         reason,
                     )
                     # Synthesize write_todos while preserving the content (summary)
-                    synthetic_message = _create_synthetic_final_answer(
+                    synthetic_message = _create_synthetic_completion(
                         request, response_message, has_content
                     )
                     response = _replace_ai_message_in_response(
@@ -271,6 +570,8 @@ def create_handle_empty_response_middleware(wrap_model_call):
                     attempt + 1,
                     max_retries + 1,
                 )
+                if reason == "text-only":
+                    _log_invalid_ai_message(response_message, reason)
 
                 request = request.override(
                     messages=request.messages + [HumanMessage(content=json_prompt)]
@@ -293,7 +594,7 @@ def create_handle_empty_response_middleware(wrap_model_call):
             logger.warning(
                 "Max retries exhausted. Synthesizing write_todos to complete."
             )
-            synthetic_message = _create_synthetic_final_answer(
+            synthetic_message = _create_synthetic_completion(
                 request, response_message, has_content
             )
             response = _replace_ai_message_in_response(response, synthetic_message)
@@ -325,6 +626,23 @@ def _extract_ai_message(response):
     return None
 
 
+def _log_invalid_ai_message(response_message, reason: str) -> None:
+    """Log full AIMessage details for invalid (text-only) responses."""
+    if not response_message:
+        return
+    try:
+        payload = _serialize_message(response_message)
+    except Exception as exc:
+        logger.warning(
+            "Invalid AIMessage detail (%s): failed to serialize (%s). Raw=%r",
+            reason,
+            exc,
+            response_message,
+        )
+        return
+    logger.warning("Invalid AIMessage detail (%s): %s", reason, _pretty_json(payload))
+
+
 def _replace_ai_message_in_response(response, new_message):
     """Replace AIMessage in response with a new one."""
     if hasattr(response, "result"):
@@ -344,25 +662,51 @@ def _build_json_prompt(request, response_message, has_content):
     pending_todos = [t for t in todos if t.get("status") in ("pending", "in_progress")]
     in_progress_todos = [t for t in todos if t.get("status") == "in_progress"]
 
+    # Check if CURRENT todo (first in_progress or first pending) is summary/next_steps
+    # Not checking ALL pending todos - only the one we should be working on now
+    summary_keywords = [
+        "작업 요약",
+        "결과 요약",
+        "분석 요약",
+        "요약 및",
+        "다음단계",
+        "다음 단계",
+        "next step",
+    ]
+    current_todo = (
+        in_progress_todos[0]
+        if in_progress_todos
+        else pending_todos[0]
+        if pending_todos
+        else None
+    )
+    is_summary_todo = current_todo is not None and any(
+        kw in current_todo.get("content", "") for kw in summary_keywords
+    )
+
     if has_content:
         # If all todos completed, don't force another tool call
        if todos and not pending_todos:
             return None  # Signal to skip retry
 
-        # If current
+        # If current todo is "작업 요약 및 다음단계 제시", accept text-only response
         # The LLM is outputting the summary, we'll synthesize write_todos
-        if
-
-
-
-
-
-
-
-
-
-        )
-
+        if is_summary_todo:
+            summary_todo = next(
+                (
+                    t
+                    for t in pending_todos
+                    if any(kw in t.get("content", "") for kw in summary_keywords)
+                ),
+                {"content": "summary"},
+            )
+            logger.info(
+                "Current todo is summary/next steps ('%s'), accepting text-only response",
+                summary_todo.get("content", "")[:30],
+            )
+            return (
+                None  # Signal to skip retry - will synthesize write_todos with content
+            )
 
     return (
         f"{JSON_TOOL_SCHEMA}\n\n"
@@ -370,6 +714,18 @@ def _build_json_prompt(request, response_message, has_content):
             f"Call the next appropriate tool to continue.\n"
             f'Example: {{"tool": "jupyter_cell_tool", "arguments": {{"code": "print(\'hello\')"}}}}'
         )
+    elif is_summary_todo:
+        # Empty response but current todo is summary - force summary JSON output
+        logger.info(
+            "Empty response but current todo is summary/next steps - forcing summary JSON prompt"
+        )
+        return (
+            f"{JSON_TOOL_SCHEMA}\n\n"
+            f"You MUST output a summary JSON with next_items. This is the final step.\n"
+            f"출력 형식 (반드시 이 형식으로 출력):\n"
+            f'{{"summary": "완료된 작업 요약 (한국어)", "next_items": [{{"subject": "다음 작업 제목", "description": "설명"}}]}}\n\n'
+            f"Do NOT call any tool. Just output the summary JSON directly in your response."
+        )
     elif pending_todos:
         todo_list = ", ".join(t.get("content", "")[:20] for t in pending_todos[:3])
         example_json = '{"tool": "jupyter_cell_tool", "arguments": {"code": "import pandas as pd\\ndf = pd.read_csv(\'titanic.csv\')\\nprint(df.head())"}}'
@@ -400,7 +756,7 @@ def _build_json_prompt(request, response_message, has_content):
     )
 
 
-def _create_synthetic_final_answer(request, response_message, has_content):
+def _create_synthetic_completion(request, response_message, has_content):
     """Create synthetic write_todos call to mark all todos as completed.
 
     This triggers automatic session termination via router's all_todos_completed check.
@@ -408,6 +764,15 @@ def _create_synthetic_final_answer(request, response_message, has_content):
     """
     todos = request.state.get("todos", [])
 
+    # Warn if there are pending todos being force-completed
+    pending_count = sum(1 for t in todos if t.get("status") == "pending")
+    if pending_count > 0:
+        logger.warning(
+            "Force-completing %d pending todos that were never started: %s",
+            pending_count,
+            [t.get("content") for t in todos if t.get("status") == "pending"]
+        )
+
     # Mark all todos as completed
     completed_todos = (
         [{**todo, "status": "completed"} for todo in todos]
@@ -614,80 +979,63 @@ def create_normalize_tool_args_middleware(wrap_model_call, tools=None):
                     )
                     args[key] = normalized_value
 
-                #
+                # Validate write_todos: Only ONE item should be in_progress at a time
                 if tool_name == "write_todos" and "todos" in args:
                     todos = args["todos"]
                     if isinstance(todos, list) and len(todos) > 0:
-                        #
-
-
-
-
-
-
-                        has_summary = any(
-                            any(
-                                kw in todo.get("content", "")
-                                for kw in summary_keywords
-                            )
-                            for todo in todos
-                            if isinstance(todo, dict)
+                        # Validate: Only ONE item should be in_progress at a time
+                        # If multiple in_progress, keep only the first one
+                        in_progress_count = sum(
+                            1
+                            for t in todos
+                            if isinstance(t, dict)
+                            and t.get("status") == "in_progress"
                         )
-
-
-
-
-
-                            "status"
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-                        has_summary_json = (
-                            '"summary"' in msg_content
-                            and '"next_items"' in msg_content
-                        ) or (
-                            "'summary'" in msg_content
-                            and "'next_items'" in msg_content
-                        )
-                        if not has_summary_json:
-                            # Just log warning - don't block completion to avoid infinite loop
-                            logger.warning(
-                                "Summary todo marked completed but no summary JSON in content. "
-                                "Allowing completion to proceed. Content: %s",
-                                msg_content[:200],
-                            )
+                        if in_progress_count > 1:
+                            found_first = False
+                            for todo in todos:
+                                if not isinstance(todo, dict):
+                                    continue
+                                if todo.get("status") == "in_progress":
+                                    if found_first:
+                                        # Reset subsequent in_progress to pending
+                                        todo["status"] = "pending"
+                                        logger.info(
+                                            "Reset duplicate in_progress todo to pending: %s",
+                                            todo.get("content", "")[
+                                                :30
+                                            ],
+                                        )
+                                    else:
+                                        found_first = True
+
+                        # NOTE: Previously had logic to revert summary todo to in_progress
+                        # if no summary JSON was found. This caused infinite loops
+                        # where LLM kept calling write_todos repeatedly.
+                        # Now we let the natural termination logic handle this.
+                        #
+                        # NOTE: Also removed logic to preserve todos when LLM tries to delete them.
+                        # The LLM should be able to modify todos freely when:
+                        # - User rejects code approval
+                        # - User changes their request
+                        # - Code execution fails
+                        # We rely on prompts to guide proper todo management.
 
         return response
 
     return normalize_tool_args
 
 
-def create_inject_continuation_middleware(wrap_model_call):
-    """Create middleware
+def create_continuation_control_middleware(wrap_model_call):
+    """Create unified middleware for continuation control.
 
-
-
-
+    This middleware combines two functions:
+    1. BEFORE handler: Inject continuation prompt after non-HITL tool execution
+       - Checks for summary completion and stops if done
+       - Injects "[SYSTEM] Tool completed..." messages to guide LLM
+    2. AFTER handler: Prevent auto-continuation after summary JSON output
+       - Strips write_todos from responses containing summary JSON
+       - Prevents agent from auto-creating new todos after task completion
 
     Args:
         wrap_model_call: LangChain's wrap_model_call decorator
```
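The write_todos validation in the hunk above is a plain first-wins pass over the list, mutating it in place. A standalone sketch:

```python
# Three items wrongly marked in_progress at once (hypothetical write_todos args)
todos = [
    {"content": "Load data", "status": "in_progress"},
    {"content": "Clean data", "status": "in_progress"},
    {"content": "Train model", "status": "in_progress"},
]

found_first = False
for todo in todos:
    if isinstance(todo, dict) and todo.get("status") == "in_progress":
        if found_first:
            todo["status"] = "pending"  # reset duplicates, as the middleware does
        else:
            found_first = True

print([t["status"] for t in todos])  # ['in_progress', 'pending', 'pending']
```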
```diff
@@ -696,89 +1044,198 @@ def create_inject_continuation_middleware(wrap_model_call):
         Middleware function
     """
 
+    def _check_summary_exists(messages, last_real_human_idx: int) -> bool:
+        """Check if summary content exists in messages after last real user message."""
+        messages_to_check = (
+            messages[last_real_human_idx + 1 :]
+            if last_real_human_idx >= 0
+            else messages[-15:]
+        )
+        for msg in messages_to_check:
+            msg_content = getattr(msg, "content", "") or ""
+            # Check for summary JSON
+            if '"summary"' in msg_content and '"next_items"' in msg_content:
+                return True
+            # Check for markdown summary (common patterns)
+            if any(
+                kw in msg_content
+                for kw in [
+                    "다음 단계 제안",
+                    "다음 단계:",
+                    "### 다음 단계",
+                    "## 다음 단계",
+                    "**다음 단계**",
+                    "모든 작업이 완료",
+                    "**작업 요약**",
+                    "### 작업 요약",
+                    "## 작업 요약",
+                ]
+            ):
+                return True
+        return False
+
+    def _find_last_real_human_idx(messages) -> int:
+        """Find index of last real HumanMessage (not system-injected)."""
+        last_real_human_idx = -1
+        for i, msg in enumerate(messages):
+            msg_type = getattr(msg, "type", "") or type(msg).__name__
+            if msg_type in ("human", "HumanMessage"):
+                msg_content = getattr(msg, "content", "") or ""
+                if not msg_content.startswith("[SYSTEM]"):
+                    last_real_human_idx = i
+        return last_real_human_idx
+
     @wrap_model_call
-    @_with_middleware_logging("
-    def
+    @_with_middleware_logging("continuation_control")
+    def continuation_control(request, handler):
         messages = request.messages
-        if not messages:
-            return handler(request)
-
-        last_msg = messages[-1]
-        if getattr(last_msg, "type", "") == "tool":
-            tool_name = getattr(last_msg, "name", "") or ""
-
-            # Try to extract tool name from content
-            if not tool_name:
-                try:
-                    content_json = json.loads(last_msg.content)
-                    tool_name = content_json.get("tool", "")
-                except (json.JSONDecodeError, TypeError, AttributeError):
-                    pass
-
-            if tool_name in NON_HITL_TOOLS:
-                # Method 1: Check state.todos
-                todos = request.state.get("todos", [])
-                pending_todos = [
-                    t for t in todos if t.get("status") in ("pending", "in_progress")
-                ]
 
-
-
+        # ===== BEFORE HANDLER: Inject continuation prompt =====
+        if messages:
+            last_msg = messages[-1]
+            if getattr(last_msg, "type", "") == "tool":
+                tool_name = getattr(last_msg, "name", "") or ""
+
+                # Try to extract tool name from content
+                if not tool_name:
+                    try:
+                        content_json = json.loads(last_msg.content)
+                        tool_name = content_json.get("tool", "")
+                    except (json.JSONDecodeError, TypeError, AttributeError):
+                        pass
+
+                if tool_name in NON_HITL_TOOLS:
+                    todos = request.state.get("todos", [])
+
+                    last_real_human_idx = _find_last_real_human_idx(messages)
+                    summary_exists = _check_summary_exists(
+                        messages, last_real_human_idx
+                    )
+
+                    # STOP if summary exists (regardless of todo status)
+                    if summary_exists:
+                        logger.info(
+                            "Summary exists after tool: %s - stopping agent (user must request next steps)",
+                            tool_name,
+                        )
+                        return AIMessage(content="", tool_calls=[])
+
+                    pending_todos = [
+                        t
+                        for t in todos
+                        if t.get("status") in ("pending", "in_progress")
+                    ]
+
+                    # If all todos completed but no summary yet, allow LLM call for summary
+                    if not pending_todos and todos:
+                        logger.info(
+                            "All %d todos completed, no summary yet after tool: %s - allowing LLM for summary",
+                            len(todos),
+                            tool_name,
+                        )
+
                     logger.info(
-                        "
+                        "Injecting continuation prompt after non-HITL tool: %s",
                         tool_name,
                     )
-                return AIMessage(content="", tool_calls=[])
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+                    # Skip continuation injection for write_todos
+                    # This prevents auto-continuation to next task after completing one
+                    # Agent will decide next action based on its own reasoning
+                    if tool_name == "write_todos":
+                        logger.info(
+                            "Skipping continuation prompt after write_todos - "
+                            "agent decides next action (pending: %d)",
+                            len(pending_todos) if pending_todos else 0,
+                        )
+                        # Don't inject continuation - let agent naturally continue or stop
+                    elif pending_todos:
+                        pending_list = ", ".join(
+                            t.get("content", "")[:30] for t in pending_todos[:3]
+                        )
+                        continuation = (
+                            f"Tool '{tool_name}' completed. "
+                            f"Continue with pending tasks: {pending_list}. "
+                            f"Call jupyter_cell_tool or the next appropriate tool."
+                        )
+                        new_messages = list(messages) + [
+                            HumanMessage(content=f"[SYSTEM] {continuation}")
+                        ]
+                        request = request.override(messages=new_messages)
+                    else:
+                        continuation = (
+                            f"Tool '{tool_name}' completed. "
+                            f"Create a todo list with write_todos if needed."
+                        )
+                        new_messages = list(messages) + [
+                            HumanMessage(content=f"[SYSTEM] {continuation}")
+                        ]
+                        request = request.override(messages=new_messages)
+
+        # ===== CALL HANDLER =====
+        response = handler(request)
+
+        # ===== AFTER HANDLER: Strip write_todos if summary JSON present =====
+        response_message = _extract_ai_message(response)
+        if not response_message:
+            return response
+
+        # Get content - handle both string and list formats
+        content = getattr(response_message, "content", "") or ""
+        if isinstance(content, list):
+            content = " ".join(
+                str(p) if isinstance(p, str) else p.get("text", "")
+                for p in content
+                if isinstance(p, (str, dict))
+            )
 
+        # Check if content contains summary JSON pattern
+        has_summary_json = '"summary"' in content and '"next_items"' in content
+
+        if has_summary_json:
+            tool_calls = getattr(response_message, "tool_calls", []) or []
+            write_todos_calls = [
+                tc for tc in tool_calls if tc.get("name") == "write_todos"
+            ]
+
+            if write_todos_calls:
                 logger.info(
-                    "
-
+                    "Summary JSON 감지 - write_todos 호출 제거 (자동 계속 방지). "
+                    "제거된 write_todos 호출 수: %d",
+                    len(write_todos_calls),
                 )
 
-
-
-                        t.get("content", "")[:30] for t in pending_todos[:3]
-                    )
-                    continuation = (
-                        f"Tool '{tool_name}' completed. "
-                        f"Continue with pending tasks: {pending_list}. "
-                        f"Call jupyter_cell_tool or the next appropriate tool."
-                    )
-                else:
-                    # No todos yet - let agent create them
-                    continuation = (
-                        f"Tool '{tool_name}' completed. "
-                        f"Create a todo list with write_todos if needed."
-                    )
-
-                new_messages = list(messages) + [
-                    HumanMessage(content=f"[SYSTEM] {continuation}")
+                filtered_tool_calls = [
+                    tc for tc in tool_calls if tc.get("name") != "write_todos"
                 ]
-                request = request.override(messages=new_messages)
 
-
+                new_message = AIMessage(
+                    content=response_message.content,
+                    tool_calls=filtered_tool_calls,
+                    additional_kwargs=getattr(
+                        response_message, "additional_kwargs", {}
+                    ),
+                    response_metadata=getattr(
+                        response_message, "response_metadata", {}
+                    ),
+                )
+
+                response = _replace_ai_message_in_response(response, new_message)
+
+        return response
+
+    return continuation_control
+
+
+# Backward compatibility aliases
+def create_inject_continuation_middleware(wrap_model_call):
+    """Deprecated: Use create_continuation_control_middleware instead."""
+    return create_continuation_control_middleware(wrap_model_call)
+
 
-
+def create_prevent_auto_continuation_middleware(wrap_model_call):
+    """Deprecated: Use create_continuation_control_middleware instead."""
+    return create_continuation_control_middleware(wrap_model_call)
 
 
 def create_patch_tool_calls_middleware(AgentMiddleware, ToolMessage, Overwrite):
```
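The AFTER-handler branch boils down to filtering `write_todos` out of the tool-call list while keeping everything else on the message. A sketch with a hypothetical response (`final_answer_tool` as the surviving call is borrowed from a comment removed earlier in the diff):

```python
from langchain_core.messages import AIMessage

response_message = AIMessage(
    content='{"summary": "All tasks done", "next_items": []}',
    tool_calls=[
        {"name": "write_todos", "args": {"todos": []}, "id": "c1", "type": "tool_call"},
        {"name": "final_answer_tool", "args": {}, "id": "c2", "type": "tool_call"},
    ],
)

tool_calls = response_message.tool_calls or []
filtered = [tc for tc in tool_calls if tc.get("name") != "write_todos"]
print([tc["name"] for tc in filtered])  # ['final_answer_tool'] -> no auto-created todos
```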
|