hdsp-jupyter-extension 2.0.10__py3-none-any.whl → 2.0.13__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agent_server/core/notebook_generator.py +4 -4
- agent_server/langchain/MULTI_AGENT_ARCHITECTURE.md +1114 -0
- agent_server/langchain/__init__.py +2 -2
- agent_server/langchain/agent.py +72 -33
- agent_server/langchain/agent_factory.py +400 -0
- agent_server/langchain/agent_prompts/__init__.py +25 -0
- agent_server/langchain/agent_prompts/athena_query_prompt.py +71 -0
- agent_server/langchain/agent_prompts/planner_prompt.py +85 -0
- agent_server/langchain/agent_prompts/python_developer_prompt.py +123 -0
- agent_server/langchain/agent_prompts/researcher_prompt.py +38 -0
- agent_server/langchain/custom_middleware.py +656 -113
- agent_server/langchain/hitl_config.py +38 -9
- agent_server/langchain/llm_factory.py +1 -85
- agent_server/langchain/middleware/__init__.py +24 -0
- agent_server/langchain/middleware/code_history_middleware.py +412 -0
- agent_server/langchain/middleware/description_injector.py +150 -0
- agent_server/langchain/middleware/skill_middleware.py +298 -0
- agent_server/langchain/middleware/subagent_events.py +171 -0
- agent_server/langchain/middleware/subagent_middleware.py +329 -0
- agent_server/langchain/prompts.py +107 -135
- agent_server/langchain/skills/data_analysis.md +236 -0
- agent_server/langchain/skills/data_loading.md +158 -0
- agent_server/langchain/skills/inference.md +392 -0
- agent_server/langchain/skills/model_training.md +318 -0
- agent_server/langchain/skills/pyspark.md +352 -0
- agent_server/langchain/subagents/__init__.py +20 -0
- agent_server/langchain/subagents/base.py +173 -0
- agent_server/langchain/tools/__init__.py +3 -0
- agent_server/langchain/tools/jupyter_tools.py +58 -20
- agent_server/langchain/tools/lsp_tools.py +1 -1
- agent_server/langchain/tools/shared/__init__.py +26 -0
- agent_server/langchain/tools/shared/qdrant_search.py +175 -0
- agent_server/langchain/tools/tool_registry.py +219 -0
- agent_server/langchain/tools/workspace_tools.py +197 -0
- agent_server/prompts/file_action_prompts.py +8 -8
- agent_server/routers/config.py +40 -1
- agent_server/routers/langchain_agent.py +868 -321
- hdsp_agent_core/__init__.py +46 -47
- hdsp_agent_core/factory.py +6 -10
- hdsp_agent_core/interfaces.py +4 -2
- hdsp_agent_core/knowledge/__init__.py +5 -5
- hdsp_agent_core/knowledge/chunking.py +87 -61
- hdsp_agent_core/knowledge/loader.py +103 -101
- hdsp_agent_core/llm/service.py +192 -107
- hdsp_agent_core/managers/config_manager.py +16 -22
- hdsp_agent_core/managers/session_manager.py +5 -4
- hdsp_agent_core/models/__init__.py +12 -12
- hdsp_agent_core/models/agent.py +15 -8
- hdsp_agent_core/models/common.py +1 -2
- hdsp_agent_core/models/rag.py +48 -111
- hdsp_agent_core/prompts/__init__.py +12 -12
- hdsp_agent_core/prompts/cell_action_prompts.py +9 -7
- hdsp_agent_core/services/agent_service.py +10 -8
- hdsp_agent_core/services/chat_service.py +10 -6
- hdsp_agent_core/services/rag_service.py +3 -6
- hdsp_agent_core/tests/conftest.py +4 -1
- hdsp_agent_core/tests/test_factory.py +2 -2
- hdsp_agent_core/tests/test_services.py +12 -19
- {hdsp_jupyter_extension-2.0.10.data → hdsp_jupyter_extension-2.0.13.data}/data/share/jupyter/labextensions/hdsp-agent/build_log.json +1 -1
- {hdsp_jupyter_extension-2.0.10.data → hdsp_jupyter_extension-2.0.13.data}/data/share/jupyter/labextensions/hdsp-agent/package.json +7 -2
- hdsp_jupyter_extension-2.0.10.data/data/share/jupyter/labextensions/hdsp-agent/static/frontend_styles_index_js.2d9fb488c82498c45c2d.js → hdsp_jupyter_extension-2.0.13.data/data/share/jupyter/labextensions/hdsp-agent/static/frontend_styles_index_js.037b3c8e5d6a92b63b16.js +1108 -179
- hdsp_jupyter_extension-2.0.13.data/data/share/jupyter/labextensions/hdsp-agent/static/frontend_styles_index_js.037b3c8e5d6a92b63b16.js.map +1 -0
- jupyter_ext/labextension/static/lib_index_js.dc6434bee96ab03a0539.js → hdsp_jupyter_extension-2.0.13.data/data/share/jupyter/labextensions/hdsp-agent/static/lib_index_js.5449ba3c7e25177d2987.js +3936 -8144
- hdsp_jupyter_extension-2.0.13.data/data/share/jupyter/labextensions/hdsp-agent/static/lib_index_js.5449ba3c7e25177d2987.js.map +1 -0
- hdsp_jupyter_extension-2.0.10.data/data/share/jupyter/labextensions/hdsp-agent/static/remoteEntry.4a252df3ade74efee8d6.js → hdsp_jupyter_extension-2.0.13.data/data/share/jupyter/labextensions/hdsp-agent/static/remoteEntry.a8e0b064eb9b1c1ff463.js +17 -17
- hdsp_jupyter_extension-2.0.13.data/data/share/jupyter/labextensions/hdsp-agent/static/remoteEntry.a8e0b064eb9b1c1ff463.js.map +1 -0
- {hdsp_jupyter_extension-2.0.10.dist-info → hdsp_jupyter_extension-2.0.13.dist-info}/METADATA +1 -1
- {hdsp_jupyter_extension-2.0.10.dist-info → hdsp_jupyter_extension-2.0.13.dist-info}/RECORD +100 -76
- jupyter_ext/__init__.py +21 -11
- jupyter_ext/_version.py +1 -1
- jupyter_ext/handlers.py +128 -58
- jupyter_ext/labextension/build_log.json +1 -1
- jupyter_ext/labextension/package.json +7 -2
- jupyter_ext/labextension/static/{frontend_styles_index_js.2d9fb488c82498c45c2d.js → frontend_styles_index_js.037b3c8e5d6a92b63b16.js} +1108 -179
- jupyter_ext/labextension/static/frontend_styles_index_js.037b3c8e5d6a92b63b16.js.map +1 -0
- hdsp_jupyter_extension-2.0.10.data/data/share/jupyter/labextensions/hdsp-agent/static/lib_index_js.dc6434bee96ab03a0539.js → jupyter_ext/labextension/static/lib_index_js.5449ba3c7e25177d2987.js +3936 -8144
- jupyter_ext/labextension/static/lib_index_js.5449ba3c7e25177d2987.js.map +1 -0
- jupyter_ext/labextension/static/{remoteEntry.4a252df3ade74efee8d6.js → remoteEntry.a8e0b064eb9b1c1ff463.js} +17 -17
- jupyter_ext/labextension/static/remoteEntry.a8e0b064eb9b1c1ff463.js.map +1 -0
- hdsp_jupyter_extension-2.0.10.data/data/share/jupyter/labextensions/hdsp-agent/static/frontend_styles_index_js.2d9fb488c82498c45c2d.js.map +0 -1
- hdsp_jupyter_extension-2.0.10.data/data/share/jupyter/labextensions/hdsp-agent/static/lib_index_js.dc6434bee96ab03a0539.js.map +0 -1
- hdsp_jupyter_extension-2.0.10.data/data/share/jupyter/labextensions/hdsp-agent/static/remoteEntry.4a252df3ade74efee8d6.js.map +0 -1
- jupyter_ext/labextension/static/frontend_styles_index_js.2d9fb488c82498c45c2d.js.map +0 -1
- jupyter_ext/labextension/static/lib_index_js.dc6434bee96ab03a0539.js.map +0 -1
- jupyter_ext/labextension/static/remoteEntry.4a252df3ade74efee8d6.js.map +0 -1
- {hdsp_jupyter_extension-2.0.10.data → hdsp_jupyter_extension-2.0.13.data}/data/etc/jupyter/jupyter_server_config.d/hdsp_jupyter_extension.json +0 -0
- {hdsp_jupyter_extension-2.0.10.data → hdsp_jupyter_extension-2.0.13.data}/data/share/jupyter/labextensions/hdsp-agent/install.json +0 -0
- {hdsp_jupyter_extension-2.0.10.data → hdsp_jupyter_extension-2.0.13.data}/data/share/jupyter/labextensions/hdsp-agent/static/node_modules_emotion_use-insertion-effect-with-fallbacks_dist_emotion-use-insertion-effect-wi-3ba6b80.c095373419d05e6f141a.js +0 -0
- {hdsp_jupyter_extension-2.0.10.data → hdsp_jupyter_extension-2.0.13.data}/data/share/jupyter/labextensions/hdsp-agent/static/node_modules_emotion_use-insertion-effect-with-fallbacks_dist_emotion-use-insertion-effect-wi-3ba6b80.c095373419d05e6f141a.js.map +0 -0
- {hdsp_jupyter_extension-2.0.10.data → hdsp_jupyter_extension-2.0.13.data}/data/share/jupyter/labextensions/hdsp-agent/static/node_modules_emotion_use-insertion-effect-with-fallbacks_dist_emotion-use-insertion-effect-wi-3ba6b81.61e75fb98ecff46cf836.js +0 -0
- {hdsp_jupyter_extension-2.0.10.data → hdsp_jupyter_extension-2.0.13.data}/data/share/jupyter/labextensions/hdsp-agent/static/node_modules_emotion_use-insertion-effect-with-fallbacks_dist_emotion-use-insertion-effect-wi-3ba6b81.61e75fb98ecff46cf836.js.map +0 -0
- {hdsp_jupyter_extension-2.0.10.data → hdsp_jupyter_extension-2.0.13.data}/data/share/jupyter/labextensions/hdsp-agent/static/style.js +0 -0
- {hdsp_jupyter_extension-2.0.10.data → hdsp_jupyter_extension-2.0.13.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_babel_runtime_helpers_esm_extends_js-node_modules_emotion_serialize_dist-051195.e2553aab0c3963b83dd7.js +0 -0
- {hdsp_jupyter_extension-2.0.10.data → hdsp_jupyter_extension-2.0.13.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_babel_runtime_helpers_esm_extends_js-node_modules_emotion_serialize_dist-051195.e2553aab0c3963b83dd7.js.map +0 -0
- {hdsp_jupyter_extension-2.0.10.data → hdsp_jupyter_extension-2.0.13.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_emotion_cache_dist_emotion-cache_browser_development_esm_js.24edcc52a1c014a8a5f0.js +0 -0
- {hdsp_jupyter_extension-2.0.10.data → hdsp_jupyter_extension-2.0.13.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_emotion_cache_dist_emotion-cache_browser_development_esm_js.24edcc52a1c014a8a5f0.js.map +0 -0
- {hdsp_jupyter_extension-2.0.10.data → hdsp_jupyter_extension-2.0.13.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_emotion_react_dist_emotion-react_browser_development_esm_js.19ecf6babe00caff6b8a.js +0 -0
- {hdsp_jupyter_extension-2.0.10.data → hdsp_jupyter_extension-2.0.13.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_emotion_react_dist_emotion-react_browser_development_esm_js.19ecf6babe00caff6b8a.js.map +0 -0
- {hdsp_jupyter_extension-2.0.10.data → hdsp_jupyter_extension-2.0.13.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_emotion_styled_dist_emotion-styled_browser_development_esm_js.661fb5836f4978a7c6e1.js +0 -0
- {hdsp_jupyter_extension-2.0.10.data → hdsp_jupyter_extension-2.0.13.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_emotion_styled_dist_emotion-styled_browser_development_esm_js.661fb5836f4978a7c6e1.js.map +0 -0
- {hdsp_jupyter_extension-2.0.10.data → hdsp_jupyter_extension-2.0.13.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_mui_material_index_js.985697e0162d8d088ca2.js +0 -0
- {hdsp_jupyter_extension-2.0.10.data → hdsp_jupyter_extension-2.0.13.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_mui_material_index_js.985697e0162d8d088ca2.js.map +0 -0
- {hdsp_jupyter_extension-2.0.10.data → hdsp_jupyter_extension-2.0.13.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_mui_material_utils_createSvgIcon_js.1f5038488cdfd8b3a85d.js +0 -0
- {hdsp_jupyter_extension-2.0.10.data → hdsp_jupyter_extension-2.0.13.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_mui_material_utils_createSvgIcon_js.1f5038488cdfd8b3a85d.js.map +0 -0
- {hdsp_jupyter_extension-2.0.10.dist-info → hdsp_jupyter_extension-2.0.13.dist-info}/WHEEL +0 -0
- {hdsp_jupyter_extension-2.0.10.dist-info → hdsp_jupyter_extension-2.0.13.dist-info}/licenses/LICENSE +0 -0
|
@@ -11,10 +11,13 @@ import re
|
|
|
11
11
|
import uuid
|
|
12
12
|
from typing import Any, Dict, Optional
|
|
13
13
|
|
|
14
|
+
from json_repair import repair_json
|
|
14
15
|
from langchain_core.messages import AIMessage, HumanMessage
|
|
15
16
|
|
|
16
17
|
from agent_server.langchain.logging_utils import (
|
|
17
18
|
_format_middleware_marker,
|
|
19
|
+
_pretty_json,
|
|
20
|
+
_serialize_message,
|
|
18
21
|
_with_middleware_logging,
|
|
19
22
|
)
|
|
20
23
|
from agent_server.langchain.prompts import JSON_TOOL_SCHEMA, NON_HITL_TOOLS
|
|
@@ -75,6 +78,15 @@ def parse_json_tool_call(text) -> Optional[Dict[str, Any]]:
|
|
|
75
78
|
except json.JSONDecodeError:
|
|
76
79
|
pass
|
|
77
80
|
|
|
81
|
+
# Try json-repair for malformed JSON from LLMs
|
|
82
|
+
try:
|
|
83
|
+
repaired = repair_json(text, return_objects=True)
|
|
84
|
+
if isinstance(repaired, dict) and "tool" in repaired:
|
|
85
|
+
logger.info(f"Repaired malformed JSON tool call: {repaired.get('tool')}")
|
|
86
|
+
return repaired
|
|
87
|
+
except Exception as e:
|
|
88
|
+
logger.debug(f"json-repair failed: {e}")
|
|
89
|
+
|
|
78
90
|
return None
|
|
79
91
|
|
|
80
92
|
|
|
@@ -83,15 +95,110 @@ def normalize_tool_name(tool_name: str) -> str:
|
|
|
83
95
|
|
|
84
96
|
Rules:
|
|
85
97
|
- write_todos_tool → write_todos (TodoListMiddleware exception)
|
|
98
|
+
- task → task_tool (SubAgentMiddleware uses task_tool)
|
|
86
99
|
- other tools without _tool suffix → add _tool suffix
|
|
87
100
|
"""
|
|
88
101
|
if tool_name == "write_todos_tool":
|
|
89
102
|
return "write_todos"
|
|
90
|
-
if
|
|
103
|
+
if tool_name == "task":
|
|
104
|
+
return "task_tool"
|
|
105
|
+
if not tool_name.endswith("_tool") and tool_name not in ("write_todos",):
|
|
91
106
|
return f"{tool_name}_tool"
|
|
92
107
|
return tool_name
|
|
93
108
|
|
|
94
109
|
|
|
110
|
+
def repair_tool_call_arguments(arguments: str) -> Optional[Dict[str, Any]]:
    """Parse tool-call arguments produced by an LLM, repairing malformed JSON.

    Some LLMs (e.g., gpt-oss-120b) return arguments without the leading '{'
    or with other JSON formatting issues. A strict ``json.loads`` is tried
    first; only when that fails is ``repair_json`` used as a fallback.

    Args:
        arguments: Raw arguments string from the LLM.

    Returns:
        The arguments as a dictionary, or None when the input is empty, is
        not a string, or does not decode (or repair) to a JSON object.
    """
    if not arguments or not isinstance(arguments, str):
        return None

    arguments = arguments.strip()

    # Try a strict parse first.
    try:
        parsed = json.loads(arguments)
    except json.JSONDecodeError:
        pass
    else:
        # Well-formed JSON that is not an object (list, string, number, null)
        # cannot serve as tool arguments, so honour the Optional[Dict]
        # contract instead of leaking the value to the caller.
        return parsed if isinstance(parsed, dict) else None

    # Fall back to json-repair for malformed arguments.
    try:
        repaired = repair_json(arguments, return_objects=True)
    except Exception as e:
        # Repair is best-effort: any failure simply means "no usable args".
        logger.debug("json-repair failed for arguments: %s", e)
        return None

    if isinstance(repaired, dict):
        logger.info("Repaired malformed tool arguments")
        return repaired

    return None
|
|
143
|
+
|
|
144
|
+
|
|
145
|
+
def try_extract_tool_calls_from_additional_kwargs(
    response_message,
) -> Optional[AIMessage]:
    """Try to extract and repair tool_calls from additional_kwargs.

    Some LLMs put tool_calls in ``additional_kwargs`` but with malformed
    arguments. Each entry is read as ``{"id": ..., "function": {"name": ...,
    "arguments": ...}}``; the arguments are passed through
    ``repair_tool_call_arguments`` and the name through
    ``normalize_tool_name``. Entries without a name, or whose arguments
    cannot be turned into a dictionary, are dropped.

    Args:
        response_message: AIMessage with potential tool_calls in
            additional_kwargs.

    Returns:
        New AIMessage carrying the original content and the repaired
        tool_calls, or None if nothing could be extracted.
    """
    if not response_message:
        return None

    # `or {}` / `or []` also cover attributes and keys explicitly set to None.
    additional_kwargs = getattr(response_message, "additional_kwargs", None) or {}
    raw_tool_calls = additional_kwargs.get("tool_calls") or []

    if not raw_tool_calls:
        return None

    repaired_tool_calls = []
    for tc in raw_tool_calls:
        if not isinstance(tc, dict):
            # Unexpected shape; skip rather than raise on `.get`.
            continue

        func = tc.get("function") or {}
        name = func.get("name")
        if not name:
            continue

        # Try to repair arguments
        args = repair_tool_call_arguments(func.get("arguments", ""))
        if args is None:
            continue

        repaired_tool_calls.append(
            {
                "name": normalize_tool_name(name),
                "args": args,
                # A present-but-empty id (None / "") also gets a fresh id.
                "id": tc.get("id") or str(uuid.uuid4()),
                "type": "tool_call",
            }
        )

    if not repaired_tool_calls:
        return None

    logger.info(
        "Extracted %d tool calls from additional_kwargs",
        len(repaired_tool_calls),
    )
    return AIMessage(
        content=getattr(response_message, "content", "") or "",
        tool_calls=repaired_tool_calls,
    )
|
|
200
|
+
|
|
201
|
+
|
|
95
202
|
def create_tool_call_message(tool_name: str, arguments: Dict[str, Any]) -> AIMessage:
|
|
96
203
|
"""Create AIMessage with tool_calls from parsed JSON.
|
|
97
204
|
|
|
@@ -125,7 +232,7 @@ def create_handle_empty_response_middleware(wrap_model_call):
|
|
|
125
232
|
1. Detects empty or text-only responses (no tool_calls)
|
|
126
233
|
2. Retries with JSON schema prompt to force structured output
|
|
127
234
|
3. Parses JSON response and injects tool_calls into AIMessage
|
|
128
|
-
4. Falls back to synthetic
|
|
235
|
+
4. Falls back to synthetic write_todos completion if all else fails
|
|
129
236
|
|
|
130
237
|
Args:
|
|
131
238
|
wrap_model_call: LangChain's wrap_model_call decorator
|
|
@@ -139,42 +246,132 @@ def create_handle_empty_response_middleware(wrap_model_call):
|
|
|
139
246
|
def handle_empty_response(request, handler):
|
|
140
247
|
max_retries = 2
|
|
141
248
|
|
|
142
|
-
# Check if all todos are completed - if so,
|
|
249
|
+
# Check if all todos are completed - if so, return empty response to stop agent
|
|
250
|
+
# Method 1: Check state.todos
|
|
143
251
|
todos = request.state.get("todos", [])
|
|
252
|
+
logger.debug(
|
|
253
|
+
"handle_empty_response: state.todos=%s",
|
|
254
|
+
json.dumps(todos, ensure_ascii=False) if todos else "[]",
|
|
255
|
+
)
|
|
144
256
|
if todos:
|
|
145
257
|
pending_todos = [
|
|
146
258
|
t for t in todos if t.get("status") in ("pending", "in_progress")
|
|
147
259
|
]
|
|
148
260
|
if not pending_todos:
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
261
|
+
# Check if summary already exists AFTER the last REAL HumanMessage
|
|
262
|
+
# (to avoid false positives from previous task summaries)
|
|
263
|
+
# Note: Skip system-injected messages like "[SYSTEM] Tool completed..."
|
|
264
|
+
summary_exists = False
|
|
265
|
+
messages = request.messages
|
|
266
|
+
|
|
267
|
+
# Find index of last REAL HumanMessage (not system-injected)
|
|
268
|
+
last_human_idx = -1
|
|
269
|
+
for i, msg in enumerate(messages):
|
|
270
|
+
msg_type = getattr(msg, "type", "") or type(msg).__name__
|
|
271
|
+
if msg_type in ("human", "HumanMessage"):
|
|
272
|
+
msg_content = getattr(msg, "content", "") or ""
|
|
273
|
+
# Skip system-injected messages
|
|
274
|
+
if not msg_content.startswith("[SYSTEM]"):
|
|
275
|
+
last_human_idx = i
|
|
276
|
+
|
|
277
|
+
# Only check messages after last REAL HumanMessage for summary
|
|
278
|
+
messages_to_check = (
|
|
279
|
+
messages[last_human_idx + 1 :]
|
|
280
|
+
if last_human_idx >= 0
|
|
281
|
+
else messages[-10:]
|
|
152
282
|
)
|
|
153
|
-
|
|
283
|
+
for msg in messages_to_check:
|
|
284
|
+
content = getattr(msg, "content", "") or ""
|
|
285
|
+
if '"summary"' in content and '"next_items"' in content:
|
|
286
|
+
summary_exists = True
|
|
287
|
+
break
|
|
154
288
|
|
|
155
|
-
|
|
156
|
-
# This allows agent to naturally terminate after final_answer_tool
|
|
157
|
-
messages = request.messages
|
|
158
|
-
if messages:
|
|
159
|
-
last_msg = messages[-1]
|
|
160
|
-
if getattr(last_msg, "type", "") == "tool":
|
|
161
|
-
tool_name = getattr(last_msg, "name", "") or ""
|
|
162
|
-
if not tool_name:
|
|
163
|
-
try:
|
|
164
|
-
content_json = json.loads(last_msg.content)
|
|
165
|
-
tool_name = content_json.get("tool", "")
|
|
166
|
-
except (json.JSONDecodeError, TypeError, AttributeError):
|
|
167
|
-
pass
|
|
168
|
-
if tool_name in ("final_answer_tool", "final_answer"):
|
|
289
|
+
if summary_exists:
|
|
169
290
|
logger.info(
|
|
170
|
-
"
|
|
291
|
+
"All %d todos completed and summary exists after last user message - stopping agent (no LLM call)",
|
|
292
|
+
len(todos),
|
|
171
293
|
)
|
|
172
|
-
|
|
173
|
-
|
|
294
|
+
return AIMessage(content="", tool_calls=[])
|
|
295
|
+
else:
|
|
296
|
+
# Allow one more LLM call for summary generation
|
|
297
|
+
logger.info(
|
|
298
|
+
"All %d todos completed but no summary yet after last user message - allowing LLM call for summary",
|
|
299
|
+
len(todos),
|
|
300
|
+
)
|
|
301
|
+
|
|
302
|
+
# Method 2: Check last message if it's a write_todos ToolMessage with all completed
|
|
303
|
+
# Note: We now allow one more LLM call for summary generation when all todos are completed
|
|
304
|
+
# This check is skipped to let the agent produce a summary
|
|
305
|
+
|
|
306
|
+
# Check if summary todo is completed
|
|
307
|
+
# IMPORTANT: Only consider summary completed if it's the LAST todo item and ALL todos are done
|
|
308
|
+
# This prevents false positives when a previous summary is completed but new tasks are added
|
|
309
|
+
all_todos_completed = all(t.get("status") == "completed" for t in todos)
|
|
310
|
+
last_todo_is_summary = (
|
|
311
|
+
len(todos) > 0
|
|
312
|
+
and "작업 요약" in todos[-1].get("content", "")
|
|
313
|
+
and "다음 단계" in todos[-1].get("content", "")
|
|
314
|
+
and todos[-1].get("status") == "completed"
|
|
315
|
+
)
|
|
316
|
+
summary_todo_completed = all_todos_completed and last_todo_is_summary
|
|
317
|
+
|
|
318
|
+
if not summary_todo_completed and any(
|
|
319
|
+
t.get("status") == "completed"
|
|
320
|
+
and "작업 요약" in t.get("content", "")
|
|
321
|
+
for t in todos
|
|
322
|
+
):
|
|
323
|
+
logger.debug(
|
|
324
|
+
"Previous summary todo completed but new tasks exist - NOT treating as final summary"
|
|
325
|
+
)
|
|
326
|
+
|
|
327
|
+
# Check if summary content exists in messages
|
|
328
|
+
messages = request.messages
|
|
329
|
+
summary_exists = False
|
|
330
|
+
for msg in messages[-15:]:
|
|
331
|
+
msg_content = getattr(msg, "content", "") or ""
|
|
332
|
+
if '"summary"' in msg_content and '"next_items"' in msg_content:
|
|
333
|
+
summary_exists = True
|
|
334
|
+
break
|
|
335
|
+
if any(
|
|
336
|
+
kw in msg_content
|
|
337
|
+
for kw in [
|
|
338
|
+
"다음 단계 제안",
|
|
339
|
+
"다음 단계:",
|
|
340
|
+
"### 다음 단계",
|
|
341
|
+
"## 다음 단계",
|
|
342
|
+
"**다음 단계**",
|
|
343
|
+
"모든 작업이 완료",
|
|
344
|
+
"**작업 요약**",
|
|
345
|
+
"### 작업 요약",
|
|
346
|
+
"## 작업 요약",
|
|
347
|
+
]
|
|
348
|
+
):
|
|
349
|
+
summary_exists = True
|
|
350
|
+
break
|
|
174
351
|
|
|
175
352
|
for attempt in range(max_retries + 1):
|
|
176
353
|
response = handler(request)
|
|
177
354
|
|
|
355
|
+
# If summary todo is completed AND summary content exists, accept empty response
|
|
356
|
+
# This prevents infinite loop when inject_continuation_middleware returns empty AIMessage
|
|
357
|
+
response_message = _extract_ai_message(response)
|
|
358
|
+
if summary_todo_completed and summary_exists:
|
|
359
|
+
has_content_check = (
|
|
360
|
+
bool(getattr(response_message, "content", None))
|
|
361
|
+
if response_message
|
|
362
|
+
else False
|
|
363
|
+
)
|
|
364
|
+
has_tool_calls_check = (
|
|
365
|
+
bool(getattr(response_message, "tool_calls", None))
|
|
366
|
+
if response_message
|
|
367
|
+
else False
|
|
368
|
+
)
|
|
369
|
+
if not has_content_check and not has_tool_calls_check:
|
|
370
|
+
logger.info(
|
|
371
|
+
"Summary todo completed AND summary exists - accepting empty response (agent should stop)"
|
|
372
|
+
)
|
|
373
|
+
return response
|
|
374
|
+
|
|
178
375
|
# Extract AIMessage from response
|
|
179
376
|
response_message = _extract_ai_message(response)
|
|
180
377
|
|
|
@@ -201,6 +398,23 @@ def create_handle_empty_response_middleware(wrap_model_call):
|
|
|
201
398
|
if has_tool_calls:
|
|
202
399
|
return response
|
|
203
400
|
|
|
401
|
+
# Try to extract and repair tool_calls from additional_kwargs
|
|
402
|
+
# Some LLMs (e.g., gpt-oss-120b) put tool_calls in additional_kwargs
|
|
403
|
+
# but with malformed arguments (missing '{', broken JSON, etc.)
|
|
404
|
+
if response_message and not has_tool_calls:
|
|
405
|
+
repaired_message = try_extract_tool_calls_from_additional_kwargs(
|
|
406
|
+
response_message
|
|
407
|
+
)
|
|
408
|
+
if repaired_message and repaired_message.tool_calls:
|
|
409
|
+
logger.info(
|
|
410
|
+
"Repaired tool_calls from additional_kwargs: %d calls",
|
|
411
|
+
len(repaired_message.tool_calls),
|
|
412
|
+
)
|
|
413
|
+
response = _replace_ai_message_in_response(
|
|
414
|
+
response, repaired_message
|
|
415
|
+
)
|
|
416
|
+
return response
|
|
417
|
+
|
|
204
418
|
# Try to parse JSON from content
|
|
205
419
|
if has_content and response_message:
|
|
206
420
|
parsed = parse_json_tool_call(response_message.content)
|
|
@@ -216,6 +430,117 @@ def create_handle_empty_response_middleware(wrap_model_call):
|
|
|
216
430
|
response = _replace_ai_message_in_response(response, new_message)
|
|
217
431
|
return response
|
|
218
432
|
|
|
433
|
+
# Check if content is summary JSON (for summary todo)
|
|
434
|
+
# Summary JSON has "summary" and "next_items" but no "tool"
|
|
435
|
+
# IMPORTANT: Check for summary JSON pattern FIRST, regardless of current todo
|
|
436
|
+
# This handles cases where LLM outputs summary JSON mixed with other content
|
|
437
|
+
content = response_message.content
|
|
438
|
+
if isinstance(content, list):
|
|
439
|
+
content = " ".join(str(p) for p in content)
|
|
440
|
+
|
|
441
|
+
# Check if content contains summary JSON pattern
|
|
442
|
+
has_summary_pattern = ('"summary"' in content or "'summary'" in content) and (
|
|
443
|
+
'"next_items"' in content or "'next_items'" in content
|
|
444
|
+
)
|
|
445
|
+
|
|
446
|
+
if has_summary_pattern:
|
|
447
|
+
# Try to extract and repair summary JSON from mixed content
|
|
448
|
+
try:
|
|
449
|
+
# Try to find JSON object containing summary
|
|
450
|
+
import re
|
|
451
|
+
json_match = re.search(r'\{[^{}]*"summary"[^{}]*"next_items"[^{}]*\}', content, re.DOTALL)
|
|
452
|
+
if json_match:
|
|
453
|
+
repaired_summary = repair_json(
|
|
454
|
+
json_match.group(), return_objects=True
|
|
455
|
+
)
|
|
456
|
+
else:
|
|
457
|
+
repaired_summary = repair_json(
|
|
458
|
+
content, return_objects=True
|
|
459
|
+
)
|
|
460
|
+
|
|
461
|
+
if (
|
|
462
|
+
isinstance(repaired_summary, dict)
|
|
463
|
+
and "summary" in repaired_summary
|
|
464
|
+
and "next_items" in repaired_summary
|
|
465
|
+
):
|
|
466
|
+
# Create new message with repaired JSON content
|
|
467
|
+
repaired_content = json.dumps(
|
|
468
|
+
repaired_summary, ensure_ascii=False
|
|
469
|
+
)
|
|
470
|
+
logger.info(
|
|
471
|
+
"Detected and repaired summary JSON in content (pattern-based detection)"
|
|
472
|
+
)
|
|
473
|
+
# Create message with repaired content
|
|
474
|
+
repaired_response_message = AIMessage(
|
|
475
|
+
content=repaired_content,
|
|
476
|
+
tool_calls=getattr(
|
|
477
|
+
response_message, "tool_calls", []
|
|
478
|
+
)
|
|
479
|
+
or [],
|
|
480
|
+
)
|
|
481
|
+
synthetic_message = _create_synthetic_completion(
|
|
482
|
+
request,
|
|
483
|
+
repaired_response_message,
|
|
484
|
+
has_content=True,
|
|
485
|
+
)
|
|
486
|
+
response = _replace_ai_message_in_response(
|
|
487
|
+
response, synthetic_message
|
|
488
|
+
)
|
|
489
|
+
return response
|
|
490
|
+
except Exception as e:
|
|
491
|
+
logger.debug(f"Failed to extract summary JSON from mixed content: {e}")
|
|
492
|
+
|
|
493
|
+
# Fallback: accept as-is if repair failed but looks like summary
|
|
494
|
+
logger.info(
|
|
495
|
+
"Detected summary JSON pattern in content - accepting and synthesizing write_todos"
|
|
496
|
+
)
|
|
497
|
+
synthetic_message = _create_synthetic_completion(
|
|
498
|
+
request, response_message, has_content=True
|
|
499
|
+
)
|
|
500
|
+
response = _replace_ai_message_in_response(
|
|
501
|
+
response, synthetic_message
|
|
502
|
+
)
|
|
503
|
+
return response
|
|
504
|
+
|
|
505
|
+
# Legacy: Also check if current todo is a summary todo (backward compatibility)
|
|
506
|
+
todos = request.state.get("todos", [])
|
|
507
|
+
in_progress_todos = [
|
|
508
|
+
t for t in todos if t.get("status") == "in_progress"
|
|
509
|
+
]
|
|
510
|
+
pending_todos = [t for t in todos if t.get("status") == "pending"]
|
|
511
|
+
current_todo = (
|
|
512
|
+
in_progress_todos[0]
|
|
513
|
+
if in_progress_todos
|
|
514
|
+
else pending_todos[0]
|
|
515
|
+
if pending_todos
|
|
516
|
+
else None
|
|
517
|
+
)
|
|
518
|
+
if current_todo:
|
|
519
|
+
summary_keywords = [
|
|
520
|
+
"작업 요약",
|
|
521
|
+
"결과 요약",
|
|
522
|
+
"분석 요약",
|
|
523
|
+
"요약 및",
|
|
524
|
+
"다음단계",
|
|
525
|
+
"다음 단계",
|
|
526
|
+
"next step",
|
|
527
|
+
]
|
|
528
|
+
is_summary_todo = any(
|
|
529
|
+
kw in current_todo.get("content", "") for kw in summary_keywords
|
|
530
|
+
)
|
|
531
|
+
if is_summary_todo and ('"summary"' in content or "'summary'" in content):
|
|
532
|
+
# This is a summary todo with summary content - accept it
|
|
533
|
+
logger.info(
|
|
534
|
+
"Summary todo with summary content detected - accepting"
|
|
535
|
+
)
|
|
536
|
+
synthetic_message = _create_synthetic_completion(
|
|
537
|
+
request, response_message, has_content=True
|
|
538
|
+
)
|
|
539
|
+
response = _replace_ai_message_in_response(
|
|
540
|
+
response, synthetic_message
|
|
541
|
+
)
|
|
542
|
+
return response
|
|
543
|
+
|
|
219
544
|
# Invalid response - retry with JSON schema prompt
|
|
220
545
|
if response_message and attempt < max_retries:
|
|
221
546
|
reason = "text-only" if has_content else "empty"
|
|
@@ -230,7 +555,7 @@ def create_handle_empty_response_middleware(wrap_model_call):
|
|
|
230
555
|
reason,
|
|
231
556
|
)
|
|
232
557
|
# Synthesize write_todos while preserving the content (summary)
|
|
233
|
-
synthetic_message =
|
|
558
|
+
synthetic_message = _create_synthetic_completion(
|
|
234
559
|
request, response_message, has_content
|
|
235
560
|
)
|
|
236
561
|
response = _replace_ai_message_in_response(
|
|
@@ -245,6 +570,8 @@ def create_handle_empty_response_middleware(wrap_model_call):
|
|
|
245
570
|
attempt + 1,
|
|
246
571
|
max_retries + 1,
|
|
247
572
|
)
|
|
573
|
+
if reason == "text-only":
|
|
574
|
+
_log_invalid_ai_message(response_message, reason)
|
|
248
575
|
|
|
249
576
|
request = request.override(
|
|
250
577
|
messages=request.messages + [HumanMessage(content=json_prompt)]
|
|
@@ -267,7 +594,7 @@ def create_handle_empty_response_middleware(wrap_model_call):
|
|
|
267
594
|
logger.warning(
|
|
268
595
|
"Max retries exhausted. Synthesizing write_todos to complete."
|
|
269
596
|
)
|
|
270
|
-
synthetic_message =
|
|
597
|
+
synthetic_message = _create_synthetic_completion(
|
|
271
598
|
request, response_message, has_content
|
|
272
599
|
)
|
|
273
600
|
response = _replace_ai_message_in_response(response, synthetic_message)
|
|
@@ -299,6 +626,23 @@ def _extract_ai_message(response):
|
|
|
299
626
|
return None
|
|
300
627
|
|
|
301
628
|
|
|
629
|
+
def _log_invalid_ai_message(response_message, reason: str) -> None:
|
|
630
|
+
"""Log full AIMessage details for invalid (text-only) responses."""
|
|
631
|
+
if not response_message:
|
|
632
|
+
return
|
|
633
|
+
try:
|
|
634
|
+
payload = _serialize_message(response_message)
|
|
635
|
+
except Exception as exc:
|
|
636
|
+
logger.warning(
|
|
637
|
+
"Invalid AIMessage detail (%s): failed to serialize (%s). Raw=%r",
|
|
638
|
+
reason,
|
|
639
|
+
exc,
|
|
640
|
+
response_message,
|
|
641
|
+
)
|
|
642
|
+
return
|
|
643
|
+
logger.warning("Invalid AIMessage detail (%s): %s", reason, _pretty_json(payload))
|
|
644
|
+
|
|
645
|
+
|
|
302
646
|
def _replace_ai_message_in_response(response, new_message):
|
|
303
647
|
"""Replace AIMessage in response with a new one."""
|
|
304
648
|
if hasattr(response, "result"):
|
|
@@ -318,25 +662,51 @@ def _build_json_prompt(request, response_message, has_content):
|
|
|
318
662
|
pending_todos = [t for t in todos if t.get("status") in ("pending", "in_progress")]
|
|
319
663
|
in_progress_todos = [t for t in todos if t.get("status") == "in_progress"]
|
|
320
664
|
|
|
665
|
+
# Check if CURRENT todo (first in_progress or first pending) is summary/next_steps
|
|
666
|
+
# Not checking ALL pending todos - only the one we should be working on now
|
|
667
|
+
summary_keywords = [
|
|
668
|
+
"작업 요약",
|
|
669
|
+
"결과 요약",
|
|
670
|
+
"분석 요약",
|
|
671
|
+
"요약 및",
|
|
672
|
+
"다음단계",
|
|
673
|
+
"다음 단계",
|
|
674
|
+
"next step",
|
|
675
|
+
]
|
|
676
|
+
current_todo = (
|
|
677
|
+
in_progress_todos[0]
|
|
678
|
+
if in_progress_todos
|
|
679
|
+
else pending_todos[0]
|
|
680
|
+
if pending_todos
|
|
681
|
+
else None
|
|
682
|
+
)
|
|
683
|
+
is_summary_todo = current_todo is not None and any(
|
|
684
|
+
kw in current_todo.get("content", "") for kw in summary_keywords
|
|
685
|
+
)
|
|
686
|
+
|
|
321
687
|
if has_content:
|
|
322
688
|
# If all todos completed, don't force another tool call
|
|
323
689
|
if todos and not pending_todos:
|
|
324
690
|
return None # Signal to skip retry
|
|
325
691
|
|
|
326
|
-
# If current
|
|
692
|
+
# If current todo is "작업 요약 및 다음단계 제시", accept text-only response
|
|
327
693
|
# The LLM is outputting the summary, we'll synthesize write_todos
|
|
328
|
-
if
|
|
329
|
-
|
|
330
|
-
|
|
331
|
-
|
|
332
|
-
|
|
333
|
-
|
|
334
|
-
|
|
335
|
-
|
|
336
|
-
|
|
337
|
-
|
|
338
|
-
)
|
|
339
|
-
|
|
694
|
+
if is_summary_todo:
|
|
695
|
+
summary_todo = next(
|
|
696
|
+
(
|
|
697
|
+
t
|
|
698
|
+
for t in pending_todos
|
|
699
|
+
if any(kw in t.get("content", "") for kw in summary_keywords)
|
|
700
|
+
),
|
|
701
|
+
{"content": "summary"},
|
|
702
|
+
)
|
|
703
|
+
logger.info(
|
|
704
|
+
"Current todo is summary/next steps ('%s'), accepting text-only response",
|
|
705
|
+
summary_todo.get("content", "")[:30],
|
|
706
|
+
)
|
|
707
|
+
return (
|
|
708
|
+
None # Signal to skip retry - will synthesize write_todos with content
|
|
709
|
+
)
|
|
340
710
|
|
|
341
711
|
return (
|
|
342
712
|
f"{JSON_TOOL_SCHEMA}\n\n"
|
|
@@ -344,6 +714,18 @@ def _build_json_prompt(request, response_message, has_content):
|
|
|
344
714
|
f"Call the next appropriate tool to continue.\n"
|
|
345
715
|
f'Example: {{"tool": "jupyter_cell_tool", "arguments": {{"code": "print(\'hello\')"}}}}'
|
|
346
716
|
)
|
|
717
|
+
elif is_summary_todo:
|
|
718
|
+
# Empty response but current todo is summary - force summary JSON output
|
|
719
|
+
logger.info(
|
|
720
|
+
"Empty response but current todo is summary/next steps - forcing summary JSON prompt"
|
|
721
|
+
)
|
|
722
|
+
return (
|
|
723
|
+
f"{JSON_TOOL_SCHEMA}\n\n"
|
|
724
|
+
f"You MUST output a summary JSON with next_items. This is the final step.\n"
|
|
725
|
+
f"출력 형식 (반드시 이 형식으로 출력):\n"
|
|
726
|
+
f'{{"summary": "완료된 작업 요약 (한국어)", "next_items": [{{"subject": "다음 작업 제목", "description": "설명"}}]}}\n\n'
|
|
727
|
+
f"Do NOT call any tool. Just output the summary JSON directly in your response."
|
|
728
|
+
)
|
|
347
729
|
elif pending_todos:
|
|
348
730
|
todo_list = ", ".join(t.get("content", "")[:20] for t in pending_todos[:3])
|
|
349
731
|
example_json = '{"tool": "jupyter_cell_tool", "arguments": {"code": "import pandas as pd\\ndf = pd.read_csv(\'titanic.csv\')\\nprint(df.head())"}}'
|
|
@@ -374,7 +756,7 @@ def _build_json_prompt(request, response_message, has_content):
|
|
|
374
756
|
)
|
|
375
757
|
|
|
376
758
|
|
|
377
|
-
def
|
|
759
|
+
def _create_synthetic_completion(request, response_message, has_content):
|
|
378
760
|
"""Create synthetic write_todos call to mark all todos as completed.
|
|
379
761
|
|
|
380
762
|
This triggers automatic session termination via router's all_todos_completed check.
|
|
@@ -382,6 +764,15 @@ def _create_synthetic_final_answer(request, response_message, has_content):
|
|
|
382
764
|
"""
|
|
383
765
|
todos = request.state.get("todos", [])
|
|
384
766
|
|
|
767
|
+
# Warn if there are pending todos being force-completed
|
|
768
|
+
pending_count = sum(1 for t in todos if t.get("status") == "pending")
|
|
769
|
+
if pending_count > 0:
|
|
770
|
+
logger.warning(
|
|
771
|
+
"Force-completing %d pending todos that were never started: %s",
|
|
772
|
+
pending_count,
|
|
773
|
+
[t.get("content") for t in todos if t.get("status") == "pending"]
|
|
774
|
+
)
|
|
775
|
+
|
|
385
776
|
# Mark all todos as completed
|
|
386
777
|
completed_todos = (
|
|
387
778
|
[{**todo, "status": "completed"} for todo in todos]
|
|
@@ -588,41 +979,63 @@ def create_normalize_tool_args_middleware(wrap_model_call, tools=None):
|
|
|
588
979
|
)
|
|
589
980
|
args[key] = normalized_value
|
|
590
981
|
|
|
591
|
-
#
|
|
982
|
+
# Validate write_todos: Only ONE item should be in_progress at a time
|
|
592
983
|
if tool_name == "write_todos" and "todos" in args:
|
|
593
984
|
todos = args["todos"]
|
|
594
985
|
if isinstance(todos, list) and len(todos) > 0:
|
|
595
|
-
#
|
|
596
|
-
|
|
597
|
-
|
|
598
|
-
|
|
599
|
-
for
|
|
600
|
-
if isinstance(
|
|
986
|
+
# Validate: Only ONE item should be in_progress at a time
|
|
987
|
+
# If multiple in_progress, keep only the first one
|
|
988
|
+
in_progress_count = sum(
|
|
989
|
+
1
|
|
990
|
+
for t in todos
|
|
991
|
+
if isinstance(t, dict)
|
|
992
|
+
and t.get("status") == "in_progress"
|
|
601
993
|
)
|
|
602
|
-
|
|
603
|
-
|
|
604
|
-
|
|
605
|
-
|
|
606
|
-
|
|
607
|
-
"status"
|
|
608
|
-
|
|
609
|
-
|
|
610
|
-
|
|
611
|
-
|
|
612
|
-
|
|
613
|
-
|
|
994
|
+
if in_progress_count > 1:
|
|
995
|
+
found_first = False
|
|
996
|
+
for todo in todos:
|
|
997
|
+
if not isinstance(todo, dict):
|
|
998
|
+
continue
|
|
999
|
+
if todo.get("status") == "in_progress":
|
|
1000
|
+
if found_first:
|
|
1001
|
+
# Reset subsequent in_progress to pending
|
|
1002
|
+
todo["status"] = "pending"
|
|
1003
|
+
logger.info(
|
|
1004
|
+
"Reset duplicate in_progress todo to pending: %s",
|
|
1005
|
+
todo.get("content", "")[
|
|
1006
|
+
:30
|
|
1007
|
+
],
|
|
1008
|
+
)
|
|
1009
|
+
else:
|
|
1010
|
+
found_first = True
|
|
1011
|
+
|
|
1012
|
+
# NOTE: Previously had logic to revert summary todo to in_progress
|
|
1013
|
+
# if no summary JSON was found. This caused infinite loops
|
|
1014
|
+
# where LLM kept calling write_todos repeatedly.
|
|
1015
|
+
# Now we let the natural termination logic handle this.
|
|
1016
|
+
#
|
|
1017
|
+
# NOTE: Also removed logic to preserve todos when LLM tries to delete them.
|
|
1018
|
+
# The LLM should be able to modify todos freely when:
|
|
1019
|
+
# - User rejects code approval
|
|
1020
|
+
# - User changes their request
|
|
1021
|
+
# - Code execution fails
|
|
1022
|
+
# We rely on prompts to guide proper todo management.
|
|
614
1023
|
|
|
615
1024
|
return response
|
|
616
1025
|
|
|
617
1026
|
return normalize_tool_args
|
|
618
1027
|
|
|
619
1028
|
|
|
620
|
-
def
|
|
621
|
-
"""Create middleware
|
|
1029
|
+
def create_continuation_control_middleware(wrap_model_call):
|
|
1030
|
+
"""Create unified middleware for continuation control.
|
|
622
1031
|
|
|
623
|
-
|
|
624
|
-
|
|
625
|
-
|
|
1032
|
+
This middleware combines two functions:
|
|
1033
|
+
1. BEFORE handler: Inject continuation prompt after non-HITL tool execution
|
|
1034
|
+
- Checks for summary completion and stops if done
|
|
1035
|
+
- Injects "[SYSTEM] Tool completed..." messages to guide LLM
|
|
1036
|
+
2. AFTER handler: Prevent auto-continuation after summary JSON output
|
|
1037
|
+
- Strips write_todos from responses containing summary JSON
|
|
1038
|
+
- Prevents agent from auto-creating new todos after task completion
|
|
626
1039
|
|
|
627
1040
|
Args:
|
|
628
1041
|
wrap_model_call: LangChain's wrap_model_call decorator
|
|
@@ -631,68 +1044,198 @@ def create_inject_continuation_middleware(wrap_model_call):
|
|
|
631
1044
|
Middleware function
|
|
632
1045
|
"""
|
|
633
1046
|
|
|
1047
|
+
def _check_summary_exists(messages, last_real_human_idx: int) -> bool:
    """Report whether a final summary already appears in the conversation.

    Only messages after ``last_real_human_idx`` are inspected. When that index
    is negative, the last 15 messages are scanned instead.

    A message counts as a summary when its text contains both the
    ``"summary"`` and ``"next_items"`` JSON keys, or any of the known markdown
    summary headings.

    Args:
        messages: Sequence of message objects; each one's ``content``
            attribute (if any) is examined. Content may be a string or a list
            of parts (plain strings or dicts carrying a ``"text"`` entry).
        last_real_human_idx: Index of the last genuine user message, or a
            negative value when there is none.

    Returns:
        True if any inspected message looks like a summary, otherwise False.
    """
    markdown_markers = (
        "다음 단계 제안",
        "다음 단계:",
        "### 다음 단계",
        "## 다음 단계",
        "**다음 단계**",
        "모든 작업이 완료",
        "**작업 요약**",
        "### 작업 요약",
        "## 작업 요약",
    )

    def _as_text(raw) -> str:
        # Content can be a list of parts; without flattening, the substring
        # checks below would silently become list-membership tests and never
        # match a summary.
        if isinstance(raw, str):
            return raw
        if isinstance(raw, list):
            pieces = []
            for part in raw:
                if isinstance(part, str):
                    pieces.append(part)
                elif isinstance(part, dict) and isinstance(part.get("text"), str):
                    pieces.append(part["text"])
            return " ".join(pieces)
        return ""

    if last_real_human_idx >= 0:
        candidates = messages[last_real_human_idx + 1:]
    else:
        candidates = messages[-15:]

    for msg in candidates:
        text = _as_text(getattr(msg, "content", "") or "")
        # Summary emitted as JSON.
        if '"summary"' in text and '"next_items"' in text:
            return True
        # Summary emitted as markdown (common heading patterns).
        if any(marker in text for marker in markdown_markers):
            return True
    return False
|
|
1076
|
+
|
|
1077
|
+
def _find_last_real_human_idx(messages) -> int:
    """Return the index of the most recent genuine user message.

    A message is treated as human when its ``type`` attribute (or, failing
    that, its class name) is ``"human"`` / ``"HumanMessage"``. Human messages
    whose text starts with ``"[SYSTEM]"`` are skipped as system-injected.

    Args:
        messages: Iterable of message objects. ``content`` may be a string or
            a list of parts (plain strings or dicts carrying a ``"text"``
            entry).

    Returns:
        The index of the last qualifying message, or -1 if there is none.
    """
    latest = -1
    for position, message in enumerate(messages):
        kind = getattr(message, "type", "") or type(message).__name__
        if kind not in ("human", "HumanMessage"):
            continue

        raw = getattr(message, "content", "") or ""
        if isinstance(raw, str):
            text = raw
        elif isinstance(raw, list):
            # List-form content has no ``startswith``; flatten its text parts
            # so the "[SYSTEM]" prefix check cannot raise AttributeError.
            text = " ".join(
                part if isinstance(part, str) else part["text"]
                for part in raw
                if isinstance(part, str)
                or (isinstance(part, dict) and isinstance(part.get("text"), str))
            )
        else:
            text = ""

        if not text.startswith("[SYSTEM]"):
            latest = position
    return latest
|
|
1087
|
+
|
|
634
1088
|
@wrap_model_call
|
|
635
|
-
@_with_middleware_logging("
|
|
636
|
-
def
|
|
1089
|
+
@_with_middleware_logging("continuation_control")
|
|
1090
|
+
def continuation_control(request, handler):
|
|
637
1091
|
messages = request.messages
|
|
638
|
-
if not messages:
|
|
639
|
-
return handler(request)
|
|
640
|
-
|
|
641
|
-
last_msg = messages[-1]
|
|
642
|
-
if getattr(last_msg, "type", "") == "tool":
|
|
643
|
-
tool_name = getattr(last_msg, "name", "") or ""
|
|
644
|
-
|
|
645
|
-
# Try to extract tool name from content
|
|
646
|
-
if not tool_name:
|
|
647
|
-
try:
|
|
648
|
-
content_json = json.loads(last_msg.content)
|
|
649
|
-
tool_name = content_json.get("tool", "")
|
|
650
|
-
except (json.JSONDecodeError, TypeError, AttributeError):
|
|
651
|
-
pass
|
|
652
|
-
|
|
653
|
-
if tool_name in NON_HITL_TOOLS:
|
|
654
|
-
todos = request.state.get("todos", [])
|
|
655
|
-
pending_todos = [
|
|
656
|
-
t for t in todos if t.get("status") in ("pending", "in_progress")
|
|
657
|
-
]
|
|
658
1092
|
|
|
659
|
-
|
|
660
|
-
|
|
1093
|
+
# ===== BEFORE HANDLER: Inject continuation prompt =====
|
|
1094
|
+
if messages:
|
|
1095
|
+
last_msg = messages[-1]
|
|
1096
|
+
if getattr(last_msg, "type", "") == "tool":
|
|
1097
|
+
tool_name = getattr(last_msg, "name", "") or ""
|
|
1098
|
+
|
|
1099
|
+
# Try to extract tool name from content
|
|
1100
|
+
if not tool_name:
|
|
1101
|
+
try:
|
|
1102
|
+
content_json = json.loads(last_msg.content)
|
|
1103
|
+
tool_name = content_json.get("tool", "")
|
|
1104
|
+
except (json.JSONDecodeError, TypeError, AttributeError):
|
|
1105
|
+
pass
|
|
1106
|
+
|
|
1107
|
+
if tool_name in NON_HITL_TOOLS:
|
|
1108
|
+
todos = request.state.get("todos", [])
|
|
1109
|
+
|
|
1110
|
+
last_real_human_idx = _find_last_real_human_idx(messages)
|
|
1111
|
+
summary_exists = _check_summary_exists(
|
|
1112
|
+
messages, last_real_human_idx
|
|
1113
|
+
)
|
|
1114
|
+
|
|
1115
|
+
# STOP if summary exists (regardless of todo status)
|
|
1116
|
+
if summary_exists:
|
|
1117
|
+
logger.info(
|
|
1118
|
+
"Summary exists after tool: %s - stopping agent (user must request next steps)",
|
|
1119
|
+
tool_name,
|
|
1120
|
+
)
|
|
1121
|
+
return AIMessage(content="", tool_calls=[])
|
|
1122
|
+
|
|
1123
|
+
pending_todos = [
|
|
1124
|
+
t
|
|
1125
|
+
for t in todos
|
|
1126
|
+
if t.get("status") in ("pending", "in_progress")
|
|
1127
|
+
]
|
|
1128
|
+
|
|
1129
|
+
# If all todos completed but no summary yet, allow LLM call for summary
|
|
1130
|
+
if not pending_todos and todos:
|
|
1131
|
+
logger.info(
|
|
1132
|
+
"All %d todos completed, no summary yet after tool: %s - allowing LLM for summary",
|
|
1133
|
+
len(todos),
|
|
1134
|
+
tool_name,
|
|
1135
|
+
)
|
|
1136
|
+
|
|
661
1137
|
logger.info(
|
|
662
|
-
"
|
|
1138
|
+
"Injecting continuation prompt after non-HITL tool: %s",
|
|
663
1139
|
tool_name,
|
|
664
1140
|
)
|
|
665
|
-
return handler(request)
|
|
666
1141
|
|
|
1142
|
+
# Skip continuation injection for write_todos
|
|
1143
|
+
# This prevents auto-continuation to next task after completing one
|
|
1144
|
+
# Agent will decide next action based on its own reasoning
|
|
1145
|
+
if tool_name == "write_todos":
|
|
1146
|
+
logger.info(
|
|
1147
|
+
"Skipping continuation prompt after write_todos - "
|
|
1148
|
+
"agent decides next action (pending: %d)",
|
|
1149
|
+
len(pending_todos) if pending_todos else 0,
|
|
1150
|
+
)
|
|
1151
|
+
# Don't inject continuation - let agent naturally continue or stop
|
|
1152
|
+
elif pending_todos:
|
|
1153
|
+
pending_list = ", ".join(
|
|
1154
|
+
t.get("content", "")[:30] for t in pending_todos[:3]
|
|
1155
|
+
)
|
|
1156
|
+
continuation = (
|
|
1157
|
+
f"Tool '{tool_name}' completed. "
|
|
1158
|
+
f"Continue with pending tasks: {pending_list}. "
|
|
1159
|
+
f"Call jupyter_cell_tool or the next appropriate tool."
|
|
1160
|
+
)
|
|
1161
|
+
new_messages = list(messages) + [
|
|
1162
|
+
HumanMessage(content=f"[SYSTEM] {continuation}")
|
|
1163
|
+
]
|
|
1164
|
+
request = request.override(messages=new_messages)
|
|
1165
|
+
else:
|
|
1166
|
+
continuation = (
|
|
1167
|
+
f"Tool '{tool_name}' completed. "
|
|
1168
|
+
f"Create a todo list with write_todos if needed."
|
|
1169
|
+
)
|
|
1170
|
+
new_messages = list(messages) + [
|
|
1171
|
+
HumanMessage(content=f"[SYSTEM] {continuation}")
|
|
1172
|
+
]
|
|
1173
|
+
request = request.override(messages=new_messages)
|
|
1174
|
+
|
|
1175
|
+
# ===== CALL HANDLER =====
|
|
1176
|
+
response = handler(request)
|
|
1177
|
+
|
|
1178
|
+
# ===== AFTER HANDLER: Strip write_todos if summary JSON present =====
|
|
1179
|
+
response_message = _extract_ai_message(response)
|
|
1180
|
+
if not response_message:
|
|
1181
|
+
return response
|
|
1182
|
+
|
|
1183
|
+
# Get content - handle both string and list formats
|
|
1184
|
+
content = getattr(response_message, "content", "") or ""
|
|
1185
|
+
if isinstance(content, list):
|
|
1186
|
+
content = " ".join(
|
|
1187
|
+
str(p) if isinstance(p, str) else p.get("text", "")
|
|
1188
|
+
for p in content
|
|
1189
|
+
if isinstance(p, (str, dict))
|
|
1190
|
+
)
|
|
1191
|
+
|
|
1192
|
+
# Check if content contains summary JSON pattern
|
|
1193
|
+
has_summary_json = '"summary"' in content and '"next_items"' in content
|
|
1194
|
+
|
|
1195
|
+
if has_summary_json:
|
|
1196
|
+
tool_calls = getattr(response_message, "tool_calls", []) or []
|
|
1197
|
+
write_todos_calls = [
|
|
1198
|
+
tc for tc in tool_calls if tc.get("name") == "write_todos"
|
|
1199
|
+
]
|
|
1200
|
+
|
|
1201
|
+
if write_todos_calls:
|
|
667
1202
|
logger.info(
|
|
668
|
-
"
|
|
669
|
-
|
|
1203
|
+
"Summary JSON 감지 - write_todos 호출 제거 (자동 계속 방지). "
|
|
1204
|
+
"제거된 write_todos 호출 수: %d",
|
|
1205
|
+
len(write_todos_calls),
|
|
670
1206
|
)
|
|
671
1207
|
|
|
672
|
-
|
|
673
|
-
|
|
674
|
-
t.get("content", "")[:30] for t in pending_todos[:3]
|
|
675
|
-
)
|
|
676
|
-
continuation = (
|
|
677
|
-
f"Tool '{tool_name}' completed. "
|
|
678
|
-
f"Continue with pending tasks: {pending_list}. "
|
|
679
|
-
f"Call jupyter_cell_tool or the next appropriate tool."
|
|
680
|
-
)
|
|
681
|
-
else:
|
|
682
|
-
# No todos yet - let agent create them
|
|
683
|
-
continuation = (
|
|
684
|
-
f"Tool '{tool_name}' completed. "
|
|
685
|
-
f"Create a todo list with write_todos if needed."
|
|
686
|
-
)
|
|
687
|
-
|
|
688
|
-
new_messages = list(messages) + [
|
|
689
|
-
HumanMessage(content=f"[SYSTEM] {continuation}")
|
|
1208
|
+
filtered_tool_calls = [
|
|
1209
|
+
tc for tc in tool_calls if tc.get("name") != "write_todos"
|
|
690
1210
|
]
|
|
691
|
-
request = request.override(messages=new_messages)
|
|
692
1211
|
|
|
693
|
-
|
|
1212
|
+
new_message = AIMessage(
|
|
1213
|
+
content=response_message.content,
|
|
1214
|
+
tool_calls=filtered_tool_calls,
|
|
1215
|
+
additional_kwargs=getattr(
|
|
1216
|
+
response_message, "additional_kwargs", {}
|
|
1217
|
+
),
|
|
1218
|
+
response_metadata=getattr(
|
|
1219
|
+
response_message, "response_metadata", {}
|
|
1220
|
+
),
|
|
1221
|
+
)
|
|
1222
|
+
|
|
1223
|
+
response = _replace_ai_message_in_response(response, new_message)
|
|
1224
|
+
|
|
1225
|
+
return response
|
|
1226
|
+
|
|
1227
|
+
return continuation_control
|
|
1228
|
+
|
|
1229
|
+
|
|
1230
|
+
# Backward compatibility aliases
|
|
1231
|
+
def create_inject_continuation_middleware(wrap_model_call):
    """Backward-compatible alias for the former continuation-injection factory.

    Deprecated: call ``create_continuation_control_middleware`` directly.

    Args:
        wrap_model_call: Passed through unchanged to
            ``create_continuation_control_middleware``.

    Returns:
        Whatever ``create_continuation_control_middleware`` returns.
    """
    middleware = create_continuation_control_middleware(wrap_model_call)
    return middleware
|
|
1234
|
+
|
|
694
1235
|
|
|
695
|
-
|
|
1236
|
+
def create_prevent_auto_continuation_middleware(wrap_model_call):
    """Backward-compatible alias for the former auto-continuation guard factory.

    Deprecated: call ``create_continuation_control_middleware`` directly.

    Args:
        wrap_model_call: Passed through unchanged to
            ``create_continuation_control_middleware``.

    Returns:
        Whatever ``create_continuation_control_middleware`` returns.
    """
    middleware = create_continuation_control_middleware(wrap_model_call)
    return middleware
|
|
696
1239
|
|
|
697
1240
|
|
|
698
1241
|
def create_patch_tool_calls_middleware(AgentMiddleware, ToolMessage, Overwrite):
|