hdsp-jupyter-extension 2.0.11__py3-none-any.whl → 2.0.13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (81) hide show
  1. agent_server/langchain/MULTI_AGENT_ARCHITECTURE.md +1114 -0
  2. agent_server/langchain/__init__.py +2 -2
  3. agent_server/langchain/agent.py +72 -33
  4. agent_server/langchain/agent_factory.py +400 -0
  5. agent_server/langchain/agent_prompts/__init__.py +25 -0
  6. agent_server/langchain/agent_prompts/athena_query_prompt.py +71 -0
  7. agent_server/langchain/agent_prompts/planner_prompt.py +85 -0
  8. agent_server/langchain/agent_prompts/python_developer_prompt.py +123 -0
  9. agent_server/langchain/agent_prompts/researcher_prompt.py +38 -0
  10. agent_server/langchain/custom_middleware.py +652 -195
  11. agent_server/langchain/hitl_config.py +34 -10
  12. agent_server/langchain/middleware/__init__.py +24 -0
  13. agent_server/langchain/middleware/code_history_middleware.py +412 -0
  14. agent_server/langchain/middleware/description_injector.py +150 -0
  15. agent_server/langchain/middleware/skill_middleware.py +298 -0
  16. agent_server/langchain/middleware/subagent_events.py +171 -0
  17. agent_server/langchain/middleware/subagent_middleware.py +329 -0
  18. agent_server/langchain/prompts.py +96 -101
  19. agent_server/langchain/skills/data_analysis.md +236 -0
  20. agent_server/langchain/skills/data_loading.md +158 -0
  21. agent_server/langchain/skills/inference.md +392 -0
  22. agent_server/langchain/skills/model_training.md +318 -0
  23. agent_server/langchain/skills/pyspark.md +352 -0
  24. agent_server/langchain/subagents/__init__.py +20 -0
  25. agent_server/langchain/subagents/base.py +173 -0
  26. agent_server/langchain/tools/__init__.py +3 -0
  27. agent_server/langchain/tools/jupyter_tools.py +58 -20
  28. agent_server/langchain/tools/lsp_tools.py +1 -1
  29. agent_server/langchain/tools/shared/__init__.py +26 -0
  30. agent_server/langchain/tools/shared/qdrant_search.py +175 -0
  31. agent_server/langchain/tools/tool_registry.py +219 -0
  32. agent_server/langchain/tools/workspace_tools.py +197 -0
  33. agent_server/routers/config.py +40 -1
  34. agent_server/routers/langchain_agent.py +818 -337
  35. {hdsp_jupyter_extension-2.0.11.data → hdsp_jupyter_extension-2.0.13.data}/data/share/jupyter/labextensions/hdsp-agent/build_log.json +1 -1
  36. {hdsp_jupyter_extension-2.0.11.data → hdsp_jupyter_extension-2.0.13.data}/data/share/jupyter/labextensions/hdsp-agent/package.json +7 -2
  37. hdsp_jupyter_extension-2.0.11.data/data/share/jupyter/labextensions/hdsp-agent/static/frontend_styles_index_js.2d9fb488c82498c45c2d.js → hdsp_jupyter_extension-2.0.13.data/data/share/jupyter/labextensions/hdsp-agent/static/frontend_styles_index_js.037b3c8e5d6a92b63b16.js +1108 -179
  38. hdsp_jupyter_extension-2.0.13.data/data/share/jupyter/labextensions/hdsp-agent/static/frontend_styles_index_js.037b3c8e5d6a92b63b16.js.map +1 -0
  39. jupyter_ext/labextension/static/lib_index_js.58c1e128ba0b76f41f04.js → hdsp_jupyter_extension-2.0.13.data/data/share/jupyter/labextensions/hdsp-agent/static/lib_index_js.5449ba3c7e25177d2987.js +3916 -8128
  40. hdsp_jupyter_extension-2.0.13.data/data/share/jupyter/labextensions/hdsp-agent/static/lib_index_js.5449ba3c7e25177d2987.js.map +1 -0
  41. hdsp_jupyter_extension-2.0.11.data/data/share/jupyter/labextensions/hdsp-agent/static/remoteEntry.9da31d1134a53b0c4af5.js → hdsp_jupyter_extension-2.0.13.data/data/share/jupyter/labextensions/hdsp-agent/static/remoteEntry.a8e0b064eb9b1c1ff463.js +17 -17
  42. hdsp_jupyter_extension-2.0.13.data/data/share/jupyter/labextensions/hdsp-agent/static/remoteEntry.a8e0b064eb9b1c1ff463.js.map +1 -0
  43. {hdsp_jupyter_extension-2.0.11.dist-info → hdsp_jupyter_extension-2.0.13.dist-info}/METADATA +1 -1
  44. {hdsp_jupyter_extension-2.0.11.dist-info → hdsp_jupyter_extension-2.0.13.dist-info}/RECORD +75 -51
  45. jupyter_ext/_version.py +1 -1
  46. jupyter_ext/handlers.py +59 -8
  47. jupyter_ext/labextension/build_log.json +1 -1
  48. jupyter_ext/labextension/package.json +7 -2
  49. jupyter_ext/labextension/static/{frontend_styles_index_js.2d9fb488c82498c45c2d.js → frontend_styles_index_js.037b3c8e5d6a92b63b16.js} +1108 -179
  50. jupyter_ext/labextension/static/frontend_styles_index_js.037b3c8e5d6a92b63b16.js.map +1 -0
  51. hdsp_jupyter_extension-2.0.11.data/data/share/jupyter/labextensions/hdsp-agent/static/lib_index_js.58c1e128ba0b76f41f04.js → jupyter_ext/labextension/static/lib_index_js.5449ba3c7e25177d2987.js +3916 -8128
  52. jupyter_ext/labextension/static/lib_index_js.5449ba3c7e25177d2987.js.map +1 -0
  53. jupyter_ext/labextension/static/{remoteEntry.9da31d1134a53b0c4af5.js → remoteEntry.a8e0b064eb9b1c1ff463.js} +17 -17
  54. jupyter_ext/labextension/static/remoteEntry.a8e0b064eb9b1c1ff463.js.map +1 -0
  55. hdsp_jupyter_extension-2.0.11.data/data/share/jupyter/labextensions/hdsp-agent/static/frontend_styles_index_js.2d9fb488c82498c45c2d.js.map +0 -1
  56. hdsp_jupyter_extension-2.0.11.data/data/share/jupyter/labextensions/hdsp-agent/static/lib_index_js.58c1e128ba0b76f41f04.js.map +0 -1
  57. hdsp_jupyter_extension-2.0.11.data/data/share/jupyter/labextensions/hdsp-agent/static/remoteEntry.9da31d1134a53b0c4af5.js.map +0 -1
  58. jupyter_ext/labextension/static/frontend_styles_index_js.2d9fb488c82498c45c2d.js.map +0 -1
  59. jupyter_ext/labextension/static/lib_index_js.58c1e128ba0b76f41f04.js.map +0 -1
  60. jupyter_ext/labextension/static/remoteEntry.9da31d1134a53b0c4af5.js.map +0 -1
  61. {hdsp_jupyter_extension-2.0.11.data → hdsp_jupyter_extension-2.0.13.data}/data/etc/jupyter/jupyter_server_config.d/hdsp_jupyter_extension.json +0 -0
  62. {hdsp_jupyter_extension-2.0.11.data → hdsp_jupyter_extension-2.0.13.data}/data/share/jupyter/labextensions/hdsp-agent/install.json +0 -0
  63. {hdsp_jupyter_extension-2.0.11.data → hdsp_jupyter_extension-2.0.13.data}/data/share/jupyter/labextensions/hdsp-agent/static/node_modules_emotion_use-insertion-effect-with-fallbacks_dist_emotion-use-insertion-effect-wi-3ba6b80.c095373419d05e6f141a.js +0 -0
  64. {hdsp_jupyter_extension-2.0.11.data → hdsp_jupyter_extension-2.0.13.data}/data/share/jupyter/labextensions/hdsp-agent/static/node_modules_emotion_use-insertion-effect-with-fallbacks_dist_emotion-use-insertion-effect-wi-3ba6b80.c095373419d05e6f141a.js.map +0 -0
  65. {hdsp_jupyter_extension-2.0.11.data → hdsp_jupyter_extension-2.0.13.data}/data/share/jupyter/labextensions/hdsp-agent/static/node_modules_emotion_use-insertion-effect-with-fallbacks_dist_emotion-use-insertion-effect-wi-3ba6b81.61e75fb98ecff46cf836.js +0 -0
  66. {hdsp_jupyter_extension-2.0.11.data → hdsp_jupyter_extension-2.0.13.data}/data/share/jupyter/labextensions/hdsp-agent/static/node_modules_emotion_use-insertion-effect-with-fallbacks_dist_emotion-use-insertion-effect-wi-3ba6b81.61e75fb98ecff46cf836.js.map +0 -0
  67. {hdsp_jupyter_extension-2.0.11.data → hdsp_jupyter_extension-2.0.13.data}/data/share/jupyter/labextensions/hdsp-agent/static/style.js +0 -0
  68. {hdsp_jupyter_extension-2.0.11.data → hdsp_jupyter_extension-2.0.13.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_babel_runtime_helpers_esm_extends_js-node_modules_emotion_serialize_dist-051195.e2553aab0c3963b83dd7.js +0 -0
  69. {hdsp_jupyter_extension-2.0.11.data → hdsp_jupyter_extension-2.0.13.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_babel_runtime_helpers_esm_extends_js-node_modules_emotion_serialize_dist-051195.e2553aab0c3963b83dd7.js.map +0 -0
  70. {hdsp_jupyter_extension-2.0.11.data → hdsp_jupyter_extension-2.0.13.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_emotion_cache_dist_emotion-cache_browser_development_esm_js.24edcc52a1c014a8a5f0.js +0 -0
  71. {hdsp_jupyter_extension-2.0.11.data → hdsp_jupyter_extension-2.0.13.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_emotion_cache_dist_emotion-cache_browser_development_esm_js.24edcc52a1c014a8a5f0.js.map +0 -0
  72. {hdsp_jupyter_extension-2.0.11.data → hdsp_jupyter_extension-2.0.13.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_emotion_react_dist_emotion-react_browser_development_esm_js.19ecf6babe00caff6b8a.js +0 -0
  73. {hdsp_jupyter_extension-2.0.11.data → hdsp_jupyter_extension-2.0.13.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_emotion_react_dist_emotion-react_browser_development_esm_js.19ecf6babe00caff6b8a.js.map +0 -0
  74. {hdsp_jupyter_extension-2.0.11.data → hdsp_jupyter_extension-2.0.13.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_emotion_styled_dist_emotion-styled_browser_development_esm_js.661fb5836f4978a7c6e1.js +0 -0
  75. {hdsp_jupyter_extension-2.0.11.data → hdsp_jupyter_extension-2.0.13.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_emotion_styled_dist_emotion-styled_browser_development_esm_js.661fb5836f4978a7c6e1.js.map +0 -0
  76. {hdsp_jupyter_extension-2.0.11.data → hdsp_jupyter_extension-2.0.13.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_mui_material_index_js.985697e0162d8d088ca2.js +0 -0
  77. {hdsp_jupyter_extension-2.0.11.data → hdsp_jupyter_extension-2.0.13.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_mui_material_index_js.985697e0162d8d088ca2.js.map +0 -0
  78. {hdsp_jupyter_extension-2.0.11.data → hdsp_jupyter_extension-2.0.13.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_mui_material_utils_createSvgIcon_js.1f5038488cdfd8b3a85d.js +0 -0
  79. {hdsp_jupyter_extension-2.0.11.data → hdsp_jupyter_extension-2.0.13.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_mui_material_utils_createSvgIcon_js.1f5038488cdfd8b3a85d.js.map +0 -0
  80. {hdsp_jupyter_extension-2.0.11.dist-info → hdsp_jupyter_extension-2.0.13.dist-info}/WHEEL +0 -0
  81. {hdsp_jupyter_extension-2.0.11.dist-info → hdsp_jupyter_extension-2.0.13.dist-info}/licenses/LICENSE +0 -0
@@ -11,10 +11,13 @@ import re
11
11
  import uuid
12
12
  from typing import Any, Dict, Optional
13
13
 
14
+ from json_repair import repair_json
14
15
  from langchain_core.messages import AIMessage, HumanMessage
15
16
 
16
17
  from agent_server.langchain.logging_utils import (
17
18
  _format_middleware_marker,
19
+ _pretty_json,
20
+ _serialize_message,
18
21
  _with_middleware_logging,
19
22
  )
20
23
  from agent_server.langchain.prompts import JSON_TOOL_SCHEMA, NON_HITL_TOOLS
@@ -75,6 +78,15 @@ def parse_json_tool_call(text) -> Optional[Dict[str, Any]]:
75
78
  except json.JSONDecodeError:
76
79
  pass
77
80
 
81
+ # Try json-repair for malformed JSON from LLMs
82
+ try:
83
+ repaired = repair_json(text, return_objects=True)
84
+ if isinstance(repaired, dict) and "tool" in repaired:
85
+ logger.info(f"Repaired malformed JSON tool call: {repaired.get('tool')}")
86
+ return repaired
87
+ except Exception as e:
88
+ logger.debug(f"json-repair failed: {e}")
89
+
78
90
  return None
79
91
 
80
92
 
@@ -83,15 +95,110 @@ def normalize_tool_name(tool_name: str) -> str:
83
95
 
84
96
  Rules:
85
97
  - write_todos_tool → write_todos (TodoListMiddleware exception)
98
+ - task → task_tool (SubAgentMiddleware uses task_tool)
86
99
  - other tools without _tool suffix → add _tool suffix
87
100
  """
88
101
  if tool_name == "write_todos_tool":
89
102
  return "write_todos"
90
- if not tool_name.endswith("_tool") and tool_name != "write_todos":
103
+ if tool_name == "task":
104
+ return "task_tool"
105
+ if not tool_name.endswith("_tool") and tool_name not in ("write_todos",):
91
106
  return f"{tool_name}_tool"
92
107
  return tool_name
93
108
 
94
109
 
110
def repair_tool_call_arguments(arguments: str) -> Optional[Dict[str, Any]]:
    """Best-effort parse of an LLM tool-call ``arguments`` string.

    Some models (e.g. gpt-oss-120b) emit argument payloads that are not
    valid JSON (missing leading '{', trailing commas, etc.). Strict
    parsing is attempted first; on failure the payload is handed to
    ``json_repair.repair_json``.

    Args:
        arguments: Raw arguments string emitted by the model.

    Returns:
        The parsed dictionary, or None when neither strict parsing nor
        repair produced a usable result.
    """
    if not isinstance(arguments, str) or not arguments:
        return None

    raw = arguments.strip()

    # Fast path: payload is already valid JSON.
    try:
        return json.loads(raw)
    except json.JSONDecodeError:
        pass

    # Slow path: let json-repair fix up the malformed payload.
    try:
        fixed = repair_json(raw, return_objects=True)
    except Exception as e:
        logger.debug(f"json-repair failed for arguments: {e}")
        return None

    if isinstance(fixed, dict):
        logger.info("Repaired malformed tool arguments")
        return fixed

    return None
143
+
144
+
145
def try_extract_tool_calls_from_additional_kwargs(
    response_message,
) -> Optional[AIMessage]:
    """Recover tool calls stashed in ``additional_kwargs`` by some LLMs.

    Certain models place their tool invocations under
    ``additional_kwargs["tool_calls"]`` with malformed argument JSON
    instead of populating ``tool_calls`` directly. Each entry is repaired
    via ``repair_tool_call_arguments`` and re-emitted as a proper
    LangChain-style tool call.

    Args:
        response_message: AIMessage whose ``additional_kwargs`` may hold
            raw tool calls; falsy values are ignored.

    Returns:
        A new AIMessage carrying the repaired tool_calls, or None when
        nothing could be recovered.
    """
    if not response_message:
        return None

    extras = getattr(response_message, "additional_kwargs", {})
    raw_calls = extras.get("tool_calls", [])
    if not raw_calls:
        return None

    recovered = []
    for raw in raw_calls:
        spec = raw.get("function", {})
        tool_name = spec.get("name")
        if not tool_name:
            continue

        # Attempt to repair the (possibly malformed) arguments payload.
        parsed_args = repair_tool_call_arguments(spec.get("arguments", ""))
        if parsed_args is None:
            continue

        recovered.append(
            {
                "name": normalize_tool_name(tool_name),
                "args": parsed_args,
                "id": raw.get("id", str(uuid.uuid4())),
                "type": "tool_call",
            }
        )

    if not recovered:
        return None

    logger.info(
        f"Extracted {len(recovered)} tool calls from additional_kwargs"
    )
    return AIMessage(
        content=getattr(response_message, "content", "") or "",
        tool_calls=recovered,
    )
200
+
201
+
95
202
  def create_tool_call_message(tool_name: str, arguments: Dict[str, Any]) -> AIMessage:
96
203
  """Create AIMessage with tool_calls from parsed JSON.
97
204
 
@@ -125,7 +232,7 @@ def create_handle_empty_response_middleware(wrap_model_call):
125
232
  1. Detects empty or text-only responses (no tool_calls)
126
233
  2. Retries with JSON schema prompt to force structured output
127
234
  3. Parses JSON response and injects tool_calls into AIMessage
128
- 4. Falls back to synthetic final_answer if all else fails
235
+ 4. Falls back to synthetic write_todos completion if all else fails
129
236
 
130
237
  Args:
131
238
  wrap_model_call: LangChain's wrap_model_call decorator
@@ -142,65 +249,129 @@ def create_handle_empty_response_middleware(wrap_model_call):
142
249
  # Check if all todos are completed - if so, return empty response to stop agent
143
250
  # Method 1: Check state.todos
144
251
  todos = request.state.get("todos", [])
252
+ logger.debug(
253
+ "handle_empty_response: state.todos=%s",
254
+ json.dumps(todos, ensure_ascii=False) if todos else "[]",
255
+ )
145
256
  if todos:
146
257
  pending_todos = [
147
258
  t for t in todos if t.get("status") in ("pending", "in_progress")
148
259
  ]
149
260
  if not pending_todos:
150
- logger.info(
151
- "All %d todos completed (from state) - stopping agent (no LLM call)",
152
- len(todos),
261
+ # Check if summary already exists AFTER the last REAL HumanMessage
262
+ # (to avoid false positives from previous task summaries)
263
+ # Note: Skip system-injected messages like "[SYSTEM] Tool completed..."
264
+ summary_exists = False
265
+ messages = request.messages
266
+
267
+ # Find index of last REAL HumanMessage (not system-injected)
268
+ last_human_idx = -1
269
+ for i, msg in enumerate(messages):
270
+ msg_type = getattr(msg, "type", "") or type(msg).__name__
271
+ if msg_type in ("human", "HumanMessage"):
272
+ msg_content = getattr(msg, "content", "") or ""
273
+ # Skip system-injected messages
274
+ if not msg_content.startswith("[SYSTEM]"):
275
+ last_human_idx = i
276
+
277
+ # Only check messages after last REAL HumanMessage for summary
278
+ messages_to_check = (
279
+ messages[last_human_idx + 1 :]
280
+ if last_human_idx >= 0
281
+ else messages[-10:]
153
282
  )
154
- return AIMessage(content="", tool_calls=[])
283
+ for msg in messages_to_check:
284
+ content = getattr(msg, "content", "") or ""
285
+ if '"summary"' in content and '"next_items"' in content:
286
+ summary_exists = True
287
+ break
288
+
289
+ if summary_exists:
290
+ logger.info(
291
+ "All %d todos completed and summary exists after last user message - stopping agent (no LLM call)",
292
+ len(todos),
293
+ )
294
+ return AIMessage(content="", tool_calls=[])
295
+ else:
296
+ # Allow one more LLM call for summary generation
297
+ logger.info(
298
+ "All %d todos completed but no summary yet after last user message - allowing LLM call for summary",
299
+ len(todos),
300
+ )
155
301
 
156
302
  # Method 2: Check last message if it's a write_todos ToolMessage with all completed
157
- messages = request.messages
158
- if messages:
159
- last_msg = messages[-1]
160
- if getattr(last_msg, "type", "") == "tool":
161
- tool_name = getattr(last_msg, "name", "") or ""
162
- content = getattr(last_msg, "content", "") or ""
163
- if tool_name == "write_todos" or "Updated todo list to" in content:
164
- # Extract todos from ToolMessage content
165
- try:
166
- import ast
167
- if "Updated todo list to" in content:
168
- list_text = content.split("Updated todo list to", 1)[1].strip()
169
- todos_from_msg = ast.literal_eval(list_text)
170
- if isinstance(todos_from_msg, list) and len(todos_from_msg) > 0:
171
- pending = [t for t in todos_from_msg if t.get("status") in ("pending", "in_progress")]
172
- if not pending:
173
- logger.info(
174
- "All %d todos completed (from ToolMessage) - stopping agent (no LLM call)",
175
- len(todos_from_msg),
176
- )
177
- return AIMessage(content="", tool_calls=[])
178
- except Exception as e:
179
- logger.debug("Failed to parse todos from ToolMessage: %s", e)
303
+ # Note: We now allow one more LLM call for summary generation when all todos are completed
304
+ # This check is skipped to let the agent produce a summary
305
+
306
+ # Check if summary todo is completed
307
+ # IMPORTANT: Only consider summary completed if it's the LAST todo item and ALL todos are done
308
+ # This prevents false positives when a previous summary is completed but new tasks are added
309
+ all_todos_completed = all(t.get("status") == "completed" for t in todos)
310
+ last_todo_is_summary = (
311
+ len(todos) > 0
312
+ and "작업 요약" in todos[-1].get("content", "")
313
+ and "다음 단계" in todos[-1].get("content", "")
314
+ and todos[-1].get("status") == "completed"
315
+ )
316
+ summary_todo_completed = all_todos_completed and last_todo_is_summary
317
+
318
+ if not summary_todo_completed and any(
319
+ t.get("status") == "completed"
320
+ and "작업 요약" in t.get("content", "")
321
+ for t in todos
322
+ ):
323
+ logger.debug(
324
+ "Previous summary todo completed but new tasks exist - NOT treating as final summary"
325
+ )
180
326
 
181
- # Check if last message is final_answer_tool result - if so, don't retry/synthesize
182
- # This allows agent to naturally terminate after final_answer_tool
327
+ # Check if summary content exists in messages
183
328
  messages = request.messages
184
- if messages:
185
- last_msg = messages[-1]
186
- if getattr(last_msg, "type", "") == "tool":
187
- tool_name = getattr(last_msg, "name", "") or ""
188
- if not tool_name:
189
- try:
190
- content_json = json.loads(last_msg.content)
191
- tool_name = content_json.get("tool", "")
192
- except (json.JSONDecodeError, TypeError, AttributeError):
193
- pass
194
- if tool_name in ("final_answer_tool", "final_answer"):
195
- logger.info(
196
- "Last message is final_answer_tool result - allowing natural termination"
197
- )
198
- # Just call handler and return response as-is (no retry/synthesize)
199
- return handler(request)
329
+ summary_exists = False
330
+ for msg in messages[-15:]:
331
+ msg_content = getattr(msg, "content", "") or ""
332
+ if '"summary"' in msg_content and '"next_items"' in msg_content:
333
+ summary_exists = True
334
+ break
335
+ if any(
336
+ kw in msg_content
337
+ for kw in [
338
+ "다음 단계 제안",
339
+ "다음 단계:",
340
+ "### 다음 단계",
341
+ "## 다음 단계",
342
+ "**다음 단계**",
343
+ "모든 작업이 완료",
344
+ "**작업 요약**",
345
+ "### 작업 요약",
346
+ "## 작업 요약",
347
+ ]
348
+ ):
349
+ summary_exists = True
350
+ break
200
351
 
201
352
  for attempt in range(max_retries + 1):
202
353
  response = handler(request)
203
354
 
355
+ # If summary todo is completed AND summary content exists, accept empty response
356
+ # This prevents infinite loop when inject_continuation_middleware returns empty AIMessage
357
+ response_message = _extract_ai_message(response)
358
+ if summary_todo_completed and summary_exists:
359
+ has_content_check = (
360
+ bool(getattr(response_message, "content", None))
361
+ if response_message
362
+ else False
363
+ )
364
+ has_tool_calls_check = (
365
+ bool(getattr(response_message, "tool_calls", None))
366
+ if response_message
367
+ else False
368
+ )
369
+ if not has_content_check and not has_tool_calls_check:
370
+ logger.info(
371
+ "Summary todo completed AND summary exists - accepting empty response (agent should stop)"
372
+ )
373
+ return response
374
+
204
375
  # Extract AIMessage from response
205
376
  response_message = _extract_ai_message(response)
206
377
 
@@ -227,6 +398,23 @@ def create_handle_empty_response_middleware(wrap_model_call):
227
398
  if has_tool_calls:
228
399
  return response
229
400
 
401
+ # Try to extract and repair tool_calls from additional_kwargs
402
+ # Some LLMs (e.g., gpt-oss-120b) put tool_calls in additional_kwargs
403
+ # but with malformed arguments (missing '{', broken JSON, etc.)
404
+ if response_message and not has_tool_calls:
405
+ repaired_message = try_extract_tool_calls_from_additional_kwargs(
406
+ response_message
407
+ )
408
+ if repaired_message and repaired_message.tool_calls:
409
+ logger.info(
410
+ "Repaired tool_calls from additional_kwargs: %d calls",
411
+ len(repaired_message.tool_calls),
412
+ )
413
+ response = _replace_ai_message_in_response(
414
+ response, repaired_message
415
+ )
416
+ return response
417
+
230
418
  # Try to parse JSON from content
231
419
  if has_content and response_message:
232
420
  parsed = parse_json_tool_call(response_message.content)
@@ -242,6 +430,117 @@ def create_handle_empty_response_middleware(wrap_model_call):
242
430
  response = _replace_ai_message_in_response(response, new_message)
243
431
  return response
244
432
 
433
+ # Check if content is summary JSON (for summary todo)
434
+ # Summary JSON has "summary" and "next_items" but no "tool"
435
+ # IMPORTANT: Check for summary JSON pattern FIRST, regardless of current todo
436
+ # This handles cases where LLM outputs summary JSON mixed with other content
437
+ content = response_message.content
438
+ if isinstance(content, list):
439
+ content = " ".join(str(p) for p in content)
440
+
441
+ # Check if content contains summary JSON pattern
442
+ has_summary_pattern = ('"summary"' in content or "'summary'" in content) and (
443
+ '"next_items"' in content or "'next_items'" in content
444
+ )
445
+
446
+ if has_summary_pattern:
447
+ # Try to extract and repair summary JSON from mixed content
448
+ try:
449
+ # Try to find JSON object containing summary
450
+ import re
451
+ json_match = re.search(r'\{[^{}]*"summary"[^{}]*"next_items"[^{}]*\}', content, re.DOTALL)
452
+ if json_match:
453
+ repaired_summary = repair_json(
454
+ json_match.group(), return_objects=True
455
+ )
456
+ else:
457
+ repaired_summary = repair_json(
458
+ content, return_objects=True
459
+ )
460
+
461
+ if (
462
+ isinstance(repaired_summary, dict)
463
+ and "summary" in repaired_summary
464
+ and "next_items" in repaired_summary
465
+ ):
466
+ # Create new message with repaired JSON content
467
+ repaired_content = json.dumps(
468
+ repaired_summary, ensure_ascii=False
469
+ )
470
+ logger.info(
471
+ "Detected and repaired summary JSON in content (pattern-based detection)"
472
+ )
473
+ # Create message with repaired content
474
+ repaired_response_message = AIMessage(
475
+ content=repaired_content,
476
+ tool_calls=getattr(
477
+ response_message, "tool_calls", []
478
+ )
479
+ or [],
480
+ )
481
+ synthetic_message = _create_synthetic_completion(
482
+ request,
483
+ repaired_response_message,
484
+ has_content=True,
485
+ )
486
+ response = _replace_ai_message_in_response(
487
+ response, synthetic_message
488
+ )
489
+ return response
490
+ except Exception as e:
491
+ logger.debug(f"Failed to extract summary JSON from mixed content: {e}")
492
+
493
+ # Fallback: accept as-is if repair failed but looks like summary
494
+ logger.info(
495
+ "Detected summary JSON pattern in content - accepting and synthesizing write_todos"
496
+ )
497
+ synthetic_message = _create_synthetic_completion(
498
+ request, response_message, has_content=True
499
+ )
500
+ response = _replace_ai_message_in_response(
501
+ response, synthetic_message
502
+ )
503
+ return response
504
+
505
+ # Legacy: Also check if current todo is a summary todo (backward compatibility)
506
+ todos = request.state.get("todos", [])
507
+ in_progress_todos = [
508
+ t for t in todos if t.get("status") == "in_progress"
509
+ ]
510
+ pending_todos = [t for t in todos if t.get("status") == "pending"]
511
+ current_todo = (
512
+ in_progress_todos[0]
513
+ if in_progress_todos
514
+ else pending_todos[0]
515
+ if pending_todos
516
+ else None
517
+ )
518
+ if current_todo:
519
+ summary_keywords = [
520
+ "작업 요약",
521
+ "결과 요약",
522
+ "분석 요약",
523
+ "요약 및",
524
+ "다음단계",
525
+ "다음 단계",
526
+ "next step",
527
+ ]
528
+ is_summary_todo = any(
529
+ kw in current_todo.get("content", "") for kw in summary_keywords
530
+ )
531
+ if is_summary_todo and ('"summary"' in content or "'summary'" in content):
532
+ # This is a summary todo with summary content - accept it
533
+ logger.info(
534
+ "Summary todo with summary content detected - accepting"
535
+ )
536
+ synthetic_message = _create_synthetic_completion(
537
+ request, response_message, has_content=True
538
+ )
539
+ response = _replace_ai_message_in_response(
540
+ response, synthetic_message
541
+ )
542
+ return response
543
+
245
544
  # Invalid response - retry with JSON schema prompt
246
545
  if response_message and attempt < max_retries:
247
546
  reason = "text-only" if has_content else "empty"
@@ -256,7 +555,7 @@ def create_handle_empty_response_middleware(wrap_model_call):
256
555
  reason,
257
556
  )
258
557
  # Synthesize write_todos while preserving the content (summary)
259
- synthetic_message = _create_synthetic_final_answer(
558
+ synthetic_message = _create_synthetic_completion(
260
559
  request, response_message, has_content
261
560
  )
262
561
  response = _replace_ai_message_in_response(
@@ -271,6 +570,8 @@ def create_handle_empty_response_middleware(wrap_model_call):
271
570
  attempt + 1,
272
571
  max_retries + 1,
273
572
  )
573
+ if reason == "text-only":
574
+ _log_invalid_ai_message(response_message, reason)
274
575
 
275
576
  request = request.override(
276
577
  messages=request.messages + [HumanMessage(content=json_prompt)]
@@ -293,7 +594,7 @@ def create_handle_empty_response_middleware(wrap_model_call):
293
594
  logger.warning(
294
595
  "Max retries exhausted. Synthesizing write_todos to complete."
295
596
  )
296
- synthetic_message = _create_synthetic_final_answer(
597
+ synthetic_message = _create_synthetic_completion(
297
598
  request, response_message, has_content
298
599
  )
299
600
  response = _replace_ai_message_in_response(response, synthetic_message)
@@ -325,6 +626,23 @@ def _extract_ai_message(response):
325
626
  return None
326
627
 
327
628
 
629
+ def _log_invalid_ai_message(response_message, reason: str) -> None:
630
+ """Log full AIMessage details for invalid (text-only) responses."""
631
+ if not response_message:
632
+ return
633
+ try:
634
+ payload = _serialize_message(response_message)
635
+ except Exception as exc:
636
+ logger.warning(
637
+ "Invalid AIMessage detail (%s): failed to serialize (%s). Raw=%r",
638
+ reason,
639
+ exc,
640
+ response_message,
641
+ )
642
+ return
643
+ logger.warning("Invalid AIMessage detail (%s): %s", reason, _pretty_json(payload))
644
+
645
+
328
646
  def _replace_ai_message_in_response(response, new_message):
329
647
  """Replace AIMessage in response with a new one."""
330
648
  if hasattr(response, "result"):
@@ -344,25 +662,51 @@ def _build_json_prompt(request, response_message, has_content):
344
662
  pending_todos = [t for t in todos if t.get("status") in ("pending", "in_progress")]
345
663
  in_progress_todos = [t for t in todos if t.get("status") == "in_progress"]
346
664
 
665
+ # Check if CURRENT todo (first in_progress or first pending) is summary/next_steps
666
+ # Not checking ALL pending todos - only the one we should be working on now
667
+ summary_keywords = [
668
+ "작업 요약",
669
+ "결과 요약",
670
+ "분석 요약",
671
+ "요약 및",
672
+ "다음단계",
673
+ "다음 단계",
674
+ "next step",
675
+ ]
676
+ current_todo = (
677
+ in_progress_todos[0]
678
+ if in_progress_todos
679
+ else pending_todos[0]
680
+ if pending_todos
681
+ else None
682
+ )
683
+ is_summary_todo = current_todo is not None and any(
684
+ kw in current_todo.get("content", "") for kw in summary_keywords
685
+ )
686
+
347
687
  if has_content:
348
688
  # If all todos completed, don't force another tool call
349
689
  if todos and not pending_todos:
350
690
  return None # Signal to skip retry
351
691
 
352
- # If current in_progress todo is "작업 요약 및 다음단계 제시", accept text-only response
692
+ # If current todo is "작업 요약 및 다음단계 제시", accept text-only response
353
693
  # The LLM is outputting the summary, we'll synthesize write_todos
354
- if in_progress_todos:
355
- current_todo = in_progress_todos[0].get("content", "")
356
- if (
357
- "작업 요약" in current_todo
358
- or "다음단계" in current_todo
359
- or "다음 단계" in current_todo
360
- ):
361
- logger.info(
362
- "Current todo is summary/next steps ('%s'), accepting text-only response",
363
- current_todo[:30],
364
- )
365
- return None # Signal to skip retry - will synthesize write_todos with content
694
+ if is_summary_todo:
695
+ summary_todo = next(
696
+ (
697
+ t
698
+ for t in pending_todos
699
+ if any(kw in t.get("content", "") for kw in summary_keywords)
700
+ ),
701
+ {"content": "summary"},
702
+ )
703
+ logger.info(
704
+ "Current todo is summary/next steps ('%s'), accepting text-only response",
705
+ summary_todo.get("content", "")[:30],
706
+ )
707
+ return (
708
+ None # Signal to skip retry - will synthesize write_todos with content
709
+ )
366
710
 
367
711
  return (
368
712
  f"{JSON_TOOL_SCHEMA}\n\n"
@@ -370,6 +714,18 @@ def _build_json_prompt(request, response_message, has_content):
370
714
  f"Call the next appropriate tool to continue.\n"
371
715
  f'Example: {{"tool": "jupyter_cell_tool", "arguments": {{"code": "print(\'hello\')"}}}}'
372
716
  )
717
+ elif is_summary_todo:
718
+ # Empty response but current todo is summary - force summary JSON output
719
+ logger.info(
720
+ "Empty response but current todo is summary/next steps - forcing summary JSON prompt"
721
+ )
722
+ return (
723
+ f"{JSON_TOOL_SCHEMA}\n\n"
724
+ f"You MUST output a summary JSON with next_items. This is the final step.\n"
725
+ f"출력 형식 (반드시 이 형식으로 출력):\n"
726
+ f'{{"summary": "완료된 작업 요약 (한국어)", "next_items": [{{"subject": "다음 작업 제목", "description": "설명"}}]}}\n\n'
727
+ f"Do NOT call any tool. Just output the summary JSON directly in your response."
728
+ )
373
729
  elif pending_todos:
374
730
  todo_list = ", ".join(t.get("content", "")[:20] for t in pending_todos[:3])
375
731
  example_json = '{"tool": "jupyter_cell_tool", "arguments": {"code": "import pandas as pd\\ndf = pd.read_csv(\'titanic.csv\')\\nprint(df.head())"}}'
@@ -400,7 +756,7 @@ def _build_json_prompt(request, response_message, has_content):
400
756
  )
401
757
 
402
758
 
403
- def _create_synthetic_final_answer(request, response_message, has_content):
759
+ def _create_synthetic_completion(request, response_message, has_content):
404
760
  """Create synthetic write_todos call to mark all todos as completed.
405
761
 
406
762
  This triggers automatic session termination via router's all_todos_completed check.
@@ -408,6 +764,15 @@ def _create_synthetic_final_answer(request, response_message, has_content):
408
764
  """
409
765
  todos = request.state.get("todos", [])
410
766
 
767
+ # Warn if there are pending todos being force-completed
768
+ pending_count = sum(1 for t in todos if t.get("status") == "pending")
769
+ if pending_count > 0:
770
+ logger.warning(
771
+ "Force-completing %d pending todos that were never started: %s",
772
+ pending_count,
773
+ [t.get("content") for t in todos if t.get("status") == "pending"]
774
+ )
775
+
411
776
  # Mark all todos as completed
412
777
  completed_todos = (
413
778
  [{**todo, "status": "completed"} for todo in todos]
@@ -614,80 +979,63 @@ def create_normalize_tool_args_middleware(wrap_model_call, tools=None):
614
979
  )
615
980
  args[key] = normalized_value
616
981
 
617
- # Ensure write_todos includes summary todo as last item
982
+ # Validate write_todos: Only ONE item should be in_progress at a time
618
983
  if tool_name == "write_todos" and "todos" in args:
619
984
  todos = args["todos"]
620
985
  if isinstance(todos, list) and len(todos) > 0:
621
- # Check if any todo contains summary keywords
622
- summary_keywords = [
623
- "작업 요약",
624
- "다음단계",
625
- "다음 단계",
626
- "요약 및",
627
- ]
628
- has_summary = any(
629
- any(
630
- kw in todo.get("content", "")
631
- for kw in summary_keywords
632
- )
633
- for todo in todos
634
- if isinstance(todo, dict)
986
+ # Validate: Only ONE item should be in_progress at a time
987
+ # If multiple in_progress, keep only the first one
988
+ in_progress_count = sum(
989
+ 1
990
+ for t in todos
991
+ if isinstance(t, dict)
992
+ and t.get("status") == "in_progress"
635
993
  )
636
-
637
- if not has_summary:
638
- # Add summary todo as last item
639
- summary_todo = {
640
- "content": "작업 요약 및 다음단계 제시",
641
- "status": "pending",
642
- }
643
- todos.append(summary_todo)
644
- logger.info(
645
- "Auto-added '작업 요약 및 다음단계 제시' to write_todos (total: %d todos)",
646
- len(todos),
647
- )
648
-
649
- # Log warning if summary todo is completed without JSON (but don't block)
650
- for todo in todos:
651
- if not isinstance(todo, dict):
652
- continue
653
- content = todo.get("content", "")
654
- status = todo.get("status", "")
655
- is_summary_todo = any(
656
- kw in content for kw in summary_keywords
657
- )
658
- if is_summary_todo and status == "completed":
659
- # Check if AIMessage content has summary JSON
660
- msg_content = getattr(msg, "content", "") or ""
661
- if isinstance(msg_content, list):
662
- msg_content = " ".join(
663
- str(p) for p in msg_content
664
- )
665
- has_summary_json = (
666
- '"summary"' in msg_content
667
- and '"next_items"' in msg_content
668
- ) or (
669
- "'summary'" in msg_content
670
- and "'next_items'" in msg_content
671
- )
672
- if not has_summary_json:
673
- # Just log warning - don't block completion to avoid infinite loop
674
- logger.warning(
675
- "Summary todo marked completed but no summary JSON in content. "
676
- "Allowing completion to proceed. Content: %s",
677
- msg_content[:200],
678
- )
994
+ if in_progress_count > 1:
995
+ found_first = False
996
+ for todo in todos:
997
+ if not isinstance(todo, dict):
998
+ continue
999
+ if todo.get("status") == "in_progress":
1000
+ if found_first:
1001
+ # Reset subsequent in_progress to pending
1002
+ todo["status"] = "pending"
1003
+ logger.info(
1004
+ "Reset duplicate in_progress todo to pending: %s",
1005
+ todo.get("content", "")[
1006
+ :30
1007
+ ],
1008
+ )
1009
+ else:
1010
+ found_first = True
1011
+
1012
+ # NOTE: Previously had logic to revert summary todo to in_progress
1013
+ # if no summary JSON was found. This caused infinite loops
1014
+ # where LLM kept calling write_todos repeatedly.
1015
+ # Now we let the natural termination logic handle this.
1016
+ #
1017
+ # NOTE: Also removed logic to preserve todos when LLM tries to delete them.
1018
+ # The LLM should be able to modify todos freely when:
1019
+ # - User rejects code approval
1020
+ # - User changes their request
1021
+ # - Code execution fails
1022
+ # We rely on prompts to guide proper todo management.
679
1023
 
680
1024
  return response
681
1025
 
682
1026
  return normalize_tool_args
683
1027
 
684
1028
 
685
- def create_inject_continuation_middleware(wrap_model_call):
686
- """Create middleware to inject continuation prompt after non-HITL tool execution.
1029
+ def create_continuation_control_middleware(wrap_model_call):
1030
+ """Create unified middleware for continuation control.
687
1031
 
688
- Non-HITL tools execute immediately without user approval, which can cause
689
- Gemini to produce empty responses. This middleware injects a system message
690
- to remind the LLM to continue with the next action.
1032
+ This middleware combines two functions:
1033
+ 1. BEFORE handler: Inject continuation prompt after non-HITL tool execution
1034
+ - Checks for summary completion and stops if done
1035
+ - Injects "[SYSTEM] Tool completed..." messages to guide LLM
1036
+ 2. AFTER handler: Prevent auto-continuation after summary JSON output
1037
+ - Strips write_todos from responses containing summary JSON
1038
+ - Prevents agent from auto-creating new todos after task completion
691
1039
 
692
1040
  Args:
693
1041
  wrap_model_call: LangChain's wrap_model_call decorator
@@ -696,89 +1044,198 @@ def create_inject_continuation_middleware(wrap_model_call):
696
1044
  Middleware function
697
1045
  """
698
1046
 
1047
+ def _check_summary_exists(messages, last_real_human_idx: int) -> bool:
1048
+ """Check if summary content exists in messages after last real user message."""
1049
+ messages_to_check = (
1050
+ messages[last_real_human_idx + 1 :]
1051
+ if last_real_human_idx >= 0
1052
+ else messages[-15:]
1053
+ )
1054
+ for msg in messages_to_check:
1055
+ msg_content = getattr(msg, "content", "") or ""
1056
+ # Check for summary JSON
1057
+ if '"summary"' in msg_content and '"next_items"' in msg_content:
1058
+ return True
1059
+ # Check for markdown summary (common patterns)
1060
+ if any(
1061
+ kw in msg_content
1062
+ for kw in [
1063
+ "다음 단계 제안",
1064
+ "다음 단계:",
1065
+ "### 다음 단계",
1066
+ "## 다음 단계",
1067
+ "**다음 단계**",
1068
+ "모든 작업이 완료",
1069
+ "**작업 요약**",
1070
+ "### 작업 요약",
1071
+ "## 작업 요약",
1072
+ ]
1073
+ ):
1074
+ return True
1075
+ return False
1076
+
1077
+ def _find_last_real_human_idx(messages) -> int:
1078
+ """Find index of last real HumanMessage (not system-injected)."""
1079
+ last_real_human_idx = -1
1080
+ for i, msg in enumerate(messages):
1081
+ msg_type = getattr(msg, "type", "") or type(msg).__name__
1082
+ if msg_type in ("human", "HumanMessage"):
1083
+ msg_content = getattr(msg, "content", "") or ""
1084
+ if not msg_content.startswith("[SYSTEM]"):
1085
+ last_real_human_idx = i
1086
+ return last_real_human_idx
1087
+
699
1088
  @wrap_model_call
700
- @_with_middleware_logging("inject_continuation_after_non_hitl_tool")
701
- def inject_continuation_after_non_hitl_tool(request, handler):
1089
+ @_with_middleware_logging("continuation_control")
1090
+ def continuation_control(request, handler):
702
1091
  messages = request.messages
703
- if not messages:
704
- return handler(request)
705
-
706
- last_msg = messages[-1]
707
- if getattr(last_msg, "type", "") == "tool":
708
- tool_name = getattr(last_msg, "name", "") or ""
709
-
710
- # Try to extract tool name from content
711
- if not tool_name:
712
- try:
713
- content_json = json.loads(last_msg.content)
714
- tool_name = content_json.get("tool", "")
715
- except (json.JSONDecodeError, TypeError, AttributeError):
716
- pass
717
-
718
- if tool_name in NON_HITL_TOOLS:
719
- # Method 1: Check state.todos
720
- todos = request.state.get("todos", [])
721
- pending_todos = [
722
- t for t in todos if t.get("status") in ("pending", "in_progress")
723
- ]
724
1092
 
725
- # If all todos are completed, DON'T call LLM - return empty response to stop agent
726
- if not pending_todos and todos:
1093
+ # ===== BEFORE HANDLER: Inject continuation prompt =====
1094
+ if messages:
1095
+ last_msg = messages[-1]
1096
+ if getattr(last_msg, "type", "") == "tool":
1097
+ tool_name = getattr(last_msg, "name", "") or ""
1098
+
1099
+ # Try to extract tool name from content
1100
+ if not tool_name:
1101
+ try:
1102
+ content_json = json.loads(last_msg.content)
1103
+ tool_name = content_json.get("tool", "")
1104
+ except (json.JSONDecodeError, TypeError, AttributeError):
1105
+ pass
1106
+
1107
+ if tool_name in NON_HITL_TOOLS:
1108
+ todos = request.state.get("todos", [])
1109
+
1110
+ last_real_human_idx = _find_last_real_human_idx(messages)
1111
+ summary_exists = _check_summary_exists(
1112
+ messages, last_real_human_idx
1113
+ )
1114
+
1115
+ # STOP if summary exists (regardless of todo status)
1116
+ if summary_exists:
1117
+ logger.info(
1118
+ "Summary exists after tool: %s - stopping agent (user must request next steps)",
1119
+ tool_name,
1120
+ )
1121
+ return AIMessage(content="", tool_calls=[])
1122
+
1123
+ pending_todos = [
1124
+ t
1125
+ for t in todos
1126
+ if t.get("status") in ("pending", "in_progress")
1127
+ ]
1128
+
1129
+ # If all todos completed but no summary yet, allow LLM call for summary
1130
+ if not pending_todos and todos:
1131
+ logger.info(
1132
+ "All %d todos completed, no summary yet after tool: %s - allowing LLM for summary",
1133
+ len(todos),
1134
+ tool_name,
1135
+ )
1136
+
727
1137
  logger.info(
728
- "All todos completed (from state) after tool: %s - stopping agent (no LLM call)",
1138
+ "Injecting continuation prompt after non-HITL tool: %s",
729
1139
  tool_name,
730
1140
  )
731
- return AIMessage(content="", tool_calls=[])
732
1141
 
733
- # Method 2: Check ToolMessage content for write_todos
734
- if tool_name == "write_todos" or "Updated todo list to" in (last_msg.content or ""):
735
- try:
736
- import ast
737
- content = last_msg.content or ""
738
- if "Updated todo list to" in content:
739
- list_text = content.split("Updated todo list to", 1)[1].strip()
740
- todos_from_msg = ast.literal_eval(list_text)
741
- if isinstance(todos_from_msg, list) and len(todos_from_msg) > 0:
742
- pending = [t for t in todos_from_msg if t.get("status") in ("pending", "in_progress")]
743
- if not pending:
744
- logger.info(
745
- "All %d todos completed (from ToolMessage) after tool: %s - stopping agent",
746
- len(todos_from_msg),
747
- tool_name,
748
- )
749
- return AIMessage(content="", tool_calls=[])
750
- except Exception as e:
751
- logger.debug("Failed to parse todos from ToolMessage: %s", e)
1142
+ # Skip continuation injection for write_todos
1143
+ # This prevents auto-continuation to next task after completing one
1144
+ # Agent will decide next action based on its own reasoning
1145
+ if tool_name == "write_todos":
1146
+ logger.info(
1147
+ "Skipping continuation prompt after write_todos - "
1148
+ "agent decides next action (pending: %d)",
1149
+ len(pending_todos) if pending_todos else 0,
1150
+ )
1151
+ # Don't inject continuation - let agent naturally continue or stop
1152
+ elif pending_todos:
1153
+ pending_list = ", ".join(
1154
+ t.get("content", "")[:30] for t in pending_todos[:3]
1155
+ )
1156
+ continuation = (
1157
+ f"Tool '{tool_name}' completed. "
1158
+ f"Continue with pending tasks: {pending_list}. "
1159
+ f"Call jupyter_cell_tool or the next appropriate tool."
1160
+ )
1161
+ new_messages = list(messages) + [
1162
+ HumanMessage(content=f"[SYSTEM] {continuation}")
1163
+ ]
1164
+ request = request.override(messages=new_messages)
1165
+ else:
1166
+ continuation = (
1167
+ f"Tool '{tool_name}' completed. "
1168
+ f"Create a todo list with write_todos if needed."
1169
+ )
1170
+ new_messages = list(messages) + [
1171
+ HumanMessage(content=f"[SYSTEM] {continuation}")
1172
+ ]
1173
+ request = request.override(messages=new_messages)
1174
+
1175
+ # ===== CALL HANDLER =====
1176
+ response = handler(request)
1177
+
1178
+ # ===== AFTER HANDLER: Strip write_todos if summary JSON present =====
1179
+ response_message = _extract_ai_message(response)
1180
+ if not response_message:
1181
+ return response
1182
+
1183
+ # Get content - handle both string and list formats
1184
+ content = getattr(response_message, "content", "") or ""
1185
+ if isinstance(content, list):
1186
+ content = " ".join(
1187
+ str(p) if isinstance(p, str) else p.get("text", "")
1188
+ for p in content
1189
+ if isinstance(p, (str, dict))
1190
+ )
752
1191
 
1192
+ # Check if content contains summary JSON pattern
1193
+ has_summary_json = '"summary"' in content and '"next_items"' in content
1194
+
1195
+ if has_summary_json:
1196
+ tool_calls = getattr(response_message, "tool_calls", []) or []
1197
+ write_todos_calls = [
1198
+ tc for tc in tool_calls if tc.get("name") == "write_todos"
1199
+ ]
1200
+
1201
+ if write_todos_calls:
753
1202
  logger.info(
754
- "Injecting continuation prompt after non-HITL tool: %s",
755
- tool_name,
1203
+ "Summary JSON 감지 - write_todos 호출 제거 (자동 계속 방지). "
1204
+ "제거된 write_todos 호출 수: %d",
1205
+ len(write_todos_calls),
756
1206
  )
757
1207
 
758
- if pending_todos:
759
- pending_list = ", ".join(
760
- t.get("content", "")[:30] for t in pending_todos[:3]
761
- )
762
- continuation = (
763
- f"Tool '{tool_name}' completed. "
764
- f"Continue with pending tasks: {pending_list}. "
765
- f"Call jupyter_cell_tool or the next appropriate tool."
766
- )
767
- else:
768
- # No todos yet - let agent create them
769
- continuation = (
770
- f"Tool '{tool_name}' completed. "
771
- f"Create a todo list with write_todos if needed."
772
- )
773
-
774
- new_messages = list(messages) + [
775
- HumanMessage(content=f"[SYSTEM] {continuation}")
1208
+ filtered_tool_calls = [
1209
+ tc for tc in tool_calls if tc.get("name") != "write_todos"
776
1210
  ]
777
- request = request.override(messages=new_messages)
778
1211
 
779
- return handler(request)
1212
+ new_message = AIMessage(
1213
+ content=response_message.content,
1214
+ tool_calls=filtered_tool_calls,
1215
+ additional_kwargs=getattr(
1216
+ response_message, "additional_kwargs", {}
1217
+ ),
1218
+ response_metadata=getattr(
1219
+ response_message, "response_metadata", {}
1220
+ ),
1221
+ )
1222
+
1223
+ response = _replace_ai_message_in_response(response, new_message)
1224
+
1225
+ return response
1226
+
1227
+ return continuation_control
1228
+
1229
+
1230
+ # Backward compatibility aliases
1231
+ def create_inject_continuation_middleware(wrap_model_call):
1232
+ """Deprecated: Use create_continuation_control_middleware instead."""
1233
+ return create_continuation_control_middleware(wrap_model_call)
1234
+
780
1235
 
781
- return inject_continuation_after_non_hitl_tool
1236
+ def create_prevent_auto_continuation_middleware(wrap_model_call):
1237
+ """Deprecated: Use create_continuation_control_middleware instead."""
1238
+ return create_continuation_control_middleware(wrap_model_call)
782
1239
 
783
1240
 
784
1241
  def create_patch_tool_calls_middleware(AgentMiddleware, ToolMessage, Overwrite):