hdsp-jupyter-extension 2.0.10__py3-none-any.whl → 2.0.13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (106) hide show
  1. agent_server/core/notebook_generator.py +4 -4
  2. agent_server/langchain/MULTI_AGENT_ARCHITECTURE.md +1114 -0
  3. agent_server/langchain/__init__.py +2 -2
  4. agent_server/langchain/agent.py +72 -33
  5. agent_server/langchain/agent_factory.py +400 -0
  6. agent_server/langchain/agent_prompts/__init__.py +25 -0
  7. agent_server/langchain/agent_prompts/athena_query_prompt.py +71 -0
  8. agent_server/langchain/agent_prompts/planner_prompt.py +85 -0
  9. agent_server/langchain/agent_prompts/python_developer_prompt.py +123 -0
  10. agent_server/langchain/agent_prompts/researcher_prompt.py +38 -0
  11. agent_server/langchain/custom_middleware.py +656 -113
  12. agent_server/langchain/hitl_config.py +38 -9
  13. agent_server/langchain/llm_factory.py +1 -85
  14. agent_server/langchain/middleware/__init__.py +24 -0
  15. agent_server/langchain/middleware/code_history_middleware.py +412 -0
  16. agent_server/langchain/middleware/description_injector.py +150 -0
  17. agent_server/langchain/middleware/skill_middleware.py +298 -0
  18. agent_server/langchain/middleware/subagent_events.py +171 -0
  19. agent_server/langchain/middleware/subagent_middleware.py +329 -0
  20. agent_server/langchain/prompts.py +107 -135
  21. agent_server/langchain/skills/data_analysis.md +236 -0
  22. agent_server/langchain/skills/data_loading.md +158 -0
  23. agent_server/langchain/skills/inference.md +392 -0
  24. agent_server/langchain/skills/model_training.md +318 -0
  25. agent_server/langchain/skills/pyspark.md +352 -0
  26. agent_server/langchain/subagents/__init__.py +20 -0
  27. agent_server/langchain/subagents/base.py +173 -0
  28. agent_server/langchain/tools/__init__.py +3 -0
  29. agent_server/langchain/tools/jupyter_tools.py +58 -20
  30. agent_server/langchain/tools/lsp_tools.py +1 -1
  31. agent_server/langchain/tools/shared/__init__.py +26 -0
  32. agent_server/langchain/tools/shared/qdrant_search.py +175 -0
  33. agent_server/langchain/tools/tool_registry.py +219 -0
  34. agent_server/langchain/tools/workspace_tools.py +197 -0
  35. agent_server/prompts/file_action_prompts.py +8 -8
  36. agent_server/routers/config.py +40 -1
  37. agent_server/routers/langchain_agent.py +868 -321
  38. hdsp_agent_core/__init__.py +46 -47
  39. hdsp_agent_core/factory.py +6 -10
  40. hdsp_agent_core/interfaces.py +4 -2
  41. hdsp_agent_core/knowledge/__init__.py +5 -5
  42. hdsp_agent_core/knowledge/chunking.py +87 -61
  43. hdsp_agent_core/knowledge/loader.py +103 -101
  44. hdsp_agent_core/llm/service.py +192 -107
  45. hdsp_agent_core/managers/config_manager.py +16 -22
  46. hdsp_agent_core/managers/session_manager.py +5 -4
  47. hdsp_agent_core/models/__init__.py +12 -12
  48. hdsp_agent_core/models/agent.py +15 -8
  49. hdsp_agent_core/models/common.py +1 -2
  50. hdsp_agent_core/models/rag.py +48 -111
  51. hdsp_agent_core/prompts/__init__.py +12 -12
  52. hdsp_agent_core/prompts/cell_action_prompts.py +9 -7
  53. hdsp_agent_core/services/agent_service.py +10 -8
  54. hdsp_agent_core/services/chat_service.py +10 -6
  55. hdsp_agent_core/services/rag_service.py +3 -6
  56. hdsp_agent_core/tests/conftest.py +4 -1
  57. hdsp_agent_core/tests/test_factory.py +2 -2
  58. hdsp_agent_core/tests/test_services.py +12 -19
  59. {hdsp_jupyter_extension-2.0.10.data → hdsp_jupyter_extension-2.0.13.data}/data/share/jupyter/labextensions/hdsp-agent/build_log.json +1 -1
  60. {hdsp_jupyter_extension-2.0.10.data → hdsp_jupyter_extension-2.0.13.data}/data/share/jupyter/labextensions/hdsp-agent/package.json +7 -2
  61. hdsp_jupyter_extension-2.0.10.data/data/share/jupyter/labextensions/hdsp-agent/static/frontend_styles_index_js.2d9fb488c82498c45c2d.js → hdsp_jupyter_extension-2.0.13.data/data/share/jupyter/labextensions/hdsp-agent/static/frontend_styles_index_js.037b3c8e5d6a92b63b16.js +1108 -179
  62. hdsp_jupyter_extension-2.0.13.data/data/share/jupyter/labextensions/hdsp-agent/static/frontend_styles_index_js.037b3c8e5d6a92b63b16.js.map +1 -0
  63. jupyter_ext/labextension/static/lib_index_js.dc6434bee96ab03a0539.js → hdsp_jupyter_extension-2.0.13.data/data/share/jupyter/labextensions/hdsp-agent/static/lib_index_js.5449ba3c7e25177d2987.js +3936 -8144
  64. hdsp_jupyter_extension-2.0.13.data/data/share/jupyter/labextensions/hdsp-agent/static/lib_index_js.5449ba3c7e25177d2987.js.map +1 -0
  65. hdsp_jupyter_extension-2.0.10.data/data/share/jupyter/labextensions/hdsp-agent/static/remoteEntry.4a252df3ade74efee8d6.js → hdsp_jupyter_extension-2.0.13.data/data/share/jupyter/labextensions/hdsp-agent/static/remoteEntry.a8e0b064eb9b1c1ff463.js +17 -17
  66. hdsp_jupyter_extension-2.0.13.data/data/share/jupyter/labextensions/hdsp-agent/static/remoteEntry.a8e0b064eb9b1c1ff463.js.map +1 -0
  67. {hdsp_jupyter_extension-2.0.10.dist-info → hdsp_jupyter_extension-2.0.13.dist-info}/METADATA +1 -1
  68. {hdsp_jupyter_extension-2.0.10.dist-info → hdsp_jupyter_extension-2.0.13.dist-info}/RECORD +100 -76
  69. jupyter_ext/__init__.py +21 -11
  70. jupyter_ext/_version.py +1 -1
  71. jupyter_ext/handlers.py +128 -58
  72. jupyter_ext/labextension/build_log.json +1 -1
  73. jupyter_ext/labextension/package.json +7 -2
  74. jupyter_ext/labextension/static/{frontend_styles_index_js.2d9fb488c82498c45c2d.js → frontend_styles_index_js.037b3c8e5d6a92b63b16.js} +1108 -179
  75. jupyter_ext/labextension/static/frontend_styles_index_js.037b3c8e5d6a92b63b16.js.map +1 -0
  76. hdsp_jupyter_extension-2.0.10.data/data/share/jupyter/labextensions/hdsp-agent/static/lib_index_js.dc6434bee96ab03a0539.js → jupyter_ext/labextension/static/lib_index_js.5449ba3c7e25177d2987.js +3936 -8144
  77. jupyter_ext/labextension/static/lib_index_js.5449ba3c7e25177d2987.js.map +1 -0
  78. jupyter_ext/labextension/static/{remoteEntry.4a252df3ade74efee8d6.js → remoteEntry.a8e0b064eb9b1c1ff463.js} +17 -17
  79. jupyter_ext/labextension/static/remoteEntry.a8e0b064eb9b1c1ff463.js.map +1 -0
  80. hdsp_jupyter_extension-2.0.10.data/data/share/jupyter/labextensions/hdsp-agent/static/frontend_styles_index_js.2d9fb488c82498c45c2d.js.map +0 -1
  81. hdsp_jupyter_extension-2.0.10.data/data/share/jupyter/labextensions/hdsp-agent/static/lib_index_js.dc6434bee96ab03a0539.js.map +0 -1
  82. hdsp_jupyter_extension-2.0.10.data/data/share/jupyter/labextensions/hdsp-agent/static/remoteEntry.4a252df3ade74efee8d6.js.map +0 -1
  83. jupyter_ext/labextension/static/frontend_styles_index_js.2d9fb488c82498c45c2d.js.map +0 -1
  84. jupyter_ext/labextension/static/lib_index_js.dc6434bee96ab03a0539.js.map +0 -1
  85. jupyter_ext/labextension/static/remoteEntry.4a252df3ade74efee8d6.js.map +0 -1
  86. {hdsp_jupyter_extension-2.0.10.data → hdsp_jupyter_extension-2.0.13.data}/data/etc/jupyter/jupyter_server_config.d/hdsp_jupyter_extension.json +0 -0
  87. {hdsp_jupyter_extension-2.0.10.data → hdsp_jupyter_extension-2.0.13.data}/data/share/jupyter/labextensions/hdsp-agent/install.json +0 -0
  88. {hdsp_jupyter_extension-2.0.10.data → hdsp_jupyter_extension-2.0.13.data}/data/share/jupyter/labextensions/hdsp-agent/static/node_modules_emotion_use-insertion-effect-with-fallbacks_dist_emotion-use-insertion-effect-wi-3ba6b80.c095373419d05e6f141a.js +0 -0
  89. {hdsp_jupyter_extension-2.0.10.data → hdsp_jupyter_extension-2.0.13.data}/data/share/jupyter/labextensions/hdsp-agent/static/node_modules_emotion_use-insertion-effect-with-fallbacks_dist_emotion-use-insertion-effect-wi-3ba6b80.c095373419d05e6f141a.js.map +0 -0
  90. {hdsp_jupyter_extension-2.0.10.data → hdsp_jupyter_extension-2.0.13.data}/data/share/jupyter/labextensions/hdsp-agent/static/node_modules_emotion_use-insertion-effect-with-fallbacks_dist_emotion-use-insertion-effect-wi-3ba6b81.61e75fb98ecff46cf836.js +0 -0
  91. {hdsp_jupyter_extension-2.0.10.data → hdsp_jupyter_extension-2.0.13.data}/data/share/jupyter/labextensions/hdsp-agent/static/node_modules_emotion_use-insertion-effect-with-fallbacks_dist_emotion-use-insertion-effect-wi-3ba6b81.61e75fb98ecff46cf836.js.map +0 -0
  92. {hdsp_jupyter_extension-2.0.10.data → hdsp_jupyter_extension-2.0.13.data}/data/share/jupyter/labextensions/hdsp-agent/static/style.js +0 -0
  93. {hdsp_jupyter_extension-2.0.10.data → hdsp_jupyter_extension-2.0.13.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_babel_runtime_helpers_esm_extends_js-node_modules_emotion_serialize_dist-051195.e2553aab0c3963b83dd7.js +0 -0
  94. {hdsp_jupyter_extension-2.0.10.data → hdsp_jupyter_extension-2.0.13.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_babel_runtime_helpers_esm_extends_js-node_modules_emotion_serialize_dist-051195.e2553aab0c3963b83dd7.js.map +0 -0
  95. {hdsp_jupyter_extension-2.0.10.data → hdsp_jupyter_extension-2.0.13.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_emotion_cache_dist_emotion-cache_browser_development_esm_js.24edcc52a1c014a8a5f0.js +0 -0
  96. {hdsp_jupyter_extension-2.0.10.data → hdsp_jupyter_extension-2.0.13.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_emotion_cache_dist_emotion-cache_browser_development_esm_js.24edcc52a1c014a8a5f0.js.map +0 -0
  97. {hdsp_jupyter_extension-2.0.10.data → hdsp_jupyter_extension-2.0.13.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_emotion_react_dist_emotion-react_browser_development_esm_js.19ecf6babe00caff6b8a.js +0 -0
  98. {hdsp_jupyter_extension-2.0.10.data → hdsp_jupyter_extension-2.0.13.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_emotion_react_dist_emotion-react_browser_development_esm_js.19ecf6babe00caff6b8a.js.map +0 -0
  99. {hdsp_jupyter_extension-2.0.10.data → hdsp_jupyter_extension-2.0.13.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_emotion_styled_dist_emotion-styled_browser_development_esm_js.661fb5836f4978a7c6e1.js +0 -0
  100. {hdsp_jupyter_extension-2.0.10.data → hdsp_jupyter_extension-2.0.13.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_emotion_styled_dist_emotion-styled_browser_development_esm_js.661fb5836f4978a7c6e1.js.map +0 -0
  101. {hdsp_jupyter_extension-2.0.10.data → hdsp_jupyter_extension-2.0.13.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_mui_material_index_js.985697e0162d8d088ca2.js +0 -0
  102. {hdsp_jupyter_extension-2.0.10.data → hdsp_jupyter_extension-2.0.13.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_mui_material_index_js.985697e0162d8d088ca2.js.map +0 -0
  103. {hdsp_jupyter_extension-2.0.10.data → hdsp_jupyter_extension-2.0.13.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_mui_material_utils_createSvgIcon_js.1f5038488cdfd8b3a85d.js +0 -0
  104. {hdsp_jupyter_extension-2.0.10.data → hdsp_jupyter_extension-2.0.13.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_mui_material_utils_createSvgIcon_js.1f5038488cdfd8b3a85d.js.map +0 -0
  105. {hdsp_jupyter_extension-2.0.10.dist-info → hdsp_jupyter_extension-2.0.13.dist-info}/WHEEL +0 -0
  106. {hdsp_jupyter_extension-2.0.10.dist-info → hdsp_jupyter_extension-2.0.13.dist-info}/licenses/LICENSE +0 -0
@@ -11,10 +11,13 @@ import re
11
11
  import uuid
12
12
  from typing import Any, Dict, Optional
13
13
 
14
+ from json_repair import repair_json
14
15
  from langchain_core.messages import AIMessage, HumanMessage
15
16
 
16
17
  from agent_server.langchain.logging_utils import (
17
18
  _format_middleware_marker,
19
+ _pretty_json,
20
+ _serialize_message,
18
21
  _with_middleware_logging,
19
22
  )
20
23
  from agent_server.langchain.prompts import JSON_TOOL_SCHEMA, NON_HITL_TOOLS
@@ -75,6 +78,15 @@ def parse_json_tool_call(text) -> Optional[Dict[str, Any]]:
75
78
  except json.JSONDecodeError:
76
79
  pass
77
80
 
81
+ # Try json-repair for malformed JSON from LLMs
82
+ try:
83
+ repaired = repair_json(text, return_objects=True)
84
+ if isinstance(repaired, dict) and "tool" in repaired:
85
+ logger.info(f"Repaired malformed JSON tool call: {repaired.get('tool')}")
86
+ return repaired
87
+ except Exception as e:
88
+ logger.debug(f"json-repair failed: {e}")
89
+
78
90
  return None
79
91
 
80
92
 
@@ -83,15 +95,110 @@ def normalize_tool_name(tool_name: str) -> str:
83
95
 
84
96
  Rules:
85
97
  - write_todos_tool → write_todos (TodoListMiddleware exception)
98
+ - task → task_tool (SubAgentMiddleware uses task_tool)
86
99
  - other tools without _tool suffix → add _tool suffix
87
100
  """
88
101
  if tool_name == "write_todos_tool":
89
102
  return "write_todos"
90
- if not tool_name.endswith("_tool") and tool_name != "write_todos":
103
+ if tool_name == "task":
104
+ return "task_tool"
105
+ if not tool_name.endswith("_tool") and tool_name not in ("write_todos",):
91
106
  return f"{tool_name}_tool"
92
107
  return tool_name
93
108
 
94
109
 
110
def repair_tool_call_arguments(arguments: str) -> Optional[Dict[str, Any]]:
    """Parse tool call arguments, repairing malformed JSON when necessary.

    Some LLMs (e.g., gpt-oss-120b) return arguments without the leading '{'
    or with other JSON formatting issues.

    Args:
        arguments: Raw arguments string from the LLM.

    Returns:
        The arguments as a dictionary, or None when the input is empty, is
        not a string, does not describe a JSON object, or cannot be repaired.
    """
    if not arguments or not isinstance(arguments, str):
        return None

    arguments = arguments.strip()

    # Try a direct parse first.
    try:
        parsed = json.loads(arguments)
    except json.JSONDecodeError:
        pass
    else:
        # Well-formed JSON that is not an object (list, number, string, ...)
        # is not a usable argument mapping; honour the declared return type
        # instead of handing a non-dict value back to the caller.
        return parsed if isinstance(parsed, dict) else None

    # Fall back to json-repair for malformed arguments.
    try:
        repaired = repair_json(arguments, return_objects=True)
    except Exception as e:  # best-effort: a failed repair must never raise
        logger.debug("json-repair failed for arguments: %s", e)
        return None

    if isinstance(repaired, dict):
        logger.info("Repaired malformed tool arguments")
        return repaired

    return None
143
+
144
+
145
def try_extract_tool_calls_from_additional_kwargs(
    response_message,
) -> Optional[AIMessage]:
    """Try to extract and repair tool_calls from additional_kwargs.

    Some LLMs put tool_calls in additional_kwargs but with malformed
    arguments. This function repairs them where possible and builds a new
    AIMessage carrying the repaired calls.

    Args:
        response_message: AIMessage with potential tool_calls in
            additional_kwargs.

    Returns:
        New AIMessage with repaired tool_calls, or None if there was nothing
        to extract or no call could be repaired.
    """
    if not response_message:
        return None

    # Either value may be present but None, so normalise before using them.
    additional_kwargs = getattr(response_message, "additional_kwargs", None) or {}
    raw_tool_calls = additional_kwargs.get("tool_calls") or []
    if not raw_tool_calls:
        return None

    repaired_tool_calls = []
    for tc in raw_tool_calls:
        if not isinstance(tc, dict):
            continue  # malformed entry; nothing to extract from it

        func = tc.get("function") or {}
        if not isinstance(func, dict):
            continue

        name = func.get("name")
        if not name:
            continue

        arguments = func.get("arguments", "")
        if isinstance(arguments, dict):
            # Already decoded; no repair needed.
            args = arguments
        else:
            args = repair_tool_call_arguments(arguments)

        # Tool call args must be a mapping; skip anything else.
        if not isinstance(args, dict):
            continue

        repaired_tool_calls.append(
            {
                "name": normalize_tool_name(name),
                "args": args,
                # Also covers an explicit None/empty id, and only generates a
                # UUID when one is actually needed.
                "id": tc.get("id") or str(uuid.uuid4()),
                "type": "tool_call",
            }
        )

    if not repaired_tool_calls:
        return None

    logger.info(
        "Extracted %d tool calls from additional_kwargs",
        len(repaired_tool_calls),
    )
    return AIMessage(
        content=getattr(response_message, "content", "") or "",
        tool_calls=repaired_tool_calls,
    )
200
+
201
+
95
202
  def create_tool_call_message(tool_name: str, arguments: Dict[str, Any]) -> AIMessage:
96
203
  """Create AIMessage with tool_calls from parsed JSON.
97
204
 
@@ -125,7 +232,7 @@ def create_handle_empty_response_middleware(wrap_model_call):
125
232
  1. Detects empty or text-only responses (no tool_calls)
126
233
  2. Retries with JSON schema prompt to force structured output
127
234
  3. Parses JSON response and injects tool_calls into AIMessage
128
- 4. Falls back to synthetic final_answer if all else fails
235
+ 4. Falls back to synthetic write_todos completion if all else fails
129
236
 
130
237
  Args:
131
238
  wrap_model_call: LangChain's wrap_model_call decorator
@@ -139,42 +246,132 @@ def create_handle_empty_response_middleware(wrap_model_call):
139
246
  def handle_empty_response(request, handler):
140
247
  max_retries = 2
141
248
 
142
- # Check if all todos are completed - if so, skip processing entirely
249
+ # Check if all todos are completed - if so, return empty response to stop agent
250
+ # Method 1: Check state.todos
143
251
  todos = request.state.get("todos", [])
252
+ logger.debug(
253
+ "handle_empty_response: state.todos=%s",
254
+ json.dumps(todos, ensure_ascii=False) if todos else "[]",
255
+ )
144
256
  if todos:
145
257
  pending_todos = [
146
258
  t for t in todos if t.get("status") in ("pending", "in_progress")
147
259
  ]
148
260
  if not pending_todos:
149
- logger.info(
150
- "All %d todos completed - skipping handle_empty_response middleware",
151
- len(todos),
261
+ # Check if summary already exists AFTER the last REAL HumanMessage
262
+ # (to avoid false positives from previous task summaries)
263
+ # Note: Skip system-injected messages like "[SYSTEM] Tool completed..."
264
+ summary_exists = False
265
+ messages = request.messages
266
+
267
+ # Find index of last REAL HumanMessage (not system-injected)
268
+ last_human_idx = -1
269
+ for i, msg in enumerate(messages):
270
+ msg_type = getattr(msg, "type", "") or type(msg).__name__
271
+ if msg_type in ("human", "HumanMessage"):
272
+ msg_content = getattr(msg, "content", "") or ""
273
+ # Skip system-injected messages
274
+ if not msg_content.startswith("[SYSTEM]"):
275
+ last_human_idx = i
276
+
277
+ # Only check messages after last REAL HumanMessage for summary
278
+ messages_to_check = (
279
+ messages[last_human_idx + 1 :]
280
+ if last_human_idx >= 0
281
+ else messages[-10:]
152
282
  )
153
- return handler(request)
283
+ for msg in messages_to_check:
284
+ content = getattr(msg, "content", "") or ""
285
+ if '"summary"' in content and '"next_items"' in content:
286
+ summary_exists = True
287
+ break
154
288
 
155
- # Check if last message is final_answer_tool result - if so, don't retry/synthesize
156
- # This allows agent to naturally terminate after final_answer_tool
157
- messages = request.messages
158
- if messages:
159
- last_msg = messages[-1]
160
- if getattr(last_msg, "type", "") == "tool":
161
- tool_name = getattr(last_msg, "name", "") or ""
162
- if not tool_name:
163
- try:
164
- content_json = json.loads(last_msg.content)
165
- tool_name = content_json.get("tool", "")
166
- except (json.JSONDecodeError, TypeError, AttributeError):
167
- pass
168
- if tool_name in ("final_answer_tool", "final_answer"):
289
+ if summary_exists:
169
290
  logger.info(
170
- "Last message is final_answer_tool result - allowing natural termination"
291
+ "All %d todos completed and summary exists after last user message - stopping agent (no LLM call)",
292
+ len(todos),
171
293
  )
172
- # Just call handler and return response as-is (no retry/synthesize)
173
- return handler(request)
294
+ return AIMessage(content="", tool_calls=[])
295
+ else:
296
+ # Allow one more LLM call for summary generation
297
+ logger.info(
298
+ "All %d todos completed but no summary yet after last user message - allowing LLM call for summary",
299
+ len(todos),
300
+ )
301
+
302
+ # Method 2: Check last message if it's a write_todos ToolMessage with all completed
303
+ # Note: We now allow one more LLM call for summary generation when all todos are completed
304
+ # This check is skipped to let the agent produce a summary
305
+
306
+ # Check if summary todo is completed
307
+ # IMPORTANT: Only consider summary completed if it's the LAST todo item and ALL todos are done
308
+ # This prevents false positives when a previous summary is completed but new tasks are added
309
+ all_todos_completed = all(t.get("status") == "completed" for t in todos)
310
+ last_todo_is_summary = (
311
+ len(todos) > 0
312
+ and "작업 요약" in todos[-1].get("content", "")
313
+ and "다음 단계" in todos[-1].get("content", "")
314
+ and todos[-1].get("status") == "completed"
315
+ )
316
+ summary_todo_completed = all_todos_completed and last_todo_is_summary
317
+
318
+ if not summary_todo_completed and any(
319
+ t.get("status") == "completed"
320
+ and "작업 요약" in t.get("content", "")
321
+ for t in todos
322
+ ):
323
+ logger.debug(
324
+ "Previous summary todo completed but new tasks exist - NOT treating as final summary"
325
+ )
326
+
327
+ # Check if summary content exists in messages
328
+ messages = request.messages
329
+ summary_exists = False
330
+ for msg in messages[-15:]:
331
+ msg_content = getattr(msg, "content", "") or ""
332
+ if '"summary"' in msg_content and '"next_items"' in msg_content:
333
+ summary_exists = True
334
+ break
335
+ if any(
336
+ kw in msg_content
337
+ for kw in [
338
+ "다음 단계 제안",
339
+ "다음 단계:",
340
+ "### 다음 단계",
341
+ "## 다음 단계",
342
+ "**다음 단계**",
343
+ "모든 작업이 완료",
344
+ "**작업 요약**",
345
+ "### 작업 요약",
346
+ "## 작업 요약",
347
+ ]
348
+ ):
349
+ summary_exists = True
350
+ break
174
351
 
175
352
  for attempt in range(max_retries + 1):
176
353
  response = handler(request)
177
354
 
355
+ # If summary todo is completed AND summary content exists, accept empty response
356
+ # This prevents infinite loop when inject_continuation_middleware returns empty AIMessage
357
+ response_message = _extract_ai_message(response)
358
+ if summary_todo_completed and summary_exists:
359
+ has_content_check = (
360
+ bool(getattr(response_message, "content", None))
361
+ if response_message
362
+ else False
363
+ )
364
+ has_tool_calls_check = (
365
+ bool(getattr(response_message, "tool_calls", None))
366
+ if response_message
367
+ else False
368
+ )
369
+ if not has_content_check and not has_tool_calls_check:
370
+ logger.info(
371
+ "Summary todo completed AND summary exists - accepting empty response (agent should stop)"
372
+ )
373
+ return response
374
+
178
375
  # Extract AIMessage from response
179
376
  response_message = _extract_ai_message(response)
180
377
 
@@ -201,6 +398,23 @@ def create_handle_empty_response_middleware(wrap_model_call):
201
398
  if has_tool_calls:
202
399
  return response
203
400
 
401
+ # Try to extract and repair tool_calls from additional_kwargs
402
+ # Some LLMs (e.g., gpt-oss-120b) put tool_calls in additional_kwargs
403
+ # but with malformed arguments (missing '{', broken JSON, etc.)
404
+ if response_message and not has_tool_calls:
405
+ repaired_message = try_extract_tool_calls_from_additional_kwargs(
406
+ response_message
407
+ )
408
+ if repaired_message and repaired_message.tool_calls:
409
+ logger.info(
410
+ "Repaired tool_calls from additional_kwargs: %d calls",
411
+ len(repaired_message.tool_calls),
412
+ )
413
+ response = _replace_ai_message_in_response(
414
+ response, repaired_message
415
+ )
416
+ return response
417
+
204
418
  # Try to parse JSON from content
205
419
  if has_content and response_message:
206
420
  parsed = parse_json_tool_call(response_message.content)
@@ -216,6 +430,117 @@ def create_handle_empty_response_middleware(wrap_model_call):
216
430
  response = _replace_ai_message_in_response(response, new_message)
217
431
  return response
218
432
 
433
+ # Check if content is summary JSON (for summary todo)
434
+ # Summary JSON has "summary" and "next_items" but no "tool"
435
+ # IMPORTANT: Check for summary JSON pattern FIRST, regardless of current todo
436
+ # This handles cases where LLM outputs summary JSON mixed with other content
437
+ content = response_message.content
438
+ if isinstance(content, list):
439
+ content = " ".join(str(p) for p in content)
440
+
441
+ # Check if content contains summary JSON pattern
442
+ has_summary_pattern = ('"summary"' in content or "'summary'" in content) and (
443
+ '"next_items"' in content or "'next_items'" in content
444
+ )
445
+
446
+ if has_summary_pattern:
447
+ # Try to extract and repair summary JSON from mixed content
448
+ try:
449
+ # Try to find JSON object containing summary
450
+ import re
451
+ json_match = re.search(r'\{[^{}]*"summary"[^{}]*"next_items"[^{}]*\}', content, re.DOTALL)
452
+ if json_match:
453
+ repaired_summary = repair_json(
454
+ json_match.group(), return_objects=True
455
+ )
456
+ else:
457
+ repaired_summary = repair_json(
458
+ content, return_objects=True
459
+ )
460
+
461
+ if (
462
+ isinstance(repaired_summary, dict)
463
+ and "summary" in repaired_summary
464
+ and "next_items" in repaired_summary
465
+ ):
466
+ # Create new message with repaired JSON content
467
+ repaired_content = json.dumps(
468
+ repaired_summary, ensure_ascii=False
469
+ )
470
+ logger.info(
471
+ "Detected and repaired summary JSON in content (pattern-based detection)"
472
+ )
473
+ # Create message with repaired content
474
+ repaired_response_message = AIMessage(
475
+ content=repaired_content,
476
+ tool_calls=getattr(
477
+ response_message, "tool_calls", []
478
+ )
479
+ or [],
480
+ )
481
+ synthetic_message = _create_synthetic_completion(
482
+ request,
483
+ repaired_response_message,
484
+ has_content=True,
485
+ )
486
+ response = _replace_ai_message_in_response(
487
+ response, synthetic_message
488
+ )
489
+ return response
490
+ except Exception as e:
491
+ logger.debug(f"Failed to extract summary JSON from mixed content: {e}")
492
+
493
+ # Fallback: accept as-is if repair failed but looks like summary
494
+ logger.info(
495
+ "Detected summary JSON pattern in content - accepting and synthesizing write_todos"
496
+ )
497
+ synthetic_message = _create_synthetic_completion(
498
+ request, response_message, has_content=True
499
+ )
500
+ response = _replace_ai_message_in_response(
501
+ response, synthetic_message
502
+ )
503
+ return response
504
+
505
+ # Legacy: Also check if current todo is a summary todo (backward compatibility)
506
+ todos = request.state.get("todos", [])
507
+ in_progress_todos = [
508
+ t for t in todos if t.get("status") == "in_progress"
509
+ ]
510
+ pending_todos = [t for t in todos if t.get("status") == "pending"]
511
+ current_todo = (
512
+ in_progress_todos[0]
513
+ if in_progress_todos
514
+ else pending_todos[0]
515
+ if pending_todos
516
+ else None
517
+ )
518
+ if current_todo:
519
+ summary_keywords = [
520
+ "작업 요약",
521
+ "결과 요약",
522
+ "분석 요약",
523
+ "요약 및",
524
+ "다음단계",
525
+ "다음 단계",
526
+ "next step",
527
+ ]
528
+ is_summary_todo = any(
529
+ kw in current_todo.get("content", "") for kw in summary_keywords
530
+ )
531
+ if is_summary_todo and ('"summary"' in content or "'summary'" in content):
532
+ # This is a summary todo with summary content - accept it
533
+ logger.info(
534
+ "Summary todo with summary content detected - accepting"
535
+ )
536
+ synthetic_message = _create_synthetic_completion(
537
+ request, response_message, has_content=True
538
+ )
539
+ response = _replace_ai_message_in_response(
540
+ response, synthetic_message
541
+ )
542
+ return response
543
+
219
544
  # Invalid response - retry with JSON schema prompt
220
545
  if response_message and attempt < max_retries:
221
546
  reason = "text-only" if has_content else "empty"
@@ -230,7 +555,7 @@ def create_handle_empty_response_middleware(wrap_model_call):
230
555
  reason,
231
556
  )
232
557
  # Synthesize write_todos while preserving the content (summary)
233
- synthetic_message = _create_synthetic_final_answer(
558
+ synthetic_message = _create_synthetic_completion(
234
559
  request, response_message, has_content
235
560
  )
236
561
  response = _replace_ai_message_in_response(
@@ -245,6 +570,8 @@ def create_handle_empty_response_middleware(wrap_model_call):
245
570
  attempt + 1,
246
571
  max_retries + 1,
247
572
  )
573
+ if reason == "text-only":
574
+ _log_invalid_ai_message(response_message, reason)
248
575
 
249
576
  request = request.override(
250
577
  messages=request.messages + [HumanMessage(content=json_prompt)]
@@ -267,7 +594,7 @@ def create_handle_empty_response_middleware(wrap_model_call):
267
594
  logger.warning(
268
595
  "Max retries exhausted. Synthesizing write_todos to complete."
269
596
  )
270
- synthetic_message = _create_synthetic_final_answer(
597
+ synthetic_message = _create_synthetic_completion(
271
598
  request, response_message, has_content
272
599
  )
273
600
  response = _replace_ai_message_in_response(response, synthetic_message)
@@ -299,6 +626,23 @@ def _extract_ai_message(response):
299
626
  return None
300
627
 
301
628
 
629
+ def _log_invalid_ai_message(response_message, reason: str) -> None:
630
+ """Log full AIMessage details for invalid (text-only) responses."""
631
+ if not response_message:
632
+ return
633
+ try:
634
+ payload = _serialize_message(response_message)
635
+ except Exception as exc:
636
+ logger.warning(
637
+ "Invalid AIMessage detail (%s): failed to serialize (%s). Raw=%r",
638
+ reason,
639
+ exc,
640
+ response_message,
641
+ )
642
+ return
643
+ logger.warning("Invalid AIMessage detail (%s): %s", reason, _pretty_json(payload))
644
+
645
+
302
646
  def _replace_ai_message_in_response(response, new_message):
303
647
  """Replace AIMessage in response with a new one."""
304
648
  if hasattr(response, "result"):
@@ -318,25 +662,51 @@ def _build_json_prompt(request, response_message, has_content):
318
662
  pending_todos = [t for t in todos if t.get("status") in ("pending", "in_progress")]
319
663
  in_progress_todos = [t for t in todos if t.get("status") == "in_progress"]
320
664
 
665
+ # Check if CURRENT todo (first in_progress or first pending) is summary/next_steps
666
+ # Not checking ALL pending todos - only the one we should be working on now
667
+ summary_keywords = [
668
+ "작업 요약",
669
+ "결과 요약",
670
+ "분석 요약",
671
+ "요약 및",
672
+ "다음단계",
673
+ "다음 단계",
674
+ "next step",
675
+ ]
676
+ current_todo = (
677
+ in_progress_todos[0]
678
+ if in_progress_todos
679
+ else pending_todos[0]
680
+ if pending_todos
681
+ else None
682
+ )
683
+ is_summary_todo = current_todo is not None and any(
684
+ kw in current_todo.get("content", "") for kw in summary_keywords
685
+ )
686
+
321
687
  if has_content:
322
688
  # If all todos completed, don't force another tool call
323
689
  if todos and not pending_todos:
324
690
  return None # Signal to skip retry
325
691
 
326
- # If current in_progress todo is "작업 요약 및 다음단계 제시", accept text-only response
692
+ # If current todo is "작업 요약 및 다음단계 제시", accept text-only response
327
693
  # The LLM is outputting the summary, we'll synthesize write_todos
328
- if in_progress_todos:
329
- current_todo = in_progress_todos[0].get("content", "")
330
- if (
331
- "작업 요약" in current_todo
332
- or "다음단계" in current_todo
333
- or "다음 단계" in current_todo
334
- ):
335
- logger.info(
336
- "Current todo is summary/next steps ('%s'), accepting text-only response",
337
- current_todo[:30],
338
- )
339
- return None # Signal to skip retry - will synthesize write_todos with content
694
+ if is_summary_todo:
695
+ summary_todo = next(
696
+ (
697
+ t
698
+ for t in pending_todos
699
+ if any(kw in t.get("content", "") for kw in summary_keywords)
700
+ ),
701
+ {"content": "summary"},
702
+ )
703
+ logger.info(
704
+ "Current todo is summary/next steps ('%s'), accepting text-only response",
705
+ summary_todo.get("content", "")[:30],
706
+ )
707
+ return (
708
+ None # Signal to skip retry - will synthesize write_todos with content
709
+ )
340
710
 
341
711
  return (
342
712
  f"{JSON_TOOL_SCHEMA}\n\n"
@@ -344,6 +714,18 @@ def _build_json_prompt(request, response_message, has_content):
344
714
  f"Call the next appropriate tool to continue.\n"
345
715
  f'Example: {{"tool": "jupyter_cell_tool", "arguments": {{"code": "print(\'hello\')"}}}}'
346
716
  )
717
+ elif is_summary_todo:
718
+ # Empty response but current todo is summary - force summary JSON output
719
+ logger.info(
720
+ "Empty response but current todo is summary/next steps - forcing summary JSON prompt"
721
+ )
722
+ return (
723
+ f"{JSON_TOOL_SCHEMA}\n\n"
724
+ f"You MUST output a summary JSON with next_items. This is the final step.\n"
725
+ f"출력 형식 (반드시 이 형식으로 출력):\n"
726
+ f'{{"summary": "완료된 작업 요약 (한국어)", "next_items": [{{"subject": "다음 작업 제목", "description": "설명"}}]}}\n\n'
727
+ f"Do NOT call any tool. Just output the summary JSON directly in your response."
728
+ )
347
729
  elif pending_todos:
348
730
  todo_list = ", ".join(t.get("content", "")[:20] for t in pending_todos[:3])
349
731
  example_json = '{"tool": "jupyter_cell_tool", "arguments": {"code": "import pandas as pd\\ndf = pd.read_csv(\'titanic.csv\')\\nprint(df.head())"}}'
@@ -374,7 +756,7 @@ def _build_json_prompt(request, response_message, has_content):
374
756
  )
375
757
 
376
758
 
377
- def _create_synthetic_final_answer(request, response_message, has_content):
759
+ def _create_synthetic_completion(request, response_message, has_content):
378
760
  """Create synthetic write_todos call to mark all todos as completed.
379
761
 
380
762
  This triggers automatic session termination via router's all_todos_completed check.
@@ -382,6 +764,15 @@ def _create_synthetic_final_answer(request, response_message, has_content):
382
764
  """
383
765
  todos = request.state.get("todos", [])
384
766
 
767
+ # Warn if there are pending todos being force-completed
768
+ pending_count = sum(1 for t in todos if t.get("status") == "pending")
769
+ if pending_count > 0:
770
+ logger.warning(
771
+ "Force-completing %d pending todos that were never started: %s",
772
+ pending_count,
773
+ [t.get("content") for t in todos if t.get("status") == "pending"]
774
+ )
775
+
385
776
  # Mark all todos as completed
386
777
  completed_todos = (
387
778
  [{**todo, "status": "completed"} for todo in todos]
@@ -588,41 +979,63 @@ def create_normalize_tool_args_middleware(wrap_model_call, tools=None):
588
979
  )
589
980
  args[key] = normalized_value
590
981
 
591
- # Ensure write_todos includes summary todo as last item
982
+ # Validate write_todos: Only ONE item should be in_progress at a time
592
983
  if tool_name == "write_todos" and "todos" in args:
593
984
  todos = args["todos"]
594
985
  if isinstance(todos, list) and len(todos) > 0:
595
- # Check if any todo contains summary keywords
596
- summary_keywords = ["작업 요약", "다음단계", "다음 단계", "요약 및"]
597
- has_summary = any(
598
- any(kw in todo.get("content", "") for kw in summary_keywords)
599
- for todo in todos
600
- if isinstance(todo, dict)
986
+ # Validate: Only ONE item should be in_progress at a time
987
+ # If multiple in_progress, keep only the first one
988
+ in_progress_count = sum(
989
+ 1
990
+ for t in todos
991
+ if isinstance(t, dict)
992
+ and t.get("status") == "in_progress"
601
993
  )
602
-
603
- if not has_summary:
604
- # Add summary todo as last item
605
- summary_todo = {
606
- "content": "작업 요약 및 다음단계 제시",
607
- "status": "pending"
608
- }
609
- todos.append(summary_todo)
610
- logger.info(
611
- "Auto-added '작업 요약 및 다음단계 제시' to write_todos (total: %d todos)",
612
- len(todos),
613
- )
994
+ if in_progress_count > 1:
995
+ found_first = False
996
+ for todo in todos:
997
+ if not isinstance(todo, dict):
998
+ continue
999
+ if todo.get("status") == "in_progress":
1000
+ if found_first:
1001
+ # Reset subsequent in_progress to pending
1002
+ todo["status"] = "pending"
1003
+ logger.info(
1004
+ "Reset duplicate in_progress todo to pending: %s",
1005
+ todo.get("content", "")[
1006
+ :30
1007
+ ],
1008
+ )
1009
+ else:
1010
+ found_first = True
1011
+
1012
+ # NOTE: Previously had logic to revert summary todo to in_progress
1013
+ # if no summary JSON was found. This caused infinite loops
1014
+ # where LLM kept calling write_todos repeatedly.
1015
+ # Now we let the natural termination logic handle this.
1016
+ #
1017
+ # NOTE: Also removed logic to preserve todos when LLM tries to delete them.
1018
+ # The LLM should be able to modify todos freely when:
1019
+ # - User rejects code approval
1020
+ # - User changes their request
1021
+ # - Code execution fails
1022
+ # We rely on prompts to guide proper todo management.
614
1023
 
615
1024
  return response
616
1025
 
617
1026
  return normalize_tool_args
618
1027
 
619
1028
 
620
- def create_inject_continuation_middleware(wrap_model_call):
621
- """Create middleware to inject continuation prompt after non-HITL tool execution.
1029
def create_continuation_control_middleware(wrap_model_call):
    """Create unified middleware for continuation control.

    The returned middleware wraps one model call and does two things:

    1. BEFORE the handler: when the last message is the result of a tool
       listed in ``NON_HITL_TOOLS``, it either short-circuits with an empty
       ``AIMessage`` (summary content already exists after the last real
       user message) or appends a ``[SYSTEM] Tool '...' completed...``
       ``HumanMessage`` to guide the LLM. Nothing is injected after
       ``write_todos``.
    2. AFTER the handler: when the response text contains summary JSON
       (both ``"summary"`` and ``"next_items"``), every ``write_todos``
       tool call is stripped from the response.

    Args:
        wrap_model_call: LangChain's wrap_model_call decorator

    Returns:
        Middleware function
    """

    # Markdown phrases treated as evidence that a summary was already written.
    summary_markers = (
        "다음 단계 제안",
        "다음 단계:",
        "### 다음 단계",
        "## 다음 단계",
        "**다음 단계**",
        "모든 작업이 완료",
        "**작업 요약**",
        "### 작업 요약",
        "## 작업 요약",
    )

    def _content_to_text(content) -> str:
        """Flatten message content (a string or a list of parts) to text.

        List parts may be plain strings or dicts carrying a "text" entry;
        anything else is ignored. Non-string, non-list content yields "".
        """
        if isinstance(content, str):
            return content
        if not isinstance(content, list):
            return ""
        pieces = []
        for part in content:
            if isinstance(part, str):
                pieces.append(part)
            elif isinstance(part, dict):
                text = part.get("text", "")
                # A non-string "text" value would break str.join; skip it.
                if isinstance(text, str):
                    pieces.append(text)
        return " ".join(pieces)

    def _has_summary_json(text: str) -> bool:
        """Return True when text carries both summary JSON keys."""
        return '"summary"' in text and '"next_items"' in text

    def _check_summary_exists(messages, last_real_human_idx: int) -> bool:
        """Check if summary content exists in messages after last real user message."""
        # Without a real user message, only the 15 most recent messages are scanned.
        messages_to_check = (
            messages[last_real_human_idx + 1 :]
            if last_real_human_idx >= 0
            else messages[-15:]
        )
        for msg in messages_to_check:
            # Flatten first so list-format content is searched as well.
            msg_text = _content_to_text(getattr(msg, "content", ""))
            if _has_summary_json(msg_text):
                return True
            if any(marker in msg_text for marker in summary_markers):
                return True
        return False

    def _find_last_real_human_idx(messages) -> int:
        """Find index of last real HumanMessage (not system-injected).

        Returns -1 when there is no such message.
        """
        last_real_human_idx = -1
        for i, msg in enumerate(messages):
            msg_type = getattr(msg, "type", "") or type(msg).__name__
            if msg_type not in ("human", "HumanMessage"):
                continue
            # Content may be a list of parts; calling startswith on it
            # directly would raise AttributeError.
            msg_text = _content_to_text(getattr(msg, "content", ""))
            if not msg_text.startswith("[SYSTEM]"):
                last_real_human_idx = i
        return last_real_human_idx

    def _last_tool_name(messages):
        """Return the tool name of the trailing tool message, else None."""
        if not messages:
            return None
        last_msg = messages[-1]
        if getattr(last_msg, "type", "") != "tool":
            return None
        tool_name = getattr(last_msg, "name", "") or ""
        if not tool_name:
            # Fall back to a {"tool": ...} entry in JSON-encoded content.
            try:
                content_json = json.loads(last_msg.content)
                tool_name = content_json.get("tool", "")
            except (json.JSONDecodeError, TypeError, AttributeError):
                pass
        return tool_name if isinstance(tool_name, str) else ""

    @wrap_model_call
    @_with_middleware_logging("continuation_control")
    def continuation_control(request, handler):
        messages = request.messages

        # ===== BEFORE HANDLER: Inject continuation prompt =====
        tool_name = _last_tool_name(messages)
        if tool_name is not None and tool_name in NON_HITL_TOOLS:
            todos = request.state.get("todos", []) or []

            last_real_human_idx = _find_last_real_human_idx(messages)

            # STOP if summary exists (regardless of todo status)
            if _check_summary_exists(messages, last_real_human_idx):
                logger.info(
                    "Summary exists after tool: %s - stopping agent (user must request next steps)",
                    tool_name,
                )
                return AIMessage(content="", tool_calls=[])

            pending_todos = [
                t
                for t in todos
                if isinstance(t, dict)
                and t.get("status") in ("pending", "in_progress")
            ]

            # If all todos completed but no summary yet, allow LLM call for summary
            if not pending_todos and todos:
                logger.info(
                    "All %d todos completed, no summary yet after tool: %s - allowing LLM for summary",
                    len(todos),
                    tool_name,
                )

            if tool_name == "write_todos":
                # No continuation after write_todos: the agent decides by
                # itself whether to move on to the next task or stop.
                logger.info(
                    "Skipping continuation prompt after write_todos - "
                    "agent decides next action (pending: %d)",
                    len(pending_todos),
                )
            else:
                if pending_todos:
                    pending_list = ", ".join(
                        str(t.get("content") or "")[:30]
                        for t in pending_todos[:3]
                    )
                    continuation = (
                        f"Tool '{tool_name}' completed. "
                        f"Continue with pending tasks: {pending_list}. "
                        f"Call jupyter_cell_tool or the next appropriate tool."
                    )
                else:
                    continuation = (
                        f"Tool '{tool_name}' completed. "
                        f"Create a todo list with write_todos if needed."
                    )
                # Logged only on the path that really injects a prompt.
                logger.info(
                    "Injecting continuation prompt after non-HITL tool: %s",
                    tool_name,
                )
                new_messages = list(messages) + [
                    HumanMessage(content=f"[SYSTEM] {continuation}")
                ]
                request = request.override(messages=new_messages)

        # ===== CALL HANDLER =====
        response = handler(request)

        # ===== AFTER HANDLER: Strip write_todos if summary JSON present =====
        response_message = _extract_ai_message(response)
        if not response_message:
            return response

        # Get content - handle both string and list formats
        content = _content_to_text(getattr(response_message, "content", ""))
        if not _has_summary_json(content):
            return response

        tool_calls = getattr(response_message, "tool_calls", []) or []
        filtered_tool_calls = [
            tc for tc in tool_calls if tc.get("name") != "write_todos"
        ]
        removed_count = len(tool_calls) - len(filtered_tool_calls)
        if removed_count == 0:
            return response

        logger.info(
            "Summary JSON 감지 - write_todos 호출 제거 (자동 계속 방지). "
            "제거된 write_todos 호출 수: %d",
            removed_count,
        )

        # Rebuild the message with its original content and metadata but
        # without the write_todos calls.
        new_message = AIMessage(
            content=response_message.content,
            tool_calls=filtered_tool_calls,
            additional_kwargs=getattr(response_message, "additional_kwargs", {}),
            response_metadata=getattr(response_message, "response_metadata", {}),
        )
        return _replace_ai_message_in_response(response, new_message)

    return continuation_control
1228
+
1229
+
1230
+ # Backward compatibility aliases
1231
def create_inject_continuation_middleware(wrap_model_call):
    """Deprecated: Use create_continuation_control_middleware instead.

    Kept as a backward-compatible alias; it forwards ``wrap_model_call``
    unchanged and returns whatever the replacement factory returns.
    """
    middleware = create_continuation_control_middleware(wrap_model_call)
    return middleware
1234
+
694
1235
 
695
- return inject_continuation_after_non_hitl_tool
1236
def create_prevent_auto_continuation_middleware(wrap_model_call):
    """Deprecated: Use create_continuation_control_middleware instead.

    Kept as a backward-compatible alias; it forwards ``wrap_model_call``
    unchanged and returns whatever the replacement factory returns.
    """
    middleware = create_continuation_control_middleware(wrap_model_call)
    return middleware
696
1239
 
697
1240
 
698
1241
  def create_patch_tool_calls_middleware(AgentMiddleware, ToolMessage, Overwrite):