hdsp-jupyter-extension 2.0.6__py3-none-any.whl → 2.0.8__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package as published to a supported registry, and is provided for informational purposes only.
Files changed (88)
  1. agent_server/core/embedding_service.py +67 -46
  2. agent_server/core/rag_manager.py +31 -17
  3. agent_server/core/reflection_engine.py +0 -1
  4. agent_server/core/retriever.py +13 -8
  5. agent_server/core/vllm_embedding_service.py +243 -0
  6. agent_server/knowledge/watchdog_service.py +1 -1
  7. agent_server/langchain/ARCHITECTURE.md +1193 -0
  8. agent_server/langchain/agent.py +82 -588
  9. agent_server/langchain/custom_middleware.py +663 -0
  10. agent_server/langchain/executors/__init__.py +2 -7
  11. agent_server/langchain/executors/notebook_searcher.py +46 -38
  12. agent_server/langchain/hitl_config.py +71 -0
  13. agent_server/langchain/llm_factory.py +166 -0
  14. agent_server/langchain/logging_utils.py +223 -0
  15. agent_server/langchain/prompts.py +150 -0
  16. agent_server/langchain/state.py +16 -6
  17. agent_server/langchain/tools/__init__.py +19 -0
  18. agent_server/langchain/tools/file_tools.py +354 -114
  19. agent_server/langchain/tools/file_utils.py +334 -0
  20. agent_server/langchain/tools/jupyter_tools.py +18 -18
  21. agent_server/langchain/tools/lsp_tools.py +264 -0
  22. agent_server/langchain/tools/resource_tools.py +161 -0
  23. agent_server/langchain/tools/search_tools.py +198 -216
  24. agent_server/langchain/tools/shell_tools.py +54 -0
  25. agent_server/main.py +11 -1
  26. agent_server/routers/health.py +1 -1
  27. agent_server/routers/langchain_agent.py +1040 -289
  28. agent_server/routers/rag.py +8 -3
  29. hdsp_agent_core/models/rag.py +15 -1
  30. hdsp_agent_core/prompts/auto_agent_prompts.py +3 -3
  31. hdsp_agent_core/services/rag_service.py +6 -1
  32. {hdsp_jupyter_extension-2.0.6.data → hdsp_jupyter_extension-2.0.8.data}/data/share/jupyter/labextensions/hdsp-agent/build_log.json +1 -1
  33. {hdsp_jupyter_extension-2.0.6.data → hdsp_jupyter_extension-2.0.8.data}/data/share/jupyter/labextensions/hdsp-agent/package.json +3 -2
  34. hdsp_jupyter_extension-2.0.6.data/data/share/jupyter/labextensions/hdsp-agent/static/frontend_styles_index_js.02d346171474a0fb2dc1.js → hdsp_jupyter_extension-2.0.8.data/data/share/jupyter/labextensions/hdsp-agent/static/frontend_styles_index_js.8740a527757068814573.js +470 -7
  35. hdsp_jupyter_extension-2.0.8.data/data/share/jupyter/labextensions/hdsp-agent/static/frontend_styles_index_js.8740a527757068814573.js.map +1 -0
  36. hdsp_jupyter_extension-2.0.6.data/data/share/jupyter/labextensions/hdsp-agent/static/lib_index_js.a223ea20056954479ae9.js → hdsp_jupyter_extension-2.0.8.data/data/share/jupyter/labextensions/hdsp-agent/static/lib_index_js.e4ff4b5779b5e049f84c.js +3196 -441
  37. hdsp_jupyter_extension-2.0.8.data/data/share/jupyter/labextensions/hdsp-agent/static/lib_index_js.e4ff4b5779b5e049f84c.js.map +1 -0
  38. hdsp_jupyter_extension-2.0.6.data/data/share/jupyter/labextensions/hdsp-agent/static/remoteEntry.addf2fa038fa60304aa2.js → hdsp_jupyter_extension-2.0.8.data/data/share/jupyter/labextensions/hdsp-agent/static/remoteEntry.020cdb0b864cfaa4e41e.js +9 -7
  39. hdsp_jupyter_extension-2.0.8.data/data/share/jupyter/labextensions/hdsp-agent/static/remoteEntry.020cdb0b864cfaa4e41e.js.map +1 -0
  40. {hdsp_jupyter_extension-2.0.6.dist-info → hdsp_jupyter_extension-2.0.8.dist-info}/METADATA +2 -1
  41. {hdsp_jupyter_extension-2.0.6.dist-info → hdsp_jupyter_extension-2.0.8.dist-info}/RECORD +75 -69
  42. jupyter_ext/__init__.py +18 -0
  43. jupyter_ext/_version.py +1 -1
  44. jupyter_ext/handlers.py +1351 -58
  45. jupyter_ext/labextension/build_log.json +1 -1
  46. jupyter_ext/labextension/package.json +3 -2
  47. jupyter_ext/labextension/static/{frontend_styles_index_js.02d346171474a0fb2dc1.js → frontend_styles_index_js.8740a527757068814573.js} +470 -7
  48. jupyter_ext/labextension/static/frontend_styles_index_js.8740a527757068814573.js.map +1 -0
  49. jupyter_ext/labextension/static/{lib_index_js.a223ea20056954479ae9.js → lib_index_js.e4ff4b5779b5e049f84c.js} +3196 -441
  50. jupyter_ext/labextension/static/lib_index_js.e4ff4b5779b5e049f84c.js.map +1 -0
  51. jupyter_ext/labextension/static/{remoteEntry.addf2fa038fa60304aa2.js → remoteEntry.020cdb0b864cfaa4e41e.js} +9 -7
  52. jupyter_ext/labextension/static/remoteEntry.020cdb0b864cfaa4e41e.js.map +1 -0
  53. jupyter_ext/resource_usage.py +180 -0
  54. jupyter_ext/tests/test_handlers.py +58 -0
  55. agent_server/langchain/executors/jupyter_executor.py +0 -429
  56. agent_server/langchain/middleware/__init__.py +0 -36
  57. agent_server/langchain/middleware/code_search_middleware.py +0 -278
  58. agent_server/langchain/middleware/error_handling_middleware.py +0 -338
  59. agent_server/langchain/middleware/jupyter_execution_middleware.py +0 -301
  60. agent_server/langchain/middleware/rag_middleware.py +0 -227
  61. agent_server/langchain/middleware/validation_middleware.py +0 -240
  62. hdsp_jupyter_extension-2.0.6.data/data/share/jupyter/labextensions/hdsp-agent/static/frontend_styles_index_js.02d346171474a0fb2dc1.js.map +0 -1
  63. hdsp_jupyter_extension-2.0.6.data/data/share/jupyter/labextensions/hdsp-agent/static/lib_index_js.a223ea20056954479ae9.js.map +0 -1
  64. hdsp_jupyter_extension-2.0.6.data/data/share/jupyter/labextensions/hdsp-agent/static/remoteEntry.addf2fa038fa60304aa2.js.map +0 -1
  65. jupyter_ext/labextension/static/frontend_styles_index_js.02d346171474a0fb2dc1.js.map +0 -1
  66. jupyter_ext/labextension/static/lib_index_js.a223ea20056954479ae9.js.map +0 -1
  67. jupyter_ext/labextension/static/remoteEntry.addf2fa038fa60304aa2.js.map +0 -1
  68. {hdsp_jupyter_extension-2.0.6.data → hdsp_jupyter_extension-2.0.8.data}/data/etc/jupyter/jupyter_server_config.d/hdsp_jupyter_extension.json +0 -0
  69. {hdsp_jupyter_extension-2.0.6.data → hdsp_jupyter_extension-2.0.8.data}/data/share/jupyter/labextensions/hdsp-agent/install.json +0 -0
  70. {hdsp_jupyter_extension-2.0.6.data → hdsp_jupyter_extension-2.0.8.data}/data/share/jupyter/labextensions/hdsp-agent/static/node_modules_emotion_use-insertion-effect-with-fallbacks_dist_emotion-use-insertion-effect-wi-3ba6b80.c095373419d05e6f141a.js +0 -0
  71. {hdsp_jupyter_extension-2.0.6.data → hdsp_jupyter_extension-2.0.8.data}/data/share/jupyter/labextensions/hdsp-agent/static/node_modules_emotion_use-insertion-effect-with-fallbacks_dist_emotion-use-insertion-effect-wi-3ba6b80.c095373419d05e6f141a.js.map +0 -0
  72. {hdsp_jupyter_extension-2.0.6.data → hdsp_jupyter_extension-2.0.8.data}/data/share/jupyter/labextensions/hdsp-agent/static/node_modules_emotion_use-insertion-effect-with-fallbacks_dist_emotion-use-insertion-effect-wi-3ba6b81.61e75fb98ecff46cf836.js +0 -0
  73. {hdsp_jupyter_extension-2.0.6.data → hdsp_jupyter_extension-2.0.8.data}/data/share/jupyter/labextensions/hdsp-agent/static/node_modules_emotion_use-insertion-effect-with-fallbacks_dist_emotion-use-insertion-effect-wi-3ba6b81.61e75fb98ecff46cf836.js.map +0 -0
  74. {hdsp_jupyter_extension-2.0.6.data → hdsp_jupyter_extension-2.0.8.data}/data/share/jupyter/labextensions/hdsp-agent/static/style.js +0 -0
  75. {hdsp_jupyter_extension-2.0.6.data → hdsp_jupyter_extension-2.0.8.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_babel_runtime_helpers_esm_extends_js-node_modules_emotion_serialize_dist-051195.e2553aab0c3963b83dd7.js +0 -0
  76. {hdsp_jupyter_extension-2.0.6.data → hdsp_jupyter_extension-2.0.8.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_babel_runtime_helpers_esm_extends_js-node_modules_emotion_serialize_dist-051195.e2553aab0c3963b83dd7.js.map +0 -0
  77. {hdsp_jupyter_extension-2.0.6.data → hdsp_jupyter_extension-2.0.8.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_emotion_cache_dist_emotion-cache_browser_development_esm_js.24edcc52a1c014a8a5f0.js +0 -0
  78. {hdsp_jupyter_extension-2.0.6.data → hdsp_jupyter_extension-2.0.8.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_emotion_cache_dist_emotion-cache_browser_development_esm_js.24edcc52a1c014a8a5f0.js.map +0 -0
  79. {hdsp_jupyter_extension-2.0.6.data → hdsp_jupyter_extension-2.0.8.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_emotion_react_dist_emotion-react_browser_development_esm_js.19ecf6babe00caff6b8a.js +0 -0
  80. {hdsp_jupyter_extension-2.0.6.data → hdsp_jupyter_extension-2.0.8.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_emotion_react_dist_emotion-react_browser_development_esm_js.19ecf6babe00caff6b8a.js.map +0 -0
  81. {hdsp_jupyter_extension-2.0.6.data → hdsp_jupyter_extension-2.0.8.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_emotion_styled_dist_emotion-styled_browser_development_esm_js.661fb5836f4978a7c6e1.js +0 -0
  82. {hdsp_jupyter_extension-2.0.6.data → hdsp_jupyter_extension-2.0.8.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_emotion_styled_dist_emotion-styled_browser_development_esm_js.661fb5836f4978a7c6e1.js.map +0 -0
  83. {hdsp_jupyter_extension-2.0.6.data → hdsp_jupyter_extension-2.0.8.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_mui_material_index_js.985697e0162d8d088ca2.js +0 -0
  84. {hdsp_jupyter_extension-2.0.6.data → hdsp_jupyter_extension-2.0.8.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_mui_material_index_js.985697e0162d8d088ca2.js.map +0 -0
  85. {hdsp_jupyter_extension-2.0.6.data → hdsp_jupyter_extension-2.0.8.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_mui_material_utils_createSvgIcon_js.1f5038488cdfd8b3a85d.js +0 -0
  86. {hdsp_jupyter_extension-2.0.6.data → hdsp_jupyter_extension-2.0.8.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_mui_material_utils_createSvgIcon_js.1f5038488cdfd8b3a85d.js.map +0 -0
  87. {hdsp_jupyter_extension-2.0.6.dist-info → hdsp_jupyter_extension-2.0.8.dist-info}/WHEEL +0 -0
  88. {hdsp_jupyter_extension-2.0.6.dist-info → hdsp_jupyter_extension-2.0.8.dist-info}/licenses/LICENSE +0 -0
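
The headline source change is agent_server/langchain/agent.py (+82/-588), diffed below. The inline system prompt, LLM construction, fallback middleware, and HITL interrupt table were extracted into the new prompts.py, llm_factory.py, custom_middleware.py, and hitl_config.py modules, so agent.py now only wires the pieces together. Hedged sketches of the extracted modules, reconstructed from the removed code, follow the relevant hunks.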
@@ -7,12 +7,32 @@ Main agent creation module for tool-driven chat execution.
 import logging
 from typing import Any, Dict, Optional
 
+from agent_server.langchain.custom_middleware import (
+    create_handle_empty_response_middleware,
+    create_inject_continuation_middleware,
+    create_limit_tool_calls_middleware,
+    create_normalize_tool_args_middleware,
+    create_patch_tool_calls_middleware,
+)
+from agent_server.langchain.hitl_config import get_hitl_interrupt_config
+from agent_server.langchain.llm_factory import create_llm, create_summarization_llm
+from agent_server.langchain.prompts import (
+    DEFAULT_SYSTEM_PROMPT,
+    TODO_LIST_SYSTEM_PROMPT,
+    TODO_LIST_TOOL_DESCRIPTION,
+)
 from agent_server.langchain.tools import (
+    check_resource_tool,
+    diagnostics_tool,
+    edit_file_tool,
+    execute_command_tool,
     final_answer_tool,
     jupyter_cell_tool,
     list_files_tool,
     markdown_tool,
+    multiedit_file_tool,
     read_file_tool,
+    references_tool,
     search_notebook_cells_tool,
     search_workspace_tool,
     write_file_tool,
@@ -20,121 +40,24 @@ from agent_server.langchain.tools import (
 
 logger = logging.getLogger(__name__)
 
-DEFAULT_SYSTEM_PROMPT = """You are an expert Python data scientist and Jupyter notebook assistant.
-Your role is to help users with data analysis, visualization, and Python coding tasks in Jupyter notebooks.
-
-## ⚠️ CRITICAL RULE: NEVER produce an empty response
-
-You MUST ALWAYS call a tool in every response. After any tool result, you MUST:
-1. Check your todo list - are there pending or in_progress items?
-2. If YES → call the next appropriate tool (jupyter_cell_tool, markdown_tool, etc.)
-3. If ALL todos are completed → call final_answer_tool with a summary
-
-NEVER end your turn without calling a tool. NEVER produce an empty response.
-
-## Available Tools
-1. **jupyter_cell_tool**: Execute Python code in a new notebook cell
-2. **markdown_tool**: Add a markdown explanation cell
-3. **final_answer_tool**: Complete the task with a summary - REQUIRED when done
-4. **read_file_tool**: Read file contents
-5. **write_file_tool**: Write file contents
-6. **list_files_tool**: List directory contents
-7. **search_workspace_tool**: Search for patterns in workspace files
-8. **search_notebook_cells_tool**: Search for patterns in notebook cells
-9. **write_todos**: Create and update task list for complex multi-step tasks
-
-## Mandatory Workflow
-1. After EVERY tool result, immediately call the next tool
-2. Continue until ALL todos show status: "completed"
-3. ONLY THEN call final_answer_tool to summarize
-4. If `!pip install` fails, use `!pip3 install` instead
-5. For plots and charts, use English text only
-
-## ❌ FORBIDDEN (will break the workflow)
-- Producing an empty response (no tool call, no content)
-- Stopping after any tool without calling the next tool
-- Ending without calling final_answer_tool
-- Leaving todos in "in_progress" or "pending" state without continuing
-"""
-
-
-def _create_llm(llm_config: Dict[str, Any]):
-    """Create LangChain LLM from config"""
-    provider = llm_config.get("provider", "gemini")
-
-    if provider == "gemini":
-        from langchain_google_genai import ChatGoogleGenerativeAI
-
-        gemini_config = llm_config.get("gemini", {})
-        api_key = gemini_config.get("apiKey")
-        model = gemini_config.get("model", "gemini-2.5-pro")
-
-        if not api_key:
-            raise ValueError("Gemini API key not configured")
-
-        logger.info(f"Creating Gemini LLM with model: {model}")
-
-        # Gemini 2.5 Flash has issues with tool calling in LangChain
-        # Use convert_system_message_to_human for better compatibility
-        llm = ChatGoogleGenerativeAI(
-            model=model,
-            google_api_key=api_key,
-            temperature=0.0,
-            max_output_tokens=8192,
-            convert_system_message_to_human=True,  # Better tool calling support
-        )
-        return llm
-
-    elif provider == "openai":
-        from langchain_openai import ChatOpenAI
-
-        openai_config = llm_config.get("openai", {})
-        api_key = openai_config.get("apiKey")
-        model = openai_config.get("model", "gpt-4")
-
-        if not api_key:
-            raise ValueError("OpenAI API key not configured")
-
-        llm = ChatOpenAI(
-            model=model,
-            api_key=api_key,
-            temperature=0.0,
-            max_tokens=4096,
-        )
-        return llm
-
-    elif provider == "vllm":
-        from langchain_openai import ChatOpenAI
-
-        vllm_config = llm_config.get("vllm", {})
-        endpoint = vllm_config.get("endpoint", "http://localhost:8000")
-        model = vllm_config.get("model", "default")
-        api_key = vllm_config.get("apiKey", "dummy")
-
-        llm = ChatOpenAI(
-            model=model,
-            api_key=api_key,
-            base_url=f"{endpoint}/v1",
-            temperature=0.0,
-            max_tokens=4096,
-        )
-        return llm
-
-    else:
-        raise ValueError(f"Unsupported LLM provider: {provider}")
-
 
 def _get_all_tools():
-    """Get all available tools for the agent"""
+    """Get all available tools for the agent."""
     return [
         jupyter_cell_tool,
         markdown_tool,
         final_answer_tool,
         read_file_tool,
         write_file_tool,
+        edit_file_tool,
+        multiedit_file_tool,
         list_files_tool,
         search_workspace_tool,
         search_notebook_cells_tool,
+        execute_command_tool,
+        check_resource_tool,
+        diagnostics_tool,
+        references_tool,
     ]
 
 
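llm_factory.py (+166 lines) is not part of this diff, but the removed _create_llm above and the create_llm/create_summarization_llm call sites below constrain its shape. A minimal sketch under those constraints: the provider dispatch is copied from the removed code, while the create_summarization_llm body (returning None on failure, matching the "if summary_llm:" guard later in the diff) is an assumption.

# llm_factory.py -- hypothetical sketch reconstructed from the removed _create_llm;
# only the names create_llm(llm_config) and create_summarization_llm(llm_config)
# are confirmed by this diff.
import logging
from typing import Any, Dict, Optional

logger = logging.getLogger(__name__)


def create_llm(llm_config: Dict[str, Any]):
    """Build a LangChain chat model from the provider section of the config."""
    provider = llm_config.get("provider", "gemini")

    if provider == "gemini":
        from langchain_google_genai import ChatGoogleGenerativeAI

        cfg = llm_config.get("gemini", {})
        if not cfg.get("apiKey"):
            raise ValueError("Gemini API key not configured")
        return ChatGoogleGenerativeAI(
            model=cfg.get("model", "gemini-2.5-pro"),
            google_api_key=cfg["apiKey"],
            temperature=0.0,
            max_output_tokens=8192,
        )

    if provider in ("openai", "vllm"):
        from langchain_openai import ChatOpenAI

        cfg = llm_config.get(provider, {})
        if provider == "openai" and not cfg.get("apiKey"):
            raise ValueError("OpenAI API key not configured")
        kwargs: Dict[str, Any] = {
            "model": cfg.get("model", "gpt-4" if provider == "openai" else "default"),
            "api_key": cfg.get("apiKey", "dummy"),
            "temperature": 0.0,
            "max_tokens": 4096,
        }
        if provider == "vllm":
            # vLLM serves an OpenAI-compatible API under /v1
            kwargs["base_url"] = f"{cfg.get('endpoint', 'http://localhost:8000')}/v1"
        return ChatOpenAI(**kwargs)

    raise ValueError(f"Unsupported LLM provider: {provider}")


def create_summarization_llm(llm_config: Dict[str, Any]) -> Optional[Any]:
    """Model for SummarizationMiddleware; returning None disables summarization."""
    try:
        return create_llm(llm_config)
    except Exception as exc:
        logger.warning("No summarization LLM available: %s", exc)
        return None
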
@@ -157,6 +80,8 @@ def create_simple_chat_agent(
         workspace_root: Root directory
         enable_hitl: Enable Human-in-the-Loop for code execution
         enable_todo_list: Enable TodoListMiddleware for task planning
+        checkpointer: Optional checkpointer for state persistence
+        system_prompt_override: Optional custom system prompt
 
     Returns:
         Configured agent with HITL and TodoList middleware
@@ -167,13 +92,12 @@
             AgentMiddleware,
             HumanInTheLoopMiddleware,
             ModelCallLimitMiddleware,
-            ModelRequest,
-            ModelResponse,
+            SummarizationMiddleware,
             TodoListMiddleware,
             ToolCallLimitMiddleware,
             wrap_model_call,
         )
-        from langchain_core.messages import AIMessage, ToolMessage as LCToolMessage
+        from langchain_core.messages import ToolMessage as LCToolMessage
         from langgraph.checkpoint.memory import InMemorySaver
         from langgraph.types import Overwrite
     except ImportError as e:
@@ -184,7 +108,7 @@
         ) from e
 
     # Create LLM
-    llm = _create_llm(llm_config)
+    llm = create_llm(llm_config)
 
     # Get tools
     tools = _get_all_tools()
@@ -192,500 +116,40 @@
     # Configure middleware
     middleware = []
 
-    # JSON Schema for fallback tool calling
-    JSON_TOOL_SCHEMA = """You MUST respond with ONLY valid JSON matching this schema:
-{
-    "tool": "<tool_name>",
-    "arguments": {"arg1": "value1", ...}
-}
-
-Available tools:
-- jupyter_cell_tool: Execute Python code. Arguments: {"code": "<python_code>"}
-- markdown_tool: Add markdown cell. Arguments: {"content": "<markdown>"}
-- final_answer_tool: Complete task. Arguments: {"answer": "<summary>"}
-- write_todos: Update task list. Arguments: {"todos": [{"content": "...", "status": "pending|in_progress|completed"}]}
-- read_file_tool: Read file. Arguments: {"path": "<file_path>"}
-- list_files_tool: List directory. Arguments: {"path": "."}
-
-Output ONLY the JSON object, no markdown, no explanation."""
-
-    def _parse_json_tool_call(text: str) -> Optional[Dict[str, Any]]:
-        """Parse JSON tool call from text response."""
-        import json
-        import re
-
-        if not text:
-            return None
-
-        # Clean up response
-        text = text.strip()
-        if text.startswith("```json"):
-            text = text[7:]
-        elif text.startswith("```"):
-            text = text[3:]
-        if text.endswith("```"):
-            text = text[:-3]
-        text = text.strip()
-
-        # Try direct JSON parse
-        try:
-            data = json.loads(text)
-            if "tool" in data:
-                return data
-        except json.JSONDecodeError:
-            pass
-
-        # Try to find JSON object in response
-        json_match = re.search(r'\{[\s\S]*\}', text)
-        if json_match:
-            try:
-                data = json.loads(json_match.group())
-                if "tool" in data:
-                    return data
-            except json.JSONDecodeError:
-                pass
-
-        return None
-
-    def _create_tool_call_message(tool_name: str, arguments: Dict[str, Any]) -> AIMessage:
-        """Create AIMessage with tool_calls from parsed JSON."""
-        import uuid
-
-        # Normalize tool name
-        if not tool_name.endswith("_tool"):
-            tool_name = f"{tool_name}_tool"
-
-        return AIMessage(
-            content="",
-            tool_calls=[
-                {
-                    "name": tool_name,
-                    "args": arguments,
-                    "id": str(uuid.uuid4()),
-                    "type": "tool_call",
-                }
-            ],
-        )
-
-    # Middleware to detect and handle empty LLM responses with JSON fallback
-    @wrap_model_call
-    def handle_empty_response(
-        request: ModelRequest,
-        handler,
-    ) -> ModelResponse:
-        """
-        Detect empty/invalid AIMessage responses and retry with JSON schema fallback.
-
-        For models that don't support native tool calling well (e.g., Gemini 2.5 Flash),
-        this middleware:
-        1. Detects empty or text-only responses (no tool_calls)
-        2. Retries with JSON schema prompt to force structured output
-        3. Parses JSON response and injects tool_calls into AIMessage
-        4. Falls back to synthetic final_answer if all else fails
-        """
-        import json
-        import uuid
-        from langchain_core.messages import HumanMessage
-
-        max_retries = 2  # Allow more retries for JSON fallback
-
-        for attempt in range(max_retries + 1):
-            response = handler(request)
-
-            # Extract AIMessage from response
-            response_message = None
-            if hasattr(response, 'result'):
-                result = response.result
-                if isinstance(result, list):
-                    for msg in reversed(result):
-                        if isinstance(msg, AIMessage):
-                            response_message = msg
-                            break
-                elif isinstance(result, AIMessage):
-                    response_message = result
-            elif hasattr(response, 'message'):
-                response_message = response.message
-            elif hasattr(response, 'messages') and response.messages:
-                response_message = response.messages[-1]
-            elif isinstance(response, AIMessage):
-                response_message = response
-
-            has_content = bool(getattr(response_message, 'content', None)) if response_message else False
-            has_tool_calls = bool(getattr(response_message, 'tool_calls', None)) if response_message else False
-
-            logger.info(
-                "handle_empty_response: attempt=%d, type=%s, content=%s, tool_calls=%s",
-                attempt + 1,
-                type(response_message).__name__ if response_message else None,
-                has_content,
-                has_tool_calls,
-            )
-
-            # Valid response with tool_calls
-            if has_tool_calls:
-                return response
-
-            # Try to parse JSON from content (model might have output JSON without tool_calls)
-            if has_content and response_message:
-                parsed = _parse_json_tool_call(response_message.content)
-                if parsed:
-                    tool_name = parsed.get("tool", "")
-                    arguments = parsed.get("arguments", {})
-                    logger.info(
-                        "Parsed JSON tool call from content: tool=%s",
-                        tool_name,
-                    )
-
-                    # Create new AIMessage with tool_calls
-                    new_message = _create_tool_call_message(tool_name, arguments)
-
-                    # Replace in response
-                    if hasattr(response, 'result'):
-                        if isinstance(response.result, list):
-                            new_result = [
-                                new_message if isinstance(m, AIMessage) else m
-                                for m in response.result
-                            ]
-                            response.result = new_result
-                        else:
-                            response.result = new_message
-                    return response
-
-            # Invalid response - retry with JSON schema prompt
-            if response_message and attempt < max_retries:
-                reason = "text-only" if has_content else "empty"
-                logger.warning(
-                    "Invalid AIMessage (%s) detected (attempt %d/%d). "
-                    "Retrying with JSON schema prompt...",
-                    reason,
-                    attempt + 1,
-                    max_retries + 1,
-                )
-
-                # Get context for prompt
-                todos = request.state.get("todos", [])
-                pending_todos = [
-                    t for t in todos
-                    if t.get("status") in ("pending", "in_progress")
-                ]
-
-                # Build JSON-forcing prompt
-                if has_content:
-                    # LLM wrote text - ask to wrap in final_answer
-                    content_preview = response_message.content[:300]
-                    json_prompt = (
-                        f"{JSON_TOOL_SCHEMA}\n\n"
-                        f"Your previous response was text, not JSON. "
-                        f"Wrap your answer in final_answer_tool:\n"
-                        f'{{"tool": "final_answer_tool", "arguments": {{"answer": "{content_preview}..."}}}}'
-                    )
-                elif pending_todos:
-                    todo_list = ", ".join(t.get("content", "")[:20] for t in pending_todos[:3])
-                    example_json = '{"tool": "jupyter_cell_tool", "arguments": {"code": "import pandas as pd\\ndf = pd.read_csv(\'titanic.csv\')\\nprint(df.head())"}}'
-                    json_prompt = (
-                        f"{JSON_TOOL_SCHEMA}\n\n"
-                        f"Pending tasks: {todo_list}\n"
-                        f"Call jupyter_cell_tool with Python code to complete the next task.\n"
-                        f"Example: {example_json}"
-                    )
-                else:
-                    json_prompt = (
-                        f"{JSON_TOOL_SCHEMA}\n\n"
-                        f"All tasks completed. Call final_answer_tool:\n"
-                        f'{{"tool": "final_answer_tool", "arguments": {{"answer": "작업이 완료되었습니다."}}}}'
-                    )
-
-                # Add JSON prompt and retry
-                request = request.override(
-                    messages=request.messages + [
-                        HumanMessage(content=json_prompt)
-                    ]
-                )
-                continue
-
-            # Max retries exhausted - synthesize final_answer
-            if response_message:
-                logger.warning(
-                    "Max retries exhausted. Synthesizing final_answer response."
-                )
-
-                # Use LLM's text content if available
-                if has_content and response_message.content:
-                    summary = response_message.content
-                    logger.info(
-                        "Using LLM's text content as final answer (length=%d)",
-                        len(summary),
-                    )
-                else:
-                    todos = request.state.get("todos", [])
-                    completed_todos = [
-                        t.get("content", "") for t in todos
-                        if t.get("status") == "completed"
-                    ]
-                    summary = (
-                        f"작업이 완료되었습니다. 완료된 항목: {', '.join(completed_todos[:5])}"
-                        if completed_todos
-                        else "작업이 완료되었습니다."
-                    )
-
-                # Create synthetic final_answer
-                synthetic_message = AIMessage(
-                    content="",
-                    tool_calls=[
-                        {
-                            "name": "final_answer_tool",
-                            "args": {"answer": summary},
-                            "id": str(uuid.uuid4()),
-                            "type": "tool_call",
-                        }
-                    ],
-                )
-
-                # Replace in response
-                if hasattr(response, 'result'):
-                    if isinstance(response.result, list):
-                        new_result = []
-                        replaced = False
-                        for msg in response.result:
-                            if isinstance(msg, AIMessage) and not replaced:
-                                new_result.append(synthetic_message)
-                                replaced = True
-                            else:
-                                new_result.append(msg)
-                        if not replaced:
-                            new_result.append(synthetic_message)
-                        response.result = new_result
-                    else:
-                        response.result = synthetic_message
-
-                return response
-
-            # Return response (either valid or after max retries)
-            return response
-
-        return response
-
+    # Add empty response handler middleware
+    handle_empty_response = create_handle_empty_response_middleware(wrap_model_call)
     middleware.append(handle_empty_response)
 
-    # Middleware to limit tool calls to one at a time
-    # This prevents "Can receive only one value per step" errors with TodoListMiddleware
-    @wrap_model_call
-    def limit_tool_calls_to_one(
-        request: ModelRequest,
-        handler,
-    ) -> ModelResponse:
-        """
-        Limit the model to one tool call at a time.
-
-        Some models (like vLLM GPT) return multiple tool calls in a single response.
-        This causes conflicts with TodoListMiddleware when processing multiple decisions.
-        By limiting to one tool call, we ensure the agent processes actions sequentially.
-        """
-        response = handler(request)
-
-        # Check if response has multiple tool calls
-        if hasattr(response, 'result'):
-            result = response.result
-            messages = result if isinstance(result, list) else [result]
-
-            for msg in messages:
-                if isinstance(msg, AIMessage) and hasattr(msg, 'tool_calls'):
-                    tool_calls = msg.tool_calls
-                    if tool_calls and len(tool_calls) > 1:
-                        logger.info(
-                            "Limiting tool calls from %d to 1 (keeping first: %s)",
-                            len(tool_calls),
-                            tool_calls[0].get("name", "unknown") if tool_calls else "none"
-                        )
-                        # Keep only the first tool call
-                        msg.tool_calls = [tool_calls[0]]
-
-        return response
-
-    middleware.append(limit_tool_calls_to_one)
+    # Add tool call limiter middleware
+    limit_tool_calls = create_limit_tool_calls_middleware(wrap_model_call)
+    middleware.append(limit_tool_calls)
 
-    # Non-HITL tools that execute immediately without user approval
-    NON_HITL_TOOLS = {
-        "markdown_tool", "markdown",
-        "read_file_tool", "read_file",
-        "list_files_tool", "list_files",
-        "search_workspace_tool", "search_workspace",
-        "search_notebook_cells_tool", "search_notebook_cells",
-        "write_todos",
-    }
+    # Add tool args normalization middleware (convert list args to strings based on schema)
+    normalize_tool_args = create_normalize_tool_args_middleware(wrap_model_call, tools=tools)
+    middleware.append(normalize_tool_args)
 
-    # Middleware to inject continuation prompt after non-HITL tool execution
-    @wrap_model_call
-    def inject_continuation_after_non_hitl_tool(
-        request: ModelRequest,
-        handler,
-    ) -> ModelResponse:
-        """
-        Inject a continuation prompt when the last message is from a non-HITL tool.
+    # Add continuation prompt middleware
+    inject_continuation = create_inject_continuation_middleware(wrap_model_call)
+    middleware.append(inject_continuation)
 
-        Non-HITL tools execute immediately without user approval, which can cause
-        Gemini to produce empty responses. This middleware injects a system message
-        to remind the LLM to continue with the next action.
-        """
-        messages = request.messages
-        if not messages:
-            return handler(request)
-
-        # Check if the last message is a ToolMessage from a non-HITL tool
-        last_msg = messages[-1]
-        if getattr(last_msg, "type", "") == "tool":
-            tool_name = getattr(last_msg, "name", "") or ""
-
-            # Also try to extract tool name from content
-            if not tool_name:
-                try:
-                    import json
-                    content_json = json.loads(last_msg.content)
-                    tool_name = content_json.get("tool", "")
-                except (json.JSONDecodeError, TypeError, AttributeError):
-                    pass
-
-            if tool_name in NON_HITL_TOOLS:
-                logger.info(
-                    "Injecting continuation prompt after non-HITL tool: %s",
-                    tool_name,
-                )
-
-                # Get todos context
-                todos = request.state.get("todos", [])
-                pending_todos = [
-                    t for t in todos
-                    if t.get("status") in ("pending", "in_progress")
-                ]
-
-                if pending_todos:
-                    pending_list = ", ".join(
-                        t.get("content", "")[:30] for t in pending_todos[:3]
-                    )
-                    continuation = (
-                        f"Tool '{tool_name}' completed. "
-                        f"Continue with pending tasks: {pending_list}. "
-                        f"Call jupyter_cell_tool or the next appropriate tool."
-                    )
-                else:
-                    continuation = (
-                        f"Tool '{tool_name}' completed. All tasks done. "
-                        f"Call final_answer_tool with a summary NOW."
-                    )
-
-                # Inject as a system-like user message
-                from langchain_core.messages import HumanMessage
-                new_messages = list(messages) + [
-                    HumanMessage(content=f"[SYSTEM] {continuation}")
-                ]
-                request = request.override(messages=new_messages)
-
-        return handler(request)
-
-    middleware.append(inject_continuation_after_non_hitl_tool)
-
-    class PatchToolCallsMiddleware(AgentMiddleware):
-        """Patch dangling tool calls so the agent can continue."""
-
-        def before_agent(self, state, runtime):
-            messages = state.get("messages", [])
-            if not messages:
-                return None
-
-            patched = []
-            for i, msg in enumerate(messages):
-                patched.append(msg)
-                if getattr(msg, "type", "") == "ai" and getattr(
-                    msg, "tool_calls", None
-                ):
-                    for tool_call in msg.tool_calls:
-                        tool_call_id = tool_call.get("id")
-                        if not tool_call_id:
-                            continue
-                        has_tool_msg = any(
-                            (
-                                getattr(m, "type", "") == "tool"
-                                and getattr(m, "tool_call_id", None) == tool_call_id
-                            )
-                            for m in messages[i:]
-                        )
-                        if not has_tool_msg:
-                            tool_msg = (
-                                f"Tool call {tool_call.get('name', 'unknown')} with id {tool_call_id} "
-                                "was cancelled - another message came in before it could be completed."
-                            )
-                            patched.append(
-                                LCToolMessage(
-                                    content=tool_msg,
-                                    name=tool_call.get("name", "unknown"),
-                                    tool_call_id=tool_call_id,
-                                )
-                            )
-
-            if patched == messages:
-                return None
-            return {"messages": Overwrite(patched)}
-
-    middleware.append(PatchToolCallsMiddleware())
+    # Add patch tool calls middleware
+    patch_tool_calls = create_patch_tool_calls_middleware(
+        AgentMiddleware, LCToolMessage, Overwrite
+    )
+    middleware.append(patch_tool_calls)
 
     # Add TodoListMiddleware for task planning
     if enable_todo_list:
         todo_middleware = TodoListMiddleware(
-            system_prompt="""
-## CRITICAL WORKFLOW RULES - MUST FOLLOW:
-1. NEVER stop after calling write_todos - ALWAYS make another tool call immediately
-2. write_todos is ONLY for tracking progress - it does NOT complete any work
-3. After EVERY write_todos call, you MUST call another tool (jupyter_cell_tool, markdown_tool, or final_answer_tool)
-
-## Todo List Management:
-- Before complex tasks, use write_todos to create a task list
-- Update todos as you complete each step (mark 'in_progress' → 'completed')
-- Each todo item should be specific and descriptive (10-50 characters)
-- All todo items must be written in Korean
-- ALWAYS include "다음 단계 제시" as the LAST item
-
-## Task Completion Flow:
-1. When current task is done → mark it 'completed' with write_todos
-2. IMMEDIATELY call the next tool (jupyter_cell_tool for code, markdown_tool for text)
-3. For "다음 단계 제시" → mark completed, then call final_answer_tool with suggestions
-4. NEVER end your turn after write_todos - you MUST continue with actual work
-
-## FORBIDDEN PATTERNS:
-❌ Calling write_todos and then stopping
-❌ Updating todo status without doing the actual work
-❌ Ending turn without calling final_answer_tool when all tasks are done
-""",
-            tool_description="""Update the task list for tracking progress.
-⚠️ CRITICAL: This tool is ONLY for tracking - it does NOT do any actual work.
-After calling this tool, you MUST IMMEDIATELY call another tool (jupyter_cell_tool, markdown_tool, or final_answer_tool).
-NEVER end your response after calling write_todos - always continue with the next action tool.""",
+            system_prompt=TODO_LIST_SYSTEM_PROMPT,
+            tool_description=TODO_LIST_TOOL_DESCRIPTION,
        )
         middleware.append(todo_middleware)
 
     if enable_hitl:
         # Add Human-in-the-Loop middleware for code execution
         hitl_middleware = HumanInTheLoopMiddleware(
-            interrupt_on={
-                # Require approval before executing code
-                "jupyter_cell_tool": {
-                    "allowed_decisions": ["approve", "edit", "reject"],
-                    "description": "🔍 Code execution requires approval",
-                },
-                # Safe operations - no approval needed
-                "markdown_tool": False,
-                "read_file_tool": False,
-                "list_files_tool": False,
-                "search_workspace_tool": False,
-                "search_notebook_cells_tool": False,
-                "write_todos": False,  # Todo updates don't need approval
-                # File write requires approval
-                "write_file_tool": {
-                    "allowed_decisions": ["approve", "edit", "reject"],
-                    "description": "⚠️ File write requires approval",
-                },
-                # Final answer doesn't need approval
-                "final_answer_tool": False,
-            },
+            interrupt_on=get_hitl_interrupt_config(),
             description_prefix="Tool execution pending approval",
         )
         middleware.append(hitl_middleware)
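
The five create_*_middleware factories live in the new custom_middleware.py (+663 lines, not shown here). Judging from the call sites, each factory receives the wrap_model_call decorator (plus tools or middleware base classes where needed) and returns a ready-to-append middleware. A sketch of one factory, reconstructed from the removed inline limit_tool_calls_to_one above (the factory wrapper is assumed; the inner logic is the removed code's):

# custom_middleware.py -- hypothetical sketch of one factory; only the call
# create_limit_tool_calls_middleware(wrap_model_call) is confirmed by this diff.
import logging

logger = logging.getLogger(__name__)


def create_limit_tool_calls_middleware(wrap_model_call):
    """Return middleware that trims multi-tool-call responses to a single call."""

    @wrap_model_call
    def limit_tool_calls(request, handler):
        response = handler(request)
        # Same behavior as the removed inline version: keep only the first tool
        # call so TodoListMiddleware never receives two state updates per step.
        if hasattr(response, "result"):
            result = response.result
            messages = result if isinstance(result, list) else [result]
            for msg in messages:
                tool_calls = getattr(msg, "tool_calls", None)
                if tool_calls and len(tool_calls) > 1:
                    logger.info(
                        "Limiting tool calls from %d to 1 (keeping first: %s)",
                        len(tool_calls),
                        tool_calls[0].get("name", "unknown"),
                    )
                    msg.tool_calls = [tool_calls[0]]
        return response

    return limit_tool_calls

Injecting wrap_model_call (and AgentMiddleware, LCToolMessage, Overwrite for the patch factory) as arguments presumably keeps custom_middleware.py importable even when the optional langchain/langgraph packages are absent, mirroring the try/except ImportError block earlier in create_simple_chat_agent.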
@@ -700,7 +164,7 @@ NEVER end your response after calling write_todos - always continue with the nex
         logger.info("Added ModelCallLimitMiddleware with run_limit=30")
 
     # ToolCallLimitMiddleware: Prevent specific tools from being called too many times
-    # Limit write_todos to prevent the loop we observed
+    # Limit write_todos to prevent loops
     write_todos_limit = ToolCallLimitMiddleware(
         tool_name="write_todos",
         run_limit=5,  # Max 5 write_todos calls per user message
@@ -717,6 +181,23 @@ NEVER end your response after calling write_todos - always continue with the nex
         middleware.append(list_files_limit)
         logger.info("Added ToolCallLimitMiddleware for write_todos and list_files_tool")
 
+    # Add SummarizationMiddleware to maintain context across cycles
+    summary_llm = create_summarization_llm(llm_config)
+    if summary_llm:
+        try:
+            summarization_middleware = SummarizationMiddleware(
+                model=summary_llm,
+                trigger=("tokens", 8000),  # Trigger when exceeding 8000 tokens
+                keep=("messages", 10),  # Keep last 10 messages intact
+            )
+            middleware.append(summarization_middleware)
+            logger.info(
+                "Added SummarizationMiddleware with model=%s, trigger=8000 tokens, keep=10 msgs",
+                getattr(summary_llm, "model", str(summary_llm)),
+            )
+        except Exception as e:
+            logger.warning("Failed to add SummarizationMiddleware: %s", e)
+
     # System prompt for the agent (override applies only to LangChain agent)
     if system_prompt_override and system_prompt_override.strip():
         system_prompt = system_prompt_override.strip()
@@ -724,6 +205,19 @@ NEVER end your response after calling write_todos - always continue with the nex
     else:
         system_prompt = DEFAULT_SYSTEM_PROMPT
 
+    # Add Gemini 2.5 Flash specific prompt to ensure content is included with tool calls
+    gemini_model = llm_config.get("gemini", {}).get("model", "")
+    if "gemini-2.5-flash" in gemini_model:
+        gemini_content_prompt = """
+## 🔴 IMPORTANT: Always include explanation text
+When calling any tool, you MUST include a brief explanation in your response content.
+NEVER produce an empty content when making tool calls.
+Before each tool call, write Korean explanations of what you're about to do.
+Example: "데이터를 로드하겠습니다." then call jupyter_cell_tool.
+"""
+        system_prompt = system_prompt + "\n" + gemini_content_prompt
+        logger.info("Added Gemini 2.5 Flash specific prompt for content inclusion")
+
     logger.info("SimpleChatAgent system_prompt: %s", system_prompt)
 
     # Create agent with checkpointer (required for HITL)
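
Likewise, get_hitl_interrupt_config() in the new hitl_config.py (+71 lines, not shown) replaces the inline interrupt_on table removed above, and presumably returns essentially that mapping. A sketch grounded in the removed dict (whether the new tools such as edit_file_tool or execute_command_tool also receive entries is not visible in this diff):

# hitl_config.py -- hypothetical sketch; the mapping mirrors the removed inline
# interrupt_on dict, while the function wrapper itself is an assumption.
from typing import Any, Dict

_APPROVAL_DECISIONS = ["approve", "edit", "reject"]


def get_hitl_interrupt_config() -> Dict[str, Any]:
    """Per-tool HITL policy: a dict entry requires approval, False runs immediately."""
    return {
        # Code execution and file writes require explicit user approval
        "jupyter_cell_tool": {
            "allowed_decisions": _APPROVAL_DECISIONS,
            "description": "🔍 Code execution requires approval",
        },
        "write_file_tool": {
            "allowed_decisions": _APPROVAL_DECISIONS,
            "description": "⚠️ File write requires approval",
        },
        # Read-only and bookkeeping tools run without approval
        "markdown_tool": False,
        "read_file_tool": False,
        "list_files_tool": False,
        "search_workspace_tool": False,
        "search_notebook_cells_tool": False,
        "write_todos": False,
        "final_answer_tool": False,
    }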