hdsp-jupyter-extension 2.0.8__py3-none-any.whl → 2.0.10__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agent_server/core/rag_manager.py +12 -3
- agent_server/core/retriever.py +2 -1
- agent_server/core/vllm_embedding_service.py +8 -5
- agent_server/langchain/ARCHITECTURE.md +7 -51
- agent_server/langchain/agent.py +31 -20
- agent_server/langchain/custom_middleware.py +148 -31
- agent_server/langchain/hitl_config.py +0 -8
- agent_server/langchain/llm_factory.py +85 -1
- agent_server/langchain/logging_utils.py +7 -7
- agent_server/langchain/prompts.py +45 -36
- agent_server/langchain/tools/__init__.py +1 -10
- agent_server/langchain/tools/file_tools.py +9 -61
- agent_server/langchain/tools/jupyter_tools.py +0 -1
- agent_server/langchain/tools/lsp_tools.py +8 -8
- agent_server/langchain/tools/resource_tools.py +12 -12
- agent_server/langchain/tools/search_tools.py +3 -158
- agent_server/routers/langchain_agent.py +122 -113
- {hdsp_jupyter_extension-2.0.8.data → hdsp_jupyter_extension-2.0.10.data}/data/share/jupyter/labextensions/hdsp-agent/build_log.json +1 -1
- {hdsp_jupyter_extension-2.0.8.data → hdsp_jupyter_extension-2.0.10.data}/data/share/jupyter/labextensions/hdsp-agent/package.json +2 -2
- hdsp_jupyter_extension-2.0.8.data/data/share/jupyter/labextensions/hdsp-agent/static/frontend_styles_index_js.8740a527757068814573.js → hdsp_jupyter_extension-2.0.10.data/data/share/jupyter/labextensions/hdsp-agent/static/frontend_styles_index_js.2d9fb488c82498c45c2d.js +93 -4
- hdsp_jupyter_extension-2.0.10.data/data/share/jupyter/labextensions/hdsp-agent/static/frontend_styles_index_js.2d9fb488c82498c45c2d.js.map +1 -0
- hdsp_jupyter_extension-2.0.8.data/data/share/jupyter/labextensions/hdsp-agent/static/lib_index_js.e4ff4b5779b5e049f84c.js → hdsp_jupyter_extension-2.0.10.data/data/share/jupyter/labextensions/hdsp-agent/static/lib_index_js.dc6434bee96ab03a0539.js +90 -71
- hdsp_jupyter_extension-2.0.10.data/data/share/jupyter/labextensions/hdsp-agent/static/lib_index_js.dc6434bee96ab03a0539.js.map +1 -0
- hdsp_jupyter_extension-2.0.8.data/data/share/jupyter/labextensions/hdsp-agent/static/remoteEntry.020cdb0b864cfaa4e41e.js → hdsp_jupyter_extension-2.0.10.data/data/share/jupyter/labextensions/hdsp-agent/static/remoteEntry.4a252df3ade74efee8d6.js +6 -6
- hdsp_jupyter_extension-2.0.10.data/data/share/jupyter/labextensions/hdsp-agent/static/remoteEntry.4a252df3ade74efee8d6.js.map +1 -0
- {hdsp_jupyter_extension-2.0.8.dist-info → hdsp_jupyter_extension-2.0.10.dist-info}/METADATA +1 -3
- {hdsp_jupyter_extension-2.0.8.dist-info → hdsp_jupyter_extension-2.0.10.dist-info}/RECORD +57 -57
- jupyter_ext/_version.py +1 -1
- jupyter_ext/labextension/build_log.json +1 -1
- jupyter_ext/labextension/package.json +2 -2
- jupyter_ext/labextension/static/{frontend_styles_index_js.8740a527757068814573.js → frontend_styles_index_js.2d9fb488c82498c45c2d.js} +93 -4
- jupyter_ext/labextension/static/frontend_styles_index_js.2d9fb488c82498c45c2d.js.map +1 -0
- jupyter_ext/labextension/static/{lib_index_js.e4ff4b5779b5e049f84c.js → lib_index_js.dc6434bee96ab03a0539.js} +90 -71
- jupyter_ext/labextension/static/lib_index_js.dc6434bee96ab03a0539.js.map +1 -0
- jupyter_ext/labextension/static/{remoteEntry.020cdb0b864cfaa4e41e.js → remoteEntry.4a252df3ade74efee8d6.js} +6 -6
- jupyter_ext/labextension/static/remoteEntry.4a252df3ade74efee8d6.js.map +1 -0
- hdsp_jupyter_extension-2.0.8.data/data/share/jupyter/labextensions/hdsp-agent/static/frontend_styles_index_js.8740a527757068814573.js.map +0 -1
- hdsp_jupyter_extension-2.0.8.data/data/share/jupyter/labextensions/hdsp-agent/static/lib_index_js.e4ff4b5779b5e049f84c.js.map +0 -1
- hdsp_jupyter_extension-2.0.8.data/data/share/jupyter/labextensions/hdsp-agent/static/remoteEntry.020cdb0b864cfaa4e41e.js.map +0 -1
- jupyter_ext/labextension/static/frontend_styles_index_js.8740a527757068814573.js.map +0 -1
- jupyter_ext/labextension/static/lib_index_js.e4ff4b5779b5e049f84c.js.map +0 -1
- jupyter_ext/labextension/static/remoteEntry.020cdb0b864cfaa4e41e.js.map +0 -1
- {hdsp_jupyter_extension-2.0.8.data → hdsp_jupyter_extension-2.0.10.data}/data/etc/jupyter/jupyter_server_config.d/hdsp_jupyter_extension.json +0 -0
- {hdsp_jupyter_extension-2.0.8.data → hdsp_jupyter_extension-2.0.10.data}/data/share/jupyter/labextensions/hdsp-agent/install.json +0 -0
- {hdsp_jupyter_extension-2.0.8.data → hdsp_jupyter_extension-2.0.10.data}/data/share/jupyter/labextensions/hdsp-agent/static/node_modules_emotion_use-insertion-effect-with-fallbacks_dist_emotion-use-insertion-effect-wi-3ba6b80.c095373419d05e6f141a.js +0 -0
- {hdsp_jupyter_extension-2.0.8.data → hdsp_jupyter_extension-2.0.10.data}/data/share/jupyter/labextensions/hdsp-agent/static/node_modules_emotion_use-insertion-effect-with-fallbacks_dist_emotion-use-insertion-effect-wi-3ba6b80.c095373419d05e6f141a.js.map +0 -0
- {hdsp_jupyter_extension-2.0.8.data → hdsp_jupyter_extension-2.0.10.data}/data/share/jupyter/labextensions/hdsp-agent/static/node_modules_emotion_use-insertion-effect-with-fallbacks_dist_emotion-use-insertion-effect-wi-3ba6b81.61e75fb98ecff46cf836.js +0 -0
- {hdsp_jupyter_extension-2.0.8.data → hdsp_jupyter_extension-2.0.10.data}/data/share/jupyter/labextensions/hdsp-agent/static/node_modules_emotion_use-insertion-effect-with-fallbacks_dist_emotion-use-insertion-effect-wi-3ba6b81.61e75fb98ecff46cf836.js.map +0 -0
- {hdsp_jupyter_extension-2.0.8.data → hdsp_jupyter_extension-2.0.10.data}/data/share/jupyter/labextensions/hdsp-agent/static/style.js +0 -0
- {hdsp_jupyter_extension-2.0.8.data → hdsp_jupyter_extension-2.0.10.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_babel_runtime_helpers_esm_extends_js-node_modules_emotion_serialize_dist-051195.e2553aab0c3963b83dd7.js +0 -0
- {hdsp_jupyter_extension-2.0.8.data → hdsp_jupyter_extension-2.0.10.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_babel_runtime_helpers_esm_extends_js-node_modules_emotion_serialize_dist-051195.e2553aab0c3963b83dd7.js.map +0 -0
- {hdsp_jupyter_extension-2.0.8.data → hdsp_jupyter_extension-2.0.10.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_emotion_cache_dist_emotion-cache_browser_development_esm_js.24edcc52a1c014a8a5f0.js +0 -0
- {hdsp_jupyter_extension-2.0.8.data → hdsp_jupyter_extension-2.0.10.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_emotion_cache_dist_emotion-cache_browser_development_esm_js.24edcc52a1c014a8a5f0.js.map +0 -0
- {hdsp_jupyter_extension-2.0.8.data → hdsp_jupyter_extension-2.0.10.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_emotion_react_dist_emotion-react_browser_development_esm_js.19ecf6babe00caff6b8a.js +0 -0
- {hdsp_jupyter_extension-2.0.8.data → hdsp_jupyter_extension-2.0.10.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_emotion_react_dist_emotion-react_browser_development_esm_js.19ecf6babe00caff6b8a.js.map +0 -0
- {hdsp_jupyter_extension-2.0.8.data → hdsp_jupyter_extension-2.0.10.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_emotion_styled_dist_emotion-styled_browser_development_esm_js.661fb5836f4978a7c6e1.js +0 -0
- {hdsp_jupyter_extension-2.0.8.data → hdsp_jupyter_extension-2.0.10.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_emotion_styled_dist_emotion-styled_browser_development_esm_js.661fb5836f4978a7c6e1.js.map +0 -0
- {hdsp_jupyter_extension-2.0.8.data → hdsp_jupyter_extension-2.0.10.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_mui_material_index_js.985697e0162d8d088ca2.js +0 -0
- {hdsp_jupyter_extension-2.0.8.data → hdsp_jupyter_extension-2.0.10.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_mui_material_index_js.985697e0162d8d088ca2.js.map +0 -0
- {hdsp_jupyter_extension-2.0.8.data → hdsp_jupyter_extension-2.0.10.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_mui_material_utils_createSvgIcon_js.1f5038488cdfd8b3a85d.js +0 -0
- {hdsp_jupyter_extension-2.0.8.data → hdsp_jupyter_extension-2.0.10.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_mui_material_utils_createSvgIcon_js.1f5038488cdfd8b3a85d.js.map +0 -0
- {hdsp_jupyter_extension-2.0.8.dist-info → hdsp_jupyter_extension-2.0.10.dist-info}/WHEEL +0 -0
- {hdsp_jupyter_extension-2.0.8.dist-info → hdsp_jupyter_extension-2.0.10.dist-info}/licenses/LICENSE +0 -0
agent_server/core/rag_manager.py
CHANGED
|
@@ -90,16 +90,25 @@ class RAGManager:
|
|
|
90
90
|
|
|
91
91
|
# 2. Initialize embedding service (local or vLLM backend)
|
|
92
92
|
import os
|
|
93
|
-
|
|
93
|
+
|
|
94
|
+
embedding_backend = os.environ.get(
|
|
95
|
+
"HDSP_EMBEDDING_BACKEND", "local"
|
|
96
|
+
).lower()
|
|
94
97
|
|
|
95
98
|
if embedding_backend == "vllm":
|
|
96
|
-
from agent_server.core.vllm_embedding_service import
|
|
97
|
-
|
|
99
|
+
from agent_server.core.vllm_embedding_service import (
|
|
100
|
+
get_vllm_embedding_service,
|
|
101
|
+
)
|
|
102
|
+
|
|
103
|
+
self._embedding_service = get_vllm_embedding_service(
|
|
104
|
+
self._config.embedding
|
|
105
|
+
)
|
|
98
106
|
logger.info(
|
|
99
107
|
f"vLLM Embedding service initialized (dim={self._embedding_service.dimension})"
|
|
100
108
|
)
|
|
101
109
|
else:
|
|
102
110
|
from agent_server.core.embedding_service import get_embedding_service
|
|
111
|
+
|
|
103
112
|
self._embedding_service = get_embedding_service(self._config.embedding)
|
|
104
113
|
# Load model to get dimension
|
|
105
114
|
await self._embedding_service._ensure_model_loaded()
|
agent_server/core/retriever.py
CHANGED
|
@@ -96,7 +96,8 @@ class Retriever:
|
|
|
96
96
|
query=query_embedding,
|
|
97
97
|
query_filter=qdrant_filter,
|
|
98
98
|
limit=effective_top_k,
|
|
99
|
-
score_threshold=effective_threshold
|
|
99
|
+
score_threshold=effective_threshold
|
|
100
|
+
* 0.5, # Lower for initial retrieval
|
|
100
101
|
with_payload=True,
|
|
101
102
|
with_vectors=False,
|
|
102
103
|
)
|
|
@@ -17,7 +17,6 @@ import os
|
|
|
17
17
|
from typing import TYPE_CHECKING, List, Optional
|
|
18
18
|
|
|
19
19
|
import httpx
|
|
20
|
-
import time
|
|
21
20
|
|
|
22
21
|
if TYPE_CHECKING:
|
|
23
22
|
from hdsp_agent_core.models.rag import EmbeddingConfig
|
|
@@ -66,7 +65,7 @@ class VLLMEmbeddingService:
|
|
|
66
65
|
self._client = httpx.AsyncClient(
|
|
67
66
|
base_url=self._endpoint,
|
|
68
67
|
timeout=httpx.Timeout(30.0),
|
|
69
|
-
limits=httpx.Limits(max_keepalive_connections=5, max_connections=10)
|
|
68
|
+
limits=httpx.Limits(max_keepalive_connections=5, max_connections=10),
|
|
70
69
|
)
|
|
71
70
|
|
|
72
71
|
logger.info(
|
|
@@ -79,7 +78,9 @@ class VLLMEmbeddingService:
|
|
|
79
78
|
"""Get embedding dimension"""
|
|
80
79
|
return self._dimension
|
|
81
80
|
|
|
82
|
-
async def _call_vllm_api(
|
|
81
|
+
async def _call_vllm_api(
|
|
82
|
+
self, texts: List[str], max_retries: int = 3
|
|
83
|
+
) -> List[List[float]]:
|
|
83
84
|
"""
|
|
84
85
|
Call vLLM embedding API with retry logic.
|
|
85
86
|
|
|
@@ -126,7 +127,9 @@ class VLLMEmbeddingService:
|
|
|
126
127
|
logger.error(f"Unexpected error calling vLLM API: {e}")
|
|
127
128
|
break
|
|
128
129
|
|
|
129
|
-
raise Exception(
|
|
130
|
+
raise Exception(
|
|
131
|
+
f"Failed to connect to vLLM after {max_retries} attempts: {last_error}"
|
|
132
|
+
)
|
|
130
133
|
|
|
131
134
|
async def embed_texts(self, texts: List[str]) -> List[List[float]]:
|
|
132
135
|
"""
|
|
@@ -240,4 +243,4 @@ def reset_vllm_embedding_service() -> None:
|
|
|
240
243
|
_vllm_embedding_service._initialized = False
|
|
241
244
|
_vllm_embedding_service = None
|
|
242
245
|
VLLMEmbeddingService._instance = None
|
|
243
|
-
VLLMEmbeddingService._initialized = False
|
|
246
|
+
VLLMEmbeddingService._initialized = False
|
|
@@ -151,13 +151,10 @@ jupyter_ext/
|
|
|
151
151
|
```python
|
|
152
152
|
- jupyter_cell_tool # Python 코드 실행
|
|
153
153
|
- markdown_tool # 마크다운 셀 추가
|
|
154
|
-
- final_answer_tool # 작업 완료 및 요약
|
|
155
154
|
- read_file_tool # 파일 읽기
|
|
156
155
|
- write_file_tool # 파일 쓰기
|
|
157
|
-
- list_files_tool # 디렉토리 목록
|
|
158
|
-
- search_workspace_tool # 워크스페이스 검색 (grep/rg)
|
|
159
156
|
- search_notebook_cells_tool # 노트북 셀 검색
|
|
160
|
-
- execute_command_tool # 쉘 명령 실행
|
|
157
|
+
- execute_command_tool # 쉘 명령 실행 (파일 검색은 find/grep 사용)
|
|
161
158
|
- check_resource_tool # 리소스 확인
|
|
162
159
|
```
|
|
163
160
|
|
|
@@ -441,8 +438,6 @@ non-HITL 도구 실행 후 continuation 프롬프트를 주입합니다.
|
|
|
441
438
|
NON_HITL_TOOLS = {
|
|
442
439
|
"markdown_tool",
|
|
443
440
|
"read_file_tool",
|
|
444
|
-
"list_files_tool",
|
|
445
|
-
"search_workspace_tool",
|
|
446
441
|
"search_notebook_cells_tool",
|
|
447
442
|
"write_todos",
|
|
448
443
|
}
|
|
@@ -508,8 +503,7 @@ LLM 호출 횟수를 제한합니다.
|
|
|
508
503
|
|
|
509
504
|
**설정**:
|
|
510
505
|
```python
|
|
511
|
-
- write_todos: run_limit=
|
|
512
|
-
- list_files_tool: run_limit=5, exit_behavior="continue"
|
|
506
|
+
- write_todos: run_limit=20, exit_behavior="continue"
|
|
513
507
|
```
|
|
514
508
|
|
|
515
509
|
### 9. `SummarizationMiddleware` (LangChain 내장)
|
|
@@ -634,52 +628,14 @@ Python 코드를 Jupyter 셀에서 실행합니다.
|
|
|
634
628
|
**특징**:
|
|
635
629
|
- HITL 대상 (사용자 승인 필요)
|
|
636
630
|
|
|
637
|
-
#### `list_files_tool`
|
|
638
|
-
디렉토리 목록을 가져옵니다.
|
|
639
|
-
|
|
640
|
-
**파라미터**:
|
|
641
|
-
- `path`: 디렉토리 경로 (기본 ".")
|
|
642
|
-
- `recursive`: 재귀 탐색 여부 (기본 False)
|
|
643
|
-
|
|
644
|
-
**반환**:
|
|
645
|
-
```python
|
|
646
|
-
{
|
|
647
|
-
"tool": "list_files",
|
|
648
|
-
"parameters": {"path": ".", "recursive": False},
|
|
649
|
-
"status": "completed",
|
|
650
|
-
"files": ["file1.py", "file2.csv", ...]
|
|
651
|
-
}
|
|
652
|
-
```
|
|
653
|
-
|
|
654
631
|
---
|
|
655
632
|
|
|
656
633
|
### Search Tools (`search_tools.py`)
|
|
657
634
|
|
|
658
|
-
|
|
659
|
-
|
|
660
|
-
|
|
661
|
-
|
|
662
|
-
- `pattern`: 정규식 패턴
|
|
663
|
-
- `file_types`: 파일 타입 필터 (예: ["py", "md"])
|
|
664
|
-
- `path`: 검색 경로 (기본 ".")
|
|
665
|
-
|
|
666
|
-
**반환**:
|
|
667
|
-
```python
|
|
668
|
-
{
|
|
669
|
-
"tool": "search_workspace",
|
|
670
|
-
"parameters": {"pattern": "...", "file_types": ["py"], "path": "."},
|
|
671
|
-
"status": "completed",
|
|
672
|
-
"results": [
|
|
673
|
-
{"file": "file1.py", "line_number": 10, "line": "..."},
|
|
674
|
-
...
|
|
675
|
-
],
|
|
676
|
-
"command": "rg ... (또는 grep ...)"
|
|
677
|
-
}
|
|
678
|
-
```
|
|
679
|
-
|
|
680
|
-
**특징**:
|
|
681
|
-
- ripgrep 우선 사용 (속도)
|
|
682
|
-
- 없으면 grep 사용
|
|
635
|
+
> **Note**: 파일 검색 기능은 `execute_command_tool`을 통해 `find`/`grep` 명령을 직접 사용합니다.
|
|
636
|
+
>
|
|
637
|
+
> - 파일명 검색: `execute_command_tool(command="find . -iname '*pattern*' 2>/dev/null")`
|
|
638
|
+
> - 파일 내용 검색: `execute_command_tool(command="grep -rn 'pattern' --include='*.py' .")`
|
|
683
639
|
|
|
684
640
|
#### `search_notebook_cells_tool`
|
|
685
641
|
Jupyter 노트북 셀에서 패턴을 검색합니다.
|
|
@@ -961,7 +917,7 @@ return
|
|
|
961
917
|
- **HITL**: 사용자 승인 필요
|
|
962
918
|
- `jupyter_cell_tool`, `execute_command_tool`, `write_file_tool`
|
|
963
919
|
- **non-HITL**: 즉시 실행
|
|
964
|
-
- `markdown_tool`, `read_file_tool`, `
|
|
920
|
+
- `markdown_tool`, `read_file_tool`, `search_*_tool`
|
|
965
921
|
- **클라이언트 실행**: 서버에서 실행하지 않음
|
|
966
922
|
- `check_resource_tool`: CheckResourceHandler에서 처리
|
|
967
923
|
|
agent_server/langchain/agent.py
CHANGED
|
@@ -26,15 +26,12 @@ from agent_server.langchain.tools import (
|
|
|
26
26
|
diagnostics_tool,
|
|
27
27
|
edit_file_tool,
|
|
28
28
|
execute_command_tool,
|
|
29
|
-
final_answer_tool,
|
|
30
29
|
jupyter_cell_tool,
|
|
31
|
-
list_files_tool,
|
|
32
30
|
markdown_tool,
|
|
33
31
|
multiedit_file_tool,
|
|
34
32
|
read_file_tool,
|
|
35
33
|
references_tool,
|
|
36
34
|
search_notebook_cells_tool,
|
|
37
|
-
search_workspace_tool,
|
|
38
35
|
write_file_tool,
|
|
39
36
|
)
|
|
40
37
|
|
|
@@ -46,13 +43,10 @@ def _get_all_tools():
|
|
|
46
43
|
return [
|
|
47
44
|
jupyter_cell_tool,
|
|
48
45
|
markdown_tool,
|
|
49
|
-
final_answer_tool,
|
|
50
46
|
read_file_tool,
|
|
51
47
|
write_file_tool,
|
|
52
48
|
edit_file_tool,
|
|
53
49
|
multiedit_file_tool,
|
|
54
|
-
list_files_tool,
|
|
55
|
-
search_workspace_tool,
|
|
56
50
|
search_notebook_cells_tool,
|
|
57
51
|
execute_command_tool,
|
|
58
52
|
check_resource_tool,
|
|
@@ -115,7 +109,6 @@ def create_simple_chat_agent(
|
|
|
115
109
|
|
|
116
110
|
# Configure middleware
|
|
117
111
|
middleware = []
|
|
118
|
-
|
|
119
112
|
# Add empty response handler middleware
|
|
120
113
|
handle_empty_response = create_handle_empty_response_middleware(wrap_model_call)
|
|
121
114
|
middleware.append(handle_empty_response)
|
|
@@ -125,7 +118,9 @@ def create_simple_chat_agent(
|
|
|
125
118
|
middleware.append(limit_tool_calls)
|
|
126
119
|
|
|
127
120
|
# Add tool args normalization middleware (convert list args to strings based on schema)
|
|
128
|
-
normalize_tool_args = create_normalize_tool_args_middleware(
|
|
121
|
+
normalize_tool_args = create_normalize_tool_args_middleware(
|
|
122
|
+
wrap_model_call, tools=tools
|
|
123
|
+
)
|
|
129
124
|
middleware.append(normalize_tool_args)
|
|
130
125
|
|
|
131
126
|
# Add continuation prompt middleware
|
|
@@ -164,22 +159,14 @@ def create_simple_chat_agent(
|
|
|
164
159
|
logger.info("Added ModelCallLimitMiddleware with run_limit=30")
|
|
165
160
|
|
|
166
161
|
# ToolCallLimitMiddleware: Prevent specific tools from being called too many times
|
|
167
|
-
#
|
|
162
|
+
# run_limit resets automatically per user message
|
|
168
163
|
write_todos_limit = ToolCallLimitMiddleware(
|
|
169
164
|
tool_name="write_todos",
|
|
170
|
-
run_limit=
|
|
171
|
-
exit_behavior="continue", # Let agent continue with other tools
|
|
172
|
-
)
|
|
173
|
-
middleware.append(write_todos_limit)
|
|
174
|
-
|
|
175
|
-
# Limit list_files_tool to prevent excessive directory listing
|
|
176
|
-
list_files_limit = ToolCallLimitMiddleware(
|
|
177
|
-
tool_name="list_files_tool",
|
|
178
|
-
run_limit=5, # Max 5 list_files calls per user message
|
|
165
|
+
run_limit=20, # Max 20 write_todos calls per user message
|
|
179
166
|
exit_behavior="continue",
|
|
180
167
|
)
|
|
181
|
-
middleware.append(
|
|
182
|
-
logger.info("Added ToolCallLimitMiddleware for write_todos
|
|
168
|
+
middleware.append(write_todos_limit)
|
|
169
|
+
logger.info("Added ToolCallLimitMiddleware for write_todos (20/msg)")
|
|
183
170
|
|
|
184
171
|
# Add SummarizationMiddleware to maintain context across cycles
|
|
185
172
|
summary_llm = create_summarization_llm(llm_config)
|
|
@@ -218,6 +205,30 @@ Example: "데이터를 로드하겠습니다." then call jupyter_cell_tool.
|
|
|
218
205
|
system_prompt = system_prompt + "\n" + gemini_content_prompt
|
|
219
206
|
logger.info("Added Gemini 2.5 Flash specific prompt for content inclusion")
|
|
220
207
|
|
|
208
|
+
# Add vLLM/gpt-oss specific prompt for Korean responses and proper todo structure
|
|
209
|
+
provider = llm_config.get("provider", "")
|
|
210
|
+
if provider == "vllm":
|
|
211
|
+
vllm_prompt = """
|
|
212
|
+
## 🔴 중요: 한국어로 응답하세요
|
|
213
|
+
- 모든 응답, 설명, todo 항목은 반드시 한국어로 작성하세요.
|
|
214
|
+
- 코드 주석과 출력 설명도 한국어로 작성하세요.
|
|
215
|
+
- 영어로 응답하지 마세요.
|
|
216
|
+
|
|
217
|
+
## 🔴 MANDATORY: Todo List Structure
|
|
218
|
+
When creating todos with write_todos, you MUST:
|
|
219
|
+
1. Write all todo items in Korean
|
|
220
|
+
2. ALWAYS include "작업 요약 및 다음단계 제시" as the LAST todo item
|
|
221
|
+
3. Example structure:
|
|
222
|
+
- 데이터 로드 및 확인
|
|
223
|
+
- 데이터 분석 수행
|
|
224
|
+
- 작업 요약 및 다음단계 제시 ← 반드시 마지막에 포함!
|
|
225
|
+
|
|
226
|
+
## 🔴 IMPORTANT: Never return empty responses
|
|
227
|
+
If you have nothing to say, call a tool instead. NEVER return an empty response.
|
|
228
|
+
"""
|
|
229
|
+
system_prompt = system_prompt + "\n" + vllm_prompt
|
|
230
|
+
logger.info("Added vLLM/gpt-oss specific prompt for Korean responses")
|
|
231
|
+
|
|
221
232
|
logger.info("SimpleChatAgent system_prompt: %s", system_prompt)
|
|
222
233
|
|
|
223
234
|
# Create agent with checkpointer (required for HITL)
|
|
@@ -139,6 +139,19 @@ def create_handle_empty_response_middleware(wrap_model_call):
|
|
|
139
139
|
def handle_empty_response(request, handler):
|
|
140
140
|
max_retries = 2
|
|
141
141
|
|
|
142
|
+
# Check if all todos are completed - if so, skip processing entirely
|
|
143
|
+
todos = request.state.get("todos", [])
|
|
144
|
+
if todos:
|
|
145
|
+
pending_todos = [
|
|
146
|
+
t for t in todos if t.get("status") in ("pending", "in_progress")
|
|
147
|
+
]
|
|
148
|
+
if not pending_todos:
|
|
149
|
+
logger.info(
|
|
150
|
+
"All %d todos completed - skipping handle_empty_response middleware",
|
|
151
|
+
len(todos),
|
|
152
|
+
)
|
|
153
|
+
return handler(request)
|
|
154
|
+
|
|
142
155
|
# Check if last message is final_answer_tool result - if so, don't retry/synthesize
|
|
143
156
|
# This allows agent to naturally terminate after final_answer_tool
|
|
144
157
|
messages = request.messages
|
|
@@ -206,6 +219,25 @@ def create_handle_empty_response_middleware(wrap_model_call):
|
|
|
206
219
|
# Invalid response - retry with JSON schema prompt
|
|
207
220
|
if response_message and attempt < max_retries:
|
|
208
221
|
reason = "text-only" if has_content else "empty"
|
|
222
|
+
|
|
223
|
+
json_prompt = _build_json_prompt(request, response_message, has_content)
|
|
224
|
+
|
|
225
|
+
# If _build_json_prompt returns None, skip retry and synthesize write_todos
|
|
226
|
+
# This happens when: all todos completed OR current todo is summary/next_steps
|
|
227
|
+
if json_prompt is None:
|
|
228
|
+
logger.info(
|
|
229
|
+
"Skipping retry for %s response, synthesizing write_todos with content",
|
|
230
|
+
reason,
|
|
231
|
+
)
|
|
232
|
+
# Synthesize write_todos while preserving the content (summary)
|
|
233
|
+
synthetic_message = _create_synthetic_final_answer(
|
|
234
|
+
request, response_message, has_content
|
|
235
|
+
)
|
|
236
|
+
response = _replace_ai_message_in_response(
|
|
237
|
+
response, synthetic_message
|
|
238
|
+
)
|
|
239
|
+
return response
|
|
240
|
+
|
|
209
241
|
logger.warning(
|
|
210
242
|
"Invalid AIMessage (%s) detected (attempt %d/%d). "
|
|
211
243
|
"Retrying with JSON schema prompt...",
|
|
@@ -214,16 +246,26 @@ def create_handle_empty_response_middleware(wrap_model_call):
|
|
|
214
246
|
max_retries + 1,
|
|
215
247
|
)
|
|
216
248
|
|
|
217
|
-
json_prompt = _build_json_prompt(request, response_message, has_content)
|
|
218
249
|
request = request.override(
|
|
219
250
|
messages=request.messages + [HumanMessage(content=json_prompt)]
|
|
220
251
|
)
|
|
221
252
|
continue
|
|
222
253
|
|
|
223
|
-
# Max retries exhausted - synthesize
|
|
254
|
+
# Max retries exhausted - synthesize write_todos to complete
|
|
224
255
|
if response_message:
|
|
256
|
+
# Check if todos are already all completed - if so, just return
|
|
257
|
+
todos = request.state.get("todos", [])
|
|
258
|
+
pending_todos = [
|
|
259
|
+
t for t in todos if t.get("status") in ("pending", "in_progress")
|
|
260
|
+
]
|
|
261
|
+
if todos and not pending_todos:
|
|
262
|
+
logger.info(
|
|
263
|
+
"Max retries exhausted but all todos completed - returning response as-is"
|
|
264
|
+
)
|
|
265
|
+
return response
|
|
266
|
+
|
|
225
267
|
logger.warning(
|
|
226
|
-
"Max retries exhausted. Synthesizing
|
|
268
|
+
"Max retries exhausted. Synthesizing write_todos to complete."
|
|
227
269
|
)
|
|
228
270
|
synthetic_message = _create_synthetic_final_answer(
|
|
229
271
|
request, response_message, has_content
|
|
@@ -274,14 +316,33 @@ def _build_json_prompt(request, response_message, has_content):
|
|
|
274
316
|
"""Build JSON-forcing prompt based on context."""
|
|
275
317
|
todos = request.state.get("todos", [])
|
|
276
318
|
pending_todos = [t for t in todos if t.get("status") in ("pending", "in_progress")]
|
|
319
|
+
in_progress_todos = [t for t in todos if t.get("status") == "in_progress"]
|
|
277
320
|
|
|
278
321
|
if has_content:
|
|
279
|
-
|
|
322
|
+
# If all todos completed, don't force another tool call
|
|
323
|
+
if todos and not pending_todos:
|
|
324
|
+
return None # Signal to skip retry
|
|
325
|
+
|
|
326
|
+
# If current in_progress todo is "작업 요약 및 다음단계 제시", accept text-only response
|
|
327
|
+
# The LLM is outputting the summary, we'll synthesize write_todos
|
|
328
|
+
if in_progress_todos:
|
|
329
|
+
current_todo = in_progress_todos[0].get("content", "")
|
|
330
|
+
if (
|
|
331
|
+
"작업 요약" in current_todo
|
|
332
|
+
or "다음단계" in current_todo
|
|
333
|
+
or "다음 단계" in current_todo
|
|
334
|
+
):
|
|
335
|
+
logger.info(
|
|
336
|
+
"Current todo is summary/next steps ('%s'), accepting text-only response",
|
|
337
|
+
current_todo[:30],
|
|
338
|
+
)
|
|
339
|
+
return None # Signal to skip retry - will synthesize write_todos with content
|
|
340
|
+
|
|
280
341
|
return (
|
|
281
342
|
f"{JSON_TOOL_SCHEMA}\n\n"
|
|
282
343
|
f"Your previous response was text, not JSON. "
|
|
283
|
-
f"
|
|
284
|
-
f'{{"tool": "
|
|
344
|
+
f"Call the next appropriate tool to continue.\n"
|
|
345
|
+
f'Example: {{"tool": "jupyter_cell_tool", "arguments": {{"code": "print(\'hello\')"}}}}'
|
|
285
346
|
)
|
|
286
347
|
elif pending_todos:
|
|
287
348
|
todo_list = ", ".join(t.get("content", "")[:20] for t in pending_todos[:3])
|
|
@@ -292,39 +353,62 @@ def _build_json_prompt(request, response_message, has_content):
|
|
|
292
353
|
f"Call jupyter_cell_tool with Python code to complete the next task.\n"
|
|
293
354
|
f"Example: {example_json}"
|
|
294
355
|
)
|
|
356
|
+
elif not todos:
|
|
357
|
+
# No todos yet = new task starting, LLM must create todos or call a tool
|
|
358
|
+
# This happens when LLM returns empty response at the start of a new task
|
|
359
|
+
logger.info("No todos exist yet - forcing retry to create todos or call tool")
|
|
360
|
+
return (
|
|
361
|
+
f"{JSON_TOOL_SCHEMA}\n\n"
|
|
362
|
+
f"Your response was empty. You MUST call a tool to proceed.\n"
|
|
363
|
+
f"한국어로 응답하고, write_todos로 작업 목록을 만들거나 jupyter_cell_tool/read_file_tool을 호출하세요.\n"
|
|
364
|
+
f'Example: {{"tool": "write_todos", "arguments": {{"todos": [{{"content": "데이터 분석", "status": "in_progress"}}]}}}}'
|
|
365
|
+
)
|
|
295
366
|
else:
|
|
367
|
+
# Todos exist but all completed - ask for summary
|
|
368
|
+
logger.info("All todos completed but response empty - asking for summary")
|
|
296
369
|
return (
|
|
297
370
|
f"{JSON_TOOL_SCHEMA}\n\n"
|
|
298
|
-
f"All tasks completed. Call
|
|
299
|
-
f
|
|
371
|
+
f"All tasks completed. Call markdown_tool to provide a summary in Korean.\n"
|
|
372
|
+
f"한국어로 작업 요약을 작성하세요.\n"
|
|
373
|
+
f'Example: {{"tool": "markdown_tool", "arguments": {{"content": "작업이 완료되었습니다."}}}}'
|
|
300
374
|
)
|
|
301
375
|
|
|
302
376
|
|
|
303
377
|
def _create_synthetic_final_answer(request, response_message, has_content):
|
|
304
|
-
"""Create synthetic
|
|
305
|
-
|
|
306
|
-
|
|
378
|
+
"""Create synthetic write_todos call to mark all todos as completed.
|
|
379
|
+
|
|
380
|
+
This triggers automatic session termination via router's all_todos_completed check.
|
|
381
|
+
Preserves the LLM's text content (summary) if present.
|
|
382
|
+
"""
|
|
383
|
+
todos = request.state.get("todos", [])
|
|
384
|
+
|
|
385
|
+
# Mark all todos as completed
|
|
386
|
+
completed_todos = (
|
|
387
|
+
[{**todo, "status": "completed"} for todo in todos]
|
|
388
|
+
if todos
|
|
389
|
+
else [{"content": "작업 완료", "status": "completed"}]
|
|
390
|
+
)
|
|
391
|
+
|
|
392
|
+
# Preserve original content (summary JSON) if present
|
|
393
|
+
original_content = ""
|
|
394
|
+
if has_content and response_message and response_message.content:
|
|
395
|
+
original_content = response_message.content
|
|
307
396
|
logger.info(
|
|
308
|
-
"
|
|
309
|
-
len(
|
|
397
|
+
"Creating synthetic write_todos with preserved content (length=%d)",
|
|
398
|
+
len(original_content),
|
|
310
399
|
)
|
|
311
400
|
else:
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
|
|
315
|
-
]
|
|
316
|
-
summary = (
|
|
317
|
-
f"작업이 완료되었습니다. 완료된 항목: {', '.join(completed_todos[:5])}"
|
|
318
|
-
if completed_todos
|
|
319
|
-
else "작업이 완료되었습니다."
|
|
401
|
+
logger.info(
|
|
402
|
+
"Creating synthetic write_todos to mark %d todos as completed",
|
|
403
|
+
len(completed_todos),
|
|
320
404
|
)
|
|
321
405
|
|
|
322
406
|
return AIMessage(
|
|
323
|
-
content=
|
|
407
|
+
content=original_content, # Preserve the summary content for UI
|
|
324
408
|
tool_calls=[
|
|
325
409
|
{
|
|
326
|
-
"name": "
|
|
327
|
-
"args": {"
|
|
410
|
+
"name": "write_todos",
|
|
411
|
+
"args": {"todos": completed_todos},
|
|
328
412
|
"id": str(uuid.uuid4()),
|
|
329
413
|
"type": "tool_call",
|
|
330
414
|
}
|
|
@@ -504,6 +588,30 @@ def create_normalize_tool_args_middleware(wrap_model_call, tools=None):
|
|
|
504
588
|
)
|
|
505
589
|
args[key] = normalized_value
|
|
506
590
|
|
|
591
|
+
# Ensure write_todos includes summary todo as last item
|
|
592
|
+
if tool_name == "write_todos" and "todos" in args:
|
|
593
|
+
todos = args["todos"]
|
|
594
|
+
if isinstance(todos, list) and len(todos) > 0:
|
|
595
|
+
# Check if any todo contains summary keywords
|
|
596
|
+
summary_keywords = ["작업 요약", "다음단계", "다음 단계", "요약 및"]
|
|
597
|
+
has_summary = any(
|
|
598
|
+
any(kw in todo.get("content", "") for kw in summary_keywords)
|
|
599
|
+
for todo in todos
|
|
600
|
+
if isinstance(todo, dict)
|
|
601
|
+
)
|
|
602
|
+
|
|
603
|
+
if not has_summary:
|
|
604
|
+
# Add summary todo as last item
|
|
605
|
+
summary_todo = {
|
|
606
|
+
"content": "작업 요약 및 다음단계 제시",
|
|
607
|
+
"status": "pending"
|
|
608
|
+
}
|
|
609
|
+
todos.append(summary_todo)
|
|
610
|
+
logger.info(
|
|
611
|
+
"Auto-added '작업 요약 및 다음단계 제시' to write_todos (total: %d todos)",
|
|
612
|
+
len(todos),
|
|
613
|
+
)
|
|
614
|
+
|
|
507
615
|
return response
|
|
508
616
|
|
|
509
617
|
return normalize_tool_args
|
|
@@ -543,16 +651,24 @@ def create_inject_continuation_middleware(wrap_model_call):
|
|
|
543
651
|
pass
|
|
544
652
|
|
|
545
653
|
if tool_name in NON_HITL_TOOLS:
|
|
546
|
-
logger.info(
|
|
547
|
-
"Injecting continuation prompt after non-HITL tool: %s",
|
|
548
|
-
tool_name,
|
|
549
|
-
)
|
|
550
|
-
|
|
551
654
|
todos = request.state.get("todos", [])
|
|
552
655
|
pending_todos = [
|
|
553
656
|
t for t in todos if t.get("status") in ("pending", "in_progress")
|
|
554
657
|
]
|
|
555
658
|
|
|
659
|
+
# If all todos are completed, don't inject continuation - let router handle termination
|
|
660
|
+
if not pending_todos and todos:
|
|
661
|
+
logger.info(
|
|
662
|
+
"All todos completed, skipping continuation for tool: %s",
|
|
663
|
+
tool_name,
|
|
664
|
+
)
|
|
665
|
+
return handler(request)
|
|
666
|
+
|
|
667
|
+
logger.info(
|
|
668
|
+
"Injecting continuation prompt after non-HITL tool: %s",
|
|
669
|
+
tool_name,
|
|
670
|
+
)
|
|
671
|
+
|
|
556
672
|
if pending_todos:
|
|
557
673
|
pending_list = ", ".join(
|
|
558
674
|
t.get("content", "")[:30] for t in pending_todos[:3]
|
|
@@ -563,9 +679,10 @@ def create_inject_continuation_middleware(wrap_model_call):
|
|
|
563
679
|
f"Call jupyter_cell_tool or the next appropriate tool."
|
|
564
680
|
)
|
|
565
681
|
else:
|
|
682
|
+
# No todos yet - let agent create them
|
|
566
683
|
continuation = (
|
|
567
|
-
f"Tool '{tool_name}' completed.
|
|
568
|
-
f"
|
|
684
|
+
f"Tool '{tool_name}' completed. "
|
|
685
|
+
f"Create a todo list with write_todos if needed."
|
|
569
686
|
)
|
|
570
687
|
|
|
571
688
|
new_messages = list(messages) + [
|
|
@@ -32,17 +32,9 @@ def get_hitl_interrupt_config() -> Dict[str, Any]:
|
|
|
32
32
|
"allowed_decisions": ["approve", "edit"],
|
|
33
33
|
"description": "📄 파일 읽기 실행 중",
|
|
34
34
|
},
|
|
35
|
-
"list_files_tool": {
|
|
36
|
-
"allowed_decisions": ["approve", "edit"],
|
|
37
|
-
"description": "📂 파일 목록 조회 중",
|
|
38
|
-
},
|
|
39
35
|
"write_todos": False, # Todo updates don't need approval
|
|
40
36
|
# Search tools need HITL for client-side execution (auto-approved by frontend)
|
|
41
37
|
# Uses 'edit' decision to pass execution_result back
|
|
42
|
-
"search_workspace_tool": {
|
|
43
|
-
"allowed_decisions": ["approve", "edit"],
|
|
44
|
-
"description": "🔍 Searching workspace files",
|
|
45
|
-
},
|
|
46
38
|
"search_notebook_cells_tool": {
|
|
47
39
|
"allowed_decisions": ["approve", "edit"],
|
|
48
40
|
"description": "🔍 Searching notebook cells",
|