hdsp-jupyter-extension 2.0.7__py3-none-any.whl → 2.0.10__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agent_server/core/embedding_service.py +67 -46
- agent_server/core/rag_manager.py +40 -17
- agent_server/core/retriever.py +12 -6
- agent_server/core/vllm_embedding_service.py +246 -0
- agent_server/langchain/ARCHITECTURE.md +7 -51
- agent_server/langchain/agent.py +39 -20
- agent_server/langchain/custom_middleware.py +206 -62
- agent_server/langchain/hitl_config.py +6 -9
- agent_server/langchain/llm_factory.py +85 -1
- agent_server/langchain/logging_utils.py +52 -13
- agent_server/langchain/prompts.py +85 -45
- agent_server/langchain/tools/__init__.py +14 -10
- agent_server/langchain/tools/file_tools.py +266 -40
- agent_server/langchain/tools/file_utils.py +334 -0
- agent_server/langchain/tools/jupyter_tools.py +0 -1
- agent_server/langchain/tools/lsp_tools.py +264 -0
- agent_server/langchain/tools/resource_tools.py +12 -12
- agent_server/langchain/tools/search_tools.py +3 -158
- agent_server/main.py +7 -0
- agent_server/routers/langchain_agent.py +207 -102
- agent_server/routers/rag.py +8 -3
- hdsp_agent_core/models/rag.py +15 -1
- hdsp_agent_core/services/rag_service.py +6 -1
- {hdsp_jupyter_extension-2.0.7.data → hdsp_jupyter_extension-2.0.10.data}/data/share/jupyter/labextensions/hdsp-agent/build_log.json +1 -1
- {hdsp_jupyter_extension-2.0.7.data → hdsp_jupyter_extension-2.0.10.data}/data/share/jupyter/labextensions/hdsp-agent/package.json +3 -2
- hdsp_jupyter_extension-2.0.7.data/data/share/jupyter/labextensions/hdsp-agent/static/frontend_styles_index_js.4770ec0fb2d173b6deb4.js → hdsp_jupyter_extension-2.0.10.data/data/share/jupyter/labextensions/hdsp-agent/static/frontend_styles_index_js.2d9fb488c82498c45c2d.js +251 -5
- hdsp_jupyter_extension-2.0.10.data/data/share/jupyter/labextensions/hdsp-agent/static/frontend_styles_index_js.2d9fb488c82498c45c2d.js.map +1 -0
- hdsp_jupyter_extension-2.0.7.data/data/share/jupyter/labextensions/hdsp-agent/static/lib_index_js.29cf4312af19e86f82af.js → hdsp_jupyter_extension-2.0.10.data/data/share/jupyter/labextensions/hdsp-agent/static/lib_index_js.dc6434bee96ab03a0539.js +1831 -274
- hdsp_jupyter_extension-2.0.10.data/data/share/jupyter/labextensions/hdsp-agent/static/lib_index_js.dc6434bee96ab03a0539.js.map +1 -0
- hdsp_jupyter_extension-2.0.7.data/data/share/jupyter/labextensions/hdsp-agent/static/remoteEntry.61343eb4cf0577e74b50.js → hdsp_jupyter_extension-2.0.10.data/data/share/jupyter/labextensions/hdsp-agent/static/remoteEntry.4a252df3ade74efee8d6.js +11 -9
- hdsp_jupyter_extension-2.0.10.data/data/share/jupyter/labextensions/hdsp-agent/static/remoteEntry.4a252df3ade74efee8d6.js.map +1 -0
- jupyter_ext/labextension/static/vendors-node_modules_emotion_cache_dist_emotion-cache_browser_development_esm_js-node_modules-782ee5.d9ed8645ef1d311657d8.js → hdsp_jupyter_extension-2.0.10.data/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_emotion_cache_dist_emotion-cache_browser_development_esm_js.24edcc52a1c014a8a5f0.js +2 -209
- hdsp_jupyter_extension-2.0.10.data/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_emotion_cache_dist_emotion-cache_browser_development_esm_js.24edcc52a1c014a8a5f0.js.map +1 -0
- jupyter_ext/labextension/static/vendors-node_modules_emotion_react_dist_emotion-react_browser_development_esm_js.36b49c71871f98d4f549.js → hdsp_jupyter_extension-2.0.10.data/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_emotion_react_dist_emotion-react_browser_development_esm_js.19ecf6babe00caff6b8a.js +209 -2
- hdsp_jupyter_extension-2.0.10.data/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_emotion_react_dist_emotion-react_browser_development_esm_js.19ecf6babe00caff6b8a.js.map +1 -0
- jupyter_ext/labextension/static/vendors-node_modules_mui_material_utils_createSvgIcon_js.2e13df4ea61496e95d45.js → hdsp_jupyter_extension-2.0.10.data/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_mui_material_utils_createSvgIcon_js.1f5038488cdfd8b3a85d.js +212 -3
- hdsp_jupyter_extension-2.0.10.data/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_mui_material_utils_createSvgIcon_js.1f5038488cdfd8b3a85d.js.map +1 -0
- {hdsp_jupyter_extension-2.0.7.dist-info → hdsp_jupyter_extension-2.0.10.dist-info}/METADATA +1 -3
- hdsp_jupyter_extension-2.0.10.dist-info/RECORD +144 -0
- jupyter_ext/__init__.py +18 -0
- jupyter_ext/_version.py +1 -1
- jupyter_ext/handlers.py +176 -1
- jupyter_ext/labextension/build_log.json +1 -1
- jupyter_ext/labextension/package.json +3 -2
- jupyter_ext/labextension/static/{frontend_styles_index_js.4770ec0fb2d173b6deb4.js → frontend_styles_index_js.2d9fb488c82498c45c2d.js} +251 -5
- jupyter_ext/labextension/static/frontend_styles_index_js.2d9fb488c82498c45c2d.js.map +1 -0
- jupyter_ext/labextension/static/{lib_index_js.29cf4312af19e86f82af.js → lib_index_js.dc6434bee96ab03a0539.js} +1831 -274
- jupyter_ext/labextension/static/lib_index_js.dc6434bee96ab03a0539.js.map +1 -0
- jupyter_ext/labextension/static/{remoteEntry.61343eb4cf0577e74b50.js → remoteEntry.4a252df3ade74efee8d6.js} +11 -9
- jupyter_ext/labextension/static/remoteEntry.4a252df3ade74efee8d6.js.map +1 -0
- hdsp_jupyter_extension-2.0.7.data/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_emotion_cache_dist_emotion-cache_browser_development_esm_js-node_modules-782ee5.d9ed8645ef1d311657d8.js → jupyter_ext/labextension/static/vendors-node_modules_emotion_cache_dist_emotion-cache_browser_development_esm_js.24edcc52a1c014a8a5f0.js +2 -209
- jupyter_ext/labextension/static/vendors-node_modules_emotion_cache_dist_emotion-cache_browser_development_esm_js.24edcc52a1c014a8a5f0.js.map +1 -0
- hdsp_jupyter_extension-2.0.7.data/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_emotion_react_dist_emotion-react_browser_development_esm_js.36b49c71871f98d4f549.js → jupyter_ext/labextension/static/vendors-node_modules_emotion_react_dist_emotion-react_browser_development_esm_js.19ecf6babe00caff6b8a.js +209 -2
- jupyter_ext/labextension/static/vendors-node_modules_emotion_react_dist_emotion-react_browser_development_esm_js.19ecf6babe00caff6b8a.js.map +1 -0
- hdsp_jupyter_extension-2.0.7.data/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_mui_material_utils_createSvgIcon_js.2e13df4ea61496e95d45.js → jupyter_ext/labextension/static/vendors-node_modules_mui_material_utils_createSvgIcon_js.1f5038488cdfd8b3a85d.js +212 -3
- jupyter_ext/labextension/static/vendors-node_modules_mui_material_utils_createSvgIcon_js.1f5038488cdfd8b3a85d.js.map +1 -0
- hdsp_jupyter_extension-2.0.7.data/data/share/jupyter/labextensions/hdsp-agent/static/frontend_styles_index_js.4770ec0fb2d173b6deb4.js.map +0 -1
- hdsp_jupyter_extension-2.0.7.data/data/share/jupyter/labextensions/hdsp-agent/static/lib_index_js.29cf4312af19e86f82af.js.map +0 -1
- hdsp_jupyter_extension-2.0.7.data/data/share/jupyter/labextensions/hdsp-agent/static/remoteEntry.61343eb4cf0577e74b50.js.map +0 -1
- hdsp_jupyter_extension-2.0.7.data/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_emotion_cache_dist_emotion-cache_browser_development_esm_js-node_modules-782ee5.d9ed8645ef1d311657d8.js.map +0 -1
- hdsp_jupyter_extension-2.0.7.data/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_emotion_react_dist_emotion-react_browser_development_esm_js.36b49c71871f98d4f549.js.map +0 -1
- hdsp_jupyter_extension-2.0.7.data/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_mui_material_utils_createSvgIcon_js.2e13df4ea61496e95d45.js.map +0 -1
- hdsp_jupyter_extension-2.0.7.dist-info/RECORD +0 -141
- jupyter_ext/labextension/static/frontend_styles_index_js.4770ec0fb2d173b6deb4.js.map +0 -1
- jupyter_ext/labextension/static/lib_index_js.29cf4312af19e86f82af.js.map +0 -1
- jupyter_ext/labextension/static/remoteEntry.61343eb4cf0577e74b50.js.map +0 -1
- jupyter_ext/labextension/static/vendors-node_modules_emotion_cache_dist_emotion-cache_browser_development_esm_js-node_modules-782ee5.d9ed8645ef1d311657d8.js.map +0 -1
- jupyter_ext/labextension/static/vendors-node_modules_emotion_react_dist_emotion-react_browser_development_esm_js.36b49c71871f98d4f549.js.map +0 -1
- jupyter_ext/labextension/static/vendors-node_modules_mui_material_utils_createSvgIcon_js.2e13df4ea61496e95d45.js.map +0 -1
- {hdsp_jupyter_extension-2.0.7.data → hdsp_jupyter_extension-2.0.10.data}/data/etc/jupyter/jupyter_server_config.d/hdsp_jupyter_extension.json +0 -0
- {hdsp_jupyter_extension-2.0.7.data → hdsp_jupyter_extension-2.0.10.data}/data/share/jupyter/labextensions/hdsp-agent/install.json +0 -0
- {hdsp_jupyter_extension-2.0.7.data → hdsp_jupyter_extension-2.0.10.data}/data/share/jupyter/labextensions/hdsp-agent/static/node_modules_emotion_use-insertion-effect-with-fallbacks_dist_emotion-use-insertion-effect-wi-3ba6b80.c095373419d05e6f141a.js +0 -0
- {hdsp_jupyter_extension-2.0.7.data → hdsp_jupyter_extension-2.0.10.data}/data/share/jupyter/labextensions/hdsp-agent/static/node_modules_emotion_use-insertion-effect-with-fallbacks_dist_emotion-use-insertion-effect-wi-3ba6b80.c095373419d05e6f141a.js.map +0 -0
- {hdsp_jupyter_extension-2.0.7.data → hdsp_jupyter_extension-2.0.10.data}/data/share/jupyter/labextensions/hdsp-agent/static/node_modules_emotion_use-insertion-effect-with-fallbacks_dist_emotion-use-insertion-effect-wi-3ba6b81.61e75fb98ecff46cf836.js +0 -0
- {hdsp_jupyter_extension-2.0.7.data → hdsp_jupyter_extension-2.0.10.data}/data/share/jupyter/labextensions/hdsp-agent/static/node_modules_emotion_use-insertion-effect-with-fallbacks_dist_emotion-use-insertion-effect-wi-3ba6b81.61e75fb98ecff46cf836.js.map +0 -0
- {hdsp_jupyter_extension-2.0.7.data → hdsp_jupyter_extension-2.0.10.data}/data/share/jupyter/labextensions/hdsp-agent/static/style.js +0 -0
- {hdsp_jupyter_extension-2.0.7.data → hdsp_jupyter_extension-2.0.10.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_babel_runtime_helpers_esm_extends_js-node_modules_emotion_serialize_dist-051195.e2553aab0c3963b83dd7.js +0 -0
- {hdsp_jupyter_extension-2.0.7.data → hdsp_jupyter_extension-2.0.10.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_babel_runtime_helpers_esm_extends_js-node_modules_emotion_serialize_dist-051195.e2553aab0c3963b83dd7.js.map +0 -0
- {hdsp_jupyter_extension-2.0.7.data → hdsp_jupyter_extension-2.0.10.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_emotion_styled_dist_emotion-styled_browser_development_esm_js.661fb5836f4978a7c6e1.js +0 -0
- {hdsp_jupyter_extension-2.0.7.data → hdsp_jupyter_extension-2.0.10.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_emotion_styled_dist_emotion-styled_browser_development_esm_js.661fb5836f4978a7c6e1.js.map +0 -0
- {hdsp_jupyter_extension-2.0.7.data → hdsp_jupyter_extension-2.0.10.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_mui_material_index_js.985697e0162d8d088ca2.js +0 -0
- {hdsp_jupyter_extension-2.0.7.data → hdsp_jupyter_extension-2.0.10.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_mui_material_index_js.985697e0162d8d088ca2.js.map +0 -0
- {hdsp_jupyter_extension-2.0.7.dist-info → hdsp_jupyter_extension-2.0.10.dist-info}/WHEEL +0 -0
- {hdsp_jupyter_extension-2.0.7.dist-info → hdsp_jupyter_extension-2.0.10.dist-info}/licenses/LICENSE +0 -0
|
@@ -32,17 +32,9 @@ def get_hitl_interrupt_config() -> Dict[str, Any]:
|
|
|
32
32
|
"allowed_decisions": ["approve", "edit"],
|
|
33
33
|
"description": "📄 파일 읽기 실행 중",
|
|
34
34
|
},
|
|
35
|
-
"list_files_tool": {
|
|
36
|
-
"allowed_decisions": ["approve", "edit"],
|
|
37
|
-
"description": "📂 파일 목록 조회 중",
|
|
38
|
-
},
|
|
39
35
|
"write_todos": False, # Todo updates don't need approval
|
|
40
36
|
# Search tools need HITL for client-side execution (auto-approved by frontend)
|
|
41
37
|
# Uses 'edit' decision to pass execution_result back
|
|
42
|
-
"search_workspace_tool": {
|
|
43
|
-
"allowed_decisions": ["approve", "edit"],
|
|
44
|
-
"description": "🔍 Searching workspace files",
|
|
45
|
-
},
|
|
46
38
|
"search_notebook_cells_tool": {
|
|
47
39
|
"allowed_decisions": ["approve", "edit"],
|
|
48
40
|
"description": "🔍 Searching notebook cells",
|
|
@@ -59,7 +51,12 @@ def get_hitl_interrupt_config() -> Dict[str, Any]:
|
|
|
59
51
|
# File write requires approval
|
|
60
52
|
"write_file_tool": {
|
|
61
53
|
"allowed_decisions": ["approve", "edit", "reject"],
|
|
62
|
-
"description": "
|
|
54
|
+
"description": "File write requires approval",
|
|
55
|
+
},
|
|
56
|
+
# File edit requires approval (string replacement with diff preview)
|
|
57
|
+
"edit_file_tool": {
|
|
58
|
+
"allowed_decisions": ["approve", "edit", "reject"],
|
|
59
|
+
"description": "File edit requires approval",
|
|
63
60
|
},
|
|
64
61
|
# Final answer doesn't need approval
|
|
65
62
|
"final_answer_tool": False,
|
|
@@ -4,6 +4,7 @@ LLM Factory for LangChain agent.
|
|
|
4
4
|
Provides functions to create LangChain LLM instances from configuration.
|
|
5
5
|
"""
|
|
6
6
|
|
|
7
|
+
import json
|
|
7
8
|
import logging
|
|
8
9
|
from typing import Any, Dict
|
|
9
10
|
|
|
@@ -12,6 +13,74 @@ from agent_server.langchain.logging_utils import LLMTraceLogger
|
|
|
12
13
|
logger = logging.getLogger(__name__)
|
|
13
14
|
|
|
14
15
|
|
|
16
|
+
def _stringify_content(content: Any) -> str:
|
|
17
|
+
if content is None:
|
|
18
|
+
return ""
|
|
19
|
+
if isinstance(content, list):
|
|
20
|
+
parts = []
|
|
21
|
+
for part in content:
|
|
22
|
+
if isinstance(part, str):
|
|
23
|
+
parts.append(part)
|
|
24
|
+
elif isinstance(part, dict):
|
|
25
|
+
if part.get("type") == "text":
|
|
26
|
+
parts.append(part.get("text", ""))
|
|
27
|
+
elif "text" in part:
|
|
28
|
+
parts.append(part.get("text", ""))
|
|
29
|
+
return "\n".join(p for p in parts if p)
|
|
30
|
+
return str(content)
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def _summarize_payload(
|
|
34
|
+
payload: Dict[str, Any],
|
|
35
|
+
max_preview: int = 200,
|
|
36
|
+
max_items: int = 12,
|
|
37
|
+
) -> Dict[str, Any]:
|
|
38
|
+
summary: Dict[str, Any] = {}
|
|
39
|
+
|
|
40
|
+
def summarize_messages(key: str) -> None:
|
|
41
|
+
items = payload.get(key)
|
|
42
|
+
if not isinstance(items, list):
|
|
43
|
+
return
|
|
44
|
+
summarized = []
|
|
45
|
+
for item in items[:max_items]:
|
|
46
|
+
if not isinstance(item, dict):
|
|
47
|
+
summarized.append({"type": type(item).__name__})
|
|
48
|
+
continue
|
|
49
|
+
content_text = _stringify_content(item.get("content", ""))
|
|
50
|
+
entry = {
|
|
51
|
+
"role": item.get("role"),
|
|
52
|
+
"content_len": len(content_text),
|
|
53
|
+
"content_preview": content_text[:max_preview],
|
|
54
|
+
}
|
|
55
|
+
tool_calls = item.get("tool_calls")
|
|
56
|
+
if isinstance(tool_calls, list):
|
|
57
|
+
entry["tool_calls"] = [
|
|
58
|
+
tc.get("function", {}).get("name") or tc.get("name")
|
|
59
|
+
for tc in tool_calls
|
|
60
|
+
if isinstance(tc, dict)
|
|
61
|
+
]
|
|
62
|
+
summarized.append(entry)
|
|
63
|
+
if len(items) > max_items:
|
|
64
|
+
summarized.append({"truncated": len(items) - max_items})
|
|
65
|
+
summary[key] = summarized
|
|
66
|
+
|
|
67
|
+
summarize_messages("messages")
|
|
68
|
+
summarize_messages("input")
|
|
69
|
+
|
|
70
|
+
tools = payload.get("tools")
|
|
71
|
+
if isinstance(tools, list):
|
|
72
|
+
summary["tools"] = [
|
|
73
|
+
tool.get("function", {}).get("name") or tool.get("name")
|
|
74
|
+
for tool in tools
|
|
75
|
+
if isinstance(tool, dict)
|
|
76
|
+
]
|
|
77
|
+
|
|
78
|
+
if "tool_choice" in payload:
|
|
79
|
+
summary["tool_choice"] = payload.get("tool_choice")
|
|
80
|
+
|
|
81
|
+
return summary
|
|
82
|
+
|
|
83
|
+
|
|
15
84
|
def create_llm(llm_config: Dict[str, Any]):
|
|
16
85
|
"""Create LangChain LLM from config.
|
|
17
86
|
|
|
@@ -99,7 +168,22 @@ def _create_vllm_llm(llm_config: Dict[str, Any], callbacks):
|
|
|
99
168
|
|
|
100
169
|
logger.info(f"Creating vLLM LLM with model: {model}, endpoint: {endpoint}")
|
|
101
170
|
|
|
102
|
-
|
|
171
|
+
llm_class = ChatOpenAI
|
|
172
|
+
if "gpt-oss" in model.lower():
|
|
173
|
+
|
|
174
|
+
class ChatOpenAIGptOss(ChatOpenAI):
|
|
175
|
+
def _get_request_payload(self, input_, *, stop=None, **kwargs):
|
|
176
|
+
payload = super()._get_request_payload(input_, stop=stop, **kwargs)
|
|
177
|
+
summary = _summarize_payload(payload)
|
|
178
|
+
logger.info(
|
|
179
|
+
"gpt-oss payload summary: %s",
|
|
180
|
+
json.dumps(summary, ensure_ascii=False),
|
|
181
|
+
)
|
|
182
|
+
return payload
|
|
183
|
+
|
|
184
|
+
llm_class = ChatOpenAIGptOss
|
|
185
|
+
|
|
186
|
+
return llm_class(
|
|
103
187
|
model=model,
|
|
104
188
|
api_key=api_key,
|
|
105
189
|
base_url=f"{endpoint}/v1",
|
|
@@ -115,7 +115,27 @@ def _with_middleware_logging(name: str):
|
|
|
115
115
|
|
|
116
116
|
|
|
117
117
|
class LLMTraceLogger(BaseCallbackHandler):
|
|
118
|
-
"""Log prompts, responses, tool calls, and tool messages.
|
|
118
|
+
"""Log prompts, responses, tool calls, and tool messages.
|
|
119
|
+
|
|
120
|
+
Only logs newly added messages to avoid duplicate logging of conversation history.
|
|
121
|
+
Uses content hash of first message (usually system prompt) to identify conversation threads.
|
|
122
|
+
"""
|
|
123
|
+
|
|
124
|
+
def __init__(self):
    """Initialise the handler with an empty log-progress table."""
    super().__init__()
    # Number of messages already logged, looked up per conversation thread.
    # Keys are built from a hash of the thread's first message content;
    # values are the message count recorded at the last log.
    self._last_message_counts: Dict[str, int] = dict()
|
|
129
|
+
|
|
130
|
+
def _get_conversation_key(self, batch) -> str:
|
|
131
|
+
"""Get a stable key for the conversation based on first message content."""
|
|
132
|
+
if not batch:
|
|
133
|
+
return "empty"
|
|
134
|
+
first_msg = batch[0]
|
|
135
|
+
content = getattr(first_msg, "content", "")
|
|
136
|
+
# Use hash of first 200 chars of first message (usually system prompt)
|
|
137
|
+
content_preview = str(content)[:200] if content else ""
|
|
138
|
+
return str(hash(content_preview))
|
|
119
139
|
|
|
120
140
|
def _normalize_batches(self, messages):
|
|
121
141
|
if not messages:
|
|
@@ -125,19 +145,38 @@ class LLMTraceLogger(BaseCallbackHandler):
|
|
|
125
145
|
return [messages]
|
|
126
146
|
|
|
127
147
|
def _log_prompt_batches(self, title: str, messages) -> None:
    """Log only the messages of each batch that have not been logged before.

    For every batch returned by ``self._normalize_batches(messages)`` the
    number of messages already logged is looked up under a key built from
    the conversation key and the batch index; only the messages past that
    offset are written, and the stored count is then set to the batch
    length.

    Args:
        title: Heading placed at the top of each log block.
        messages: Prompt messages as passed to the callback; normalised
            into batches by ``self._normalize_batches``.
    """
    for batch_idx, batch in enumerate(self._normalize_batches(messages)):
        # Get stable conversation key based on first message
        conv_key = self._get_conversation_key(batch)
        batch_key = f"{conv_key}_{batch_idx}"
        last_count = self._last_message_counts.get(batch_key, 0)

        # The key depends only on the first message, so a different (or
        # trimmed) conversation with the same opening message maps to the
        # same entry. A batch shorter than the recorded count cannot be a
        # continuation of what was logged, so start over instead of
        # silently logging nothing until it outgrows the stale count.
        if len(batch) < last_count:
            last_count = 0

        # Only log new messages
        new_messages = batch[last_count:]
        if not new_messages:
            logger.debug(
                "Skipping duplicate log for batch %d (already logged %d messages)",
                batch_idx,
                last_count,
            )
            continue

        # Update count
        self._last_message_counts[batch_key] = len(batch)

        # Log with offset info
        header = f"{title} (batch={batch_idx}, new={len(new_messages)}, total={len(batch)})"

        # Format new messages with their indices in the full batch
        lines = [LOG_SEPARATOR, header, LOG_SEPARATOR]
        for idx, message in enumerate(new_messages, start=last_count):
            lines.append(f"[{idx}] {message.__class__.__name__}")
            lines.append(_pretty_json(_serialize_message(message)))
            # Sub-separator between messages, but not after the last one.
            if idx < len(batch) - 1:
                lines.append(LOG_SUBSECTION)
        lines.append(LOG_SEPARATOR)
        logger.info("%s", "\n".join(lines))
|
|
141
180
|
|
|
142
181
|
def on_chat_model_start(self, serialized, messages, **kwargs) -> None:
|
|
143
182
|
if not messages:
|
|
@@ -8,23 +8,24 @@ and middleware-specific prompts.
|
|
|
8
8
|
DEFAULT_SYSTEM_PROMPT = """You are an expert Python data scientist and Jupyter notebook assistant.
|
|
9
9
|
Your role is to help users with data analysis, visualization, and Python coding tasks in Jupyter notebooks. You can use only Korean
|
|
10
10
|
|
|
11
|
-
|
|
11
|
+
# Core Behavior
|
|
12
|
+
Be concise and direct. Answer in fewer than 4 lines unless the user asks for detail.
|
|
13
|
+
After working on a file, just stop - don't explain what you did unless asked.
|
|
14
|
+
Avoid unnecessary introductions or conclusions.
|
|
15
|
+
|
|
16
|
+
## Task Management
|
|
17
|
+
Use write_todos for complex multi-step tasks (3+ steps). Mark tasks in_progress before starting, completed immediately after finishing.
|
|
18
|
+
For simple 1-2 step tasks, just do them directly without todos.
|
|
19
|
+
When creating a todo list, ALWAYS include "작업 요약 및 다음단계 제시" as the LAST todo item.
|
|
12
20
|
|
|
13
21
|
You MUST ALWAYS call a tool in every response. After any tool result, you MUST:
|
|
14
22
|
1. Check your todo list - are there pending or in_progress items?
|
|
15
23
|
2. If YES → call the next appropriate tool (jupyter_cell_tool, markdown_tool, etc.)
|
|
16
|
-
3. When
|
|
17
|
-
{
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
"description": "<detailed description for the next step>"
|
|
22
|
-
}, ...
|
|
23
|
-
]
|
|
24
|
-
}
|
|
25
|
-
4. If ALL todos are completed → call final_answer_tool with a summary
|
|
26
|
-
|
|
27
|
-
NEVER end your turn without calling a tool. NEVER produce an empty response.
|
|
24
|
+
3. When executing "작업 요약 및 다음단계 제시" (final todo):
|
|
25
|
+
- Output this JSON as text content: {"summary": "실행된 작업 요약", "next_items": [{"subject": "제목", "description": "설명"}]}
|
|
26
|
+
- Suggest 3~5 next items
|
|
27
|
+
- AND call write_todos to mark all as 'completed' IN THE SAME RESPONSE
|
|
28
|
+
- Both content AND tool call must be in ONE response
|
|
28
29
|
|
|
29
30
|
## 🔴 MANDATORY: Resource Check Before Data Hanlding
|
|
30
31
|
**ALWAYS call check_resource_tool FIRST** when the task involves:
|
|
@@ -35,21 +36,46 @@ NEVER end your turn without calling a tool. NEVER produce an empty response.
|
|
|
35
36
|
## Mandatory Workflow
|
|
36
37
|
1. After EVERY tool result, immediately call the next tool
|
|
37
38
|
2. Continue until ALL todos show status: "completed"
|
|
38
|
-
3.
|
|
39
|
-
4.
|
|
40
|
-
5. For plots and charts, use English text only.
|
|
39
|
+
3. Only use jupyter_cell_tool for Python code or when the user explicitly asks to run in a notebook cell
|
|
40
|
+
4. For plots and charts, use English text only.
|
|
41
41
|
|
|
42
42
|
## ❌ FORBIDDEN (will break the workflow)
|
|
43
43
|
- Producing an empty response (no tool call, no content)
|
|
44
44
|
- Stopping after any tool without calling the next tool
|
|
45
|
-
- Ending without calling final_answer_tool
|
|
46
45
|
- Leaving todos in "in_progress" or "pending" state without continuing
|
|
47
46
|
|
|
48
|
-
##
|
|
49
|
-
**
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
-
|
|
47
|
+
## 📂 File Search Best Practices
|
|
48
|
+
**CRITICAL**: Use `execute_command_tool` with find/grep commands for file searching.
|
|
49
|
+
|
|
50
|
+
**To find files by NAME** (e.g., find titanic.csv):
|
|
51
|
+
- `execute_command_tool(command="find . -iname '*titanic*.csv' 2>/dev/null")`
|
|
52
|
+
- `execute_command_tool(command="find . -name '*.csv' 2>/dev/null")` - find all CSV files
|
|
53
|
+
|
|
54
|
+
**To search file CONTENTS** (e.g., find code containing "import pandas"):
|
|
55
|
+
- `execute_command_tool(command="grep -rn 'import pandas' --include='*.py' .")`
|
|
56
|
+
- `execute_command_tool(command="grep -rn 'def train_model' --include='*.py' --include='*.ipynb' .")`
|
|
57
|
+
|
|
58
|
+
## 📖 File Reading Best Practices
|
|
59
|
+
**CRITICAL**: When exploring codebases or reading files, use pagination to prevent context overflow.
|
|
60
|
+
|
|
61
|
+
**Pattern for codebase exploration:**
|
|
62
|
+
1. First scan: `read_file_tool(path, limit=100)` - See file structure and key sections
|
|
63
|
+
2. Targeted read: `read_file_tool(path, offset=100, limit=200)` - Read specific sections if needed
|
|
64
|
+
3. Full read: Only read without limit when necessary for immediate editing
|
|
65
|
+
|
|
66
|
+
**When to paginate (use offset/limit):**
|
|
67
|
+
- Reading any file >500 lines
|
|
68
|
+
- Exploring unfamiliar codebases (always start with limit=100)
|
|
69
|
+
- Reading multiple files in sequence
|
|
70
|
+
- Any research or investigation task
|
|
71
|
+
|
|
72
|
+
**When full read is OK:**
|
|
73
|
+
- Small files (<500 lines)
|
|
74
|
+
- Files you need to edit immediately after reading
|
|
75
|
+
- After confirming file size with first scan
|
|
76
|
+
|
|
77
|
+
## 🔧 Code Development
|
|
78
|
+
For code generation/refactoring, use LSP tools (diagnostics_tool, references_tool) to check errors and find symbol usages. Use multiedit_file_tool for multiple changes in one file.
|
|
53
79
|
"""
|
|
54
80
|
|
|
55
81
|
JSON_TOOL_SCHEMA = """You MUST respond with ONLY valid JSON matching this schema:
|
|
@@ -61,47 +87,60 @@ JSON_TOOL_SCHEMA = """You MUST respond with ONLY valid JSON matching this schema
|
|
|
61
87
|
Available tools:
|
|
62
88
|
- jupyter_cell_tool: Execute Python code. Arguments: {"code": "<python_code>"}
|
|
63
89
|
- markdown_tool: Add markdown cell. Arguments: {"content": "<markdown>"}
|
|
64
|
-
- final_answer_tool: Complete task. Arguments: {"answer": "<summary>"}
|
|
65
90
|
- write_todos: Update task list. Arguments: {"todos": [{"content": "...", "status": "pending|in_progress|completed"}]}
|
|
66
|
-
- read_file_tool: Read file. Arguments: {"path": "<file_path>"}
|
|
91
|
+
- read_file_tool: Read file with pagination. Arguments: {"path": "<file_path>", "offset": 0, "limit": 500}
|
|
67
92
|
- write_file_tool: Write file. Arguments: {"path": "<path>", "content": "<content>", "overwrite": false}
|
|
68
|
-
- list_files_tool: List directory. Arguments: {"path": ".", "recursive": false}
|
|
69
|
-
- search_workspace_tool: Search files. Arguments: {"pattern": "<regex>", "file_types": ["py"], "path": "."}
|
|
70
93
|
- search_notebook_cells_tool: Search notebook cells. Arguments: {"pattern": "<regex>"}
|
|
71
|
-
- execute_command_tool: Execute shell command. Arguments: {"command": "<command>", "stdin": "<input_for_prompts>"}
|
|
94
|
+
- execute_command_tool: Execute shell command. Use for file search with find/grep. Arguments: {"command": "<command>", "stdin": "<input_for_prompts>"}
|
|
72
95
|
- check_resource_tool: Check resources before data processing. Arguments: {"files": ["<path>"], "dataframes": ["<var_name>"]}
|
|
73
96
|
|
|
74
97
|
Output ONLY the JSON object, no markdown, no explanation."""
|
|
75
98
|
|
|
76
99
|
TODO_LIST_SYSTEM_PROMPT = """
|
|
77
100
|
## CRITICAL WORKFLOW RULES - MUST FOLLOW:
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
101
|
+
- NEVER stop after calling write_todos - ALWAYS make another tool call immediately (unless ALL todos are completed)
|
|
102
|
+
- For simple 1-2 step tasks, just do them directly without todos.
|
|
103
|
+
|
|
104
|
+
## 🔴 NEW USER MESSAGE = FRESH START:
|
|
105
|
+
- When user sends a NEW message, treat it as a COMPLETELY NEW TASK
|
|
106
|
+
- **CRITICAL**: Previous conversation history shows PAST work, NOT current work
|
|
107
|
+
- Even if you see a similar todo was "completed" before, you MUST do it again NOW
|
|
108
|
+
- The completion status in chat history is IRRELEVANT to current todos
|
|
109
|
+
- Each todo in the CURRENT list must be executed from scratch, regardless of history
|
|
110
|
+
|
|
111
|
+
## 🔴 MANDATORY Todo List Structure:
|
|
112
|
+
When creating todos, ALWAYS include "작업 요약 및 다음단계 제시" as the LAST item:
|
|
113
|
+
- 실제 작업 항목들...
|
|
114
|
+
- 작업 요약 및 다음단계 제시 ← 반드시 마지막에 포함!
|
|
81
115
|
|
|
82
116
|
## Todo List Management:
|
|
117
|
+
- 반드시 모든 todo item 은 한국어로 생성.
|
|
83
118
|
- Before complex tasks, use write_todos to create a task list
|
|
84
119
|
- Update todos as you complete each step (mark 'in_progress' → 'completed')
|
|
85
|
-
- Each todo item should be specific and descriptive
|
|
86
|
-
- All todo items must be written in Korean
|
|
87
|
-
- ALWAYS include "다음 단계 제시" as the LAST item
|
|
120
|
+
- Each todo item should be specific and descriptive
|
|
88
121
|
|
|
89
122
|
## Task Completion Flow:
|
|
90
|
-
1.
|
|
91
|
-
2.
|
|
92
|
-
3. For "
|
|
93
|
-
4.
|
|
123
|
+
1. Check CURRENT todo list status (not chat history!) → find 'pending' or 'in_progress' item
|
|
124
|
+
2. Execute the task yourself in THIS response → then mark 'completed'
|
|
125
|
+
3. For "작업 요약 및 다음단계 제시" → output summary JSON as plain text, then mark completed
|
|
126
|
+
4. When ALL todos are 'completed' → session ends automatically
|
|
127
|
+
|
|
128
|
+
⚠️ HOW TO CHECK IF TODO IS DONE:
|
|
129
|
+
- ✅ Done: You executed it in THIS response (you made the tool call, you got the result)
|
|
130
|
+
- ❌ Not done: You only SEE it was done in previous messages (that's history, not your work)
|
|
94
131
|
|
|
95
132
|
## FORBIDDEN PATTERNS:
|
|
96
|
-
❌ Calling write_todos and then stopping
|
|
97
133
|
❌ Updating todo status without doing the actual work
|
|
98
|
-
❌
|
|
134
|
+
❌ Marking a todo as 'completed' without actually executing it in THIS response
|
|
135
|
+
❌ Creating todo list WITHOUT "작업 요약 및 다음단계 제시" as final item
|
|
136
|
+
❌ Skipping ANY todo because a similar one was completed in PREVIOUS messages - past history ≠ current work
|
|
137
|
+
❌ Assuming work is done based on chat history - you must ALWAYS execute todos yourself
|
|
99
138
|
"""
|
|
100
139
|
|
|
101
140
|
TODO_LIST_TOOL_DESCRIPTION = """Update the task list for tracking progress.
|
|
102
141
|
⚠️ CRITICAL: This tool is ONLY for tracking - it does NOT do any actual work.
|
|
103
|
-
|
|
104
|
-
|
|
142
|
+
- If there are still pending/in_progress todos: call the next action tool immediately after
|
|
143
|
+
- If ALL todos are 'completed': output summary text BEFORE calling this tool, then call this to end the session"""
|
|
105
144
|
|
|
106
145
|
# Non-HITL tools that execute immediately without user approval
|
|
107
146
|
NON_HITL_TOOLS = {
|
|
@@ -109,11 +148,12 @@ NON_HITL_TOOLS = {
|
|
|
109
148
|
"markdown",
|
|
110
149
|
"read_file_tool",
|
|
111
150
|
"read_file",
|
|
112
|
-
"list_files_tool",
|
|
113
|
-
"list_files",
|
|
114
|
-
"search_workspace_tool",
|
|
115
|
-
"search_workspace",
|
|
116
151
|
"search_notebook_cells_tool",
|
|
117
152
|
"search_notebook_cells",
|
|
118
153
|
"write_todos",
|
|
154
|
+
# LSP tools (read-only)
|
|
155
|
+
"diagnostics_tool",
|
|
156
|
+
"diagnostics",
|
|
157
|
+
"references_tool",
|
|
158
|
+
"references",
|
|
119
159
|
}
|
|
@@ -4,42 +4,46 @@ LangChain Tools for Jupyter Agent
|
|
|
4
4
|
Tools available:
|
|
5
5
|
- jupyter_cell: Execute Python code in notebook
|
|
6
6
|
- markdown: Add markdown cell
|
|
7
|
-
- final_answer: Complete the task
|
|
8
7
|
- read_file: Read file content
|
|
9
8
|
- write_file: Write file content
|
|
10
|
-
-
|
|
11
|
-
- search_workspace: Search files in workspace
|
|
9
|
+
- edit_file: Edit file with string replacement
|
|
12
10
|
- search_notebook_cells: Search cells in notebooks
|
|
13
|
-
- execute_command_tool: Run shell commands (client-executed)
|
|
11
|
+
- execute_command_tool: Run shell commands (client-executed, also for file search)
|
|
14
12
|
- check_resource_tool: Check resources before data processing (client-executed)
|
|
13
|
+
- diagnostics_tool: Get LSP diagnostics (errors, warnings)
|
|
14
|
+
- references_tool: Find symbol references via LSP
|
|
15
15
|
"""
|
|
16
16
|
|
|
17
17
|
from agent_server.langchain.tools.file_tools import (
|
|
18
|
-
|
|
18
|
+
edit_file_tool,
|
|
19
|
+
multiedit_file_tool,
|
|
19
20
|
read_file_tool,
|
|
20
21
|
write_file_tool,
|
|
21
22
|
)
|
|
22
23
|
from agent_server.langchain.tools.jupyter_tools import (
|
|
23
|
-
final_answer_tool,
|
|
24
24
|
jupyter_cell_tool,
|
|
25
25
|
markdown_tool,
|
|
26
26
|
)
|
|
27
|
+
from agent_server.langchain.tools.lsp_tools import (
|
|
28
|
+
diagnostics_tool,
|
|
29
|
+
references_tool,
|
|
30
|
+
)
|
|
27
31
|
from agent_server.langchain.tools.resource_tools import check_resource_tool
|
|
28
32
|
from agent_server.langchain.tools.search_tools import (
|
|
29
33
|
search_notebook_cells_tool,
|
|
30
|
-
search_workspace_tool,
|
|
31
34
|
)
|
|
32
35
|
from agent_server.langchain.tools.shell_tools import execute_command_tool
|
|
33
36
|
|
|
34
37
|
__all__ = [
|
|
35
38
|
"jupyter_cell_tool",
|
|
36
39
|
"markdown_tool",
|
|
37
|
-
"final_answer_tool",
|
|
38
40
|
"read_file_tool",
|
|
39
41
|
"write_file_tool",
|
|
40
|
-
"
|
|
41
|
-
"
|
|
42
|
+
"edit_file_tool",
|
|
43
|
+
"multiedit_file_tool",
|
|
42
44
|
"search_notebook_cells_tool",
|
|
43
45
|
"execute_command_tool",
|
|
44
46
|
"check_resource_tool",
|
|
47
|
+
"diagnostics_tool",
|
|
48
|
+
"references_tool",
|
|
45
49
|
]
|