hdsp-jupyter-extension 2.0.19__py3-none-any.whl → 2.0.21__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agent_server/langchain/agent_prompts/planner_prompt.py +3 -0
- agent_server/langchain/custom_middleware.py +0 -17
- agent_server/langchain/llm_factory.py +45 -5
- agent_server/langchain/logging_utils.py +108 -30
- agent_server/langchain/middleware/subagent_middleware.py +80 -11
- agent_server/langchain/models/__init__.py +5 -0
- agent_server/langchain/models/gpt_oss_chat.py +351 -0
- agent_server/langchain/prompts.py +1 -0
- agent_server/routers/langchain_agent.py +10 -0
- {hdsp_jupyter_extension-2.0.19.data → hdsp_jupyter_extension-2.0.21.data}/data/share/jupyter/labextensions/hdsp-agent/build_log.json +1 -1
- {hdsp_jupyter_extension-2.0.19.data → hdsp_jupyter_extension-2.0.21.data}/data/share/jupyter/labextensions/hdsp-agent/package.json +2 -2
- jupyter_ext/labextension/static/lib_index_js.1917fbaea37d75dc69b3.js → hdsp_jupyter_extension-2.0.21.data/data/share/jupyter/labextensions/hdsp-agent/static/lib_index_js.90f80cb80187de8c5ae5.js +71 -6
- hdsp_jupyter_extension-2.0.21.data/data/share/jupyter/labextensions/hdsp-agent/static/lib_index_js.90f80cb80187de8c5ae5.js.map +1 -0
- hdsp_jupyter_extension-2.0.19.data/data/share/jupyter/labextensions/hdsp-agent/static/remoteEntry.d686ab71eb65b5ef8f15.js → hdsp_jupyter_extension-2.0.21.data/data/share/jupyter/labextensions/hdsp-agent/static/remoteEntry.93b1c499786ecd47b837.js +3 -3
- hdsp_jupyter_extension-2.0.19.data/data/share/jupyter/labextensions/hdsp-agent/static/remoteEntry.d686ab71eb65b5ef8f15.js.map → hdsp_jupyter_extension-2.0.21.data/data/share/jupyter/labextensions/hdsp-agent/static/remoteEntry.93b1c499786ecd47b837.js.map +1 -1
- {hdsp_jupyter_extension-2.0.19.dist-info → hdsp_jupyter_extension-2.0.21.dist-info}/METADATA +1 -1
- {hdsp_jupyter_extension-2.0.19.dist-info → hdsp_jupyter_extension-2.0.21.dist-info}/RECORD +47 -45
- jupyter_ext/_version.py +1 -1
- jupyter_ext/labextension/build_log.json +1 -1
- jupyter_ext/labextension/package.json +2 -2
- hdsp_jupyter_extension-2.0.19.data/data/share/jupyter/labextensions/hdsp-agent/static/lib_index_js.1917fbaea37d75dc69b3.js → jupyter_ext/labextension/static/lib_index_js.90f80cb80187de8c5ae5.js +71 -6
- jupyter_ext/labextension/static/lib_index_js.90f80cb80187de8c5ae5.js.map +1 -0
- jupyter_ext/labextension/static/{remoteEntry.d686ab71eb65b5ef8f15.js → remoteEntry.93b1c499786ecd47b837.js} +3 -3
- jupyter_ext/labextension/static/{remoteEntry.d686ab71eb65b5ef8f15.js.map → remoteEntry.93b1c499786ecd47b837.js.map} +1 -1
- hdsp_jupyter_extension-2.0.19.data/data/share/jupyter/labextensions/hdsp-agent/static/lib_index_js.1917fbaea37d75dc69b3.js.map +0 -1
- jupyter_ext/labextension/static/lib_index_js.1917fbaea37d75dc69b3.js.map +0 -1
- {hdsp_jupyter_extension-2.0.19.data → hdsp_jupyter_extension-2.0.21.data}/data/etc/jupyter/jupyter_server_config.d/hdsp_jupyter_extension.json +0 -0
- {hdsp_jupyter_extension-2.0.19.data → hdsp_jupyter_extension-2.0.21.data}/data/share/jupyter/labextensions/hdsp-agent/install.json +0 -0
- {hdsp_jupyter_extension-2.0.19.data → hdsp_jupyter_extension-2.0.21.data}/data/share/jupyter/labextensions/hdsp-agent/static/frontend_styles_index_js.96745acc14125453fba8.js +0 -0
- {hdsp_jupyter_extension-2.0.19.data → hdsp_jupyter_extension-2.0.21.data}/data/share/jupyter/labextensions/hdsp-agent/static/frontend_styles_index_js.96745acc14125453fba8.js.map +0 -0
- {hdsp_jupyter_extension-2.0.19.data → hdsp_jupyter_extension-2.0.21.data}/data/share/jupyter/labextensions/hdsp-agent/static/node_modules_emotion_use-insertion-effect-with-fallbacks_dist_emotion-use-insertion-effect-wi-3ba6b80.c095373419d05e6f141a.js +0 -0
- {hdsp_jupyter_extension-2.0.19.data → hdsp_jupyter_extension-2.0.21.data}/data/share/jupyter/labextensions/hdsp-agent/static/node_modules_emotion_use-insertion-effect-with-fallbacks_dist_emotion-use-insertion-effect-wi-3ba6b80.c095373419d05e6f141a.js.map +0 -0
- {hdsp_jupyter_extension-2.0.19.data → hdsp_jupyter_extension-2.0.21.data}/data/share/jupyter/labextensions/hdsp-agent/static/node_modules_emotion_use-insertion-effect-with-fallbacks_dist_emotion-use-insertion-effect-wi-3ba6b81.61e75fb98ecff46cf836.js +0 -0
- {hdsp_jupyter_extension-2.0.19.data → hdsp_jupyter_extension-2.0.21.data}/data/share/jupyter/labextensions/hdsp-agent/static/node_modules_emotion_use-insertion-effect-with-fallbacks_dist_emotion-use-insertion-effect-wi-3ba6b81.61e75fb98ecff46cf836.js.map +0 -0
- {hdsp_jupyter_extension-2.0.19.data → hdsp_jupyter_extension-2.0.21.data}/data/share/jupyter/labextensions/hdsp-agent/static/style.js +0 -0
- {hdsp_jupyter_extension-2.0.19.data → hdsp_jupyter_extension-2.0.21.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_babel_runtime_helpers_esm_extends_js-node_modules_emotion_serialize_dist-051195.e2553aab0c3963b83dd7.js +0 -0
- {hdsp_jupyter_extension-2.0.19.data → hdsp_jupyter_extension-2.0.21.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_babel_runtime_helpers_esm_extends_js-node_modules_emotion_serialize_dist-051195.e2553aab0c3963b83dd7.js.map +0 -0
- {hdsp_jupyter_extension-2.0.19.data → hdsp_jupyter_extension-2.0.21.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_emotion_cache_dist_emotion-cache_browser_development_esm_js.24edcc52a1c014a8a5f0.js +0 -0
- {hdsp_jupyter_extension-2.0.19.data → hdsp_jupyter_extension-2.0.21.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_emotion_cache_dist_emotion-cache_browser_development_esm_js.24edcc52a1c014a8a5f0.js.map +0 -0
- {hdsp_jupyter_extension-2.0.19.data → hdsp_jupyter_extension-2.0.21.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_emotion_react_dist_emotion-react_browser_development_esm_js.19ecf6babe00caff6b8a.js +0 -0
- {hdsp_jupyter_extension-2.0.19.data → hdsp_jupyter_extension-2.0.21.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_emotion_react_dist_emotion-react_browser_development_esm_js.19ecf6babe00caff6b8a.js.map +0 -0
- {hdsp_jupyter_extension-2.0.19.data → hdsp_jupyter_extension-2.0.21.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_emotion_styled_dist_emotion-styled_browser_development_esm_js.661fb5836f4978a7c6e1.js +0 -0
- {hdsp_jupyter_extension-2.0.19.data → hdsp_jupyter_extension-2.0.21.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_emotion_styled_dist_emotion-styled_browser_development_esm_js.661fb5836f4978a7c6e1.js.map +0 -0
- {hdsp_jupyter_extension-2.0.19.data → hdsp_jupyter_extension-2.0.21.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_mui_material_index_js.985697e0162d8d088ca2.js +0 -0
- {hdsp_jupyter_extension-2.0.19.data → hdsp_jupyter_extension-2.0.21.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_mui_material_index_js.985697e0162d8d088ca2.js.map +0 -0
- {hdsp_jupyter_extension-2.0.19.data → hdsp_jupyter_extension-2.0.21.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_mui_material_utils_createSvgIcon_js.1f5038488cdfd8b3a85d.js +0 -0
- {hdsp_jupyter_extension-2.0.19.data → hdsp_jupyter_extension-2.0.21.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_mui_material_utils_createSvgIcon_js.1f5038488cdfd8b3a85d.js.map +0 -0
- {hdsp_jupyter_extension-2.0.19.dist-info → hdsp_jupyter_extension-2.0.21.dist-info}/WHEEL +0 -0
- {hdsp_jupyter_extension-2.0.19.dist-info → hdsp_jupyter_extension-2.0.21.dist-info}/licenses/LICENSE +0 -0

agent_server/langchain/agent_prompts/planner_prompt.py:

```diff
@@ -40,6 +40,9 @@ PLANNER_SYSTEM_PROMPT = """당신은 작업을 조율하는 Main Agent입니다.
 
 **중요**: task_tool 결과를 받은 후 바로 write_todos로 완료 처리하지 말고, 반드시 위 도구로 결과를 먼저 적용!
 
+**🔴 KeyboardInterrupt 발생 시**: jupyter_cell_tool 실행 중 KeyboardInterrupt가 발생하면 ask_user_tool로 중단 사유를 사용자에게 확인
+- 예: ask_user_tool(question="코드 실행이 중단되었습니다. 중단 사유를 알려주시면 다음 진행에 참고하겠습니다.", input_type="text")
+
 # write_todos 규칙 [필수]
 - 한국어로 작성
 - **🔴 기존 todo 절대 삭제 금지**: 전체 리스트를 항상 포함하고 status만 변경
```
agent_server/langchain/custom_middleware.py:

```diff
@@ -1053,23 +1053,6 @@ def create_normalize_tool_args_middleware(wrap_model_call, tools=None):
                     break
                 break
 
-            # Clean AIMessage content when write_todos is called
-            # Remove redundant todos JSON from content (keep summary JSON)
-            if tool_name == "write_todos":
-                msg_content = getattr(msg, "content", "") or ""
-                if msg_content and '"todos"' in msg_content:
-                    # Keep content only if it's summary JSON
-                    is_summary_json = (
-                        '"summary"' in msg_content
-                        and '"next_items"' in msg_content
-                    )
-                    if not is_summary_json:
-                        # Clear redundant todos content
-                        msg.content = ""
-                        logger.info(
-                            "Cleared redundant todos JSON from AIMessage content (write_todos tool_call exists)"
-                        )
-
         return response
 
     return normalize_tool_args
```
agent_server/langchain/llm_factory.py:

```diff
@@ -97,16 +97,44 @@ def _create_vllm_llm(llm_config: Dict[str, Any], callbacks):
     endpoint = vllm_config.get("endpoint", "http://localhost:8000/v1")
     model = vllm_config.get("model", "default")
     api_key = vllm_config.get("apiKey", "dummy")
+    use_responses_api = vllm_config.get("useResponsesApi", False)
+    temperature = vllm_config.get("temperature", 0.0)
 
-    logger.info(
+    logger.info(
+        f"Creating vLLM LLM with model: {model}, endpoint: {endpoint}, "
+        f"use_responses_api: {use_responses_api}, temperature: {temperature}"
+    )
+
+    # Use ChatGPTOSS for gpt-oss models (Harmony format with developer role)
+    # NOTE: OpenRouter doesn't support 'developer' role - only use for direct gpt-oss endpoints
+    is_openrouter = "openrouter" in endpoint.lower()
+    if "gpt-oss" in model.lower() and not is_openrouter:
+        from agent_server.langchain.models import ChatGPTOSS
+
+        logger.info("Using ChatGPTOSS for gpt-oss model (developer role support)")
+        return ChatGPTOSS(
+            model=model,
+            base_url=endpoint,
+            api_key=api_key,
+            temperature=temperature,
+            max_tokens=8192,
+            streaming=False,
+            callbacks=callbacks,
+        )
+    elif "gpt-oss" in model.lower() and is_openrouter:
+        logger.warning(
+            "gpt-oss model via OpenRouter - using standard ChatOpenAI "
+            "(developer role not supported by OpenRouter)"
+        )
 
     return ChatOpenAI(
         model=model,
         api_key=api_key,
         base_url=endpoint,  # Use endpoint as-is (no /v1 suffix added)
         streaming=False,  # Agent mode: disable LLM streaming (SSE handled by agent server)
-        temperature=
-        max_tokens=
+        temperature=temperature,
+        max_tokens=8192,
+        use_responses_api=use_responses_api,  # Use /v1/responses endpoint if True
         callbacks=callbacks,
     )
 
```
llm_factory.py (continued):

```diff
@@ -148,14 +176,26 @@ def create_summarization_llm(llm_config: Dict[str, Any]):
             temperature=0.0,
         )
     elif provider == "vllm":
-        from langchain_openai import ChatOpenAI
-
         vllm_config = llm_config.get("vllm", {})
         # User provides full base URL (e.g., https://openrouter.ai/api/v1)
         endpoint = vllm_config.get("endpoint", "http://localhost:8000/v1")
         model = vllm_config.get("model", "default")
         api_key = vllm_config.get("apiKey", "dummy")
 
+        # Use ChatGPTOSS for gpt-oss models (but not via OpenRouter)
+        is_openrouter = "openrouter" in endpoint.lower()
+        if "gpt-oss" in model.lower() and not is_openrouter:
+            from agent_server.langchain.models import ChatGPTOSS
+
+            return ChatGPTOSS(
+                model=model,
+                base_url=endpoint,
+                api_key=api_key,
+                temperature=0.0,
+            )
+
+        from langchain_openai import ChatOpenAI
+
         return ChatOpenAI(
             model=model,
             api_key=api_key,
```
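For orientation, the routing rule added in both llm_factory.py hunks reduces to a single predicate on the configured model and endpoint. The sketch below only restates that branch logic for illustration; `pick_chat_class` and the sample model names are local stand-ins, not package APIs:

```python
# Illustrative sketch (not part of the diff): the provider-selection rule applied above.
def pick_chat_class(model: str, endpoint: str) -> str:
    is_openrouter = "openrouter" in endpoint.lower()
    if "gpt-oss" in model.lower() and not is_openrouter:
        return "ChatGPTOSS"  # direct gpt-oss endpoint: Harmony format, 'developer' role
    return "ChatOpenAI"      # everything else, including gpt-oss routed via OpenRouter

assert pick_chat_class("gpt-oss-120b", "http://localhost:8000/v1") == "ChatGPTOSS"
assert pick_chat_class("gpt-oss-120b", "https://openrouter.ai/api/v1") == "ChatOpenAI"
assert pick_chat_class("some-other-model", "http://localhost:8000/v1") == "ChatOpenAI"
```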
agent_server/langchain/logging_utils.py:

```diff
@@ -14,8 +14,40 @@ from langchain_core.callbacks import BaseCallbackHandler
 
 logger = logging.getLogger(__name__)
 
+# Dedicated logger for LLM responses - always enabled with its own handler
+llm_response_logger = logging.getLogger("agent_server.llm_response")
+llm_response_logger.setLevel(logging.INFO)
+llm_response_logger.propagate = True  # Propagate to root logger
+
+# Ensure it has a handler if running standalone
+if not llm_response_logger.handlers and not logging.getLogger().handlers:
+    _handler = logging.StreamHandler()
+    _handler.setFormatter(logging.Formatter("%(message)s"))
+    llm_response_logger.addHandler(_handler)
+
+
+def disable_langchain_logging():
+    """Disable all langchain logging except LLM responses."""
+    # Set all langchain loggers to CRITICAL
+    for name in list(logging.Logger.manager.loggerDict.keys()):
+        if "langchain" in name.lower() or name.startswith("agent_server.langchain"):
+            logging.getLogger(name).setLevel(logging.CRITICAL)
+    # Keep LLM response logger at INFO
+    llm_response_logger.setLevel(logging.INFO)
+
+
+# Auto-disable on import (comment this line to re-enable all logs)
+disable_langchain_logging()
+
 LOG_SEPARATOR = "=" * 96
 LOG_SUBSECTION = "-" * 96
+LOG_EMOJI_LINE = "🔵" * 48
+LOG_REQUEST_START = f"\n\n{'🟢' * 48}\n{'=' * 96}\n 📤 LLM REQUEST START\n{'=' * 96}"
+LOG_REQUEST_END = f"{'=' * 96}\n 📤 LLM REQUEST END\n{'=' * 96}\n{'🟢' * 48}\n"
+LOG_RESPONSE_START = (
+    f"\n\n{LOG_EMOJI_LINE}\n{'=' * 96}\n ✨ LLM RESPONSE START\n{'=' * 96}"
+)
+LOG_RESPONSE_END = f"{'=' * 96}\n ✅ LLM RESPONSE END\n{'=' * 96}\n{LOG_EMOJI_LINE}\n"
 
 
 def _format_system_prompt_for_log(messages) -> tuple[int, int, str]:
```
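The module-level change above follows a common stdlib pattern: silence broad logger families while pinning one dedicated logger at INFO. A minimal self-contained sketch of the same pattern (plain `logging` only, no package imports), for reference:

```python
import logging

logging.basicConfig(level=logging.INFO, format="%(name)s: %(message)s")

llm_response_logger = logging.getLogger("agent_server.llm_response")
noisy = logging.getLogger("langchain.core.runnables")

# Mirror disable_langchain_logging(): raise langchain loggers to CRITICAL,
# keep the dedicated response logger at INFO.
for name in list(logging.Logger.manager.loggerDict.keys()):
    if "langchain" in name.lower():
        logging.getLogger(name).setLevel(logging.CRITICAL)
llm_response_logger.setLevel(logging.INFO)

noisy.info("hidden")                      # suppressed: logger level is CRITICAL
llm_response_logger.info("LLM response")  # still emitted through the root handler
```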
logging_utils.py (continued):

```diff
@@ -179,45 +211,91 @@ class LLMTraceLogger(BaseCallbackHandler):
         logger.info("%s", "\n".join(lines))
 
     def on_chat_model_start(self, serialized, messages, **kwargs) -> None:
-
-
-
-
-
-
-
+        """Log LLM request messages as raw structured JSON."""
+        print(LOG_REQUEST_START, flush=True)
+
+        # Build raw structured request data
+        request_data = {
+            "model": serialized.get("name", "unknown") if serialized else "unknown",
+            "kwargs": {k: str(v)[:200] for k, v in kwargs.items() if k != "messages"},
+            "messages": [],
+        }
+
+        for batch in self._normalize_batches(messages):
+            batch_messages = []
+            for msg in batch:
+                batch_messages.append(_serialize_message(msg))
+            request_data["messages"].append(batch_messages)
+
+        # Output beautified JSON
+        print(_pretty_json(request_data), flush=True)
+
+        print(LOG_REQUEST_END, flush=True)
+
+        # --- OLD TEXT-PARSED LOGGING (commented out) ---
+        # for batch_idx, batch in enumerate(self._normalize_batches(messages)):
+        #     msg_types = {}
+        #     for msg in batch:
+        #         msg_type = msg.__class__.__name__
+        #         msg_types[msg_type] = msg_types.get(msg_type, 0) + 1
+        #     print(f"\nBatch {batch_idx}: {len(batch)} messages - {msg_types}", flush=True)
+        #     recent_count = min(5, len(batch))
+        #     if len(batch) > recent_count:
+        #         print(f"... ({len(batch) - recent_count} earlier messages omitted)", flush=True)
+        #     for idx, message in enumerate(batch[-recent_count:], start=len(batch) - recent_count):
+        #         lines = [LOG_SUBSECTION]
+        #         lines.append(f"[{idx}] {message.__class__.__name__}")
+        #         lines.append(_pretty_json(_serialize_message(message)))
+        #         print("\n".join(lines), flush=True)
 
     def on_chat_model_end(self, response, **kwargs) -> None:
+        """Log LLM response as raw structured JSON."""
+        print(LOG_RESPONSE_START, flush=True)
+
+        # Build raw structured response data
+        response_data = {
+            "llm_output": getattr(response, "llm_output", None),
+            "generations": [],
+        }
+
         generations = getattr(response, "generations", None) or []
         if generations and isinstance(generations[0], list):
             batches = generations
         else:
             batches = [generations]
 
-        for
-
+        for batch in batches:
+            batch_data = []
+            for generation in batch:
+                gen_data = {}
                 message = getattr(generation, "message", None)
-                if
-
-
-
-
+                if message:
+                    gen_data["message"] = _serialize_message(message)
+                gen_data["text"] = getattr(generation, "text", None)
+                gen_data["generation_info"] = getattr(
+                    generation, "generation_info", None
                 )
-
-
-
-
-
-
-
-
-
+                batch_data.append(gen_data)
+            response_data["generations"].append(batch_data)
+
+        # Output beautified JSON
+        print(_pretty_json(response_data), flush=True)
+
+        print(LOG_RESPONSE_END, flush=True)
+
+        # --- OLD TEXT-PARSED LOGGING (commented out) ---
+        # for batch_idx, batch in enumerate(batches):
+        #     for gen_idx, generation in enumerate(batch):
+        #         message = getattr(generation, "message", None)
+        #         if not message:
+        #             continue
+        #         title = f"LLM -> AGENT RESPONSE (batch={batch_idx}, generation={gen_idx})"
+        #         print(_format_messages_block(title, [message]), flush=True)
+        #         tool_calls = getattr(message, "tool_calls", None)
+        #         if tool_calls:
+        #             tool_title = f"LLM -> AGENT TOOL CALLS (batch={batch_idx}, generation={gen_idx})"
+        #             print(_format_json_block(tool_title, tool_calls), flush=True)
 
     def on_llm_start(self, serialized, prompts, **kwargs) -> None:
-
-
-            return
-
-        for idx, prompt in enumerate(prompts):
-            title = f"LLM PROMPT (batch={idx}, length={len(prompt)})"
-            logger.info("%s", _format_json_block(title, prompt))
+        # Request logging disabled - only log responses
+        pass
```
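A possible way to exercise the reworked callbacks, assuming `LLMTraceLogger` is exported from `agent_server.langchain.logging_utils` and can be constructed without arguments (its `__init__` is not shown in this diff):

```python
# Usage sketch only; constructor signature of LLMTraceLogger is an assumption.
from langchain_openai import ChatOpenAI
from agent_server.langchain.logging_utils import LLMTraceLogger

llm = ChatOpenAI(
    model="default",
    base_url="http://localhost:8000/v1",
    api_key="dummy",
    callbacks=[LLMTraceLogger()],  # on_chat_model_start/end now print raw JSON blocks
)
# llm.invoke("ping")  # each call would be bracketed by LLM REQUEST/RESPONSE banners on stdout
```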
agent_server/langchain/middleware/subagent_middleware.py:

```diff
@@ -9,8 +9,11 @@ Key features:
 - Context isolation: subagents run in clean context
 - Synchronous execution: subagent returns result directly to caller
 - Nested subagent support: python_developer can call athena_query
+- Subagent caching: compiled agents are cached to avoid recompilation overhead
 """
 
+import hashlib
+import json
 import logging
 from typing import TYPE_CHECKING, Any, Dict, List, Optional
 
```
subagent_middleware.py (continued):

```diff
@@ -25,6 +28,8 @@ logger = logging.getLogger(__name__)
 # Global registry for subagent factories (set by AgentFactory)
 _subagent_factory = None
 _current_llm_config = None
+# Subagent cache: key = "{agent_name}_{config_hash}" -> compiled agent
+_subagent_cache: Dict[str, Any] = {}
 
 
 def set_subagent_factory(factory_func, llm_config: Dict[str, Any]):
```
subagent_middleware.py (continued):

```diff
@@ -32,10 +37,12 @@ def set_subagent_factory(factory_func, llm_config: Dict[str, Any]):
     Set the subagent factory function.
     Called by AgentFactory during initialization.
     """
-    global _subagent_factory, _current_llm_config
+    global _subagent_factory, _current_llm_config, _subagent_cache
    _subagent_factory = factory_func
     _current_llm_config = llm_config
-
+    # Clear cache when factory changes (new LLM config)
+    _subagent_cache.clear()
+    logger.info("SubAgentMiddleware factory initialized (cache cleared)")
 
 
 def get_subagent_factory():
```
subagent_middleware.py (continued):

```diff
@@ -43,6 +50,48 @@ def get_subagent_factory():
     return _subagent_factory, _current_llm_config
 
 
+def _get_config_hash(llm_config: Dict[str, Any]) -> str:
+    """Generate a hash of llm_config for caching."""
+    config_str = json.dumps(llm_config, sort_keys=True, default=str)
+    return hashlib.md5(config_str.encode()).hexdigest()[:12]
+
+
+def get_or_create_subagent(
+    agent_name: str, factory_func, llm_config: Dict[str, Any]
+) -> Any:
+    """
+    Get cached subagent or create new one.
+
+    Caching avoids expensive recompilation of LangGraph agents.
+    Cache key = "{agent_name}_{config_hash}" to handle different LLM configs.
+    """
+    global _subagent_cache
+
+    config_hash = _get_config_hash(llm_config)
+    cache_key = f"{agent_name}_{config_hash}"
+
+    if cache_key in _subagent_cache:
+        logger.info(f"Using cached subagent '{agent_name}' (key={cache_key})")
+        return _subagent_cache[cache_key]
+
+    logger.info(f"Creating new subagent '{agent_name}' (key={cache_key})...")
+    subagent = factory_func(agent_name, llm_config)
+    _subagent_cache[cache_key] = subagent
+    logger.info(
+        f"Cached subagent '{agent_name}' (total cached: {len(_subagent_cache)})"
+    )
+
+    return subagent
+
+
+def clear_subagent_cache():
+    """Clear the subagent cache. Useful for testing or config changes."""
+    global _subagent_cache
+    count = len(_subagent_cache)
+    _subagent_cache.clear()
+    logger.info(f"Subagent cache cleared ({count} entries removed)")
+
+
 def create_task_tool(
     caller_name: str,
     allowed_subagents: Optional[List[str]] = None,
```
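The cache key scheme above is `{agent_name}_{first 12 hex chars of md5(sorted-JSON config)}`. A standalone sketch of that hashing behavior with made-up configs (it mirrors `_get_config_hash` rather than importing the package code):

```python
import hashlib
import json

def config_hash(llm_config: dict) -> str:
    # Same recipe as _get_config_hash: canonical JSON, md5, first 12 hex chars.
    config_str = json.dumps(llm_config, sort_keys=True, default=str)
    return hashlib.md5(config_str.encode()).hexdigest()[:12]

cfg_a = {"provider": "vllm", "vllm": {"model": "gpt-oss-120b", "temperature": 0.0}}
cfg_b = {"vllm": {"temperature": 0.0, "model": "gpt-oss-120b"}, "provider": "vllm"}
cfg_c = {"provider": "vllm", "vllm": {"model": "some-other-model"}}

# Key order is irrelevant (sort_keys=True), so cfg_a and cfg_b share one cache entry...
assert f"python_developer_{config_hash(cfg_a)}" == f"python_developer_{config_hash(cfg_b)}"
# ...while a different config yields a different key, i.e. a separately compiled subagent.
assert config_hash(cfg_a) != config_hash(cfg_c)
```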
subagent_middleware.py (continued):

```diff
@@ -96,11 +145,13 @@
         )
         context: Optional[str] = Field(
             default=None,
-            description="Additional context for the subagent: resource info (file sizes, memory), previous code, variable state, etc."
+            description="Additional context for the subagent: resource info (file sizes, memory), previous code, variable state, etc.",
         )
 
     @tool(args_schema=TaskInput)
-    def task_tool(
+    def task_tool(
+        agent_name: str, description: str, context: Optional[str] = None
+    ) -> str:
         """
         Delegate a task to a specialized subagent.
 
```
subagent_middleware.py (continued):

```diff
@@ -133,10 +184,10 @@
 
         # Import subagent event emitters
         from agent_server.langchain.middleware.subagent_events import (
-
+            clear_current_subagent,
             emit_subagent_complete,
+            emit_subagent_start,
             set_current_subagent,
-            clear_current_subagent,
         )
 
         # Emit subagent start event for UI
```
subagent_middleware.py (continued):

```diff
@@ -148,11 +199,17 @@
             return "Error: SubAgentMiddleware not initialized. Call set_subagent_factory first."
 
         try:
+            import time
+
             # Set current subagent context for tool call tracking
             set_current_subagent(agent_name)
 
-            #
-
+            # Get or create the subagent (cached for performance)
+            # Avoids expensive LangGraph recompilation on each call
+            t0 = time.time()
+            subagent = get_or_create_subagent(agent_name, factory_func, llm_config)
+            t1 = time.time()
+            logger.info(f"[TIMING] get_or_create_subagent took {t1-t0:.2f}s")
 
             # Execute subagent synchronously with clean context
             # The subagent runs in isolation, receiving task description + optional context
```
subagent_middleware.py (continued):

```diff
@@ -169,15 +226,18 @@
             enhanced_context = context
             if agent_name == "python_developer":
                 try:
+                    t2 = time.time()
                     from agent_server.langchain.middleware.code_history_middleware import (
-                        get_context_with_history,
                         get_code_history_tracker,
+                        get_context_with_history,
                     )
+
                     tracker = get_code_history_tracker()
                     if tracker.get_entry_count() > 0:
                         enhanced_context = get_context_with_history(context)
+                        t3 = time.time()
                         logger.info(
-                            f"[
+                            f"[TIMING] code history injection took {t3-t2:.2f}s "
                             f"(entries={tracker.get_entry_count()}, "
                             f"context_len={len(enhanced_context) if enhanced_context else 0})"
                         )
```
subagent_middleware.py (continued):

```diff
@@ -194,13 +254,21 @@
             else:
                 message_content = description
 
-            logger.info(
+            logger.info(
+                f"[{caller_name}] Subagent message length: {len(message_content)}"
+            )
 
             # Execute the subagent
+            t_invoke_start = time.time()
+            logger.info(f"[TIMING] About to invoke subagent '{agent_name}'...")
             result = subagent.invoke(
                 {"messages": [{"role": "user", "content": message_content}]},
                 config=subagent_config,
             )
+            t_invoke_end = time.time()
+            logger.info(
+                f"[TIMING] subagent.invoke() took {t_invoke_end-t_invoke_start:.2f}s"
+            )
 
             # Extract the final message from the result
             messages = result.get("messages", [])
```
subagent_middleware.py (continued):

```diff
@@ -223,6 +291,7 @@
                 from agent_server.langchain.middleware.description_injector import (
                     process_task_tool_response,
                 )
+
                 process_task_tool_response(agent_name, str(response))
             except Exception as e:
                 logger.warning(f"Failed to extract description: {e}")
```