hdsp-jupyter-extension 2.0.19__py3-none-any.whl → 2.0.21__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49)
  1. agent_server/langchain/agent_prompts/planner_prompt.py +3 -0
  2. agent_server/langchain/custom_middleware.py +0 -17
  3. agent_server/langchain/llm_factory.py +45 -5
  4. agent_server/langchain/logging_utils.py +108 -30
  5. agent_server/langchain/middleware/subagent_middleware.py +80 -11
  6. agent_server/langchain/models/__init__.py +5 -0
  7. agent_server/langchain/models/gpt_oss_chat.py +351 -0
  8. agent_server/langchain/prompts.py +1 -0
  9. agent_server/routers/langchain_agent.py +10 -0
  10. {hdsp_jupyter_extension-2.0.19.data → hdsp_jupyter_extension-2.0.21.data}/data/share/jupyter/labextensions/hdsp-agent/build_log.json +1 -1
  11. {hdsp_jupyter_extension-2.0.19.data → hdsp_jupyter_extension-2.0.21.data}/data/share/jupyter/labextensions/hdsp-agent/package.json +2 -2
  12. jupyter_ext/labextension/static/lib_index_js.1917fbaea37d75dc69b3.js → hdsp_jupyter_extension-2.0.21.data/data/share/jupyter/labextensions/hdsp-agent/static/lib_index_js.90f80cb80187de8c5ae5.js +71 -6
  13. hdsp_jupyter_extension-2.0.21.data/data/share/jupyter/labextensions/hdsp-agent/static/lib_index_js.90f80cb80187de8c5ae5.js.map +1 -0
  14. hdsp_jupyter_extension-2.0.19.data/data/share/jupyter/labextensions/hdsp-agent/static/remoteEntry.d686ab71eb65b5ef8f15.js → hdsp_jupyter_extension-2.0.21.data/data/share/jupyter/labextensions/hdsp-agent/static/remoteEntry.93b1c499786ecd47b837.js +3 -3
  15. hdsp_jupyter_extension-2.0.19.data/data/share/jupyter/labextensions/hdsp-agent/static/remoteEntry.d686ab71eb65b5ef8f15.js.map → hdsp_jupyter_extension-2.0.21.data/data/share/jupyter/labextensions/hdsp-agent/static/remoteEntry.93b1c499786ecd47b837.js.map +1 -1
  16. {hdsp_jupyter_extension-2.0.19.dist-info → hdsp_jupyter_extension-2.0.21.dist-info}/METADATA +1 -1
  17. {hdsp_jupyter_extension-2.0.19.dist-info → hdsp_jupyter_extension-2.0.21.dist-info}/RECORD +47 -45
  18. jupyter_ext/_version.py +1 -1
  19. jupyter_ext/labextension/build_log.json +1 -1
  20. jupyter_ext/labextension/package.json +2 -2
  21. hdsp_jupyter_extension-2.0.19.data/data/share/jupyter/labextensions/hdsp-agent/static/lib_index_js.1917fbaea37d75dc69b3.js → jupyter_ext/labextension/static/lib_index_js.90f80cb80187de8c5ae5.js +71 -6
  22. jupyter_ext/labextension/static/lib_index_js.90f80cb80187de8c5ae5.js.map +1 -0
  23. jupyter_ext/labextension/static/{remoteEntry.d686ab71eb65b5ef8f15.js → remoteEntry.93b1c499786ecd47b837.js} +3 -3
  24. jupyter_ext/labextension/static/{remoteEntry.d686ab71eb65b5ef8f15.js.map → remoteEntry.93b1c499786ecd47b837.js.map} +1 -1
  25. hdsp_jupyter_extension-2.0.19.data/data/share/jupyter/labextensions/hdsp-agent/static/lib_index_js.1917fbaea37d75dc69b3.js.map +0 -1
  26. jupyter_ext/labextension/static/lib_index_js.1917fbaea37d75dc69b3.js.map +0 -1
  27. {hdsp_jupyter_extension-2.0.19.data → hdsp_jupyter_extension-2.0.21.data}/data/etc/jupyter/jupyter_server_config.d/hdsp_jupyter_extension.json +0 -0
  28. {hdsp_jupyter_extension-2.0.19.data → hdsp_jupyter_extension-2.0.21.data}/data/share/jupyter/labextensions/hdsp-agent/install.json +0 -0
  29. {hdsp_jupyter_extension-2.0.19.data → hdsp_jupyter_extension-2.0.21.data}/data/share/jupyter/labextensions/hdsp-agent/static/frontend_styles_index_js.96745acc14125453fba8.js +0 -0
  30. {hdsp_jupyter_extension-2.0.19.data → hdsp_jupyter_extension-2.0.21.data}/data/share/jupyter/labextensions/hdsp-agent/static/frontend_styles_index_js.96745acc14125453fba8.js.map +0 -0
  31. {hdsp_jupyter_extension-2.0.19.data → hdsp_jupyter_extension-2.0.21.data}/data/share/jupyter/labextensions/hdsp-agent/static/node_modules_emotion_use-insertion-effect-with-fallbacks_dist_emotion-use-insertion-effect-wi-3ba6b80.c095373419d05e6f141a.js +0 -0
  32. {hdsp_jupyter_extension-2.0.19.data → hdsp_jupyter_extension-2.0.21.data}/data/share/jupyter/labextensions/hdsp-agent/static/node_modules_emotion_use-insertion-effect-with-fallbacks_dist_emotion-use-insertion-effect-wi-3ba6b80.c095373419d05e6f141a.js.map +0 -0
  33. {hdsp_jupyter_extension-2.0.19.data → hdsp_jupyter_extension-2.0.21.data}/data/share/jupyter/labextensions/hdsp-agent/static/node_modules_emotion_use-insertion-effect-with-fallbacks_dist_emotion-use-insertion-effect-wi-3ba6b81.61e75fb98ecff46cf836.js +0 -0
  34. {hdsp_jupyter_extension-2.0.19.data → hdsp_jupyter_extension-2.0.21.data}/data/share/jupyter/labextensions/hdsp-agent/static/node_modules_emotion_use-insertion-effect-with-fallbacks_dist_emotion-use-insertion-effect-wi-3ba6b81.61e75fb98ecff46cf836.js.map +0 -0
  35. {hdsp_jupyter_extension-2.0.19.data → hdsp_jupyter_extension-2.0.21.data}/data/share/jupyter/labextensions/hdsp-agent/static/style.js +0 -0
  36. {hdsp_jupyter_extension-2.0.19.data → hdsp_jupyter_extension-2.0.21.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_babel_runtime_helpers_esm_extends_js-node_modules_emotion_serialize_dist-051195.e2553aab0c3963b83dd7.js +0 -0
  37. {hdsp_jupyter_extension-2.0.19.data → hdsp_jupyter_extension-2.0.21.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_babel_runtime_helpers_esm_extends_js-node_modules_emotion_serialize_dist-051195.e2553aab0c3963b83dd7.js.map +0 -0
  38. {hdsp_jupyter_extension-2.0.19.data → hdsp_jupyter_extension-2.0.21.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_emotion_cache_dist_emotion-cache_browser_development_esm_js.24edcc52a1c014a8a5f0.js +0 -0
  39. {hdsp_jupyter_extension-2.0.19.data → hdsp_jupyter_extension-2.0.21.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_emotion_cache_dist_emotion-cache_browser_development_esm_js.24edcc52a1c014a8a5f0.js.map +0 -0
  40. {hdsp_jupyter_extension-2.0.19.data → hdsp_jupyter_extension-2.0.21.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_emotion_react_dist_emotion-react_browser_development_esm_js.19ecf6babe00caff6b8a.js +0 -0
  41. {hdsp_jupyter_extension-2.0.19.data → hdsp_jupyter_extension-2.0.21.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_emotion_react_dist_emotion-react_browser_development_esm_js.19ecf6babe00caff6b8a.js.map +0 -0
  42. {hdsp_jupyter_extension-2.0.19.data → hdsp_jupyter_extension-2.0.21.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_emotion_styled_dist_emotion-styled_browser_development_esm_js.661fb5836f4978a7c6e1.js +0 -0
  43. {hdsp_jupyter_extension-2.0.19.data → hdsp_jupyter_extension-2.0.21.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_emotion_styled_dist_emotion-styled_browser_development_esm_js.661fb5836f4978a7c6e1.js.map +0 -0
  44. {hdsp_jupyter_extension-2.0.19.data → hdsp_jupyter_extension-2.0.21.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_mui_material_index_js.985697e0162d8d088ca2.js +0 -0
  45. {hdsp_jupyter_extension-2.0.19.data → hdsp_jupyter_extension-2.0.21.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_mui_material_index_js.985697e0162d8d088ca2.js.map +0 -0
  46. {hdsp_jupyter_extension-2.0.19.data → hdsp_jupyter_extension-2.0.21.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_mui_material_utils_createSvgIcon_js.1f5038488cdfd8b3a85d.js +0 -0
  47. {hdsp_jupyter_extension-2.0.19.data → hdsp_jupyter_extension-2.0.21.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_mui_material_utils_createSvgIcon_js.1f5038488cdfd8b3a85d.js.map +0 -0
  48. {hdsp_jupyter_extension-2.0.19.dist-info → hdsp_jupyter_extension-2.0.21.dist-info}/WHEEL +0 -0
  49. {hdsp_jupyter_extension-2.0.19.dist-info → hdsp_jupyter_extension-2.0.21.dist-info}/licenses/LICENSE +0 -0
agent_server/langchain/agent_prompts/planner_prompt.py
@@ -40,6 +40,9 @@ PLANNER_SYSTEM_PROMPT = """당신은 작업을 조율하는 Main Agent입니다.
 
  **중요**: task_tool 결과를 받은 후 바로 write_todos로 완료 처리하지 말고, 반드시 위 도구로 결과를 먼저 적용!
 
+ **🔴 KeyboardInterrupt 발생 시**: jupyter_cell_tool 실행 중 KeyboardInterrupt가 발생하면 ask_user_tool로 중단 사유를 사용자에게 확인
+ - 예: ask_user_tool(question="코드 실행이 중단되었습니다. 중단 사유를 알려주시면 다음 진행에 참고하겠습니다.", input_type="text")
+
  # write_todos 규칙 [필수]
  - 한국어로 작성
  - **🔴 기존 todo 절대 삭제 금지**: 전체 리스트를 항상 포함하고 status만 변경
agent_server/langchain/custom_middleware.py
@@ -1053,23 +1053,6 @@ def create_normalize_tool_args_middleware(wrap_model_call, tools=None):
  break
  break
 
- # Clean AIMessage content when write_todos is called
- # Remove redundant todos JSON from content (keep summary JSON)
- if tool_name == "write_todos":
- msg_content = getattr(msg, "content", "") or ""
- if msg_content and '"todos"' in msg_content:
- # Keep content only if it's summary JSON
- is_summary_json = (
- '"summary"' in msg_content
- and '"next_items"' in msg_content
- )
- if not is_summary_json:
- # Clear redundant todos content
- msg.content = ""
- logger.info(
- "Cleared redundant todos JSON from AIMessage content (write_todos tool_call exists)"
- )
-
  return response
 
  return normalize_tool_args
agent_server/langchain/llm_factory.py
@@ -97,16 +97,44 @@ def _create_vllm_llm(llm_config: Dict[str, Any], callbacks):
  endpoint = vllm_config.get("endpoint", "http://localhost:8000/v1")
  model = vllm_config.get("model", "default")
  api_key = vllm_config.get("apiKey", "dummy")
+ use_responses_api = vllm_config.get("useResponsesApi", False)
+ temperature = vllm_config.get("temperature", 0.0)
 
- logger.info(f"Creating vLLM LLM with model: {model}, endpoint: {endpoint}")
+ logger.info(
+ f"Creating vLLM LLM with model: {model}, endpoint: {endpoint}, "
+ f"use_responses_api: {use_responses_api}, temperature: {temperature}"
+ )
+
+ # Use ChatGPTOSS for gpt-oss models (Harmony format with developer role)
+ # NOTE: OpenRouter doesn't support 'developer' role - only use for direct gpt-oss endpoints
+ is_openrouter = "openrouter" in endpoint.lower()
+ if "gpt-oss" in model.lower() and not is_openrouter:
+ from agent_server.langchain.models import ChatGPTOSS
+
+ logger.info("Using ChatGPTOSS for gpt-oss model (developer role support)")
+ return ChatGPTOSS(
+ model=model,
+ base_url=endpoint,
+ api_key=api_key,
+ temperature=temperature,
+ max_tokens=8192,
+ streaming=False,
+ callbacks=callbacks,
+ )
+ elif "gpt-oss" in model.lower() and is_openrouter:
+ logger.warning(
+ "gpt-oss model via OpenRouter - using standard ChatOpenAI "
+ "(developer role not supported by OpenRouter)"
+ )
 
  return ChatOpenAI(
  model=model,
  api_key=api_key,
  base_url=endpoint, # Use endpoint as-is (no /v1 suffix added)
  streaming=False, # Agent mode: disable LLM streaming (SSE handled by agent server)
- temperature=0.0,
- max_tokens=32768,
+ temperature=temperature,
+ max_tokens=8192,
+ use_responses_api=use_responses_api, # Use /v1/responses endpoint if True
  callbacks=callbacks,
  )
 
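The new routing in _create_vllm_llm is driven entirely by the vllm config block. Below is a minimal sketch of a config that would take the ChatGPTOSS branch; the key names are the ones read above, while the endpoint and model values are placeholders.

    # Hypothetical llm_config; only the keys read by _create_vllm_llm are shown.
    llm_config = {
        "provider": "vllm",
        "vllm": {
            "endpoint": "http://localhost:8000/v1",  # not an OpenRouter URL -> ChatGPTOSS branch
            "model": "gpt-oss-20b",                  # contains "gpt-oss"
            "apiKey": "dummy",
            "useResponsesApi": False,                # forwarded to ChatOpenAI as use_responses_api
            "temperature": 0.0,
        },
    }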
@@ -148,14 +176,26 @@ def create_summarization_llm(llm_config: Dict[str, Any]):
  temperature=0.0,
  )
  elif provider == "vllm":
- from langchain_openai import ChatOpenAI
-
  vllm_config = llm_config.get("vllm", {})
  # User provides full base URL (e.g., https://openrouter.ai/api/v1)
  endpoint = vllm_config.get("endpoint", "http://localhost:8000/v1")
  model = vllm_config.get("model", "default")
  api_key = vllm_config.get("apiKey", "dummy")
 
+ # Use ChatGPTOSS for gpt-oss models (but not via OpenRouter)
+ is_openrouter = "openrouter" in endpoint.lower()
+ if "gpt-oss" in model.lower() and not is_openrouter:
+ from agent_server.langchain.models import ChatGPTOSS
+
+ return ChatGPTOSS(
+ model=model,
+ base_url=endpoint,
+ api_key=api_key,
+ temperature=0.0,
+ )
+
+ from langchain_openai import ChatOpenAI
+
  return ChatOpenAI(
  model=model,
  api_key=api_key,
agent_server/langchain/logging_utils.py
@@ -14,8 +14,40 @@ from langchain_core.callbacks import BaseCallbackHandler
 
  logger = logging.getLogger(__name__)
 
+ # Dedicated logger for LLM responses - always enabled with its own handler
+ llm_response_logger = logging.getLogger("agent_server.llm_response")
+ llm_response_logger.setLevel(logging.INFO)
+ llm_response_logger.propagate = True # Propagate to root logger
+
+ # Ensure it has a handler if running standalone
+ if not llm_response_logger.handlers and not logging.getLogger().handlers:
+ _handler = logging.StreamHandler()
+ _handler.setFormatter(logging.Formatter("%(message)s"))
+ llm_response_logger.addHandler(_handler)
+
+
+ def disable_langchain_logging():
+ """Disable all langchain logging except LLM responses."""
+ # Set all langchain loggers to CRITICAL
+ for name in list(logging.Logger.manager.loggerDict.keys()):
+ if "langchain" in name.lower() or name.startswith("agent_server.langchain"):
+ logging.getLogger(name).setLevel(logging.CRITICAL)
+ # Keep LLM response logger at INFO
+ llm_response_logger.setLevel(logging.INFO)
+
+
+ # Auto-disable on import (comment this line to re-enable all logs)
+ disable_langchain_logging()
+
  LOG_SEPARATOR = "=" * 96
  LOG_SUBSECTION = "-" * 96
+ LOG_EMOJI_LINE = "🔵" * 48
+ LOG_REQUEST_START = f"\n\n{'🟢' * 48}\n{'=' * 96}\n 📤 LLM REQUEST START\n{'=' * 96}"
+ LOG_REQUEST_END = f"{'=' * 96}\n 📤 LLM REQUEST END\n{'=' * 96}\n{'🟢' * 48}\n"
+ LOG_RESPONSE_START = (
+ f"\n\n{LOG_EMOJI_LINE}\n{'=' * 96}\n ✨ LLM RESPONSE START\n{'=' * 96}"
+ )
+ LOG_RESPONSE_END = f"{'=' * 96}\n ✅ LLM RESPONSE END\n{'=' * 96}\n{LOG_EMOJI_LINE}\n"
 
 
  def _format_system_prompt_for_log(messages) -> tuple[int, int, str]:
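Since disable_langchain_logging() runs at import time and only adjusts loggers that already exist in logging.Logger.manager.loggerDict, its effect can be illustrated with a short sketch (not part of the diff; it assumes the package is importable as agent_server.langchain.logging_utils):

    import logging

    logging.basicConfig(level=logging.INFO)

    # A langchain logger that already exists when the module is imported
    noisy = logging.getLogger("langchain_core.tracers")

    from agent_server.langchain import logging_utils  # import triggers disable_langchain_logging()

    noisy.info("dropped - this logger was raised to CRITICAL on import")
    logging_utils.llm_response_logger.info("kept - the dedicated response logger stays at INFO")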
@@ -179,45 +211,91 @@ class LLMTraceLogger(BaseCallbackHandler):
  logger.info("%s", "\n".join(lines))
 
  def on_chat_model_start(self, serialized, messages, **kwargs) -> None:
- if not messages:
- logger.info(
- "%s",
- _format_messages_block("AGENT -> LLM PROMPT (<none>)", []),
- )
- return
- self._log_prompt_batches("AGENT -> LLM PROMPT", messages)
+ """Log LLM request messages as raw structured JSON."""
+ print(LOG_REQUEST_START, flush=True)
+
+ # Build raw structured request data
+ request_data = {
+ "model": serialized.get("name", "unknown") if serialized else "unknown",
+ "kwargs": {k: str(v)[:200] for k, v in kwargs.items() if k != "messages"},
+ "messages": [],
+ }
+
+ for batch in self._normalize_batches(messages):
+ batch_messages = []
+ for msg in batch:
+ batch_messages.append(_serialize_message(msg))
+ request_data["messages"].append(batch_messages)
+
+ # Output beautified JSON
+ print(_pretty_json(request_data), flush=True)
+
+ print(LOG_REQUEST_END, flush=True)
+
+ # --- OLD TEXT-PARSED LOGGING (commented out) ---
+ # for batch_idx, batch in enumerate(self._normalize_batches(messages)):
+ # msg_types = {}
+ # for msg in batch:
+ # msg_type = msg.__class__.__name__
+ # msg_types[msg_type] = msg_types.get(msg_type, 0) + 1
+ # print(f"\nBatch {batch_idx}: {len(batch)} messages - {msg_types}", flush=True)
+ # recent_count = min(5, len(batch))
+ # if len(batch) > recent_count:
+ # print(f"... ({len(batch) - recent_count} earlier messages omitted)", flush=True)
+ # for idx, message in enumerate(batch[-recent_count:], start=len(batch) - recent_count):
+ # lines = [LOG_SUBSECTION]
+ # lines.append(f"[{idx}] {message.__class__.__name__}")
+ # lines.append(_pretty_json(_serialize_message(message)))
+ # print("\n".join(lines), flush=True)
 
  def on_chat_model_end(self, response, **kwargs) -> None:
+ """Log LLM response as raw structured JSON."""
+ print(LOG_RESPONSE_START, flush=True)
+
+ # Build raw structured response data
+ response_data = {
+ "llm_output": getattr(response, "llm_output", None),
+ "generations": [],
+ }
+
  generations = getattr(response, "generations", None) or []
  if generations and isinstance(generations[0], list):
  batches = generations
  else:
  batches = [generations]
 
- for batch_idx, batch in enumerate(batches):
- for gen_idx, generation in enumerate(batch):
+ for batch in batches:
+ batch_data = []
+ for generation in batch:
+ gen_data = {}
  message = getattr(generation, "message", None)
- if not message:
- continue
-
- title = (
- f"LLM -> AGENT RESPONSE (batch={batch_idx}, generation={gen_idx})"
+ if message:
+ gen_data["message"] = _serialize_message(message)
+ gen_data["text"] = getattr(generation, "text", None)
+ gen_data["generation_info"] = getattr(
+ generation, "generation_info", None
  )
- logger.info("%s", _format_messages_block(title, [message]))
-
- tool_calls = getattr(message, "tool_calls", None)
- if tool_calls:
- tool_title = (
- "LLM -> AGENT TOOL CALLS "
- f"(batch={batch_idx}, generation={gen_idx})"
- )
- logger.info("%s", _format_json_block(tool_title, tool_calls))
+ batch_data.append(gen_data)
+ response_data["generations"].append(batch_data)
+
+ # Output beautified JSON
+ print(_pretty_json(response_data), flush=True)
+
+ print(LOG_RESPONSE_END, flush=True)
+
+ # --- OLD TEXT-PARSED LOGGING (commented out) ---
+ # for batch_idx, batch in enumerate(batches):
+ # for gen_idx, generation in enumerate(batch):
+ # message = getattr(generation, "message", None)
+ # if not message:
+ # continue
+ # title = f"LLM -> AGENT RESPONSE (batch={batch_idx}, generation={gen_idx})"
+ # print(_format_messages_block(title, [message]), flush=True)
+ # tool_calls = getattr(message, "tool_calls", None)
+ # if tool_calls:
+ # tool_title = f"LLM -> AGENT TOOL CALLS (batch={batch_idx}, generation={gen_idx})"
+ # print(_format_json_block(tool_title, tool_calls), flush=True)
 
  def on_llm_start(self, serialized, prompts, **kwargs) -> None:
- if not prompts:
- logger.info("%s", _format_json_block("LLM PROMPT (<none>)", ""))
- return
-
- for idx, prompt in enumerate(prompts):
- title = f"LLM PROMPT (batch={idx}, length={len(prompt)})"
- logger.info("%s", _format_json_block(title, prompt))
+ # Request logging disabled - only log responses
+ pass
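For orientation, the payload printed by on_chat_model_end now has roughly the shape sketched below (illustrative only; the per-message fields depend on _serialize_message, which is not shown in this diff):

    # Illustrative shape of response_data before it is passed to _pretty_json()
    response_data = {
        "llm_output": {"token_usage": "..."},  # whatever the provider returns, or None
        "generations": [                        # one inner list per batch
            [
                {
                    "message": {"type": "AIMessage", "content": "...", "tool_calls": []},
                    "text": "...",
                    "generation_info": {"finish_reason": "stop"},
                }
            ]
        ],
    }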
agent_server/langchain/middleware/subagent_middleware.py
@@ -9,8 +9,11 @@ Key features:
  - Context isolation: subagents run in clean context
  - Synchronous execution: subagent returns result directly to caller
  - Nested subagent support: python_developer can call athena_query
+ - Subagent caching: compiled agents are cached to avoid recompilation overhead
  """
 
+ import hashlib
+ import json
  import logging
  from typing import TYPE_CHECKING, Any, Dict, List, Optional
 
@@ -25,6 +28,8 @@ logger = logging.getLogger(__name__)
  # Global registry for subagent factories (set by AgentFactory)
  _subagent_factory = None
  _current_llm_config = None
+ # Subagent cache: key = "{agent_name}_{config_hash}" -> compiled agent
+ _subagent_cache: Dict[str, Any] = {}
 
 
  def set_subagent_factory(factory_func, llm_config: Dict[str, Any]):
@@ -32,10 +37,12 @@ def set_subagent_factory(factory_func, llm_config: Dict[str, Any]):
  Set the subagent factory function.
  Called by AgentFactory during initialization.
  """
- global _subagent_factory, _current_llm_config
+ global _subagent_factory, _current_llm_config, _subagent_cache
  _subagent_factory = factory_func
  _current_llm_config = llm_config
- logger.info("SubAgentMiddleware factory initialized")
+ # Clear cache when factory changes (new LLM config)
+ _subagent_cache.clear()
+ logger.info("SubAgentMiddleware factory initialized (cache cleared)")
 
 
  def get_subagent_factory():
@@ -43,6 +50,48 @@ def get_subagent_factory():
  return _subagent_factory, _current_llm_config
 
 
+ def _get_config_hash(llm_config: Dict[str, Any]) -> str:
+ """Generate a hash of llm_config for caching."""
+ config_str = json.dumps(llm_config, sort_keys=True, default=str)
+ return hashlib.md5(config_str.encode()).hexdigest()[:12]
+
+
+ def get_or_create_subagent(
+ agent_name: str, factory_func, llm_config: Dict[str, Any]
+ ) -> Any:
+ """
+ Get cached subagent or create new one.
+
+ Caching avoids expensive recompilation of LangGraph agents.
+ Cache key = "{agent_name}_{config_hash}" to handle different LLM configs.
+ """
+ global _subagent_cache
+
+ config_hash = _get_config_hash(llm_config)
+ cache_key = f"{agent_name}_{config_hash}"
+
+ if cache_key in _subagent_cache:
+ logger.info(f"Using cached subagent '{agent_name}' (key={cache_key})")
+ return _subagent_cache[cache_key]
+
+ logger.info(f"Creating new subagent '{agent_name}' (key={cache_key})...")
+ subagent = factory_func(agent_name, llm_config)
+ _subagent_cache[cache_key] = subagent
+ logger.info(
+ f"Cached subagent '{agent_name}' (total cached: {len(_subagent_cache)})"
+ )
+
+ return subagent
+
+
+ def clear_subagent_cache():
+ """Clear the subagent cache. Useful for testing or config changes."""
+ global _subagent_cache
+ count = len(_subagent_cache)
+ _subagent_cache.clear()
+ logger.info(f"Subagent cache cleared ({count} entries removed)")
+
+
  def create_task_tool(
  caller_name: str,
  allowed_subagents: Optional[List[str]] = None,
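A small usage sketch of the new cache helpers (the factory below is a stand-in; real factories are registered by AgentFactory through set_subagent_factory):

    from agent_server.langchain.middleware.subagent_middleware import (
        clear_subagent_cache,
        get_or_create_subagent,
    )

    def fake_factory(agent_name, llm_config):
        # Stand-in for a real factory; returns a dummy object instead of a compiled LangGraph agent
        return object()

    cfg = {"provider": "vllm", "vllm": {"model": "default"}}

    a1 = get_or_create_subagent("python_developer", fake_factory, cfg)
    a2 = get_or_create_subagent("python_developer", fake_factory, cfg)
    assert a1 is a2  # identical config -> same MD5 config hash -> cache hit, no re-creation

    cfg2 = {"provider": "vllm", "vllm": {"model": "gpt-oss-20b"}}
    a3 = get_or_create_subagent("python_developer", fake_factory, cfg2)
    assert a3 is not a1  # different config hash -> separate cache entry

    clear_subagent_cache()  # drops every cached agent (e.g. after a config change)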
@@ -96,11 +145,13 @@ create_task_tool(
  )
  context: Optional[str] = Field(
  default=None,
- description="Additional context for the subagent: resource info (file sizes, memory), previous code, variable state, etc."
+ description="Additional context for the subagent: resource info (file sizes, memory), previous code, variable state, etc.",
  )
 
  @tool(args_schema=TaskInput)
- def task_tool(agent_name: str, description: str, context: Optional[str] = None) -> str:
+ def task_tool(
+ agent_name: str, description: str, context: Optional[str] = None
+ ) -> str:
  """
  Delegate a task to a specialized subagent.
 
@@ -133,10 +184,10 @@
 
  # Import subagent event emitters
  from agent_server.langchain.middleware.subagent_events import (
- emit_subagent_start,
+ clear_current_subagent,
  emit_subagent_complete,
+ emit_subagent_start,
  set_current_subagent,
- clear_current_subagent,
  )
 
  # Emit subagent start event for UI
@@ -148,11 +199,17 @@
  return "Error: SubAgentMiddleware not initialized. Call set_subagent_factory first."
 
  try:
+ import time
+
  # Set current subagent context for tool call tracking
  set_current_subagent(agent_name)
 
- # Create the subagent
- subagent = factory_func(agent_name, llm_config)
+ # Get or create the subagent (cached for performance)
+ # Avoids expensive LangGraph recompilation on each call
+ t0 = time.time()
+ subagent = get_or_create_subagent(agent_name, factory_func, llm_config)
+ t1 = time.time()
+ logger.info(f"[TIMING] get_or_create_subagent took {t1-t0:.2f}s")
 
  # Execute subagent synchronously with clean context
  # The subagent runs in isolation, receiving task description + optional context
@@ -169,15 +226,18 @@
  enhanced_context = context
  if agent_name == "python_developer":
  try:
+ t2 = time.time()
  from agent_server.langchain.middleware.code_history_middleware import (
- get_context_with_history,
  get_code_history_tracker,
+ get_context_with_history,
  )
+
  tracker = get_code_history_tracker()
  if tracker.get_entry_count() > 0:
  enhanced_context = get_context_with_history(context)
+ t3 = time.time()
  logger.info(
- f"[{caller_name}] Injected code history into context "
+ f"[TIMING] code history injection took {t3-t2:.2f}s "
  f"(entries={tracker.get_entry_count()}, "
  f"context_len={len(enhanced_context) if enhanced_context else 0})"
  )
@@ -194,13 +254,21 @@
  else:
  message_content = description
 
- logger.info(f"[{caller_name}] Subagent message length: {len(message_content)}")
+ logger.info(
+ f"[{caller_name}] Subagent message length: {len(message_content)}"
+ )
 
  # Execute the subagent
+ t_invoke_start = time.time()
+ logger.info(f"[TIMING] About to invoke subagent '{agent_name}'...")
  result = subagent.invoke(
  {"messages": [{"role": "user", "content": message_content}]},
  config=subagent_config,
  )
+ t_invoke_end = time.time()
+ logger.info(
+ f"[TIMING] subagent.invoke() took {t_invoke_end-t_invoke_start:.2f}s"
+ )
 
  # Extract the final message from the result
  messages = result.get("messages", [])
@@ -223,6 +291,7 @@
  from agent_server.langchain.middleware.description_injector import (
  process_task_tool_response,
  )
+
  process_task_tool_response(agent_name, str(response))
  except Exception as e:
  logger.warning(f"Failed to extract description: {e}")
agent_server/langchain/models/__init__.py (new file)
@@ -0,0 +1,5 @@
+ """Custom LangChain chat models."""
+
+ from agent_server.langchain.models.gpt_oss_chat import ChatGPTOSS
+
+ __all__ = ["ChatGPTOSS"]