hdsp-jupyter-extension 2.0.18__py3-none-any.whl → 2.0.20__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52)
  1. agent_server/langchain/agent_prompts/planner_prompt.py +22 -11
  2. agent_server/langchain/custom_middleware.py +97 -65
  3. agent_server/langchain/llm_factory.py +37 -5
  4. agent_server/langchain/logging_utils.py +41 -16
  5. agent_server/langchain/models/__init__.py +5 -0
  6. agent_server/langchain/models/gpt_oss_chat.py +351 -0
  7. agent_server/langchain/prompts.py +12 -7
  8. agent_server/routers/langchain_agent.py +10 -0
  9. {hdsp_jupyter_extension-2.0.18.data → hdsp_jupyter_extension-2.0.20.data}/data/share/jupyter/labextensions/hdsp-agent/build_log.json +1 -1
  10. {hdsp_jupyter_extension-2.0.18.data → hdsp_jupyter_extension-2.0.20.data}/data/share/jupyter/labextensions/hdsp-agent/package.json +2 -2
  11. hdsp_jupyter_extension-2.0.18.data/data/share/jupyter/labextensions/hdsp-agent/static/frontend_styles_index_js.037b3c8e5d6a92b63b16.js → hdsp_jupyter_extension-2.0.20.data/data/share/jupyter/labextensions/hdsp-agent/static/frontend_styles_index_js.96745acc14125453fba8.js +36 -2
  12. hdsp_jupyter_extension-2.0.20.data/data/share/jupyter/labextensions/hdsp-agent/static/frontend_styles_index_js.96745acc14125453fba8.js.map +1 -0
  13. jupyter_ext/labextension/static/lib_index_js.8f72c63cdf542389aa9d.js → hdsp_jupyter_extension-2.0.20.data/data/share/jupyter/labextensions/hdsp-agent/static/lib_index_js.90f80cb80187de8c5ae5.js +91 -8
  14. hdsp_jupyter_extension-2.0.20.data/data/share/jupyter/labextensions/hdsp-agent/static/lib_index_js.90f80cb80187de8c5ae5.js.map +1 -0
  15. hdsp_jupyter_extension-2.0.18.data/data/share/jupyter/labextensions/hdsp-agent/static/remoteEntry.5099145cc2b28312d170.js → hdsp_jupyter_extension-2.0.20.data/data/share/jupyter/labextensions/hdsp-agent/static/remoteEntry.586bf5521d043cdd37b8.js +3 -3
  16. jupyter_ext/labextension/static/remoteEntry.5099145cc2b28312d170.js.map → hdsp_jupyter_extension-2.0.20.data/data/share/jupyter/labextensions/hdsp-agent/static/remoteEntry.586bf5521d043cdd37b8.js.map +1 -1
  17. {hdsp_jupyter_extension-2.0.18.dist-info → hdsp_jupyter_extension-2.0.20.dist-info}/METADATA +1 -1
  18. {hdsp_jupyter_extension-2.0.18.dist-info → hdsp_jupyter_extension-2.0.20.dist-info}/RECORD +48 -46
  19. jupyter_ext/_version.py +1 -1
  20. jupyter_ext/labextension/build_log.json +1 -1
  21. jupyter_ext/labextension/package.json +2 -2
  22. jupyter_ext/labextension/static/{frontend_styles_index_js.037b3c8e5d6a92b63b16.js → frontend_styles_index_js.96745acc14125453fba8.js} +36 -2
  23. jupyter_ext/labextension/static/frontend_styles_index_js.96745acc14125453fba8.js.map +1 -0
  24. hdsp_jupyter_extension-2.0.18.data/data/share/jupyter/labextensions/hdsp-agent/static/lib_index_js.8f72c63cdf542389aa9d.js → jupyter_ext/labextension/static/lib_index_js.90f80cb80187de8c5ae5.js +91 -8
  25. jupyter_ext/labextension/static/lib_index_js.90f80cb80187de8c5ae5.js.map +1 -0
  26. jupyter_ext/labextension/static/{remoteEntry.5099145cc2b28312d170.js → remoteEntry.586bf5521d043cdd37b8.js} +3 -3
  27. hdsp_jupyter_extension-2.0.18.data/data/share/jupyter/labextensions/hdsp-agent/static/remoteEntry.5099145cc2b28312d170.js.map → jupyter_ext/labextension/static/remoteEntry.586bf5521d043cdd37b8.js.map +1 -1
  28. hdsp_jupyter_extension-2.0.18.data/data/share/jupyter/labextensions/hdsp-agent/static/frontend_styles_index_js.037b3c8e5d6a92b63b16.js.map +0 -1
  29. hdsp_jupyter_extension-2.0.18.data/data/share/jupyter/labextensions/hdsp-agent/static/lib_index_js.8f72c63cdf542389aa9d.js.map +0 -1
  30. jupyter_ext/labextension/static/frontend_styles_index_js.037b3c8e5d6a92b63b16.js.map +0 -1
  31. jupyter_ext/labextension/static/lib_index_js.8f72c63cdf542389aa9d.js.map +0 -1
  32. {hdsp_jupyter_extension-2.0.18.data → hdsp_jupyter_extension-2.0.20.data}/data/etc/jupyter/jupyter_server_config.d/hdsp_jupyter_extension.json +0 -0
  33. {hdsp_jupyter_extension-2.0.18.data → hdsp_jupyter_extension-2.0.20.data}/data/share/jupyter/labextensions/hdsp-agent/install.json +0 -0
  34. {hdsp_jupyter_extension-2.0.18.data → hdsp_jupyter_extension-2.0.20.data}/data/share/jupyter/labextensions/hdsp-agent/static/node_modules_emotion_use-insertion-effect-with-fallbacks_dist_emotion-use-insertion-effect-wi-3ba6b80.c095373419d05e6f141a.js +0 -0
  35. {hdsp_jupyter_extension-2.0.18.data → hdsp_jupyter_extension-2.0.20.data}/data/share/jupyter/labextensions/hdsp-agent/static/node_modules_emotion_use-insertion-effect-with-fallbacks_dist_emotion-use-insertion-effect-wi-3ba6b80.c095373419d05e6f141a.js.map +0 -0
  36. {hdsp_jupyter_extension-2.0.18.data → hdsp_jupyter_extension-2.0.20.data}/data/share/jupyter/labextensions/hdsp-agent/static/node_modules_emotion_use-insertion-effect-with-fallbacks_dist_emotion-use-insertion-effect-wi-3ba6b81.61e75fb98ecff46cf836.js +0 -0
  37. {hdsp_jupyter_extension-2.0.18.data → hdsp_jupyter_extension-2.0.20.data}/data/share/jupyter/labextensions/hdsp-agent/static/node_modules_emotion_use-insertion-effect-with-fallbacks_dist_emotion-use-insertion-effect-wi-3ba6b81.61e75fb98ecff46cf836.js.map +0 -0
  38. {hdsp_jupyter_extension-2.0.18.data → hdsp_jupyter_extension-2.0.20.data}/data/share/jupyter/labextensions/hdsp-agent/static/style.js +0 -0
  39. {hdsp_jupyter_extension-2.0.18.data → hdsp_jupyter_extension-2.0.20.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_babel_runtime_helpers_esm_extends_js-node_modules_emotion_serialize_dist-051195.e2553aab0c3963b83dd7.js +0 -0
  40. {hdsp_jupyter_extension-2.0.18.data → hdsp_jupyter_extension-2.0.20.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_babel_runtime_helpers_esm_extends_js-node_modules_emotion_serialize_dist-051195.e2553aab0c3963b83dd7.js.map +0 -0
  41. {hdsp_jupyter_extension-2.0.18.data → hdsp_jupyter_extension-2.0.20.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_emotion_cache_dist_emotion-cache_browser_development_esm_js.24edcc52a1c014a8a5f0.js +0 -0
  42. {hdsp_jupyter_extension-2.0.18.data → hdsp_jupyter_extension-2.0.20.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_emotion_cache_dist_emotion-cache_browser_development_esm_js.24edcc52a1c014a8a5f0.js.map +0 -0
  43. {hdsp_jupyter_extension-2.0.18.data → hdsp_jupyter_extension-2.0.20.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_emotion_react_dist_emotion-react_browser_development_esm_js.19ecf6babe00caff6b8a.js +0 -0
  44. {hdsp_jupyter_extension-2.0.18.data → hdsp_jupyter_extension-2.0.20.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_emotion_react_dist_emotion-react_browser_development_esm_js.19ecf6babe00caff6b8a.js.map +0 -0
  45. {hdsp_jupyter_extension-2.0.18.data → hdsp_jupyter_extension-2.0.20.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_emotion_styled_dist_emotion-styled_browser_development_esm_js.661fb5836f4978a7c6e1.js +0 -0
  46. {hdsp_jupyter_extension-2.0.18.data → hdsp_jupyter_extension-2.0.20.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_emotion_styled_dist_emotion-styled_browser_development_esm_js.661fb5836f4978a7c6e1.js.map +0 -0
  47. {hdsp_jupyter_extension-2.0.18.data → hdsp_jupyter_extension-2.0.20.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_mui_material_index_js.985697e0162d8d088ca2.js +0 -0
  48. {hdsp_jupyter_extension-2.0.18.data → hdsp_jupyter_extension-2.0.20.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_mui_material_index_js.985697e0162d8d088ca2.js.map +0 -0
  49. {hdsp_jupyter_extension-2.0.18.data → hdsp_jupyter_extension-2.0.20.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_mui_material_utils_createSvgIcon_js.1f5038488cdfd8b3a85d.js +0 -0
  50. {hdsp_jupyter_extension-2.0.18.data → hdsp_jupyter_extension-2.0.20.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_mui_material_utils_createSvgIcon_js.1f5038488cdfd8b3a85d.js.map +0 -0
  51. {hdsp_jupyter_extension-2.0.18.dist-info → hdsp_jupyter_extension-2.0.20.dist-info}/WHEEL +0 -0
  52. {hdsp_jupyter_extension-2.0.18.dist-info → hdsp_jupyter_extension-2.0.20.dist-info}/licenses/LICENSE +0 -0
@@ -23,24 +23,35 @@ PLANNER_SYSTEM_PROMPT = """당신은 작업을 조율하는 Main Agent입니다.
  | athena_query | SQL 쿼리 생성 | task_tool(agent_name="athena_query", description="매출 테이블 조회 쿼리") |
  | researcher | 정보 검색 | task_tool(agent_name="researcher", description="관련 문서 검색") |

- ## Step 3: 결과 실행/표시 (필수!)
+ ## Step 3: 결과 실행/적용 (필수!)
  **task_tool을 호출 했다면, 호출 후 반드시 결과를 처리해야 함:**

- | 서브에이전트 | 처리 방법 | 예시 |
- |-------------|----------|------|
- | python_developer | jupyter_cell_tool로 실행 또는 write/edit/multiedit file tool로 적용 | jupyter_cell_tool(code=반환된_코드) |
- | athena_query | **markdown_tool로 SQL 표시** (필수) | markdown_tool(content="```sql\n반환된_쿼리\n```") |
- | researcher | 텍스트로 요약 | 직접 응답 |
+ | 서브에이전트 | 작업 유형 | 처리 방법 | 예시 |
+ |-------------|----------|----------|------|
+ | python_developer | 코드 실행 (데이터 분석, 시각화) | jupyter_cell_tool | jupyter_cell_tool(code=반환된_코드) |
+ | python_developer | **파일 생성/수정** | **write_file_tool 또는 multiedit_file_tool** | write_file_tool(path="script.js", content=반환된_코드) |
+ | athena_query | SQL 표시 | markdown_tool | markdown_tool(content="```sql\n반환된_쿼리\n```") |
+ | researcher | 텍스트 요약 | 직접 응답 | - |

- **중요**: task_tool 결과를 받은 바로 write_todos로 완료 처리하지 말고, 반드시 위 도구로 결과를 먼저 표시!
+ **🔴 중요: 코드 저장 도구 선택**
+ - **파일 생성/수정 요청** → `write_file_tool` 또는 `multiedit_file_tool` 사용
+ - **코드 실행 요청** (데이터 분석, 차트 등) → `jupyter_cell_tool` 사용
+ - **❌ markdown_tool은 코드 저장용이 아님!** (표시 전용)
+
+ **중요**: task_tool 결과를 받은 후 바로 write_todos로 완료 처리하지 말고, 반드시 위 도구로 결과를 먼저 적용!
+
+ **🔴 KeyboardInterrupt 발생 시**: jupyter_cell_tool 실행 중 KeyboardInterrupt가 발생하면 ask_user_tool로 중단 사유를 사용자에게 확인
+ - 예: ask_user_tool(question="코드 실행이 중단되었습니다. 중단 사유를 알려주시면 다음 진행에 참고하겠습니다.", input_type="text")

  # write_todos 규칙 [필수]
  - 한국어로 작성
  - **🔴 기존 todo 절대 삭제 금지**: 전체 리스트를 항상 포함하고 status만 변경
- - 잘못된 예: [{"content": "작업 요약", "status": "completed"}] 기존 todo 삭제됨!
- - 올바른 예: [{"content": "기존 작업1", "status": "completed"}, {"content": "작업 요약", "status": "completed"}]
- - **일괄 업데이트**: 연속 완료된 todo는 한 번의 write_todos 호출로 처리
- - in_progress **1개만** 유지
+ - **🔴 상태 전환 순서 필수**: pending → in_progress → completed (건너뛰기 금지!)
+ - **🔴 초기 생성 규칙**: write_todos 호출 시 첫 번째 todo만 in_progress, 나머지는 모두 pending
+ - 올바른 초기 예: [{"content": "작업1", "status": "in_progress"}, {"content": "작업2", "status": "pending"}, {"content": "작업 요약 및 다음 단계 제시", "status": "pending"}]
+ - 잘못된 초기 예: [{"content": "작업1", "status": "completed"}, ...] ← 실제 작업 없이 completed 금지!
+ - **🔴 completed 전환 조건**: 실제 도구(task_tool, jupyter_cell_tool 등)로 작업 수행 후에만 completed로 변경
+ - in_progress 상태는 **동시에 1개만** 허용 (completed, pending todo는 삭제하지 않고 모두 유지)
  - content에 도구(tool)명 언급 금지
  - **[필수] 마지막 todo는 반드시 "작업 요약 및 다음 단계 제시"**

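The rewritten write_todos rules in the hunk above describe a small state machine for todo items: statuses move pending → in_progress → completed, at most one item is in_progress at a time, existing items are never deleted, and the final item is always the summary/next-steps todo. A minimal Python sketch of a validator for those rules, for illustration only (validate_todos is a hypothetical helper, not code shipped in the package):

```python
# Illustration of the prompt's todo rules; not part of the package.
from typing import Dict, List

SUMMARY_CONTENT = "작업 요약 및 다음 단계 제시"  # required last todo per the prompt

def validate_todos(todos: List[Dict[str, str]]) -> List[str]:
    """Return rule violations for a write_todos payload (hypothetical helper)."""
    errors = []
    statuses = [t.get("status") for t in todos]
    if statuses.count("in_progress") > 1:
        errors.append("only one todo may be in_progress at a time")
    if any(s not in ("pending", "in_progress", "completed") for s in statuses):
        errors.append("status must be pending, in_progress, or completed")
    if not todos or todos[-1].get("content") != SUMMARY_CONTENT:
        errors.append("last todo must be the summary/next-steps item")
    return errors

# A well-formed initial list per the prompt: first item in_progress, rest pending.
print(validate_todos([
    {"content": "작업1", "status": "in_progress"},
    {"content": "작업2", "status": "pending"},
    {"content": SUMMARY_CONTENT, "status": "pending"},
]))  # -> []
```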
@@ -444,63 +444,78 @@ def create_handle_empty_response_middleware(wrap_model_call):
  )

  if has_summary_pattern:
- # Try to extract and repair summary JSON from mixed content
- try:
- # Try to find JSON object containing summary
- import re
- json_match = re.search(r'\{[^{}]*"summary"[^{}]*"next_items"[^{}]*\}', content, re.DOTALL)
- if json_match:
- repaired_summary = repair_json(
- json_match.group(), return_objects=True
- )
- else:
- repaired_summary = repair_json(
- content, return_objects=True
- )
+ # Check if pending todos exist - if so, don't force complete
+ current_todos = request.state.get("todos", [])
+ pending_todos = [
+ t for t in current_todos
+ if isinstance(t, dict) and t.get("status") == "pending"
+ ]
+ if pending_todos:
+ logger.warning(
+ "Summary JSON detected but pending todos remain - not forcing completion: %s",
+ [t.get("content", "")[:30] for t in pending_todos],
+ )
+ # Don't synthesize completion, return response as-is
+ # Let LLM continue working on pending todos
+ else:
+ # No pending todos, safe to synthesize completion
+ # Try to extract and repair summary JSON from mixed content
+ try:
+ # Try to find JSON object containing summary
+ import re
+ json_match = re.search(r'\{[^{}]*"summary"[^{}]*"next_items"[^{}]*\}', content, re.DOTALL)
+ if json_match:
+ repaired_summary = repair_json(
+ json_match.group(), return_objects=True
+ )
+ else:
+ repaired_summary = repair_json(
+ content, return_objects=True
+ )

- if (
- isinstance(repaired_summary, dict)
- and "summary" in repaired_summary
- and "next_items" in repaired_summary
- ):
- # Create new message with repaired JSON content
- repaired_content = json.dumps(
- repaired_summary, ensure_ascii=False
- )
- logger.info(
- "Detected and repaired summary JSON in content (pattern-based detection)"
- )
- # Create message with repaired content
- repaired_response_message = AIMessage(
- content=repaired_content,
- tool_calls=getattr(
- response_message, "tool_calls", []
+ if (
+ isinstance(repaired_summary, dict)
+ and "summary" in repaired_summary
+ and "next_items" in repaired_summary
+ ):
+ # Create new message with repaired JSON content
+ repaired_content = json.dumps(
+ repaired_summary, ensure_ascii=False
  )
- or [],
- )
- synthetic_message = _create_synthetic_completion(
- request,
- repaired_response_message,
- has_content=True,
- )
- response = _replace_ai_message_in_response(
- response, synthetic_message
- )
- return response
- except Exception as e:
- logger.debug(f"Failed to extract summary JSON from mixed content: {e}")
+ logger.info(
+ "Detected and repaired summary JSON in content (pattern-based detection)"
+ )
+ # Create message with repaired content
+ repaired_response_message = AIMessage(
+ content=repaired_content,
+ tool_calls=getattr(
+ response_message, "tool_calls", []
+ )
+ or [],
+ )
+ synthetic_message = _create_synthetic_completion(
+ request,
+ repaired_response_message,
+ has_content=True,
+ )
+ response = _replace_ai_message_in_response(
+ response, synthetic_message
+ )
+ return response
+ except Exception as e:
+ logger.debug(f"Failed to extract summary JSON from mixed content: {e}")

- # Fallback: accept as-is if repair failed but looks like summary
- logger.info(
- "Detected summary JSON pattern in content - accepting and synthesizing write_todos"
- )
- synthetic_message = _create_synthetic_completion(
- request, response_message, has_content=True
- )
- response = _replace_ai_message_in_response(
- response, synthetic_message
- )
- return response
+ # Fallback: accept as-is if repair failed but looks like summary
+ logger.info(
+ "Detected summary JSON pattern in content - accepting and synthesizing write_todos"
+ )
+ synthetic_message = _create_synthetic_completion(
+ request, response_message, has_content=True
+ )
+ response = _replace_ai_message_in_response(
+ response, synthetic_message
+ )
+ return response

  # Legacy: Also check if current todo is a summary todo (backward compatibility)
  todos = request.state.get("todos", [])
@@ -1009,17 +1024,34 @@ def create_normalize_tool_args_middleware(wrap_model_call, tools=None):
  else:
  found_first = True

- # NOTE: Previously had logic to revert summary todo to in_progress
- # if no summary JSON was found. This caused infinite loops
- # where LLM kept calling write_todos repeatedly.
- # Now we let the natural termination logic handle this.
- #
- # NOTE: Also removed logic to preserve todos when LLM tries to delete them.
- # The LLM should be able to modify todos freely when:
- # - User rejects code approval
- # - User changes their request
- # - Code execution fails
- # We rely on prompts to guide proper todo management.
+ # Validate: "작업 요약 및 다음 단계 제시" cannot be in_progress if pending todos exist
+ # This prevents LLM from skipping pending tasks
+ summary_keywords = ["작업 요약", "다음 단계 제시"]
+ for i, todo in enumerate(todos):
+ if not isinstance(todo, dict):
+ continue
+ content = todo.get("content", "")
+ is_summary_todo = any(kw in content for kw in summary_keywords)
+
+ if is_summary_todo and todo.get("status") == "in_progress":
+ # Check if there are pending todos before this one
+ pending_before = [
+ t for t in todos[:i]
+ if isinstance(t, dict) and t.get("status") == "pending"
+ ]
+ if pending_before:
+ # Revert summary todo to pending
+ todo["status"] = "pending"
+ # Set the first pending todo to in_progress
+ for t in todos:
+ if isinstance(t, dict) and t.get("status") == "pending":
+ t["status"] = "in_progress"
+ logger.warning(
+ "Reverted summary todo to pending, set '%s' to in_progress (pending todos exist)",
+ t.get("content", "")[:30],
+ )
+ break
+ break

  return response

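Because Python indentation is flattened in the diff view above, here is a standalone restatement of the guard this hunk adds; the function wrapper and logger setup are assumptions made for the sketch, while the body mirrors the added lines:

```python
# Sketch only: restates the added guard with indentation restored; the enclosing
# middleware plumbing is omitted and the function name is hypothetical.
import logging

logger = logging.getLogger(__name__)
summary_keywords = ["작업 요약", "다음 단계 제시"]

def _defer_summary_todo(todos: list) -> None:
    """Keep the summary todo pending while earlier todos are still pending."""
    for i, todo in enumerate(todos):
        if not isinstance(todo, dict):
            continue
        content = todo.get("content", "")
        is_summary_todo = any(kw in content for kw in summary_keywords)
        if is_summary_todo and todo.get("status") == "in_progress":
            pending_before = [
                t for t in todos[:i]
                if isinstance(t, dict) and t.get("status") == "pending"
            ]
            if pending_before:
                # Revert the summary todo and promote the first pending todo instead.
                todo["status"] = "pending"
                for t in todos:
                    if isinstance(t, dict) and t.get("status") == "pending":
                        t["status"] = "in_progress"
                        logger.warning(
                            "Reverted summary todo to pending, set '%s' to in_progress",
                            t.get("content", "")[:30],
                        )
                        break
            break
```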
@@ -97,16 +97,37 @@ def _create_vllm_llm(llm_config: Dict[str, Any], callbacks):
  endpoint = vllm_config.get("endpoint", "http://localhost:8000/v1")
  model = vllm_config.get("model", "default")
  api_key = vllm_config.get("apiKey", "dummy")
+ use_responses_api = vllm_config.get("useResponsesApi", False)
+ temperature = vllm_config.get("temperature", 0.0)

- logger.info(f"Creating vLLM LLM with model: {model}, endpoint: {endpoint}")
+ logger.info(
+ f"Creating vLLM LLM with model: {model}, endpoint: {endpoint}, "
+ f"use_responses_api: {use_responses_api}, temperature: {temperature}"
+ )
+
+ # Use ChatGPTOSS for gpt-oss models (Harmony format with developer role)
+ if "gpt-oss" in model.lower():
+ from agent_server.langchain.models import ChatGPTOSS
+
+ logger.info(f"Using ChatGPTOSS for gpt-oss model (developer role support)")
+ return ChatGPTOSS(
+ model=model,
+ base_url=endpoint,
+ api_key=api_key,
+ temperature=temperature,
+ max_tokens=8192,
+ streaming=False,
+ callbacks=callbacks,
+ )

  return ChatOpenAI(
  model=model,
  api_key=api_key,
  base_url=endpoint, # Use endpoint as-is (no /v1 suffix added)
  streaming=False, # Agent mode: disable LLM streaming (SSE handled by agent server)
- temperature=0.0,
- max_tokens=32768,
+ temperature=temperature,
+ max_tokens=8192,
+ use_responses_api=use_responses_api, # Use /v1/responses endpoint if True
  callbacks=callbacks,
  )

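For context, the vllm settings consumed by this function now look roughly like the following; the key names match those read above, while the concrete values are placeholders:

```python
# Placeholder values; key names match those read in _create_vllm_llm above.
llm_config = {
    "vllm": {
        "endpoint": "http://localhost:8000/v1",
        "model": "openai/gpt-oss-20b",  # any model name containing "gpt-oss"
        "apiKey": "dummy",
        "useResponsesApi": False,
        "temperature": 0.0,
    }
}

# Same routing condition as the hunk: gpt-oss models go to ChatGPTOSS,
# everything else falls through to ChatOpenAI.
model = llm_config["vllm"]["model"]
print("gpt-oss" in model.lower())  # True -> ChatGPTOSS branch
```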
@@ -148,14 +169,25 @@ def create_summarization_llm(llm_config: Dict[str, Any]):
  temperature=0.0,
  )
  elif provider == "vllm":
- from langchain_openai import ChatOpenAI
-
  vllm_config = llm_config.get("vllm", {})
  # User provides full base URL (e.g., https://openrouter.ai/api/v1)
  endpoint = vllm_config.get("endpoint", "http://localhost:8000/v1")
  model = vllm_config.get("model", "default")
  api_key = vllm_config.get("apiKey", "dummy")

+ # Use ChatGPTOSS for gpt-oss models
+ if "gpt-oss" in model.lower():
+ from agent_server.langchain.models import ChatGPTOSS
+
+ return ChatGPTOSS(
+ model=model,
+ base_url=endpoint,
+ api_key=api_key,
+ temperature=0.0,
+ )
+
+ from langchain_openai import ChatOpenAI
+
  return ChatOpenAI(
  model=model,
  api_key=api_key,
@@ -14,8 +14,36 @@ from langchain_core.callbacks import BaseCallbackHandler

  logger = logging.getLogger(__name__)

+ # Dedicated logger for LLM responses - always enabled with its own handler
+ llm_response_logger = logging.getLogger("agent_server.llm_response")
+ llm_response_logger.setLevel(logging.INFO)
+ llm_response_logger.propagate = True # Propagate to root logger
+
+ # Ensure it has a handler if running standalone
+ if not llm_response_logger.handlers and not logging.getLogger().handlers:
+ _handler = logging.StreamHandler()
+ _handler.setFormatter(logging.Formatter('%(message)s'))
+ llm_response_logger.addHandler(_handler)
+
+
+ def disable_langchain_logging():
+ """Disable all langchain logging except LLM responses."""
+ # Set all langchain loggers to CRITICAL
+ for name in list(logging.Logger.manager.loggerDict.keys()):
+ if "langchain" in name.lower() or name.startswith("agent_server.langchain"):
+ logging.getLogger(name).setLevel(logging.CRITICAL)
+ # Keep LLM response logger at INFO
+ llm_response_logger.setLevel(logging.INFO)
+
+
+ # Auto-disable on import (comment this line to re-enable all logs)
+ disable_langchain_logging()
+
  LOG_SEPARATOR = "=" * 96
  LOG_SUBSECTION = "-" * 96
+ LOG_EMOJI_LINE = "🔵" * 48
+ LOG_RESPONSE_START = f"\n\n{LOG_EMOJI_LINE}\n{'=' * 96}\n ✨ LLM RESPONSE START\n{'=' * 96}"
+ LOG_RESPONSE_END = f"{'=' * 96}\n ✅ LLM RESPONSE END\n{'=' * 96}\n{LOG_EMOJI_LINE}\n"


  def _format_system_prompt_for_log(messages) -> tuple[int, int, str]:
@@ -179,15 +207,15 @@ class LLMTraceLogger(BaseCallbackHandler):
  logger.info("%s", "\n".join(lines))

  def on_chat_model_start(self, serialized, messages, **kwargs) -> None:
- if not messages:
- logger.info(
- "%s",
- _format_messages_block("AGENT -> LLM PROMPT (<none>)", []),
- )
- return
- self._log_prompt_batches("AGENT -> LLM PROMPT", messages)
+ # Request logging disabled - only log responses
+ pass

  def on_chat_model_end(self, response, **kwargs) -> None:
+ # Debug: Check if callback is even called
+ print("[DEBUG] on_chat_model_end CALLED!", flush=True)
+ # Use print for guaranteed visibility
+ print(LOG_RESPONSE_START, flush=True)
+
  generations = getattr(response, "generations", None) or []
  if generations and isinstance(generations[0], list):
  batches = generations
@@ -203,7 +231,7 @@ class LLMTraceLogger(BaseCallbackHandler):
  title = (
  f"LLM -> AGENT RESPONSE (batch={batch_idx}, generation={gen_idx})"
  )
- logger.info("%s", _format_messages_block(title, [message]))
+ print(_format_messages_block(title, [message]), flush=True)

  tool_calls = getattr(message, "tool_calls", None)
  if tool_calls:
@@ -211,13 +239,10 @@ class LLMTraceLogger(BaseCallbackHandler):
  "LLM -> AGENT TOOL CALLS "
  f"(batch={batch_idx}, generation={gen_idx})"
  )
- logger.info("%s", _format_json_block(tool_title, tool_calls))
+ print(_format_json_block(tool_title, tool_calls), flush=True)

- def on_llm_start(self, serialized, prompts, **kwargs) -> None:
- if not prompts:
- logger.info("%s", _format_json_block("LLM PROMPT (<none>)", ""))
- return
+ print(LOG_RESPONSE_END, flush=True)

- for idx, prompt in enumerate(prompts):
- title = f"LLM PROMPT (batch={idx}, length={len(prompt)})"
- logger.info("%s", _format_json_block(title, prompt))
+ def on_llm_start(self, serialized, prompts, **kwargs) -> None:
+ # Request logging disabled - only log responses
+ pass
@@ -0,0 +1,5 @@
+ """Custom LangChain chat models."""
+
+ from agent_server.langchain.models.gpt_oss_chat import ChatGPTOSS
+
+ __all__ = ["ChatGPTOSS"]
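
A minimal usage sketch of the new export, mirroring the constructor arguments the llm_factory.py hunks pass; the endpoint, model name, and API key here are placeholders:

```python
# Placeholder values; argument names mirror the ChatGPTOSS call in _create_vllm_llm.
from agent_server.langchain.models import ChatGPTOSS  # new export in 2.0.20

llm = ChatGPTOSS(
    model="openai/gpt-oss-120b",
    base_url="http://localhost:8000/v1",
    api_key="dummy",
    temperature=0.0,
    max_tokens=8192,
    streaming=False,
)
```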