hdsp-jupyter-extension 2.0.18__py3-none-any.whl → 2.0.20__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agent_server/langchain/agent_prompts/planner_prompt.py +22 -11
- agent_server/langchain/custom_middleware.py +97 -65
- agent_server/langchain/llm_factory.py +37 -5
- agent_server/langchain/logging_utils.py +41 -16
- agent_server/langchain/models/__init__.py +5 -0
- agent_server/langchain/models/gpt_oss_chat.py +351 -0
- agent_server/langchain/prompts.py +12 -7
- agent_server/routers/langchain_agent.py +10 -0
- {hdsp_jupyter_extension-2.0.18.data → hdsp_jupyter_extension-2.0.20.data}/data/share/jupyter/labextensions/hdsp-agent/build_log.json +1 -1
- {hdsp_jupyter_extension-2.0.18.data → hdsp_jupyter_extension-2.0.20.data}/data/share/jupyter/labextensions/hdsp-agent/package.json +2 -2
- hdsp_jupyter_extension-2.0.18.data/data/share/jupyter/labextensions/hdsp-agent/static/frontend_styles_index_js.037b3c8e5d6a92b63b16.js → hdsp_jupyter_extension-2.0.20.data/data/share/jupyter/labextensions/hdsp-agent/static/frontend_styles_index_js.96745acc14125453fba8.js +36 -2
- hdsp_jupyter_extension-2.0.20.data/data/share/jupyter/labextensions/hdsp-agent/static/frontend_styles_index_js.96745acc14125453fba8.js.map +1 -0
- jupyter_ext/labextension/static/lib_index_js.8f72c63cdf542389aa9d.js → hdsp_jupyter_extension-2.0.20.data/data/share/jupyter/labextensions/hdsp-agent/static/lib_index_js.90f80cb80187de8c5ae5.js +91 -8
- hdsp_jupyter_extension-2.0.20.data/data/share/jupyter/labextensions/hdsp-agent/static/lib_index_js.90f80cb80187de8c5ae5.js.map +1 -0
- hdsp_jupyter_extension-2.0.18.data/data/share/jupyter/labextensions/hdsp-agent/static/remoteEntry.5099145cc2b28312d170.js → hdsp_jupyter_extension-2.0.20.data/data/share/jupyter/labextensions/hdsp-agent/static/remoteEntry.586bf5521d043cdd37b8.js +3 -3
- jupyter_ext/labextension/static/remoteEntry.5099145cc2b28312d170.js.map → hdsp_jupyter_extension-2.0.20.data/data/share/jupyter/labextensions/hdsp-agent/static/remoteEntry.586bf5521d043cdd37b8.js.map +1 -1
- {hdsp_jupyter_extension-2.0.18.dist-info → hdsp_jupyter_extension-2.0.20.dist-info}/METADATA +1 -1
- {hdsp_jupyter_extension-2.0.18.dist-info → hdsp_jupyter_extension-2.0.20.dist-info}/RECORD +48 -46
- jupyter_ext/_version.py +1 -1
- jupyter_ext/labextension/build_log.json +1 -1
- jupyter_ext/labextension/package.json +2 -2
- jupyter_ext/labextension/static/{frontend_styles_index_js.037b3c8e5d6a92b63b16.js → frontend_styles_index_js.96745acc14125453fba8.js} +36 -2
- jupyter_ext/labextension/static/frontend_styles_index_js.96745acc14125453fba8.js.map +1 -0
- hdsp_jupyter_extension-2.0.18.data/data/share/jupyter/labextensions/hdsp-agent/static/lib_index_js.8f72c63cdf542389aa9d.js → jupyter_ext/labextension/static/lib_index_js.90f80cb80187de8c5ae5.js +91 -8
- jupyter_ext/labextension/static/lib_index_js.90f80cb80187de8c5ae5.js.map +1 -0
- jupyter_ext/labextension/static/{remoteEntry.5099145cc2b28312d170.js → remoteEntry.586bf5521d043cdd37b8.js} +3 -3
- hdsp_jupyter_extension-2.0.18.data/data/share/jupyter/labextensions/hdsp-agent/static/remoteEntry.5099145cc2b28312d170.js.map → jupyter_ext/labextension/static/remoteEntry.586bf5521d043cdd37b8.js.map +1 -1
- hdsp_jupyter_extension-2.0.18.data/data/share/jupyter/labextensions/hdsp-agent/static/frontend_styles_index_js.037b3c8e5d6a92b63b16.js.map +0 -1
- hdsp_jupyter_extension-2.0.18.data/data/share/jupyter/labextensions/hdsp-agent/static/lib_index_js.8f72c63cdf542389aa9d.js.map +0 -1
- jupyter_ext/labextension/static/frontend_styles_index_js.037b3c8e5d6a92b63b16.js.map +0 -1
- jupyter_ext/labextension/static/lib_index_js.8f72c63cdf542389aa9d.js.map +0 -1
- {hdsp_jupyter_extension-2.0.18.data → hdsp_jupyter_extension-2.0.20.data}/data/etc/jupyter/jupyter_server_config.d/hdsp_jupyter_extension.json +0 -0
- {hdsp_jupyter_extension-2.0.18.data → hdsp_jupyter_extension-2.0.20.data}/data/share/jupyter/labextensions/hdsp-agent/install.json +0 -0
- {hdsp_jupyter_extension-2.0.18.data → hdsp_jupyter_extension-2.0.20.data}/data/share/jupyter/labextensions/hdsp-agent/static/node_modules_emotion_use-insertion-effect-with-fallbacks_dist_emotion-use-insertion-effect-wi-3ba6b80.c095373419d05e6f141a.js +0 -0
- {hdsp_jupyter_extension-2.0.18.data → hdsp_jupyter_extension-2.0.20.data}/data/share/jupyter/labextensions/hdsp-agent/static/node_modules_emotion_use-insertion-effect-with-fallbacks_dist_emotion-use-insertion-effect-wi-3ba6b80.c095373419d05e6f141a.js.map +0 -0
- {hdsp_jupyter_extension-2.0.18.data → hdsp_jupyter_extension-2.0.20.data}/data/share/jupyter/labextensions/hdsp-agent/static/node_modules_emotion_use-insertion-effect-with-fallbacks_dist_emotion-use-insertion-effect-wi-3ba6b81.61e75fb98ecff46cf836.js +0 -0
- {hdsp_jupyter_extension-2.0.18.data → hdsp_jupyter_extension-2.0.20.data}/data/share/jupyter/labextensions/hdsp-agent/static/node_modules_emotion_use-insertion-effect-with-fallbacks_dist_emotion-use-insertion-effect-wi-3ba6b81.61e75fb98ecff46cf836.js.map +0 -0
- {hdsp_jupyter_extension-2.0.18.data → hdsp_jupyter_extension-2.0.20.data}/data/share/jupyter/labextensions/hdsp-agent/static/style.js +0 -0
- {hdsp_jupyter_extension-2.0.18.data → hdsp_jupyter_extension-2.0.20.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_babel_runtime_helpers_esm_extends_js-node_modules_emotion_serialize_dist-051195.e2553aab0c3963b83dd7.js +0 -0
- {hdsp_jupyter_extension-2.0.18.data → hdsp_jupyter_extension-2.0.20.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_babel_runtime_helpers_esm_extends_js-node_modules_emotion_serialize_dist-051195.e2553aab0c3963b83dd7.js.map +0 -0
- {hdsp_jupyter_extension-2.0.18.data → hdsp_jupyter_extension-2.0.20.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_emotion_cache_dist_emotion-cache_browser_development_esm_js.24edcc52a1c014a8a5f0.js +0 -0
- {hdsp_jupyter_extension-2.0.18.data → hdsp_jupyter_extension-2.0.20.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_emotion_cache_dist_emotion-cache_browser_development_esm_js.24edcc52a1c014a8a5f0.js.map +0 -0
- {hdsp_jupyter_extension-2.0.18.data → hdsp_jupyter_extension-2.0.20.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_emotion_react_dist_emotion-react_browser_development_esm_js.19ecf6babe00caff6b8a.js +0 -0
- {hdsp_jupyter_extension-2.0.18.data → hdsp_jupyter_extension-2.0.20.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_emotion_react_dist_emotion-react_browser_development_esm_js.19ecf6babe00caff6b8a.js.map +0 -0
- {hdsp_jupyter_extension-2.0.18.data → hdsp_jupyter_extension-2.0.20.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_emotion_styled_dist_emotion-styled_browser_development_esm_js.661fb5836f4978a7c6e1.js +0 -0
- {hdsp_jupyter_extension-2.0.18.data → hdsp_jupyter_extension-2.0.20.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_emotion_styled_dist_emotion-styled_browser_development_esm_js.661fb5836f4978a7c6e1.js.map +0 -0
- {hdsp_jupyter_extension-2.0.18.data → hdsp_jupyter_extension-2.0.20.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_mui_material_index_js.985697e0162d8d088ca2.js +0 -0
- {hdsp_jupyter_extension-2.0.18.data → hdsp_jupyter_extension-2.0.20.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_mui_material_index_js.985697e0162d8d088ca2.js.map +0 -0
- {hdsp_jupyter_extension-2.0.18.data → hdsp_jupyter_extension-2.0.20.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_mui_material_utils_createSvgIcon_js.1f5038488cdfd8b3a85d.js +0 -0
- {hdsp_jupyter_extension-2.0.18.data → hdsp_jupyter_extension-2.0.20.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_mui_material_utils_createSvgIcon_js.1f5038488cdfd8b3a85d.js.map +0 -0
- {hdsp_jupyter_extension-2.0.18.dist-info → hdsp_jupyter_extension-2.0.20.dist-info}/WHEEL +0 -0
- {hdsp_jupyter_extension-2.0.18.dist-info → hdsp_jupyter_extension-2.0.20.dist-info}/licenses/LICENSE +0 -0
agent_server/langchain/agent_prompts/planner_prompt.py

@@ -23,24 +23,35 @@ PLANNER_SYSTEM_PROMPT = """당신은 작업을 조율하는 Main Agent입니다.
 | athena_query | SQL 쿼리 생성 | task_tool(agent_name="athena_query", description="매출 테이블 조회 쿼리") |
 | researcher | 정보 검색 | task_tool(agent_name="researcher", description="관련 문서 검색") |
 
-## Step 3: 결과
+## Step 3: 결과 실행/적용 (필수!)
 **task_tool을 호출 했다면, 호출 후 반드시 결과를 처리해야 함:**
 
-| 서브에이전트 | 처리 방법 | 예시 |
-
-| python_developer |
-
-
+| 서브에이전트 | 작업 유형 | 처리 방법 | 예시 |
+|-------------|----------|----------|------|
+| python_developer | 코드 실행 (데이터 분석, 시각화) | jupyter_cell_tool | jupyter_cell_tool(code=반환된_코드) |
+| python_developer | **파일 생성/수정** | **write_file_tool 또는 multiedit_file_tool** | write_file_tool(path="script.js", content=반환된_코드) |
+| athena_query | SQL 표시 | markdown_tool | markdown_tool(content="```sql\n반환된_쿼리\n```") |
+| researcher | 텍스트 요약 | 직접 응답 | - |
 
-
+**🔴 중요: 코드 저장 도구 선택**
+- **파일 생성/수정 요청** → `write_file_tool` 또는 `multiedit_file_tool` 사용
+- **코드 실행 요청** (데이터 분석, 차트 등) → `jupyter_cell_tool` 사용
+- **❌ markdown_tool은 코드 저장용이 아님!** (표시 전용)
+
+**중요**: task_tool 결과를 받은 후 바로 write_todos로 완료 처리하지 말고, 반드시 위 도구로 결과를 먼저 적용!
+
+**🔴 KeyboardInterrupt 발생 시**: jupyter_cell_tool 실행 중 KeyboardInterrupt가 발생하면 ask_user_tool로 중단 사유를 사용자에게 확인
+- 예: ask_user_tool(question="코드 실행이 중단되었습니다. 중단 사유를 알려주시면 다음 진행에 참고하겠습니다.", input_type="text")
 
 # write_todos 규칙 [필수]
 - 한국어로 작성
 - **🔴 기존 todo 절대 삭제 금지**: 전체 리스트를 항상 포함하고 status만 변경
-
-
-
-
+- **🔴 상태 전환 순서 필수**: pending → in_progress → completed (건너뛰기 금지!)
+- **🔴 초기 생성 규칙**: 첫 write_todos 호출 시 첫 번째 todo만 in_progress, 나머지는 모두 pending
+- 올바른 초기 예: [{"content": "작업1", "status": "in_progress"}, {"content": "작업2", "status": "pending"}, {"content": "작업 요약 및 다음 단계 제시", "status": "pending"}]
+- 잘못된 초기 예: [{"content": "작업1", "status": "completed"}, ...] ← 실제 작업 없이 completed 금지!
+- **🔴 completed 전환 조건**: 실제 도구(task_tool, jupyter_cell_tool 등)로 작업 수행 후에만 completed로 변경
+- in_progress 상태는 **동시에 1개만** 허용 (completed, pending todo는 삭제하지 않고 모두 유지)
 - content에 도구(tool)명 언급 금지
 - **[필수] 마지막 todo는 반드시 "작업 요약 및 다음 단계 제시"**
 
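The write_todos rules added to the planner prompt boil down to a simple invariant on the todo list: exactly one item in_progress, nothing completed before real work has happened, and the summary item last. A minimal sketch of that invariant follows; it is illustrative only (the sample todo contents and the helper name are made up, not code from the package).

```python
# Hypothetical sketch: a well-formed initial write_todos payload under the
# prompt rules above, plus a check of the "one in_progress, rest pending,
# summary last" invariant. Not part of the package.
todos = [
    {"content": "매출 데이터 조회", "status": "in_progress"},          # first todo starts in_progress
    {"content": "조회 결과 시각화", "status": "pending"},
    {"content": "작업 요약 및 다음 단계 제시", "status": "pending"},    # summary todo is always last
]

def looks_like_valid_initial_todos(items: list[dict]) -> bool:
    """True if the list matches the initial-state rules from the prompt."""
    statuses = [t.get("status") for t in items]
    return (
        statuses.count("in_progress") == 1          # only one task active at a time
        and statuses.count("completed") == 0        # nothing completed before any tool ran
        and items[-1]["content"].startswith("작업 요약")  # last todo is the summary step
    )

print(looks_like_valid_initial_todos(todos))  # True
```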
agent_server/langchain/custom_middleware.py

@@ -444,63 +444,78 @@ def create_handle_empty_response_middleware(wrap_model_call):
 )
 
 if has_summary_pattern:
-#
-
-
-
-
-
-
-
-
-
-
-
-
+    # Check if pending todos exist - if so, don't force complete
+    current_todos = request.state.get("todos", [])
+    pending_todos = [
+        t for t in current_todos
+        if isinstance(t, dict) and t.get("status") == "pending"
+    ]
+    if pending_todos:
+        logger.warning(
+            "Summary JSON detected but pending todos remain - not forcing completion: %s",
+            [t.get("content", "")[:30] for t in pending_todos],
+        )
+        # Don't synthesize completion, return response as-is
+        # Let LLM continue working on pending todos
+    else:
+        # No pending todos, safe to synthesize completion
+        # Try to extract and repair summary JSON from mixed content
+        try:
+            # Try to find JSON object containing summary
+            import re
+            json_match = re.search(r'\{[^{}]*"summary"[^{}]*"next_items"[^{}]*\}', content, re.DOTALL)
+            if json_match:
+                repaired_summary = repair_json(
+                    json_match.group(), return_objects=True
+                )
+            else:
+                repaired_summary = repair_json(
+                    content, return_objects=True
+                )
 
-
-
-
-
-
-
-
-
-)
-logger.info(
-"Detected and repaired summary JSON in content (pattern-based detection)"
-)
-# Create message with repaired content
-repaired_response_message = AIMessage(
-content=repaired_content,
-tool_calls=getattr(
-response_message, "tool_calls", []
+            if (
+                isinstance(repaired_summary, dict)
+                and "summary" in repaired_summary
+                and "next_items" in repaired_summary
+            ):
+                # Create new message with repaired JSON content
+                repaired_content = json.dumps(
+                    repaired_summary, ensure_ascii=False
                 )
-
-
-
-
-repaired_response_message
-
-
-
-
-
-
-
+                logger.info(
+                    "Detected and repaired summary JSON in content (pattern-based detection)"
+                )
+                # Create message with repaired content
+                repaired_response_message = AIMessage(
+                    content=repaired_content,
+                    tool_calls=getattr(
+                        response_message, "tool_calls", []
+                    )
+                    or [],
+                )
+                synthetic_message = _create_synthetic_completion(
+                    request,
+                    repaired_response_message,
+                    has_content=True,
+                )
+                response = _replace_ai_message_in_response(
+                    response, synthetic_message
+                )
+                return response
+        except Exception as e:
+            logger.debug(f"Failed to extract summary JSON from mixed content: {e}")
 
-
-
-
-
-
-
-
-
-
-
+            # Fallback: accept as-is if repair failed but looks like summary
+            logger.info(
+                "Detected summary JSON pattern in content - accepting and synthesizing write_todos"
+            )
+            synthetic_message = _create_synthetic_completion(
+                request, response_message, has_content=True
+            )
+            response = _replace_ai_message_in_response(
+                response, synthetic_message
+            )
+            return response
 
 # Legacy: Also check if current todo is a summary todo (backward compatibility)
 todos = request.state.get("todos", [])
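The middleware change above extracts a `{"summary": ..., "next_items": ...}` object from mixed LLM output with a regex and passes it through `repair_json`. The standalone sketch below reproduces just that extraction step, assuming the `json_repair` package (which exposes `repair_json(..., return_objects=True)`); the sample text is invented.

```python
# Standalone sketch of the summary-extraction step: find the summary JSON in
# mixed output and repair it into a dict. Assumes `pip install json-repair`;
# the sample content is made up.
import re

from json_repair import repair_json

mixed_content = (
    "작업을 모두 마쳤습니다.\n"
    '{"summary": "매출 데이터 분석 완료", "next_items": ["차트 스타일 조정",],}'  # trailing commas: invalid JSON
)

# Same pattern as the middleware: one object containing both keys
json_match = re.search(r'\{[^{}]*"summary"[^{}]*"next_items"[^{}]*\}', mixed_content, re.DOTALL)

raw = json_match.group() if json_match else mixed_content
repaired = repair_json(raw, return_objects=True)  # returns a dict, fixing the minor damage

if isinstance(repaired, dict) and "summary" in repaired and "next_items" in repaired:
    print(repaired["summary"], repaired["next_items"])
```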
agent_server/langchain/custom_middleware.py

@@ -1009,17 +1024,34 @@ def create_normalize_tool_args_middleware(wrap_model_call, tools=None):
 else:
     found_first = True
 
-#
-#
-
-
-
-
-
-
-
-
-
+# Validate: "작업 요약 및 다음 단계 제시" cannot be in_progress if pending todos exist
+# This prevents LLM from skipping pending tasks
+summary_keywords = ["작업 요약", "다음 단계 제시"]
+for i, todo in enumerate(todos):
+    if not isinstance(todo, dict):
+        continue
+    content = todo.get("content", "")
+    is_summary_todo = any(kw in content for kw in summary_keywords)
+
+    if is_summary_todo and todo.get("status") == "in_progress":
+        # Check if there are pending todos before this one
+        pending_before = [
+            t for t in todos[:i]
+            if isinstance(t, dict) and t.get("status") == "pending"
+        ]
+        if pending_before:
+            # Revert summary todo to pending
+            todo["status"] = "pending"
+            # Set the first pending todo to in_progress
+            for t in todos:
+                if isinstance(t, dict) and t.get("status") == "pending":
+                    t["status"] = "in_progress"
+                    logger.warning(
+                        "Reverted summary todo to pending, set '%s' to in_progress (pending todos exist)",
+                        t.get("content", "")[:30],
+                    )
+                    break
+        break
 
 return response
 
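In plain terms, the new validation reverts a summary todo that went in_progress too early and re-activates the first task that is still pending. A self-contained illustration follows (the sample data and the function wrapper are made up; in the package this logic runs inside the middleware, not as a standalone function).

```python
# Illustration of the reordering rule above: the summary todo cannot be
# in_progress while earlier todos are still pending.
def enforce_summary_last(todos: list[dict]) -> list[dict]:
    summary_keywords = ["작업 요약", "다음 단계 제시"]
    for i, todo in enumerate(todos):
        is_summary = any(kw in todo.get("content", "") for kw in summary_keywords)
        if is_summary and todo.get("status") == "in_progress":
            if any(t.get("status") == "pending" for t in todos[:i]):
                todo["status"] = "pending"            # summary todo goes back to pending
                for t in todos:
                    if t.get("status") == "pending":
                        t["status"] = "in_progress"   # first pending task becomes active
                        break
            break
    return todos

todos = [
    {"content": "데이터 조회", "status": "completed"},
    {"content": "시각화", "status": "pending"},
    {"content": "작업 요약 및 다음 단계 제시", "status": "in_progress"},
]
print(enforce_summary_last(todos))
# -> "시각화" becomes in_progress, the summary todo is reverted to pending
```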
agent_server/langchain/llm_factory.py

@@ -97,16 +97,37 @@ def _create_vllm_llm(llm_config: Dict[str, Any], callbacks):
     endpoint = vllm_config.get("endpoint", "http://localhost:8000/v1")
     model = vllm_config.get("model", "default")
     api_key = vllm_config.get("apiKey", "dummy")
+    use_responses_api = vllm_config.get("useResponsesApi", False)
+    temperature = vllm_config.get("temperature", 0.0)
 
-    logger.info(
+    logger.info(
+        f"Creating vLLM LLM with model: {model}, endpoint: {endpoint}, "
+        f"use_responses_api: {use_responses_api}, temperature: {temperature}"
+    )
+
+    # Use ChatGPTOSS for gpt-oss models (Harmony format with developer role)
+    if "gpt-oss" in model.lower():
+        from agent_server.langchain.models import ChatGPTOSS
+
+        logger.info(f"Using ChatGPTOSS for gpt-oss model (developer role support)")
+        return ChatGPTOSS(
+            model=model,
+            base_url=endpoint,
+            api_key=api_key,
+            temperature=temperature,
+            max_tokens=8192,
+            streaming=False,
+            callbacks=callbacks,
+        )
 
     return ChatOpenAI(
         model=model,
         api_key=api_key,
         base_url=endpoint,  # Use endpoint as-is (no /v1 suffix added)
         streaming=False,  # Agent mode: disable LLM streaming (SSE handled by agent server)
-        temperature=
-        max_tokens=
+        temperature=temperature,
+        max_tokens=8192,
+        use_responses_api=use_responses_api,  # Use /v1/responses endpoint if True
         callbacks=callbacks,
     )
 
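The practical effect of this hunk is a routing rule inside `_create_vllm_llm`: any model whose name contains "gpt-oss" is served by the new `ChatGPTOSS` wrapper (Harmony format with a developer role, `max_tokens=8192`, streaming off), everything else keeps using `ChatOpenAI`. Below is a minimal sketch of just that decision, with a made-up config; the keys mirror what the factory reads, the values are examples only.

```python
# Sketch of the model-name routing added above. Config values are examples only.
vllm_config = {
    "endpoint": "http://localhost:8000/v1",
    "model": "gpt-oss-20b",
    "apiKey": "dummy",
    "useResponsesApi": False,
    "temperature": 0.0,
}

model = vllm_config.get("model", "default")

if "gpt-oss" in model.lower():
    chosen = "ChatGPTOSS"   # Harmony/developer-role wrapper from agent_server.langchain.models
else:
    chosen = "ChatOpenAI"   # standard OpenAI-compatible client (optionally /v1/responses)

print(f"{model} -> {chosen}")  # gpt-oss-20b -> ChatGPTOSS
```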
agent_server/langchain/llm_factory.py

@@ -148,14 +169,25 @@ def create_summarization_llm(llm_config: Dict[str, Any]):
             temperature=0.0,
         )
     elif provider == "vllm":
-        from langchain_openai import ChatOpenAI
-
         vllm_config = llm_config.get("vllm", {})
         # User provides full base URL (e.g., https://openrouter.ai/api/v1)
         endpoint = vllm_config.get("endpoint", "http://localhost:8000/v1")
         model = vllm_config.get("model", "default")
         api_key = vllm_config.get("apiKey", "dummy")
 
+        # Use ChatGPTOSS for gpt-oss models
+        if "gpt-oss" in model.lower():
+            from agent_server.langchain.models import ChatGPTOSS
+
+            return ChatGPTOSS(
+                model=model,
+                base_url=endpoint,
+                api_key=api_key,
+                temperature=0.0,
+            )
+
+        from langchain_openai import ChatOpenAI
+
         return ChatOpenAI(
             model=model,
             api_key=api_key,
agent_server/langchain/logging_utils.py

@@ -14,8 +14,36 @@ from langchain_core.callbacks import BaseCallbackHandler
 
 logger = logging.getLogger(__name__)
 
+# Dedicated logger for LLM responses - always enabled with its own handler
+llm_response_logger = logging.getLogger("agent_server.llm_response")
+llm_response_logger.setLevel(logging.INFO)
+llm_response_logger.propagate = True  # Propagate to root logger
+
+# Ensure it has a handler if running standalone
+if not llm_response_logger.handlers and not logging.getLogger().handlers:
+    _handler = logging.StreamHandler()
+    _handler.setFormatter(logging.Formatter('%(message)s'))
+    llm_response_logger.addHandler(_handler)
+
+
+def disable_langchain_logging():
+    """Disable all langchain logging except LLM responses."""
+    # Set all langchain loggers to CRITICAL
+    for name in list(logging.Logger.manager.loggerDict.keys()):
+        if "langchain" in name.lower() or name.startswith("agent_server.langchain"):
+            logging.getLogger(name).setLevel(logging.CRITICAL)
+    # Keep LLM response logger at INFO
+    llm_response_logger.setLevel(logging.INFO)
+
+
+# Auto-disable on import (comment this line to re-enable all logs)
+disable_langchain_logging()
+
 LOG_SEPARATOR = "=" * 96
 LOG_SUBSECTION = "-" * 96
+LOG_EMOJI_LINE = "🔵" * 48
+LOG_RESPONSE_START = f"\n\n{LOG_EMOJI_LINE}\n{'=' * 96}\n ✨ LLM RESPONSE START\n{'=' * 96}"
+LOG_RESPONSE_END = f"{'=' * 96}\n ✅ LLM RESPONSE END\n{'=' * 96}\n{LOG_EMOJI_LINE}\n"
 
 
 def _format_system_prompt_for_log(messages) -> tuple[int, int, str]:
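The intent of this logging change is selective silencing: every langchain-related logger is pushed to CRITICAL on import, while the dedicated `agent_server.llm_response` logger stays at INFO. A stdlib-only sketch of the same pattern is below; the logger names mirror the diff, the messages are made up.

```python
# Stdlib-only sketch of the selective silencing above: noisy langchain/agent
# loggers go to CRITICAL, the dedicated response logger stays audible.
import logging

logging.basicConfig(level=logging.INFO, format="%(message)s")

noisy = logging.getLogger("agent_server.langchain.custom_middleware")
llm_response_logger = logging.getLogger("agent_server.llm_response")

# Same idea as disable_langchain_logging(): sweep known loggers and raise their level.
for name in list(logging.Logger.manager.loggerDict.keys()):
    if "langchain" in name.lower() or name.startswith("agent_server.langchain"):
        logging.getLogger(name).setLevel(logging.CRITICAL)
llm_response_logger.setLevel(logging.INFO)

noisy.info("middleware chatter")            # suppressed: logger level is CRITICAL
llm_response_logger.info("LLM RESPONSE")    # printed via the root handler
```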
agent_server/langchain/logging_utils.py

@@ -179,15 +207,15 @@ class LLMTraceLogger(BaseCallbackHandler):
         logger.info("%s", "\n".join(lines))
 
     def on_chat_model_start(self, serialized, messages, **kwargs) -> None:
-
-
-"%s",
-_format_messages_block("AGENT -> LLM PROMPT (<none>)", []),
-)
-return
-self._log_prompt_batches("AGENT -> LLM PROMPT", messages)
+        # Request logging disabled - only log responses
+        pass
 
     def on_chat_model_end(self, response, **kwargs) -> None:
+        # Debug: Check if callback is even called
+        print("[DEBUG] on_chat_model_end CALLED!", flush=True)
+        # Use print for guaranteed visibility
+        print(LOG_RESPONSE_START, flush=True)
+
         generations = getattr(response, "generations", None) or []
         if generations and isinstance(generations[0], list):
             batches = generations
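Note the design choice running through this and the following two hunks: response tracing now goes through `print(..., flush=True)` rather than `logger.info`, so the trace stays visible even after `disable_langchain_logging()` mutes the module loggers. A simplified stand-in for that pattern (the class and banners below are illustrative, not the package's real handler):

```python
# Simplified stand-in for the print-based response tracing: banners emitted
# with print(..., flush=True) bypass logging levels and output buffering.
RESPONSE_START = "=" * 36 + " LLM RESPONSE START " + "=" * 36
RESPONSE_END = "=" * 36 + "  LLM RESPONSE END  " + "=" * 36

class PrintTraceLogger:
    def on_chat_model_end(self, response_text: str) -> None:
        print(RESPONSE_START, flush=True)
        print(response_text, flush=True)
        print(RESPONSE_END, flush=True)

PrintTraceLogger().on_chat_model_end('{"summary": "done", "next_items": []}')
```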
agent_server/langchain/logging_utils.py

@@ -203,7 +231,7 @@ class LLMTraceLogger(BaseCallbackHandler):
                 title = (
                     f"LLM -> AGENT RESPONSE (batch={batch_idx}, generation={gen_idx})"
                 )
-
+                print(_format_messages_block(title, [message]), flush=True)
 
                 tool_calls = getattr(message, "tool_calls", None)
                 if tool_calls:
agent_server/langchain/logging_utils.py

@@ -211,13 +239,10 @@ class LLMTraceLogger(BaseCallbackHandler):
                         "LLM -> AGENT TOOL CALLS "
                         f"(batch={batch_idx}, generation={gen_idx})"
                     )
-
+                    print(_format_json_block(tool_title, tool_calls), flush=True)
 
-
-if not prompts:
-logger.info("%s", _format_json_block("LLM PROMPT (<none>)", ""))
-return
+        print(LOG_RESPONSE_END, flush=True)
 
-
-
-
+    def on_llm_start(self, serialized, prompts, **kwargs) -> None:
+        # Request logging disabled - only log responses
+        pass