hdsp-jupyter-extension 2.0.27__py3-none-any.whl → 2.0.28__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agent_server/context_providers/__init__.py +4 -2
- agent_server/context_providers/actions.py +73 -7
- agent_server/context_providers/file.py +23 -23
- agent_server/langchain/__init__.py +2 -2
- agent_server/langchain/agent.py +18 -251
- agent_server/langchain/agent_factory.py +26 -4
- agent_server/langchain/agent_prompts/planner_prompt.py +22 -31
- agent_server/langchain/custom_middleware.py +268 -43
- agent_server/langchain/llm_factory.py +102 -54
- agent_server/langchain/logging_utils.py +1 -1
- agent_server/langchain/middleware/__init__.py +5 -0
- agent_server/langchain/middleware/content_injection_middleware.py +110 -0
- agent_server/langchain/middleware/subagent_events.py +88 -9
- agent_server/langchain/middleware/subagent_middleware.py +501 -245
- agent_server/langchain/prompts.py +5 -22
- agent_server/langchain/state_schema.py +44 -0
- agent_server/langchain/tools/jupyter_tools.py +4 -5
- agent_server/langchain/tools/tool_registry.py +6 -0
- agent_server/routers/chat.py +305 -2
- agent_server/routers/config.py +193 -8
- agent_server/routers/config_schema.py +254 -0
- agent_server/routers/context.py +31 -8
- agent_server/routers/langchain_agent.py +276 -155
- hdsp_agent_core/managers/config_manager.py +100 -1
- {hdsp_jupyter_extension-2.0.27.data → hdsp_jupyter_extension-2.0.28.data}/data/share/jupyter/labextensions/hdsp-agent/build_log.json +1 -1
- {hdsp_jupyter_extension-2.0.27.data → hdsp_jupyter_extension-2.0.28.data}/data/share/jupyter/labextensions/hdsp-agent/package.json +2 -2
- hdsp_jupyter_extension-2.0.27.data/data/share/jupyter/labextensions/hdsp-agent/static/frontend_styles_index_js.b5e4416b4e07ec087aad.js → hdsp_jupyter_extension-2.0.28.data/data/share/jupyter/labextensions/hdsp-agent/static/frontend_styles_index_js.55727265b00191e68d9a.js +479 -15
- hdsp_jupyter_extension-2.0.28.data/data/share/jupyter/labextensions/hdsp-agent/static/frontend_styles_index_js.55727265b00191e68d9a.js.map +1 -0
- jupyter_ext/labextension/static/lib_index_js.67505497667f9c0a763d.js → hdsp_jupyter_extension-2.0.28.data/data/share/jupyter/labextensions/hdsp-agent/static/lib_index_js.df05d90f366bfd5fa023.js +1287 -190
- hdsp_jupyter_extension-2.0.28.data/data/share/jupyter/labextensions/hdsp-agent/static/lib_index_js.df05d90f366bfd5fa023.js.map +1 -0
- hdsp_jupyter_extension-2.0.27.data/data/share/jupyter/labextensions/hdsp-agent/static/remoteEntry.4ab73bb5068405670214.js → hdsp_jupyter_extension-2.0.28.data/data/share/jupyter/labextensions/hdsp-agent/static/remoteEntry.08fce819ee32e9d25175.js +3 -3
- jupyter_ext/labextension/static/remoteEntry.4ab73bb5068405670214.js.map → hdsp_jupyter_extension-2.0.28.data/data/share/jupyter/labextensions/hdsp-agent/static/remoteEntry.08fce819ee32e9d25175.js.map +1 -1
- {hdsp_jupyter_extension-2.0.27.dist-info → hdsp_jupyter_extension-2.0.28.dist-info}/METADATA +1 -1
- {hdsp_jupyter_extension-2.0.27.dist-info → hdsp_jupyter_extension-2.0.28.dist-info}/RECORD +65 -63
- jupyter_ext/_version.py +1 -1
- jupyter_ext/handlers.py +41 -0
- jupyter_ext/labextension/build_log.json +1 -1
- jupyter_ext/labextension/package.json +2 -2
- jupyter_ext/labextension/static/{frontend_styles_index_js.b5e4416b4e07ec087aad.js → frontend_styles_index_js.55727265b00191e68d9a.js} +479 -15
- jupyter_ext/labextension/static/frontend_styles_index_js.55727265b00191e68d9a.js.map +1 -0
- hdsp_jupyter_extension-2.0.27.data/data/share/jupyter/labextensions/hdsp-agent/static/lib_index_js.67505497667f9c0a763d.js → jupyter_ext/labextension/static/lib_index_js.df05d90f366bfd5fa023.js +1287 -190
- jupyter_ext/labextension/static/lib_index_js.df05d90f366bfd5fa023.js.map +1 -0
- jupyter_ext/labextension/static/{remoteEntry.4ab73bb5068405670214.js → remoteEntry.08fce819ee32e9d25175.js} +3 -3
- hdsp_jupyter_extension-2.0.27.data/data/share/jupyter/labextensions/hdsp-agent/static/remoteEntry.4ab73bb5068405670214.js.map → jupyter_ext/labextension/static/remoteEntry.08fce819ee32e9d25175.js.map +1 -1
- agent_server/langchain/middleware/description_injector.py +0 -150
- hdsp_jupyter_extension-2.0.27.data/data/share/jupyter/labextensions/hdsp-agent/static/frontend_styles_index_js.b5e4416b4e07ec087aad.js.map +0 -1
- hdsp_jupyter_extension-2.0.27.data/data/share/jupyter/labextensions/hdsp-agent/static/lib_index_js.67505497667f9c0a763d.js.map +0 -1
- jupyter_ext/labextension/static/frontend_styles_index_js.b5e4416b4e07ec087aad.js.map +0 -1
- jupyter_ext/labextension/static/lib_index_js.67505497667f9c0a763d.js.map +0 -1
- {hdsp_jupyter_extension-2.0.27.data → hdsp_jupyter_extension-2.0.28.data}/data/etc/jupyter/jupyter_server_config.d/hdsp_jupyter_extension.json +0 -0
- {hdsp_jupyter_extension-2.0.27.data → hdsp_jupyter_extension-2.0.28.data}/data/share/jupyter/labextensions/hdsp-agent/install.json +0 -0
- {hdsp_jupyter_extension-2.0.27.data → hdsp_jupyter_extension-2.0.28.data}/data/share/jupyter/labextensions/hdsp-agent/static/node_modules_emotion_use-insertion-effect-with-fallbacks_dist_emotion-use-insertion-effect-wi-3ba6b80.c095373419d05e6f141a.js +0 -0
- {hdsp_jupyter_extension-2.0.27.data → hdsp_jupyter_extension-2.0.28.data}/data/share/jupyter/labextensions/hdsp-agent/static/node_modules_emotion_use-insertion-effect-with-fallbacks_dist_emotion-use-insertion-effect-wi-3ba6b80.c095373419d05e6f141a.js.map +0 -0
- {hdsp_jupyter_extension-2.0.27.data → hdsp_jupyter_extension-2.0.28.data}/data/share/jupyter/labextensions/hdsp-agent/static/node_modules_emotion_use-insertion-effect-with-fallbacks_dist_emotion-use-insertion-effect-wi-3ba6b81.61e75fb98ecff46cf836.js +0 -0
- {hdsp_jupyter_extension-2.0.27.data → hdsp_jupyter_extension-2.0.28.data}/data/share/jupyter/labextensions/hdsp-agent/static/node_modules_emotion_use-insertion-effect-with-fallbacks_dist_emotion-use-insertion-effect-wi-3ba6b81.61e75fb98ecff46cf836.js.map +0 -0
- {hdsp_jupyter_extension-2.0.27.data → hdsp_jupyter_extension-2.0.28.data}/data/share/jupyter/labextensions/hdsp-agent/static/style.js +0 -0
- {hdsp_jupyter_extension-2.0.27.data → hdsp_jupyter_extension-2.0.28.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_babel_runtime_helpers_esm_extends_js-node_modules_emotion_serialize_dist-051195.e2553aab0c3963b83dd7.js +0 -0
- {hdsp_jupyter_extension-2.0.27.data → hdsp_jupyter_extension-2.0.28.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_babel_runtime_helpers_esm_extends_js-node_modules_emotion_serialize_dist-051195.e2553aab0c3963b83dd7.js.map +0 -0
- {hdsp_jupyter_extension-2.0.27.data → hdsp_jupyter_extension-2.0.28.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_emotion_cache_dist_emotion-cache_browser_development_esm_js.24edcc52a1c014a8a5f0.js +0 -0
- {hdsp_jupyter_extension-2.0.27.data → hdsp_jupyter_extension-2.0.28.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_emotion_cache_dist_emotion-cache_browser_development_esm_js.24edcc52a1c014a8a5f0.js.map +0 -0
- {hdsp_jupyter_extension-2.0.27.data → hdsp_jupyter_extension-2.0.28.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_emotion_react_dist_emotion-react_browser_development_esm_js.19ecf6babe00caff6b8a.js +0 -0
- {hdsp_jupyter_extension-2.0.27.data → hdsp_jupyter_extension-2.0.28.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_emotion_react_dist_emotion-react_browser_development_esm_js.19ecf6babe00caff6b8a.js.map +0 -0
- {hdsp_jupyter_extension-2.0.27.data → hdsp_jupyter_extension-2.0.28.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_emotion_styled_dist_emotion-styled_browser_development_esm_js.661fb5836f4978a7c6e1.js +0 -0
- {hdsp_jupyter_extension-2.0.27.data → hdsp_jupyter_extension-2.0.28.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_emotion_styled_dist_emotion-styled_browser_development_esm_js.661fb5836f4978a7c6e1.js.map +0 -0
- {hdsp_jupyter_extension-2.0.27.data → hdsp_jupyter_extension-2.0.28.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_mui_material_index_js.985697e0162d8d088ca2.js +0 -0
- {hdsp_jupyter_extension-2.0.27.data → hdsp_jupyter_extension-2.0.28.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_mui_material_index_js.985697e0162d8d088ca2.js.map +0 -0
- {hdsp_jupyter_extension-2.0.27.data → hdsp_jupyter_extension-2.0.28.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_mui_material_utils_createSvgIcon_js.1f5038488cdfd8b3a85d.js +0 -0
- {hdsp_jupyter_extension-2.0.27.data → hdsp_jupyter_extension-2.0.28.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_mui_material_utils_createSvgIcon_js.1f5038488cdfd8b3a85d.js.map +0 -0
- {hdsp_jupyter_extension-2.0.27.dist-info → hdsp_jupyter_extension-2.0.28.dist-info}/WHEEL +0 -0
- {hdsp_jupyter_extension-2.0.27.dist-info → hdsp_jupyter_extension-2.0.28.dist-info}/licenses/LICENSE +0 -0
agent_server/langchain/prompts.py
CHANGED

```diff
@@ -13,30 +13,19 @@ DEFAULT_SYSTEM_PROMPT = """You are an expert Python data scientist and Jupyter n
 - 설명 없이 tool_call만 하면 안 됨
 
 # 작업 흐름
-1. 간단한 작업 (1-2단계): 바로 실행
-2. 복잡한 작업 (3단계+): write_todos로 계획 → 순차 실행
+1. 간단한 작업 (1-2단계): 바로 실행 → 완료 후 write_todos 사용하지 말고 바로 종료
+2. 복잡한 작업 (3단계+): write_todos로 계획 → 순차 실행 → 완료 시 final_summary_tool 호출
 
 # write_todos 규칙 [필수]
 - 한국어로 작성
 - **🔴 기존 todo 절대 삭제 금지**: 전체 리스트를 항상 포함하고 status만 변경
 - **🔴 상태 전환 순서 필수**: pending → in_progress → completed (건너뛰기 금지!)
 - **🔴 초기 생성 규칙**: 첫 write_todos 호출 시 첫 번째 todo만 in_progress, 나머지는 모두 pending
-- 올바른 초기 예: [{"content": "작업1", "status": "in_progress"}, {"content": "작업2", "status": "pending"}
+- 올바른 초기 예: [{"content": "작업1", "status": "in_progress"}, {"content": "작업2", "status": "pending"}]
 - 잘못된 초기 예: [{"content": "작업1", "status": "completed"}, ...] ← 실제 작업 없이 completed 금지!
 - **🔴 completed 전환 조건**: 실제 도구로 작업 수행 후에만 completed로 변경
 - in_progress는 **동시에 1개만** 유지
-- **
-- **🔴 [실행 순서 필수]**: "작업 요약 및 다음 단계 제시"는 **반드시 가장 마지막에 실행**
-- 다른 모든 todo가 completed 상태가 된 후에만 이 todo를 in_progress로 변경
-- **[중요] "작업 요약 및 다음 단계 제시"는 summary JSON 출력 후에만 completed 표시**
-
-# 모든 작업 완료 후 [필수]
-마지막 todo "작업 요약 및 다음 단계 제시"를 완료할 때:
-**반드시 final_summary_tool을 호출**하여 요약과 다음 단계를 제시하세요.
-- final_summary_tool(summary="완료된 작업 요약", next_items=[{"subject": "제목", "description": "설명"}, ...])
-- next_items 3개 이상 필수
-- **final_summary_tool 호출 없이 종료 금지**
-- **주의**: 텍스트로 JSON 출력하지 말고, 반드시 도구 호출로!
+- **"작업 요약" todo 추가 금지**: 실제 작업만 todo로 생성 (요약은 시스템이 자동 처리)
 
 # 도구 사용
 - check_resource_tool: 대용량 파일/데이터프레임 작업 전 필수
@@ -57,8 +46,6 @@ DEFAULT_SYSTEM_PROMPT = """You are an expert Python data scientist and Jupyter n
 - 빈 응답 (도구 호출도 없고 내용도 없음)
 - 설명 없이 도구만 호출
 - pending/in_progress todo 남기고 종료
-- "작업 요약 및 다음 단계 제시" todo 없이 todo 리스트 생성
-- **🔴 다른 pending todo가 있는데 "작업 요약 및 다음 단계 제시"를 먼저 실행** (순서 위반)
 """
 
 JSON_TOOL_SCHEMA = """Respond with ONLY valid JSON:
@@ -92,11 +79,7 @@ TODO_LIST_TOOL_DESCRIPTION = """Todo 리스트 관리 도구.
 - **🔴 초기 생성**: 첫 호출 시 첫 번째만 in_progress, 나머지는 pending
 - **🔴 completed 조건**: 실제 도구로 작업 수행 후에만 completed로 변경
 - in_progress 상태는 **동시에 1개만** 허용
-- **
-- **🔴 [실행 순서]**: todo는 반드시 리스트 순서대로 실행하고, "작업 요약 및 다음 단계 제시"는 맨 마지막에 실행
-- 이 "작업 요약 및 다음 단계 제시" todo 완료 시 **반드시 final_summary_tool 호출**:
-  final_summary_tool(summary="완료 요약", next_items=[{"subject": "...", "description": "..."}])
-  (next_items 3개 이상 필수, 텍스트 JSON 출력 금지!)
+- **"작업 요약" todo 추가 금지**: 실제 작업만 todo로 생성 (요약은 시스템이 자동 처리)
 """
 
 # List of tools available to the agent
```
agent_server/langchain/state_schema.py
ADDED

```diff
@@ -0,0 +1,44 @@
+"""
+HDSP Agent State Schema
+
+Extends LangChain AgentState with custom fields for state-based
+content passing between Main Agent and Subagents.
+
+Key fields:
+- generated_content: Python code or SQL query from subagents
+- generated_content_type: "python" | "sql" to determine injection target
+- content_description: [DESCRIPTION] section from subagent response
+
+This eliminates JSON serialization issues when passing code/SQL
+between agents — content flows through LangGraph State, not LLM JSON.
+"""
+
+from typing import Optional
+
+from langchain.agents import AgentState
+
+
+class HDSPAgentState(AgentState):
+    """Extended agent state for HDSP multi-agent architecture.
+
+    Adds fields for state-based content passing:
+    - Subagent writes generated_content via Command
+    - ContentInjectionMiddleware reads and injects into tool args
+    """
+
+    generated_content: Optional[str]
+    """Generated Python code or SQL query from subagent.
+    Set by task tool via Command(update={"generated_content": ...})."""
+
+    generated_content_type: Optional[str]
+    """Content type: "python" | "sql" | None.
+    Determines which tool receives the injection."""
+
+    content_description: Optional[str]
+    """Description extracted from [DESCRIPTION] section.
+    Injected alongside content into tool args."""
+
+    todo_active: bool
+    """True when todo list workflow is active.
+    Set to True by TodoActiveMiddleware when write_todos is called.
+    Set to False when final_summary_tool is called, user cancels, or new request starts."""
```
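To make the state-based passing concrete, here is a minimal sketch of how a subagent's task tool could populate these fields through a LangGraph `Command`, as the field docstrings describe. It is illustrative only; `make_task_result` and its arguments are hypothetical and not part of the package.

```python
# Hypothetical sketch (not from the package): a task tool handing generated
# code back through HDSPAgentState instead of serializing it into LLM JSON.
from langgraph.types import Command


def make_task_result(code: str, description: str) -> Command:
    """Route subagent output through graph state, avoiding JSON escaping issues."""
    return Command(
        update={
            "generated_content": code,           # raw Python/SQL, passed verbatim
            "generated_content_type": "python",  # tells middleware which tool to inject into
            "content_description": description,  # [DESCRIPTION] text shown to the user
        }
    )
```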
agent_server/langchain/tools/jupyter_tools.py
CHANGED

```diff
@@ -113,7 +113,7 @@ def markdown_tool(content: str) -> Dict[str, Any]:
             "content": content,
         },
         "status": "completed",
-        "message": "Markdown cell added successfully."
+        "message": "Markdown cell added successfully.",
     }
 
 
@@ -194,13 +194,12 @@ def final_summary_tool(
     next_items: List[Dict[str, str]],
 ) -> Dict[str, Any]:
     """
-    모든
-
-    이 도구는 반드시 모든 todo가 완료된 후, 마지막 "작업 요약 및 다음 단계 제시" todo를 처리할 때만 호출하세요.
+    모든 todo가 completed 상태가 된 후 호출하는 마무리 도구.
+    별도의 "요약" todo를 만들지 말고, 실제 작업 todo가 모두 완료되면 바로 이 도구를 호출하세요.
 
     Args:
         summary: 완료된 작업에 대한 요약 (한국어로 작성)
-        next_items:
+        next_items: 후속 작업 제안 목록 (각각 subject와 description 포함, 3개 이상)
 
     Returns:
         Dict containing the summary and next items for frontend display
```
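Based on the revised docstring, a call would look roughly like the sketch below. The arguments are invented for illustration; the Korean strings follow the tool's own requirement that the summary be written in Korean.

```python
# Illustrative call shape only — argument values are made up for the example.
final_summary_tool(
    summary="CSV 로드와 결측치 처리, 기초 통계 분석을 완료했습니다.",
    next_items=[  # at least 3 items, each with subject + description
        {"subject": "시각화", "description": "주요 변수 분포를 히스토그램으로 확인"},
        {"subject": "상관 분석", "description": "수치형 변수 간 상관관계 계산"},
        {"subject": "모델링", "description": "간단한 회귀 모델로 베이스라인 구축"},
    ],
)
```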
agent_server/langchain/tools/tool_registry.py
CHANGED

```diff
@@ -22,6 +22,7 @@ from agent_server.langchain.tools.file_tools import (
 )
 from agent_server.langchain.tools.jupyter_tools import (
     ask_user_tool,
+    final_summary_tool,
     jupyter_cell_tool,
     markdown_tool,
 )
@@ -46,6 +47,7 @@ ALL_TOOLS = {
     "jupyter_cell_tool": jupyter_cell_tool,
     "markdown_tool": markdown_tool,
     "ask_user_tool": ask_user_tool,
+    "final_summary_tool": final_summary_tool,
     # File tools
     "read_file_tool": read_file_tool,
     "write_file_tool": write_file_tool,
@@ -95,6 +97,8 @@ AGENT_TOOLS_CONFIG = {
         "multiedit_file_tool",
         # Shell for additional operations
         "execute_command_tool",
+        # Final summary (auto-triggered by middleware when all todos done)
+        "final_summary_tool",
     ],
     # write_todos and task tools are added by middleware
     "description": "Main Agent executes code and manages files directly",
@@ -118,6 +122,8 @@ AGENT_TOOLS_CONFIG = {
         "multiedit_file_tool",
         # Shell
         "execute_command_tool",
+        # Final summary (auto-triggered by middleware when all todos done)
+        "final_summary_tool",
     ],
     "description": "Alias for main_agent (backward compatibility)",
     },
```
agent_server/routers/chat.py
CHANGED

```diff
@@ -8,16 +8,97 @@ Supports @file context injection for including file contents in prompts.
 import json
 import logging
 import os
-from typing import Any, AsyncGenerator, Dict
+from typing import Any, AsyncGenerator, Dict, Optional
 
 from fastapi import APIRouter, HTTPException
 from fastapi.responses import StreamingResponse
 from hdsp_agent_core.managers.config_manager import ConfigManager
-from hdsp_agent_core.managers.session_manager import get_session_manager
+from hdsp_agent_core.managers.session_manager import ChatMessage, get_session_manager
 from hdsp_agent_core.models.chat import ChatRequest, ChatResponse
+from pydantic import BaseModel
 
 from agent_server.context_providers import ContextProcessor
 from agent_server.core.llm_service import LLMService
+from agent_server.langchain.llm_factory import create_summarization_llm
+
+
+# ═══════════════════════════════════════════════════════════════════════════
+# Auto-Compact Configuration (Aligned with Agent mode SummarizationMiddleware)
+# ═══════════════════════════════════════════════════════════════════════════
+# These values should match agent_factory.py SummarizationMiddleware settings
+AUTO_COMPACT_TOKEN_THRESHOLD = 30000  # Trigger when tokens exceed this threshold
+AUTO_COMPACT_KEEP_MESSAGES = 15  # Keep last N messages after compaction
+
+# Approximate tokens per character (for Korean/English mixed text)
+CHARS_PER_TOKEN = 3.5  # Conservative estimate
+
+
+def _estimate_tokens(text: str) -> int:
+    """Estimate token count from text length."""
+    return int(len(text) / CHARS_PER_TOKEN)
+
+
+def _estimate_session_tokens(messages: list) -> int:
+    """Estimate total tokens in session messages."""
+    total_chars = sum(len(m.content) for m in messages)
+    return int(total_chars / CHARS_PER_TOKEN)
+
+
+# ═══════════════════════════════════════════════════════════════════════════
+# Summarization Prompt (Claude Code Benchmark)
+# ═══════════════════════════════════════════════════════════════════════════
+SUMMARIZATION_PROMPT = """다음 대화 내용을 요약하여 향후 컨텍스트 윈도우에서 작업을 효율적으로 재개할 수 있도록 해주세요.
+이 요약은 대화 히스토리를 대체하므로, 구조화되고 간결하며 실행 가능해야 합니다.
+
+요약에 반드시 포함할 내용:
+
+## 완료된 작업
+- 완료된 태스크와 주요 결과물
+- 생성/수정된 파일 목록
+
+## 현재 상태
+- 진행 중인 작업
+- 마지막으로 논의된 주제
+
+## 다음 단계
+- 명확한 후속 액션 항목
+- 보류 중인 결정 사항
+
+## 핵심 맥락
+- 사용자 선호사항 및 제약조건
+- 중요한 기술적 결정사항
+- 작업 재개에 필수적인 정보
+
+작성 지침:
+- 간결하되, 작업이 끊김 없이 계속될 수 있을 정도의 세부사항은 보존
+- 불필요한 인사말, 확인 메시지, 중복 내용은 제외
+- 코드 스니펫은 핵심적인 경우에만 포함
+- 한국어로 작성
+
+대화 내용:
+{conversation}
+
+요약:"""
+
+
+# ═══════════════════════════════════════════════════════════════════════════
+# Compact Request/Response Models
+# ═══════════════════════════════════════════════════════════════════════════
+class CompactRequest(BaseModel):
+    """Request for conversation compaction."""
+
+    conversationId: str
+    llmConfig: Optional[dict] = None
+
+
+class CompactResponse(BaseModel):
+    """Response from conversation compaction."""
+
+    success: bool
+    message: str
+    originalMessages: int
+    compressedMessages: int
+    summary: Optional[str] = None
 
 router = APIRouter()
 logger = logging.getLogger(__name__)
```
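As a quick sanity check on the character-based heuristic above (a worked example, not package code): at 3.5 characters per token, the 30,000-token trigger corresponds to roughly 105,000 characters of stored messages.

```python
CHARS_PER_TOKEN = 3.5
AUTO_COMPACT_TOKEN_THRESHOLD = 30000

def _estimate_tokens(text: str) -> int:
    return int(len(text) / CHARS_PER_TOKEN)

print(_estimate_tokens("x" * 105_000))  # 30000 -> exactly at the threshold, no compaction yet
print(_estimate_tokens("x" * 110_000))  # 31428 -> exceeds it, auto-compact would fire
```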
```diff
@@ -81,6 +162,98 @@ def _store_messages(
     session_manager.store_messages(conversation_id, user_message, assistant_response)
 
 
+async def _auto_compact_if_needed(
+    conversation_id: str, llm_config: Dict[str, Any]
+) -> Optional[str]:
+    """
+    Automatically compact conversation if token threshold exceeded.
+
+    Aligned with Agent mode SummarizationMiddleware:
+    - Trigger: Token count exceeds AUTO_COMPACT_TOKEN_THRESHOLD
+    - Keep: Last AUTO_COMPACT_KEEP_MESSAGES messages
+    - Result: [summary] + [recent messages]
+
+    Returns:
+        Summary string if compacted, None otherwise
+    """
+    from datetime import datetime
+
+    session_manager = get_session_manager()
+    session = session_manager.get_session(conversation_id)
+
+    if not session or not session.messages:
+        return None
+
+    # Estimate tokens in session
+    estimated_tokens = _estimate_session_tokens(session.messages)
+
+    # Check if auto-compact needed (token-based, same as Agent mode)
+    if estimated_tokens <= AUTO_COMPACT_TOKEN_THRESHOLD:
+        return None
+
+    # Need at least more messages than we keep
+    if len(session.messages) <= AUTO_COMPACT_KEEP_MESSAGES:
+        return None
+
+    logger.info(
+        f"[Chat] Auto-compact triggered for {conversation_id}: "
+        f"~{estimated_tokens} tokens > {AUTO_COMPACT_TOKEN_THRESHOLD} threshold, "
+        f"{len(session.messages)} messages"
+    )
+
+    try:
+        # Split messages: older (to summarize) vs recent (to keep)
+        messages_to_summarize = session.messages[:-AUTO_COMPACT_KEEP_MESSAGES]
+        recent_messages = session.messages[-AUTO_COMPACT_KEEP_MESSAGES:]
+
+        # Build conversation text for summarization
+        conversation_text = "\n".join(
+            [
+                f"{'사용자' if m.role == 'user' else '어시스턴트'}: {m.content}"
+                for m in messages_to_summarize
+            ]
+        )
+
+        # Create summarization LLM (uses settings from llm_config)
+        summarization_llm = create_summarization_llm(llm_config)
+
+        if not summarization_llm:
+            logger.warning("[Chat] Auto-compact skipped: no summarization LLM available")
+            return None
+
+        # Generate summary using LLM
+        prompt = SUMMARIZATION_PROMPT.format(conversation=conversation_text)
+        response = await summarization_llm.ainvoke(prompt)
+        summary = response.content if hasattr(response, "content") else str(response)
+
+        # Create summary message
+        summary_message = ChatMessage(
+            role="assistant",
+            content=f"[이전 대화 요약]\n\n{summary}",
+            timestamp=datetime.now().timestamp(),
+        )
+
+        # Replace session messages: [summary] + [last N]
+        original_count = len(session.messages)
+        session.messages = [summary_message] + list(recent_messages)
+        session.updated_at = datetime.now().timestamp()
+
+        # NOTE: Session save disabled - sessions are not loaded/used by frontend
+        # session_manager._save_sessions()
+
+        compressed_count = len(session.messages)
+        logger.info(
+            f"[Chat] Auto-compacted {conversation_id}: "
+            f"{original_count} -> {compressed_count} messages"
+        )
+
+        return summary
+
+    except Exception as e:
+        logger.error(f"[Chat] Auto-compact failed: {e}", exc_info=True)
+        return None
+
+
 @router.post("/message", response_model=ChatResponse)
 async def chat_message(request: ChatRequest) -> Dict[str, Any]:
     """
```
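The arithmetic of the replacement list is worth spelling out (an illustration, not package code): with AUTO_COMPACT_KEEP_MESSAGES = 15, a 40-message session shrinks to 16 entries, one synthetic summary message followed by the newest 15.

```python
messages = [f"msg{i}" for i in range(40)]      # stand-ins for ChatMessage objects
keep = 15
to_summarize, recent = messages[:-keep], messages[-keep:]
compacted = ["[이전 대화 요약] ..."] + recent   # what the session holds afterwards
assert len(compacted) == 16
assert compacted[1] == "msg25"                 # oldest surviving original message
```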
```diff
@@ -138,6 +311,9 @@ async def chat_message(request: ChatRequest) -> Dict[str, Any]:
     # Store messages (use cleaned message for history)
     _store_messages(conversation_id, cleaned_message, response)
 
+    # Auto-compact if token threshold exceeded
+    auto_compact_summary = await _auto_compact_if_needed(conversation_id, config)
+
     # Get model info
     provider = config.get("provider", "unknown")
     model = config.get(provider, {}).get("model", "unknown")
@@ -148,6 +324,10 @@ async def chat_message(request: ChatRequest) -> Dict[str, Any]:
         "model": f"{provider}/{model}",
     }
 
+    # Include auto-compact info if triggered
+    if auto_compact_summary:
+        result["autoCompacted"] = True
+
     # Include context errors if any
     if context_errors:
         result["contextErrors"] = context_errors
```
```diff
@@ -221,6 +401,22 @@ async def chat_stream(request: ChatRequest) -> StreamingResponse:
         # Store messages after streaming complete (use cleaned message)
         _store_messages(conversation_id, cleaned_message, full_response)
 
+        # Check if auto-compact is needed and send status updates
+        session_manager = get_session_manager()
+        session = session_manager.get_session(conversation_id)
+        if session and session.messages:
+            estimated_tokens = _estimate_session_tokens(session.messages)
+            if estimated_tokens > AUTO_COMPACT_TOKEN_THRESHOLD and len(session.messages) > AUTO_COMPACT_KEEP_MESSAGES:
+                # Send status: compacting in progress
+                yield f"data: {json.dumps({'status': '대화 컨텍스트 요약 중...', 'icon': 'thinking'})}\n\n"
+
+                # Auto-compact
+                auto_compact_summary = await _auto_compact_if_needed(conversation_id, config)
+
+                # Send status: compact complete
+                if auto_compact_summary:
+                    yield f"data: {json.dumps({'status': '대화가 자동으로 압축되었습니다.', 'icon': 'check'})}\n\n"
+
         # Send final chunk with conversation ID
         yield f"data: {json.dumps({'content': '', 'done': True, 'conversationId': conversation_id})}\n\n"
 
```
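On the wire, a client consuming the SSE stream would see frames like the following when compaction triggers (reconstructed from the yields above; the conversationId is invented, and the Korean text is shown unescaped for readability, whereas json.dumps with its default ensure_ascii=True would emit \uXXXX escapes):

```
data: {"status": "대화 컨텍스트 요약 중...", "icon": "thinking"}

data: {"status": "대화가 자동으로 압축되었습니다.", "icon": "check"}

data: {"content": "", "done": true, "conversationId": "abc-123"}
```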
```diff
@@ -237,3 +433,110 @@ async def chat_stream(request: ChatRequest) -> StreamingResponse:
             "X-Accel-Buffering": "no",  # Disable nginx buffering
         },
     )
+
+
+# ═══════════════════════════════════════════════════════════════════════════
+# Compact Endpoint - LLM-based conversation summarization
+# ═══════════════════════════════════════════════════════════════════════════
+@router.post("/compact", response_model=CompactResponse)
+async def compact_conversation(request: CompactRequest) -> CompactResponse:
+    """
+    Compact conversation history by summarizing older messages.
+
+    Strategy (Claude Code benchmark):
+    - Keep the last 3 messages intact
+    - Summarize all older messages using LLM
+    - Replace history with [summary] + [last 3 messages]
+    """
+    logger.info(f"Compact request for conversation: {request.conversationId}")
+
+    session_manager = get_session_manager()
+    session = session_manager.get_session(request.conversationId)
+
+    if not session:
+        return CompactResponse(
+            success=False,
+            message="세션을 찾을 수 없습니다.",
+            originalMessages=0,
+            compressedMessages=0,
+        )
+
+    original_count = len(session.messages)
+
+    # Already minimal - no compaction needed
+    if original_count <= 3:
+        return CompactResponse(
+            success=True,
+            message="이미 최소 상태입니다. 압축이 필요하지 않습니다.",
+            originalMessages=original_count,
+            compressedMessages=original_count,
+        )
+
+    try:
+        # Split messages: older (to summarize) vs recent (to keep)
+        messages_to_summarize = session.messages[:-3]
+        recent_messages = session.messages[-3:]
+
+        # Build conversation text for summarization
+        conversation_text = "\n".join(
+            [
+                f"{'사용자' if m.role == 'user' else '어시스턴트'}: {m.content}"
+                for m in messages_to_summarize
+            ]
+        )
+
+        # Create summarization LLM
+        llm_config = request.llmConfig or _get_config()
+        summarization_llm = create_summarization_llm(llm_config)
+
+        if not summarization_llm:
+            return CompactResponse(
+                success=False,
+                message="요약용 LLM을 생성할 수 없습니다. API 키를 확인해주세요.",
+                originalMessages=original_count,
+                compressedMessages=original_count,
+            )
+
+        # Generate summary using LLM
+        prompt = SUMMARIZATION_PROMPT.format(conversation=conversation_text)
+        response = await summarization_llm.ainvoke(prompt)
+        summary = response.content if hasattr(response, "content") else str(response)
+
+        # Create summary message
+        from datetime import datetime
+
+        summary_message = ChatMessage(
+            role="assistant",
+            content=f"[이전 대화 요약]\n\n{summary}",
+            timestamp=datetime.now().timestamp(),
+        )
+
+        # Replace session messages: [summary] + [last 3]
+        session.messages = [summary_message] + list(recent_messages)
+        session.updated_at = datetime.now().timestamp()
+
+        # Persist changes
+        session_manager._save_sessions()
+
+        compressed_count = len(session.messages)
+        logger.info(
+            f"Compacted conversation {request.conversationId}: "
+            f"{original_count} -> {compressed_count} messages"
+        )
+
+        return CompactResponse(
+            success=True,
+            message=f"대화가 압축되었습니다. ({original_count}개 → {compressed_count}개 메시지)",
+            originalMessages=original_count,
+            compressedMessages=compressed_count,
+            summary=summary,
+        )
+
+    except Exception as e:
+        logger.error(f"Compact failed: {e}", exc_info=True)
+        return CompactResponse(
+            success=False,
+            message=f"압축 중 오류가 발생했습니다: {str(e)}",
+            originalMessages=original_count,
+            compressedMessages=original_count,
+        )
```