hdsp-jupyter-extension 2.0.26__py3-none-any.whl → 2.0.28__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71) hide show
  1. agent_server/context_providers/__init__.py +4 -2
  2. agent_server/context_providers/actions.py +73 -7
  3. agent_server/context_providers/file.py +23 -23
  4. agent_server/langchain/__init__.py +2 -2
  5. agent_server/langchain/agent.py +18 -251
  6. agent_server/langchain/agent_factory.py +26 -4
  7. agent_server/langchain/agent_prompts/planner_prompt.py +22 -35
  8. agent_server/langchain/custom_middleware.py +278 -43
  9. agent_server/langchain/llm_factory.py +102 -54
  10. agent_server/langchain/logging_utils.py +1 -1
  11. agent_server/langchain/middleware/__init__.py +5 -0
  12. agent_server/langchain/middleware/code_history_middleware.py +126 -37
  13. agent_server/langchain/middleware/content_injection_middleware.py +110 -0
  14. agent_server/langchain/middleware/subagent_events.py +88 -9
  15. agent_server/langchain/middleware/subagent_middleware.py +518 -240
  16. agent_server/langchain/prompts.py +5 -22
  17. agent_server/langchain/state_schema.py +44 -0
  18. agent_server/langchain/tools/jupyter_tools.py +4 -5
  19. agent_server/langchain/tools/tool_registry.py +6 -0
  20. agent_server/routers/chat.py +305 -2
  21. agent_server/routers/config.py +193 -8
  22. agent_server/routers/config_schema.py +254 -0
  23. agent_server/routers/context.py +31 -8
  24. agent_server/routers/langchain_agent.py +310 -153
  25. hdsp_agent_core/managers/config_manager.py +100 -1
  26. {hdsp_jupyter_extension-2.0.26.data → hdsp_jupyter_extension-2.0.28.data}/data/share/jupyter/labextensions/hdsp-agent/build_log.json +1 -1
  27. {hdsp_jupyter_extension-2.0.26.data → hdsp_jupyter_extension-2.0.28.data}/data/share/jupyter/labextensions/hdsp-agent/package.json +2 -2
  28. hdsp_jupyter_extension-2.0.26.data/data/share/jupyter/labextensions/hdsp-agent/static/frontend_styles_index_js.b5e4416b4e07ec087aad.js → hdsp_jupyter_extension-2.0.28.data/data/share/jupyter/labextensions/hdsp-agent/static/frontend_styles_index_js.55727265b00191e68d9a.js +479 -15
  29. hdsp_jupyter_extension-2.0.28.data/data/share/jupyter/labextensions/hdsp-agent/static/frontend_styles_index_js.55727265b00191e68d9a.js.map +1 -0
  30. jupyter_ext/labextension/static/lib_index_js.67505497667f9c0a763d.js → hdsp_jupyter_extension-2.0.28.data/data/share/jupyter/labextensions/hdsp-agent/static/lib_index_js.df05d90f366bfd5fa023.js +1287 -190
  31. hdsp_jupyter_extension-2.0.28.data/data/share/jupyter/labextensions/hdsp-agent/static/lib_index_js.df05d90f366bfd5fa023.js.map +1 -0
  32. hdsp_jupyter_extension-2.0.26.data/data/share/jupyter/labextensions/hdsp-agent/static/remoteEntry.0fe2dcbbd176ee0efceb.js → hdsp_jupyter_extension-2.0.28.data/data/share/jupyter/labextensions/hdsp-agent/static/remoteEntry.08fce819ee32e9d25175.js +3 -3
  33. jupyter_ext/labextension/static/remoteEntry.0fe2dcbbd176ee0efceb.js.map → hdsp_jupyter_extension-2.0.28.data/data/share/jupyter/labextensions/hdsp-agent/static/remoteEntry.08fce819ee32e9d25175.js.map +1 -1
  34. {hdsp_jupyter_extension-2.0.26.dist-info → hdsp_jupyter_extension-2.0.28.dist-info}/METADATA +1 -1
  35. {hdsp_jupyter_extension-2.0.26.dist-info → hdsp_jupyter_extension-2.0.28.dist-info}/RECORD +66 -64
  36. jupyter_ext/_version.py +1 -1
  37. jupyter_ext/handlers.py +41 -0
  38. jupyter_ext/labextension/build_log.json +1 -1
  39. jupyter_ext/labextension/package.json +2 -2
  40. jupyter_ext/labextension/static/{frontend_styles_index_js.b5e4416b4e07ec087aad.js → frontend_styles_index_js.55727265b00191e68d9a.js} +479 -15
  41. jupyter_ext/labextension/static/frontend_styles_index_js.55727265b00191e68d9a.js.map +1 -0
  42. hdsp_jupyter_extension-2.0.26.data/data/share/jupyter/labextensions/hdsp-agent/static/lib_index_js.67505497667f9c0a763d.js → jupyter_ext/labextension/static/lib_index_js.df05d90f366bfd5fa023.js +1287 -190
  43. jupyter_ext/labextension/static/lib_index_js.df05d90f366bfd5fa023.js.map +1 -0
  44. jupyter_ext/labextension/static/{remoteEntry.0fe2dcbbd176ee0efceb.js → remoteEntry.08fce819ee32e9d25175.js} +3 -3
  45. hdsp_jupyter_extension-2.0.26.data/data/share/jupyter/labextensions/hdsp-agent/static/remoteEntry.0fe2dcbbd176ee0efceb.js.map → jupyter_ext/labextension/static/remoteEntry.08fce819ee32e9d25175.js.map +1 -1
  46. agent_server/langchain/middleware/description_injector.py +0 -150
  47. hdsp_jupyter_extension-2.0.26.data/data/share/jupyter/labextensions/hdsp-agent/static/frontend_styles_index_js.b5e4416b4e07ec087aad.js.map +0 -1
  48. hdsp_jupyter_extension-2.0.26.data/data/share/jupyter/labextensions/hdsp-agent/static/lib_index_js.67505497667f9c0a763d.js.map +0 -1
  49. jupyter_ext/labextension/static/frontend_styles_index_js.b5e4416b4e07ec087aad.js.map +0 -1
  50. jupyter_ext/labextension/static/lib_index_js.67505497667f9c0a763d.js.map +0 -1
  51. {hdsp_jupyter_extension-2.0.26.data → hdsp_jupyter_extension-2.0.28.data}/data/etc/jupyter/jupyter_server_config.d/hdsp_jupyter_extension.json +0 -0
  52. {hdsp_jupyter_extension-2.0.26.data → hdsp_jupyter_extension-2.0.28.data}/data/share/jupyter/labextensions/hdsp-agent/install.json +0 -0
  53. {hdsp_jupyter_extension-2.0.26.data → hdsp_jupyter_extension-2.0.28.data}/data/share/jupyter/labextensions/hdsp-agent/static/node_modules_emotion_use-insertion-effect-with-fallbacks_dist_emotion-use-insertion-effect-wi-3ba6b80.c095373419d05e6f141a.js +0 -0
  54. {hdsp_jupyter_extension-2.0.26.data → hdsp_jupyter_extension-2.0.28.data}/data/share/jupyter/labextensions/hdsp-agent/static/node_modules_emotion_use-insertion-effect-with-fallbacks_dist_emotion-use-insertion-effect-wi-3ba6b80.c095373419d05e6f141a.js.map +0 -0
  55. {hdsp_jupyter_extension-2.0.26.data → hdsp_jupyter_extension-2.0.28.data}/data/share/jupyter/labextensions/hdsp-agent/static/node_modules_emotion_use-insertion-effect-with-fallbacks_dist_emotion-use-insertion-effect-wi-3ba6b81.61e75fb98ecff46cf836.js +0 -0
  56. {hdsp_jupyter_extension-2.0.26.data → hdsp_jupyter_extension-2.0.28.data}/data/share/jupyter/labextensions/hdsp-agent/static/node_modules_emotion_use-insertion-effect-with-fallbacks_dist_emotion-use-insertion-effect-wi-3ba6b81.61e75fb98ecff46cf836.js.map +0 -0
  57. {hdsp_jupyter_extension-2.0.26.data → hdsp_jupyter_extension-2.0.28.data}/data/share/jupyter/labextensions/hdsp-agent/static/style.js +0 -0
  58. {hdsp_jupyter_extension-2.0.26.data → hdsp_jupyter_extension-2.0.28.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_babel_runtime_helpers_esm_extends_js-node_modules_emotion_serialize_dist-051195.e2553aab0c3963b83dd7.js +0 -0
  59. {hdsp_jupyter_extension-2.0.26.data → hdsp_jupyter_extension-2.0.28.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_babel_runtime_helpers_esm_extends_js-node_modules_emotion_serialize_dist-051195.e2553aab0c3963b83dd7.js.map +0 -0
  60. {hdsp_jupyter_extension-2.0.26.data → hdsp_jupyter_extension-2.0.28.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_emotion_cache_dist_emotion-cache_browser_development_esm_js.24edcc52a1c014a8a5f0.js +0 -0
  61. {hdsp_jupyter_extension-2.0.26.data → hdsp_jupyter_extension-2.0.28.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_emotion_cache_dist_emotion-cache_browser_development_esm_js.24edcc52a1c014a8a5f0.js.map +0 -0
  62. {hdsp_jupyter_extension-2.0.26.data → hdsp_jupyter_extension-2.0.28.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_emotion_react_dist_emotion-react_browser_development_esm_js.19ecf6babe00caff6b8a.js +0 -0
  63. {hdsp_jupyter_extension-2.0.26.data → hdsp_jupyter_extension-2.0.28.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_emotion_react_dist_emotion-react_browser_development_esm_js.19ecf6babe00caff6b8a.js.map +0 -0
  64. {hdsp_jupyter_extension-2.0.26.data → hdsp_jupyter_extension-2.0.28.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_emotion_styled_dist_emotion-styled_browser_development_esm_js.661fb5836f4978a7c6e1.js +0 -0
  65. {hdsp_jupyter_extension-2.0.26.data → hdsp_jupyter_extension-2.0.28.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_emotion_styled_dist_emotion-styled_browser_development_esm_js.661fb5836f4978a7c6e1.js.map +0 -0
  66. {hdsp_jupyter_extension-2.0.26.data → hdsp_jupyter_extension-2.0.28.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_mui_material_index_js.985697e0162d8d088ca2.js +0 -0
  67. {hdsp_jupyter_extension-2.0.26.data → hdsp_jupyter_extension-2.0.28.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_mui_material_index_js.985697e0162d8d088ca2.js.map +0 -0
  68. {hdsp_jupyter_extension-2.0.26.data → hdsp_jupyter_extension-2.0.28.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_mui_material_utils_createSvgIcon_js.1f5038488cdfd8b3a85d.js +0 -0
  69. {hdsp_jupyter_extension-2.0.26.data → hdsp_jupyter_extension-2.0.28.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_mui_material_utils_createSvgIcon_js.1f5038488cdfd8b3a85d.js.map +0 -0
  70. {hdsp_jupyter_extension-2.0.26.dist-info → hdsp_jupyter_extension-2.0.28.dist-info}/WHEEL +0 -0
  71. {hdsp_jupyter_extension-2.0.26.dist-info → hdsp_jupyter_extension-2.0.28.dist-info}/licenses/LICENSE +0 -0
@@ -13,30 +13,19 @@ DEFAULT_SYSTEM_PROMPT = """You are an expert Python data scientist and Jupyter n
13
13
  - 설명 없이 tool_call만 하면 안 됨
14
14
 
15
15
  # 작업 흐름
16
- 1. 간단한 작업 (1-2단계): 바로 실행
17
- 2. 복잡한 작업 (3단계+): write_todos로 계획 → 순차 실행
16
+ 1. 간단한 작업 (1-2단계): 바로 실행 → 완료 후 write_todos 사용하지 말고 바로 종료
17
+ 2. 복잡한 작업 (3단계+): write_todos로 계획 → 순차 실행 → 완료 시 final_summary_tool 호출
18
18
 
19
19
  # write_todos 규칙 [필수]
20
20
  - 한국어로 작성
21
21
  - **🔴 기존 todo 절대 삭제 금지**: 전체 리스트를 항상 포함하고 status만 변경
22
22
  - **🔴 상태 전환 순서 필수**: pending → in_progress → completed (건너뛰기 금지!)
23
23
  - **🔴 초기 생성 규칙**: 첫 write_todos 호출 시 첫 번째 todo만 in_progress, 나머지는 모두 pending
24
- - 올바른 초기 예: [{"content": "작업1", "status": "in_progress"}, {"content": "작업2", "status": "pending"}, {"content": "작업 요약 및 다음 단계 제시", "status": "pending"}]
24
+ - 올바른 초기 예: [{"content": "작업1", "status": "in_progress"}, {"content": "작업2", "status": "pending"}]
25
25
  - 잘못된 초기 예: [{"content": "작업1", "status": "completed"}, ...] ← 실제 작업 없이 completed 금지!
26
26
  - **🔴 completed 전환 조건**: 실제 도구로 작업 수행 후에만 completed로 변경
27
27
  - in_progress는 **동시에 1개만** 유지
28
- - **[필수] 마지막 todo 반드시 "작업 요약 다음 단계 제시"로 생성**
29
- - **🔴 [실행 순서 필수]**: "작업 요약 및 다음 단계 제시"는 **반드시 가장 마지막에 실행**
30
- - 다른 모든 todo가 completed 상태가 된 후에만 이 todo를 in_progress로 변경
31
- - **[중요] "작업 요약 및 다음 단계 제시"는 summary JSON 출력 후에만 completed 표시**
32
-
33
- # 모든 작업 완료 후 [필수]
34
- 마지막 todo "작업 요약 및 다음 단계 제시"를 완료할 때:
35
- **반드시 final_summary_tool을 호출**하여 요약과 다음 단계를 제시하세요.
36
- - final_summary_tool(summary="완료된 작업 요약", next_items=[{"subject": "제목", "description": "설명"}, ...])
37
- - next_items 3개 이상 필수
38
- - **final_summary_tool 호출 없이 종료 금지**
39
- - **주의**: 텍스트로 JSON 출력하지 말고, 반드시 도구 호출로!
28
+ - **"작업 요약" todo 추가 금지**: 실제 작업만 todo로 생성 (요약은 시스템이 자동 처리)
40
29
 
41
30
  # 도구 사용
42
31
  - check_resource_tool: 대용량 파일/데이터프레임 작업 전 필수
@@ -57,8 +46,6 @@ DEFAULT_SYSTEM_PROMPT = """You are an expert Python data scientist and Jupyter n
57
46
  - 빈 응답 (도구 호출도 없고 내용도 없음)
58
47
  - 설명 없이 도구만 호출
59
48
  - pending/in_progress todo 남기고 종료
60
- - "작업 요약 및 다음 단계 제시" todo 없이 todo 리스트 생성
61
- - **🔴 다른 pending todo가 있는데 "작업 요약 및 다음 단계 제시"를 먼저 실행** (순서 위반)
62
49
  """
63
50
 
64
51
  JSON_TOOL_SCHEMA = """Respond with ONLY valid JSON:
@@ -92,11 +79,7 @@ TODO_LIST_TOOL_DESCRIPTION = """Todo 리스트 관리 도구.
92
79
  - **🔴 초기 생성**: 첫 호출 시 첫 번째만 in_progress, 나머지는 pending
93
80
  - **🔴 completed 조건**: 실제 도구로 작업 수행 후에만 completed로 변경
94
81
  - in_progress 상태는 **동시에 1개만** 허용
95
- - **[필수] 마지막 todo 반드시 "작업 요약 다음 단계 제시"로 생성**
96
- - **🔴 [실행 순서]**: todo는 반드시 리스트 순서대로 실행하고, "작업 요약 및 다음 단계 제시"는 맨 마지막에 실행
97
- - 이 "작업 요약 및 다음 단계 제시" todo 완료 시 **반드시 final_summary_tool 호출**:
98
- final_summary_tool(summary="완료 요약", next_items=[{"subject": "...", "description": "..."}])
99
- (next_items 3개 이상 필수, 텍스트 JSON 출력 금지!)
82
+ - **"작업 요약" todo 추가 금지**: 실제 작업만 todo로 생성 (요약은 시스템이 자동 처리)
100
83
  """
101
84
 
102
85
  # List of tools available to the agent
@@ -0,0 +1,44 @@
1
+ """
2
+ HDSP Agent State Schema
3
+
4
+ Extends LangChain AgentState with custom fields for state-based
5
+ content passing between Main Agent and Subagents.
6
+
7
+ Key fields:
8
+ - generated_content: Python code or SQL query from subagents
9
+ - generated_content_type: "python" | "sql" to determine injection target
10
+ - content_description: [DESCRIPTION] section from subagent response
11
+
12
+ This eliminates JSON serialization issues when passing code/SQL
13
+ between agents — content flows through LangGraph State, not LLM JSON.
14
+ """
15
+
16
+ from typing import Optional
17
+
18
+ from langchain.agents import AgentState
19
+
20
+
21
+ class HDSPAgentState(AgentState):
22
+ """Extended agent state for HDSP multi-agent architecture.
23
+
24
+ Adds fields for state-based content passing:
25
+ - Subagent writes generated_content via Command
26
+ - ContentInjectionMiddleware reads and injects into tool args
27
+ """
28
+
29
+ generated_content: Optional[str]
30
+ """Generated Python code or SQL query from subagent.
31
+ Set by task tool via Command(update={"generated_content": ...})."""
32
+
33
+ generated_content_type: Optional[str]
34
+ """Content type: "python" | "sql" | None.
35
+ Determines which tool receives the injection."""
36
+
37
+ content_description: Optional[str]
38
+ """Description extracted from [DESCRIPTION] section.
39
+ Injected alongside content into tool args."""
40
+
41
+ todo_active: bool
42
+ """True when todo list workflow is active.
43
+ Set to True by TodoActiveMiddleware when write_todos is called.
44
+ Set to False when final_summary_tool is called, user cancels, or new request starts."""
@@ -113,7 +113,7 @@ def markdown_tool(content: str) -> Dict[str, Any]:
113
113
  "content": content,
114
114
  },
115
115
  "status": "completed",
116
- "message": "Markdown cell added successfully. Continue with the next task.",
116
+ "message": "Markdown cell added successfully.",
117
117
  }
118
118
 
119
119
 
@@ -194,13 +194,12 @@ def final_summary_tool(
194
194
  next_items: List[Dict[str, str]],
195
195
  ) -> Dict[str, Any]:
196
196
  """
197
- 모든 작업이 완료된 최종 요약과 다음 단계를 제시하는 도구.
198
-
199
- 이 도구는 반드시 모든 todo가 완료된 후, 마지막 "작업 요약 및 다음 단계 제시" todo를 처리할 때만 호출하세요.
197
+ 모든 todo가 completed 상태가 된 후 호출하는 마무리 도구.
198
+ 별도의 "요약" todo를 만들지 말고, 실제 작업 todo가 모두 완료되면 바로 이 도구를 호출하세요.
200
199
 
201
200
  Args:
202
201
  summary: 완료된 작업에 대한 요약 (한국어로 작성)
203
- next_items: 다음 단계 제안 목록 (각각 subject와 description 포함, 3개 이상)
202
+ next_items: 후속 작업 제안 목록 (각각 subject와 description 포함, 3개 이상)
204
203
 
205
204
  Returns:
206
205
  Dict containing the summary and next items for frontend display
@@ -22,6 +22,7 @@ from agent_server.langchain.tools.file_tools import (
22
22
  )
23
23
  from agent_server.langchain.tools.jupyter_tools import (
24
24
  ask_user_tool,
25
+ final_summary_tool,
25
26
  jupyter_cell_tool,
26
27
  markdown_tool,
27
28
  )
@@ -46,6 +47,7 @@ ALL_TOOLS = {
46
47
  "jupyter_cell_tool": jupyter_cell_tool,
47
48
  "markdown_tool": markdown_tool,
48
49
  "ask_user_tool": ask_user_tool,
50
+ "final_summary_tool": final_summary_tool,
49
51
  # File tools
50
52
  "read_file_tool": read_file_tool,
51
53
  "write_file_tool": write_file_tool,
@@ -95,6 +97,8 @@ AGENT_TOOLS_CONFIG = {
95
97
  "multiedit_file_tool",
96
98
  # Shell for additional operations
97
99
  "execute_command_tool",
100
+ # Final summary (auto-triggered by middleware when all todos done)
101
+ "final_summary_tool",
98
102
  ],
99
103
  # write_todos and task tools are added by middleware
100
104
  "description": "Main Agent executes code and manages files directly",
@@ -118,6 +122,8 @@ AGENT_TOOLS_CONFIG = {
118
122
  "multiedit_file_tool",
119
123
  # Shell
120
124
  "execute_command_tool",
125
+ # Final summary (auto-triggered by middleware when all todos done)
126
+ "final_summary_tool",
121
127
  ],
122
128
  "description": "Alias for main_agent (backward compatibility)",
123
129
  },
@@ -8,16 +8,97 @@ Supports @file context injection for including file contents in prompts.
8
8
  import json
9
9
  import logging
10
10
  import os
11
- from typing import Any, AsyncGenerator, Dict
11
+ from typing import Any, AsyncGenerator, Dict, Optional
12
12
 
13
13
  from fastapi import APIRouter, HTTPException
14
14
  from fastapi.responses import StreamingResponse
15
15
  from hdsp_agent_core.managers.config_manager import ConfigManager
16
- from hdsp_agent_core.managers.session_manager import get_session_manager
16
+ from hdsp_agent_core.managers.session_manager import ChatMessage, get_session_manager
17
17
  from hdsp_agent_core.models.chat import ChatRequest, ChatResponse
18
+ from pydantic import BaseModel
18
19
 
19
20
  from agent_server.context_providers import ContextProcessor
20
21
  from agent_server.core.llm_service import LLMService
22
+ from agent_server.langchain.llm_factory import create_summarization_llm
23
+
24
+
25
+ # ═══════════════════════════════════════════════════════════════════════════
26
+ # Auto-Compact Configuration (Aligned with Agent mode SummarizationMiddleware)
27
+ # ═══════════════════════════════════════════════════════════════════════════
28
+ # These values should match agent_factory.py SummarizationMiddleware settings
29
+ AUTO_COMPACT_TOKEN_THRESHOLD = 30000 # Trigger when tokens exceed this threshold
30
+ AUTO_COMPACT_KEEP_MESSAGES = 15 # Keep last N messages after compaction
31
+
32
+ # Approximate characters per token (for Korean/English mixed text)
33
+ CHARS_PER_TOKEN = 3.5 # Conservative estimate
34
+
35
+
36
+ def _estimate_tokens(text: str) -> int:
37
+ """Estimate token count from text length."""
38
+ return int(len(text) / CHARS_PER_TOKEN)
39
+
40
+
41
+ def _estimate_session_tokens(messages: list) -> int:
42
+ """Estimate total tokens in session messages."""
43
+ total_chars = sum(len(m.content) for m in messages)
44
+ return int(total_chars / CHARS_PER_TOKEN)
45
+
46
+
47
+ # ═══════════════════════════════════════════════════════════════════════════
48
+ # Summarization Prompt (Claude Code Benchmark)
49
+ # ═══════════════════════════════════════════════════════════════════════════
50
+ SUMMARIZATION_PROMPT = """다음 대화 내용을 요약하여 향후 컨텍스트 윈도우에서 작업을 효율적으로 재개할 수 있도록 해주세요.
51
+ 이 요약은 대화 히스토리를 대체하므로, 구조화되고 간결하며 실행 가능해야 합니다.
52
+
53
+ 요약에 반드시 포함할 내용:
54
+
55
+ ## 완료된 작업
56
+ - 완료된 태스크와 주요 결과물
57
+ - 생성/수정된 파일 목록
58
+
59
+ ## 현재 상태
60
+ - 진행 중인 작업
61
+ - 마지막으로 논의된 주제
62
+
63
+ ## 다음 단계
64
+ - 명확한 후속 액션 항목
65
+ - 보류 중인 결정 사항
66
+
67
+ ## 핵심 맥락
68
+ - 사용자 선호사항 및 제약조건
69
+ - 중요한 기술적 결정사항
70
+ - 작업 재개에 필수적인 정보
71
+
72
+ 작성 지침:
73
+ - 간결하되, 작업이 끊김 없이 계속될 수 있을 정도의 세부사항은 보존
74
+ - 불필요한 인사말, 확인 메시지, 중복 내용은 제외
75
+ - 코드 스니펫은 핵심적인 경우에만 포함
76
+ - 한국어로 작성
77
+
78
+ 대화 내용:
79
+ {conversation}
80
+
81
+ 요약:"""
82
+
83
+
84
+ # ═══════════════════════════════════════════════════════════════════════════
85
+ # Compact Request/Response Models
86
+ # ═══════════════════════════════════════════════════════════════════════════
87
+ class CompactRequest(BaseModel):
88
+ """Request for conversation compaction."""
89
+
90
+ conversationId: str
91
+ llmConfig: Optional[dict] = None
92
+
93
+
94
+ class CompactResponse(BaseModel):
95
+ """Response from conversation compaction."""
96
+
97
+ success: bool
98
+ message: str
99
+ originalMessages: int
100
+ compressedMessages: int
101
+ summary: Optional[str] = None
21
102
 
22
103
  router = APIRouter()
23
104
  logger = logging.getLogger(__name__)
@@ -81,6 +162,98 @@ def _store_messages(
81
162
  session_manager.store_messages(conversation_id, user_message, assistant_response)
82
163
 
83
164
 
165
+ async def _auto_compact_if_needed(
166
+ conversation_id: str, llm_config: Dict[str, Any]
167
+ ) -> Optional[str]:
168
+ """
169
+ Automatically compact conversation if token threshold exceeded.
170
+
171
+ Aligned with Agent mode SummarizationMiddleware:
172
+ - Trigger: Token count exceeds AUTO_COMPACT_TOKEN_THRESHOLD
173
+ - Keep: Last AUTO_COMPACT_KEEP_MESSAGES messages
174
+ - Result: [summary] + [recent messages]
175
+
176
+ Returns:
177
+ Summary string if compacted, None otherwise
178
+ """
179
+ from datetime import datetime
180
+
181
+ session_manager = get_session_manager()
182
+ session = session_manager.get_session(conversation_id)
183
+
184
+ if not session or not session.messages:
185
+ return None
186
+
187
+ # Estimate tokens in session
188
+ estimated_tokens = _estimate_session_tokens(session.messages)
189
+
190
+ # Check if auto-compact needed (token-based, same as Agent mode)
191
+ if estimated_tokens <= AUTO_COMPACT_TOKEN_THRESHOLD:
192
+ return None
193
+
194
+ # Need more messages than we keep; otherwise there is nothing older to summarize
195
+ if len(session.messages) <= AUTO_COMPACT_KEEP_MESSAGES:
196
+ return None
197
+
198
+ logger.info(
199
+ f"[Chat] Auto-compact triggered for {conversation_id}: "
200
+ f"~{estimated_tokens} tokens > {AUTO_COMPACT_TOKEN_THRESHOLD} threshold, "
201
+ f"{len(session.messages)} messages"
202
+ )
203
+
204
+ try:
205
+ # Split messages: older (to summarize) vs recent (to keep)
206
+ messages_to_summarize = session.messages[:-AUTO_COMPACT_KEEP_MESSAGES]
207
+ recent_messages = session.messages[-AUTO_COMPACT_KEEP_MESSAGES:]
208
+
209
+ # Build conversation text for summarization
210
+ conversation_text = "\n".join(
211
+ [
212
+ f"{'사용자' if m.role == 'user' else '어시스턴트'}: {m.content}"
213
+ for m in messages_to_summarize
214
+ ]
215
+ )
216
+
217
+ # Create summarization LLM (uses settings from llm_config)
218
+ summarization_llm = create_summarization_llm(llm_config)
219
+
220
+ if not summarization_llm:
221
+ logger.warning("[Chat] Auto-compact skipped: no summarization LLM available")
222
+ return None
223
+
224
+ # Generate summary using LLM
225
+ prompt = SUMMARIZATION_PROMPT.format(conversation=conversation_text)
226
+ response = await summarization_llm.ainvoke(prompt)
227
+ summary = response.content if hasattr(response, "content") else str(response)
228
+
229
+ # Create summary message
230
+ summary_message = ChatMessage(
231
+ role="assistant",
232
+ content=f"[이전 대화 요약]\n\n{summary}",
233
+ timestamp=datetime.now().timestamp(),
234
+ )
235
+
236
+ # Replace session messages: [summary] + [last N]
237
+ original_count = len(session.messages)
238
+ session.messages = [summary_message] + list(recent_messages)
239
+ session.updated_at = datetime.now().timestamp()
240
+
241
+ # NOTE: Session save disabled - sessions are not loaded/used by frontend
242
+ # session_manager._save_sessions()
243
+
244
+ compressed_count = len(session.messages)
245
+ logger.info(
246
+ f"[Chat] Auto-compacted {conversation_id}: "
247
+ f"{original_count} -> {compressed_count} messages"
248
+ )
249
+
250
+ return summary
251
+
252
+ except Exception as e:
253
+ logger.error(f"[Chat] Auto-compact failed: {e}", exc_info=True)
254
+ return None
255
+
256
+
84
257
  @router.post("/message", response_model=ChatResponse)
85
258
  async def chat_message(request: ChatRequest) -> Dict[str, Any]:
86
259
  """
@@ -138,6 +311,9 @@ async def chat_message(request: ChatRequest) -> Dict[str, Any]:
138
311
  # Store messages (use cleaned message for history)
139
312
  _store_messages(conversation_id, cleaned_message, response)
140
313
 
314
+ # Auto-compact if token threshold exceeded
315
+ auto_compact_summary = await _auto_compact_if_needed(conversation_id, config)
316
+
141
317
  # Get model info
142
318
  provider = config.get("provider", "unknown")
143
319
  model = config.get(provider, {}).get("model", "unknown")
@@ -148,6 +324,10 @@ async def chat_message(request: ChatRequest) -> Dict[str, Any]:
148
324
  "model": f"{provider}/{model}",
149
325
  }
150
326
 
327
+ # Include auto-compact info if triggered
328
+ if auto_compact_summary:
329
+ result["autoCompacted"] = True
330
+
151
331
  # Include context errors if any
152
332
  if context_errors:
153
333
  result["contextErrors"] = context_errors
@@ -221,6 +401,22 @@ async def chat_stream(request: ChatRequest) -> StreamingResponse:
221
401
  # Store messages after streaming complete (use cleaned message)
222
402
  _store_messages(conversation_id, cleaned_message, full_response)
223
403
 
404
+ # Check if auto-compact is needed and send status updates
405
+ session_manager = get_session_manager()
406
+ session = session_manager.get_session(conversation_id)
407
+ if session and session.messages:
408
+ estimated_tokens = _estimate_session_tokens(session.messages)
409
+ if estimated_tokens > AUTO_COMPACT_TOKEN_THRESHOLD and len(session.messages) > AUTO_COMPACT_KEEP_MESSAGES:
410
+ # Send status: compacting in progress
411
+ yield f"data: {json.dumps({'status': '대화 컨텍스트 요약 중...', 'icon': 'thinking'})}\n\n"
412
+
413
+ # Auto-compact
414
+ auto_compact_summary = await _auto_compact_if_needed(conversation_id, config)
415
+
416
+ # Send status: compact complete
417
+ if auto_compact_summary:
418
+ yield f"data: {json.dumps({'status': '대화가 자동으로 압축되었습니다.', 'icon': 'check'})}\n\n"
419
+
224
420
  # Send final chunk with conversation ID
225
421
  yield f"data: {json.dumps({'content': '', 'done': True, 'conversationId': conversation_id})}\n\n"
226
422
 
@@ -237,3 +433,110 @@ async def chat_stream(request: ChatRequest) -> StreamingResponse:
237
433
  "X-Accel-Buffering": "no", # Disable nginx buffering
238
434
  },
239
435
  )
436
+
437
+
438
+ # ═══════════════════════════════════════════════════════════════════════════
439
+ # Compact Endpoint - LLM-based conversation summarization
440
+ # ═══════════════════════════════════════════════════════════════════════════
441
+ @router.post("/compact", response_model=CompactResponse)
442
+ async def compact_conversation(request: CompactRequest) -> CompactResponse:
443
+ """
444
+ Compact conversation history by summarizing older messages.
445
+
446
+ Strategy (Claude Code benchmark):
447
+ - Keep the last 3 messages intact
448
+ - Summarize all older messages using LLM
449
+ - Replace history with [summary] + [last 3 messages]
450
+ """
451
+ logger.info(f"Compact request for conversation: {request.conversationId}")
452
+
453
+ session_manager = get_session_manager()
454
+ session = session_manager.get_session(request.conversationId)
455
+
456
+ if not session:
457
+ return CompactResponse(
458
+ success=False,
459
+ message="세션을 찾을 수 없습니다.",
460
+ originalMessages=0,
461
+ compressedMessages=0,
462
+ )
463
+
464
+ original_count = len(session.messages)
465
+
466
+ # Already minimal - no compaction needed
467
+ if original_count <= 3:
468
+ return CompactResponse(
469
+ success=True,
470
+ message="이미 최소 상태입니다. 압축이 필요하지 않습니다.",
471
+ originalMessages=original_count,
472
+ compressedMessages=original_count,
473
+ )
474
+
475
+ try:
476
+ # Split messages: older (to summarize) vs recent (to keep)
477
+ messages_to_summarize = session.messages[:-3]
478
+ recent_messages = session.messages[-3:]
479
+
480
+ # Build conversation text for summarization
481
+ conversation_text = "\n".join(
482
+ [
483
+ f"{'사용자' if m.role == 'user' else '어시스턴트'}: {m.content}"
484
+ for m in messages_to_summarize
485
+ ]
486
+ )
487
+
488
+ # Create summarization LLM
489
+ llm_config = request.llmConfig or _get_config()
490
+ summarization_llm = create_summarization_llm(llm_config)
491
+
492
+ if not summarization_llm:
493
+ return CompactResponse(
494
+ success=False,
495
+ message="요약용 LLM을 생성할 수 없습니다. API 키를 확인해주세요.",
496
+ originalMessages=original_count,
497
+ compressedMessages=original_count,
498
+ )
499
+
500
+ # Generate summary using LLM
501
+ prompt = SUMMARIZATION_PROMPT.format(conversation=conversation_text)
502
+ response = await summarization_llm.ainvoke(prompt)
503
+ summary = response.content if hasattr(response, "content") else str(response)
504
+
505
+ # Create summary message
506
+ from datetime import datetime
507
+
508
+ summary_message = ChatMessage(
509
+ role="assistant",
510
+ content=f"[이전 대화 요약]\n\n{summary}",
511
+ timestamp=datetime.now().timestamp(),
512
+ )
513
+
514
+ # Replace session messages: [summary] + [last 3]
515
+ session.messages = [summary_message] + list(recent_messages)
516
+ session.updated_at = datetime.now().timestamp()
517
+
518
+ # Persist changes
519
+ session_manager._save_sessions()
520
+
521
+ compressed_count = len(session.messages)
522
+ logger.info(
523
+ f"Compacted conversation {request.conversationId}: "
524
+ f"{original_count} -> {compressed_count} messages"
525
+ )
526
+
527
+ return CompactResponse(
528
+ success=True,
529
+ message=f"대화가 압축되었습니다. ({original_count}개 → {compressed_count}개 메시지)",
530
+ originalMessages=original_count,
531
+ compressedMessages=compressed_count,
532
+ summary=summary,
533
+ )
534
+
535
+ except Exception as e:
536
+ logger.error(f"Compact failed: {e}", exc_info=True)
537
+ return CompactResponse(
538
+ success=False,
539
+ message=f"압축 중 오류가 발생했습니다: {str(e)}",
540
+ originalMessages=original_count,
541
+ compressedMessages=original_count,
542
+ )