hdsp-jupyter-extension 2.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (121)
  1. agent_server/__init__.py +8 -0
  2. agent_server/core/__init__.py +92 -0
  3. agent_server/core/api_key_manager.py +427 -0
  4. agent_server/core/code_validator.py +1238 -0
  5. agent_server/core/context_condenser.py +308 -0
  6. agent_server/core/embedding_service.py +254 -0
  7. agent_server/core/error_classifier.py +577 -0
  8. agent_server/core/llm_client.py +95 -0
  9. agent_server/core/llm_service.py +649 -0
  10. agent_server/core/notebook_generator.py +274 -0
  11. agent_server/core/prompt_builder.py +35 -0
  12. agent_server/core/rag_manager.py +742 -0
  13. agent_server/core/reflection_engine.py +489 -0
  14. agent_server/core/retriever.py +248 -0
  15. agent_server/core/state_verifier.py +452 -0
  16. agent_server/core/summary_generator.py +484 -0
  17. agent_server/core/task_manager.py +198 -0
  18. agent_server/knowledge/__init__.py +9 -0
  19. agent_server/knowledge/watchdog_service.py +352 -0
  20. agent_server/main.py +160 -0
  21. agent_server/prompts/__init__.py +60 -0
  22. agent_server/prompts/file_action_prompts.py +113 -0
  23. agent_server/routers/__init__.py +9 -0
  24. agent_server/routers/agent.py +591 -0
  25. agent_server/routers/chat.py +188 -0
  26. agent_server/routers/config.py +100 -0
  27. agent_server/routers/file_resolver.py +293 -0
  28. agent_server/routers/health.py +42 -0
  29. agent_server/routers/rag.py +163 -0
  30. agent_server/schemas/__init__.py +60 -0
  31. hdsp_agent_core/__init__.py +158 -0
  32. hdsp_agent_core/factory.py +252 -0
  33. hdsp_agent_core/interfaces.py +203 -0
  34. hdsp_agent_core/knowledge/__init__.py +31 -0
  35. hdsp_agent_core/knowledge/chunking.py +356 -0
  36. hdsp_agent_core/knowledge/libraries/dask.md +188 -0
  37. hdsp_agent_core/knowledge/libraries/matplotlib.md +164 -0
  38. hdsp_agent_core/knowledge/libraries/polars.md +68 -0
  39. hdsp_agent_core/knowledge/loader.py +337 -0
  40. hdsp_agent_core/llm/__init__.py +13 -0
  41. hdsp_agent_core/llm/service.py +556 -0
  42. hdsp_agent_core/managers/__init__.py +22 -0
  43. hdsp_agent_core/managers/config_manager.py +133 -0
  44. hdsp_agent_core/managers/session_manager.py +251 -0
  45. hdsp_agent_core/models/__init__.py +115 -0
  46. hdsp_agent_core/models/agent.py +316 -0
  47. hdsp_agent_core/models/chat.py +41 -0
  48. hdsp_agent_core/models/common.py +95 -0
  49. hdsp_agent_core/models/rag.py +368 -0
  50. hdsp_agent_core/prompts/__init__.py +63 -0
  51. hdsp_agent_core/prompts/auto_agent_prompts.py +1260 -0
  52. hdsp_agent_core/prompts/cell_action_prompts.py +98 -0
  53. hdsp_agent_core/services/__init__.py +18 -0
  54. hdsp_agent_core/services/agent_service.py +438 -0
  55. hdsp_agent_core/services/chat_service.py +205 -0
  56. hdsp_agent_core/services/rag_service.py +262 -0
  57. hdsp_agent_core/tests/__init__.py +1 -0
  58. hdsp_agent_core/tests/conftest.py +102 -0
  59. hdsp_agent_core/tests/test_factory.py +251 -0
  60. hdsp_agent_core/tests/test_services.py +326 -0
  61. hdsp_jupyter_extension-2.0.0.data/data/etc/jupyter/jupyter_server_config.d/hdsp_jupyter_extension.json +7 -0
  62. hdsp_jupyter_extension-2.0.0.data/data/share/jupyter/labextensions/hdsp-agent/build_log.json +738 -0
  63. hdsp_jupyter_extension-2.0.0.data/data/share/jupyter/labextensions/hdsp-agent/install.json +5 -0
  64. hdsp_jupyter_extension-2.0.0.data/data/share/jupyter/labextensions/hdsp-agent/package.json +134 -0
  65. hdsp_jupyter_extension-2.0.0.data/data/share/jupyter/labextensions/hdsp-agent/static/frontend_styles_index_js.2607ff74c74acfa83158.js +4369 -0
  66. hdsp_jupyter_extension-2.0.0.data/data/share/jupyter/labextensions/hdsp-agent/static/frontend_styles_index_js.2607ff74c74acfa83158.js.map +1 -0
  67. hdsp_jupyter_extension-2.0.0.data/data/share/jupyter/labextensions/hdsp-agent/static/lib_index_js.622c1a5918b3aafb2315.js +12496 -0
  68. hdsp_jupyter_extension-2.0.0.data/data/share/jupyter/labextensions/hdsp-agent/static/lib_index_js.622c1a5918b3aafb2315.js.map +1 -0
  69. hdsp_jupyter_extension-2.0.0.data/data/share/jupyter/labextensions/hdsp-agent/static/node_modules_emotion_use-insertion-effect-with-fallbacks_dist_emotion-use-insertion-effect-wi-3ba6b80.c095373419d05e6f141a.js +94 -0
  70. hdsp_jupyter_extension-2.0.0.data/data/share/jupyter/labextensions/hdsp-agent/static/node_modules_emotion_use-insertion-effect-with-fallbacks_dist_emotion-use-insertion-effect-wi-3ba6b80.c095373419d05e6f141a.js.map +1 -0
  71. hdsp_jupyter_extension-2.0.0.data/data/share/jupyter/labextensions/hdsp-agent/static/node_modules_emotion_use-insertion-effect-with-fallbacks_dist_emotion-use-insertion-effect-wi-3ba6b81.61e75fb98ecff46cf836.js +94 -0
  72. hdsp_jupyter_extension-2.0.0.data/data/share/jupyter/labextensions/hdsp-agent/static/node_modules_emotion_use-insertion-effect-with-fallbacks_dist_emotion-use-insertion-effect-wi-3ba6b81.61e75fb98ecff46cf836.js.map +1 -0
  73. hdsp_jupyter_extension-2.0.0.data/data/share/jupyter/labextensions/hdsp-agent/static/remoteEntry.dae97cde171e13b8c834.js +623 -0
  74. hdsp_jupyter_extension-2.0.0.data/data/share/jupyter/labextensions/hdsp-agent/static/remoteEntry.dae97cde171e13b8c834.js.map +1 -0
  75. hdsp_jupyter_extension-2.0.0.data/data/share/jupyter/labextensions/hdsp-agent/static/style.js +4 -0
  76. hdsp_jupyter_extension-2.0.0.data/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_babel_runtime_helpers_esm_extends_js-node_modules_emotion_serialize_dist-051195.e2553aab0c3963b83dd7.js +507 -0
  77. hdsp_jupyter_extension-2.0.0.data/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_babel_runtime_helpers_esm_extends_js-node_modules_emotion_serialize_dist-051195.e2553aab0c3963b83dd7.js.map +1 -0
  78. hdsp_jupyter_extension-2.0.0.data/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_emotion_cache_dist_emotion-cache_browser_development_esm_js-node_modules-782ee5.d9ed8645ef1d311657d8.js +2071 -0
  79. hdsp_jupyter_extension-2.0.0.data/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_emotion_cache_dist_emotion-cache_browser_development_esm_js-node_modules-782ee5.d9ed8645ef1d311657d8.js.map +1 -0
  80. hdsp_jupyter_extension-2.0.0.data/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_emotion_react_dist_emotion-react_browser_development_esm_js.36b49c71871f98d4f549.js +1059 -0
  81. hdsp_jupyter_extension-2.0.0.data/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_emotion_react_dist_emotion-react_browser_development_esm_js.36b49c71871f98d4f549.js.map +1 -0
  82. hdsp_jupyter_extension-2.0.0.data/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_emotion_styled_dist_emotion-styled_browser_development_esm_js.661fb5836f4978a7c6e1.js +376 -0
  83. hdsp_jupyter_extension-2.0.0.data/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_emotion_styled_dist_emotion-styled_browser_development_esm_js.661fb5836f4978a7c6e1.js.map +1 -0
  84. hdsp_jupyter_extension-2.0.0.data/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_mui_material_index_js.985697e0162d8d088ca2.js +60336 -0
  85. hdsp_jupyter_extension-2.0.0.data/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_mui_material_index_js.985697e0162d8d088ca2.js.map +1 -0
  86. hdsp_jupyter_extension-2.0.0.data/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_mui_material_utils_createSvgIcon_js.2e13df4ea61496e95d45.js +7132 -0
  87. hdsp_jupyter_extension-2.0.0.data/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_mui_material_utils_createSvgIcon_js.2e13df4ea61496e95d45.js.map +1 -0
  88. hdsp_jupyter_extension-2.0.0.dist-info/METADATA +152 -0
  89. hdsp_jupyter_extension-2.0.0.dist-info/RECORD +121 -0
  90. hdsp_jupyter_extension-2.0.0.dist-info/WHEEL +4 -0
  91. hdsp_jupyter_extension-2.0.0.dist-info/licenses/LICENSE +21 -0
  92. jupyter_ext/__init__.py +233 -0
  93. jupyter_ext/_version.py +4 -0
  94. jupyter_ext/config.py +111 -0
  95. jupyter_ext/etc/jupyter/jupyter_server_config.d/hdsp_jupyter_extension.json +7 -0
  96. jupyter_ext/handlers.py +632 -0
  97. jupyter_ext/labextension/build_log.json +738 -0
  98. jupyter_ext/labextension/package.json +134 -0
  99. jupyter_ext/labextension/static/frontend_styles_index_js.2607ff74c74acfa83158.js +4369 -0
  100. jupyter_ext/labextension/static/frontend_styles_index_js.2607ff74c74acfa83158.js.map +1 -0
  101. jupyter_ext/labextension/static/lib_index_js.622c1a5918b3aafb2315.js +12496 -0
  102. jupyter_ext/labextension/static/lib_index_js.622c1a5918b3aafb2315.js.map +1 -0
  103. jupyter_ext/labextension/static/node_modules_emotion_use-insertion-effect-with-fallbacks_dist_emotion-use-insertion-effect-wi-3ba6b80.c095373419d05e6f141a.js +94 -0
  104. jupyter_ext/labextension/static/node_modules_emotion_use-insertion-effect-with-fallbacks_dist_emotion-use-insertion-effect-wi-3ba6b80.c095373419d05e6f141a.js.map +1 -0
  105. jupyter_ext/labextension/static/node_modules_emotion_use-insertion-effect-with-fallbacks_dist_emotion-use-insertion-effect-wi-3ba6b81.61e75fb98ecff46cf836.js +94 -0
  106. jupyter_ext/labextension/static/node_modules_emotion_use-insertion-effect-with-fallbacks_dist_emotion-use-insertion-effect-wi-3ba6b81.61e75fb98ecff46cf836.js.map +1 -0
  107. jupyter_ext/labextension/static/remoteEntry.dae97cde171e13b8c834.js +623 -0
  108. jupyter_ext/labextension/static/remoteEntry.dae97cde171e13b8c834.js.map +1 -0
  109. jupyter_ext/labextension/static/style.js +4 -0
  110. jupyter_ext/labextension/static/vendors-node_modules_babel_runtime_helpers_esm_extends_js-node_modules_emotion_serialize_dist-051195.e2553aab0c3963b83dd7.js +507 -0
  111. jupyter_ext/labextension/static/vendors-node_modules_babel_runtime_helpers_esm_extends_js-node_modules_emotion_serialize_dist-051195.e2553aab0c3963b83dd7.js.map +1 -0
  112. jupyter_ext/labextension/static/vendors-node_modules_emotion_cache_dist_emotion-cache_browser_development_esm_js-node_modules-782ee5.d9ed8645ef1d311657d8.js +2071 -0
  113. jupyter_ext/labextension/static/vendors-node_modules_emotion_cache_dist_emotion-cache_browser_development_esm_js-node_modules-782ee5.d9ed8645ef1d311657d8.js.map +1 -0
  114. jupyter_ext/labextension/static/vendors-node_modules_emotion_react_dist_emotion-react_browser_development_esm_js.36b49c71871f98d4f549.js +1059 -0
  115. jupyter_ext/labextension/static/vendors-node_modules_emotion_react_dist_emotion-react_browser_development_esm_js.36b49c71871f98d4f549.js.map +1 -0
  116. jupyter_ext/labextension/static/vendors-node_modules_emotion_styled_dist_emotion-styled_browser_development_esm_js.661fb5836f4978a7c6e1.js +376 -0
  117. jupyter_ext/labextension/static/vendors-node_modules_emotion_styled_dist_emotion-styled_browser_development_esm_js.661fb5836f4978a7c6e1.js.map +1 -0
  118. jupyter_ext/labextension/static/vendors-node_modules_mui_material_index_js.985697e0162d8d088ca2.js +60336 -0
  119. jupyter_ext/labextension/static/vendors-node_modules_mui_material_index_js.985697e0162d8d088ca2.js.map +1 -0
  120. jupyter_ext/labextension/static/vendors-node_modules_mui_material_utils_createSvgIcon_js.2e13df4ea61496e95d45.js +7132 -0
  121. jupyter_ext/labextension/static/vendors-node_modules_mui_material_utils_createSvgIcon_js.2e13df4ea61496e95d45.js.map +1 -0
agent_server/core/llm_service.py
@@ -0,0 +1,649 @@
"""
LLM Service - Handles interactions with different LLM providers
"""

import asyncio
import json
import ssl
from contextlib import asynccontextmanager
from typing import Any, Dict, Optional

import aiohttp
import certifi


class LLMService:
    """Service for interacting with various LLM providers"""

    def __init__(self, config: Dict[str, Any], key_manager=None):
        self.config = config
        self.provider = config.get("provider", "gemini")
        self._key_manager = key_manager  # Optional injection for testing
        # Create SSL context with certifi certificates
        self._ssl_context = ssl.create_default_context(cafile=certifi.where())

    def _get_key_manager(self):
        """Get key manager if using Gemini provider"""
        if self._key_manager:
            return self._key_manager
        if self.provider == "gemini":
            try:
                from hdsp_agent_core.managers.config_manager import ConfigManager

                from agent_server.core.api_key_manager import get_key_manager

                return get_key_manager(ConfigManager.get_instance())
            except ImportError:
                # Fallback for standalone usage
                return None
        return None

    # ========== Config Helpers ==========

    def _get_gemini_config(self) -> tuple[str, str, str]:
        """Get Gemini config: (api_key, model, base_url). Raises if api_key missing.

        NOTE: Server receives SINGLE API key from client per request.
        Key rotation is managed by the frontend (financial security compliance).
        """
        cfg = self.config.get("gemini", {})
        api_key = cfg.get("apiKey")
        if not api_key:
            raise ValueError("Gemini API key not configured")
        model = cfg.get("model", "gemini-2.5-pro")
        base_url = f"https://generativelanguage.googleapis.com/v1beta/models/{model}"
        return api_key, model, base_url

    def _get_openai_config(self) -> tuple[str, str, Dict[str, str]]:
        """Get OpenAI config: (model, url, headers). Raises if api_key missing."""
        cfg = self.config.get("openai", {})
        api_key = cfg.get("apiKey")
        if not api_key:
            raise ValueError("OpenAI API key not configured")
        model = cfg.get("model", "gpt-4")
        url = "https://api.openai.com/v1/chat/completions"
        headers = {
            "Authorization": f"Bearer {api_key}",
            "Content-Type": "application/json",
        }
        return model, url, headers

    def _get_vllm_config(self) -> tuple[str, str, Dict[str, str]]:
        """Get vLLM config: (model, url, headers)."""
        cfg = self.config.get("vllm", {})
        endpoint = cfg.get("endpoint", "http://localhost:8000")
        model = cfg.get("model", "default")
        url = f"{endpoint}/v1/chat/completions"
        headers = {"Content-Type": "application/json"}
        if cfg.get("apiKey"):
            headers["Authorization"] = f"Bearer {cfg['apiKey']}"
        return model, url, headers

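For reference, the accessors above read only a few keys from the injected config. A minimal sketch of the expected shape (key names taken from the accessors; all values are placeholders):

```python
# Hypothetical config dict; only the keys read by the accessors above.
config = {
    "provider": "gemini",  # dispatch key: "gemini", "openai", or "vllm"
    "gemini": {"apiKey": "<GEMINI_KEY>", "model": "gemini-2.5-pro", "temperature": 0.0},
    "openai": {"apiKey": "<OPENAI_KEY>", "model": "gpt-4"},
    "vllm": {"endpoint": "http://localhost:8000", "model": "default"},  # "apiKey" optional
}

service = LLMService(config)
```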
    # ========== Message/Payload Builders ==========

    def _build_prompt(self, prompt: str, context: Optional[str] = None) -> str:
        """Build full prompt with optional context"""
        if context:
            return f"Context:\n{context}\n\nUser Request:\n{prompt}"
        return prompt

    def _build_openai_messages(
        self, prompt: str, context: Optional[str] = None
    ) -> list:
        """Build OpenAI-style messages array"""
        messages = []
        if context:
            messages.append({"role": "system", "content": f"Context:\n{context}"})
        messages.append({"role": "user", "content": prompt})
        return messages

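Concretely, the builder yields the standard chat-completions message array; a quick check (values illustrative):

```python
svc = LLMService({"provider": "openai"})
msgs = svc._build_openai_messages("Plot a histogram", context="df has columns a, b")
assert msgs == [
    {"role": "system", "content": "Context:\ndf has columns a, b"},
    {"role": "user", "content": "Plot a histogram"},
]
```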
    async def _retry_with_backoff(
        self,
        operation,
        max_retries: int = 3,
        provider: str = "API",
        retryable_statuses: tuple = (503, 429),
    ):
        """Execute operation with exponential backoff retry logic"""
        for attempt in range(max_retries):
            try:
                return await operation()
            except asyncio.TimeoutError:
                if attempt < max_retries - 1:
                    wait_time = (2**attempt) * 3
                    print(
                        f"[LLMService] Request timeout. Retrying in {wait_time}s... (attempt {attempt + 1}/{max_retries})"
                    )
                    await asyncio.sleep(wait_time)
                    continue
                raise Exception(f"Request timeout after {max_retries} retries")
            except Exception as e:
                error_msg = str(e)
                # ★ Rate-limit (429) errors are retryable
                if "rate limit" in error_msg.lower() or "(429)" in error_msg:
                    if attempt < max_retries - 1:
                        # 429 errors get a longer wait (40-80 seconds)
                        wait_time = 40 + (attempt * 20)
                        print(
                            f"[LLMService] Rate limit hit. Waiting {wait_time}s before retry... (attempt {attempt + 1}/{max_retries})"
                        )
                        await asyncio.sleep(wait_time)
                        continue
                    raise Exception(
                        f"Rate limit exceeded after {max_retries} retries. Please wait a minute and try again."
                    )
                # ★ Server-overload (503) errors are also retryable
                if "overloaded" in error_msg.lower() or "(503)" in error_msg:
                    if attempt < max_retries - 1:
                        wait_time = (2**attempt) * 5
                        print(
                            f"[LLMService] Server overloaded. Retrying in {wait_time}s... (attempt {attempt + 1}/{max_retries})"
                        )
                        await asyncio.sleep(wait_time)
                        continue
                    raise
                # Other API errors fail immediately
                if "API error" in error_msg and "rate limit" not in error_msg.lower():
                    raise
                if "timeout" in error_msg.lower():
                    raise
                # Retry network errors
                if attempt < max_retries - 1:
                    wait_time = (2**attempt) * 2
                    print(
                        f"[LLMService] Network error: {e}. Retrying in {wait_time}s... (attempt {attempt + 1}/{max_retries})"
                    )
                    await asyncio.sleep(wait_time)
                    continue
                raise

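The helper takes a zero-argument coroutine function and classifies failures by message substring ("(429)", "(503)", "API error", "timeout"). A minimal sketch of wrapping a request with it; the endpoint and body below are placeholders, not calls the package itself makes:

```python
import asyncio

async def demo() -> dict:
    svc = LLMService({"provider": "openai", "openai": {"apiKey": "<KEY>"}})

    async def op():
        # Any coroutine works; exceptions whose text matches the
        # substrings above are retried or re-raised accordingly.
        return await svc._request_json(
            "https://api.openai.com/v1/chat/completions",
            {"model": "gpt-4", "messages": [{"role": "user", "content": "hi"}]},
            {"Authorization": "Bearer <KEY>", "Content-Type": "application/json"},
            provider="OpenAI",
        )

    return await svc._retry_with_backoff(op, max_retries=3, provider="OpenAI")

# asyncio.run(demo())
```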
    @asynccontextmanager
    async def _request(
        self,
        url: str,
        payload: Dict[str, Any],
        headers: Optional[Dict[str, str]] = None,
        timeout_seconds: int = 60,
        provider: str = "API",
    ):
        """Context manager for HTTP POST requests with automatic session cleanup"""
        timeout = aiohttp.ClientTimeout(total=timeout_seconds)
        connector = aiohttp.TCPConnector(ssl=self._ssl_context)
        async with aiohttp.ClientSession(
            timeout=timeout, connector=connector
        ) as session:
            async with session.post(url, json=payload, headers=headers) as response:
                if response.status != 200:
                    error_text = await response.text()
                    print(f"[LLMService] {provider} API Error: {error_text}")
                    raise Exception(f"{provider} API error: {error_text}")
                yield response

    async def _request_json(
        self,
        url: str,
        payload: Dict[str, Any],
        headers: Optional[Dict[str, str]] = None,
        timeout_seconds: int = 60,
        provider: str = "API",
    ) -> Dict[str, Any]:
        """Make request and return JSON response"""
        async with self._request(
            url, payload, headers, timeout_seconds, provider
        ) as response:
            return await response.json()

    async def _stream_response(
        self,
        url: str,
        payload: Dict[str, Any],
        headers: Optional[Dict[str, str]],
        provider: str,
        line_parser,
    ):
        """Stream response and yield parsed content"""
        async with self._request(
            url, payload, headers, timeout_seconds=120, provider=provider
        ) as response:
            async for line in response.content:
                line_text = line.decode("utf-8").strip()
                content = line_parser(line_text)
                if content:
                    yield content

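`_request` is an `asynccontextmanager`, so call sites open it with `async with` and the session and connector are closed on exit. A sketch (inside a coroutine; the vLLM endpoint and body are placeholders):

```python
svc = LLMService({"provider": "vllm"})
async with svc._request(
    "http://localhost:8000/v1/chat/completions",
    {"model": "default", "messages": [{"role": "user", "content": "hi"}]},
    {"Content-Type": "application/json"},
    timeout_seconds=30,
    provider="vLLM",
) as resp:
    body = await resp.json()  # a non-200 status would already have raised
```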
    # ========== Response Parsers ==========

    def _parse_openai_response(self, data: Dict[str, Any]) -> str:
        """Parse OpenAI-compatible response format (used by OpenAI and vLLM)"""
        if "choices" in data and len(data["choices"]) > 0:
            choice = data["choices"][0]
            if "message" in choice and "content" in choice["message"]:
                return choice["message"]["content"]
            elif "text" in choice:
                return choice["text"]
        raise Exception("No valid response from API")

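The parser accepts both the chat shape and the legacy completion shape; a quick check:

```python
svc = LLMService({"provider": "openai"})
chat_style = {"choices": [{"message": {"role": "assistant", "content": "hi"}}]}
completion_style = {"choices": [{"text": "hi"}]}
assert svc._parse_openai_response(chat_style) == "hi"
assert svc._parse_openai_response(completion_style) == "hi"
```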
    def _extract_gemini_text(self, data: Dict[str, Any]) -> Optional[str]:
        """Extract text from Gemini response data (shared by response and stream parsing)"""
        if "candidates" in data and len(data["candidates"]) > 0:
            candidate = data["candidates"][0]
            if "content" in candidate and "parts" in candidate["content"]:
                parts = candidate["content"]["parts"]
                if len(parts) > 0 and "text" in parts[0]:
                    return parts[0]["text"]
        return None

    def _parse_gemini_response(self, data: Dict[str, Any]) -> str:
        """Parse Gemini API response format"""
        text = self._extract_gemini_text(data)
        if text is not None:
            return text
        raise Exception("No valid response from Gemini API")

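The Gemini shape it walks is `candidates[0].content.parts[0].text`; anything else falls through to `None` (and to the exception in `_parse_gemini_response`):

```python
svc = LLMService({"provider": "gemini"})
body = {"candidates": [{"content": {"parts": [{"text": "hello"}]}}]}
assert svc._extract_gemini_text(body) == "hello"
assert svc._extract_gemini_text({"candidates": []}) is None
```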
    def _parse_sse_line(self, line_text: str, extractor) -> Optional[str]:
        """Parse SSE line with given extractor function"""
        if not line_text.startswith("data: "):
            return None
        data_str = line_text[6:]
        if data_str == "[DONE]":
            return None
        try:
            data = json.loads(data_str)
            return extractor(data)
        except json.JSONDecodeError:
            return None

    def _extract_openai_delta(self, data: Dict[str, Any]) -> Optional[str]:
        """Extract content delta from OpenAI stream data"""
        if "choices" in data and len(data["choices"]) > 0:
            delta = data["choices"][0].get("delta", {})
            return delta.get("content", "") or None
        return None

    def _parse_openai_stream_line(self, line_text: str) -> Optional[str]:
        """Parse a single SSE line from OpenAI-compatible stream"""
        return self._parse_sse_line(line_text, self._extract_openai_delta)

    def _parse_gemini_stream_line(self, line_text: str) -> Optional[str]:
        """Parse a single SSE line from Gemini stream"""
        return self._parse_sse_line(line_text, self._extract_gemini_text)

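Both stream parsers thus reduce to the same SSE framing; a quick check of the three cases (payload line, `[DONE]` sentinel, non-data line):

```python
svc = LLMService({"provider": "openai"})
line = 'data: {"choices": [{"delta": {"content": "Hel"}}]}'
assert svc._parse_openai_stream_line(line) == "Hel"
assert svc._parse_openai_stream_line("data: [DONE]") is None
assert svc._parse_openai_stream_line(": keep-alive") is None
```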
    def _build_openai_payload(
        self,
        model: str,
        messages: list,
        max_tokens: int = 4096,
        temperature: float = 0.0,  # 0.0 = deterministic output (maximize consistency)
        stream: bool = False,
    ) -> Dict[str, Any]:
        """Build OpenAI-compatible request payload"""
        return {
            "model": model,
            "messages": messages,
            "max_tokens": max_tokens,
            "temperature": temperature,
            "stream": stream,
        }

    def _build_gemini_payload(
        self,
        prompt: str,
        max_output_tokens: int = 32768,  # Gemini 2.5 supports up to 65535; increased for thinking overhead
        temperature: Optional[float] = None,
    ) -> Dict[str, Any]:
        """Build Gemini API request payload

        Args:
            prompt: The prompt text
            max_output_tokens: Maximum tokens in response (default 32768 for Gemini 2.5 with thinking)
            temperature: 0.0 for deterministic, higher for creativity (default from config)
        """
        # Default temperature: taken from config, else 0.0 (consistency first)
        cfg = self.config.get("gemini", {})
        temp = temperature if temperature is not None else cfg.get("temperature", 0.0)
        model = cfg.get("model", "gemini-2.5-flash")

        payload = {
            "contents": [{"parts": [{"text": prompt}]}],
            "generationConfig": {
                "temperature": temp,  # 0.0 = deterministic output (maximize consistency)
                "topK": 1,  # pick only the highest-probability token (consistency)
                "topP": 0.95,
                "maxOutputTokens": max_output_tokens,
            },
            "safetySettings": [
                {
                    "category": "HARM_CATEGORY_HARASSMENT",
                    "threshold": "BLOCK_MEDIUM_AND_ABOVE",
                },
                {
                    "category": "HARM_CATEGORY_HATE_SPEECH",
                    "threshold": "BLOCK_MEDIUM_AND_ABOVE",
                },
                {
                    "category": "HARM_CATEGORY_SEXUALLY_EXPLICIT",
                    "threshold": "BLOCK_MEDIUM_AND_ABOVE",
                },
                {
                    "category": "HARM_CATEGORY_DANGEROUS_CONTENT",
                    "threshold": "BLOCK_MEDIUM_AND_ABOVE",
                },
            ],
        }

        # Gemini 2.5 models have built-in "thinking" that consumes output tokens
        # Set thinkingConfig to allocate budget appropriately
        if "2.5" in model or "2-5" in model:
            payload["generationConfig"]["thinkingConfig"] = {
                "thinkingBudget": 8192  # Reserve 8K tokens for thinking, rest for output
            }

        return payload

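With an empty gemini config the builder resolves model "gemini-2.5-flash" and temperature 0.0, so the "2.5" branch adds the thinking budget; a sketch of the resulting payload (safetySettings elided):

```python
svc = LLMService({"provider": "gemini"})
payload = svc._build_gemini_payload("Summarize the notebook")
assert payload["contents"] == [{"parts": [{"text": "Summarize the notebook"}]}]
gen = payload["generationConfig"]
assert gen["temperature"] == 0.0 and gen["topK"] == 1
assert gen["thinkingConfig"] == {"thinkingBudget": 8192}  # "2.5" in default model name
```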
    async def generate_response_stream(
        self, prompt: str, context: Optional[str] = None
    ):
        """Generate a streaming response from the configured LLM provider (async generator)"""
        if self.provider == "gemini":
            async for chunk in self._call_gemini_stream(prompt, context):
                yield chunk
        elif self.provider == "vllm":
            async for chunk in self._call_vllm_stream(prompt, context):
                yield chunk
        elif self.provider == "openai":
            async for chunk in self._call_openai_stream(prompt, context):
                yield chunk
        else:
            raise ValueError(f"Unsupported provider: {self.provider}")

    async def generate_response(
        self, prompt: str, context: Optional[str] = None
    ) -> str:
        """Generate a response from the configured LLM provider"""

        if self.provider == "gemini":
            return await self._call_gemini(prompt, context)
        elif self.provider == "vllm":
            return await self._call_vllm(prompt, context)
        elif self.provider == "openai":
            return await self._call_openai(prompt, context)
        else:
            raise ValueError(f"Unsupported provider: {self.provider}")

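Both public entry points are plain asyncio coroutines/generators; a minimal driver, assuming the `config` sketch shown earlier with a valid key:

```python
import asyncio

async def main():
    svc = LLMService(config)
    print(await svc.generate_response("Explain dask.delayed"))
    async for chunk in svc.generate_response_stream("Explain dask.delayed"):
        print(chunk, end="", flush=True)

asyncio.run(main())
```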
    async def _call_gemini(
        self, prompt: str, context: Optional[str] = None, max_retries: int = 3
    ) -> str:
        """Call Google Gemini API with single API key.

        NOTE: Server does NOT manage key rotation (financial security compliance).
        On 429 rate limit, error is returned to client for frontend key rotation.
        """
        api_key, model, base_url = self._get_gemini_config()
        full_prompt = self._build_prompt(prompt, context)
        payload = self._build_gemini_payload(full_prompt)

        url = f"{base_url}:generateContent?key={api_key}"
        print(f"[LLMService] Calling Gemini API with model: {model}")

        for attempt in range(max_retries):
            try:
                timeout = aiohttp.ClientTimeout(total=60)
                connector = aiohttp.TCPConnector(ssl=self._ssl_context)
                async with aiohttp.ClientSession(
                    timeout=timeout, connector=connector
                ) as session:
                    async with session.post(url, json=payload) as response:
                        # 429 Rate limit - return to client for key rotation
                        if response.status == 429:
                            error_text = await response.text()
                            print(
                                f"[LLMService] Rate limit (429): {error_text[:100]}..."
                            )
                            raise Exception(f"RATE_LIMIT_EXCEEDED: {error_text}")

                        # 503 Server overload - retry with backoff
                        if response.status == 503:
                            error_text = await response.text()
                            print(
                                f"[LLMService] Server overloaded (503): {error_text[:100]}..."
                            )
                            if attempt < max_retries - 1:
                                wait_time = (2**attempt) * 5
                                print(
                                    f"[LLMService] Waiting {wait_time}s before retry..."
                                )
                                await asyncio.sleep(wait_time)
                                continue
                            raise Exception(f"Server overloaded: {error_text}")

                        if response.status != 200:
                            error_text = await response.text()
                            print(f"[LLMService] Gemini API Error: {error_text}")
                            raise Exception(f"Gemini API error: {error_text}")

                        # Success
                        data = await response.json()
                        print(
                            f"[LLMService] Gemini API Response Status: {response.status}"
                        )

                        # Debug: check finishReason
                        if "candidates" in data and len(data["candidates"]) > 0:
                            candidate = data["candidates"][0]
                            finish_reason = candidate.get("finishReason", "UNKNOWN")
                            print(f"[LLMService] Gemini finishReason: {finish_reason}")
                            if finish_reason not in ["STOP", "UNKNOWN"]:
                                print(
                                    f"[LLMService] WARNING: Response may be incomplete! finishReason={finish_reason}"
                                )

                        response_text = self._parse_gemini_response(data)
                        print(
                            f"[LLMService] Successfully received response from {model} (length: {len(response_text)} chars)"
                        )

                        return response_text

            except asyncio.TimeoutError:
                if attempt < max_retries - 1:
                    wait_time = (2**attempt) * 3
                    print(f"[LLMService] Timeout. Retrying in {wait_time}s...")
                    await asyncio.sleep(wait_time)
                    continue
                raise Exception("Request timeout after retries")

            except Exception as e:
                error_msg = str(e)
                # Rate limit - propagate to client immediately
                if "RATE_LIMIT_EXCEEDED" in error_msg:
                    raise
                # API error - don't retry
                if "API error" in error_msg:
                    raise
                # Network error - retry with delay
                if attempt < max_retries - 1:
                    wait_time = (2**attempt) * 2
                    print(
                        f"[LLMService] Network error: {e}. Retrying in {wait_time}s..."
                    )
                    await asyncio.sleep(wait_time)
                    continue
                raise

        raise Exception("Max retries exceeded")

    async def _call_vllm(self, prompt: str, context: Optional[str] = None) -> str:
        """Call vLLM endpoint with OpenAI Compatible API"""
        model, url, headers = self._get_vllm_config()
        full_prompt = self._build_prompt(prompt, context)
        messages = [{"role": "user", "content": full_prompt}]
        payload = self._build_openai_payload(model, messages, stream=False)

        data = await self._request_json(url, payload, headers, provider="vLLM")
        return self._parse_openai_response(data)

    async def _call_openai(self, prompt: str, context: Optional[str] = None) -> str:
        """Call OpenAI API"""
        model, url, headers = self._get_openai_config()
        messages = self._build_openai_messages(prompt, context)
        payload = self._build_openai_payload(
            model, messages, max_tokens=2000, stream=False
        )

        data = await self._request_json(url, payload, headers, provider="OpenAI")
        return self._parse_openai_response(data)

    async def _call_gemini_stream(
        self, prompt: str, context: Optional[str] = None, max_retries: int = 3
    ):
        """Call Google Gemini API with streaming using single API key.

        NOTE: Server does NOT manage key rotation (financial security compliance).
        On 429 rate limit, error is returned to client for frontend key rotation.
        """
        api_key, model, base_url = self._get_gemini_config()
        full_prompt = self._build_prompt(prompt, context)
        payload = self._build_gemini_payload(full_prompt)

        url = f"{base_url}:streamGenerateContent?key={api_key}&alt=sse"
        print(f"[LLMService] Calling Gemini Stream API with model: {model}")

        for attempt in range(max_retries):
            try:
                timeout = aiohttp.ClientTimeout(total=120)
                connector = aiohttp.TCPConnector(ssl=self._ssl_context)
                async with aiohttp.ClientSession(
                    timeout=timeout, connector=connector
                ) as session:
                    async with session.post(url, json=payload) as response:
                        # 429 Rate limit - return to client for key rotation
                        if response.status == 429:
                            error_text = await response.text()
                            print(
                                f"[LLMService] Rate limit (429) stream: {error_text[:100]}..."
                            )
                            raise Exception(f"RATE_LIMIT_EXCEEDED: {error_text}")

                        # 503 Server overload - retry with backoff
                        if response.status == 503:
                            error_text = await response.text()
                            print(
                                f"[LLMService] Server overloaded (503) stream: {error_text[:100]}..."
                            )
                            if attempt < max_retries - 1:
                                wait_time = (2**attempt) * 5
                                print(
                                    f"[LLMService] Waiting {wait_time}s before retry..."
                                )
                                await asyncio.sleep(wait_time)
                                continue
                            raise Exception(f"Server overloaded: {error_text}")

                        if response.status != 200:
                            error_text = await response.text()
                            print(f"[LLMService] Gemini Stream API Error: {error_text}")
                            raise Exception(f"Gemini API error: {error_text}")

                        # Success - stream the response
                        print("[LLMService] Successfully connected to Gemini stream")
                        async for line in response.content:
                            line_text = line.decode("utf-8").strip()
                            content = self._parse_gemini_stream_line(line_text)
                            if content:
                                yield content
                        return  # Successfully completed streaming

            except asyncio.TimeoutError:
                if attempt < max_retries - 1:
                    wait_time = (2**attempt) * 3
                    print(f"[LLMService] Timeout. Retrying in {wait_time}s...")
                    await asyncio.sleep(wait_time)
                    continue
                raise Exception("Request timeout after retries")

            except Exception as e:
                error_msg = str(e)
                # Rate limit - propagate to client immediately
                if "RATE_LIMIT_EXCEEDED" in error_msg:
                    raise
                # API error - don't retry
                if "API error" in error_msg:
                    raise
                # Network error - retry with delay
                if attempt < max_retries - 1:
                    wait_time = (2**attempt) * 2
                    print(
                        f"[LLMService] Network error: {e}. Retrying in {wait_time}s..."
                    )
                    await asyncio.sleep(wait_time)
                    continue
                raise

        raise Exception("Max retries exceeded for streaming")

    async def _call_vllm_stream(self, prompt: str, context: Optional[str] = None):
        """Call vLLM endpoint with streaming"""
        model, url, headers = self._get_vllm_config()
        full_prompt = self._build_prompt(prompt, context)
        messages = [{"role": "user", "content": full_prompt}]
        payload = self._build_openai_payload(model, messages, stream=True)

        async for content in self._stream_response(
            url, payload, headers, "vLLM", self._parse_openai_stream_line
        ):
            yield content

    async def _call_openai_stream(self, prompt: str, context: Optional[str] = None):
        """Call OpenAI API with streaming"""
        model, url, headers = self._get_openai_config()
        messages = self._build_openai_messages(prompt, context)
        payload = self._build_openai_payload(
            model, messages, max_tokens=2000, stream=True
        )

        async for content in self._stream_response(
            url, payload, headers, "OpenAI", self._parse_openai_stream_line
        ):
            yield content


# ═══════════════════════════════════════════════════════════════════════════
# Module-level helper functions for Auto-Agent
# ═══════════════════════════════════════════════════════════════════════════


async def call_llm(
    prompt: str, config: Dict[str, Any], context: Optional[str] = None
) -> str:
    """
    Convenience function to call LLM with the given config.

    Args:
        prompt: The prompt to send to the LLM
        config: LLM configuration dictionary
        context: Optional context to include

    Returns:
        The LLM response string
    """
    service = LLMService(config)
    return await service.generate_response(prompt, context)


async def call_llm_stream(
    prompt: str, config: Dict[str, Any], context: Optional[str] = None
):
    """
    Convenience function to stream LLM response with the given config.

    Args:
        prompt: The prompt to send to the LLM
        config: LLM configuration dictionary
        context: Optional context to include

    Yields:
        Response chunks as they arrive
    """
    service = LLMService(config)
    async for chunk in service.generate_response_stream(prompt, context):
        yield chunk
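
For one-shot callers, the module-level helpers construct a throwaway `LLMService` per call; a minimal sketch, again assuming the earlier `config` with a valid key:

```python
import asyncio

async def main():
    print(await call_llm("Describe this dataset", config))
    async for chunk in call_llm_stream("Describe this dataset", config):
        print(chunk, end="", flush=True)

asyncio.run(main())
```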