hdsp-jupyter-extension 2.0.8__py3-none-any.whl → 2.0.11__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as they appear in their respective public registries, and is provided for informational purposes only.
- agent_server/core/notebook_generator.py +4 -4
- agent_server/core/rag_manager.py +12 -3
- agent_server/core/retriever.py +2 -1
- agent_server/core/vllm_embedding_service.py +8 -5
- agent_server/langchain/ARCHITECTURE.md +7 -51
- agent_server/langchain/agent.py +31 -20
- agent_server/langchain/custom_middleware.py +234 -31
- agent_server/langchain/hitl_config.py +5 -8
- agent_server/langchain/logging_utils.py +7 -7
- agent_server/langchain/prompts.py +106 -120
- agent_server/langchain/tools/__init__.py +1 -10
- agent_server/langchain/tools/file_tools.py +9 -61
- agent_server/langchain/tools/jupyter_tools.py +0 -1
- agent_server/langchain/tools/lsp_tools.py +8 -8
- agent_server/langchain/tools/resource_tools.py +12 -12
- agent_server/langchain/tools/search_tools.py +3 -158
- agent_server/prompts/file_action_prompts.py +8 -8
- agent_server/routers/langchain_agent.py +200 -125
- hdsp_agent_core/__init__.py +46 -47
- hdsp_agent_core/factory.py +6 -10
- hdsp_agent_core/interfaces.py +4 -2
- hdsp_agent_core/knowledge/__init__.py +5 -5
- hdsp_agent_core/knowledge/chunking.py +87 -61
- hdsp_agent_core/knowledge/loader.py +103 -101
- hdsp_agent_core/llm/service.py +192 -107
- hdsp_agent_core/managers/config_manager.py +16 -22
- hdsp_agent_core/managers/session_manager.py +5 -4
- hdsp_agent_core/models/__init__.py +12 -12
- hdsp_agent_core/models/agent.py +15 -8
- hdsp_agent_core/models/common.py +1 -2
- hdsp_agent_core/models/rag.py +48 -111
- hdsp_agent_core/prompts/__init__.py +12 -12
- hdsp_agent_core/prompts/cell_action_prompts.py +9 -7
- hdsp_agent_core/services/agent_service.py +10 -8
- hdsp_agent_core/services/chat_service.py +10 -6
- hdsp_agent_core/services/rag_service.py +3 -6
- hdsp_agent_core/tests/conftest.py +4 -1
- hdsp_agent_core/tests/test_factory.py +2 -2
- hdsp_agent_core/tests/test_services.py +12 -19
- {hdsp_jupyter_extension-2.0.8.data → hdsp_jupyter_extension-2.0.11.data}/data/share/jupyter/labextensions/hdsp-agent/build_log.json +1 -1
- {hdsp_jupyter_extension-2.0.8.data → hdsp_jupyter_extension-2.0.11.data}/data/share/jupyter/labextensions/hdsp-agent/package.json +2 -2
- hdsp_jupyter_extension-2.0.8.data/data/share/jupyter/labextensions/hdsp-agent/static/frontend_styles_index_js.8740a527757068814573.js → hdsp_jupyter_extension-2.0.11.data/data/share/jupyter/labextensions/hdsp-agent/static/frontend_styles_index_js.2d9fb488c82498c45c2d.js +93 -4
- hdsp_jupyter_extension-2.0.11.data/data/share/jupyter/labextensions/hdsp-agent/static/frontend_styles_index_js.2d9fb488c82498c45c2d.js.map +1 -0
- hdsp_jupyter_extension-2.0.8.data/data/share/jupyter/labextensions/hdsp-agent/static/lib_index_js.e4ff4b5779b5e049f84c.js → hdsp_jupyter_extension-2.0.11.data/data/share/jupyter/labextensions/hdsp-agent/static/lib_index_js.58c1e128ba0b76f41f04.js +153 -130
- hdsp_jupyter_extension-2.0.11.data/data/share/jupyter/labextensions/hdsp-agent/static/lib_index_js.58c1e128ba0b76f41f04.js.map +1 -0
- hdsp_jupyter_extension-2.0.8.data/data/share/jupyter/labextensions/hdsp-agent/static/remoteEntry.020cdb0b864cfaa4e41e.js → hdsp_jupyter_extension-2.0.11.data/data/share/jupyter/labextensions/hdsp-agent/static/remoteEntry.9da31d1134a53b0c4af5.js +6 -6
- hdsp_jupyter_extension-2.0.11.data/data/share/jupyter/labextensions/hdsp-agent/static/remoteEntry.9da31d1134a53b0c4af5.js.map +1 -0
- {hdsp_jupyter_extension-2.0.8.dist-info → hdsp_jupyter_extension-2.0.11.dist-info}/METADATA +1 -3
- hdsp_jupyter_extension-2.0.11.dist-info/RECORD +144 -0
- jupyter_ext/__init__.py +21 -11
- jupyter_ext/_version.py +1 -1
- jupyter_ext/handlers.py +69 -50
- jupyter_ext/labextension/build_log.json +1 -1
- jupyter_ext/labextension/package.json +2 -2
- jupyter_ext/labextension/static/{frontend_styles_index_js.8740a527757068814573.js → frontend_styles_index_js.2d9fb488c82498c45c2d.js} +93 -4
- jupyter_ext/labextension/static/frontend_styles_index_js.2d9fb488c82498c45c2d.js.map +1 -0
- jupyter_ext/labextension/static/{lib_index_js.e4ff4b5779b5e049f84c.js → lib_index_js.58c1e128ba0b76f41f04.js} +153 -130
- jupyter_ext/labextension/static/lib_index_js.58c1e128ba0b76f41f04.js.map +1 -0
- jupyter_ext/labextension/static/{remoteEntry.020cdb0b864cfaa4e41e.js → remoteEntry.9da31d1134a53b0c4af5.js} +6 -6
- jupyter_ext/labextension/static/remoteEntry.9da31d1134a53b0c4af5.js.map +1 -0
- hdsp_jupyter_extension-2.0.8.data/data/share/jupyter/labextensions/hdsp-agent/static/frontend_styles_index_js.8740a527757068814573.js.map +0 -1
- hdsp_jupyter_extension-2.0.8.data/data/share/jupyter/labextensions/hdsp-agent/static/lib_index_js.e4ff4b5779b5e049f84c.js.map +0 -1
- hdsp_jupyter_extension-2.0.8.data/data/share/jupyter/labextensions/hdsp-agent/static/remoteEntry.020cdb0b864cfaa4e41e.js.map +0 -1
- hdsp_jupyter_extension-2.0.8.dist-info/RECORD +0 -144
- jupyter_ext/labextension/static/frontend_styles_index_js.8740a527757068814573.js.map +0 -1
- jupyter_ext/labextension/static/lib_index_js.e4ff4b5779b5e049f84c.js.map +0 -1
- jupyter_ext/labextension/static/remoteEntry.020cdb0b864cfaa4e41e.js.map +0 -1
- {hdsp_jupyter_extension-2.0.8.data → hdsp_jupyter_extension-2.0.11.data}/data/etc/jupyter/jupyter_server_config.d/hdsp_jupyter_extension.json +0 -0
- {hdsp_jupyter_extension-2.0.8.data → hdsp_jupyter_extension-2.0.11.data}/data/share/jupyter/labextensions/hdsp-agent/install.json +0 -0
- {hdsp_jupyter_extension-2.0.8.data → hdsp_jupyter_extension-2.0.11.data}/data/share/jupyter/labextensions/hdsp-agent/static/node_modules_emotion_use-insertion-effect-with-fallbacks_dist_emotion-use-insertion-effect-wi-3ba6b80.c095373419d05e6f141a.js +0 -0
- {hdsp_jupyter_extension-2.0.8.data → hdsp_jupyter_extension-2.0.11.data}/data/share/jupyter/labextensions/hdsp-agent/static/node_modules_emotion_use-insertion-effect-with-fallbacks_dist_emotion-use-insertion-effect-wi-3ba6b80.c095373419d05e6f141a.js.map +0 -0
- {hdsp_jupyter_extension-2.0.8.data → hdsp_jupyter_extension-2.0.11.data}/data/share/jupyter/labextensions/hdsp-agent/static/node_modules_emotion_use-insertion-effect-with-fallbacks_dist_emotion-use-insertion-effect-wi-3ba6b81.61e75fb98ecff46cf836.js +0 -0
- {hdsp_jupyter_extension-2.0.8.data → hdsp_jupyter_extension-2.0.11.data}/data/share/jupyter/labextensions/hdsp-agent/static/node_modules_emotion_use-insertion-effect-with-fallbacks_dist_emotion-use-insertion-effect-wi-3ba6b81.61e75fb98ecff46cf836.js.map +0 -0
- {hdsp_jupyter_extension-2.0.8.data → hdsp_jupyter_extension-2.0.11.data}/data/share/jupyter/labextensions/hdsp-agent/static/style.js +0 -0
- {hdsp_jupyter_extension-2.0.8.data → hdsp_jupyter_extension-2.0.11.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_babel_runtime_helpers_esm_extends_js-node_modules_emotion_serialize_dist-051195.e2553aab0c3963b83dd7.js +0 -0
- {hdsp_jupyter_extension-2.0.8.data → hdsp_jupyter_extension-2.0.11.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_babel_runtime_helpers_esm_extends_js-node_modules_emotion_serialize_dist-051195.e2553aab0c3963b83dd7.js.map +0 -0
- {hdsp_jupyter_extension-2.0.8.data → hdsp_jupyter_extension-2.0.11.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_emotion_cache_dist_emotion-cache_browser_development_esm_js.24edcc52a1c014a8a5f0.js +0 -0
- {hdsp_jupyter_extension-2.0.8.data → hdsp_jupyter_extension-2.0.11.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_emotion_cache_dist_emotion-cache_browser_development_esm_js.24edcc52a1c014a8a5f0.js.map +0 -0
- {hdsp_jupyter_extension-2.0.8.data → hdsp_jupyter_extension-2.0.11.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_emotion_react_dist_emotion-react_browser_development_esm_js.19ecf6babe00caff6b8a.js +0 -0
- {hdsp_jupyter_extension-2.0.8.data → hdsp_jupyter_extension-2.0.11.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_emotion_react_dist_emotion-react_browser_development_esm_js.19ecf6babe00caff6b8a.js.map +0 -0
- {hdsp_jupyter_extension-2.0.8.data → hdsp_jupyter_extension-2.0.11.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_emotion_styled_dist_emotion-styled_browser_development_esm_js.661fb5836f4978a7c6e1.js +0 -0
- {hdsp_jupyter_extension-2.0.8.data → hdsp_jupyter_extension-2.0.11.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_emotion_styled_dist_emotion-styled_browser_development_esm_js.661fb5836f4978a7c6e1.js.map +0 -0
- {hdsp_jupyter_extension-2.0.8.data → hdsp_jupyter_extension-2.0.11.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_mui_material_index_js.985697e0162d8d088ca2.js +0 -0
- {hdsp_jupyter_extension-2.0.8.data → hdsp_jupyter_extension-2.0.11.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_mui_material_index_js.985697e0162d8d088ca2.js.map +0 -0
- {hdsp_jupyter_extension-2.0.8.data → hdsp_jupyter_extension-2.0.11.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_mui_material_utils_createSvgIcon_js.1f5038488cdfd8b3a85d.js +0 -0
- {hdsp_jupyter_extension-2.0.8.data → hdsp_jupyter_extension-2.0.11.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_mui_material_utils_createSvgIcon_js.1f5038488cdfd8b3a85d.js.map +0 -0
- {hdsp_jupyter_extension-2.0.8.dist-info → hdsp_jupyter_extension-2.0.11.dist-info}/WHEEL +0 -0
- {hdsp_jupyter_extension-2.0.8.dist-info → hdsp_jupyter_extension-2.0.11.dist-info}/licenses/LICENSE +0 -0
hdsp_agent_core/llm/service.py
CHANGED
@@ -4,12 +4,12 @@ LLM Service - Handles interactions with different LLM providers
 Supports Gemini, OpenAI, and vLLM providers with unified interface.
 """

-import
+import asyncio
 import json
 import ssl
-import asyncio
-from typing import Dict, Any, Optional, Tuple
 from contextlib import asynccontextmanager
+from typing import Any, Dict, Optional
+
 import aiohttp
 import certifi

@@ -19,7 +19,7 @@ class LLMService:

     def __init__(self, config: Dict[str, Any], key_manager=None):
         self.config = config
-        self.provider = config.get(
+        self.provider = config.get("provider", "gemini")
         self._key_manager = key_manager  # Optional injection for testing
         # Create SSL context with certifi certificates
         self._ssl_context = ssl.create_default_context(cafile=certifi.where())
@@ -28,10 +28,11 @@ class LLMService:
         """Get key manager if using Gemini provider"""
         if self._key_manager:
             return self._key_manager
-        if self.provider ==
+        if self.provider == "gemini":
             try:
                 from hdsp_agent_core.managers.api_key_manager import get_key_manager
                 from hdsp_agent_core.managers.config_manager import ConfigManager
+
                 return get_key_manager(ConfigManager.get_instance())
             except ImportError:
                 # Fallback for standalone usage
@@ -46,33 +47,36 @@ class LLMService:
         NOTE: Server receives SINGLE API key from client per request.
         Key rotation is managed by the frontend (financial security compliance).
         """
-        cfg = self.config.get(
-        api_key = cfg.get(
+        cfg = self.config.get("gemini", {})
+        api_key = cfg.get("apiKey")
         if not api_key:
             raise ValueError("Gemini API key not configured")
-        model = cfg.get(
+        model = cfg.get("model", "gemini-2.5-pro")
         base_url = f"https://generativelanguage.googleapis.com/v1beta/models/{model}"
         return api_key, model, base_url

     def _get_openai_config(self) -> tuple[str, str, Dict[str, str]]:
         """Get OpenAI config: (model, url, headers). Raises if api_key missing."""
-        cfg = self.config.get(
-        api_key = cfg.get(
+        cfg = self.config.get("openai", {})
+        api_key = cfg.get("apiKey")
         if not api_key:
             raise ValueError("OpenAI API key not configured")
-        model = cfg.get(
+        model = cfg.get("model", "gpt-4")
         url = "https://api.openai.com/v1/chat/completions"
-        headers = {
+        headers = {
+            "Authorization": f"Bearer {api_key}",
+            "Content-Type": "application/json",
+        }
         return model, url, headers

     def _get_vllm_config(self) -> tuple[str, str, Dict[str, str]]:
         """Get vLLM config: (model, url, headers)."""
-        cfg = self.config.get(
-        endpoint = cfg.get(
-        model = cfg.get(
+        cfg = self.config.get("vllm", {})
+        endpoint = cfg.get("endpoint", "http://localhost:8000")
+        model = cfg.get("model", "default")
         url = f"{endpoint}/v1/chat/completions"
         headers = {"Content-Type": "application/json"}
-        if cfg.get(
+        if cfg.get("apiKey"):
             headers["Authorization"] = f"Bearer {cfg['apiKey']}"
         return model, url, headers

@@ -84,7 +88,9 @@ class LLMService:
             return f"Context:\n{context}\n\nUser Request:\n{prompt}"
         return prompt

-    def _build_openai_messages(
+    def _build_openai_messages(
+        self, prompt: str, context: Optional[str] = None
+    ) -> list:
         """Build OpenAI-style messages array"""
         messages = []
         if context:
@@ -97,7 +103,7 @@ class LLMService:
         operation,
         max_retries: int = 3,
         provider: str = "API",
-        retryable_statuses: tuple = (503, 429)
+        retryable_statuses: tuple = (503, 429),
     ):
         """Execute operation with exponential backoff retry logic"""
         for attempt in range(max_retries):
@@ -105,8 +111,10 @@ class LLMService:
                 return await operation()
             except asyncio.TimeoutError:
                 if attempt < max_retries - 1:
-                    wait_time = (2
-                    print(
+                    wait_time = (2**attempt) * 3
+                    print(
+                        f"[LLMService] Request timeout. Retrying in {wait_time}s... (attempt {attempt + 1}/{max_retries})"
+                    )
                     await asyncio.sleep(wait_time)
                     continue
                 raise Exception(f"Request timeout after {max_retries} retries")
@@ -116,15 +124,21 @@ class LLMService:
                 if "rate limit" in error_msg.lower() or "(429)" in error_msg:
                     if attempt < max_retries - 1:
                         wait_time = 40 + (attempt * 20)
-                        print(
+                        print(
+                            f"[LLMService] Rate limit hit. Waiting {wait_time}s before retry... (attempt {attempt + 1}/{max_retries})"
+                        )
                         await asyncio.sleep(wait_time)
                         continue
-                    raise Exception(
+                    raise Exception(
+                        f"Rate limit exceeded after {max_retries} retries. Please wait a minute and try again."
+                    )
                 # Server overload (503) error is also retryable
                 if "overloaded" in error_msg.lower() or "(503)" in error_msg:
                     if attempt < max_retries - 1:
-                        wait_time = (2
-                        print(
+                        wait_time = (2**attempt) * 5
+                        print(
+                            f"[LLMService] Server overloaded. Retrying in {wait_time}s... (attempt {attempt + 1}/{max_retries})"
+                        )
                         await asyncio.sleep(wait_time)
                         continue
                     raise
@@ -135,8 +149,10 @@ class LLMService:
                     raise
                 # Network error retry
                 if attempt < max_retries - 1:
-                    wait_time = (2
-                    print(
+                    wait_time = (2**attempt) * 2
+                    print(
+                        f"[LLMService] Network error: {e}. Retrying in {wait_time}s... (attempt {attempt + 1}/{max_retries})"
+                    )
                     await asyncio.sleep(wait_time)
                     continue
                 raise
@@ -148,12 +164,14 @@ class LLMService:
         payload: Dict[str, Any],
         headers: Optional[Dict[str, str]] = None,
         timeout_seconds: int = 60,
-        provider: str = "API"
+        provider: str = "API",
     ):
         """Context manager for HTTP POST requests with automatic session cleanup"""
         timeout = aiohttp.ClientTimeout(total=timeout_seconds)
         connector = aiohttp.TCPConnector(ssl=self._ssl_context)
-        async with aiohttp.ClientSession(
+        async with aiohttp.ClientSession(
+            timeout=timeout, connector=connector
+        ) as session:
             async with session.post(url, json=payload, headers=headers) as response:
                 if response.status != 200:
                     error_text = await response.text()
@@ -167,17 +185,28 @@ class LLMService:
         payload: Dict[str, Any],
         headers: Optional[Dict[str, str]] = None,
         timeout_seconds: int = 60,
-        provider: str = "API"
+        provider: str = "API",
     ) -> Dict[str, Any]:
         """Make request and return JSON response"""
-        async with self._request(
+        async with self._request(
+            url, payload, headers, timeout_seconds, provider
+        ) as response:
             return await response.json()

-    async def _stream_response(
+    async def _stream_response(
+        self,
+        url: str,
+        payload: Dict[str, Any],
+        headers: Optional[Dict[str, str]],
+        provider: str,
+        line_parser,
+    ):
         """Stream response and yield parsed content"""
-        async with self._request(
+        async with self._request(
+            url, payload, headers, timeout_seconds=120, provider=provider
+        ) as response:
             async for line in response.content:
-                line_text = line.decode(
+                line_text = line.decode("utf-8").strip()
                 content = line_parser(line_text)
                 if content:
                     yield content
@@ -186,22 +215,22 @@ class LLMService:

     def _parse_openai_response(self, data: Dict[str, Any]) -> str:
         """Parse OpenAI-compatible response format (used by OpenAI and vLLM)"""
-        if
-            choice = data[
-            if
-                return choice[
-            elif
-                return choice[
+        if "choices" in data and len(data["choices"]) > 0:
+            choice = data["choices"][0]
+            if "message" in choice and "content" in choice["message"]:
+                return choice["message"]["content"]
+            elif "text" in choice:
+                return choice["text"]
         raise Exception("No valid response from API")

     def _extract_gemini_text(self, data: Dict[str, Any]) -> Optional[str]:
         """Extract text from Gemini response data (shared by response and stream parsing)"""
-        if
-            candidate = data[
-            if
-                parts = candidate[
-                if len(parts) > 0 and
-                    return parts[0][
+        if "candidates" in data and len(data["candidates"]) > 0:
+            candidate = data["candidates"][0]
+            if "content" in candidate and "parts" in candidate["content"]:
+                parts = candidate["content"]["parts"]
+                if len(parts) > 0 and "text" in parts[0]:
+                    return parts[0]["text"]
         return None

     def _parse_gemini_response(self, data: Dict[str, Any]) -> str:
@@ -213,10 +242,10 @@ class LLMService:

     def _parse_sse_line(self, line_text: str, extractor) -> Optional[str]:
         """Parse SSE line with given extractor function"""
-        if not line_text.startswith(
+        if not line_text.startswith("data: "):
             return None
         data_str = line_text[6:]
-        if data_str ==
+        if data_str == "[DONE]":
             return None
         try:
             data = json.loads(data_str)
@@ -226,9 +255,9 @@ class LLMService:

     def _extract_openai_delta(self, data: Dict[str, Any]) -> Optional[str]:
         """Extract content delta from OpenAI stream data"""
-        if
-            delta = data[
-            return delta.get(
+        if "choices" in data and len(data["choices"]) > 0:
+            delta = data["choices"][0].get("delta", {})
+            return delta.get("content", "") or None
         return None

     def _parse_openai_stream_line(self, line_text: str) -> Optional[str]:
@@ -245,7 +274,7 @@ class LLMService:
         messages: list,
         max_tokens: int = 4096,
         temperature: float = 0.0,
-        stream: bool = False
+        stream: bool = False,
     ) -> Dict[str, Any]:
         """Build OpenAI-compatible request payload"""
         return {
@@ -253,14 +282,14 @@ class LLMService:
             "messages": messages,
             "max_tokens": max_tokens,
             "temperature": temperature,
-            "stream": stream
+            "stream": stream,
         }

     def _build_gemini_payload(
         self,
         prompt: str,
         max_output_tokens: int = 32768,
-        temperature: Optional[float] = None
+        temperature: Optional[float] = None,
     ) -> Dict[str, Any]:
         """Build Gemini API request payload

@@ -269,9 +298,9 @@ class LLMService:
             max_output_tokens: Maximum tokens in response (default 32768 for Gemini 2.5 with thinking)
             temperature: 0.0 for deterministic, higher for creativity (default from config)
         """
-        cfg = self.config.get(
-        temp = temperature if temperature is not None else cfg.get(
-        model = cfg.get(
+        cfg = self.config.get("gemini", {})
+        temp = temperature if temperature is not None else cfg.get("temperature", 0.0)
+        model = cfg.get("model", "gemini-2.5-flash")

         payload = {
             "contents": [{"parts": [{"text": prompt}]}],
@@ -279,51 +308,67 @@ class LLMService:
                 "temperature": temp,
                 "topK": 1,
                 "topP": 0.95,
-                "maxOutputTokens": max_output_tokens
+                "maxOutputTokens": max_output_tokens,
             },
             "safetySettings": [
-                {
-
-
-
-
+                {
+                    "category": "HARM_CATEGORY_HARASSMENT",
+                    "threshold": "BLOCK_MEDIUM_AND_ABOVE",
+                },
+                {
+                    "category": "HARM_CATEGORY_HATE_SPEECH",
+                    "threshold": "BLOCK_MEDIUM_AND_ABOVE",
+                },
+                {
+                    "category": "HARM_CATEGORY_SEXUALLY_EXPLICIT",
+                    "threshold": "BLOCK_MEDIUM_AND_ABOVE",
+                },
+                {
+                    "category": "HARM_CATEGORY_DANGEROUS_CONTENT",
+                    "threshold": "BLOCK_MEDIUM_AND_ABOVE",
+                },
+            ],
         }

         # Gemini 2.5 models have built-in "thinking" that consumes output tokens
-        if
-            payload["generationConfig"]["thinkingConfig"] = {
-                "thinkingBudget": 8192
-            }
+        if "2.5" in model or "2-5" in model:
+            payload["generationConfig"]["thinkingConfig"] = {"thinkingBudget": 8192}

         return payload

-    async def generate_response_stream(
+    async def generate_response_stream(
+        self, prompt: str, context: Optional[str] = None
+    ):
         """Generate a streaming response from the configured LLM provider (async generator)"""
-        if self.provider ==
+        if self.provider == "gemini":
             async for chunk in self._call_gemini_stream(prompt, context):
                 yield chunk
-        elif self.provider ==
+        elif self.provider == "vllm":
             async for chunk in self._call_vllm_stream(prompt, context):
                 yield chunk
-        elif self.provider ==
+        elif self.provider == "openai":
             async for chunk in self._call_openai_stream(prompt, context):
                 yield chunk
         else:
             raise ValueError(f"Unsupported provider: {self.provider}")

-    async def generate_response(
+    async def generate_response(
+        self, prompt: str, context: Optional[str] = None
+    ) -> str:
         """Generate a response from the configured LLM provider"""

-        if self.provider ==
+        if self.provider == "gemini":
             return await self._call_gemini(prompt, context)
-        elif self.provider ==
+        elif self.provider == "vllm":
             return await self._call_vllm(prompt, context)
-        elif self.provider ==
+        elif self.provider == "openai":
             return await self._call_openai(prompt, context)
         else:
             raise ValueError(f"Unsupported provider: {self.provider}")

-    async def _call_gemini(
+    async def _call_gemini(
+        self, prompt: str, context: Optional[str] = None, max_retries: int = 3
+    ) -> str:
         """Call Google Gemini API with single API key.

         NOTE: Server does NOT manage key rotation (financial security compliance).
@@ -340,21 +385,29 @@ class LLMService:
             try:
                 timeout = aiohttp.ClientTimeout(total=60)
                 connector = aiohttp.TCPConnector(ssl=self._ssl_context)
-                async with aiohttp.ClientSession(
+                async with aiohttp.ClientSession(
+                    timeout=timeout, connector=connector
+                ) as session:
                     async with session.post(url, json=payload) as response:
                         # 429 Rate limit - return to client for key rotation
                         if response.status == 429:
                             error_text = await response.text()
-                            print(
+                            print(
+                                f"[LLMService] Rate limit (429): {error_text[:100]}..."
+                            )
                             raise Exception(f"RATE_LIMIT_EXCEEDED: {error_text}")

                         # 503 Server overload - retry with backoff
                         if response.status == 503:
                             error_text = await response.text()
-                            print(
+                            print(
+                                f"[LLMService] Server overloaded (503): {error_text[:100]}..."
+                            )
                             if attempt < max_retries - 1:
-                                wait_time = (2
-                                print(
+                                wait_time = (2**attempt) * 5
+                                print(
+                                    f"[LLMService] Waiting {wait_time}s before retry..."
+                                )
                                 await asyncio.sleep(wait_time)
                                 continue
                             raise Exception(f"Server overloaded: {error_text}")
@@ -366,24 +419,30 @@ class LLMService:

                         # Success
                         data = await response.json()
-                        print(
+                        print(
+                            f"[LLMService] Gemini API Response Status: {response.status}"
+                        )

                         # Debug: finishReason check
-                        if
-                            candidate = data[
-                            finish_reason = candidate.get(
+                        if "candidates" in data and len(data["candidates"]) > 0:
+                            candidate = data["candidates"][0]
+                            finish_reason = candidate.get("finishReason", "UNKNOWN")
                             print(f"[LLMService] Gemini finishReason: {finish_reason}")
-                            if finish_reason not in [
-                                print(
+                            if finish_reason not in ["STOP", "UNKNOWN"]:
+                                print(
+                                    f"[LLMService] WARNING: Response may be incomplete! finishReason={finish_reason}"
+                                )

                         response_text = self._parse_gemini_response(data)
-                        print(
+                        print(
+                            f"[LLMService] Successfully received response from {model} (length: {len(response_text)} chars)"
+                        )

                         return response_text

             except asyncio.TimeoutError:
                 if attempt < max_retries - 1:
-                    wait_time = (2
+                    wait_time = (2**attempt) * 3
                     print(f"[LLMService] Timeout. Retrying in {wait_time}s...")
                     await asyncio.sleep(wait_time)
                     continue
@@ -399,8 +458,10 @@ class LLMService:
                     raise
                 # Network error - retry with delay
                 if attempt < max_retries - 1:
-                    wait_time = (2
-                    print(
+                    wait_time = (2**attempt) * 2
+                    print(
+                        f"[LLMService] Network error: {e}. Retrying in {wait_time}s..."
+                    )
                     await asyncio.sleep(wait_time)
                     continue
                 raise
@@ -421,12 +482,16 @@ class LLMService:
         """Call OpenAI API"""
         model, url, headers = self._get_openai_config()
         messages = self._build_openai_messages(prompt, context)
-        payload = self._build_openai_payload(
+        payload = self._build_openai_payload(
+            model, messages, max_tokens=2000, stream=False
+        )

         data = await self._request_json(url, payload, headers, provider="OpenAI")
         return self._parse_openai_response(data)

-    async def _call_gemini_stream(
+    async def _call_gemini_stream(
+        self, prompt: str, context: Optional[str] = None, max_retries: int = 3
+    ):
         """Call Google Gemini API with streaming using single API key.

         NOTE: Server does NOT manage key rotation (financial security compliance).
@@ -443,21 +508,29 @@ class LLMService:
             try:
                 timeout = aiohttp.ClientTimeout(total=120)
                 connector = aiohttp.TCPConnector(ssl=self._ssl_context)
-                async with aiohttp.ClientSession(
+                async with aiohttp.ClientSession(
+                    timeout=timeout, connector=connector
+                ) as session:
                     async with session.post(url, json=payload) as response:
                         # 429 Rate limit - return to client for key rotation
                         if response.status == 429:
                             error_text = await response.text()
-                            print(
+                            print(
+                                f"[LLMService] Rate limit (429) stream: {error_text[:100]}..."
+                            )
                             raise Exception(f"RATE_LIMIT_EXCEEDED: {error_text}")

                         # 503 Server overload - retry with backoff
                         if response.status == 503:
                             error_text = await response.text()
-                            print(
+                            print(
+                                f"[LLMService] Server overloaded (503) stream: {error_text[:100]}..."
+                            )
                             if attempt < max_retries - 1:
-                                wait_time = (2
-                                print(
+                                wait_time = (2**attempt) * 5
+                                print(
+                                    f"[LLMService] Waiting {wait_time}s before retry..."
+                                )
                                 await asyncio.sleep(wait_time)
                                 continue
                             raise Exception(f"Server overloaded: {error_text}")
@@ -468,9 +541,9 @@ class LLMService:
                             raise Exception(f"Gemini API error: {error_text}")

                         # Success - stream the response
-                        print(
+                        print("[LLMService] Successfully connected to Gemini stream")
                         async for line in response.content:
-                            line_text = line.decode(
+                            line_text = line.decode("utf-8").strip()
                             content = self._parse_gemini_stream_line(line_text)
                             if content:
                                 yield content
@@ -478,7 +551,7 @@ class LLMService:

             except asyncio.TimeoutError:
                 if attempt < max_retries - 1:
-                    wait_time = (2
+                    wait_time = (2**attempt) * 3
                     print(f"[LLMService] Timeout. Retrying in {wait_time}s...")
                     await asyncio.sleep(wait_time)
                     continue
@@ -494,8 +567,10 @@ class LLMService:
                     raise
                 # Network error - retry with delay
                 if attempt < max_retries - 1:
-                    wait_time = (2
-                    print(
+                    wait_time = (2**attempt) * 2
+                    print(
+                        f"[LLMService] Network error: {e}. Retrying in {wait_time}s..."
+                    )
                     await asyncio.sleep(wait_time)
                     continue
                 raise
@@ -509,21 +584,29 @@ class LLMService:
         messages = [{"role": "user", "content": full_prompt}]
         payload = self._build_openai_payload(model, messages, stream=True)

-        async for content in self._stream_response(
+        async for content in self._stream_response(
+            url, payload, headers, "vLLM", self._parse_openai_stream_line
+        ):
             yield content

     async def _call_openai_stream(self, prompt: str, context: Optional[str] = None):
         """Call OpenAI API with streaming"""
         model, url, headers = self._get_openai_config()
         messages = self._build_openai_messages(prompt, context)
-        payload = self._build_openai_payload(
+        payload = self._build_openai_payload(
+            model, messages, max_tokens=2000, stream=True
+        )

-        async for content in self._stream_response(
+        async for content in self._stream_response(
+            url, payload, headers, "OpenAI", self._parse_openai_stream_line
+        ):
             yield content


 # Module-level helper functions for Auto-Agent
-async def call_llm(
+async def call_llm(
+    prompt: str, config: Dict[str, Any], context: Optional[str] = None
+) -> str:
     """
     Convenience function to call LLM with the given config.

@@ -539,7 +622,9 @@ async def call_llm(prompt: str, config: Dict[str, Any], context: Optional[str] =
     return await service.generate_response(prompt, context)


-async def call_llm_stream(
+async def call_llm_stream(
+    prompt: str, config: Dict[str, Any], context: Optional[str] = None
+):
     """
     Convenience function to stream LLM response with the given config.

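Most of the churn in the retry-heavy hunks above (_retry_with_backoff, _call_gemini, _call_gemini_stream) is Black-style reformatting around one unchanged backoff policy: timeouts wait (2**attempt) * 3 seconds, 503 overloads (2**attempt) * 5, other network errors (2**attempt) * 2, and 429 rate limits back off linearly at 40 + attempt * 20 seconds, all bounded by max_retries = 3. The sketch below restates that schedule as standalone code; backoff_seconds and retry_with_backoff are illustrative names only, not functions exported by the package.

import asyncio
from typing import Awaitable, Callable, TypeVar

T = TypeVar("T")


def backoff_seconds(kind: str, attempt: int) -> int:
    # Waits mirrored from the diff: 429 rate limits back off linearly,
    # everything else exponentially with a per-kind base factor.
    if kind == "rate_limit":
        return 40 + attempt * 20
    factor = {"timeout": 3, "overload": 5, "network": 2}[kind]
    return (2 ** attempt) * factor


async def retry_with_backoff(
    operation: Callable[[], Awaitable[T]], max_retries: int = 3
) -> T:
    # Illustrative helper (not the package's _retry_with_backoff): retry an
    # async operation, sleeping per the schedule above between attempts.
    for attempt in range(max_retries):
        try:
            return await operation()
        except asyncio.TimeoutError:
            if attempt == max_retries - 1:
                raise
            kind = "timeout"
        except Exception as exc:  # the real code inspects the error message
            if attempt == max_retries - 1:
                raise
            msg = str(exc).lower()
            if "429" in msg or "rate limit" in msg:
                kind = "rate_limit"
            elif "503" in msg or "overloaded" in msg:
                kind = "overload"
            else:
                kind = "network"
        await asyncio.sleep(backoff_seconds(kind, attempt))
    raise RuntimeError("max_retries must be >= 1")

The linear rate-limit waits (40s, 60s, 80s) line up with the diff's "Please wait a minute and try again" message, which suggests per-minute quota windows rather than transient congestion.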