hdsp-jupyter-extension 2.0.8__py3-none-any.whl → 2.0.11__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (88)
  1. agent_server/core/notebook_generator.py +4 -4
  2. agent_server/core/rag_manager.py +12 -3
  3. agent_server/core/retriever.py +2 -1
  4. agent_server/core/vllm_embedding_service.py +8 -5
  5. agent_server/langchain/ARCHITECTURE.md +7 -51
  6. agent_server/langchain/agent.py +31 -20
  7. agent_server/langchain/custom_middleware.py +234 -31
  8. agent_server/langchain/hitl_config.py +5 -8
  9. agent_server/langchain/logging_utils.py +7 -7
  10. agent_server/langchain/prompts.py +106 -120
  11. agent_server/langchain/tools/__init__.py +1 -10
  12. agent_server/langchain/tools/file_tools.py +9 -61
  13. agent_server/langchain/tools/jupyter_tools.py +0 -1
  14. agent_server/langchain/tools/lsp_tools.py +8 -8
  15. agent_server/langchain/tools/resource_tools.py +12 -12
  16. agent_server/langchain/tools/search_tools.py +3 -158
  17. agent_server/prompts/file_action_prompts.py +8 -8
  18. agent_server/routers/langchain_agent.py +200 -125
  19. hdsp_agent_core/__init__.py +46 -47
  20. hdsp_agent_core/factory.py +6 -10
  21. hdsp_agent_core/interfaces.py +4 -2
  22. hdsp_agent_core/knowledge/__init__.py +5 -5
  23. hdsp_agent_core/knowledge/chunking.py +87 -61
  24. hdsp_agent_core/knowledge/loader.py +103 -101
  25. hdsp_agent_core/llm/service.py +192 -107
  26. hdsp_agent_core/managers/config_manager.py +16 -22
  27. hdsp_agent_core/managers/session_manager.py +5 -4
  28. hdsp_agent_core/models/__init__.py +12 -12
  29. hdsp_agent_core/models/agent.py +15 -8
  30. hdsp_agent_core/models/common.py +1 -2
  31. hdsp_agent_core/models/rag.py +48 -111
  32. hdsp_agent_core/prompts/__init__.py +12 -12
  33. hdsp_agent_core/prompts/cell_action_prompts.py +9 -7
  34. hdsp_agent_core/services/agent_service.py +10 -8
  35. hdsp_agent_core/services/chat_service.py +10 -6
  36. hdsp_agent_core/services/rag_service.py +3 -6
  37. hdsp_agent_core/tests/conftest.py +4 -1
  38. hdsp_agent_core/tests/test_factory.py +2 -2
  39. hdsp_agent_core/tests/test_services.py +12 -19
  40. {hdsp_jupyter_extension-2.0.8.data → hdsp_jupyter_extension-2.0.11.data}/data/share/jupyter/labextensions/hdsp-agent/build_log.json +1 -1
  41. {hdsp_jupyter_extension-2.0.8.data → hdsp_jupyter_extension-2.0.11.data}/data/share/jupyter/labextensions/hdsp-agent/package.json +2 -2
  42. hdsp_jupyter_extension-2.0.8.data/data/share/jupyter/labextensions/hdsp-agent/static/frontend_styles_index_js.8740a527757068814573.js → hdsp_jupyter_extension-2.0.11.data/data/share/jupyter/labextensions/hdsp-agent/static/frontend_styles_index_js.2d9fb488c82498c45c2d.js +93 -4
  43. hdsp_jupyter_extension-2.0.11.data/data/share/jupyter/labextensions/hdsp-agent/static/frontend_styles_index_js.2d9fb488c82498c45c2d.js.map +1 -0
  44. hdsp_jupyter_extension-2.0.8.data/data/share/jupyter/labextensions/hdsp-agent/static/lib_index_js.e4ff4b5779b5e049f84c.js → hdsp_jupyter_extension-2.0.11.data/data/share/jupyter/labextensions/hdsp-agent/static/lib_index_js.58c1e128ba0b76f41f04.js +153 -130
  45. hdsp_jupyter_extension-2.0.11.data/data/share/jupyter/labextensions/hdsp-agent/static/lib_index_js.58c1e128ba0b76f41f04.js.map +1 -0
  46. hdsp_jupyter_extension-2.0.8.data/data/share/jupyter/labextensions/hdsp-agent/static/remoteEntry.020cdb0b864cfaa4e41e.js → hdsp_jupyter_extension-2.0.11.data/data/share/jupyter/labextensions/hdsp-agent/static/remoteEntry.9da31d1134a53b0c4af5.js +6 -6
  47. hdsp_jupyter_extension-2.0.11.data/data/share/jupyter/labextensions/hdsp-agent/static/remoteEntry.9da31d1134a53b0c4af5.js.map +1 -0
  48. {hdsp_jupyter_extension-2.0.8.dist-info → hdsp_jupyter_extension-2.0.11.dist-info}/METADATA +1 -3
  49. hdsp_jupyter_extension-2.0.11.dist-info/RECORD +144 -0
  50. jupyter_ext/__init__.py +21 -11
  51. jupyter_ext/_version.py +1 -1
  52. jupyter_ext/handlers.py +69 -50
  53. jupyter_ext/labextension/build_log.json +1 -1
  54. jupyter_ext/labextension/package.json +2 -2
  55. jupyter_ext/labextension/static/{frontend_styles_index_js.8740a527757068814573.js → frontend_styles_index_js.2d9fb488c82498c45c2d.js} +93 -4
  56. jupyter_ext/labextension/static/frontend_styles_index_js.2d9fb488c82498c45c2d.js.map +1 -0
  57. jupyter_ext/labextension/static/{lib_index_js.e4ff4b5779b5e049f84c.js → lib_index_js.58c1e128ba0b76f41f04.js} +153 -130
  58. jupyter_ext/labextension/static/lib_index_js.58c1e128ba0b76f41f04.js.map +1 -0
  59. jupyter_ext/labextension/static/{remoteEntry.020cdb0b864cfaa4e41e.js → remoteEntry.9da31d1134a53b0c4af5.js} +6 -6
  60. jupyter_ext/labextension/static/remoteEntry.9da31d1134a53b0c4af5.js.map +1 -0
  61. hdsp_jupyter_extension-2.0.8.data/data/share/jupyter/labextensions/hdsp-agent/static/frontend_styles_index_js.8740a527757068814573.js.map +0 -1
  62. hdsp_jupyter_extension-2.0.8.data/data/share/jupyter/labextensions/hdsp-agent/static/lib_index_js.e4ff4b5779b5e049f84c.js.map +0 -1
  63. hdsp_jupyter_extension-2.0.8.data/data/share/jupyter/labextensions/hdsp-agent/static/remoteEntry.020cdb0b864cfaa4e41e.js.map +0 -1
  64. hdsp_jupyter_extension-2.0.8.dist-info/RECORD +0 -144
  65. jupyter_ext/labextension/static/frontend_styles_index_js.8740a527757068814573.js.map +0 -1
  66. jupyter_ext/labextension/static/lib_index_js.e4ff4b5779b5e049f84c.js.map +0 -1
  67. jupyter_ext/labextension/static/remoteEntry.020cdb0b864cfaa4e41e.js.map +0 -1
  68. {hdsp_jupyter_extension-2.0.8.data → hdsp_jupyter_extension-2.0.11.data}/data/etc/jupyter/jupyter_server_config.d/hdsp_jupyter_extension.json +0 -0
  69. {hdsp_jupyter_extension-2.0.8.data → hdsp_jupyter_extension-2.0.11.data}/data/share/jupyter/labextensions/hdsp-agent/install.json +0 -0
  70. {hdsp_jupyter_extension-2.0.8.data → hdsp_jupyter_extension-2.0.11.data}/data/share/jupyter/labextensions/hdsp-agent/static/node_modules_emotion_use-insertion-effect-with-fallbacks_dist_emotion-use-insertion-effect-wi-3ba6b80.c095373419d05e6f141a.js +0 -0
  71. {hdsp_jupyter_extension-2.0.8.data → hdsp_jupyter_extension-2.0.11.data}/data/share/jupyter/labextensions/hdsp-agent/static/node_modules_emotion_use-insertion-effect-with-fallbacks_dist_emotion-use-insertion-effect-wi-3ba6b80.c095373419d05e6f141a.js.map +0 -0
  72. {hdsp_jupyter_extension-2.0.8.data → hdsp_jupyter_extension-2.0.11.data}/data/share/jupyter/labextensions/hdsp-agent/static/node_modules_emotion_use-insertion-effect-with-fallbacks_dist_emotion-use-insertion-effect-wi-3ba6b81.61e75fb98ecff46cf836.js +0 -0
  73. {hdsp_jupyter_extension-2.0.8.data → hdsp_jupyter_extension-2.0.11.data}/data/share/jupyter/labextensions/hdsp-agent/static/node_modules_emotion_use-insertion-effect-with-fallbacks_dist_emotion-use-insertion-effect-wi-3ba6b81.61e75fb98ecff46cf836.js.map +0 -0
  74. {hdsp_jupyter_extension-2.0.8.data → hdsp_jupyter_extension-2.0.11.data}/data/share/jupyter/labextensions/hdsp-agent/static/style.js +0 -0
  75. {hdsp_jupyter_extension-2.0.8.data → hdsp_jupyter_extension-2.0.11.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_babel_runtime_helpers_esm_extends_js-node_modules_emotion_serialize_dist-051195.e2553aab0c3963b83dd7.js +0 -0
  76. {hdsp_jupyter_extension-2.0.8.data → hdsp_jupyter_extension-2.0.11.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_babel_runtime_helpers_esm_extends_js-node_modules_emotion_serialize_dist-051195.e2553aab0c3963b83dd7.js.map +0 -0
  77. {hdsp_jupyter_extension-2.0.8.data → hdsp_jupyter_extension-2.0.11.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_emotion_cache_dist_emotion-cache_browser_development_esm_js.24edcc52a1c014a8a5f0.js +0 -0
  78. {hdsp_jupyter_extension-2.0.8.data → hdsp_jupyter_extension-2.0.11.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_emotion_cache_dist_emotion-cache_browser_development_esm_js.24edcc52a1c014a8a5f0.js.map +0 -0
  79. {hdsp_jupyter_extension-2.0.8.data → hdsp_jupyter_extension-2.0.11.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_emotion_react_dist_emotion-react_browser_development_esm_js.19ecf6babe00caff6b8a.js +0 -0
  80. {hdsp_jupyter_extension-2.0.8.data → hdsp_jupyter_extension-2.0.11.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_emotion_react_dist_emotion-react_browser_development_esm_js.19ecf6babe00caff6b8a.js.map +0 -0
  81. {hdsp_jupyter_extension-2.0.8.data → hdsp_jupyter_extension-2.0.11.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_emotion_styled_dist_emotion-styled_browser_development_esm_js.661fb5836f4978a7c6e1.js +0 -0
  82. {hdsp_jupyter_extension-2.0.8.data → hdsp_jupyter_extension-2.0.11.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_emotion_styled_dist_emotion-styled_browser_development_esm_js.661fb5836f4978a7c6e1.js.map +0 -0
  83. {hdsp_jupyter_extension-2.0.8.data → hdsp_jupyter_extension-2.0.11.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_mui_material_index_js.985697e0162d8d088ca2.js +0 -0
  84. {hdsp_jupyter_extension-2.0.8.data → hdsp_jupyter_extension-2.0.11.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_mui_material_index_js.985697e0162d8d088ca2.js.map +0 -0
  85. {hdsp_jupyter_extension-2.0.8.data → hdsp_jupyter_extension-2.0.11.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_mui_material_utils_createSvgIcon_js.1f5038488cdfd8b3a85d.js +0 -0
  86. {hdsp_jupyter_extension-2.0.8.data → hdsp_jupyter_extension-2.0.11.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_mui_material_utils_createSvgIcon_js.1f5038488cdfd8b3a85d.js.map +0 -0
  87. {hdsp_jupyter_extension-2.0.8.dist-info → hdsp_jupyter_extension-2.0.11.dist-info}/WHEEL +0 -0
  88. {hdsp_jupyter_extension-2.0.8.dist-info → hdsp_jupyter_extension-2.0.11.dist-info}/licenses/LICENSE +0 -0
hdsp_agent_core/llm/service.py
@@ -4,12 +4,12 @@ LLM Service - Handles interactions with different LLM providers
 Supports Gemini, OpenAI, and vLLM providers with unified interface.
 """
 
-import os
+import asyncio
 import json
 import ssl
-import asyncio
-from typing import Dict, Any, Optional, Tuple
 from contextlib import asynccontextmanager
+from typing import Any, Dict, Optional
+
 import aiohttp
 import certifi
 
@@ -19,7 +19,7 @@ class LLMService:
 
     def __init__(self, config: Dict[str, Any], key_manager=None):
         self.config = config
-        self.provider = config.get('provider', 'gemini')
+        self.provider = config.get("provider", "gemini")
         self._key_manager = key_manager  # Optional injection for testing
         # Create SSL context with certifi certificates
         self._ssl_context = ssl.create_default_context(cafile=certifi.where())
@@ -28,10 +28,11 @@ class LLMService:
         """Get key manager if using Gemini provider"""
         if self._key_manager:
             return self._key_manager
-        if self.provider == 'gemini':
+        if self.provider == "gemini":
             try:
                 from hdsp_agent_core.managers.api_key_manager import get_key_manager
                 from hdsp_agent_core.managers.config_manager import ConfigManager
+
                 return get_key_manager(ConfigManager.get_instance())
             except ImportError:
                 # Fallback for standalone usage
@@ -46,33 +47,36 @@ class LLMService:
         NOTE: Server receives SINGLE API key from client per request.
         Key rotation is managed by the frontend (financial security compliance).
         """
-        cfg = self.config.get('gemini', {})
-        api_key = cfg.get('apiKey')
+        cfg = self.config.get("gemini", {})
+        api_key = cfg.get("apiKey")
         if not api_key:
             raise ValueError("Gemini API key not configured")
-        model = cfg.get('model', 'gemini-2.5-pro')
+        model = cfg.get("model", "gemini-2.5-pro")
         base_url = f"https://generativelanguage.googleapis.com/v1beta/models/{model}"
         return api_key, model, base_url
 
     def _get_openai_config(self) -> tuple[str, str, Dict[str, str]]:
         """Get OpenAI config: (model, url, headers). Raises if api_key missing."""
-        cfg = self.config.get('openai', {})
-        api_key = cfg.get('apiKey')
+        cfg = self.config.get("openai", {})
+        api_key = cfg.get("apiKey")
         if not api_key:
             raise ValueError("OpenAI API key not configured")
-        model = cfg.get('model', 'gpt-4')
+        model = cfg.get("model", "gpt-4")
         url = "https://api.openai.com/v1/chat/completions"
-        headers = {"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"}
+        headers = {
+            "Authorization": f"Bearer {api_key}",
+            "Content-Type": "application/json",
+        }
         return model, url, headers
 
     def _get_vllm_config(self) -> tuple[str, str, Dict[str, str]]:
         """Get vLLM config: (model, url, headers)."""
-        cfg = self.config.get('vllm', {})
-        endpoint = cfg.get('endpoint', 'http://localhost:8000')
-        model = cfg.get('model', 'default')
+        cfg = self.config.get("vllm", {})
+        endpoint = cfg.get("endpoint", "http://localhost:8000")
+        model = cfg.get("model", "default")
         url = f"{endpoint}/v1/chat/completions"
         headers = {"Content-Type": "application/json"}
-        if cfg.get('apiKey'):
+        if cfg.get("apiKey"):
             headers["Authorization"] = f"Bearer {cfg['apiKey']}"
         return model, url, headers
 
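The three provider getters in the hunk above all read from one nested config dict. A minimal sketch of the shape they expect, using only key names visible in this diff (values are placeholders, not real credentials):

```python
# Hypothetical config illustrating the keys read by _get_gemini_config,
# _get_openai_config and _get_vllm_config; all values are placeholders.
config = {
    "provider": "gemini",  # or "openai" / "vllm"; defaults to "gemini"
    "gemini": {"apiKey": "<key>", "model": "gemini-2.5-pro", "temperature": 0.0},
    "openai": {"apiKey": "<key>", "model": "gpt-4"},
    "vllm": {"endpoint": "http://localhost:8000", "model": "default"},  # apiKey optional
}

service = LLMService(config)
```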
@@ -84,7 +88,9 @@ class LLMService:
             return f"Context:\n{context}\n\nUser Request:\n{prompt}"
         return prompt
 
-    def _build_openai_messages(self, prompt: str, context: Optional[str] = None) -> list:
+    def _build_openai_messages(
+        self, prompt: str, context: Optional[str] = None
+    ) -> list:
         """Build OpenAI-style messages array"""
         messages = []
         if context:
@@ -97,7 +103,7 @@ class LLMService:
         operation,
         max_retries: int = 3,
         provider: str = "API",
-        retryable_statuses: tuple = (503, 429)
+        retryable_statuses: tuple = (503, 429),
     ):
         """Execute operation with exponential backoff retry logic"""
         for attempt in range(max_retries):
@@ -105,8 +111,10 @@ class LLMService:
                 return await operation()
             except asyncio.TimeoutError:
                 if attempt < max_retries - 1:
-                    wait_time = (2 ** attempt) * 3
-                    print(f"[LLMService] Request timeout. Retrying in {wait_time}s... (attempt {attempt + 1}/{max_retries})")
+                    wait_time = (2**attempt) * 3
+                    print(
+                        f"[LLMService] Request timeout. Retrying in {wait_time}s... (attempt {attempt + 1}/{max_retries})"
+                    )
                     await asyncio.sleep(wait_time)
                     continue
                 raise Exception(f"Request timeout after {max_retries} retries")
@@ -116,15 +124,21 @@ class LLMService:
                 if "rate limit" in error_msg.lower() or "(429)" in error_msg:
                     if attempt < max_retries - 1:
                         wait_time = 40 + (attempt * 20)
-                        print(f"[LLMService] Rate limit hit. Waiting {wait_time}s before retry... (attempt {attempt + 1}/{max_retries})")
+                        print(
+                            f"[LLMService] Rate limit hit. Waiting {wait_time}s before retry... (attempt {attempt + 1}/{max_retries})"
+                        )
                         await asyncio.sleep(wait_time)
                         continue
-                    raise Exception(f"Rate limit exceeded after {max_retries} retries. Please wait a minute and try again.")
+                    raise Exception(
+                        f"Rate limit exceeded after {max_retries} retries. Please wait a minute and try again."
+                    )
                 # Server overload (503) error is also retryable
                 if "overloaded" in error_msg.lower() or "(503)" in error_msg:
                     if attempt < max_retries - 1:
-                        wait_time = (2 ** attempt) * 5
-                        print(f"[LLMService] Server overloaded. Retrying in {wait_time}s... (attempt {attempt + 1}/{max_retries})")
+                        wait_time = (2**attempt) * 5
+                        print(
+                            f"[LLMService] Server overloaded. Retrying in {wait_time}s... (attempt {attempt + 1}/{max_retries})"
+                        )
                         await asyncio.sleep(wait_time)
                         continue
                     raise
@@ -135,8 +149,10 @@ class LLMService:
                     raise
                 # Network error retry
                 if attempt < max_retries - 1:
-                    wait_time = (2 ** attempt) * 2
-                    print(f"[LLMService] Network error: {e}. Retrying in {wait_time}s... (attempt {attempt + 1}/{max_retries})")
+                    wait_time = (2**attempt) * 2
+                    print(
+                        f"[LLMService] Network error: {e}. Retrying in {wait_time}s... (attempt {attempt + 1}/{max_retries})"
+                    )
                     await asyncio.sleep(wait_time)
                     continue
                 raise
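The retry hunks above encode three different backoff schedules (timeouts, rate limits, and 503 overloads). A standalone sketch of the arithmetic, copied from the expressions in the diff, makes the resulting wait times explicit:

```python
# Wait-time formulas used by the retry logic above (illustrative only).
def timeout_wait(attempt: int) -> int:
    return (2**attempt) * 3       # 3s, 6s, 12s

def rate_limit_wait(attempt: int) -> int:
    return 40 + (attempt * 20)    # 40s, 60s, 80s

def overload_wait(attempt: int) -> int:
    return (2**attempt) * 5       # 5s, 10s, 20s

print([timeout_wait(a) for a in range(3)])     # [3, 6, 12]
print([rate_limit_wait(a) for a in range(3)])  # [40, 60, 80]
print([overload_wait(a) for a in range(3)])    # [5, 10, 20]
```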
@@ -148,12 +164,14 @@ class LLMService:
         payload: Dict[str, Any],
         headers: Optional[Dict[str, str]] = None,
         timeout_seconds: int = 60,
-        provider: str = "API"
+        provider: str = "API",
     ):
         """Context manager for HTTP POST requests with automatic session cleanup"""
         timeout = aiohttp.ClientTimeout(total=timeout_seconds)
         connector = aiohttp.TCPConnector(ssl=self._ssl_context)
-        async with aiohttp.ClientSession(timeout=timeout, connector=connector) as session:
+        async with aiohttp.ClientSession(
+            timeout=timeout, connector=connector
+        ) as session:
             async with session.post(url, json=payload, headers=headers) as response:
                 if response.status != 200:
                     error_text = await response.text()
@@ -167,17 +185,28 @@ class LLMService:
         payload: Dict[str, Any],
         headers: Optional[Dict[str, str]] = None,
         timeout_seconds: int = 60,
-        provider: str = "API"
+        provider: str = "API",
     ) -> Dict[str, Any]:
         """Make request and return JSON response"""
-        async with self._request(url, payload, headers, timeout_seconds, provider) as response:
+        async with self._request(
+            url, payload, headers, timeout_seconds, provider
+        ) as response:
             return await response.json()
 
-    async def _stream_response(self, url: str, payload: Dict[str, Any], headers: Optional[Dict[str, str]], provider: str, line_parser):
+    async def _stream_response(
+        self,
+        url: str,
+        payload: Dict[str, Any],
+        headers: Optional[Dict[str, str]],
+        provider: str,
+        line_parser,
+    ):
         """Stream response and yield parsed content"""
-        async with self._request(url, payload, headers, timeout_seconds=120, provider=provider) as response:
+        async with self._request(
+            url, payload, headers, timeout_seconds=120, provider=provider
+        ) as response:
             async for line in response.content:
-                line_text = line.decode('utf-8').strip()
+                line_text = line.decode("utf-8").strip()
                 content = line_parser(line_text)
                 if content:
                     yield content
@@ -186,22 +215,22 @@ class LLMService:
 
     def _parse_openai_response(self, data: Dict[str, Any]) -> str:
         """Parse OpenAI-compatible response format (used by OpenAI and vLLM)"""
-        if 'choices' in data and len(data['choices']) > 0:
-            choice = data['choices'][0]
-            if 'message' in choice and 'content' in choice['message']:
-                return choice['message']['content']
-            elif 'text' in choice:
-                return choice['text']
+        if "choices" in data and len(data["choices"]) > 0:
+            choice = data["choices"][0]
+            if "message" in choice and "content" in choice["message"]:
+                return choice["message"]["content"]
+            elif "text" in choice:
+                return choice["text"]
         raise Exception("No valid response from API")
 
     def _extract_gemini_text(self, data: Dict[str, Any]) -> Optional[str]:
         """Extract text from Gemini response data (shared by response and stream parsing)"""
-        if 'candidates' in data and len(data['candidates']) > 0:
-            candidate = data['candidates'][0]
-            if 'content' in candidate and 'parts' in candidate['content']:
-                parts = candidate['content']['parts']
-                if len(parts) > 0 and 'text' in parts[0]:
-                    return parts[0]['text']
+        if "candidates" in data and len(data["candidates"]) > 0:
+            candidate = data["candidates"][0]
+            if "content" in candidate and "parts" in candidate["content"]:
+                parts = candidate["content"]["parts"]
+                if len(parts) > 0 and "text" in parts[0]:
+                    return parts[0]["text"]
         return None
 
     def _parse_gemini_response(self, data: Dict[str, Any]) -> str:
@@ -213,10 +242,10 @@ class LLMService:
 
     def _parse_sse_line(self, line_text: str, extractor) -> Optional[str]:
         """Parse SSE line with given extractor function"""
-        if not line_text.startswith('data: '):
+        if not line_text.startswith("data: "):
             return None
         data_str = line_text[6:]
-        if data_str == '[DONE]':
+        if data_str == "[DONE]":
             return None
         try:
             data = json.loads(data_str)
@@ -226,9 +255,9 @@ class LLMService:
 
     def _extract_openai_delta(self, data: Dict[str, Any]) -> Optional[str]:
         """Extract content delta from OpenAI stream data"""
-        if 'choices' in data and len(data['choices']) > 0:
-            delta = data['choices'][0].get('delta', {})
-            return delta.get('content', '') or None
+        if "choices" in data and len(data["choices"]) > 0:
+            delta = data["choices"][0].get("delta", {})
+            return delta.get("content", "") or None
         return None
 
     def _parse_openai_stream_line(self, line_text: str) -> Optional[str]:
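For context, the SSE helpers above combine into a simple pipeline: strip the `data: ` prefix, skip the `[DONE]` sentinel, JSON-decode, then pull the delta text. A self-contained sketch of that flow (the function name here is illustrative, not part of the package):

```python
import json
from typing import Any, Dict, Optional

def parse_sse_delta(line_text: str) -> Optional[str]:
    """Mirror of the _parse_sse_line + _extract_openai_delta flow shown above."""
    if not line_text.startswith("data: "):
        return None
    data_str = line_text[6:]
    if data_str == "[DONE]":
        return None
    try:
        data: Dict[str, Any] = json.loads(data_str)
    except json.JSONDecodeError:
        return None
    if "choices" in data and len(data["choices"]) > 0:
        delta = data["choices"][0].get("delta", {})
        return delta.get("content", "") or None
    return None

print(parse_sse_delta('data: {"choices": [{"delta": {"content": "Hi"}}]}'))  # Hi
print(parse_sse_delta("data: [DONE]"))  # None
```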
@@ -245,7 +274,7 @@ class LLMService:
         messages: list,
         max_tokens: int = 4096,
         temperature: float = 0.0,
-        stream: bool = False
+        stream: bool = False,
     ) -> Dict[str, Any]:
         """Build OpenAI-compatible request payload"""
         return {
@@ -253,14 +282,14 @@ class LLMService:
             "messages": messages,
             "max_tokens": max_tokens,
             "temperature": temperature,
-            "stream": stream
+            "stream": stream,
         }
 
     def _build_gemini_payload(
         self,
         prompt: str,
         max_output_tokens: int = 32768,
-        temperature: Optional[float] = None
+        temperature: Optional[float] = None,
     ) -> Dict[str, Any]:
         """Build Gemini API request payload
 
@@ -269,9 +298,9 @@ class LLMService:
             max_output_tokens: Maximum tokens in response (default 32768 for Gemini 2.5 with thinking)
             temperature: 0.0 for deterministic, higher for creativity (default from config)
         """
-        cfg = self.config.get('gemini', {})
-        temp = temperature if temperature is not None else cfg.get('temperature', 0.0)
-        model = cfg.get('model', 'gemini-2.5-flash')
+        cfg = self.config.get("gemini", {})
+        temp = temperature if temperature is not None else cfg.get("temperature", 0.0)
+        model = cfg.get("model", "gemini-2.5-flash")
 
         payload = {
             "contents": [{"parts": [{"text": prompt}]}],
@@ -279,51 +308,67 @@ class LLMService:
                 "temperature": temp,
                 "topK": 1,
                 "topP": 0.95,
-                "maxOutputTokens": max_output_tokens
+                "maxOutputTokens": max_output_tokens,
             },
             "safetySettings": [
-                {"category": "HARM_CATEGORY_HARASSMENT", "threshold": "BLOCK_MEDIUM_AND_ABOVE"},
-                {"category": "HARM_CATEGORY_HATE_SPEECH", "threshold": "BLOCK_MEDIUM_AND_ABOVE"},
-                {"category": "HARM_CATEGORY_SEXUALLY_EXPLICIT", "threshold": "BLOCK_MEDIUM_AND_ABOVE"},
-                {"category": "HARM_CATEGORY_DANGEROUS_CONTENT", "threshold": "BLOCK_MEDIUM_AND_ABOVE"}
-            ]
+                {
+                    "category": "HARM_CATEGORY_HARASSMENT",
+                    "threshold": "BLOCK_MEDIUM_AND_ABOVE",
+                },
+                {
+                    "category": "HARM_CATEGORY_HATE_SPEECH",
+                    "threshold": "BLOCK_MEDIUM_AND_ABOVE",
+                },
+                {
+                    "category": "HARM_CATEGORY_SEXUALLY_EXPLICIT",
+                    "threshold": "BLOCK_MEDIUM_AND_ABOVE",
+                },
+                {
+                    "category": "HARM_CATEGORY_DANGEROUS_CONTENT",
+                    "threshold": "BLOCK_MEDIUM_AND_ABOVE",
+                },
+            ],
         }
 
         # Gemini 2.5 models have built-in "thinking" that consumes output tokens
-        if '2.5' in model or '2-5' in model:
-            payload["generationConfig"]["thinkingConfig"] = {
-                "thinkingBudget": 8192
-            }
+        if "2.5" in model or "2-5" in model:
+            payload["generationConfig"]["thinkingConfig"] = {"thinkingBudget": 8192}
 
         return payload
 
-    async def generate_response_stream(self, prompt: str, context: Optional[str] = None):
+    async def generate_response_stream(
+        self, prompt: str, context: Optional[str] = None
+    ):
         """Generate a streaming response from the configured LLM provider (async generator)"""
-        if self.provider == 'gemini':
+        if self.provider == "gemini":
             async for chunk in self._call_gemini_stream(prompt, context):
                 yield chunk
-        elif self.provider == 'vllm':
+        elif self.provider == "vllm":
             async for chunk in self._call_vllm_stream(prompt, context):
                 yield chunk
-        elif self.provider == 'openai':
+        elif self.provider == "openai":
             async for chunk in self._call_openai_stream(prompt, context):
                 yield chunk
         else:
             raise ValueError(f"Unsupported provider: {self.provider}")
 
-    async def generate_response(self, prompt: str, context: Optional[str] = None) -> str:
+    async def generate_response(
+        self, prompt: str, context: Optional[str] = None
+    ) -> str:
        """Generate a response from the configured LLM provider"""
 
-        if self.provider == 'gemini':
+        if self.provider == "gemini":
             return await self._call_gemini(prompt, context)
-        elif self.provider == 'vllm':
+        elif self.provider == "vllm":
             return await self._call_vllm(prompt, context)
-        elif self.provider == 'openai':
+        elif self.provider == "openai":
             return await self._call_openai(prompt, context)
         else:
             raise ValueError(f"Unsupported provider: {self.provider}")
 
-    async def _call_gemini(self, prompt: str, context: Optional[str] = None, max_retries: int = 3) -> str:
+    async def _call_gemini(
+        self, prompt: str, context: Optional[str] = None, max_retries: int = 3
+    ) -> str:
         """Call Google Gemini API with single API key.
 
         NOTE: Server does NOT manage key rotation (financial security compliance).
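For orientation, _build_gemini_payload as shown above assembles a request body roughly like the following for a 2.5-family model (a sketch built only from the keys visible in this hunk; the thinkingConfig block is added only when the model name contains "2.5" or "2-5"):

```python
# Approximate payload produced by _build_gemini_payload(prompt, 32768) with
# a "gemini-2.5-flash" model; illustrative values only.
payload = {
    "contents": [{"parts": [{"text": "Summarize the last notebook cell"}]}],
    "generationConfig": {
        "temperature": 0.0,
        "topK": 1,
        "topP": 0.95,
        "maxOutputTokens": 32768,
        "thinkingConfig": {"thinkingBudget": 8192},  # 2.5-family models only
    },
    "safetySettings": [
        {"category": "HARM_CATEGORY_HARASSMENT", "threshold": "BLOCK_MEDIUM_AND_ABOVE"},
        # HATE_SPEECH, SEXUALLY_EXPLICIT and DANGEROUS_CONTENT entries use the same threshold
    ],
}
```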
@@ -340,21 +385,29 @@ class LLMService:
             try:
                 timeout = aiohttp.ClientTimeout(total=60)
                 connector = aiohttp.TCPConnector(ssl=self._ssl_context)
-                async with aiohttp.ClientSession(timeout=timeout, connector=connector) as session:
+                async with aiohttp.ClientSession(
+                    timeout=timeout, connector=connector
+                ) as session:
                     async with session.post(url, json=payload) as response:
                         # 429 Rate limit - return to client for key rotation
                         if response.status == 429:
                             error_text = await response.text()
-                            print(f"[LLMService] Rate limit (429): {error_text[:100]}...")
+                            print(
+                                f"[LLMService] Rate limit (429): {error_text[:100]}..."
+                            )
                             raise Exception(f"RATE_LIMIT_EXCEEDED: {error_text}")
 
                         # 503 Server overload - retry with backoff
                         if response.status == 503:
                             error_text = await response.text()
-                            print(f"[LLMService] Server overloaded (503): {error_text[:100]}...")
+                            print(
+                                f"[LLMService] Server overloaded (503): {error_text[:100]}..."
+                            )
                             if attempt < max_retries - 1:
-                                wait_time = (2 ** attempt) * 5
-                                print(f"[LLMService] Waiting {wait_time}s before retry...")
+                                wait_time = (2**attempt) * 5
+                                print(
+                                    f"[LLMService] Waiting {wait_time}s before retry..."
+                                )
                                 await asyncio.sleep(wait_time)
                                 continue
                             raise Exception(f"Server overloaded: {error_text}")
@@ -366,24 +419,30 @@ class LLMService:
 
                         # Success
                         data = await response.json()
-                        print(f"[LLMService] Gemini API Response Status: {response.status}")
+                        print(
+                            f"[LLMService] Gemini API Response Status: {response.status}"
+                        )
 
                         # Debug: finishReason check
-                        if 'candidates' in data and len(data['candidates']) > 0:
-                            candidate = data['candidates'][0]
-                            finish_reason = candidate.get('finishReason', 'UNKNOWN')
+                        if "candidates" in data and len(data["candidates"]) > 0:
+                            candidate = data["candidates"][0]
+                            finish_reason = candidate.get("finishReason", "UNKNOWN")
                             print(f"[LLMService] Gemini finishReason: {finish_reason}")
-                            if finish_reason not in ['STOP', 'UNKNOWN']:
-                                print(f"[LLMService] WARNING: Response may be incomplete! finishReason={finish_reason}")
+                            if finish_reason not in ["STOP", "UNKNOWN"]:
+                                print(
+                                    f"[LLMService] WARNING: Response may be incomplete! finishReason={finish_reason}"
+                                )
 
                         response_text = self._parse_gemini_response(data)
-                        print(f"[LLMService] Successfully received response from {model} (length: {len(response_text)} chars)")
+                        print(
+                            f"[LLMService] Successfully received response from {model} (length: {len(response_text)} chars)"
+                        )
 
                         return response_text
 
             except asyncio.TimeoutError:
                 if attempt < max_retries - 1:
-                    wait_time = (2 ** attempt) * 3
+                    wait_time = (2**attempt) * 3
                     print(f"[LLMService] Timeout. Retrying in {wait_time}s...")
                     await asyncio.sleep(wait_time)
                     continue
@@ -399,8 +458,10 @@ class LLMService:
                     raise
                 # Network error - retry with delay
                 if attempt < max_retries - 1:
-                    wait_time = (2 ** attempt) * 2
-                    print(f"[LLMService] Network error: {e}. Retrying in {wait_time}s...")
+                    wait_time = (2**attempt) * 2
+                    print(
+                        f"[LLMService] Network error: {e}. Retrying in {wait_time}s..."
+                    )
                     await asyncio.sleep(wait_time)
                     continue
                 raise
@@ -421,12 +482,16 @@ class LLMService:
         """Call OpenAI API"""
         model, url, headers = self._get_openai_config()
         messages = self._build_openai_messages(prompt, context)
-        payload = self._build_openai_payload(model, messages, max_tokens=2000, stream=False)
+        payload = self._build_openai_payload(
+            model, messages, max_tokens=2000, stream=False
+        )
 
         data = await self._request_json(url, payload, headers, provider="OpenAI")
         return self._parse_openai_response(data)
 
-    async def _call_gemini_stream(self, prompt: str, context: Optional[str] = None, max_retries: int = 3):
+    async def _call_gemini_stream(
+        self, prompt: str, context: Optional[str] = None, max_retries: int = 3
+    ):
         """Call Google Gemini API with streaming using single API key.
 
         NOTE: Server does NOT manage key rotation (financial security compliance).
@@ -443,21 +508,29 @@ class LLMService:
             try:
                 timeout = aiohttp.ClientTimeout(total=120)
                 connector = aiohttp.TCPConnector(ssl=self._ssl_context)
-                async with aiohttp.ClientSession(timeout=timeout, connector=connector) as session:
+                async with aiohttp.ClientSession(
+                    timeout=timeout, connector=connector
+                ) as session:
                     async with session.post(url, json=payload) as response:
                         # 429 Rate limit - return to client for key rotation
                         if response.status == 429:
                             error_text = await response.text()
-                            print(f"[LLMService] Rate limit (429) stream: {error_text[:100]}...")
+                            print(
+                                f"[LLMService] Rate limit (429) stream: {error_text[:100]}..."
+                            )
                             raise Exception(f"RATE_LIMIT_EXCEEDED: {error_text}")
 
                         # 503 Server overload - retry with backoff
                         if response.status == 503:
                             error_text = await response.text()
-                            print(f"[LLMService] Server overloaded (503) stream: {error_text[:100]}...")
+                            print(
+                                f"[LLMService] Server overloaded (503) stream: {error_text[:100]}..."
+                            )
                             if attempt < max_retries - 1:
-                                wait_time = (2 ** attempt) * 5
-                                print(f"[LLMService] Waiting {wait_time}s before retry...")
+                                wait_time = (2**attempt) * 5
+                                print(
+                                    f"[LLMService] Waiting {wait_time}s before retry..."
+                                )
                                 await asyncio.sleep(wait_time)
                                 continue
                             raise Exception(f"Server overloaded: {error_text}")
@@ -468,9 +541,9 @@ class LLMService:
                             raise Exception(f"Gemini API error: {error_text}")
 
                         # Success - stream the response
-                        print(f"[LLMService] Successfully connected to Gemini stream")
+                        print("[LLMService] Successfully connected to Gemini stream")
                         async for line in response.content:
-                            line_text = line.decode('utf-8').strip()
+                            line_text = line.decode("utf-8").strip()
                             content = self._parse_gemini_stream_line(line_text)
                             if content:
                                 yield content
@@ -478,7 +551,7 @@ class LLMService:
 
             except asyncio.TimeoutError:
                 if attempt < max_retries - 1:
-                    wait_time = (2 ** attempt) * 3
+                    wait_time = (2**attempt) * 3
                     print(f"[LLMService] Timeout. Retrying in {wait_time}s...")
                     await asyncio.sleep(wait_time)
                     continue
@@ -494,8 +567,10 @@ class LLMService:
                     raise
                 # Network error - retry with delay
                 if attempt < max_retries - 1:
-                    wait_time = (2 ** attempt) * 2
-                    print(f"[LLMService] Network error: {e}. Retrying in {wait_time}s...")
+                    wait_time = (2**attempt) * 2
+                    print(
+                        f"[LLMService] Network error: {e}. Retrying in {wait_time}s..."
+                    )
                     await asyncio.sleep(wait_time)
                     continue
                 raise
@@ -509,21 +584,29 @@ class LLMService:
         messages = [{"role": "user", "content": full_prompt}]
         payload = self._build_openai_payload(model, messages, stream=True)
 
-        async for content in self._stream_response(url, payload, headers, "vLLM", self._parse_openai_stream_line):
+        async for content in self._stream_response(
+            url, payload, headers, "vLLM", self._parse_openai_stream_line
+        ):
             yield content
 
     async def _call_openai_stream(self, prompt: str, context: Optional[str] = None):
         """Call OpenAI API with streaming"""
         model, url, headers = self._get_openai_config()
         messages = self._build_openai_messages(prompt, context)
-        payload = self._build_openai_payload(model, messages, max_tokens=2000, stream=True)
+        payload = self._build_openai_payload(
+            model, messages, max_tokens=2000, stream=True
+        )
 
-        async for content in self._stream_response(url, payload, headers, "OpenAI", self._parse_openai_stream_line):
+        async for content in self._stream_response(
+            url, payload, headers, "OpenAI", self._parse_openai_stream_line
+        ):
             yield content
 
 
 # Module-level helper functions for Auto-Agent
-async def call_llm(prompt: str, config: Dict[str, Any], context: Optional[str] = None) -> str:
+async def call_llm(
+    prompt: str, config: Dict[str, Any], context: Optional[str] = None
+) -> str:
     """
     Convenience function to call LLM with the given config.
 
@@ -539,7 +622,9 @@ async def call_llm(prompt: str, config: Dict[str, Any], context: Optional[str] =
     return await service.generate_response(prompt, context)
 
 
-async def call_llm_stream(prompt: str, config: Dict[str, Any], context: Optional[str] = None):
+async def call_llm_stream(
+    prompt: str, config: Dict[str, Any], context: Optional[str] = None
+):
     """
     Convenience function to stream LLM response with the given config.
 
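Taken together, the module-level helpers at the end of the file wrap LLMService in a one-call interface. A hedged usage sketch, assuming the import path matches hdsp_agent_core/llm/service.py from the file list above and a locally running vLLM endpoint:

```python
import asyncio

from hdsp_agent_core.llm.service import call_llm, call_llm_stream

async def main() -> None:
    # Config keys mirror the ones read by LLMService in this diff; the
    # endpoint and model values are placeholders for a local vLLM server.
    config = {
        "provider": "vllm",
        "vllm": {"endpoint": "http://localhost:8000", "model": "default"},
    }

    text = await call_llm("Explain this change in two sentences.", config)
    print(text)

    async for chunk in call_llm_stream("Now stream the same answer.", config):
        print(chunk, end="", flush=True)

asyncio.run(main())
```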