hdsp-jupyter-extension 2.0.7__py3-none-any.whl → 2.0.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agent_server/core/embedding_service.py +67 -46
- agent_server/core/rag_manager.py +31 -17
- agent_server/core/retriever.py +13 -8
- agent_server/core/vllm_embedding_service.py +243 -0
- agent_server/langchain/agent.py +8 -0
- agent_server/langchain/custom_middleware.py +58 -31
- agent_server/langchain/hitl_config.py +6 -1
- agent_server/langchain/logging_utils.py +53 -14
- agent_server/langchain/prompts.py +47 -16
- agent_server/langchain/tools/__init__.py +13 -0
- agent_server/langchain/tools/file_tools.py +285 -7
- agent_server/langchain/tools/file_utils.py +334 -0
- agent_server/langchain/tools/lsp_tools.py +264 -0
- agent_server/main.py +7 -0
- agent_server/routers/langchain_agent.py +115 -19
- agent_server/routers/rag.py +8 -3
- hdsp_agent_core/models/rag.py +15 -1
- hdsp_agent_core/services/rag_service.py +6 -1
- {hdsp_jupyter_extension-2.0.7.data → hdsp_jupyter_extension-2.0.8.data}/data/share/jupyter/labextensions/hdsp-agent/build_log.json +1 -1
- {hdsp_jupyter_extension-2.0.7.data → hdsp_jupyter_extension-2.0.8.data}/data/share/jupyter/labextensions/hdsp-agent/package.json +3 -2
- hdsp_jupyter_extension-2.0.7.data/data/share/jupyter/labextensions/hdsp-agent/static/frontend_styles_index_js.4770ec0fb2d173b6deb4.js → hdsp_jupyter_extension-2.0.8.data/data/share/jupyter/labextensions/hdsp-agent/static/frontend_styles_index_js.8740a527757068814573.js +160 -3
- hdsp_jupyter_extension-2.0.8.data/data/share/jupyter/labextensions/hdsp-agent/static/frontend_styles_index_js.8740a527757068814573.js.map +1 -0
- hdsp_jupyter_extension-2.0.7.data/data/share/jupyter/labextensions/hdsp-agent/static/lib_index_js.29cf4312af19e86f82af.js → hdsp_jupyter_extension-2.0.8.data/data/share/jupyter/labextensions/hdsp-agent/static/lib_index_js.e4ff4b5779b5e049f84c.js +1759 -221
- hdsp_jupyter_extension-2.0.8.data/data/share/jupyter/labextensions/hdsp-agent/static/lib_index_js.e4ff4b5779b5e049f84c.js.map +1 -0
- hdsp_jupyter_extension-2.0.7.data/data/share/jupyter/labextensions/hdsp-agent/static/remoteEntry.61343eb4cf0577e74b50.js → hdsp_jupyter_extension-2.0.8.data/data/share/jupyter/labextensions/hdsp-agent/static/remoteEntry.020cdb0b864cfaa4e41e.js +14 -12
- hdsp_jupyter_extension-2.0.8.data/data/share/jupyter/labextensions/hdsp-agent/static/remoteEntry.020cdb0b864cfaa4e41e.js.map +1 -0
- jupyter_ext/labextension/static/vendors-node_modules_emotion_cache_dist_emotion-cache_browser_development_esm_js-node_modules-782ee5.d9ed8645ef1d311657d8.js → hdsp_jupyter_extension-2.0.8.data/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_emotion_cache_dist_emotion-cache_browser_development_esm_js.24edcc52a1c014a8a5f0.js +2 -209
- hdsp_jupyter_extension-2.0.8.data/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_emotion_cache_dist_emotion-cache_browser_development_esm_js.24edcc52a1c014a8a5f0.js.map +1 -0
- jupyter_ext/labextension/static/vendors-node_modules_emotion_react_dist_emotion-react_browser_development_esm_js.36b49c71871f98d4f549.js → hdsp_jupyter_extension-2.0.8.data/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_emotion_react_dist_emotion-react_browser_development_esm_js.19ecf6babe00caff6b8a.js +209 -2
- hdsp_jupyter_extension-2.0.8.data/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_emotion_react_dist_emotion-react_browser_development_esm_js.19ecf6babe00caff6b8a.js.map +1 -0
- jupyter_ext/labextension/static/vendors-node_modules_mui_material_utils_createSvgIcon_js.2e13df4ea61496e95d45.js → hdsp_jupyter_extension-2.0.8.data/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_mui_material_utils_createSvgIcon_js.1f5038488cdfd8b3a85d.js +212 -3
- hdsp_jupyter_extension-2.0.8.data/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_mui_material_utils_createSvgIcon_js.1f5038488cdfd8b3a85d.js.map +1 -0
- {hdsp_jupyter_extension-2.0.7.dist-info → hdsp_jupyter_extension-2.0.8.dist-info}/METADATA +1 -1
- {hdsp_jupyter_extension-2.0.7.dist-info → hdsp_jupyter_extension-2.0.8.dist-info}/RECORD +66 -63
- jupyter_ext/__init__.py +18 -0
- jupyter_ext/_version.py +1 -1
- jupyter_ext/handlers.py +176 -1
- jupyter_ext/labextension/build_log.json +1 -1
- jupyter_ext/labextension/package.json +3 -2
- jupyter_ext/labextension/static/{frontend_styles_index_js.4770ec0fb2d173b6deb4.js → frontend_styles_index_js.8740a527757068814573.js} +160 -3
- jupyter_ext/labextension/static/frontend_styles_index_js.8740a527757068814573.js.map +1 -0
- jupyter_ext/labextension/static/{lib_index_js.29cf4312af19e86f82af.js → lib_index_js.e4ff4b5779b5e049f84c.js} +1759 -221
- jupyter_ext/labextension/static/lib_index_js.e4ff4b5779b5e049f84c.js.map +1 -0
- jupyter_ext/labextension/static/{remoteEntry.61343eb4cf0577e74b50.js → remoteEntry.020cdb0b864cfaa4e41e.js} +14 -12
- jupyter_ext/labextension/static/remoteEntry.020cdb0b864cfaa4e41e.js.map +1 -0
- hdsp_jupyter_extension-2.0.7.data/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_emotion_cache_dist_emotion-cache_browser_development_esm_js-node_modules-782ee5.d9ed8645ef1d311657d8.js → jupyter_ext/labextension/static/vendors-node_modules_emotion_cache_dist_emotion-cache_browser_development_esm_js.24edcc52a1c014a8a5f0.js +2 -209
- jupyter_ext/labextension/static/vendors-node_modules_emotion_cache_dist_emotion-cache_browser_development_esm_js.24edcc52a1c014a8a5f0.js.map +1 -0
- hdsp_jupyter_extension-2.0.7.data/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_emotion_react_dist_emotion-react_browser_development_esm_js.36b49c71871f98d4f549.js → jupyter_ext/labextension/static/vendors-node_modules_emotion_react_dist_emotion-react_browser_development_esm_js.19ecf6babe00caff6b8a.js +209 -2
- jupyter_ext/labextension/static/vendors-node_modules_emotion_react_dist_emotion-react_browser_development_esm_js.19ecf6babe00caff6b8a.js.map +1 -0
- hdsp_jupyter_extension-2.0.7.data/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_mui_material_utils_createSvgIcon_js.2e13df4ea61496e95d45.js → jupyter_ext/labextension/static/vendors-node_modules_mui_material_utils_createSvgIcon_js.1f5038488cdfd8b3a85d.js +212 -3
- jupyter_ext/labextension/static/vendors-node_modules_mui_material_utils_createSvgIcon_js.1f5038488cdfd8b3a85d.js.map +1 -0
- hdsp_jupyter_extension-2.0.7.data/data/share/jupyter/labextensions/hdsp-agent/static/frontend_styles_index_js.4770ec0fb2d173b6deb4.js.map +0 -1
- hdsp_jupyter_extension-2.0.7.data/data/share/jupyter/labextensions/hdsp-agent/static/lib_index_js.29cf4312af19e86f82af.js.map +0 -1
- hdsp_jupyter_extension-2.0.7.data/data/share/jupyter/labextensions/hdsp-agent/static/remoteEntry.61343eb4cf0577e74b50.js.map +0 -1
- hdsp_jupyter_extension-2.0.7.data/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_emotion_cache_dist_emotion-cache_browser_development_esm_js-node_modules-782ee5.d9ed8645ef1d311657d8.js.map +0 -1
- hdsp_jupyter_extension-2.0.7.data/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_emotion_react_dist_emotion-react_browser_development_esm_js.36b49c71871f98d4f549.js.map +0 -1
- hdsp_jupyter_extension-2.0.7.data/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_mui_material_utils_createSvgIcon_js.2e13df4ea61496e95d45.js.map +0 -1
- jupyter_ext/labextension/static/frontend_styles_index_js.4770ec0fb2d173b6deb4.js.map +0 -1
- jupyter_ext/labextension/static/lib_index_js.29cf4312af19e86f82af.js.map +0 -1
- jupyter_ext/labextension/static/remoteEntry.61343eb4cf0577e74b50.js.map +0 -1
- jupyter_ext/labextension/static/vendors-node_modules_emotion_cache_dist_emotion-cache_browser_development_esm_js-node_modules-782ee5.d9ed8645ef1d311657d8.js.map +0 -1
- jupyter_ext/labextension/static/vendors-node_modules_emotion_react_dist_emotion-react_browser_development_esm_js.36b49c71871f98d4f549.js.map +0 -1
- jupyter_ext/labextension/static/vendors-node_modules_mui_material_utils_createSvgIcon_js.2e13df4ea61496e95d45.js.map +0 -1
- {hdsp_jupyter_extension-2.0.7.data → hdsp_jupyter_extension-2.0.8.data}/data/etc/jupyter/jupyter_server_config.d/hdsp_jupyter_extension.json +0 -0
- {hdsp_jupyter_extension-2.0.7.data → hdsp_jupyter_extension-2.0.8.data}/data/share/jupyter/labextensions/hdsp-agent/install.json +0 -0
- {hdsp_jupyter_extension-2.0.7.data → hdsp_jupyter_extension-2.0.8.data}/data/share/jupyter/labextensions/hdsp-agent/static/node_modules_emotion_use-insertion-effect-with-fallbacks_dist_emotion-use-insertion-effect-wi-3ba6b80.c095373419d05e6f141a.js +0 -0
- {hdsp_jupyter_extension-2.0.7.data → hdsp_jupyter_extension-2.0.8.data}/data/share/jupyter/labextensions/hdsp-agent/static/node_modules_emotion_use-insertion-effect-with-fallbacks_dist_emotion-use-insertion-effect-wi-3ba6b80.c095373419d05e6f141a.js.map +0 -0
- {hdsp_jupyter_extension-2.0.7.data → hdsp_jupyter_extension-2.0.8.data}/data/share/jupyter/labextensions/hdsp-agent/static/node_modules_emotion_use-insertion-effect-with-fallbacks_dist_emotion-use-insertion-effect-wi-3ba6b81.61e75fb98ecff46cf836.js +0 -0
- {hdsp_jupyter_extension-2.0.7.data → hdsp_jupyter_extension-2.0.8.data}/data/share/jupyter/labextensions/hdsp-agent/static/node_modules_emotion_use-insertion-effect-with-fallbacks_dist_emotion-use-insertion-effect-wi-3ba6b81.61e75fb98ecff46cf836.js.map +0 -0
- {hdsp_jupyter_extension-2.0.7.data → hdsp_jupyter_extension-2.0.8.data}/data/share/jupyter/labextensions/hdsp-agent/static/style.js +0 -0
- {hdsp_jupyter_extension-2.0.7.data → hdsp_jupyter_extension-2.0.8.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_babel_runtime_helpers_esm_extends_js-node_modules_emotion_serialize_dist-051195.e2553aab0c3963b83dd7.js +0 -0
- {hdsp_jupyter_extension-2.0.7.data → hdsp_jupyter_extension-2.0.8.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_babel_runtime_helpers_esm_extends_js-node_modules_emotion_serialize_dist-051195.e2553aab0c3963b83dd7.js.map +0 -0
- {hdsp_jupyter_extension-2.0.7.data → hdsp_jupyter_extension-2.0.8.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_emotion_styled_dist_emotion-styled_browser_development_esm_js.661fb5836f4978a7c6e1.js +0 -0
- {hdsp_jupyter_extension-2.0.7.data → hdsp_jupyter_extension-2.0.8.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_emotion_styled_dist_emotion-styled_browser_development_esm_js.661fb5836f4978a7c6e1.js.map +0 -0
- {hdsp_jupyter_extension-2.0.7.data → hdsp_jupyter_extension-2.0.8.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_mui_material_index_js.985697e0162d8d088ca2.js +0 -0
- {hdsp_jupyter_extension-2.0.7.data → hdsp_jupyter_extension-2.0.8.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_mui_material_index_js.985697e0162d8d088ca2.js.map +0 -0
- {hdsp_jupyter_extension-2.0.7.dist-info → hdsp_jupyter_extension-2.0.8.dist-info}/WHEEL +0 -0
- {hdsp_jupyter_extension-2.0.7.dist-info → hdsp_jupyter_extension-2.0.8.dist-info}/licenses/LICENSE +0 -0
agent_server/core/embedding_service.py
CHANGED

@@ -4,13 +4,14 @@ Local Embedding Service - Wraps sentence-transformers for local embedding genera
 Features:
 - Zero external API calls (data sovereignty)
 - Lazy model loading (only when first needed)
-- Thread-safe singleton pattern
+- Thread-safe singleton pattern with async support
 - Configurable model and device
 - E5 model prefix handling for optimal performance

 Default model: intfloat/multilingual-e5-small (384 dimensions, Korean support)
 """

+import asyncio
 import logging
 from typing import TYPE_CHECKING, List, Optional

@@ -55,51 +56,59 @@ class EmbeddingService:
         self._model = None
         self._dimension: Optional[int] = None
         self._is_e5_model: bool = False
+        self._load_lock = asyncio.Lock()  # Thread-safe lazy loading

-
-
-
-
-            self._load_model()
-        return self._model
-
-    def _load_model(self) -> None:
-        """Load the sentence-transformers model"""
-        try:
-            from sentence_transformers import SentenceTransformer
-        except ImportError:
-            raise ImportError(
-                "sentence-transformers is required for RAG. "
-                "Install with: pip install sentence-transformers"
-            )
-
-        model_name = self._config.get_model_name()
-        device = self._config.get_device()
-
-        logger.info(f"Loading embedding model: {model_name} on {device}")
-
-        try:
-            self._model = SentenceTransformer(
-                model_name, device=device, cache_folder=self._config.cache_folder
-            )
-            self._dimension = self._model.get_sentence_embedding_dimension()
-
-            # Check if E5 model (requires special prefix)
-            self._is_e5_model = "e5" in model_name.lower()
+    async def _ensure_model_loaded(self):
+        """Lazy load the embedding model (thread-safe, async)"""
+        if self._model is not None:
+            return

-
-
-
-
-
-
-
+        async with self._load_lock:
+            # Double-check after acquiring lock
+            if self._model is not None:
+                return
+
+            try:
+                from sentence_transformers import SentenceTransformer
+            except ImportError:
+                raise ImportError(
+                    "sentence-transformers is required for RAG. "
+                    "Install with: pip install sentence-transformers"
+                )
+
+            model_name = self._config.get_model_name()
+            device = self._config.get_device()
+
+            logger.info(f"Loading embedding model: {model_name} on {device}")
+
+            try:
+                # Load model in separate thread to avoid blocking event loop
+                self._model = await asyncio.to_thread(
+                    SentenceTransformer,
+                    model_name,
+                    device=device,
+                    cache_folder=self._config.cache_folder,
+                )
+                self._dimension = self._model.get_sentence_embedding_dimension()
+
+                # Check if E5 model (requires special prefix)
+                self._is_e5_model = "e5" in model_name.lower()
+
+                logger.info(
+                    f"Embedding model loaded successfully. "
+                    f"Dimension: {self._dimension}, E5 model: {self._is_e5_model}"
+                )
+            except Exception as e:
+                logger.error(f"Failed to load embedding model: {e}")
+                raise

     @property
     def dimension(self) -> int:
-        """Get embedding dimension (
+        """Get embedding dimension (must be loaded first)"""
         if self._dimension is None:
-
+            raise RuntimeError(
+                "Embedding dimension not available. Model not loaded yet."
+            )
         return self._dimension

     def _prepare_texts(self, texts: List[str], is_query: bool = False) -> List[str]:
@@ -116,7 +125,7 @@ class EmbeddingService:
         prefix = "query: " if is_query else "passage: "
         return [prefix + text for text in texts]

-    def embed_texts(self, texts: List[str]) -> List[List[float]]:
+    async def embed_texts(self, texts: List[str]) -> List[List[float]]:
        """
         Generate embeddings for a list of texts (documents/passages).

@@ -129,11 +138,15 @@ class EmbeddingService:
         if not texts:
             return []

+        await self._ensure_model_loaded()
+
         # Prepare texts with prefix if E5 model
         prepared_texts = self._prepare_texts(texts, is_query=False)

         try:
-
+            # Run in separate thread to avoid blocking event loop
+            embeddings = await asyncio.to_thread(
+                self._model.encode,
                 prepared_texts,
                 batch_size=self._config.batch_size,
                 show_progress_bar=len(texts) > 100,
@@ -145,7 +158,7 @@ class EmbeddingService:
             logger.error(f"Failed to generate embeddings: {e}")
             raise

-    def embed_query(self, query: str) -> List[float]:
+    async def embed_query(self, query: str) -> List[float]:
         """
         Generate embedding for a single query.

@@ -160,11 +173,15 @@ class EmbeddingService:
         if not query:
             raise ValueError("Query cannot be empty")

+        await self._ensure_model_loaded()
+
         # Prepare query with prefix if E5 model
         prepared_query = self._prepare_texts([query], is_query=True)[0]

         try:
-
+            # Run in separate thread to avoid blocking event loop
+            embedding = await asyncio.to_thread(
+                self._model.encode,
                 prepared_query,
                 convert_to_numpy=True,
                 normalize_embeddings=self._config.normalize_embeddings,
@@ -174,7 +191,7 @@ class EmbeddingService:
             logger.error(f"Failed to generate query embedding: {e}")
             raise

-    def embed_batch(
+    async def embed_batch(
         self, texts: List[str], batch_size: Optional[int] = None
     ) -> List[List[float]]:
         """
@@ -190,11 +207,15 @@ class EmbeddingService:
         if not texts:
             return []

+        await self._ensure_model_loaded()
+
         prepared_texts = self._prepare_texts(texts, is_query=False)
         effective_batch_size = batch_size or self._config.batch_size

         try:
-
+            # Run in separate thread to avoid blocking event loop
+            embeddings = await asyncio.to_thread(
+                self._model.encode,
                 prepared_texts,
                 batch_size=effective_batch_size,
                 show_progress_bar=True,
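
Note: this release converts EmbeddingService to an async API. embed_texts, embed_query, and embed_batch are now coroutines, lazy loading is guarded by an asyncio.Lock with a double-checked "self._model is not None" test, and the blocking sentence-transformers calls run on worker threads via asyncio.to_thread. A minimal sketch of the new calling pattern (the accessor name get_embedding_service appears in the rag_manager.py hunk below; the texts here are illustrative):

import asyncio

from agent_server.core.embedding_service import get_embedding_service


async def main() -> None:
    service = get_embedding_service()
    # First await triggers _ensure_model_loaded(); concurrent callers wait on
    # the asyncio.Lock, and the double-check loads the model exactly once.
    doc_vectors = await service.embed_texts(["passage one", "passage two"])
    query_vector = await service.embed_query("what changed in 2.0.8?")
    print(len(doc_vectors), len(query_vector))


asyncio.run(main())
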
agent_server/core/rag_manager.py
CHANGED

@@ -88,13 +88,24 @@ class RAGManager:
         self._client = self._create_qdrant_client()
         logger.info("Qdrant client initialized")

-        # 2. Initialize embedding service
-
+        # 2. Initialize embedding service (local or vLLM backend)
+        import os
+        embedding_backend = os.environ.get("HDSP_EMBEDDING_BACKEND", "local").lower()

-
-
-
-
+        if embedding_backend == "vllm":
+            from agent_server.core.vllm_embedding_service import get_vllm_embedding_service
+            self._embedding_service = get_vllm_embedding_service(self._config.embedding)
+            logger.info(
+                f"vLLM Embedding service initialized (dim={self._embedding_service.dimension})"
+            )
+        else:
+            from agent_server.core.embedding_service import get_embedding_service
+            self._embedding_service = get_embedding_service(self._config.embedding)
+            # Load model to get dimension
+            await self._embedding_service._ensure_model_loaded()
+            logger.info(
+                f"Local Embedding service initialized (dim={self._embedding_service.dimension})"
+            )

         # 3. Ensure collection exists
         await self._ensure_collection()
@@ -151,26 +162,29 @@ class RAGManager:
         )

         cfg = self._config.qdrant
+        mode = cfg.get_mode()  # Use get_mode() for env override

-        if
+        if mode == "local":
             # Local file-based storage
             local_path = cfg.get_local_path()
             Path(local_path).mkdir(parents=True, exist_ok=True)
             logger.info(f"Initializing Qdrant in local mode: {local_path}")
             return QdrantClient(path=local_path)

-        elif
+        elif mode == "server":
             # Docker or external server
-
-
+            url = cfg.get_url()  # Use get_url() for env override
+            logger.info(f"Connecting to Qdrant server: {url}")
+            return QdrantClient(url=url)

-        elif
+        elif mode == "cloud":
             # Qdrant Cloud
+            url = cfg.get_url()  # Use get_url() for env override
             logger.info("Connecting to Qdrant Cloud")
-            return QdrantClient(url=
+            return QdrantClient(url=url, api_key=cfg.api_key)

         else:
-            raise ValueError(f"Unknown Qdrant mode: {
+            raise ValueError(f"Unknown Qdrant mode: {mode}")

     async def _ensure_collection(self) -> None:
         """Create collection if it doesn't exist."""
@@ -274,7 +288,7 @@ class RAGManager:
                 )

                 if chunks:
-                    self._index_chunks(chunks, file_path)
+                    await self._index_chunks(chunks, file_path)
                     indexed += 1
                     self._index_stats["total_documents"] += 1
                     self._index_stats["total_chunks"] += len(chunks)
@@ -345,13 +359,13 @@ class RAGManager:
         else:
             return "general"

-    def _index_chunks(self, chunks: List[Dict], file_path: Path) -> None:
+    async def _index_chunks(self, chunks: List[Dict], file_path: Path) -> None:
         """Index document chunks to Qdrant."""
         from qdrant_client.models import PointStruct

         # Generate embeddings
         texts = [c["content"] for c in chunks]
-        embeddings = self._embedding_service.embed_texts(texts)
+        embeddings = await self._embedding_service.embed_texts(texts)

         # Add content hash to all chunks
         file_hash = self._compute_file_hash(file_path)
@@ -430,7 +444,7 @@ class RAGManager:
             )

             if chunks:
-                self._index_chunks(chunks, file_path)
+                await self._index_chunks(chunks, file_path)
                 logger.info(f"Reindexed: {file_path}")
         except Exception as e:
             logger.error(f"Failed to reindex {file_path}: {e}")
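
Note: backend selection is purely environment-driven, so a deployment can switch from local sentence-transformers to a remote vLLM server without code changes. A sketch of the relevant variables, assuming the defaults shown in the hunks (the endpoint host below is a placeholder, not a value from this diff):

import os

# Read by RAGManager.initialize(); defaults to "local".
os.environ["HDSP_EMBEDDING_BACKEND"] = "vllm"
# Read by VLLMEmbeddingService (see vllm_embedding_service.py below).
os.environ["HDSP_VLLM_ENDPOINT"] = "http://vllm.example.internal:8000"  # placeholder host
os.environ["HDSP_VLLM_MODEL"] = "qwen3-embedding-8b"
os.environ["HDSP_VLLM_DIMENSION"] = "8192"
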
agent_server/core/retriever.py
CHANGED

@@ -84,21 +84,23 @@ class Retriever:
         effective_threshold = score_threshold or self._config.score_threshold

         # Generate query embedding
-        query_embedding = self._embedding_service.embed_query(query)
+        query_embedding = await self._embedding_service.embed_query(query)

         # Build filter condition
         qdrant_filter = self._build_filter(filters) if filters else None

         # Dense vector search
         try:
-
+            response = self._client.query_points(
                 collection_name=self._config.qdrant.collection_name,
-
+                query=query_embedding,
                 query_filter=qdrant_filter,
                 limit=effective_top_k,
-                score_threshold=effective_threshold
-
+                score_threshold=effective_threshold * 0.5,  # Lower for initial retrieval
+                with_payload=True,
+                with_vectors=False,
             )
+            results = response.points
         except Exception as e:
             logger.error(f"Search failed: {e}")
             return []
@@ -193,7 +195,7 @@ class Retriever:
         effective_threshold = score_threshold or self._config.score_threshold

         # Generate query embedding
-        query_embedding = self._embedding_service.embed_query(query)
+        query_embedding = await self._embedding_service.embed_query(query)

         # Build filter condition
         qdrant_filter = self._build_filter(filters) if filters else None
@@ -201,13 +203,16 @@ class Retriever:
         # Vector search with timing
         try:
             # For debugging, fetch more results (3x) at a lower threshold
-
+            response = self._client.query_points(
                 collection_name=self._config.qdrant.collection_name,
-
+                query=query_embedding,
                 query_filter=qdrant_filter,
                 limit=effective_top_k * 3,
                 score_threshold=effective_threshold * 0.3,
+                with_payload=True,
+                with_vectors=False,
             )
+            results = response.points
         except Exception as e:
             logger.error(f"Search failed: {e}")
             return DebugSearchResult(
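
Note: both search paths now call the qdrant-client query_points API, which returns a response object whose hits live on .points (hence the added "results = response.points"), with with_payload=True, with_vectors=False to keep responses small. A self-contained sketch against an in-memory Qdrant instance (collection name and vectors are illustrative):

from qdrant_client import QdrantClient
from qdrant_client.models import Distance, PointStruct, VectorParams

client = QdrantClient(":memory:")  # illustrative; the package uses local/server/cloud modes
client.create_collection(
    collection_name="demo",
    vectors_config=VectorParams(size=4, distance=Distance.COSINE),
)
client.upsert(
    collection_name="demo",
    points=[PointStruct(id=1, vector=[0.1, 0.2, 0.3, 0.4], payload={"text": "hello"})],
)

response = client.query_points(
    collection_name="demo",
    query=[0.1, 0.2, 0.3, 0.4],
    limit=5,
    with_payload=True,
    with_vectors=False,
)
print(response.points[0].payload)  # hits are on .points, not on the response itself
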
agent_server/core/vllm_embedding_service.py
ADDED

@@ -0,0 +1,243 @@
+"""
+vLLM Embedding Service - Remote embedding generation using vLLM server.
+
+Features:
+- GPU-accelerated embeddings via vLLM server
+- OpenAI-compatible API interface
+- Retry logic for reliability
+- Support for large models (qwen3-embedding-8b, gte-Qwen2-7B, etc.)
+
+Prerequisites:
+- vLLM embedding server running (e.g., http://10.222.52.31:8000)
+- Model loaded on vLLM server
+"""
+
+import logging
+import os
+from typing import TYPE_CHECKING, List, Optional
+
+import httpx
+import time
+
+if TYPE_CHECKING:
+    from hdsp_agent_core.models.rag import EmbeddingConfig
+
+logger = logging.getLogger(__name__)
+
+
+class VLLMEmbeddingService:
+    """
+    Remote embedding generation using vLLM server.
+
+    Design Principles:
+    - Stateless client (vLLM server holds the model)
+    - Retry logic for network resilience
+    - OpenAI-compatible API interface
+
+    Usage:
+        service = get_vllm_embedding_service()
+        embeddings = service.embed_texts(["text1", "text2"])
+        query_embedding = service.embed_query("search query")
+    """
+
+    _instance: Optional["VLLMEmbeddingService"] = None
+    _initialized: bool = False
+
+    def __new__(cls, *args, **kwargs):
+        if cls._instance is None:
+            cls._instance = super().__new__(cls)
+        return cls._instance
+
+    def __init__(self, config: Optional["EmbeddingConfig"] = None):
+        if self._initialized:
+            return
+        self._initialized = True
+
+        from hdsp_agent_core.models.rag import EmbeddingConfig
+
+        self._config = config or EmbeddingConfig()
+
+        # vLLM configuration from environment variables
+        self._endpoint = os.environ.get("HDSP_VLLM_ENDPOINT", "http://localhost:8000")
+        self._model = os.environ.get("HDSP_VLLM_MODEL", "qwen3-embedding-8b")
+        self._dimension = int(os.environ.get("HDSP_VLLM_DIMENSION", "8192"))
+
+        # HTTP client with retry
+        self._client = httpx.AsyncClient(
+            base_url=self._endpoint,
+            timeout=httpx.Timeout(30.0),
+            limits=httpx.Limits(max_keepalive_connections=5, max_connections=10)
+        )
+
+        logger.info(
+            f"vLLM Embedding Service initialized: "
+            f"endpoint={self._endpoint}, model={self._model}, dim={self._dimension}"
+        )
+
+    @property
+    def dimension(self) -> int:
+        """Get embedding dimension"""
+        return self._dimension
+
+    async def _call_vllm_api(self, texts: List[str], max_retries: int = 3) -> List[List[float]]:
+        """
+        Call vLLM embedding API with retry logic.
+
+        Args:
+            texts: List of text strings to embed
+            max_retries: Maximum number of retry attempts
+
+        Returns:
+            List of embedding vectors
+
+        Raises:
+            Exception if all retries fail
+        """
+        payload = {
+            "model": self._model,
+            "input": texts,
+        }
+
+        last_error = None
+        for attempt in range(max_retries):
+            try:
+                response = await self._client.post("/v1/embeddings", json=payload)
+                response.raise_for_status()
+
+                data = response.json()
+                # Sort by index to ensure correct order
+                sorted_items = sorted(data["data"], key=lambda x: x["index"])
+                embeddings = [item["embedding"] for item in sorted_items]
+                return embeddings
+
+            except httpx.HTTPStatusError as e:
+                last_error = e
+                logger.warning(
+                    f"vLLM API HTTP error (attempt {attempt + 1}/{max_retries}): "
+                    f"{e.response.status_code} - {e.response.text}"
+                )
+            except httpx.RequestError as e:
+                last_error = e
+                logger.warning(
+                    f"vLLM API connection error (attempt {attempt + 1}/{max_retries}): {e}"
+                )
+            except Exception as e:
+                last_error = e
+                logger.error(f"Unexpected error calling vLLM API: {e}")
+                break
+
+        raise Exception(f"Failed to connect to vLLM after {max_retries} attempts: {last_error}")
+
+    async def embed_texts(self, texts: List[str]) -> List[List[float]]:
+        """
+        Generate embeddings for a list of texts (documents/passages).
+
+        Args:
+            texts: List of text strings to embed
+
+        Returns:
+            List of embedding vectors (as lists of floats)
+        """
+        if not texts:
+            return []
+
+        try:
+            return await self._call_vllm_api(texts)
+        except Exception as e:
+            logger.error(f"Failed to generate embeddings via vLLM: {e}")
+            raise
+
+    async def embed_query(self, query: str) -> List[float]:
+        """
+        Generate embedding for a single query.
+
+        Args:
+            query: Query string
+
+        Returns:
+            Embedding vector as list of floats
+        """
+        if not query:
+            raise ValueError("Query cannot be empty")
+
+        try:
+            embeddings = await self._call_vllm_api([query])
+            return embeddings[0]
+        except Exception as e:
+            logger.error(f"Failed to generate query embedding via vLLM: {e}")
+            raise
+
+    async def embed_batch(
+        self, texts: List[str], batch_size: Optional[int] = None
+    ) -> List[List[float]]:
+        """
+        Generate embeddings with batching for large document sets.
+
+        Args:
+            texts: List of text strings to embed
+            batch_size: Override default batch size (for vLLM, can handle large batches)
+
+        Returns:
+            List of embedding vectors
+        """
+        if not texts:
+            return []
+
+        # vLLM can handle large batches efficiently
+        effective_batch_size = batch_size or 100
+        all_embeddings = []
+
+        for i in range(0, len(texts), effective_batch_size):
+            batch = texts[i : i + effective_batch_size]
+            embeddings = await self._call_vllm_api(batch)
+            all_embeddings.extend(embeddings)
+
+        return all_embeddings
+
+    def get_model_info(self) -> dict:
+        """Get information about the vLLM embedding service"""
+        return {
+            "backend": "vllm",
+            "endpoint": self._endpoint,
+            "model_name": self._model,
+            "dimension": self._dimension,
+        }
+
+    async def close(self):
+        """Close HTTP client connection"""
+        await self._client.aclose()
+
+
+# ============ Singleton Accessor ============
+
+_vllm_embedding_service: Optional[VLLMEmbeddingService] = None
+
+
+def get_vllm_embedding_service(
+    config: Optional["EmbeddingConfig"] = None,
+) -> VLLMEmbeddingService:
+    """
+    Get the singleton VLLMEmbeddingService instance.
+
+    Args:
+        config: Optional EmbeddingConfig (only used on first call)
+
+    Returns:
+        VLLMEmbeddingService singleton instance
+    """
+    global _vllm_embedding_service
+    if _vllm_embedding_service is None:
+        _vllm_embedding_service = VLLMEmbeddingService(config)
+    return _vllm_embedding_service
+
+
+def reset_vllm_embedding_service() -> None:
+    """
+    Reset the singleton instance (for testing purposes).
+    """
+    global _vllm_embedding_service
+    if _vllm_embedding_service is not None:
+        _vllm_embedding_service._initialized = False
+        _vllm_embedding_service = None
+    VLLMEmbeddingService._instance = None
+    VLLMEmbeddingService._initialized = False
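
Note: the service talks to vLLM's OpenAI-compatible /v1/embeddings route and re-sorts results by "index" before returning them. The wire format can be probed independently of the service; a minimal check, assuming a vLLM server is running with the model loaded (the endpoint and model mirror the defaults above):

import asyncio

import httpx


async def probe() -> None:
    async with httpx.AsyncClient(base_url="http://localhost:8000") as client:
        resp = await client.post(
            "/v1/embeddings",
            json={"model": "qwen3-embedding-8b", "input": ["hello", "world"]},
        )
        resp.raise_for_status()
        items = resp.json()["data"]
        # The service sorts by index for the same reason: order is not guaranteed.
        items.sort(key=lambda item: item["index"])
        print([len(item["embedding"]) for item in items])


asyncio.run(probe())
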
agent_server/langchain/agent.py
CHANGED

@@ -23,12 +23,16 @@ from agent_server.langchain.prompts import (
 )
 from agent_server.langchain.tools import (
     check_resource_tool,
+    diagnostics_tool,
+    edit_file_tool,
     execute_command_tool,
     final_answer_tool,
     jupyter_cell_tool,
     list_files_tool,
     markdown_tool,
+    multiedit_file_tool,
     read_file_tool,
+    references_tool,
     search_notebook_cells_tool,
     search_workspace_tool,
     write_file_tool,

@@ -45,11 +49,15 @@ def _get_all_tools():
         final_answer_tool,
         read_file_tool,
         write_file_tool,
+        edit_file_tool,
+        multiedit_file_tool,
         list_files_tool,
         search_workspace_tool,
         search_notebook_cells_tool,
         execute_command_tool,
         check_resource_tool,
+        diagnostics_tool,
+        references_tool,
     ]

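
Note: the new editing and LSP tools (edit_file_tool, multiedit_file_tool, diagnostics_tool, references_tool) are simply appended to the list returned by _get_all_tools(). These hunks do not show how that list is consumed; a hedged sketch of typical LangChain/LangGraph wiring (the model choice and create_react_agent usage are illustrative, not taken from this diff):

from langchain_openai import ChatOpenAI
from langgraph.prebuilt import create_react_agent

from agent_server.langchain.agent import _get_all_tools

# Hypothetical wiring: hand the expanded tool list to a prebuilt ReAct agent.
agent = create_react_agent(ChatOpenAI(model="gpt-4o-mini"), _get_all_tools())
result = agent.invoke({"messages": [("user", "List files in the workspace")]})
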