hdsp-jupyter-extension 2.0.7__py3-none-any.whl → 2.0.10__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agent_server/core/embedding_service.py +67 -46
- agent_server/core/rag_manager.py +40 -17
- agent_server/core/retriever.py +12 -6
- agent_server/core/vllm_embedding_service.py +246 -0
- agent_server/langchain/ARCHITECTURE.md +7 -51
- agent_server/langchain/agent.py +39 -20
- agent_server/langchain/custom_middleware.py +206 -62
- agent_server/langchain/hitl_config.py +6 -9
- agent_server/langchain/llm_factory.py +85 -1
- agent_server/langchain/logging_utils.py +52 -13
- agent_server/langchain/prompts.py +85 -45
- agent_server/langchain/tools/__init__.py +14 -10
- agent_server/langchain/tools/file_tools.py +266 -40
- agent_server/langchain/tools/file_utils.py +334 -0
- agent_server/langchain/tools/jupyter_tools.py +0 -1
- agent_server/langchain/tools/lsp_tools.py +264 -0
- agent_server/langchain/tools/resource_tools.py +12 -12
- agent_server/langchain/tools/search_tools.py +3 -158
- agent_server/main.py +7 -0
- agent_server/routers/langchain_agent.py +207 -102
- agent_server/routers/rag.py +8 -3
- hdsp_agent_core/models/rag.py +15 -1
- hdsp_agent_core/services/rag_service.py +6 -1
- {hdsp_jupyter_extension-2.0.7.data → hdsp_jupyter_extension-2.0.10.data}/data/share/jupyter/labextensions/hdsp-agent/build_log.json +1 -1
- {hdsp_jupyter_extension-2.0.7.data → hdsp_jupyter_extension-2.0.10.data}/data/share/jupyter/labextensions/hdsp-agent/package.json +3 -2
- hdsp_jupyter_extension-2.0.7.data/data/share/jupyter/labextensions/hdsp-agent/static/frontend_styles_index_js.4770ec0fb2d173b6deb4.js → hdsp_jupyter_extension-2.0.10.data/data/share/jupyter/labextensions/hdsp-agent/static/frontend_styles_index_js.2d9fb488c82498c45c2d.js +251 -5
- hdsp_jupyter_extension-2.0.10.data/data/share/jupyter/labextensions/hdsp-agent/static/frontend_styles_index_js.2d9fb488c82498c45c2d.js.map +1 -0
- hdsp_jupyter_extension-2.0.7.data/data/share/jupyter/labextensions/hdsp-agent/static/lib_index_js.29cf4312af19e86f82af.js → hdsp_jupyter_extension-2.0.10.data/data/share/jupyter/labextensions/hdsp-agent/static/lib_index_js.dc6434bee96ab03a0539.js +1831 -274
- hdsp_jupyter_extension-2.0.10.data/data/share/jupyter/labextensions/hdsp-agent/static/lib_index_js.dc6434bee96ab03a0539.js.map +1 -0
- hdsp_jupyter_extension-2.0.7.data/data/share/jupyter/labextensions/hdsp-agent/static/remoteEntry.61343eb4cf0577e74b50.js → hdsp_jupyter_extension-2.0.10.data/data/share/jupyter/labextensions/hdsp-agent/static/remoteEntry.4a252df3ade74efee8d6.js +11 -9
- hdsp_jupyter_extension-2.0.10.data/data/share/jupyter/labextensions/hdsp-agent/static/remoteEntry.4a252df3ade74efee8d6.js.map +1 -0
- jupyter_ext/labextension/static/vendors-node_modules_emotion_cache_dist_emotion-cache_browser_development_esm_js-node_modules-782ee5.d9ed8645ef1d311657d8.js → hdsp_jupyter_extension-2.0.10.data/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_emotion_cache_dist_emotion-cache_browser_development_esm_js.24edcc52a1c014a8a5f0.js +2 -209
- hdsp_jupyter_extension-2.0.10.data/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_emotion_cache_dist_emotion-cache_browser_development_esm_js.24edcc52a1c014a8a5f0.js.map +1 -0
- jupyter_ext/labextension/static/vendors-node_modules_emotion_react_dist_emotion-react_browser_development_esm_js.36b49c71871f98d4f549.js → hdsp_jupyter_extension-2.0.10.data/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_emotion_react_dist_emotion-react_browser_development_esm_js.19ecf6babe00caff6b8a.js +209 -2
- hdsp_jupyter_extension-2.0.10.data/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_emotion_react_dist_emotion-react_browser_development_esm_js.19ecf6babe00caff6b8a.js.map +1 -0
- jupyter_ext/labextension/static/vendors-node_modules_mui_material_utils_createSvgIcon_js.2e13df4ea61496e95d45.js → hdsp_jupyter_extension-2.0.10.data/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_mui_material_utils_createSvgIcon_js.1f5038488cdfd8b3a85d.js +212 -3
- hdsp_jupyter_extension-2.0.10.data/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_mui_material_utils_createSvgIcon_js.1f5038488cdfd8b3a85d.js.map +1 -0
- {hdsp_jupyter_extension-2.0.7.dist-info → hdsp_jupyter_extension-2.0.10.dist-info}/METADATA +1 -3
- hdsp_jupyter_extension-2.0.10.dist-info/RECORD +144 -0
- jupyter_ext/__init__.py +18 -0
- jupyter_ext/_version.py +1 -1
- jupyter_ext/handlers.py +176 -1
- jupyter_ext/labextension/build_log.json +1 -1
- jupyter_ext/labextension/package.json +3 -2
- jupyter_ext/labextension/static/{frontend_styles_index_js.4770ec0fb2d173b6deb4.js → frontend_styles_index_js.2d9fb488c82498c45c2d.js} +251 -5
- jupyter_ext/labextension/static/frontend_styles_index_js.2d9fb488c82498c45c2d.js.map +1 -0
- jupyter_ext/labextension/static/{lib_index_js.29cf4312af19e86f82af.js → lib_index_js.dc6434bee96ab03a0539.js} +1831 -274
- jupyter_ext/labextension/static/lib_index_js.dc6434bee96ab03a0539.js.map +1 -0
- jupyter_ext/labextension/static/{remoteEntry.61343eb4cf0577e74b50.js → remoteEntry.4a252df3ade74efee8d6.js} +11 -9
- jupyter_ext/labextension/static/remoteEntry.4a252df3ade74efee8d6.js.map +1 -0
- hdsp_jupyter_extension-2.0.7.data/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_emotion_cache_dist_emotion-cache_browser_development_esm_js-node_modules-782ee5.d9ed8645ef1d311657d8.js → jupyter_ext/labextension/static/vendors-node_modules_emotion_cache_dist_emotion-cache_browser_development_esm_js.24edcc52a1c014a8a5f0.js +2 -209
- jupyter_ext/labextension/static/vendors-node_modules_emotion_cache_dist_emotion-cache_browser_development_esm_js.24edcc52a1c014a8a5f0.js.map +1 -0
- hdsp_jupyter_extension-2.0.7.data/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_emotion_react_dist_emotion-react_browser_development_esm_js.36b49c71871f98d4f549.js → jupyter_ext/labextension/static/vendors-node_modules_emotion_react_dist_emotion-react_browser_development_esm_js.19ecf6babe00caff6b8a.js +209 -2
- jupyter_ext/labextension/static/vendors-node_modules_emotion_react_dist_emotion-react_browser_development_esm_js.19ecf6babe00caff6b8a.js.map +1 -0
- hdsp_jupyter_extension-2.0.7.data/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_mui_material_utils_createSvgIcon_js.2e13df4ea61496e95d45.js → jupyter_ext/labextension/static/vendors-node_modules_mui_material_utils_createSvgIcon_js.1f5038488cdfd8b3a85d.js +212 -3
- jupyter_ext/labextension/static/vendors-node_modules_mui_material_utils_createSvgIcon_js.1f5038488cdfd8b3a85d.js.map +1 -0
- hdsp_jupyter_extension-2.0.7.data/data/share/jupyter/labextensions/hdsp-agent/static/frontend_styles_index_js.4770ec0fb2d173b6deb4.js.map +0 -1
- hdsp_jupyter_extension-2.0.7.data/data/share/jupyter/labextensions/hdsp-agent/static/lib_index_js.29cf4312af19e86f82af.js.map +0 -1
- hdsp_jupyter_extension-2.0.7.data/data/share/jupyter/labextensions/hdsp-agent/static/remoteEntry.61343eb4cf0577e74b50.js.map +0 -1
- hdsp_jupyter_extension-2.0.7.data/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_emotion_cache_dist_emotion-cache_browser_development_esm_js-node_modules-782ee5.d9ed8645ef1d311657d8.js.map +0 -1
- hdsp_jupyter_extension-2.0.7.data/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_emotion_react_dist_emotion-react_browser_development_esm_js.36b49c71871f98d4f549.js.map +0 -1
- hdsp_jupyter_extension-2.0.7.data/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_mui_material_utils_createSvgIcon_js.2e13df4ea61496e95d45.js.map +0 -1
- hdsp_jupyter_extension-2.0.7.dist-info/RECORD +0 -141
- jupyter_ext/labextension/static/frontend_styles_index_js.4770ec0fb2d173b6deb4.js.map +0 -1
- jupyter_ext/labextension/static/lib_index_js.29cf4312af19e86f82af.js.map +0 -1
- jupyter_ext/labextension/static/remoteEntry.61343eb4cf0577e74b50.js.map +0 -1
- jupyter_ext/labextension/static/vendors-node_modules_emotion_cache_dist_emotion-cache_browser_development_esm_js-node_modules-782ee5.d9ed8645ef1d311657d8.js.map +0 -1
- jupyter_ext/labextension/static/vendors-node_modules_emotion_react_dist_emotion-react_browser_development_esm_js.36b49c71871f98d4f549.js.map +0 -1
- jupyter_ext/labextension/static/vendors-node_modules_mui_material_utils_createSvgIcon_js.2e13df4ea61496e95d45.js.map +0 -1
- {hdsp_jupyter_extension-2.0.7.data → hdsp_jupyter_extension-2.0.10.data}/data/etc/jupyter/jupyter_server_config.d/hdsp_jupyter_extension.json +0 -0
- {hdsp_jupyter_extension-2.0.7.data → hdsp_jupyter_extension-2.0.10.data}/data/share/jupyter/labextensions/hdsp-agent/install.json +0 -0
- {hdsp_jupyter_extension-2.0.7.data → hdsp_jupyter_extension-2.0.10.data}/data/share/jupyter/labextensions/hdsp-agent/static/node_modules_emotion_use-insertion-effect-with-fallbacks_dist_emotion-use-insertion-effect-wi-3ba6b80.c095373419d05e6f141a.js +0 -0
- {hdsp_jupyter_extension-2.0.7.data → hdsp_jupyter_extension-2.0.10.data}/data/share/jupyter/labextensions/hdsp-agent/static/node_modules_emotion_use-insertion-effect-with-fallbacks_dist_emotion-use-insertion-effect-wi-3ba6b80.c095373419d05e6f141a.js.map +0 -0
- {hdsp_jupyter_extension-2.0.7.data → hdsp_jupyter_extension-2.0.10.data}/data/share/jupyter/labextensions/hdsp-agent/static/node_modules_emotion_use-insertion-effect-with-fallbacks_dist_emotion-use-insertion-effect-wi-3ba6b81.61e75fb98ecff46cf836.js +0 -0
- {hdsp_jupyter_extension-2.0.7.data → hdsp_jupyter_extension-2.0.10.data}/data/share/jupyter/labextensions/hdsp-agent/static/node_modules_emotion_use-insertion-effect-with-fallbacks_dist_emotion-use-insertion-effect-wi-3ba6b81.61e75fb98ecff46cf836.js.map +0 -0
- {hdsp_jupyter_extension-2.0.7.data → hdsp_jupyter_extension-2.0.10.data}/data/share/jupyter/labextensions/hdsp-agent/static/style.js +0 -0
- {hdsp_jupyter_extension-2.0.7.data → hdsp_jupyter_extension-2.0.10.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_babel_runtime_helpers_esm_extends_js-node_modules_emotion_serialize_dist-051195.e2553aab0c3963b83dd7.js +0 -0
- {hdsp_jupyter_extension-2.0.7.data → hdsp_jupyter_extension-2.0.10.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_babel_runtime_helpers_esm_extends_js-node_modules_emotion_serialize_dist-051195.e2553aab0c3963b83dd7.js.map +0 -0
- {hdsp_jupyter_extension-2.0.7.data → hdsp_jupyter_extension-2.0.10.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_emotion_styled_dist_emotion-styled_browser_development_esm_js.661fb5836f4978a7c6e1.js +0 -0
- {hdsp_jupyter_extension-2.0.7.data → hdsp_jupyter_extension-2.0.10.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_emotion_styled_dist_emotion-styled_browser_development_esm_js.661fb5836f4978a7c6e1.js.map +0 -0
- {hdsp_jupyter_extension-2.0.7.data → hdsp_jupyter_extension-2.0.10.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_mui_material_index_js.985697e0162d8d088ca2.js +0 -0
- {hdsp_jupyter_extension-2.0.7.data → hdsp_jupyter_extension-2.0.10.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_mui_material_index_js.985697e0162d8d088ca2.js.map +0 -0
- {hdsp_jupyter_extension-2.0.7.dist-info → hdsp_jupyter_extension-2.0.10.dist-info}/WHEEL +0 -0
- {hdsp_jupyter_extension-2.0.7.dist-info → hdsp_jupyter_extension-2.0.10.dist-info}/licenses/LICENSE +0 -0
agent_server/core/embedding_service.py
CHANGED

@@ -4,13 +4,14 @@ Local Embedding Service - Wraps sentence-transformers for local embedding generation
 Features:
 - Zero external API calls (data sovereignty)
 - Lazy model loading (only when first needed)
-- Thread-safe singleton pattern
+- Thread-safe singleton pattern with async support
 - Configurable model and device
 - E5 model prefix handling for optimal performance
 
 Default model: intfloat/multilingual-e5-small (384 dimensions, Korean support)
 """
 
+import asyncio
 import logging
 from typing import TYPE_CHECKING, List, Optional
 
@@ -55,51 +56,59 @@ class EmbeddingService:
         self._model = None
         self._dimension: Optional[int] = None
         self._is_e5_model: bool = False
+        self._load_lock = asyncio.Lock()  # Thread-safe lazy loading
 
-
-
-
-
-            self._load_model()
-        return self._model
-
-    def _load_model(self) -> None:
-        """Load the sentence-transformers model"""
-        try:
-            from sentence_transformers import SentenceTransformer
-        except ImportError:
-            raise ImportError(
-                "sentence-transformers is required for RAG. "
-                "Install with: pip install sentence-transformers"
-            )
-
-        model_name = self._config.get_model_name()
-        device = self._config.get_device()
-
-        logger.info(f"Loading embedding model: {model_name} on {device}")
-
-        try:
-            self._model = SentenceTransformer(
-                model_name, device=device, cache_folder=self._config.cache_folder
-            )
-            self._dimension = self._model.get_sentence_embedding_dimension()
-
-            # Check if E5 model (requires special prefix)
-            self._is_e5_model = "e5" in model_name.lower()
+    async def _ensure_model_loaded(self):
+        """Lazy load the embedding model (thread-safe, async)"""
+        if self._model is not None:
+            return
 
-
-
-
-
-
-
-
+        async with self._load_lock:
+            # Double-check after acquiring lock
+            if self._model is not None:
+                return
+
+            try:
+                from sentence_transformers import SentenceTransformer
+            except ImportError:
+                raise ImportError(
+                    "sentence-transformers is required for RAG. "
+                    "Install with: pip install sentence-transformers"
+                )
+
+            model_name = self._config.get_model_name()
+            device = self._config.get_device()
+
+            logger.info(f"Loading embedding model: {model_name} on {device}")
+
+            try:
+                # Load model in separate thread to avoid blocking event loop
+                self._model = await asyncio.to_thread(
+                    SentenceTransformer,
+                    model_name,
+                    device=device,
+                    cache_folder=self._config.cache_folder,
+                )
+                self._dimension = self._model.get_sentence_embedding_dimension()
+
+                # Check if E5 model (requires special prefix)
+                self._is_e5_model = "e5" in model_name.lower()
+
+                logger.info(
+                    f"Embedding model loaded successfully. "
+                    f"Dimension: {self._dimension}, E5 model: {self._is_e5_model}"
+                )
+            except Exception as e:
+                logger.error(f"Failed to load embedding model: {e}")
+                raise
 
     @property
     def dimension(self) -> int:
-        """Get embedding dimension (
+        """Get embedding dimension (must be loaded first)"""
         if self._dimension is None:
-
+            raise RuntimeError(
+                "Embedding dimension not available. Model not loaded yet."
+            )
         return self._dimension
 
     def _prepare_texts(self, texts: List[str], is_query: bool = False) -> List[str]:
@@ -116,7 +125,7 @@ class EmbeddingService:
         prefix = "query: " if is_query else "passage: "
         return [prefix + text for text in texts]
 
-    def embed_texts(self, texts: List[str]) -> List[List[float]]:
+    async def embed_texts(self, texts: List[str]) -> List[List[float]]:
         """
         Generate embeddings for a list of texts (documents/passages).
 
@@ -129,11 +138,15 @@ class EmbeddingService:
         if not texts:
             return []
 
+        await self._ensure_model_loaded()
+
         # Prepare texts with prefix if E5 model
        prepared_texts = self._prepare_texts(texts, is_query=False)
 
         try:
-            embeddings = self._model.encode(
+            # Run in separate thread to avoid blocking event loop
+            embeddings = await asyncio.to_thread(
+                self._model.encode,
                 prepared_texts,
                 batch_size=self._config.batch_size,
                 show_progress_bar=len(texts) > 100,
@@ -145,7 +158,7 @@ class EmbeddingService:
             logger.error(f"Failed to generate embeddings: {e}")
             raise
 
-    def embed_query(self, query: str) -> List[float]:
+    async def embed_query(self, query: str) -> List[float]:
         """
         Generate embedding for a single query.
 
@@ -160,11 +173,15 @@ class EmbeddingService:
         if not query:
             raise ValueError("Query cannot be empty")
 
+        await self._ensure_model_loaded()
+
         # Prepare query with prefix if E5 model
         prepared_query = self._prepare_texts([query], is_query=True)[0]
 
         try:
-            embedding = self._model.encode(
+            # Run in separate thread to avoid blocking event loop
+            embedding = await asyncio.to_thread(
+                self._model.encode,
                 prepared_query,
                 convert_to_numpy=True,
                 normalize_embeddings=self._config.normalize_embeddings,
@@ -174,7 +191,7 @@ class EmbeddingService:
             logger.error(f"Failed to generate query embedding: {e}")
             raise
 
-    def embed_batch(
+    async def embed_batch(
         self, texts: List[str], batch_size: Optional[int] = None
     ) -> List[List[float]]:
         """
@@ -190,11 +207,15 @@ class EmbeddingService:
         if not texts:
             return []
 
+        await self._ensure_model_loaded()
+
         prepared_texts = self._prepare_texts(texts, is_query=False)
         effective_batch_size = batch_size or self._config.batch_size
 
         try:
-            embeddings = self._model.encode(
+            # Run in separate thread to avoid blocking event loop
+            embeddings = await asyncio.to_thread(
+                self._model.encode,
                 prepared_texts,
                 batch_size=effective_batch_size,
                 show_progress_bar=True,
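The embedding_service.py change converts the public API to async: the model now lazy-loads behind an asyncio.Lock with a double-check, and the blocking SentenceTransformer calls run via asyncio.to_thread so they never stall the event loop. A minimal consumption sketch (get_embedding_service and the method names come from this diff; the surrounding script, and calling the accessor with its default config, are illustrative assumptions):

```python
import asyncio

from agent_server.core.embedding_service import get_embedding_service


async def main() -> None:
    service = get_embedding_service()  # assumes the config argument is optional

    # First awaited call triggers _ensure_model_loaded(); concurrent callers
    # wait on the same asyncio.Lock instead of loading the model twice.
    doc_vectors = await service.embed_texts(["first passage", "second passage"])

    # E5 models get a "query: " prefix here vs. "passage: " above.
    query_vector = await service.embed_query("what does the first passage say?")

    print(len(doc_vectors), len(query_vector))  # 2, 384 for multilingual-e5-small


asyncio.run(main())
```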
agent_server/core/rag_manager.py
CHANGED
@@ -88,13 +88,33 @@ class RAGManager:
         self._client = self._create_qdrant_client()
         logger.info("Qdrant client initialized")
 
-        # 2. Initialize embedding service
-
+        # 2. Initialize embedding service (local or vLLM backend)
+        import os
 
-
-
-
-
+        embedding_backend = os.environ.get(
+            "HDSP_EMBEDDING_BACKEND", "local"
+        ).lower()
+
+        if embedding_backend == "vllm":
+            from agent_server.core.vllm_embedding_service import (
+                get_vllm_embedding_service,
+            )
+
+            self._embedding_service = get_vllm_embedding_service(
+                self._config.embedding
+            )
+            logger.info(
+                f"vLLM Embedding service initialized (dim={self._embedding_service.dimension})"
+            )
+        else:
+            from agent_server.core.embedding_service import get_embedding_service
+
+            self._embedding_service = get_embedding_service(self._config.embedding)
+            # Load model to get dimension
+            await self._embedding_service._ensure_model_loaded()
+            logger.info(
+                f"Local Embedding service initialized (dim={self._embedding_service.dimension})"
+            )
 
         # 3. Ensure collection exists
         await self._ensure_collection()
@@ -151,26 +171,29 @@ class RAGManager:
         )
 
         cfg = self._config.qdrant
+        mode = cfg.get_mode()  # Use get_mode() for env override
 
-        if cfg.mode == "local":
+        if mode == "local":
             # Local file-based storage
             local_path = cfg.get_local_path()
             Path(local_path).mkdir(parents=True, exist_ok=True)
             logger.info(f"Initializing Qdrant in local mode: {local_path}")
             return QdrantClient(path=local_path)
 
-        elif cfg.mode == "server":
+        elif mode == "server":
             # Docker or external server
-
-
+            url = cfg.get_url()  # Use get_url() for env override
+            logger.info(f"Connecting to Qdrant server: {url}")
+            return QdrantClient(url=url)
 
-        elif cfg.mode == "cloud":
+        elif mode == "cloud":
             # Qdrant Cloud
+            url = cfg.get_url()  # Use get_url() for env override
             logger.info("Connecting to Qdrant Cloud")
-            return QdrantClient(url=cfg.url, api_key=cfg.api_key)
+            return QdrantClient(url=url, api_key=cfg.api_key)
 
         else:
-            raise ValueError(f"Unknown Qdrant mode: {cfg.mode}")
+            raise ValueError(f"Unknown Qdrant mode: {mode}")
 
     async def _ensure_collection(self) -> None:
         """Create collection if it doesn't exist."""
@@ -274,7 +297,7 @@ class RAGManager:
             )
 
             if chunks:
-                self._index_chunks(chunks, file_path)
+                await self._index_chunks(chunks, file_path)
                 indexed += 1
                 self._index_stats["total_documents"] += 1
                 self._index_stats["total_chunks"] += len(chunks)
@@ -345,13 +368,13 @@ class RAGManager:
         else:
             return "general"
 
-    def _index_chunks(self, chunks: List[Dict], file_path: Path) -> None:
+    async def _index_chunks(self, chunks: List[Dict], file_path: Path) -> None:
         """Index document chunks to Qdrant."""
         from qdrant_client.models import PointStruct
 
         # Generate embeddings
         texts = [c["content"] for c in chunks]
-        embeddings = self._embedding_service.embed_texts(texts)
+        embeddings = await self._embedding_service.embed_texts(texts)
 
         # Add content hash to all chunks
         file_hash = self._compute_file_hash(file_path)
@@ -430,7 +453,7 @@ class RAGManager:
                 )
 
                 if chunks:
-                    self._index_chunks(chunks, file_path)
+                    await self._index_chunks(chunks, file_path)
                     logger.info(f"Reindexed: {file_path}")
             except Exception as e:
                 logger.error(f"Failed to reindex {file_path}: {e}")
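rag_manager.py now dispatches between the two embedding backends purely through environment variables. A hedged configuration sketch (the variable names come from this diff; the values shown are examples, not requirements):

```python
import os

# Select the vLLM backend; anything else (or unset) keeps local sentence-transformers.
os.environ["HDSP_EMBEDDING_BACKEND"] = "vllm"

# Settings read by VLLMEmbeddingService.__init__ (defaults per the diff).
os.environ["HDSP_VLLM_ENDPOINT"] = "http://localhost:8000"  # example server URL
os.environ["HDSP_VLLM_MODEL"] = "qwen3-embedding-8b"
os.environ["HDSP_VLLM_DIMENSION"] = "8192"  # must match the served model's output size

# Qdrant mode and URL are likewise resolved through cfg.get_mode() / cfg.get_url(),
# so env overrides can point the client at local storage, a server, or Qdrant Cloud.
```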
agent_server/core/retriever.py
CHANGED
@@ -84,21 +84,24 @@ class Retriever:
         effective_threshold = score_threshold or self._config.score_threshold
 
         # Generate query embedding
-        query_embedding = self._embedding_service.embed_query(query)
+        query_embedding = await self._embedding_service.embed_query(query)
 
         # Build filter condition
         qdrant_filter = self._build_filter(filters) if filters else None
 
         # Dense vector search
         try:
-            results = self._client.search(
+            response = self._client.query_points(
                 collection_name=self._config.qdrant.collection_name,
-                query_vector=query_embedding,
+                query=query_embedding,
                 query_filter=qdrant_filter,
                 limit=effective_top_k,
                 score_threshold=effective_threshold
                 * 0.5,  # Lower for initial retrieval
+                with_payload=True,
+                with_vectors=False,
             )
+            results = response.points
         except Exception as e:
             logger.error(f"Search failed: {e}")
             return []
@@ -193,7 +196,7 @@ class Retriever:
         effective_threshold = score_threshold or self._config.score_threshold
 
         # Generate query embedding
-        query_embedding = self._embedding_service.embed_query(query)
+        query_embedding = await self._embedding_service.embed_query(query)
 
         # Build filter condition
         qdrant_filter = self._build_filter(filters) if filters else None
@@ -201,13 +204,16 @@ class Retriever:
         # Vector search with timing
         try:
             # For debugging, fetch more results (3x) at a lower threshold
-            results = self._client.search(
+            response = self._client.query_points(
                 collection_name=self._config.qdrant.collection_name,
-                query_vector=query_embedding,
+                query=query_embedding,
                 query_filter=qdrant_filter,
                 limit=effective_top_k * 3,
                 score_threshold=effective_threshold * 0.3,
+                with_payload=True,
+                with_vectors=False,
             )
+            results = response.points
         except Exception as e:
             logger.error(f"Search failed: {e}")
             return DebugSearchResult(
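Besides awaiting embed_query, retriever.py migrates from qdrant-client's deprecated search() to query_points(), which returns a QueryResponse object rather than a bare list, so hits are read from .points. A minimal sketch of the API difference (collection name and vector are placeholders, and a "docs" collection is assumed to exist):

```python
from qdrant_client import QdrantClient

client = QdrantClient(path="/tmp/qdrant-demo")  # placeholder local instance
query_vector = [0.0] * 384  # placeholder query embedding

# Old, deprecated call: returned List[ScoredPoint] directly.
#   results = client.search(
#       collection_name="docs", query_vector=query_vector, limit=5
#   )

# New call: returns QueryResponse; the scored points sit under .points.
response = client.query_points(
    collection_name="docs",
    query=query_vector,
    limit=5,
    with_payload=True,   # return stored payloads with each hit
    with_vectors=False,  # skip shipping the vectors back
)
results = response.points
```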
agent_server/core/vllm_embedding_service.py
ADDED

@@ -0,0 +1,246 @@
+"""
+vLLM Embedding Service - Remote embedding generation using vLLM server.
+
+Features:
+- GPU-accelerated embeddings via vLLM server
+- OpenAI-compatible API interface
+- Retry logic for reliability
+- Support for large models (qwen3-embedding-8b, gte-Qwen2-7B, etc.)
+
+Prerequisites:
+- vLLM embedding server running (e.g., http://10.222.52.31:8000)
+- Model loaded on vLLM server
+"""
+
+import logging
+import os
+from typing import TYPE_CHECKING, List, Optional
+
+import httpx
+
+if TYPE_CHECKING:
+    from hdsp_agent_core.models.rag import EmbeddingConfig
+
+logger = logging.getLogger(__name__)
+
+
+class VLLMEmbeddingService:
+    """
+    Remote embedding generation using vLLM server.
+
+    Design Principles:
+    - Stateless client (vLLM server holds the model)
+    - Retry logic for network resilience
+    - OpenAI-compatible API interface
+
+    Usage:
+        service = get_vllm_embedding_service()
+        embeddings = service.embed_texts(["text1", "text2"])
+        query_embedding = service.embed_query("search query")
+    """
+
+    _instance: Optional["VLLMEmbeddingService"] = None
+    _initialized: bool = False
+
+    def __new__(cls, *args, **kwargs):
+        if cls._instance is None:
+            cls._instance = super().__new__(cls)
+        return cls._instance
+
+    def __init__(self, config: Optional["EmbeddingConfig"] = None):
+        if self._initialized:
+            return
+        self._initialized = True
+
+        from hdsp_agent_core.models.rag import EmbeddingConfig
+
+        self._config = config or EmbeddingConfig()
+
+        # vLLM configuration from environment variables
+        self._endpoint = os.environ.get("HDSP_VLLM_ENDPOINT", "http://localhost:8000")
+        self._model = os.environ.get("HDSP_VLLM_MODEL", "qwen3-embedding-8b")
+        self._dimension = int(os.environ.get("HDSP_VLLM_DIMENSION", "8192"))
+
+        # HTTP client with retry
+        self._client = httpx.AsyncClient(
+            base_url=self._endpoint,
+            timeout=httpx.Timeout(30.0),
+            limits=httpx.Limits(max_keepalive_connections=5, max_connections=10),
+        )
+
+        logger.info(
+            f"vLLM Embedding Service initialized: "
+            f"endpoint={self._endpoint}, model={self._model}, dim={self._dimension}"
+        )
+
+    @property
+    def dimension(self) -> int:
+        """Get embedding dimension"""
+        return self._dimension
+
+    async def _call_vllm_api(
+        self, texts: List[str], max_retries: int = 3
+    ) -> List[List[float]]:
+        """
+        Call vLLM embedding API with retry logic.
+
+        Args:
+            texts: List of text strings to embed
+            max_retries: Maximum number of retry attempts
+
+        Returns:
+            List of embedding vectors
+
+        Raises:
+            Exception if all retries fail
+        """
+        payload = {
+            "model": self._model,
+            "input": texts,
+        }
+
+        last_error = None
+        for attempt in range(max_retries):
+            try:
+                response = await self._client.post("/v1/embeddings", json=payload)
+                response.raise_for_status()
+
+                data = response.json()
+                # Sort by index to ensure correct order
+                sorted_items = sorted(data["data"], key=lambda x: x["index"])
+                embeddings = [item["embedding"] for item in sorted_items]
+                return embeddings
+
+            except httpx.HTTPStatusError as e:
+                last_error = e
+                logger.warning(
+                    f"vLLM API HTTP error (attempt {attempt + 1}/{max_retries}): "
+                    f"{e.response.status_code} - {e.response.text}"
+                )
+            except httpx.RequestError as e:
+                last_error = e
+                logger.warning(
+                    f"vLLM API connection error (attempt {attempt + 1}/{max_retries}): {e}"
+                )
+            except Exception as e:
+                last_error = e
+                logger.error(f"Unexpected error calling vLLM API: {e}")
+                break
+
+        raise Exception(
+            f"Failed to connect to vLLM after {max_retries} attempts: {last_error}"
+        )
+
+    async def embed_texts(self, texts: List[str]) -> List[List[float]]:
+        """
+        Generate embeddings for a list of texts (documents/passages).
+
+        Args:
+            texts: List of text strings to embed
+
+        Returns:
+            List of embedding vectors (as lists of floats)
+        """
+        if not texts:
+            return []
+
+        try:
+            return await self._call_vllm_api(texts)
+        except Exception as e:
+            logger.error(f"Failed to generate embeddings via vLLM: {e}")
+            raise
+
+    async def embed_query(self, query: str) -> List[float]:
+        """
+        Generate embedding for a single query.
+
+        Args:
+            query: Query string
+
+        Returns:
+            Embedding vector as list of floats
+        """
+        if not query:
+            raise ValueError("Query cannot be empty")
+
+        try:
+            embeddings = await self._call_vllm_api([query])
+            return embeddings[0]
+        except Exception as e:
+            logger.error(f"Failed to generate query embedding via vLLM: {e}")
+            raise
+
+    async def embed_batch(
+        self, texts: List[str], batch_size: Optional[int] = None
+    ) -> List[List[float]]:
+        """
+        Generate embeddings with batching for large document sets.
+
+        Args:
+            texts: List of text strings to embed
+            batch_size: Override default batch size (for vLLM, can handle large batches)
+
+        Returns:
+            List of embedding vectors
+        """
+        if not texts:
+            return []
+
+        # vLLM can handle large batches efficiently
+        effective_batch_size = batch_size or 100
+        all_embeddings = []
+
+        for i in range(0, len(texts), effective_batch_size):
+            batch = texts[i : i + effective_batch_size]
+            embeddings = await self._call_vllm_api(batch)
+            all_embeddings.extend(embeddings)
+
+        return all_embeddings
+
+    def get_model_info(self) -> dict:
+        """Get information about the vLLM embedding service"""
+        return {
+            "backend": "vllm",
+            "endpoint": self._endpoint,
+            "model_name": self._model,
+            "dimension": self._dimension,
+        }
+
+    async def close(self):
+        """Close HTTP client connection"""
+        await self._client.aclose()
+
+
+# ============ Singleton Accessor ============
+
+_vllm_embedding_service: Optional[VLLMEmbeddingService] = None
+
+
+def get_vllm_embedding_service(
+    config: Optional["EmbeddingConfig"] = None,
+) -> VLLMEmbeddingService:
+    """
+    Get the singleton VLLMEmbeddingService instance.
+
+    Args:
+        config: Optional EmbeddingConfig (only used on first call)
+
+    Returns:
+        VLLMEmbeddingService singleton instance
+    """
+    global _vllm_embedding_service
+    if _vllm_embedding_service is None:
+        _vllm_embedding_service = VLLMEmbeddingService(config)
+    return _vllm_embedding_service
+
+
+def reset_vllm_embedding_service() -> None:
+    """
+    Reset the singleton instance (for testing purposes).
+    """
+    global _vllm_embedding_service
+    if _vllm_embedding_service is not None:
+        _vllm_embedding_service._initialized = False
+    _vllm_embedding_service = None
+    VLLMEmbeddingService._instance = None
+    VLLMEmbeddingService._initialized = False