hdsp-jupyter-extension 2.0.7__py3-none-any.whl → 2.0.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (84)
  1. agent_server/core/embedding_service.py +67 -46
  2. agent_server/core/rag_manager.py +40 -17
  3. agent_server/core/retriever.py +12 -6
  4. agent_server/core/vllm_embedding_service.py +246 -0
  5. agent_server/langchain/ARCHITECTURE.md +7 -51
  6. agent_server/langchain/agent.py +39 -20
  7. agent_server/langchain/custom_middleware.py +206 -62
  8. agent_server/langchain/hitl_config.py +6 -9
  9. agent_server/langchain/llm_factory.py +85 -1
  10. agent_server/langchain/logging_utils.py +52 -13
  11. agent_server/langchain/prompts.py +85 -45
  12. agent_server/langchain/tools/__init__.py +14 -10
  13. agent_server/langchain/tools/file_tools.py +266 -40
  14. agent_server/langchain/tools/file_utils.py +334 -0
  15. agent_server/langchain/tools/jupyter_tools.py +0 -1
  16. agent_server/langchain/tools/lsp_tools.py +264 -0
  17. agent_server/langchain/tools/resource_tools.py +12 -12
  18. agent_server/langchain/tools/search_tools.py +3 -158
  19. agent_server/main.py +7 -0
  20. agent_server/routers/langchain_agent.py +207 -102
  21. agent_server/routers/rag.py +8 -3
  22. hdsp_agent_core/models/rag.py +15 -1
  23. hdsp_agent_core/services/rag_service.py +6 -1
  24. {hdsp_jupyter_extension-2.0.7.data → hdsp_jupyter_extension-2.0.10.data}/data/share/jupyter/labextensions/hdsp-agent/build_log.json +1 -1
  25. {hdsp_jupyter_extension-2.0.7.data → hdsp_jupyter_extension-2.0.10.data}/data/share/jupyter/labextensions/hdsp-agent/package.json +3 -2
  26. hdsp_jupyter_extension-2.0.7.data/data/share/jupyter/labextensions/hdsp-agent/static/frontend_styles_index_js.4770ec0fb2d173b6deb4.js → hdsp_jupyter_extension-2.0.10.data/data/share/jupyter/labextensions/hdsp-agent/static/frontend_styles_index_js.2d9fb488c82498c45c2d.js +251 -5
  27. hdsp_jupyter_extension-2.0.10.data/data/share/jupyter/labextensions/hdsp-agent/static/frontend_styles_index_js.2d9fb488c82498c45c2d.js.map +1 -0
  28. hdsp_jupyter_extension-2.0.7.data/data/share/jupyter/labextensions/hdsp-agent/static/lib_index_js.29cf4312af19e86f82af.js → hdsp_jupyter_extension-2.0.10.data/data/share/jupyter/labextensions/hdsp-agent/static/lib_index_js.dc6434bee96ab03a0539.js +1831 -274
  29. hdsp_jupyter_extension-2.0.10.data/data/share/jupyter/labextensions/hdsp-agent/static/lib_index_js.dc6434bee96ab03a0539.js.map +1 -0
  30. hdsp_jupyter_extension-2.0.7.data/data/share/jupyter/labextensions/hdsp-agent/static/remoteEntry.61343eb4cf0577e74b50.js → hdsp_jupyter_extension-2.0.10.data/data/share/jupyter/labextensions/hdsp-agent/static/remoteEntry.4a252df3ade74efee8d6.js +11 -9
  31. hdsp_jupyter_extension-2.0.10.data/data/share/jupyter/labextensions/hdsp-agent/static/remoteEntry.4a252df3ade74efee8d6.js.map +1 -0
  32. jupyter_ext/labextension/static/vendors-node_modules_emotion_cache_dist_emotion-cache_browser_development_esm_js-node_modules-782ee5.d9ed8645ef1d311657d8.js → hdsp_jupyter_extension-2.0.10.data/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_emotion_cache_dist_emotion-cache_browser_development_esm_js.24edcc52a1c014a8a5f0.js +2 -209
  33. hdsp_jupyter_extension-2.0.10.data/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_emotion_cache_dist_emotion-cache_browser_development_esm_js.24edcc52a1c014a8a5f0.js.map +1 -0
  34. jupyter_ext/labextension/static/vendors-node_modules_emotion_react_dist_emotion-react_browser_development_esm_js.36b49c71871f98d4f549.js → hdsp_jupyter_extension-2.0.10.data/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_emotion_react_dist_emotion-react_browser_development_esm_js.19ecf6babe00caff6b8a.js +209 -2
  35. hdsp_jupyter_extension-2.0.10.data/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_emotion_react_dist_emotion-react_browser_development_esm_js.19ecf6babe00caff6b8a.js.map +1 -0
  36. jupyter_ext/labextension/static/vendors-node_modules_mui_material_utils_createSvgIcon_js.2e13df4ea61496e95d45.js → hdsp_jupyter_extension-2.0.10.data/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_mui_material_utils_createSvgIcon_js.1f5038488cdfd8b3a85d.js +212 -3
  37. hdsp_jupyter_extension-2.0.10.data/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_mui_material_utils_createSvgIcon_js.1f5038488cdfd8b3a85d.js.map +1 -0
  38. {hdsp_jupyter_extension-2.0.7.dist-info → hdsp_jupyter_extension-2.0.10.dist-info}/METADATA +1 -3
  39. hdsp_jupyter_extension-2.0.10.dist-info/RECORD +144 -0
  40. jupyter_ext/__init__.py +18 -0
  41. jupyter_ext/_version.py +1 -1
  42. jupyter_ext/handlers.py +176 -1
  43. jupyter_ext/labextension/build_log.json +1 -1
  44. jupyter_ext/labextension/package.json +3 -2
  45. jupyter_ext/labextension/static/{frontend_styles_index_js.4770ec0fb2d173b6deb4.js → frontend_styles_index_js.2d9fb488c82498c45c2d.js} +251 -5
  46. jupyter_ext/labextension/static/frontend_styles_index_js.2d9fb488c82498c45c2d.js.map +1 -0
  47. jupyter_ext/labextension/static/{lib_index_js.29cf4312af19e86f82af.js → lib_index_js.dc6434bee96ab03a0539.js} +1831 -274
  48. jupyter_ext/labextension/static/lib_index_js.dc6434bee96ab03a0539.js.map +1 -0
  49. jupyter_ext/labextension/static/{remoteEntry.61343eb4cf0577e74b50.js → remoteEntry.4a252df3ade74efee8d6.js} +11 -9
  50. jupyter_ext/labextension/static/remoteEntry.4a252df3ade74efee8d6.js.map +1 -0
  51. hdsp_jupyter_extension-2.0.7.data/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_emotion_cache_dist_emotion-cache_browser_development_esm_js-node_modules-782ee5.d9ed8645ef1d311657d8.js → jupyter_ext/labextension/static/vendors-node_modules_emotion_cache_dist_emotion-cache_browser_development_esm_js.24edcc52a1c014a8a5f0.js +2 -209
  52. jupyter_ext/labextension/static/vendors-node_modules_emotion_cache_dist_emotion-cache_browser_development_esm_js.24edcc52a1c014a8a5f0.js.map +1 -0
  53. hdsp_jupyter_extension-2.0.7.data/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_emotion_react_dist_emotion-react_browser_development_esm_js.36b49c71871f98d4f549.js → jupyter_ext/labextension/static/vendors-node_modules_emotion_react_dist_emotion-react_browser_development_esm_js.19ecf6babe00caff6b8a.js +209 -2
  54. jupyter_ext/labextension/static/vendors-node_modules_emotion_react_dist_emotion-react_browser_development_esm_js.19ecf6babe00caff6b8a.js.map +1 -0
  55. hdsp_jupyter_extension-2.0.7.data/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_mui_material_utils_createSvgIcon_js.2e13df4ea61496e95d45.js → jupyter_ext/labextension/static/vendors-node_modules_mui_material_utils_createSvgIcon_js.1f5038488cdfd8b3a85d.js +212 -3
  56. jupyter_ext/labextension/static/vendors-node_modules_mui_material_utils_createSvgIcon_js.1f5038488cdfd8b3a85d.js.map +1 -0
  57. hdsp_jupyter_extension-2.0.7.data/data/share/jupyter/labextensions/hdsp-agent/static/frontend_styles_index_js.4770ec0fb2d173b6deb4.js.map +0 -1
  58. hdsp_jupyter_extension-2.0.7.data/data/share/jupyter/labextensions/hdsp-agent/static/lib_index_js.29cf4312af19e86f82af.js.map +0 -1
  59. hdsp_jupyter_extension-2.0.7.data/data/share/jupyter/labextensions/hdsp-agent/static/remoteEntry.61343eb4cf0577e74b50.js.map +0 -1
  60. hdsp_jupyter_extension-2.0.7.data/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_emotion_cache_dist_emotion-cache_browser_development_esm_js-node_modules-782ee5.d9ed8645ef1d311657d8.js.map +0 -1
  61. hdsp_jupyter_extension-2.0.7.data/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_emotion_react_dist_emotion-react_browser_development_esm_js.36b49c71871f98d4f549.js.map +0 -1
  62. hdsp_jupyter_extension-2.0.7.data/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_mui_material_utils_createSvgIcon_js.2e13df4ea61496e95d45.js.map +0 -1
  63. hdsp_jupyter_extension-2.0.7.dist-info/RECORD +0 -141
  64. jupyter_ext/labextension/static/frontend_styles_index_js.4770ec0fb2d173b6deb4.js.map +0 -1
  65. jupyter_ext/labextension/static/lib_index_js.29cf4312af19e86f82af.js.map +0 -1
  66. jupyter_ext/labextension/static/remoteEntry.61343eb4cf0577e74b50.js.map +0 -1
  67. jupyter_ext/labextension/static/vendors-node_modules_emotion_cache_dist_emotion-cache_browser_development_esm_js-node_modules-782ee5.d9ed8645ef1d311657d8.js.map +0 -1
  68. jupyter_ext/labextension/static/vendors-node_modules_emotion_react_dist_emotion-react_browser_development_esm_js.36b49c71871f98d4f549.js.map +0 -1
  69. jupyter_ext/labextension/static/vendors-node_modules_mui_material_utils_createSvgIcon_js.2e13df4ea61496e95d45.js.map +0 -1
  70. {hdsp_jupyter_extension-2.0.7.data → hdsp_jupyter_extension-2.0.10.data}/data/etc/jupyter/jupyter_server_config.d/hdsp_jupyter_extension.json +0 -0
  71. {hdsp_jupyter_extension-2.0.7.data → hdsp_jupyter_extension-2.0.10.data}/data/share/jupyter/labextensions/hdsp-agent/install.json +0 -0
  72. {hdsp_jupyter_extension-2.0.7.data → hdsp_jupyter_extension-2.0.10.data}/data/share/jupyter/labextensions/hdsp-agent/static/node_modules_emotion_use-insertion-effect-with-fallbacks_dist_emotion-use-insertion-effect-wi-3ba6b80.c095373419d05e6f141a.js +0 -0
  73. {hdsp_jupyter_extension-2.0.7.data → hdsp_jupyter_extension-2.0.10.data}/data/share/jupyter/labextensions/hdsp-agent/static/node_modules_emotion_use-insertion-effect-with-fallbacks_dist_emotion-use-insertion-effect-wi-3ba6b80.c095373419d05e6f141a.js.map +0 -0
  74. {hdsp_jupyter_extension-2.0.7.data → hdsp_jupyter_extension-2.0.10.data}/data/share/jupyter/labextensions/hdsp-agent/static/node_modules_emotion_use-insertion-effect-with-fallbacks_dist_emotion-use-insertion-effect-wi-3ba6b81.61e75fb98ecff46cf836.js +0 -0
  75. {hdsp_jupyter_extension-2.0.7.data → hdsp_jupyter_extension-2.0.10.data}/data/share/jupyter/labextensions/hdsp-agent/static/node_modules_emotion_use-insertion-effect-with-fallbacks_dist_emotion-use-insertion-effect-wi-3ba6b81.61e75fb98ecff46cf836.js.map +0 -0
  76. {hdsp_jupyter_extension-2.0.7.data → hdsp_jupyter_extension-2.0.10.data}/data/share/jupyter/labextensions/hdsp-agent/static/style.js +0 -0
  77. {hdsp_jupyter_extension-2.0.7.data → hdsp_jupyter_extension-2.0.10.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_babel_runtime_helpers_esm_extends_js-node_modules_emotion_serialize_dist-051195.e2553aab0c3963b83dd7.js +0 -0
  78. {hdsp_jupyter_extension-2.0.7.data → hdsp_jupyter_extension-2.0.10.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_babel_runtime_helpers_esm_extends_js-node_modules_emotion_serialize_dist-051195.e2553aab0c3963b83dd7.js.map +0 -0
  79. {hdsp_jupyter_extension-2.0.7.data → hdsp_jupyter_extension-2.0.10.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_emotion_styled_dist_emotion-styled_browser_development_esm_js.661fb5836f4978a7c6e1.js +0 -0
  80. {hdsp_jupyter_extension-2.0.7.data → hdsp_jupyter_extension-2.0.10.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_emotion_styled_dist_emotion-styled_browser_development_esm_js.661fb5836f4978a7c6e1.js.map +0 -0
  81. {hdsp_jupyter_extension-2.0.7.data → hdsp_jupyter_extension-2.0.10.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_mui_material_index_js.985697e0162d8d088ca2.js +0 -0
  82. {hdsp_jupyter_extension-2.0.7.data → hdsp_jupyter_extension-2.0.10.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_mui_material_index_js.985697e0162d8d088ca2.js.map +0 -0
  83. {hdsp_jupyter_extension-2.0.7.dist-info → hdsp_jupyter_extension-2.0.10.dist-info}/WHEEL +0 -0
  84. {hdsp_jupyter_extension-2.0.7.dist-info → hdsp_jupyter_extension-2.0.10.dist-info}/licenses/LICENSE +0 -0
@@ -4,13 +4,14 @@ Local Embedding Service - Wraps sentence-transformers for local embedding genera
4
4
  Features:
5
5
  - Zero external API calls (data sovereignty)
6
6
  - Lazy model loading (only when first needed)
7
- - Thread-safe singleton pattern
7
+ - Thread-safe singleton pattern with async support
8
8
  - Configurable model and device
9
9
  - E5 model prefix handling for optimal performance
10
10
 
11
11
  Default model: intfloat/multilingual-e5-small (384 dimensions, Korean support)
12
12
  """
13
13
 
14
+ import asyncio
14
15
  import logging
15
16
  from typing import TYPE_CHECKING, List, Optional
16
17
 
@@ -55,51 +56,59 @@ class EmbeddingService:
55
56
  self._model = None
56
57
  self._dimension: Optional[int] = None
57
58
  self._is_e5_model: bool = False
59
+ self._load_lock = asyncio.Lock() # Thread-safe lazy loading
58
60
 
59
- @property
60
- def model(self):
61
- """Lazy load the embedding model"""
62
- if self._model is None:
63
- self._load_model()
64
- return self._model
65
-
66
- def _load_model(self) -> None:
67
- """Load the sentence-transformers model"""
68
- try:
69
- from sentence_transformers import SentenceTransformer
70
- except ImportError:
71
- raise ImportError(
72
- "sentence-transformers is required for RAG. "
73
- "Install with: pip install sentence-transformers"
74
- )
75
-
76
- model_name = self._config.get_model_name()
77
- device = self._config.get_device()
78
-
79
- logger.info(f"Loading embedding model: {model_name} on {device}")
80
-
81
- try:
82
- self._model = SentenceTransformer(
83
- model_name, device=device, cache_folder=self._config.cache_folder
84
- )
85
- self._dimension = self._model.get_sentence_embedding_dimension()
86
-
87
- # Check if E5 model (requires special prefix)
88
- self._is_e5_model = "e5" in model_name.lower()
61
+ async def _ensure_model_loaded(self):
62
+ """Lazy load the embedding model (thread-safe, async)"""
63
+ if self._model is not None:
64
+ return
89
65
 
90
- logger.info(
91
- f"Embedding model loaded successfully. "
92
- f"Dimension: {self._dimension}, E5 model: {self._is_e5_model}"
93
- )
94
- except Exception as e:
95
- logger.error(f"Failed to load embedding model: {e}")
96
- raise
66
+ async with self._load_lock:
67
+ # Double-check after acquiring lock
68
+ if self._model is not None:
69
+ return
70
+
71
+ try:
72
+ from sentence_transformers import SentenceTransformer
73
+ except ImportError:
74
+ raise ImportError(
75
+ "sentence-transformers is required for RAG. "
76
+ "Install with: pip install sentence-transformers"
77
+ )
78
+
79
+ model_name = self._config.get_model_name()
80
+ device = self._config.get_device()
81
+
82
+ logger.info(f"Loading embedding model: {model_name} on {device}")
83
+
84
+ try:
85
+ # Load model in separate thread to avoid blocking event loop
86
+ self._model = await asyncio.to_thread(
87
+ SentenceTransformer,
88
+ model_name,
89
+ device=device,
90
+ cache_folder=self._config.cache_folder,
91
+ )
92
+ self._dimension = self._model.get_sentence_embedding_dimension()
93
+
94
+ # Check if E5 model (requires special prefix)
95
+ self._is_e5_model = "e5" in model_name.lower()
96
+
97
+ logger.info(
98
+ f"Embedding model loaded successfully. "
99
+ f"Dimension: {self._dimension}, E5 model: {self._is_e5_model}"
100
+ )
101
+ except Exception as e:
102
+ logger.error(f"Failed to load embedding model: {e}")
103
+ raise
97
104
 
98
105
  @property
99
106
  def dimension(self) -> int:
100
- """Get embedding dimension (triggers model load if needed)"""
107
+ """Get embedding dimension (must be loaded first)"""
101
108
  if self._dimension is None:
102
- _ = self.model # Trigger lazy load
109
+ raise RuntimeError(
110
+ "Embedding dimension not available. Model not loaded yet."
111
+ )
103
112
  return self._dimension
104
113
 
105
114
  def _prepare_texts(self, texts: List[str], is_query: bool = False) -> List[str]:
@@ -116,7 +125,7 @@ class EmbeddingService:
116
125
  prefix = "query: " if is_query else "passage: "
117
126
  return [prefix + text for text in texts]
118
127
 
119
- def embed_texts(self, texts: List[str]) -> List[List[float]]:
128
+ async def embed_texts(self, texts: List[str]) -> List[List[float]]:
120
129
  """
121
130
  Generate embeddings for a list of texts (documents/passages).
122
131
 
@@ -129,11 +138,15 @@ class EmbeddingService:
129
138
  if not texts:
130
139
  return []
131
140
 
141
+ await self._ensure_model_loaded()
142
+
132
143
  # Prepare texts with prefix if E5 model
133
144
  prepared_texts = self._prepare_texts(texts, is_query=False)
134
145
 
135
146
  try:
136
- embeddings = self.model.encode(
147
+ # Run in separate thread to avoid blocking event loop
148
+ embeddings = await asyncio.to_thread(
149
+ self._model.encode,
137
150
  prepared_texts,
138
151
  batch_size=self._config.batch_size,
139
152
  show_progress_bar=len(texts) > 100,
@@ -145,7 +158,7 @@ class EmbeddingService:
145
158
  logger.error(f"Failed to generate embeddings: {e}")
146
159
  raise
147
160
 
148
- def embed_query(self, query: str) -> List[float]:
161
+ async def embed_query(self, query: str) -> List[float]:
149
162
  """
150
163
  Generate embedding for a single query.
151
164
 
@@ -160,11 +173,15 @@ class EmbeddingService:
160
173
  if not query:
161
174
  raise ValueError("Query cannot be empty")
162
175
 
176
+ await self._ensure_model_loaded()
177
+
163
178
  # Prepare query with prefix if E5 model
164
179
  prepared_query = self._prepare_texts([query], is_query=True)[0]
165
180
 
166
181
  try:
167
- embedding = self.model.encode(
182
+ # Run in separate thread to avoid blocking event loop
183
+ embedding = await asyncio.to_thread(
184
+ self._model.encode,
168
185
  prepared_query,
169
186
  convert_to_numpy=True,
170
187
  normalize_embeddings=self._config.normalize_embeddings,
@@ -174,7 +191,7 @@ class EmbeddingService:
174
191
  logger.error(f"Failed to generate query embedding: {e}")
175
192
  raise
176
193
 
177
- def embed_batch(
194
+ async def embed_batch(
178
195
  self, texts: List[str], batch_size: Optional[int] = None
179
196
  ) -> List[List[float]]:
180
197
  """
@@ -190,11 +207,15 @@ class EmbeddingService:
190
207
  if not texts:
191
208
  return []
192
209
 
210
+ await self._ensure_model_loaded()
211
+
193
212
  prepared_texts = self._prepare_texts(texts, is_query=False)
194
213
  effective_batch_size = batch_size or self._config.batch_size
195
214
 
196
215
  try:
197
- embeddings = self.model.encode(
216
+ # Run in separate thread to avoid blocking event loop
217
+ embeddings = await asyncio.to_thread(
218
+ self._model.encode,
198
219
  prepared_texts,
199
220
  batch_size=effective_batch_size,
200
221
  show_progress_bar=True,
@@ -88,13 +88,33 @@ class RAGManager:
88
88
  self._client = self._create_qdrant_client()
89
89
  logger.info("Qdrant client initialized")
90
90
 
91
- # 2. Initialize embedding service
92
- from agent_server.core.embedding_service import get_embedding_service
91
+ # 2. Initialize embedding service (local or vLLM backend)
92
+ import os
93
93
 
94
- self._embedding_service = get_embedding_service(self._config.embedding)
95
- logger.info(
96
- f"Embedding service initialized (dim={self._embedding_service.dimension})"
97
- )
94
+ embedding_backend = os.environ.get(
95
+ "HDSP_EMBEDDING_BACKEND", "local"
96
+ ).lower()
97
+
98
+ if embedding_backend == "vllm":
99
+ from agent_server.core.vllm_embedding_service import (
100
+ get_vllm_embedding_service,
101
+ )
102
+
103
+ self._embedding_service = get_vllm_embedding_service(
104
+ self._config.embedding
105
+ )
106
+ logger.info(
107
+ f"vLLM Embedding service initialized (dim={self._embedding_service.dimension})"
108
+ )
109
+ else:
110
+ from agent_server.core.embedding_service import get_embedding_service
111
+
112
+ self._embedding_service = get_embedding_service(self._config.embedding)
113
+ # Load model to get dimension
114
+ await self._embedding_service._ensure_model_loaded()
115
+ logger.info(
116
+ f"Local Embedding service initialized (dim={self._embedding_service.dimension})"
117
+ )
98
118
 
99
119
  # 3. Ensure collection exists
100
120
  await self._ensure_collection()
@@ -151,26 +171,29 @@ class RAGManager:
151
171
  )
152
172
 
153
173
  cfg = self._config.qdrant
174
+ mode = cfg.get_mode() # Use get_mode() for env override
154
175
 
155
- if cfg.mode == "local":
176
+ if mode == "local":
156
177
  # Local file-based storage
157
178
  local_path = cfg.get_local_path()
158
179
  Path(local_path).mkdir(parents=True, exist_ok=True)
159
180
  logger.info(f"Initializing Qdrant in local mode: {local_path}")
160
181
  return QdrantClient(path=local_path)
161
182
 
162
- elif cfg.mode == "server":
183
+ elif mode == "server":
163
184
  # Docker or external server
164
- logger.info(f"Connecting to Qdrant server: {cfg.url}")
165
- return QdrantClient(url=cfg.url)
185
+ url = cfg.get_url() # Use get_url() for env override
186
+ logger.info(f"Connecting to Qdrant server: {url}")
187
+ return QdrantClient(url=url)
166
188
 
167
- elif cfg.mode == "cloud":
189
+ elif mode == "cloud":
168
190
  # Qdrant Cloud
191
+ url = cfg.get_url() # Use get_url() for env override
169
192
  logger.info("Connecting to Qdrant Cloud")
170
- return QdrantClient(url=cfg.url, api_key=cfg.api_key)
193
+ return QdrantClient(url=url, api_key=cfg.api_key)
171
194
 
172
195
  else:
173
- raise ValueError(f"Unknown Qdrant mode: {cfg.mode}")
196
+ raise ValueError(f"Unknown Qdrant mode: {mode}")
174
197
 
175
198
  async def _ensure_collection(self) -> None:
176
199
  """Create collection if it doesn't exist."""
@@ -274,7 +297,7 @@ class RAGManager:
274
297
  )
275
298
 
276
299
  if chunks:
277
- self._index_chunks(chunks, file_path)
300
+ await self._index_chunks(chunks, file_path)
278
301
  indexed += 1
279
302
  self._index_stats["total_documents"] += 1
280
303
  self._index_stats["total_chunks"] += len(chunks)
@@ -345,13 +368,13 @@ class RAGManager:
345
368
  else:
346
369
  return "general"
347
370
 
348
- def _index_chunks(self, chunks: List[Dict], file_path: Path) -> None:
371
+ async def _index_chunks(self, chunks: List[Dict], file_path: Path) -> None:
349
372
  """Index document chunks to Qdrant."""
350
373
  from qdrant_client.models import PointStruct
351
374
 
352
375
  # Generate embeddings
353
376
  texts = [c["content"] for c in chunks]
354
- embeddings = self._embedding_service.embed_texts(texts)
377
+ embeddings = await self._embedding_service.embed_texts(texts)
355
378
 
356
379
  # Add content hash to all chunks
357
380
  file_hash = self._compute_file_hash(file_path)
@@ -430,7 +453,7 @@ class RAGManager:
430
453
  )
431
454
 
432
455
  if chunks:
433
- self._index_chunks(chunks, file_path)
456
+ await self._index_chunks(chunks, file_path)
434
457
  logger.info(f"Reindexed: {file_path}")
435
458
  except Exception as e:
436
459
  logger.error(f"Failed to reindex {file_path}: {e}")
@@ -84,21 +84,24 @@ class Retriever:
84
84
  effective_threshold = score_threshold or self._config.score_threshold
85
85
 
86
86
  # Generate query embedding
87
- query_embedding = self._embedding_service.embed_query(query)
87
+ query_embedding = await self._embedding_service.embed_query(query)
88
88
 
89
89
  # Build filter condition
90
90
  qdrant_filter = self._build_filter(filters) if filters else None
91
91
 
92
92
  # Dense vector search
93
93
  try:
94
- results = self._client.search(
94
+ response = self._client.query_points(
95
95
  collection_name=self._config.qdrant.collection_name,
96
- query_vector=query_embedding,
96
+ query=query_embedding,
97
97
  query_filter=qdrant_filter,
98
98
  limit=effective_top_k,
99
99
  score_threshold=effective_threshold
100
100
  * 0.5, # Lower for initial retrieval
101
+ with_payload=True,
102
+ with_vectors=False,
101
103
  )
104
+ results = response.points
102
105
  except Exception as e:
103
106
  logger.error(f"Search failed: {e}")
104
107
  return []
@@ -193,7 +196,7 @@ class Retriever:
193
196
  effective_threshold = score_threshold or self._config.score_threshold
194
197
 
195
198
  # Generate query embedding
196
- query_embedding = self._embedding_service.embed_query(query)
199
+ query_embedding = await self._embedding_service.embed_query(query)
197
200
 
198
201
  # Build filter condition
199
202
  qdrant_filter = self._build_filter(filters) if filters else None
@@ -201,13 +204,16 @@ class Retriever:
201
204
  # Vector search with timing
202
205
  try:
203
206
  # 디버그용으로 더 많은 결과 (3배)를 낮은 threshold로 가져옴
204
- results = self._client.search(
207
+ response = self._client.query_points(
205
208
  collection_name=self._config.qdrant.collection_name,
206
- query_vector=query_embedding,
209
+ query=query_embedding,
207
210
  query_filter=qdrant_filter,
208
211
  limit=effective_top_k * 3,
209
212
  score_threshold=effective_threshold * 0.3,
213
+ with_payload=True,
214
+ with_vectors=False,
210
215
  )
216
+ results = response.points
211
217
  except Exception as e:
212
218
  logger.error(f"Search failed: {e}")
213
219
  return DebugSearchResult(
@@ -0,0 +1,246 @@
1
+ """
2
+ vLLM Embedding Service - Remote embedding generation using vLLM server.
3
+
4
+ Features:
5
+ - GPU-accelerated embeddings via vLLM server
6
+ - OpenAI-compatible API interface
7
+ - Retry logic for reliability
8
+ - Support for large models (qwen3-embedding-8b, gte-Qwen2-7B, etc.)
9
+
10
+ Prerequisites:
11
+ - vLLM embedding server running (e.g., http://10.222.52.31:8000)
12
+ - Model loaded on vLLM server
13
+ """
14
+
15
+ import logging
16
+ import os
17
+ from typing import TYPE_CHECKING, List, Optional
18
+
19
+ import httpx
20
+
21
+ if TYPE_CHECKING:
22
+ from hdsp_agent_core.models.rag import EmbeddingConfig
23
+
24
+ logger = logging.getLogger(__name__)
25
+
26
+
27
class VLLMEmbeddingService:
    """
    Remote embedding generation using a vLLM server.

    Design Principles:
    - Stateless client (vLLM server holds the model)
    - Retry logic for network resilience
    - OpenAI-compatible API interface

    The class is a process-wide singleton: every construction returns the
    same instance and only the first successful ``__init__`` configures it.
    All ``embed_*`` methods are coroutines and must be awaited.

    Usage:
        service = get_vllm_embedding_service()
        embeddings = await service.embed_texts(["text1", "text2"])
        query_embedding = await service.embed_query("search query")
    """

    _instance: Optional["VLLMEmbeddingService"] = None
    _initialized: bool = False

    def __new__(cls, *args, **kwargs):
        # Always hand back the one shared instance.
        if cls._instance is None:
            cls._instance = super().__new__(cls)
        return cls._instance

    def __init__(self, config: Optional["EmbeddingConfig"] = None):
        """
        Configure the shared instance (no-op after the first successful call).

        Args:
            config: Optional EmbeddingConfig; a default one is created when omitted.

        Raises:
            ValueError: If HDSP_VLLM_DIMENSION is not an integer.
        """
        if self._initialized:
            return

        from hdsp_agent_core.models.rag import EmbeddingConfig

        self._config = config or EmbeddingConfig()

        # vLLM configuration from environment variables
        self._endpoint = os.environ.get("HDSP_VLLM_ENDPOINT", "http://localhost:8000")
        self._model = os.environ.get("HDSP_VLLM_MODEL", "qwen3-embedding-8b")
        # NOTE(review): this value is reported as-is through `dimension`; confirm
        # the default matches the output size of the model actually served.
        self._dimension = int(os.environ.get("HDSP_VLLM_DIMENSION", "8192"))

        # Shared async HTTP client (retries are handled in _call_vllm_api)
        self._client = httpx.AsyncClient(
            base_url=self._endpoint,
            timeout=httpx.Timeout(30.0),
            limits=httpx.Limits(max_keepalive_connections=5, max_connections=10),
        )

        # Mark as initialized only once every attribute exists, so a failed
        # first construction is retried instead of leaving a half-built singleton.
        self._initialized = True

        logger.info(
            f"vLLM Embedding Service initialized: "
            f"endpoint={self._endpoint}, model={self._model}, dim={self._dimension}"
        )

    @property
    def dimension(self) -> int:
        """Get embedding dimension"""
        return self._dimension

    async def _call_vllm_api(
        self, texts: List[str], max_retries: int = 3
    ) -> List[List[float]]:
        """
        Call vLLM embedding API with retry logic.

        Connection errors and retryable HTTP statuses (5xx, 408, 429) are
        retried up to ``max_retries`` times. Other 4xx responses and
        unexpected errors abort immediately because repeating the same
        request cannot succeed.

        Args:
            texts: List of text strings to embed
            max_retries: Maximum number of attempts

        Returns:
            List of embedding vectors, ordered like ``texts``

        Raises:
            RuntimeError: If no attempt succeeded (the last error is chained
                as ``__cause__``)
        """
        payload = {
            "model": self._model,
            "input": texts,
        }

        last_error: Optional[Exception] = None
        attempts = 0
        for attempt in range(max_retries):
            attempts = attempt + 1
            try:
                response = await self._client.post("/v1/embeddings", json=payload)
                response.raise_for_status()

                data = response.json()
                # Sort by index to ensure correct order
                sorted_items = sorted(data["data"], key=lambda x: x["index"])
                return [item["embedding"] for item in sorted_items]

            except httpx.HTTPStatusError as e:
                last_error = e
                status = e.response.status_code
                logger.warning(
                    f"vLLM API HTTP error (attempt {attempt + 1}/{max_retries}): "
                    f"{status} - {e.response.text}"
                )
                # A client error means the request itself is wrong; only
                # timeout (408) and rate-limit (429) are worth retrying.
                if 400 <= status < 500 and status not in (408, 429):
                    break
            except httpx.RequestError as e:
                last_error = e
                logger.warning(
                    f"vLLM API connection error (attempt {attempt + 1}/{max_retries}): {e}"
                )
            except Exception as e:
                last_error = e
                logger.error(f"Unexpected error calling vLLM API: {e}")
                break

        raise RuntimeError(
            f"vLLM embedding request failed after {attempts} attempt(s): {last_error}"
        ) from last_error

    async def embed_texts(self, texts: List[str]) -> List[List[float]]:
        """
        Generate embeddings for a list of texts (documents/passages).

        Args:
            texts: List of text strings to embed

        Returns:
            List of embedding vectors (as lists of floats); empty for empty input
        """
        if not texts:
            return []

        try:
            return await self._call_vllm_api(texts)
        except Exception as e:
            logger.error(f"Failed to generate embeddings via vLLM: {e}")
            raise

    async def embed_query(self, query: str) -> List[float]:
        """
        Generate embedding for a single query.

        Args:
            query: Query string

        Returns:
            Embedding vector as list of floats

        Raises:
            ValueError: If ``query`` is empty
        """
        if not query:
            raise ValueError("Query cannot be empty")

        try:
            embeddings = await self._call_vllm_api([query])
            return embeddings[0]
        except Exception as e:
            logger.error(f"Failed to generate query embedding via vLLM: {e}")
            raise

    async def embed_batch(
        self, texts: List[str], batch_size: Optional[int] = None
    ) -> List[List[float]]:
        """
        Generate embeddings with batching for large document sets.

        Args:
            texts: List of text strings to embed
            batch_size: Texts per request; ``None`` or ``0`` selects 100

        Returns:
            List of embedding vectors

        Raises:
            ValueError: If ``batch_size`` is negative
        """
        if not texts:
            return []

        # A negative step would make range() empty and silently drop every text.
        if batch_size is not None and batch_size < 0:
            raise ValueError("batch_size must be a positive integer")

        effective_batch_size = batch_size or 100
        all_embeddings: List[List[float]] = []

        try:
            for start in range(0, len(texts), effective_batch_size):
                batch = texts[start : start + effective_batch_size]
                all_embeddings.extend(await self._call_vllm_api(batch))
        except Exception as e:
            logger.error(f"Failed to generate batch embeddings via vLLM: {e}")
            raise

        return all_embeddings

    def get_model_info(self) -> dict:
        """Get information about the vLLM embedding service"""
        return {
            "backend": "vllm",
            "endpoint": self._endpoint,
            "model_name": self._model,
            "dimension": self._dimension,
        }

    async def close(self):
        """Close HTTP client connection"""
        await self._client.aclose()
212
+
213
+
214
+ # ============ Singleton Accessor ============
215
+
216
+ _vllm_embedding_service: Optional[VLLMEmbeddingService] = None
217
+
218
+
219
def get_vllm_embedding_service(
    config: Optional["EmbeddingConfig"] = None,
) -> VLLMEmbeddingService:
    """
    Return the module-level VLLMEmbeddingService, creating it on first use.

    Args:
        config: Optional EmbeddingConfig; ignored once the service exists

    Returns:
        The shared VLLMEmbeddingService instance
    """
    global _vllm_embedding_service
    service = _vllm_embedding_service
    if service is None:
        # First call: build the service and cache it for later callers.
        service = VLLMEmbeddingService(config)
        _vllm_embedding_service = service
    return service
235
+
236
+
237
def reset_vllm_embedding_service() -> None:
    """
    Drop the cached singleton so the next accessor call builds a fresh one.

    Intended for tests. The HTTP client of the discarded instance is not
    closed here.
    """
    global _vllm_embedding_service
    stale = _vllm_embedding_service
    _vllm_embedding_service = None
    if stale is not None:
        # Clear the per-instance flag so the old object could be re-initialized.
        stale._initialized = False
    # Reset the class-level singleton state as well.
    VLLMEmbeddingService._instance = None
    VLLMEmbeddingService._initialized = False