hdsp-jupyter-extension 2.0.7__py3-none-any.whl → 2.0.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (78)
  1. agent_server/core/embedding_service.py +67 -46
  2. agent_server/core/rag_manager.py +31 -17
  3. agent_server/core/retriever.py +13 -8
  4. agent_server/core/vllm_embedding_service.py +243 -0
  5. agent_server/langchain/agent.py +8 -0
  6. agent_server/langchain/custom_middleware.py +58 -31
  7. agent_server/langchain/hitl_config.py +6 -1
  8. agent_server/langchain/logging_utils.py +53 -14
  9. agent_server/langchain/prompts.py +47 -16
  10. agent_server/langchain/tools/__init__.py +13 -0
  11. agent_server/langchain/tools/file_tools.py +285 -7
  12. agent_server/langchain/tools/file_utils.py +334 -0
  13. agent_server/langchain/tools/lsp_tools.py +264 -0
  14. agent_server/main.py +7 -0
  15. agent_server/routers/langchain_agent.py +115 -19
  16. agent_server/routers/rag.py +8 -3
  17. hdsp_agent_core/models/rag.py +15 -1
  18. hdsp_agent_core/services/rag_service.py +6 -1
  19. {hdsp_jupyter_extension-2.0.7.data → hdsp_jupyter_extension-2.0.8.data}/data/share/jupyter/labextensions/hdsp-agent/build_log.json +1 -1
  20. {hdsp_jupyter_extension-2.0.7.data → hdsp_jupyter_extension-2.0.8.data}/data/share/jupyter/labextensions/hdsp-agent/package.json +3 -2
  21. hdsp_jupyter_extension-2.0.7.data/data/share/jupyter/labextensions/hdsp-agent/static/frontend_styles_index_js.4770ec0fb2d173b6deb4.js → hdsp_jupyter_extension-2.0.8.data/data/share/jupyter/labextensions/hdsp-agent/static/frontend_styles_index_js.8740a527757068814573.js +160 -3
  22. hdsp_jupyter_extension-2.0.8.data/data/share/jupyter/labextensions/hdsp-agent/static/frontend_styles_index_js.8740a527757068814573.js.map +1 -0
  23. hdsp_jupyter_extension-2.0.7.data/data/share/jupyter/labextensions/hdsp-agent/static/lib_index_js.29cf4312af19e86f82af.js → hdsp_jupyter_extension-2.0.8.data/data/share/jupyter/labextensions/hdsp-agent/static/lib_index_js.e4ff4b5779b5e049f84c.js +1759 -221
  24. hdsp_jupyter_extension-2.0.8.data/data/share/jupyter/labextensions/hdsp-agent/static/lib_index_js.e4ff4b5779b5e049f84c.js.map +1 -0
  25. hdsp_jupyter_extension-2.0.7.data/data/share/jupyter/labextensions/hdsp-agent/static/remoteEntry.61343eb4cf0577e74b50.js → hdsp_jupyter_extension-2.0.8.data/data/share/jupyter/labextensions/hdsp-agent/static/remoteEntry.020cdb0b864cfaa4e41e.js +14 -12
  26. hdsp_jupyter_extension-2.0.8.data/data/share/jupyter/labextensions/hdsp-agent/static/remoteEntry.020cdb0b864cfaa4e41e.js.map +1 -0
  27. jupyter_ext/labextension/static/vendors-node_modules_emotion_cache_dist_emotion-cache_browser_development_esm_js-node_modules-782ee5.d9ed8645ef1d311657d8.js → hdsp_jupyter_extension-2.0.8.data/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_emotion_cache_dist_emotion-cache_browser_development_esm_js.24edcc52a1c014a8a5f0.js +2 -209
  28. hdsp_jupyter_extension-2.0.8.data/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_emotion_cache_dist_emotion-cache_browser_development_esm_js.24edcc52a1c014a8a5f0.js.map +1 -0
  29. jupyter_ext/labextension/static/vendors-node_modules_emotion_react_dist_emotion-react_browser_development_esm_js.36b49c71871f98d4f549.js → hdsp_jupyter_extension-2.0.8.data/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_emotion_react_dist_emotion-react_browser_development_esm_js.19ecf6babe00caff6b8a.js +209 -2
  30. hdsp_jupyter_extension-2.0.8.data/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_emotion_react_dist_emotion-react_browser_development_esm_js.19ecf6babe00caff6b8a.js.map +1 -0
  31. jupyter_ext/labextension/static/vendors-node_modules_mui_material_utils_createSvgIcon_js.2e13df4ea61496e95d45.js → hdsp_jupyter_extension-2.0.8.data/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_mui_material_utils_createSvgIcon_js.1f5038488cdfd8b3a85d.js +212 -3
  32. hdsp_jupyter_extension-2.0.8.data/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_mui_material_utils_createSvgIcon_js.1f5038488cdfd8b3a85d.js.map +1 -0
  33. {hdsp_jupyter_extension-2.0.7.dist-info → hdsp_jupyter_extension-2.0.8.dist-info}/METADATA +1 -1
  34. {hdsp_jupyter_extension-2.0.7.dist-info → hdsp_jupyter_extension-2.0.8.dist-info}/RECORD +66 -63
  35. jupyter_ext/__init__.py +18 -0
  36. jupyter_ext/_version.py +1 -1
  37. jupyter_ext/handlers.py +176 -1
  38. jupyter_ext/labextension/build_log.json +1 -1
  39. jupyter_ext/labextension/package.json +3 -2
  40. jupyter_ext/labextension/static/{frontend_styles_index_js.4770ec0fb2d173b6deb4.js → frontend_styles_index_js.8740a527757068814573.js} +160 -3
  41. jupyter_ext/labextension/static/frontend_styles_index_js.8740a527757068814573.js.map +1 -0
  42. jupyter_ext/labextension/static/{lib_index_js.29cf4312af19e86f82af.js → lib_index_js.e4ff4b5779b5e049f84c.js} +1759 -221
  43. jupyter_ext/labextension/static/lib_index_js.e4ff4b5779b5e049f84c.js.map +1 -0
  44. jupyter_ext/labextension/static/{remoteEntry.61343eb4cf0577e74b50.js → remoteEntry.020cdb0b864cfaa4e41e.js} +14 -12
  45. jupyter_ext/labextension/static/remoteEntry.020cdb0b864cfaa4e41e.js.map +1 -0
  46. hdsp_jupyter_extension-2.0.7.data/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_emotion_cache_dist_emotion-cache_browser_development_esm_js-node_modules-782ee5.d9ed8645ef1d311657d8.js → jupyter_ext/labextension/static/vendors-node_modules_emotion_cache_dist_emotion-cache_browser_development_esm_js.24edcc52a1c014a8a5f0.js +2 -209
  47. jupyter_ext/labextension/static/vendors-node_modules_emotion_cache_dist_emotion-cache_browser_development_esm_js.24edcc52a1c014a8a5f0.js.map +1 -0
  48. hdsp_jupyter_extension-2.0.7.data/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_emotion_react_dist_emotion-react_browser_development_esm_js.36b49c71871f98d4f549.js → jupyter_ext/labextension/static/vendors-node_modules_emotion_react_dist_emotion-react_browser_development_esm_js.19ecf6babe00caff6b8a.js +209 -2
  49. jupyter_ext/labextension/static/vendors-node_modules_emotion_react_dist_emotion-react_browser_development_esm_js.19ecf6babe00caff6b8a.js.map +1 -0
  50. hdsp_jupyter_extension-2.0.7.data/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_mui_material_utils_createSvgIcon_js.2e13df4ea61496e95d45.js → jupyter_ext/labextension/static/vendors-node_modules_mui_material_utils_createSvgIcon_js.1f5038488cdfd8b3a85d.js +212 -3
  51. jupyter_ext/labextension/static/vendors-node_modules_mui_material_utils_createSvgIcon_js.1f5038488cdfd8b3a85d.js.map +1 -0
  52. hdsp_jupyter_extension-2.0.7.data/data/share/jupyter/labextensions/hdsp-agent/static/frontend_styles_index_js.4770ec0fb2d173b6deb4.js.map +0 -1
  53. hdsp_jupyter_extension-2.0.7.data/data/share/jupyter/labextensions/hdsp-agent/static/lib_index_js.29cf4312af19e86f82af.js.map +0 -1
  54. hdsp_jupyter_extension-2.0.7.data/data/share/jupyter/labextensions/hdsp-agent/static/remoteEntry.61343eb4cf0577e74b50.js.map +0 -1
  55. hdsp_jupyter_extension-2.0.7.data/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_emotion_cache_dist_emotion-cache_browser_development_esm_js-node_modules-782ee5.d9ed8645ef1d311657d8.js.map +0 -1
  56. hdsp_jupyter_extension-2.0.7.data/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_emotion_react_dist_emotion-react_browser_development_esm_js.36b49c71871f98d4f549.js.map +0 -1
  57. hdsp_jupyter_extension-2.0.7.data/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_mui_material_utils_createSvgIcon_js.2e13df4ea61496e95d45.js.map +0 -1
  58. jupyter_ext/labextension/static/frontend_styles_index_js.4770ec0fb2d173b6deb4.js.map +0 -1
  59. jupyter_ext/labextension/static/lib_index_js.29cf4312af19e86f82af.js.map +0 -1
  60. jupyter_ext/labextension/static/remoteEntry.61343eb4cf0577e74b50.js.map +0 -1
  61. jupyter_ext/labextension/static/vendors-node_modules_emotion_cache_dist_emotion-cache_browser_development_esm_js-node_modules-782ee5.d9ed8645ef1d311657d8.js.map +0 -1
  62. jupyter_ext/labextension/static/vendors-node_modules_emotion_react_dist_emotion-react_browser_development_esm_js.36b49c71871f98d4f549.js.map +0 -1
  63. jupyter_ext/labextension/static/vendors-node_modules_mui_material_utils_createSvgIcon_js.2e13df4ea61496e95d45.js.map +0 -1
  64. {hdsp_jupyter_extension-2.0.7.data → hdsp_jupyter_extension-2.0.8.data}/data/etc/jupyter/jupyter_server_config.d/hdsp_jupyter_extension.json +0 -0
  65. {hdsp_jupyter_extension-2.0.7.data → hdsp_jupyter_extension-2.0.8.data}/data/share/jupyter/labextensions/hdsp-agent/install.json +0 -0
  66. {hdsp_jupyter_extension-2.0.7.data → hdsp_jupyter_extension-2.0.8.data}/data/share/jupyter/labextensions/hdsp-agent/static/node_modules_emotion_use-insertion-effect-with-fallbacks_dist_emotion-use-insertion-effect-wi-3ba6b80.c095373419d05e6f141a.js +0 -0
  67. {hdsp_jupyter_extension-2.0.7.data → hdsp_jupyter_extension-2.0.8.data}/data/share/jupyter/labextensions/hdsp-agent/static/node_modules_emotion_use-insertion-effect-with-fallbacks_dist_emotion-use-insertion-effect-wi-3ba6b80.c095373419d05e6f141a.js.map +0 -0
  68. {hdsp_jupyter_extension-2.0.7.data → hdsp_jupyter_extension-2.0.8.data}/data/share/jupyter/labextensions/hdsp-agent/static/node_modules_emotion_use-insertion-effect-with-fallbacks_dist_emotion-use-insertion-effect-wi-3ba6b81.61e75fb98ecff46cf836.js +0 -0
  69. {hdsp_jupyter_extension-2.0.7.data → hdsp_jupyter_extension-2.0.8.data}/data/share/jupyter/labextensions/hdsp-agent/static/node_modules_emotion_use-insertion-effect-with-fallbacks_dist_emotion-use-insertion-effect-wi-3ba6b81.61e75fb98ecff46cf836.js.map +0 -0
  70. {hdsp_jupyter_extension-2.0.7.data → hdsp_jupyter_extension-2.0.8.data}/data/share/jupyter/labextensions/hdsp-agent/static/style.js +0 -0
  71. {hdsp_jupyter_extension-2.0.7.data → hdsp_jupyter_extension-2.0.8.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_babel_runtime_helpers_esm_extends_js-node_modules_emotion_serialize_dist-051195.e2553aab0c3963b83dd7.js +0 -0
  72. {hdsp_jupyter_extension-2.0.7.data → hdsp_jupyter_extension-2.0.8.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_babel_runtime_helpers_esm_extends_js-node_modules_emotion_serialize_dist-051195.e2553aab0c3963b83dd7.js.map +0 -0
  73. {hdsp_jupyter_extension-2.0.7.data → hdsp_jupyter_extension-2.0.8.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_emotion_styled_dist_emotion-styled_browser_development_esm_js.661fb5836f4978a7c6e1.js +0 -0
  74. {hdsp_jupyter_extension-2.0.7.data → hdsp_jupyter_extension-2.0.8.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_emotion_styled_dist_emotion-styled_browser_development_esm_js.661fb5836f4978a7c6e1.js.map +0 -0
  75. {hdsp_jupyter_extension-2.0.7.data → hdsp_jupyter_extension-2.0.8.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_mui_material_index_js.985697e0162d8d088ca2.js +0 -0
  76. {hdsp_jupyter_extension-2.0.7.data → hdsp_jupyter_extension-2.0.8.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_mui_material_index_js.985697e0162d8d088ca2.js.map +0 -0
  77. {hdsp_jupyter_extension-2.0.7.dist-info → hdsp_jupyter_extension-2.0.8.dist-info}/WHEEL +0 -0
  78. {hdsp_jupyter_extension-2.0.7.dist-info → hdsp_jupyter_extension-2.0.8.dist-info}/licenses/LICENSE +0 -0
@@ -4,13 +4,14 @@ Local Embedding Service - Wraps sentence-transformers for local embedding genera
4
4
  Features:
5
5
  - Zero external API calls (data sovereignty)
6
6
  - Lazy model loading (only when first needed)
7
- - Thread-safe singleton pattern
7
+ - Thread-safe singleton pattern with async support
8
8
  - Configurable model and device
9
9
  - E5 model prefix handling for optimal performance
10
10
 
11
11
  Default model: intfloat/multilingual-e5-small (384 dimensions, Korean support)
12
12
  """
13
13
 
14
+ import asyncio
14
15
  import logging
15
16
  from typing import TYPE_CHECKING, List, Optional
16
17
 
@@ -55,51 +56,59 @@ class EmbeddingService:
55
56
  self._model = None
56
57
  self._dimension: Optional[int] = None
57
58
  self._is_e5_model: bool = False
59
+ self._load_lock = asyncio.Lock() # Thread-safe lazy loading
58
60
 
59
- @property
60
- def model(self):
61
- """Lazy load the embedding model"""
62
- if self._model is None:
63
- self._load_model()
64
- return self._model
65
-
66
- def _load_model(self) -> None:
67
- """Load the sentence-transformers model"""
68
- try:
69
- from sentence_transformers import SentenceTransformer
70
- except ImportError:
71
- raise ImportError(
72
- "sentence-transformers is required for RAG. "
73
- "Install with: pip install sentence-transformers"
74
- )
75
-
76
- model_name = self._config.get_model_name()
77
- device = self._config.get_device()
78
-
79
- logger.info(f"Loading embedding model: {model_name} on {device}")
80
-
81
- try:
82
- self._model = SentenceTransformer(
83
- model_name, device=device, cache_folder=self._config.cache_folder
84
- )
85
- self._dimension = self._model.get_sentence_embedding_dimension()
86
-
87
- # Check if E5 model (requires special prefix)
88
- self._is_e5_model = "e5" in model_name.lower()
61
+ async def _ensure_model_loaded(self):
62
+ """Lazy load the embedding model (thread-safe, async)"""
63
+ if self._model is not None:
64
+ return
89
65
 
90
- logger.info(
91
- f"Embedding model loaded successfully. "
92
- f"Dimension: {self._dimension}, E5 model: {self._is_e5_model}"
93
- )
94
- except Exception as e:
95
- logger.error(f"Failed to load embedding model: {e}")
96
- raise
66
+ async with self._load_lock:
67
+ # Double-check after acquiring lock
68
+ if self._model is not None:
69
+ return
70
+
71
+ try:
72
+ from sentence_transformers import SentenceTransformer
73
+ except ImportError:
74
+ raise ImportError(
75
+ "sentence-transformers is required for RAG. "
76
+ "Install with: pip install sentence-transformers"
77
+ )
78
+
79
+ model_name = self._config.get_model_name()
80
+ device = self._config.get_device()
81
+
82
+ logger.info(f"Loading embedding model: {model_name} on {device}")
83
+
84
+ try:
85
+ # Load model in separate thread to avoid blocking event loop
86
+ self._model = await asyncio.to_thread(
87
+ SentenceTransformer,
88
+ model_name,
89
+ device=device,
90
+ cache_folder=self._config.cache_folder,
91
+ )
92
+ self._dimension = self._model.get_sentence_embedding_dimension()
93
+
94
+ # Check if E5 model (requires special prefix)
95
+ self._is_e5_model = "e5" in model_name.lower()
96
+
97
+ logger.info(
98
+ f"Embedding model loaded successfully. "
99
+ f"Dimension: {self._dimension}, E5 model: {self._is_e5_model}"
100
+ )
101
+ except Exception as e:
102
+ logger.error(f"Failed to load embedding model: {e}")
103
+ raise
97
104
 
98
105
  @property
99
106
  def dimension(self) -> int:
100
- """Get embedding dimension (triggers model load if needed)"""
107
+ """Get embedding dimension (must be loaded first)"""
101
108
  if self._dimension is None:
102
- _ = self.model # Trigger lazy load
109
+ raise RuntimeError(
110
+ "Embedding dimension not available. Model not loaded yet."
111
+ )
103
112
  return self._dimension
104
113
 
105
114
  def _prepare_texts(self, texts: List[str], is_query: bool = False) -> List[str]:
@@ -116,7 +125,7 @@ class EmbeddingService:
116
125
  prefix = "query: " if is_query else "passage: "
117
126
  return [prefix + text for text in texts]
118
127
 
119
- def embed_texts(self, texts: List[str]) -> List[List[float]]:
128
+ async def embed_texts(self, texts: List[str]) -> List[List[float]]:
120
129
  """
121
130
  Generate embeddings for a list of texts (documents/passages).
122
131
 
@@ -129,11 +138,15 @@ class EmbeddingService:
129
138
  if not texts:
130
139
  return []
131
140
 
141
+ await self._ensure_model_loaded()
142
+
132
143
  # Prepare texts with prefix if E5 model
133
144
  prepared_texts = self._prepare_texts(texts, is_query=False)
134
145
 
135
146
  try:
136
- embeddings = self.model.encode(
147
+ # Run in separate thread to avoid blocking event loop
148
+ embeddings = await asyncio.to_thread(
149
+ self._model.encode,
137
150
  prepared_texts,
138
151
  batch_size=self._config.batch_size,
139
152
  show_progress_bar=len(texts) > 100,
@@ -145,7 +158,7 @@ class EmbeddingService:
145
158
  logger.error(f"Failed to generate embeddings: {e}")
146
159
  raise
147
160
 
148
- def embed_query(self, query: str) -> List[float]:
161
+ async def embed_query(self, query: str) -> List[float]:
149
162
  """
150
163
  Generate embedding for a single query.
151
164
 
@@ -160,11 +173,15 @@ class EmbeddingService:
160
173
  if not query:
161
174
  raise ValueError("Query cannot be empty")
162
175
 
176
+ await self._ensure_model_loaded()
177
+
163
178
  # Prepare query with prefix if E5 model
164
179
  prepared_query = self._prepare_texts([query], is_query=True)[0]
165
180
 
166
181
  try:
167
- embedding = self.model.encode(
182
+ # Run in separate thread to avoid blocking event loop
183
+ embedding = await asyncio.to_thread(
184
+ self._model.encode,
168
185
  prepared_query,
169
186
  convert_to_numpy=True,
170
187
  normalize_embeddings=self._config.normalize_embeddings,
@@ -174,7 +191,7 @@ class EmbeddingService:
174
191
  logger.error(f"Failed to generate query embedding: {e}")
175
192
  raise
176
193
 
177
- def embed_batch(
194
+ async def embed_batch(
178
195
  self, texts: List[str], batch_size: Optional[int] = None
179
196
  ) -> List[List[float]]:
180
197
  """
@@ -190,11 +207,15 @@ class EmbeddingService:
190
207
  if not texts:
191
208
  return []
192
209
 
210
+ await self._ensure_model_loaded()
211
+
193
212
  prepared_texts = self._prepare_texts(texts, is_query=False)
194
213
  effective_batch_size = batch_size or self._config.batch_size
195
214
 
196
215
  try:
197
- embeddings = self.model.encode(
216
+ # Run in separate thread to avoid blocking event loop
217
+ embeddings = await asyncio.to_thread(
218
+ self._model.encode,
198
219
  prepared_texts,
199
220
  batch_size=effective_batch_size,
200
221
  show_progress_bar=True,
@@ -88,13 +88,24 @@ class RAGManager:
88
88
  self._client = self._create_qdrant_client()
89
89
  logger.info("Qdrant client initialized")
90
90
 
91
- # 2. Initialize embedding service
92
- from agent_server.core.embedding_service import get_embedding_service
91
+ # 2. Initialize embedding service (local or vLLM backend)
92
+ import os
93
+ embedding_backend = os.environ.get("HDSP_EMBEDDING_BACKEND", "local").lower()
93
94
 
94
- self._embedding_service = get_embedding_service(self._config.embedding)
95
- logger.info(
96
- f"Embedding service initialized (dim={self._embedding_service.dimension})"
97
- )
95
+ if embedding_backend == "vllm":
96
+ from agent_server.core.vllm_embedding_service import get_vllm_embedding_service
97
+ self._embedding_service = get_vllm_embedding_service(self._config.embedding)
98
+ logger.info(
99
+ f"vLLM Embedding service initialized (dim={self._embedding_service.dimension})"
100
+ )
101
+ else:
102
+ from agent_server.core.embedding_service import get_embedding_service
103
+ self._embedding_service = get_embedding_service(self._config.embedding)
104
+ # Load model to get dimension
105
+ await self._embedding_service._ensure_model_loaded()
106
+ logger.info(
107
+ f"Local Embedding service initialized (dim={self._embedding_service.dimension})"
108
+ )
98
109
 
99
110
  # 3. Ensure collection exists
100
111
  await self._ensure_collection()
@@ -151,26 +162,29 @@ class RAGManager:
151
162
  )
152
163
 
153
164
  cfg = self._config.qdrant
165
+ mode = cfg.get_mode() # Use get_mode() for env override
154
166
 
155
- if cfg.mode == "local":
167
+ if mode == "local":
156
168
  # Local file-based storage
157
169
  local_path = cfg.get_local_path()
158
170
  Path(local_path).mkdir(parents=True, exist_ok=True)
159
171
  logger.info(f"Initializing Qdrant in local mode: {local_path}")
160
172
  return QdrantClient(path=local_path)
161
173
 
162
- elif cfg.mode == "server":
174
+ elif mode == "server":
163
175
  # Docker or external server
164
- logger.info(f"Connecting to Qdrant server: {cfg.url}")
165
- return QdrantClient(url=cfg.url)
176
+ url = cfg.get_url() # Use get_url() for env override
177
+ logger.info(f"Connecting to Qdrant server: {url}")
178
+ return QdrantClient(url=url)
166
179
 
167
- elif cfg.mode == "cloud":
180
+ elif mode == "cloud":
168
181
  # Qdrant Cloud
182
+ url = cfg.get_url() # Use get_url() for env override
169
183
  logger.info("Connecting to Qdrant Cloud")
170
- return QdrantClient(url=cfg.url, api_key=cfg.api_key)
184
+ return QdrantClient(url=url, api_key=cfg.api_key)
171
185
 
172
186
  else:
173
- raise ValueError(f"Unknown Qdrant mode: {cfg.mode}")
187
+ raise ValueError(f"Unknown Qdrant mode: {mode}")
174
188
 
175
189
  async def _ensure_collection(self) -> None:
176
190
  """Create collection if it doesn't exist."""
@@ -274,7 +288,7 @@ class RAGManager:
274
288
  )
275
289
 
276
290
  if chunks:
277
- self._index_chunks(chunks, file_path)
291
+ await self._index_chunks(chunks, file_path)
278
292
  indexed += 1
279
293
  self._index_stats["total_documents"] += 1
280
294
  self._index_stats["total_chunks"] += len(chunks)
@@ -345,13 +359,13 @@ class RAGManager:
345
359
  else:
346
360
  return "general"
347
361
 
348
- def _index_chunks(self, chunks: List[Dict], file_path: Path) -> None:
362
+ async def _index_chunks(self, chunks: List[Dict], file_path: Path) -> None:
349
363
  """Index document chunks to Qdrant."""
350
364
  from qdrant_client.models import PointStruct
351
365
 
352
366
  # Generate embeddings
353
367
  texts = [c["content"] for c in chunks]
354
- embeddings = self._embedding_service.embed_texts(texts)
368
+ embeddings = await self._embedding_service.embed_texts(texts)
355
369
 
356
370
  # Add content hash to all chunks
357
371
  file_hash = self._compute_file_hash(file_path)
@@ -430,7 +444,7 @@ class RAGManager:
430
444
  )
431
445
 
432
446
  if chunks:
433
- self._index_chunks(chunks, file_path)
447
+ await self._index_chunks(chunks, file_path)
434
448
  logger.info(f"Reindexed: {file_path}")
435
449
  except Exception as e:
436
450
  logger.error(f"Failed to reindex {file_path}: {e}")
@@ -84,21 +84,23 @@ class Retriever:
84
84
  effective_threshold = score_threshold or self._config.score_threshold
85
85
 
86
86
  # Generate query embedding
87
- query_embedding = self._embedding_service.embed_query(query)
87
+ query_embedding = await self._embedding_service.embed_query(query)
88
88
 
89
89
  # Build filter condition
90
90
  qdrant_filter = self._build_filter(filters) if filters else None
91
91
 
92
92
  # Dense vector search
93
93
  try:
94
- results = self._client.search(
94
+ response = self._client.query_points(
95
95
  collection_name=self._config.qdrant.collection_name,
96
- query_vector=query_embedding,
96
+ query=query_embedding,
97
97
  query_filter=qdrant_filter,
98
98
  limit=effective_top_k,
99
- score_threshold=effective_threshold
100
- * 0.5, # Lower for initial retrieval
99
+ score_threshold=effective_threshold * 0.5, # Lower for initial retrieval
100
+ with_payload=True,
101
+ with_vectors=False,
101
102
  )
103
+ results = response.points
102
104
  except Exception as e:
103
105
  logger.error(f"Search failed: {e}")
104
106
  return []
@@ -193,7 +195,7 @@ class Retriever:
193
195
  effective_threshold = score_threshold or self._config.score_threshold
194
196
 
195
197
  # Generate query embedding
196
- query_embedding = self._embedding_service.embed_query(query)
198
+ query_embedding = await self._embedding_service.embed_query(query)
197
199
 
198
200
  # Build filter condition
199
201
  qdrant_filter = self._build_filter(filters) if filters else None
@@ -201,13 +203,16 @@ class Retriever:
201
203
  # Vector search with timing
202
204
  try:
203
205
  # 디버그용으로 더 많은 결과 (3배)를 낮은 threshold로 가져옴
204
- results = self._client.search(
206
+ response = self._client.query_points(
205
207
  collection_name=self._config.qdrant.collection_name,
206
- query_vector=query_embedding,
208
+ query=query_embedding,
207
209
  query_filter=qdrant_filter,
208
210
  limit=effective_top_k * 3,
209
211
  score_threshold=effective_threshold * 0.3,
212
+ with_payload=True,
213
+ with_vectors=False,
210
214
  )
215
+ results = response.points
211
216
  except Exception as e:
212
217
  logger.error(f"Search failed: {e}")
213
218
  return DebugSearchResult(
@@ -0,0 +1,243 @@
1
+ """
2
+ vLLM Embedding Service - Remote embedding generation using vLLM server.
3
+
4
+ Features:
5
+ - GPU-accelerated embeddings via vLLM server
6
+ - OpenAI-compatible API interface
7
+ - Retry logic for reliability
8
+ - Support for large models (qwen3-embedding-8b, gte-Qwen2-7B, etc.)
9
+
10
+ Prerequisites:
11
+ - vLLM embedding server running (e.g., http://10.222.52.31:8000)
12
+ - Model loaded on vLLM server
13
+ """
14
+
15
+ import logging
16
+ import os
17
+ from typing import TYPE_CHECKING, List, Optional
18
+
19
+ import httpx
20
+ import time
21
+
22
+ if TYPE_CHECKING:
23
+ from hdsp_agent_core.models.rag import EmbeddingConfig
24
+
25
+ logger = logging.getLogger(__name__)
26
+
27
+
28
+ class VLLMEmbeddingService:
29
+ """
30
+ Remote embedding generation using vLLM server.
31
+
32
+ Design Principles:
33
+ - Stateless client (vLLM server holds the model)
34
+ - Retry logic for network resilience
35
+ - OpenAI-compatible API interface
36
+
37
+ Usage:
38
+ service = get_vllm_embedding_service()
39
+ embeddings = service.embed_texts(["text1", "text2"])
40
+ query_embedding = service.embed_query("search query")
41
+ """
42
+
43
+ _instance: Optional["VLLMEmbeddingService"] = None
44
+ _initialized: bool = False
45
+
46
+ def __new__(cls, *args, **kwargs):
47
+ if cls._instance is None:
48
+ cls._instance = super().__new__(cls)
49
+ return cls._instance
50
+
51
+ def __init__(self, config: Optional["EmbeddingConfig"] = None):
52
+ if self._initialized:
53
+ return
54
+ self._initialized = True
55
+
56
+ from hdsp_agent_core.models.rag import EmbeddingConfig
57
+
58
+ self._config = config or EmbeddingConfig()
59
+
60
+ # vLLM configuration from environment variables
61
+ self._endpoint = os.environ.get("HDSP_VLLM_ENDPOINT", "http://localhost:8000")
62
+ self._model = os.environ.get("HDSP_VLLM_MODEL", "qwen3-embedding-8b")
63
+ self._dimension = int(os.environ.get("HDSP_VLLM_DIMENSION", "8192"))
64
+
65
+ # HTTP client with retry
66
+ self._client = httpx.AsyncClient(
67
+ base_url=self._endpoint,
68
+ timeout=httpx.Timeout(30.0),
69
+ limits=httpx.Limits(max_keepalive_connections=5, max_connections=10)
70
+ )
71
+
72
+ logger.info(
73
+ f"vLLM Embedding Service initialized: "
74
+ f"endpoint={self._endpoint}, model={self._model}, dim={self._dimension}"
75
+ )
76
+
77
+ @property
78
+ def dimension(self) -> int:
79
+ """Get embedding dimension"""
80
+ return self._dimension
81
+
82
+ async def _call_vllm_api(self, texts: List[str], max_retries: int = 3) -> List[List[float]]:
83
+ """
84
+ Call vLLM embedding API with retry logic.
85
+
86
+ Args:
87
+ texts: List of text strings to embed
88
+ max_retries: Maximum number of retry attempts
89
+
90
+ Returns:
91
+ List of embedding vectors
92
+
93
+ Raises:
94
+ Exception if all retries fail
95
+ """
96
+ payload = {
97
+ "model": self._model,
98
+ "input": texts,
99
+ }
100
+
101
+ last_error = None
102
+ for attempt in range(max_retries):
103
+ try:
104
+ response = await self._client.post("/v1/embeddings", json=payload)
105
+ response.raise_for_status()
106
+
107
+ data = response.json()
108
+ # Sort by index to ensure correct order
109
+ sorted_items = sorted(data["data"], key=lambda x: x["index"])
110
+ embeddings = [item["embedding"] for item in sorted_items]
111
+ return embeddings
112
+
113
+ except httpx.HTTPStatusError as e:
114
+ last_error = e
115
+ logger.warning(
116
+ f"vLLM API HTTP error (attempt {attempt + 1}/{max_retries}): "
117
+ f"{e.response.status_code} - {e.response.text}"
118
+ )
119
+ except httpx.RequestError as e:
120
+ last_error = e
121
+ logger.warning(
122
+ f"vLLM API connection error (attempt {attempt + 1}/{max_retries}): {e}"
123
+ )
124
+ except Exception as e:
125
+ last_error = e
126
+ logger.error(f"Unexpected error calling vLLM API: {e}")
127
+ break
128
+
129
+ raise Exception(f"Failed to connect to vLLM after {max_retries} attempts: {last_error}")
130
+
131
+ async def embed_texts(self, texts: List[str]) -> List[List[float]]:
132
+ """
133
+ Generate embeddings for a list of texts (documents/passages).
134
+
135
+ Args:
136
+ texts: List of text strings to embed
137
+
138
+ Returns:
139
+ List of embedding vectors (as lists of floats)
140
+ """
141
+ if not texts:
142
+ return []
143
+
144
+ try:
145
+ return await self._call_vllm_api(texts)
146
+ except Exception as e:
147
+ logger.error(f"Failed to generate embeddings via vLLM: {e}")
148
+ raise
149
+
150
+ async def embed_query(self, query: str) -> List[float]:
151
+ """
152
+ Generate embedding for a single query.
153
+
154
+ Args:
155
+ query: Query string
156
+
157
+ Returns:
158
+ Embedding vector as list of floats
159
+ """
160
+ if not query:
161
+ raise ValueError("Query cannot be empty")
162
+
163
+ try:
164
+ embeddings = await self._call_vllm_api([query])
165
+ return embeddings[0]
166
+ except Exception as e:
167
+ logger.error(f"Failed to generate query embedding via vLLM: {e}")
168
+ raise
169
+
170
+ async def embed_batch(
171
+ self, texts: List[str], batch_size: Optional[int] = None
172
+ ) -> List[List[float]]:
173
+ """
174
+ Generate embeddings with batching for large document sets.
175
+
176
+ Args:
177
+ texts: List of text strings to embed
178
+ batch_size: Override default batch size (for vLLM, can handle large batches)
179
+
180
+ Returns:
181
+ List of embedding vectors
182
+ """
183
+ if not texts:
184
+ return []
185
+
186
+ # vLLM can handle large batches efficiently
187
+ effective_batch_size = batch_size or 100
188
+ all_embeddings = []
189
+
190
+ for i in range(0, len(texts), effective_batch_size):
191
+ batch = texts[i : i + effective_batch_size]
192
+ embeddings = await self._call_vllm_api(batch)
193
+ all_embeddings.extend(embeddings)
194
+
195
+ return all_embeddings
196
+
197
+ def get_model_info(self) -> dict:
198
+ """Get information about the vLLM embedding service"""
199
+ return {
200
+ "backend": "vllm",
201
+ "endpoint": self._endpoint,
202
+ "model_name": self._model,
203
+ "dimension": self._dimension,
204
+ }
205
+
206
+ async def close(self):
207
+ """Close HTTP client connection"""
208
+ await self._client.aclose()
209
+
210
+
211
+ # ============ Singleton Accessor ============
212
+
213
+ _vllm_embedding_service: Optional[VLLMEmbeddingService] = None
214
+
215
+
216
+ def get_vllm_embedding_service(
217
+ config: Optional["EmbeddingConfig"] = None,
218
+ ) -> VLLMEmbeddingService:
219
+ """
220
+ Get the singleton VLLMEmbeddingService instance.
221
+
222
+ Args:
223
+ config: Optional EmbeddingConfig (only used on first call)
224
+
225
+ Returns:
226
+ VLLMEmbeddingService singleton instance
227
+ """
228
+ global _vllm_embedding_service
229
+ if _vllm_embedding_service is None:
230
+ _vllm_embedding_service = VLLMEmbeddingService(config)
231
+ return _vllm_embedding_service
232
+
233
+
234
+ def reset_vllm_embedding_service() -> None:
235
+ """
236
+ Reset the singleton instance (for testing purposes).
237
+ """
238
+ global _vllm_embedding_service
239
+ if _vllm_embedding_service is not None:
240
+ _vllm_embedding_service._initialized = False
241
+ _vllm_embedding_service = None
242
+ VLLMEmbeddingService._instance = None
243
+ VLLMEmbeddingService._initialized = False
@@ -23,12 +23,16 @@ from agent_server.langchain.prompts import (
23
23
  )
24
24
  from agent_server.langchain.tools import (
25
25
  check_resource_tool,
26
+ diagnostics_tool,
27
+ edit_file_tool,
26
28
  execute_command_tool,
27
29
  final_answer_tool,
28
30
  jupyter_cell_tool,
29
31
  list_files_tool,
30
32
  markdown_tool,
33
+ multiedit_file_tool,
31
34
  read_file_tool,
35
+ references_tool,
32
36
  search_notebook_cells_tool,
33
37
  search_workspace_tool,
34
38
  write_file_tool,
@@ -45,11 +49,15 @@ def _get_all_tools():
45
49
  final_answer_tool,
46
50
  read_file_tool,
47
51
  write_file_tool,
52
+ edit_file_tool,
53
+ multiedit_file_tool,
48
54
  list_files_tool,
49
55
  search_workspace_tool,
50
56
  search_notebook_cells_tool,
51
57
  execute_command_tool,
52
58
  check_resource_tool,
59
+ diagnostics_tool,
60
+ references_tool,
53
61
  ]
54
62
 
55
63