hdsp-jupyter-extension 2.0.0 (hdsp_jupyter_extension-2.0.0-py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (121)
  1. agent_server/__init__.py +8 -0
  2. agent_server/core/__init__.py +92 -0
  3. agent_server/core/api_key_manager.py +427 -0
  4. agent_server/core/code_validator.py +1238 -0
  5. agent_server/core/context_condenser.py +308 -0
  6. agent_server/core/embedding_service.py +254 -0
  7. agent_server/core/error_classifier.py +577 -0
  8. agent_server/core/llm_client.py +95 -0
  9. agent_server/core/llm_service.py +649 -0
  10. agent_server/core/notebook_generator.py +274 -0
  11. agent_server/core/prompt_builder.py +35 -0
  12. agent_server/core/rag_manager.py +742 -0
  13. agent_server/core/reflection_engine.py +489 -0
  14. agent_server/core/retriever.py +248 -0
  15. agent_server/core/state_verifier.py +452 -0
  16. agent_server/core/summary_generator.py +484 -0
  17. agent_server/core/task_manager.py +198 -0
  18. agent_server/knowledge/__init__.py +9 -0
  19. agent_server/knowledge/watchdog_service.py +352 -0
  20. agent_server/main.py +160 -0
  21. agent_server/prompts/__init__.py +60 -0
  22. agent_server/prompts/file_action_prompts.py +113 -0
  23. agent_server/routers/__init__.py +9 -0
  24. agent_server/routers/agent.py +591 -0
  25. agent_server/routers/chat.py +188 -0
  26. agent_server/routers/config.py +100 -0
  27. agent_server/routers/file_resolver.py +293 -0
  28. agent_server/routers/health.py +42 -0
  29. agent_server/routers/rag.py +163 -0
  30. agent_server/schemas/__init__.py +60 -0
  31. hdsp_agent_core/__init__.py +158 -0
  32. hdsp_agent_core/factory.py +252 -0
  33. hdsp_agent_core/interfaces.py +203 -0
  34. hdsp_agent_core/knowledge/__init__.py +31 -0
  35. hdsp_agent_core/knowledge/chunking.py +356 -0
  36. hdsp_agent_core/knowledge/libraries/dask.md +188 -0
  37. hdsp_agent_core/knowledge/libraries/matplotlib.md +164 -0
  38. hdsp_agent_core/knowledge/libraries/polars.md +68 -0
  39. hdsp_agent_core/knowledge/loader.py +337 -0
  40. hdsp_agent_core/llm/__init__.py +13 -0
  41. hdsp_agent_core/llm/service.py +556 -0
  42. hdsp_agent_core/managers/__init__.py +22 -0
  43. hdsp_agent_core/managers/config_manager.py +133 -0
  44. hdsp_agent_core/managers/session_manager.py +251 -0
  45. hdsp_agent_core/models/__init__.py +115 -0
  46. hdsp_agent_core/models/agent.py +316 -0
  47. hdsp_agent_core/models/chat.py +41 -0
  48. hdsp_agent_core/models/common.py +95 -0
  49. hdsp_agent_core/models/rag.py +368 -0
  50. hdsp_agent_core/prompts/__init__.py +63 -0
  51. hdsp_agent_core/prompts/auto_agent_prompts.py +1260 -0
  52. hdsp_agent_core/prompts/cell_action_prompts.py +98 -0
  53. hdsp_agent_core/services/__init__.py +18 -0
  54. hdsp_agent_core/services/agent_service.py +438 -0
  55. hdsp_agent_core/services/chat_service.py +205 -0
  56. hdsp_agent_core/services/rag_service.py +262 -0
  57. hdsp_agent_core/tests/__init__.py +1 -0
  58. hdsp_agent_core/tests/conftest.py +102 -0
  59. hdsp_agent_core/tests/test_factory.py +251 -0
  60. hdsp_agent_core/tests/test_services.py +326 -0
  61. hdsp_jupyter_extension-2.0.0.data/data/etc/jupyter/jupyter_server_config.d/hdsp_jupyter_extension.json +7 -0
  62. hdsp_jupyter_extension-2.0.0.data/data/share/jupyter/labextensions/hdsp-agent/build_log.json +738 -0
  63. hdsp_jupyter_extension-2.0.0.data/data/share/jupyter/labextensions/hdsp-agent/install.json +5 -0
  64. hdsp_jupyter_extension-2.0.0.data/data/share/jupyter/labextensions/hdsp-agent/package.json +134 -0
  65. hdsp_jupyter_extension-2.0.0.data/data/share/jupyter/labextensions/hdsp-agent/static/frontend_styles_index_js.2607ff74c74acfa83158.js +4369 -0
  66. hdsp_jupyter_extension-2.0.0.data/data/share/jupyter/labextensions/hdsp-agent/static/frontend_styles_index_js.2607ff74c74acfa83158.js.map +1 -0
  67. hdsp_jupyter_extension-2.0.0.data/data/share/jupyter/labextensions/hdsp-agent/static/lib_index_js.622c1a5918b3aafb2315.js +12496 -0
  68. hdsp_jupyter_extension-2.0.0.data/data/share/jupyter/labextensions/hdsp-agent/static/lib_index_js.622c1a5918b3aafb2315.js.map +1 -0
  69. hdsp_jupyter_extension-2.0.0.data/data/share/jupyter/labextensions/hdsp-agent/static/node_modules_emotion_use-insertion-effect-with-fallbacks_dist_emotion-use-insertion-effect-wi-3ba6b80.c095373419d05e6f141a.js +94 -0
  70. hdsp_jupyter_extension-2.0.0.data/data/share/jupyter/labextensions/hdsp-agent/static/node_modules_emotion_use-insertion-effect-with-fallbacks_dist_emotion-use-insertion-effect-wi-3ba6b80.c095373419d05e6f141a.js.map +1 -0
  71. hdsp_jupyter_extension-2.0.0.data/data/share/jupyter/labextensions/hdsp-agent/static/node_modules_emotion_use-insertion-effect-with-fallbacks_dist_emotion-use-insertion-effect-wi-3ba6b81.61e75fb98ecff46cf836.js +94 -0
  72. hdsp_jupyter_extension-2.0.0.data/data/share/jupyter/labextensions/hdsp-agent/static/node_modules_emotion_use-insertion-effect-with-fallbacks_dist_emotion-use-insertion-effect-wi-3ba6b81.61e75fb98ecff46cf836.js.map +1 -0
  73. hdsp_jupyter_extension-2.0.0.data/data/share/jupyter/labextensions/hdsp-agent/static/remoteEntry.dae97cde171e13b8c834.js +623 -0
  74. hdsp_jupyter_extension-2.0.0.data/data/share/jupyter/labextensions/hdsp-agent/static/remoteEntry.dae97cde171e13b8c834.js.map +1 -0
  75. hdsp_jupyter_extension-2.0.0.data/data/share/jupyter/labextensions/hdsp-agent/static/style.js +4 -0
  76. hdsp_jupyter_extension-2.0.0.data/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_babel_runtime_helpers_esm_extends_js-node_modules_emotion_serialize_dist-051195.e2553aab0c3963b83dd7.js +507 -0
  77. hdsp_jupyter_extension-2.0.0.data/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_babel_runtime_helpers_esm_extends_js-node_modules_emotion_serialize_dist-051195.e2553aab0c3963b83dd7.js.map +1 -0
  78. hdsp_jupyter_extension-2.0.0.data/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_emotion_cache_dist_emotion-cache_browser_development_esm_js-node_modules-782ee5.d9ed8645ef1d311657d8.js +2071 -0
  79. hdsp_jupyter_extension-2.0.0.data/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_emotion_cache_dist_emotion-cache_browser_development_esm_js-node_modules-782ee5.d9ed8645ef1d311657d8.js.map +1 -0
  80. hdsp_jupyter_extension-2.0.0.data/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_emotion_react_dist_emotion-react_browser_development_esm_js.36b49c71871f98d4f549.js +1059 -0
  81. hdsp_jupyter_extension-2.0.0.data/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_emotion_react_dist_emotion-react_browser_development_esm_js.36b49c71871f98d4f549.js.map +1 -0
  82. hdsp_jupyter_extension-2.0.0.data/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_emotion_styled_dist_emotion-styled_browser_development_esm_js.661fb5836f4978a7c6e1.js +376 -0
  83. hdsp_jupyter_extension-2.0.0.data/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_emotion_styled_dist_emotion-styled_browser_development_esm_js.661fb5836f4978a7c6e1.js.map +1 -0
  84. hdsp_jupyter_extension-2.0.0.data/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_mui_material_index_js.985697e0162d8d088ca2.js +60336 -0
  85. hdsp_jupyter_extension-2.0.0.data/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_mui_material_index_js.985697e0162d8d088ca2.js.map +1 -0
  86. hdsp_jupyter_extension-2.0.0.data/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_mui_material_utils_createSvgIcon_js.2e13df4ea61496e95d45.js +7132 -0
  87. hdsp_jupyter_extension-2.0.0.data/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_mui_material_utils_createSvgIcon_js.2e13df4ea61496e95d45.js.map +1 -0
  88. hdsp_jupyter_extension-2.0.0.dist-info/METADATA +152 -0
  89. hdsp_jupyter_extension-2.0.0.dist-info/RECORD +121 -0
  90. hdsp_jupyter_extension-2.0.0.dist-info/WHEEL +4 -0
  91. hdsp_jupyter_extension-2.0.0.dist-info/licenses/LICENSE +21 -0
  92. jupyter_ext/__init__.py +233 -0
  93. jupyter_ext/_version.py +4 -0
  94. jupyter_ext/config.py +111 -0
  95. jupyter_ext/etc/jupyter/jupyter_server_config.d/hdsp_jupyter_extension.json +7 -0
  96. jupyter_ext/handlers.py +632 -0
  97. jupyter_ext/labextension/build_log.json +738 -0
  98. jupyter_ext/labextension/package.json +134 -0
  99. jupyter_ext/labextension/static/frontend_styles_index_js.2607ff74c74acfa83158.js +4369 -0
  100. jupyter_ext/labextension/static/frontend_styles_index_js.2607ff74c74acfa83158.js.map +1 -0
  101. jupyter_ext/labextension/static/lib_index_js.622c1a5918b3aafb2315.js +12496 -0
  102. jupyter_ext/labextension/static/lib_index_js.622c1a5918b3aafb2315.js.map +1 -0
  103. jupyter_ext/labextension/static/node_modules_emotion_use-insertion-effect-with-fallbacks_dist_emotion-use-insertion-effect-wi-3ba6b80.c095373419d05e6f141a.js +94 -0
  104. jupyter_ext/labextension/static/node_modules_emotion_use-insertion-effect-with-fallbacks_dist_emotion-use-insertion-effect-wi-3ba6b80.c095373419d05e6f141a.js.map +1 -0
  105. jupyter_ext/labextension/static/node_modules_emotion_use-insertion-effect-with-fallbacks_dist_emotion-use-insertion-effect-wi-3ba6b81.61e75fb98ecff46cf836.js +94 -0
  106. jupyter_ext/labextension/static/node_modules_emotion_use-insertion-effect-with-fallbacks_dist_emotion-use-insertion-effect-wi-3ba6b81.61e75fb98ecff46cf836.js.map +1 -0
  107. jupyter_ext/labextension/static/remoteEntry.dae97cde171e13b8c834.js +623 -0
  108. jupyter_ext/labextension/static/remoteEntry.dae97cde171e13b8c834.js.map +1 -0
  109. jupyter_ext/labextension/static/style.js +4 -0
  110. jupyter_ext/labextension/static/vendors-node_modules_babel_runtime_helpers_esm_extends_js-node_modules_emotion_serialize_dist-051195.e2553aab0c3963b83dd7.js +507 -0
  111. jupyter_ext/labextension/static/vendors-node_modules_babel_runtime_helpers_esm_extends_js-node_modules_emotion_serialize_dist-051195.e2553aab0c3963b83dd7.js.map +1 -0
  112. jupyter_ext/labextension/static/vendors-node_modules_emotion_cache_dist_emotion-cache_browser_development_esm_js-node_modules-782ee5.d9ed8645ef1d311657d8.js +2071 -0
  113. jupyter_ext/labextension/static/vendors-node_modules_emotion_cache_dist_emotion-cache_browser_development_esm_js-node_modules-782ee5.d9ed8645ef1d311657d8.js.map +1 -0
  114. jupyter_ext/labextension/static/vendors-node_modules_emotion_react_dist_emotion-react_browser_development_esm_js.36b49c71871f98d4f549.js +1059 -0
  115. jupyter_ext/labextension/static/vendors-node_modules_emotion_react_dist_emotion-react_browser_development_esm_js.36b49c71871f98d4f549.js.map +1 -0
  116. jupyter_ext/labextension/static/vendors-node_modules_emotion_styled_dist_emotion-styled_browser_development_esm_js.661fb5836f4978a7c6e1.js +376 -0
  117. jupyter_ext/labextension/static/vendors-node_modules_emotion_styled_dist_emotion-styled_browser_development_esm_js.661fb5836f4978a7c6e1.js.map +1 -0
  118. jupyter_ext/labextension/static/vendors-node_modules_mui_material_index_js.985697e0162d8d088ca2.js +60336 -0
  119. jupyter_ext/labextension/static/vendors-node_modules_mui_material_index_js.985697e0162d8d088ca2.js.map +1 -0
  120. jupyter_ext/labextension/static/vendors-node_modules_mui_material_utils_createSvgIcon_js.2e13df4ea61496e95d45.js +7132 -0
  121. jupyter_ext/labextension/static/vendors-node_modules_mui_material_utils_createSvgIcon_js.2e13df4ea61496e95d45.js.map +1 -0
agent_server/core/context_condenser.py
@@ -0,0 +1,308 @@
+ """
+ Context Condenser for intelligent context compression.
+
+ Provides token-aware context management with multiple compression strategies
+ to optimize LLM input while preserving important information.
+ """
+
+ import logging
+ from dataclasses import dataclass
+ from enum import Enum
+ from typing import Dict, List, Optional, Tuple
+
+ logger = logging.getLogger(__name__)
+
+
+ class CompressionStrategy(Enum):
+     """Available compression strategies."""
+
+     TRUNCATE = "truncate"  # Keep only recent messages
+     SUMMARIZE = "summarize"  # Summarize old messages, keep recent
+     ADAPTIVE = "adaptive"  # Auto-select based on context size
+
+
+ @dataclass
+ class CompressionStats:
+     """Statistics from a compression operation."""
+
+     original_tokens: int
+     compressed_tokens: int
+     compression_ratio: float
+     strategy_used: str
+     messages_kept: int
+     messages_removed: int
+
+
+ class ContextCondenser:
+     """
+     Context compressor - optimizes conversation context within token budget.
+
+     Supports multiple LLM providers with different token limits.
+     Uses rule-based compression without additional LLM calls.
+     """
+
+     # Provider-specific token limits for context
+     TOKEN_LIMITS = {
+         "gemini": 30000,
+         "openai": 4000,
+         "vllm": 8000,
+         "default": 4000,
+     }
+
+     # Token estimation: average tokens per word (conservative)
+     TOKENS_PER_WORD = 1.3
+
+     def __init__(self, provider: str = "default"):
+         """Initialize condenser with provider-specific settings.
+
+         Args:
+             provider: LLM provider name for token limit selection
+         """
+         self._provider = provider
+         self._stats_history: List[CompressionStats] = []
+
+     @property
+     def provider(self) -> str:
+         """Current LLM provider."""
+         return self._provider
+
+     @provider.setter
+     def provider(self, value: str) -> None:
+         """Update LLM provider."""
+         self._provider = value
+
+     def estimate_tokens(self, text: str) -> int:
+         """Estimate token count for text.
+
+         Uses word count with a conservative multiplier.
+         More accurate than character count for most text.
+
+         Args:
+             text: Input text to estimate
+
+         Returns:
+             Estimated token count
+         """
+         if not text:
+             return 0
+         words = len(text.split())
+         return int(words * self.TOKENS_PER_WORD)
+
+     def get_token_limit(self) -> int:
+         """Get token limit for current provider.
+
+         Returns:
+             Maximum tokens for context
+         """
+         return self.TOKEN_LIMITS.get(self._provider, self.TOKEN_LIMITS["default"])
+
+     def condense(
+         self,
+         messages: List[Dict[str, str]],
+         target_tokens: Optional[int] = None,
+         strategy: CompressionStrategy = CompressionStrategy.ADAPTIVE,
+     ) -> Tuple[List[Dict[str, str]], CompressionStats]:
+         """Compress message list to fit within token budget.
+
+         Args:
+             messages: List of message dicts with 'role' and 'content' keys
+             target_tokens: Target token count (default: 50% of provider limit)
+             strategy: Compression strategy to use
+
+         Returns:
+             Tuple of (compressed_messages, compression_stats)
+         """
+         if not messages:
+             return [], CompressionStats(0, 0, 1.0, "none", 0, 0)
+
+         target = target_tokens or (self.get_token_limit() // 2)
+         original_tokens = sum(
+             self.estimate_tokens(m.get("content", "")) for m in messages
+         )
+
+         # Already within budget - no compression needed
+         if original_tokens <= target:
+             stats = CompressionStats(
+                 original_tokens=original_tokens,
+                 compressed_tokens=original_tokens,
+                 compression_ratio=1.0,
+                 strategy_used="none",
+                 messages_kept=len(messages),
+                 messages_removed=0,
+             )
+             return messages, stats
+
+         # Select strategy if adaptive
+         if strategy == CompressionStrategy.ADAPTIVE:
+             strategy = self._select_strategy(original_tokens, target)
+
+         # Apply selected strategy
+         if strategy == CompressionStrategy.TRUNCATE:
+             compressed, stats = self._truncate(messages, target)
+         elif strategy == CompressionStrategy.SUMMARIZE:
+             compressed, stats = self._summarize(messages, target)
+         else:
+             compressed, stats = self._truncate(messages, target)
+
+         self._stats_history.append(stats)
+         logger.info(
+             f"Context compressed: {stats.original_tokens} → {stats.compressed_tokens} "
+             f"tokens ({stats.compression_ratio:.1%}), strategy={stats.strategy_used}"
+         )
+         return compressed, stats
+
+     def _select_strategy(self, original: int, target: int) -> CompressionStrategy:
+         """Select best compression strategy based on reduction needed.
+
+         Args:
+             original: Original token count
+             target: Target token count
+
+         Returns:
+             Selected compression strategy
+         """
+         ratio = target / original
+         # If we need to keep more than 50%, simple truncation works
+         if ratio >= 0.5:
+             return CompressionStrategy.TRUNCATE
+         # For more aggressive compression, use summarization
+         return CompressionStrategy.SUMMARIZE
+
+     def _truncate(
+         self, messages: List[Dict[str, str]], target: int
+     ) -> Tuple[List[Dict[str, str]], CompressionStats]:
+         """Keep only recent messages within token budget.
+
+         Preserves most recent messages, dropping oldest first.
+
+         Args:
+             messages: Original messages
+             target: Target token count
+
+         Returns:
+             Tuple of (truncated_messages, stats)
+         """
+         original_tokens = sum(
+             self.estimate_tokens(m.get("content", "")) for m in messages
+         )
+
+         # Keep messages from the end (most recent)
+         kept: List[Dict[str, str]] = []
+         current_tokens = 0
+
+         for msg in reversed(messages):
+             msg_tokens = self.estimate_tokens(msg.get("content", ""))
+             if current_tokens + msg_tokens <= target:
+                 kept.insert(0, msg)
+                 current_tokens += msg_tokens
+             else:
+                 break
+
+         return kept, CompressionStats(
+             original_tokens=original_tokens,
+             compressed_tokens=current_tokens,
+             compression_ratio=(
+                 current_tokens / original_tokens if original_tokens else 1.0
+             ),
+             strategy_used="truncate",
+             messages_kept=len(kept),
+             messages_removed=len(messages) - len(kept),
+         )
+
+     def _summarize(
+         self, messages: List[Dict[str, str]], target: int
+     ) -> Tuple[List[Dict[str, str]], CompressionStats]:
+         """Summarize old messages, keep recent ones intact.
+
+         Rule-based summarization without LLM calls.
+         Extracts first sentence from each old message.
+
+         Args:
+             messages: Original messages
+             target: Target token count
+
+         Returns:
+             Tuple of (summary + recent_messages, stats)
+         """
+         original_tokens = sum(
+             self.estimate_tokens(m.get("content", "")) for m in messages
+         )
+
+         # Keep last 3 messages intact
+         recent_count = min(3, len(messages))
+         recent = messages[-recent_count:]
+         old = messages[:-recent_count] if len(messages) > recent_count else []
+
+         recent_tokens = sum(self.estimate_tokens(m.get("content", "")) for m in recent)
+         remaining = target - recent_tokens
+
+         # If recent messages already exceed budget, fallback to truncate
+         if remaining <= 0 or not old:
+             return self._truncate(messages, target)
+
+         # Summarize old messages (extract first sentence, max 100 chars)
+         summary_parts = []
+         for msg in old:
+             content = msg.get("content", "")
+             # Get first sentence or first 100 chars
+             first_sentence = content.split(".")[0][:100]
+             if first_sentence:
+                 role = "User" if msg.get("role") == "user" else "Assistant"
+                 summary_parts.append(f"[{role}]: {first_sentence}...")
+
+         summary_text = "\n".join(summary_parts)
+         summary_tokens = self.estimate_tokens(summary_text)
+
+         # If summary exceeds remaining budget, fallback to truncate
+         if summary_tokens > remaining:
+             return self._truncate(messages, target)
+
+         # Combine summary with recent messages
+         summary_msg = {
+             "role": "system",
+             "content": f"[Previous conversation summary]\n{summary_text}",
+         }
+         result = [summary_msg] + recent
+
+         total_tokens = summary_tokens + recent_tokens
+         return result, CompressionStats(
+             original_tokens=original_tokens,
+             compressed_tokens=total_tokens,
+             compression_ratio=total_tokens / original_tokens
+             if original_tokens
+             else 1.0,
+             strategy_used="summarize",
+             messages_kept=len(recent),
+             messages_removed=len(old),
+         )
+
+     def get_stats_history(self) -> List[CompressionStats]:
+         """Get history of compression operations.
+
+         Returns:
+             List of CompressionStats from previous operations
+         """
+         return self._stats_history.copy()
+
+     def clear_stats_history(self) -> None:
+         """Clear compression statistics history."""
+         self._stats_history.clear()
+
+
+ # Singleton accessor
+ _context_condenser: Optional[ContextCondenser] = None
+
+
+ def get_context_condenser(provider: str = "default") -> ContextCondenser:
+     """Get or create singleton ContextCondenser instance.
+
+     Args:
+         provider: LLM provider name (only used on first call)
+
+     Returns:
+         Singleton ContextCondenser instance
+     """
+     global _context_condenser
+     if _context_condenser is None:
+         _context_condenser = ContextCondenser(provider)
+     return _context_condenser
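
A minimal usage sketch of the condenser above, for orientation only; it assumes the module is importable as agent_server.core.context_condenser (the path in the file list) and is not part of the packaged code:

# Usage sketch only: assumes the import path
# agent_server.core.context_condenser from the file list above.
from agent_server.core.context_condenser import get_context_condenser

condenser = get_context_condenser(provider="openai")  # 4000-token limit

history = [
    {"role": "user", "content": "Load the sales data. " + "Then clean it. " * 300},
    {"role": "assistant", "content": "Loaded 12 columns. " + "Details follow. " * 300},
    {"role": "user", "content": "Show the schema."},
    {"role": "assistant", "content": "Schema printed above."},
    {"role": "user", "content": "Now plot monthly revenue."},
]

# ADAPTIVE (the default) sees target/original well below 0.5 and routes
# to SUMMARIZE: old turns become one system message, recent ones survive.
compressed, stats = condenser.condense(history, target_tokens=500)

print(stats.strategy_used)     # "summarize"
print(stats.messages_removed)  # 2 (the two long early turns)
print(compressed[0]["role"])   # "system" holds the rule-based summary

Because _select_strategy() compares target/original against 0.5, heavily over-budget histories like this one are summarized rather than truncated: the two long early turns are folded into a single first-sentence summary while the last three turns are kept verbatim.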
agent_server/core/embedding_service.py
@@ -0,0 +1,254 @@
+ """
+ Local Embedding Service - Wraps sentence-transformers for local embedding generation.
+
+ Features:
+ - Zero external API calls (data sovereignty)
+ - Lazy model loading (only when first needed)
+ - Thread-safe singleton pattern
+ - Configurable model and device
+ - E5 model prefix handling for optimal performance
+
+ Default model: intfloat/multilingual-e5-small (384 dimensions, Korean support)
+ """
+
+ import logging
+ from typing import TYPE_CHECKING, List, Optional
+
+ if TYPE_CHECKING:
+     from hdsp_agent_core.models.rag import EmbeddingConfig
+
+ logger = logging.getLogger(__name__)
+
+
+ class EmbeddingService:
+     """
+     Local embedding generation using sentence-transformers.
+
+     Design Principles:
+     - No external API calls (data sovereignty)
+     - Lazy model loading (only when needed)
+     - Thread-safe singleton pattern
+     - Configurable model and device
+
+     Usage:
+         service = get_embedding_service()
+         embeddings = service.embed_texts(["text1", "text2"])
+         query_embedding = service.embed_query("search query")
+     """
+
+     _instance: Optional["EmbeddingService"] = None
+     _initialized: bool = False
+
+     def __new__(cls, *args, **kwargs):
+         if cls._instance is None:
+             cls._instance = super().__new__(cls)
+         return cls._instance
+
+     def __init__(self, config: Optional["EmbeddingConfig"] = None):
+         if self._initialized:
+             return
+         self._initialized = True
+
+         from hdsp_agent_core.models.rag import EmbeddingConfig
+
+         self._config = config or EmbeddingConfig()
+         self._model = None
+         self._dimension: Optional[int] = None
+         self._is_e5_model: bool = False
+
+     @property
+     def model(self):
+         """Lazy load the embedding model"""
+         if self._model is None:
+             self._load_model()
+         return self._model
+
+     def _load_model(self) -> None:
+         """Load the sentence-transformers model"""
+         try:
+             from sentence_transformers import SentenceTransformer
+         except ImportError:
+             raise ImportError(
+                 "sentence-transformers is required for RAG. "
+                 "Install with: pip install sentence-transformers"
+             )
+
+         model_name = self._config.get_model_name()
+         device = self._config.get_device()
+
+         logger.info(f"Loading embedding model: {model_name} on {device}")
+
+         try:
+             self._model = SentenceTransformer(
+                 model_name, device=device, cache_folder=self._config.cache_folder
+             )
+             self._dimension = self._model.get_sentence_embedding_dimension()
+
+             # Check if E5 model (requires special prefix)
+             self._is_e5_model = "e5" in model_name.lower()
+
+             logger.info(
+                 f"Embedding model loaded successfully. "
+                 f"Dimension: {self._dimension}, E5 model: {self._is_e5_model}"
+             )
+         except Exception as e:
+             logger.error(f"Failed to load embedding model: {e}")
+             raise
+
+     @property
+     def dimension(self) -> int:
+         """Get embedding dimension (triggers model load if needed)"""
+         if self._dimension is None:
+             _ = self.model  # Trigger lazy load
+         return self._dimension
+
+     def _prepare_texts(self, texts: List[str], is_query: bool = False) -> List[str]:
+         """
+         Prepare texts for embedding, adding E5 prefixes if needed.
+
+         E5 models require specific prefixes:
+         - "query: " for search queries
+         - "passage: " for documents/passages
+         """
+         if not self._is_e5_model:
+             return texts
+
+         prefix = "query: " if is_query else "passage: "
+         return [prefix + text for text in texts]
+
+     def embed_texts(self, texts: List[str]) -> List[List[float]]:
+         """
+         Generate embeddings for a list of texts (documents/passages).
+
+         Args:
+             texts: List of text strings to embed
+
+         Returns:
+             List of embedding vectors (as lists of floats)
+         """
+         if not texts:
+             return []
+
+         # Prepare texts with prefix if E5 model
+         prepared_texts = self._prepare_texts(texts, is_query=False)
+
+         try:
+             embeddings = self.model.encode(
+                 prepared_texts,
+                 batch_size=self._config.batch_size,
+                 show_progress_bar=len(texts) > 100,
+                 convert_to_numpy=True,
+                 normalize_embeddings=self._config.normalize_embeddings,
+             )
+             return embeddings.tolist()
+         except Exception as e:
+             logger.error(f"Failed to generate embeddings: {e}")
+             raise
+
+     def embed_query(self, query: str) -> List[float]:
+         """
+         Generate embedding for a single query.
+
+         Uses "query: " prefix for E5 models to optimize search retrieval.
+
+         Args:
+             query: Query string
+
+         Returns:
+             Embedding vector as list of floats
+         """
+         if not query:
+             raise ValueError("Query cannot be empty")
+
+         # Prepare query with prefix if E5 model
+         prepared_query = self._prepare_texts([query], is_query=True)[0]
+
+         try:
+             embedding = self.model.encode(
+                 prepared_query,
+                 convert_to_numpy=True,
+                 normalize_embeddings=self._config.normalize_embeddings,
+             )
+             return embedding.tolist()
+         except Exception as e:
+             logger.error(f"Failed to generate query embedding: {e}")
+             raise
+
+     def embed_batch(
+         self, texts: List[str], batch_size: Optional[int] = None
+     ) -> List[List[float]]:
+         """
+         Generate embeddings with custom batch size for large document sets.
+
+         Args:
+             texts: List of text strings to embed
+             batch_size: Override default batch size
+
+         Returns:
+             List of embedding vectors
+         """
+         if not texts:
+             return []
+
+         prepared_texts = self._prepare_texts(texts, is_query=False)
+         effective_batch_size = batch_size or self._config.batch_size
+
+         try:
+             embeddings = self.model.encode(
+                 prepared_texts,
+                 batch_size=effective_batch_size,
+                 show_progress_bar=True,
+                 convert_to_numpy=True,
+                 normalize_embeddings=self._config.normalize_embeddings,
+             )
+             return embeddings.tolist()
+         except Exception as e:
+             logger.error(f"Failed to generate batch embeddings: {e}")
+             raise
+
+     def get_model_info(self) -> dict:
+         """Get information about the loaded model"""
+         return {
+             "model_name": self._config.get_model_name(),
+             "dimension": self.dimension,
+             "device": self._config.get_device(),
+             "is_e5_model": self._is_e5_model,
+             "normalize_embeddings": self._config.normalize_embeddings,
+             "loaded": self._model is not None,
+         }
+
+
+ # ============ Singleton Accessor ============
+
+ _embedding_service: Optional[EmbeddingService] = None
+
+
+ def get_embedding_service(
+     config: Optional["EmbeddingConfig"] = None,
+ ) -> EmbeddingService:
+     """
+     Get the singleton EmbeddingService instance.
+
+     Args:
+         config: Optional EmbeddingConfig (only used on first call)
+
+     Returns:
+         EmbeddingService singleton instance
+     """
+     global _embedding_service
+     if _embedding_service is None:
+         _embedding_service = EmbeddingService(config)
+     return _embedding_service
+
+
+ def reset_embedding_service() -> None:
+     """
+     Reset the singleton instance (for testing purposes).
+     """
+     global _embedding_service
+     if _embedding_service is not None:
+         _embedding_service._initialized = False
+         _embedding_service._model = None
+     _embedding_service = None
+     EmbeddingService._instance = None
+     EmbeddingService._initialized = False
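
A minimal usage sketch for the embedding service, again assuming the import path from the file list and that sentence-transformers is installed; per the module docstring, the default config resolves to intfloat/multilingual-e5-small (384 dimensions):

# Usage sketch only: assumes sentence-transformers is installed and that the
# default EmbeddingConfig (from hdsp_agent_core.models.rag) selects the
# intfloat/multilingual-e5-small model named in the module docstring.
from agent_server.core.embedding_service import get_embedding_service

service = get_embedding_service()  # lazy: no model is loaded yet

docs = [
    "Polars is a DataFrame library with a lazy query engine.",
    "Dask schedules Python computations across many cores.",
]

# The first encode() call loads the model; E5 inputs get a "passage: " prefix.
doc_vectors = service.embed_texts(docs)

# Queries get the "query: " prefix, which E5 models expect for retrieval.
query_vector = service.embed_query("fast dataframe library")

print(len(doc_vectors), len(doc_vectors[0]))  # 2 vectors of model dimension
print(service.get_model_info()["loaded"])     # True once a call has run

When normalize_embeddings is enabled in the config, the returned vectors are unit-length, so cosine similarity between query_vector and each document vector reduces to a plain dot product.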