claude-code-workflow 6.2.7 → 6.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/CLAUDE.md +16 -1
- package/.claude/workflows/cli-templates/protocols/analysis-protocol.md +11 -4
- package/.claude/workflows/cli-templates/protocols/write-protocol.md +10 -75
- package/.claude/workflows/cli-tools-usage.md +14 -24
- package/.codex/AGENTS.md +51 -1
- package/.codex/prompts/compact.md +378 -0
- package/.gemini/GEMINI.md +57 -20
- package/ccw/dist/cli.d.ts.map +1 -1
- package/ccw/dist/cli.js +21 -8
- package/ccw/dist/cli.js.map +1 -1
- package/ccw/dist/commands/cli.d.ts +2 -0
- package/ccw/dist/commands/cli.d.ts.map +1 -1
- package/ccw/dist/commands/cli.js +129 -8
- package/ccw/dist/commands/cli.js.map +1 -1
- package/ccw/dist/commands/hook.d.ts.map +1 -1
- package/ccw/dist/commands/hook.js +3 -2
- package/ccw/dist/commands/hook.js.map +1 -1
- package/ccw/dist/config/litellm-api-config-manager.d.ts +180 -0
- package/ccw/dist/config/litellm-api-config-manager.d.ts.map +1 -0
- package/ccw/dist/config/litellm-api-config-manager.js +770 -0
- package/ccw/dist/config/litellm-api-config-manager.js.map +1 -0
- package/ccw/dist/config/provider-models.d.ts +73 -0
- package/ccw/dist/config/provider-models.d.ts.map +1 -0
- package/ccw/dist/config/provider-models.js +172 -0
- package/ccw/dist/config/provider-models.js.map +1 -0
- package/ccw/dist/core/cache-manager.d.ts.map +1 -1
- package/ccw/dist/core/cache-manager.js +3 -5
- package/ccw/dist/core/cache-manager.js.map +1 -1
- package/ccw/dist/core/dashboard-generator.d.ts.map +1 -1
- package/ccw/dist/core/dashboard-generator.js +3 -1
- package/ccw/dist/core/dashboard-generator.js.map +1 -1
- package/ccw/dist/core/routes/cli-routes.d.ts.map +1 -1
- package/ccw/dist/core/routes/cli-routes.js +169 -0
- package/ccw/dist/core/routes/cli-routes.js.map +1 -1
- package/ccw/dist/core/routes/codexlens-routes.d.ts.map +1 -1
- package/ccw/dist/core/routes/codexlens-routes.js +234 -18
- package/ccw/dist/core/routes/codexlens-routes.js.map +1 -1
- package/ccw/dist/core/routes/hooks-routes.d.ts.map +1 -1
- package/ccw/dist/core/routes/hooks-routes.js +30 -32
- package/ccw/dist/core/routes/hooks-routes.js.map +1 -1
- package/ccw/dist/core/routes/litellm-api-routes.d.ts +21 -0
- package/ccw/dist/core/routes/litellm-api-routes.d.ts.map +1 -0
- package/ccw/dist/core/routes/litellm-api-routes.js +780 -0
- package/ccw/dist/core/routes/litellm-api-routes.js.map +1 -0
- package/ccw/dist/core/routes/litellm-routes.d.ts +20 -0
- package/ccw/dist/core/routes/litellm-routes.d.ts.map +1 -0
- package/ccw/dist/core/routes/litellm-routes.js +85 -0
- package/ccw/dist/core/routes/litellm-routes.js.map +1 -0
- package/ccw/dist/core/routes/mcp-routes.js +2 -2
- package/ccw/dist/core/routes/mcp-routes.js.map +1 -1
- package/ccw/dist/core/routes/status-routes.d.ts.map +1 -1
- package/ccw/dist/core/routes/status-routes.js +39 -0
- package/ccw/dist/core/routes/status-routes.js.map +1 -1
- package/ccw/dist/core/routes/system-routes.js +1 -1
- package/ccw/dist/core/routes/system-routes.js.map +1 -1
- package/ccw/dist/core/server.d.ts.map +1 -1
- package/ccw/dist/core/server.js +15 -1
- package/ccw/dist/core/server.js.map +1 -1
- package/ccw/dist/mcp-server/index.js +1 -1
- package/ccw/dist/mcp-server/index.js.map +1 -1
- package/ccw/dist/tools/claude-cli-tools.d.ts +82 -0
- package/ccw/dist/tools/claude-cli-tools.d.ts.map +1 -0
- package/ccw/dist/tools/claude-cli-tools.js +216 -0
- package/ccw/dist/tools/claude-cli-tools.js.map +1 -0
- package/ccw/dist/tools/cli-executor.d.ts.map +1 -1
- package/ccw/dist/tools/cli-executor.js +76 -14
- package/ccw/dist/tools/cli-executor.js.map +1 -1
- package/ccw/dist/tools/codex-lens.d.ts +9 -2
- package/ccw/dist/tools/codex-lens.d.ts.map +1 -1
- package/ccw/dist/tools/codex-lens.js +114 -9
- package/ccw/dist/tools/codex-lens.js.map +1 -1
- package/ccw/dist/tools/context-cache-store.d.ts +136 -0
- package/ccw/dist/tools/context-cache-store.d.ts.map +1 -0
- package/ccw/dist/tools/context-cache-store.js +256 -0
- package/ccw/dist/tools/context-cache-store.js.map +1 -0
- package/ccw/dist/tools/context-cache.d.ts +56 -0
- package/ccw/dist/tools/context-cache.d.ts.map +1 -0
- package/ccw/dist/tools/context-cache.js +294 -0
- package/ccw/dist/tools/context-cache.js.map +1 -0
- package/ccw/dist/tools/core-memory.d.ts.map +1 -1
- package/ccw/dist/tools/core-memory.js +33 -19
- package/ccw/dist/tools/core-memory.js.map +1 -1
- package/ccw/dist/tools/index.d.ts.map +1 -1
- package/ccw/dist/tools/index.js +2 -0
- package/ccw/dist/tools/index.js.map +1 -1
- package/ccw/dist/tools/litellm-client.d.ts +85 -0
- package/ccw/dist/tools/litellm-client.d.ts.map +1 -0
- package/ccw/dist/tools/litellm-client.js +188 -0
- package/ccw/dist/tools/litellm-client.js.map +1 -0
- package/ccw/dist/tools/litellm-executor.d.ts +34 -0
- package/ccw/dist/tools/litellm-executor.d.ts.map +1 -0
- package/ccw/dist/tools/litellm-executor.js +192 -0
- package/ccw/dist/tools/litellm-executor.js.map +1 -0
- package/ccw/dist/tools/pattern-parser.d.ts +55 -0
- package/ccw/dist/tools/pattern-parser.d.ts.map +1 -0
- package/ccw/dist/tools/pattern-parser.js +237 -0
- package/ccw/dist/tools/pattern-parser.js.map +1 -0
- package/ccw/dist/tools/smart-search.d.ts +1 -0
- package/ccw/dist/tools/smart-search.d.ts.map +1 -1
- package/ccw/dist/tools/smart-search.js +117 -41
- package/ccw/dist/tools/smart-search.js.map +1 -1
- package/ccw/dist/types/litellm-api-config.d.ts +294 -0
- package/ccw/dist/types/litellm-api-config.d.ts.map +1 -0
- package/ccw/dist/types/litellm-api-config.js +8 -0
- package/ccw/dist/types/litellm-api-config.js.map +1 -0
- package/ccw/src/cli.ts +258 -244
- package/ccw/src/commands/cli.ts +153 -9
- package/ccw/src/commands/hook.ts +3 -2
- package/ccw/src/config/.litellm-api-config-manager.ts.2025-12-23T11-57-43-727Z.bak +441 -0
- package/ccw/src/config/litellm-api-config-manager.ts +1012 -0
- package/ccw/src/config/provider-models.ts +222 -0
- package/ccw/src/core/cache-manager.ts +292 -294
- package/ccw/src/core/dashboard-generator.ts +3 -1
- package/ccw/src/core/routes/cli-routes.ts +192 -0
- package/ccw/src/core/routes/codexlens-routes.ts +241 -19
- package/ccw/src/core/routes/hooks-routes.ts +399 -405
- package/ccw/src/core/routes/litellm-api-routes.ts +930 -0
- package/ccw/src/core/routes/litellm-routes.ts +107 -0
- package/ccw/src/core/routes/mcp-routes.ts +1271 -1271
- package/ccw/src/core/routes/status-routes.ts +51 -0
- package/ccw/src/core/routes/system-routes.ts +1 -1
- package/ccw/src/core/server.ts +15 -1
- package/ccw/src/mcp-server/index.ts +1 -1
- package/ccw/src/templates/dashboard-css/12-cli-legacy.css +44 -0
- package/ccw/src/templates/dashboard-css/31-api-settings.css +2265 -0
- package/ccw/src/templates/dashboard-js/components/cli-history.js +15 -8
- package/ccw/src/templates/dashboard-js/components/cli-status.js +323 -9
- package/ccw/src/templates/dashboard-js/components/navigation.js +329 -313
- package/ccw/src/templates/dashboard-js/i18n.js +583 -1
- package/ccw/src/templates/dashboard-js/views/api-settings.js +3362 -0
- package/ccw/src/templates/dashboard-js/views/cli-manager.js +199 -24
- package/ccw/src/templates/dashboard-js/views/codexlens-manager.js +1265 -27
- package/ccw/src/templates/dashboard.html +840 -831
- package/ccw/src/tools/claude-cli-tools.ts +300 -0
- package/ccw/src/tools/cli-executor.ts +83 -14
- package/ccw/src/tools/codex-lens.ts +146 -9
- package/ccw/src/tools/context-cache-store.ts +368 -0
- package/ccw/src/tools/context-cache.ts +393 -0
- package/ccw/src/tools/core-memory.ts +33 -19
- package/ccw/src/tools/index.ts +2 -0
- package/ccw/src/tools/litellm-client.ts +246 -0
- package/ccw/src/tools/litellm-executor.ts +241 -0
- package/ccw/src/tools/pattern-parser.ts +329 -0
- package/ccw/src/tools/smart-search.ts +142 -41
- package/ccw/src/types/litellm-api-config.ts +402 -0
- package/ccw-litellm/README.md +180 -0
- package/ccw-litellm/pyproject.toml +35 -0
- package/ccw-litellm/src/ccw_litellm/__init__.py +47 -0
- package/ccw-litellm/src/ccw_litellm/__pycache__/__init__.cpython-313.pyc +0 -0
- package/ccw-litellm/src/ccw_litellm/__pycache__/cli.cpython-313.pyc +0 -0
- package/ccw-litellm/src/ccw_litellm/cli.py +108 -0
- package/ccw-litellm/src/ccw_litellm/clients/__init__.py +12 -0
- package/ccw-litellm/src/ccw_litellm/clients/__pycache__/__init__.cpython-313.pyc +0 -0
- package/ccw-litellm/src/ccw_litellm/clients/__pycache__/litellm_embedder.cpython-313.pyc +0 -0
- package/ccw-litellm/src/ccw_litellm/clients/__pycache__/litellm_llm.cpython-313.pyc +0 -0
- package/ccw-litellm/src/ccw_litellm/clients/litellm_embedder.py +251 -0
- package/ccw-litellm/src/ccw_litellm/clients/litellm_llm.py +165 -0
- package/ccw-litellm/src/ccw_litellm/config/__init__.py +22 -0
- package/ccw-litellm/src/ccw_litellm/config/__pycache__/__init__.cpython-313.pyc +0 -0
- package/ccw-litellm/src/ccw_litellm/config/__pycache__/loader.cpython-313.pyc +0 -0
- package/ccw-litellm/src/ccw_litellm/config/__pycache__/models.cpython-313.pyc +0 -0
- package/ccw-litellm/src/ccw_litellm/config/loader.py +316 -0
- package/ccw-litellm/src/ccw_litellm/config/models.py +130 -0
- package/ccw-litellm/src/ccw_litellm/interfaces/__init__.py +14 -0
- package/ccw-litellm/src/ccw_litellm/interfaces/__pycache__/__init__.cpython-313.pyc +0 -0
- package/ccw-litellm/src/ccw_litellm/interfaces/__pycache__/embedder.cpython-313.pyc +0 -0
- package/ccw-litellm/src/ccw_litellm/interfaces/__pycache__/llm.cpython-313.pyc +0 -0
- package/ccw-litellm/src/ccw_litellm/interfaces/embedder.py +52 -0
- package/ccw-litellm/src/ccw_litellm/interfaces/llm.py +45 -0
- package/codex-lens/src/codexlens/__pycache__/config.cpython-313.pyc +0 -0
- package/codex-lens/src/codexlens/cli/__pycache__/commands.cpython-313.pyc +0 -0
- package/codex-lens/src/codexlens/cli/__pycache__/embedding_manager.cpython-313.pyc +0 -0
- package/codex-lens/src/codexlens/cli/__pycache__/model_manager.cpython-313.pyc +0 -0
- package/codex-lens/src/codexlens/cli/__pycache__/output.cpython-313.pyc +0 -0
- package/codex-lens/src/codexlens/cli/commands.py +378 -23
- package/codex-lens/src/codexlens/cli/embedding_manager.py +660 -56
- package/codex-lens/src/codexlens/cli/model_manager.py +31 -18
- package/codex-lens/src/codexlens/cli/output.py +12 -1
- package/codex-lens/src/codexlens/config.py +93 -0
- package/codex-lens/src/codexlens/search/__pycache__/chain_search.cpython-313.pyc +0 -0
- package/codex-lens/src/codexlens/search/__pycache__/hybrid_search.cpython-313.pyc +0 -0
- package/codex-lens/src/codexlens/search/__pycache__/ranking.cpython-313.pyc +0 -0
- package/codex-lens/src/codexlens/search/chain_search.py +6 -2
- package/codex-lens/src/codexlens/search/hybrid_search.py +44 -21
- package/codex-lens/src/codexlens/search/ranking.py +1 -1
- package/codex-lens/src/codexlens/semantic/__init__.py +42 -0
- package/codex-lens/src/codexlens/semantic/__pycache__/__init__.cpython-313.pyc +0 -0
- package/codex-lens/src/codexlens/semantic/__pycache__/base.cpython-313.pyc +0 -0
- package/codex-lens/src/codexlens/semantic/__pycache__/chunker.cpython-313.pyc +0 -0
- package/codex-lens/src/codexlens/semantic/__pycache__/embedder.cpython-313.pyc +0 -0
- package/codex-lens/src/codexlens/semantic/__pycache__/factory.cpython-313.pyc +0 -0
- package/codex-lens/src/codexlens/semantic/__pycache__/gpu_support.cpython-313.pyc +0 -0
- package/codex-lens/src/codexlens/semantic/__pycache__/litellm_embedder.cpython-313.pyc +0 -0
- package/codex-lens/src/codexlens/semantic/__pycache__/vector_store.cpython-313.pyc +0 -0
- package/codex-lens/src/codexlens/semantic/base.py +61 -0
- package/codex-lens/src/codexlens/semantic/chunker.py +43 -20
- package/codex-lens/src/codexlens/semantic/embedder.py +60 -13
- package/codex-lens/src/codexlens/semantic/factory.py +98 -0
- package/codex-lens/src/codexlens/semantic/gpu_support.py +225 -3
- package/codex-lens/src/codexlens/semantic/litellm_embedder.py +144 -0
- package/codex-lens/src/codexlens/semantic/rotational_embedder.py +434 -0
- package/codex-lens/src/codexlens/semantic/vector_store.py +33 -8
- package/codex-lens/src/codexlens/storage/__pycache__/path_mapper.cpython-313.pyc +0 -0
- package/codex-lens/src/codexlens/storage/migrations/__pycache__/migration_004_dual_fts.cpython-313.pyc +0 -0
- package/codex-lens/src/codexlens/storage/path_mapper.py +27 -1
- package/package.json +15 -5
- package/.codex/prompts.zip +0 -0
- package/ccw/package.json +0 -65
|
@@ -14,7 +14,8 @@ from typing import Dict, Iterable, List, Optional
|
|
|
14
14
|
import numpy as np
|
|
15
15
|
|
|
16
16
|
from . import SEMANTIC_AVAILABLE
|
|
17
|
-
from .
|
|
17
|
+
from .base import BaseEmbedder
|
|
18
|
+
from .gpu_support import get_optimal_providers, is_gpu_available, get_gpu_summary, get_selected_device_id
|
|
18
19
|
|
|
19
20
|
logger = logging.getLogger(__name__)
|
|
20
21
|
|
|
@@ -84,7 +85,7 @@ def clear_embedder_cache() -> None:
|
|
|
84
85
|
gc.collect()
|
|
85
86
|
|
|
86
87
|
|
|
87
|
-
class Embedder:
|
|
88
|
+
class Embedder(BaseEmbedder):
|
|
88
89
|
"""Generate embeddings for code chunks using fastembed (ONNX-based).
|
|
89
90
|
|
|
90
91
|
Supported Model Profiles:
|
|
@@ -138,25 +139,58 @@ class Embedder:
|
|
|
138
139
|
|
|
139
140
|
# Resolve model name from profile or use explicit name
|
|
140
141
|
if model_name:
|
|
141
|
-
self.
|
|
142
|
+
self._model_name = model_name
|
|
142
143
|
elif profile and profile in self.MODELS:
|
|
143
|
-
self.
|
|
144
|
+
self._model_name = self.MODELS[profile]
|
|
144
145
|
else:
|
|
145
|
-
self.
|
|
146
|
+
self._model_name = self.DEFAULT_MODEL
|
|
146
147
|
|
|
147
|
-
# Configure ONNX execution providers
|
|
148
|
+
# Configure ONNX execution providers with device_id options for GPU selection
|
|
149
|
+
# Using with_device_options=True ensures DirectML/CUDA device_id is passed correctly
|
|
148
150
|
if providers is not None:
|
|
149
151
|
self._providers = providers
|
|
150
152
|
else:
|
|
151
|
-
self._providers = get_optimal_providers(use_gpu=use_gpu)
|
|
153
|
+
self._providers = get_optimal_providers(use_gpu=use_gpu, with_device_options=True)
|
|
152
154
|
|
|
153
155
|
self._use_gpu = use_gpu
|
|
154
156
|
self._model = None
|
|
155
157
|
|
|
158
|
+
@property
|
|
159
|
+
def model_name(self) -> str:
|
|
160
|
+
"""Get model name."""
|
|
161
|
+
return self._model_name
|
|
162
|
+
|
|
156
163
|
@property
|
|
157
164
|
def embedding_dim(self) -> int:
|
|
158
165
|
"""Get embedding dimension for current model."""
|
|
159
|
-
return self.MODEL_DIMS.get(self.
|
|
166
|
+
return self.MODEL_DIMS.get(self._model_name, 768) # Default to 768 if unknown
|
|
167
|
+
|
|
168
|
+
@property
|
|
169
|
+
def max_tokens(self) -> int:
|
|
170
|
+
"""Get maximum token limit for current model.
|
|
171
|
+
|
|
172
|
+
Returns:
|
|
173
|
+
int: Maximum number of tokens based on model profile.
|
|
174
|
+
- fast: 512 (lightweight, optimized for speed)
|
|
175
|
+
- code: 8192 (code-optimized, larger context)
|
|
176
|
+
- multilingual: 512 (standard multilingual model)
|
|
177
|
+
- balanced: 512 (general purpose)
|
|
178
|
+
"""
|
|
179
|
+
# Determine profile from model name
|
|
180
|
+
profile = None
|
|
181
|
+
for prof, model in self.MODELS.items():
|
|
182
|
+
if model == self._model_name:
|
|
183
|
+
profile = prof
|
|
184
|
+
break
|
|
185
|
+
|
|
186
|
+
# Return token limit based on profile
|
|
187
|
+
if profile == "code":
|
|
188
|
+
return 8192
|
|
189
|
+
elif profile in ("fast", "multilingual", "balanced"):
|
|
190
|
+
return 512
|
|
191
|
+
else:
|
|
192
|
+
# Default for unknown models
|
|
193
|
+
return 512
|
|
160
194
|
|
|
161
195
|
@property
|
|
162
196
|
def providers(self) -> List[str]:
|
|
@@ -168,7 +202,12 @@ class Embedder:
|
|
|
168
202
|
"""Check if GPU acceleration is enabled for this embedder."""
|
|
169
203
|
gpu_providers = {"CUDAExecutionProvider", "TensorrtExecutionProvider",
|
|
170
204
|
"DmlExecutionProvider", "ROCMExecutionProvider", "CoreMLExecutionProvider"}
|
|
171
|
-
|
|
205
|
+
# Handle both string providers and tuple providers (name, options)
|
|
206
|
+
for p in self._providers:
|
|
207
|
+
provider_name = p[0] if isinstance(p, tuple) else p
|
|
208
|
+
if provider_name in gpu_providers:
|
|
209
|
+
return True
|
|
210
|
+
return False
|
|
172
211
|
|
|
173
212
|
def _load_model(self) -> None:
|
|
174
213
|
"""Lazy load the embedding model with configured providers."""
|
|
@@ -177,7 +216,9 @@ class Embedder:
|
|
|
177
216
|
|
|
178
217
|
from fastembed import TextEmbedding
|
|
179
218
|
|
|
180
|
-
#
|
|
219
|
+
# providers already include device_id options via get_optimal_providers(with_device_options=True)
|
|
220
|
+
# DO NOT pass device_ids separately - fastembed ignores it when providers is specified
|
|
221
|
+
# See: fastembed/text/onnx_embedding.py - device_ids is only used with cuda=True
|
|
181
222
|
try:
|
|
182
223
|
self._model = TextEmbedding(
|
|
183
224
|
model_name=self.model_name,
|
|
@@ -215,7 +256,7 @@ class Embedder:
|
|
|
215
256
|
embeddings = list(self._model.embed(texts))
|
|
216
257
|
return [emb.tolist() for emb in embeddings]
|
|
217
258
|
|
|
218
|
-
def embed_to_numpy(self, texts: str | Iterable[str]) -> np.ndarray:
|
|
259
|
+
def embed_to_numpy(self, texts: str | Iterable[str], batch_size: Optional[int] = None) -> np.ndarray:
|
|
219
260
|
"""Generate embeddings for one or more texts (returns numpy arrays).
|
|
220
261
|
|
|
221
262
|
This method is more memory-efficient than embed() as it avoids converting
|
|
@@ -224,6 +265,8 @@ class Embedder:
|
|
|
224
265
|
|
|
225
266
|
Args:
|
|
226
267
|
texts: Single text or iterable of texts to embed.
|
|
268
|
+
batch_size: Optional batch size for fastembed processing.
|
|
269
|
+
Larger values improve GPU utilization but use more memory.
|
|
227
270
|
|
|
228
271
|
Returns:
|
|
229
272
|
numpy.ndarray of shape (n_texts, embedding_dim) containing embeddings.
|
|
@@ -235,8 +278,12 @@ class Embedder:
|
|
|
235
278
|
else:
|
|
236
279
|
texts = list(texts)
|
|
237
280
|
|
|
238
|
-
#
|
|
239
|
-
|
|
281
|
+
# Pass batch_size to fastembed for optimal GPU utilization
|
|
282
|
+
# Default batch_size in fastembed is 256, but larger values can improve throughput
|
|
283
|
+
if batch_size is not None:
|
|
284
|
+
embeddings = list(self._model.embed(texts, batch_size=batch_size))
|
|
285
|
+
else:
|
|
286
|
+
embeddings = list(self._model.embed(texts))
|
|
240
287
|
return np.array(embeddings)
|
|
241
288
|
|
|
242
289
|
def embed_single(self, text: str) -> List[float]:
|
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
"""Factory for creating embedders.
|
|
2
|
+
|
|
3
|
+
Provides a unified interface for instantiating different embedder backends.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
from __future__ import annotations
|
|
7
|
+
|
|
8
|
+
from typing import Any, Dict, List, Optional
|
|
9
|
+
|
|
10
|
+
from .base import BaseEmbedder
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def get_embedder(
|
|
14
|
+
backend: str = "fastembed",
|
|
15
|
+
profile: str = "code",
|
|
16
|
+
model: str = "default",
|
|
17
|
+
use_gpu: bool = True,
|
|
18
|
+
endpoints: Optional[List[Dict[str, Any]]] = None,
|
|
19
|
+
strategy: str = "latency_aware",
|
|
20
|
+
cooldown: float = 60.0,
|
|
21
|
+
**kwargs: Any,
|
|
22
|
+
) -> BaseEmbedder:
|
|
23
|
+
"""Factory function to create embedder based on backend.
|
|
24
|
+
|
|
25
|
+
Args:
|
|
26
|
+
backend: Embedder backend to use. Options:
|
|
27
|
+
- "fastembed": Use fastembed (ONNX-based) embedder (default)
|
|
28
|
+
- "litellm": Use ccw-litellm embedder
|
|
29
|
+
profile: Model profile for fastembed backend ("fast", "code", "multilingual", "balanced")
|
|
30
|
+
Used only when backend="fastembed". Default: "code"
|
|
31
|
+
model: Model identifier for litellm backend.
|
|
32
|
+
Used only when backend="litellm". Default: "default"
|
|
33
|
+
use_gpu: Whether to use GPU acceleration when available (default: True).
|
|
34
|
+
Used only when backend="fastembed".
|
|
35
|
+
endpoints: Optional list of endpoint configurations for multi-endpoint load balancing.
|
|
36
|
+
Each endpoint is a dict with keys: model, api_key, api_base, weight.
|
|
37
|
+
Used only when backend="litellm" and multiple endpoints provided.
|
|
38
|
+
strategy: Selection strategy for multi-endpoint mode:
|
|
39
|
+
"round_robin", "latency_aware", "weighted_random".
|
|
40
|
+
Default: "latency_aware"
|
|
41
|
+
cooldown: Default cooldown seconds for rate-limited endpoints (default: 60.0)
|
|
42
|
+
**kwargs: Additional backend-specific arguments
|
|
43
|
+
|
|
44
|
+
Returns:
|
|
45
|
+
BaseEmbedder: Configured embedder instance
|
|
46
|
+
|
|
47
|
+
Raises:
|
|
48
|
+
ValueError: If backend is not recognized
|
|
49
|
+
ImportError: If required backend dependencies are not installed
|
|
50
|
+
|
|
51
|
+
Examples:
|
|
52
|
+
Create fastembed embedder with code profile:
|
|
53
|
+
>>> embedder = get_embedder(backend="fastembed", profile="code")
|
|
54
|
+
|
|
55
|
+
Create fastembed embedder with fast profile and CPU only:
|
|
56
|
+
>>> embedder = get_embedder(backend="fastembed", profile="fast", use_gpu=False)
|
|
57
|
+
|
|
58
|
+
Create litellm embedder:
|
|
59
|
+
>>> embedder = get_embedder(backend="litellm", model="text-embedding-3-small")
|
|
60
|
+
|
|
61
|
+
Create rotational embedder with multiple endpoints:
|
|
62
|
+
>>> endpoints = [
|
|
63
|
+
... {"model": "openai/text-embedding-3-small", "api_key": "sk-..."},
|
|
64
|
+
... {"model": "azure/my-embedding", "api_base": "https://...", "api_key": "..."},
|
|
65
|
+
... ]
|
|
66
|
+
>>> embedder = get_embedder(backend="litellm", endpoints=endpoints)
|
|
67
|
+
"""
|
|
68
|
+
if backend == "fastembed":
|
|
69
|
+
from .embedder import Embedder
|
|
70
|
+
return Embedder(profile=profile, use_gpu=use_gpu, **kwargs)
|
|
71
|
+
elif backend == "litellm":
|
|
72
|
+
# Check if multi-endpoint mode is requested
|
|
73
|
+
if endpoints and len(endpoints) > 1:
|
|
74
|
+
from .rotational_embedder import create_rotational_embedder
|
|
75
|
+
return create_rotational_embedder(
|
|
76
|
+
endpoints_config=endpoints,
|
|
77
|
+
strategy=strategy,
|
|
78
|
+
default_cooldown=cooldown,
|
|
79
|
+
)
|
|
80
|
+
elif endpoints and len(endpoints) == 1:
|
|
81
|
+
# Single endpoint in list - use it directly
|
|
82
|
+
ep = endpoints[0]
|
|
83
|
+
ep_kwargs = {**kwargs}
|
|
84
|
+
if "api_key" in ep:
|
|
85
|
+
ep_kwargs["api_key"] = ep["api_key"]
|
|
86
|
+
if "api_base" in ep:
|
|
87
|
+
ep_kwargs["api_base"] = ep["api_base"]
|
|
88
|
+
from .litellm_embedder import LiteLLMEmbedderWrapper
|
|
89
|
+
return LiteLLMEmbedderWrapper(model=ep["model"], **ep_kwargs)
|
|
90
|
+
else:
|
|
91
|
+
# No endpoints list - use model parameter
|
|
92
|
+
from .litellm_embedder import LiteLLMEmbedderWrapper
|
|
93
|
+
return LiteLLMEmbedderWrapper(model=model, **kwargs)
|
|
94
|
+
else:
|
|
95
|
+
raise ValueError(
|
|
96
|
+
f"Unknown backend: {backend}. "
|
|
97
|
+
f"Supported backends: 'fastembed', 'litellm'"
|
|
98
|
+
)
|
|
@@ -13,6 +13,15 @@ from typing import List, Optional
|
|
|
13
13
|
logger = logging.getLogger(__name__)
|
|
14
14
|
|
|
15
15
|
|
|
16
|
+
@dataclass
|
|
17
|
+
class GPUDevice:
|
|
18
|
+
"""Individual GPU device info."""
|
|
19
|
+
device_id: int
|
|
20
|
+
name: str
|
|
21
|
+
is_discrete: bool # True for discrete GPU (NVIDIA, AMD), False for integrated (Intel UHD)
|
|
22
|
+
vendor: str # "nvidia", "amd", "intel", "unknown"
|
|
23
|
+
|
|
24
|
+
|
|
16
25
|
@dataclass
|
|
17
26
|
class GPUInfo:
|
|
18
27
|
"""GPU availability and configuration info."""
|
|
@@ -22,15 +31,117 @@ class GPUInfo:
|
|
|
22
31
|
gpu_count: int = 0
|
|
23
32
|
gpu_name: Optional[str] = None
|
|
24
33
|
onnx_providers: List[str] = None
|
|
34
|
+
devices: List[GPUDevice] = None # List of detected GPU devices
|
|
35
|
+
preferred_device_id: Optional[int] = None # Preferred GPU for embedding
|
|
25
36
|
|
|
26
37
|
def __post_init__(self):
|
|
27
38
|
if self.onnx_providers is None:
|
|
28
39
|
self.onnx_providers = ["CPUExecutionProvider"]
|
|
40
|
+
if self.devices is None:
|
|
41
|
+
self.devices = []
|
|
29
42
|
|
|
30
43
|
|
|
31
44
|
_gpu_info_cache: Optional[GPUInfo] = None
|
|
32
45
|
|
|
33
46
|
|
|
47
|
+
def _enumerate_gpus() -> List[GPUDevice]:
|
|
48
|
+
"""Enumerate available GPU devices using WMI on Windows.
|
|
49
|
+
|
|
50
|
+
Returns:
|
|
51
|
+
List of GPUDevice with device info, ordered by device_id.
|
|
52
|
+
"""
|
|
53
|
+
devices = []
|
|
54
|
+
|
|
55
|
+
try:
|
|
56
|
+
import subprocess
|
|
57
|
+
import sys
|
|
58
|
+
|
|
59
|
+
if sys.platform == "win32":
|
|
60
|
+
# Use PowerShell to query GPU information via WMI
|
|
61
|
+
cmd = [
|
|
62
|
+
"powershell", "-NoProfile", "-Command",
|
|
63
|
+
"Get-WmiObject Win32_VideoController | Select-Object DeviceID, Name, AdapterCompatibility | ConvertTo-Json"
|
|
64
|
+
]
|
|
65
|
+
result = subprocess.run(cmd, capture_output=True, text=True, timeout=10)
|
|
66
|
+
|
|
67
|
+
if result.returncode == 0 and result.stdout.strip():
|
|
68
|
+
import json
|
|
69
|
+
gpu_data = json.loads(result.stdout)
|
|
70
|
+
|
|
71
|
+
# Handle single GPU case (returns dict instead of list)
|
|
72
|
+
if isinstance(gpu_data, dict):
|
|
73
|
+
gpu_data = [gpu_data]
|
|
74
|
+
|
|
75
|
+
for idx, gpu in enumerate(gpu_data):
|
|
76
|
+
name = gpu.get("Name", "Unknown GPU")
|
|
77
|
+
compat = gpu.get("AdapterCompatibility", "").lower()
|
|
78
|
+
|
|
79
|
+
# Determine vendor
|
|
80
|
+
name_lower = name.lower()
|
|
81
|
+
if "nvidia" in name_lower or "nvidia" in compat:
|
|
82
|
+
vendor = "nvidia"
|
|
83
|
+
is_discrete = True
|
|
84
|
+
elif "amd" in name_lower or "radeon" in name_lower or "amd" in compat:
|
|
85
|
+
vendor = "amd"
|
|
86
|
+
is_discrete = True
|
|
87
|
+
elif "intel" in name_lower or "intel" in compat:
|
|
88
|
+
vendor = "intel"
|
|
89
|
+
# Intel UHD/Iris are integrated, Intel Arc is discrete
|
|
90
|
+
is_discrete = "arc" in name_lower
|
|
91
|
+
else:
|
|
92
|
+
vendor = "unknown"
|
|
93
|
+
is_discrete = False
|
|
94
|
+
|
|
95
|
+
devices.append(GPUDevice(
|
|
96
|
+
device_id=idx,
|
|
97
|
+
name=name,
|
|
98
|
+
is_discrete=is_discrete,
|
|
99
|
+
vendor=vendor
|
|
100
|
+
))
|
|
101
|
+
logger.debug(f"Detected GPU {idx}: {name} (vendor={vendor}, discrete={is_discrete})")
|
|
102
|
+
|
|
103
|
+
except Exception as e:
|
|
104
|
+
logger.debug(f"GPU enumeration failed: {e}")
|
|
105
|
+
|
|
106
|
+
return devices
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
def _get_preferred_device_id(devices: List[GPUDevice]) -> Optional[int]:
|
|
110
|
+
"""Determine the preferred GPU device_id for embedding.
|
|
111
|
+
|
|
112
|
+
Preference order:
|
|
113
|
+
1. NVIDIA discrete GPU (best DirectML/CUDA support)
|
|
114
|
+
2. AMD discrete GPU
|
|
115
|
+
3. Intel Arc (discrete)
|
|
116
|
+
4. Intel integrated (fallback)
|
|
117
|
+
|
|
118
|
+
Returns:
|
|
119
|
+
device_id of preferred GPU, or None to use default.
|
|
120
|
+
"""
|
|
121
|
+
if not devices:
|
|
122
|
+
return None
|
|
123
|
+
|
|
124
|
+
# Priority: NVIDIA > AMD > Intel Arc > Intel integrated
|
|
125
|
+
priority_order = [
|
|
126
|
+
("nvidia", True), # NVIDIA discrete
|
|
127
|
+
("amd", True), # AMD discrete
|
|
128
|
+
("intel", True), # Intel Arc (discrete)
|
|
129
|
+
("intel", False), # Intel integrated (fallback)
|
|
130
|
+
]
|
|
131
|
+
|
|
132
|
+
for target_vendor, target_discrete in priority_order:
|
|
133
|
+
for device in devices:
|
|
134
|
+
if device.vendor == target_vendor and device.is_discrete == target_discrete:
|
|
135
|
+
logger.info(f"Preferred GPU: {device.name} (device_id={device.device_id})")
|
|
136
|
+
return device.device_id
|
|
137
|
+
|
|
138
|
+
# If no match, use first device
|
|
139
|
+
if devices:
|
|
140
|
+
return devices[0].device_id
|
|
141
|
+
|
|
142
|
+
return None
|
|
143
|
+
|
|
144
|
+
|
|
34
145
|
def detect_gpu(force_refresh: bool = False) -> GPUInfo:
|
|
35
146
|
"""Detect available GPU resources for embedding acceleration.
|
|
36
147
|
|
|
@@ -47,6 +158,18 @@ def detect_gpu(force_refresh: bool = False) -> GPUInfo:
|
|
|
47
158
|
|
|
48
159
|
info = GPUInfo()
|
|
49
160
|
|
|
161
|
+
# Enumerate GPU devices first
|
|
162
|
+
info.devices = _enumerate_gpus()
|
|
163
|
+
info.gpu_count = len(info.devices)
|
|
164
|
+
if info.devices:
|
|
165
|
+
# Set preferred device (discrete GPU preferred over integrated)
|
|
166
|
+
info.preferred_device_id = _get_preferred_device_id(info.devices)
|
|
167
|
+
# Set gpu_name to preferred device name
|
|
168
|
+
for dev in info.devices:
|
|
169
|
+
if dev.device_id == info.preferred_device_id:
|
|
170
|
+
info.gpu_name = dev.name
|
|
171
|
+
break
|
|
172
|
+
|
|
50
173
|
# Check PyTorch CUDA availability (most reliable detection)
|
|
51
174
|
try:
|
|
52
175
|
import torch
|
|
@@ -143,21 +266,48 @@ def detect_gpu(force_refresh: bool = False) -> GPUInfo:
|
|
|
143
266
|
return info
|
|
144
267
|
|
|
145
268
|
|
|
146
|
-
def get_optimal_providers(use_gpu: bool = True) ->
|
|
269
|
+
def get_optimal_providers(use_gpu: bool = True, with_device_options: bool = False) -> list:
|
|
147
270
|
"""Get optimal ONNX execution providers based on availability.
|
|
148
271
|
|
|
149
272
|
Args:
|
|
150
273
|
use_gpu: If True, include GPU providers when available.
|
|
151
274
|
If False, force CPU-only execution.
|
|
275
|
+
with_device_options: If True, return providers as tuples with device_id options
|
|
276
|
+
for proper GPU device selection (required for DirectML).
|
|
152
277
|
|
|
153
278
|
Returns:
|
|
154
|
-
List of provider names in priority order.
|
|
279
|
+
List of provider names or tuples (provider_name, options_dict) in priority order.
|
|
155
280
|
"""
|
|
156
281
|
if not use_gpu:
|
|
157
282
|
return ["CPUExecutionProvider"]
|
|
158
283
|
|
|
159
284
|
gpu_info = detect_gpu()
|
|
160
|
-
|
|
285
|
+
|
|
286
|
+
if not with_device_options:
|
|
287
|
+
return gpu_info.onnx_providers
|
|
288
|
+
|
|
289
|
+
# Build providers with device_id options for GPU providers
|
|
290
|
+
device_id = get_selected_device_id()
|
|
291
|
+
providers = []
|
|
292
|
+
|
|
293
|
+
for provider in gpu_info.onnx_providers:
|
|
294
|
+
if provider == "DmlExecutionProvider" and device_id is not None:
|
|
295
|
+
# DirectML requires device_id in provider_options tuple
|
|
296
|
+
providers.append(("DmlExecutionProvider", {"device_id": device_id}))
|
|
297
|
+
logger.debug(f"DmlExecutionProvider configured with device_id={device_id}")
|
|
298
|
+
elif provider == "CUDAExecutionProvider" and device_id is not None:
|
|
299
|
+
# CUDA also supports device_id in provider_options
|
|
300
|
+
providers.append(("CUDAExecutionProvider", {"device_id": device_id}))
|
|
301
|
+
logger.debug(f"CUDAExecutionProvider configured with device_id={device_id}")
|
|
302
|
+
elif provider == "ROCMExecutionProvider" and device_id is not None:
|
|
303
|
+
# ROCm supports device_id
|
|
304
|
+
providers.append(("ROCMExecutionProvider", {"device_id": device_id}))
|
|
305
|
+
logger.debug(f"ROCMExecutionProvider configured with device_id={device_id}")
|
|
306
|
+
else:
|
|
307
|
+
# CPU and other providers don't need device_id
|
|
308
|
+
providers.append(provider)
|
|
309
|
+
|
|
310
|
+
return providers
|
|
161
311
|
|
|
162
312
|
|
|
163
313
|
def is_gpu_available() -> bool:
|
|
@@ -190,3 +340,75 @@ def clear_gpu_cache() -> None:
|
|
|
190
340
|
"""Clear cached GPU detection info."""
|
|
191
341
|
global _gpu_info_cache
|
|
192
342
|
_gpu_info_cache = None
|
|
343
|
+
|
|
344
|
+
|
|
345
|
+
# User-selected device ID (overrides auto-detection)
|
|
346
|
+
_selected_device_id: Optional[int] = None
|
|
347
|
+
|
|
348
|
+
|
|
349
|
+
def get_gpu_devices() -> List[dict]:
    """Get list of available GPU devices for frontend selection.

    Returns:
        List of dicts with device info for each GPU: device_id, name,
        vendor, is_discrete, plus flags marking the auto-detected
        preferred device and the currently selected device.
    """
    info = detect_gpu()

    # Hoist the selected-device lookup out of the loop: it is loop-invariant,
    # and get_selected_device_id() may itself call detect_gpu() as a fallback,
    # so re-evaluating it per device is wasted work.
    selected_id = get_selected_device_id()

    return [
        {
            "device_id": dev.device_id,
            "name": dev.name,
            "vendor": dev.vendor,
            "is_discrete": dev.is_discrete,
            "is_preferred": dev.device_id == info.preferred_device_id,
            "is_selected": dev.device_id == selected_id,
        }
        for dev in info.devices
    ]
|
|
369
|
+
|
|
370
|
+
|
|
371
|
+
def get_selected_device_id() -> Optional[int]:
    """Return the GPU device_id that embeddings should run on.

    Returns:
        The user-selected device_id if one was set via
        set_selected_device_id(); otherwise the auto-detected preferred
        device_id reported by detect_gpu().
    """
    # Read-only access to the module-level selection, so no `global` needed.
    if _selected_device_id is None:
        # No explicit user choice: defer to auto-detection.
        return detect_gpu().preferred_device_id
    return _selected_device_id
|
|
385
|
+
|
|
386
|
+
|
|
387
|
+
def set_selected_device_id(device_id: Optional[int]) -> bool:
    """Set the GPU device_id to use for embeddings.

    Args:
        device_id: GPU device_id to use, or None to use auto-detection.

    Returns:
        True if device_id is valid (or None), False otherwise.
    """
    global _selected_device_id

    # None clears any override and returns to auto-detection.
    if device_id is None:
        _selected_device_id = None
        logger.info("GPU selection reset to auto-detection")
        return True

    # Only accept ids that GPU detection actually reported.
    info = detect_gpu()
    chosen = next((dev for dev in info.devices if dev.device_id == device_id), None)

    if chosen is None:
        valid_ids = [dev.device_id for dev in info.devices]
        logger.warning(f"Invalid device_id {device_id}. Valid IDs: {valid_ids}")
        return False

    _selected_device_id = device_id
    logger.info(f"GPU selection set to device {device_id}: {chosen.name}")
    return True
|
|
@@ -0,0 +1,144 @@
|
|
|
1
|
+
"""LiteLLM embedder wrapper for CodexLens.
|
|
2
|
+
|
|
3
|
+
Provides integration with ccw-litellm's LiteLLMEmbedder for embedding generation.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
from __future__ import annotations
|
|
7
|
+
|
|
8
|
+
from typing import Iterable
|
|
9
|
+
|
|
10
|
+
import numpy as np
|
|
11
|
+
|
|
12
|
+
from .base import BaseEmbedder
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class LiteLLMEmbedderWrapper(BaseEmbedder):
    """Adapter exposing ccw-litellm's LiteLLMEmbedder as a CodexLens embedder.

    Conforms to the CodexLens BaseEmbedder interface so the LiteLLM
    backend can be used interchangeably with local embedders in
    semantic search.

    Args:
        model: Model identifier for LiteLLM (default: "default")
        **kwargs: Additional arguments passed to LiteLLMEmbedder

    Raises:
        ImportError: If ccw-litellm package is not installed
    """

    def __init__(self, model: str = "default", **kwargs) -> None:
        """Create the wrapper around a LiteLLMEmbedder instance.

        Args:
            model: Model identifier for LiteLLM (default: "default")
            **kwargs: Additional arguments passed to LiteLLMEmbedder

        Raises:
            ImportError: If ccw-litellm package is not installed
        """
        # Import lazily so the module loads even without ccw-litellm present.
        try:
            from ccw_litellm import LiteLLMEmbedder
        except ImportError as e:
            raise ImportError(
                "ccw-litellm not installed. Install with: pip install ccw-litellm"
            ) from e
        self._embedder = LiteLLMEmbedder(model=model, **kwargs)

    @property
    def embedding_dim(self) -> int:
        """Dimension of the embedding vectors, delegated to LiteLLMEmbedder."""
        return self._embedder.dimensions

    @property
    def model_name(self) -> str:
        """Name or identifier of the underlying model."""
        return self._embedder.model_name

    @property
    def max_tokens(self) -> int:
        """Maximum number of tokens that can be embedded at once.

        Taken from the embedder's configuration when available, otherwise
        inferred from patterns in the model name.
        """
        # Prefer an explicit limit from the LiteLLM config when set.
        configured = getattr(self._embedder, 'max_input_tokens', None)
        if configured:
            return configured

        name = self.model_name.lower()

        # Large models (8B-class or "large" in the name) take bigger inputs.
        if any(tag in name for tag in ('8b', 'large')):
            return 32768

        # OpenAI text-embedding-3-* family.
        if 'text-embedding-3' in name:
            return 8191

        # Conservative default for everything else.
        return 8192

    def _sanitize_text(self, text: str) -> str:
        """Work around a ModelScope API routing bug.

        Text beginning with lowercase 'import' gets misrouted by
        ModelScope to an Ollama endpoint; a leading space avoids this
        without affecting embedding quality.

        Args:
            text: Text to sanitize.

        Returns:
            Sanitized text safe for the embedding API.
        """
        return ' ' + text if text.startswith('import') else text

    def embed_to_numpy(self, texts: str | Iterable[str], **kwargs) -> np.ndarray:
        """Embed texts to a numpy array via LiteLLMEmbedder.

        Args:
            texts: Single text or iterable of texts to embed.
            **kwargs: Ignored for the LiteLLM backend; batch_size is
                accepted for API compatibility with fastembed.

        Returns:
            numpy.ndarray: Shape (n_texts, embedding_dim) embeddings.
        """
        batch = [texts] if isinstance(texts, str) else list(texts)
        # Apply the ModelScope routing workaround to every item.
        cleaned = [self._sanitize_text(item) for item in batch]
        # LiteLLM batches internally, so any batch_size kwarg is ignored.
        return self._embedder.embed(cleaned)

    def embed_single(self, text: str) -> list[float]:
        """Generate an embedding for a single text.

        Args:
            text: Text to embed.

        Returns:
            list[float]: Embedding vector as a list of floats.
        """
        vectors = self._embedder.embed([self._sanitize_text(text)])
        return vectors[0].tolist()