claude-code-workflow 6.2.7 → 6.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (208)
  1. package/.claude/CLAUDE.md +16 -1
  2. package/.claude/workflows/cli-templates/protocols/analysis-protocol.md +11 -4
  3. package/.claude/workflows/cli-templates/protocols/write-protocol.md +10 -75
  4. package/.claude/workflows/cli-tools-usage.md +14 -24
  5. package/.codex/AGENTS.md +51 -1
  6. package/.codex/prompts/compact.md +378 -0
  7. package/.gemini/GEMINI.md +57 -20
  8. package/ccw/dist/cli.d.ts.map +1 -1
  9. package/ccw/dist/cli.js +21 -8
  10. package/ccw/dist/cli.js.map +1 -1
  11. package/ccw/dist/commands/cli.d.ts +2 -0
  12. package/ccw/dist/commands/cli.d.ts.map +1 -1
  13. package/ccw/dist/commands/cli.js +129 -8
  14. package/ccw/dist/commands/cli.js.map +1 -1
  15. package/ccw/dist/commands/hook.d.ts.map +1 -1
  16. package/ccw/dist/commands/hook.js +3 -2
  17. package/ccw/dist/commands/hook.js.map +1 -1
  18. package/ccw/dist/config/litellm-api-config-manager.d.ts +180 -0
  19. package/ccw/dist/config/litellm-api-config-manager.d.ts.map +1 -0
  20. package/ccw/dist/config/litellm-api-config-manager.js +770 -0
  21. package/ccw/dist/config/litellm-api-config-manager.js.map +1 -0
  22. package/ccw/dist/config/provider-models.d.ts +73 -0
  23. package/ccw/dist/config/provider-models.d.ts.map +1 -0
  24. package/ccw/dist/config/provider-models.js +172 -0
  25. package/ccw/dist/config/provider-models.js.map +1 -0
  26. package/ccw/dist/core/cache-manager.d.ts.map +1 -1
  27. package/ccw/dist/core/cache-manager.js +3 -5
  28. package/ccw/dist/core/cache-manager.js.map +1 -1
  29. package/ccw/dist/core/dashboard-generator.d.ts.map +1 -1
  30. package/ccw/dist/core/dashboard-generator.js +3 -1
  31. package/ccw/dist/core/dashboard-generator.js.map +1 -1
  32. package/ccw/dist/core/routes/cli-routes.d.ts.map +1 -1
  33. package/ccw/dist/core/routes/cli-routes.js +169 -0
  34. package/ccw/dist/core/routes/cli-routes.js.map +1 -1
  35. package/ccw/dist/core/routes/codexlens-routes.d.ts.map +1 -1
  36. package/ccw/dist/core/routes/codexlens-routes.js +234 -18
  37. package/ccw/dist/core/routes/codexlens-routes.js.map +1 -1
  38. package/ccw/dist/core/routes/hooks-routes.d.ts.map +1 -1
  39. package/ccw/dist/core/routes/hooks-routes.js +30 -32
  40. package/ccw/dist/core/routes/hooks-routes.js.map +1 -1
  41. package/ccw/dist/core/routes/litellm-api-routes.d.ts +21 -0
  42. package/ccw/dist/core/routes/litellm-api-routes.d.ts.map +1 -0
  43. package/ccw/dist/core/routes/litellm-api-routes.js +780 -0
  44. package/ccw/dist/core/routes/litellm-api-routes.js.map +1 -0
  45. package/ccw/dist/core/routes/litellm-routes.d.ts +20 -0
  46. package/ccw/dist/core/routes/litellm-routes.d.ts.map +1 -0
  47. package/ccw/dist/core/routes/litellm-routes.js +85 -0
  48. package/ccw/dist/core/routes/litellm-routes.js.map +1 -0
  49. package/ccw/dist/core/routes/mcp-routes.js +2 -2
  50. package/ccw/dist/core/routes/mcp-routes.js.map +1 -1
  51. package/ccw/dist/core/routes/status-routes.d.ts.map +1 -1
  52. package/ccw/dist/core/routes/status-routes.js +39 -0
  53. package/ccw/dist/core/routes/status-routes.js.map +1 -1
  54. package/ccw/dist/core/routes/system-routes.js +1 -1
  55. package/ccw/dist/core/routes/system-routes.js.map +1 -1
  56. package/ccw/dist/core/server.d.ts.map +1 -1
  57. package/ccw/dist/core/server.js +15 -1
  58. package/ccw/dist/core/server.js.map +1 -1
  59. package/ccw/dist/mcp-server/index.js +1 -1
  60. package/ccw/dist/mcp-server/index.js.map +1 -1
  61. package/ccw/dist/tools/claude-cli-tools.d.ts +82 -0
  62. package/ccw/dist/tools/claude-cli-tools.d.ts.map +1 -0
  63. package/ccw/dist/tools/claude-cli-tools.js +216 -0
  64. package/ccw/dist/tools/claude-cli-tools.js.map +1 -0
  65. package/ccw/dist/tools/cli-executor.d.ts.map +1 -1
  66. package/ccw/dist/tools/cli-executor.js +76 -14
  67. package/ccw/dist/tools/cli-executor.js.map +1 -1
  68. package/ccw/dist/tools/codex-lens.d.ts +9 -2
  69. package/ccw/dist/tools/codex-lens.d.ts.map +1 -1
  70. package/ccw/dist/tools/codex-lens.js +114 -9
  71. package/ccw/dist/tools/codex-lens.js.map +1 -1
  72. package/ccw/dist/tools/context-cache-store.d.ts +136 -0
  73. package/ccw/dist/tools/context-cache-store.d.ts.map +1 -0
  74. package/ccw/dist/tools/context-cache-store.js +256 -0
  75. package/ccw/dist/tools/context-cache-store.js.map +1 -0
  76. package/ccw/dist/tools/context-cache.d.ts +56 -0
  77. package/ccw/dist/tools/context-cache.d.ts.map +1 -0
  78. package/ccw/dist/tools/context-cache.js +294 -0
  79. package/ccw/dist/tools/context-cache.js.map +1 -0
  80. package/ccw/dist/tools/core-memory.d.ts.map +1 -1
  81. package/ccw/dist/tools/core-memory.js +33 -19
  82. package/ccw/dist/tools/core-memory.js.map +1 -1
  83. package/ccw/dist/tools/index.d.ts.map +1 -1
  84. package/ccw/dist/tools/index.js +2 -0
  85. package/ccw/dist/tools/index.js.map +1 -1
  86. package/ccw/dist/tools/litellm-client.d.ts +85 -0
  87. package/ccw/dist/tools/litellm-client.d.ts.map +1 -0
  88. package/ccw/dist/tools/litellm-client.js +188 -0
  89. package/ccw/dist/tools/litellm-client.js.map +1 -0
  90. package/ccw/dist/tools/litellm-executor.d.ts +34 -0
  91. package/ccw/dist/tools/litellm-executor.d.ts.map +1 -0
  92. package/ccw/dist/tools/litellm-executor.js +192 -0
  93. package/ccw/dist/tools/litellm-executor.js.map +1 -0
  94. package/ccw/dist/tools/pattern-parser.d.ts +55 -0
  95. package/ccw/dist/tools/pattern-parser.d.ts.map +1 -0
  96. package/ccw/dist/tools/pattern-parser.js +237 -0
  97. package/ccw/dist/tools/pattern-parser.js.map +1 -0
  98. package/ccw/dist/tools/smart-search.d.ts +1 -0
  99. package/ccw/dist/tools/smart-search.d.ts.map +1 -1
  100. package/ccw/dist/tools/smart-search.js +117 -41
  101. package/ccw/dist/tools/smart-search.js.map +1 -1
  102. package/ccw/dist/types/litellm-api-config.d.ts +294 -0
  103. package/ccw/dist/types/litellm-api-config.d.ts.map +1 -0
  104. package/ccw/dist/types/litellm-api-config.js +8 -0
  105. package/ccw/dist/types/litellm-api-config.js.map +1 -0
  106. package/ccw/src/cli.ts +258 -244
  107. package/ccw/src/commands/cli.ts +153 -9
  108. package/ccw/src/commands/hook.ts +3 -2
  109. package/ccw/src/config/.litellm-api-config-manager.ts.2025-12-23T11-57-43-727Z.bak +441 -0
  110. package/ccw/src/config/litellm-api-config-manager.ts +1012 -0
  111. package/ccw/src/config/provider-models.ts +222 -0
  112. package/ccw/src/core/cache-manager.ts +292 -294
  113. package/ccw/src/core/dashboard-generator.ts +3 -1
  114. package/ccw/src/core/routes/cli-routes.ts +192 -0
  115. package/ccw/src/core/routes/codexlens-routes.ts +241 -19
  116. package/ccw/src/core/routes/hooks-routes.ts +399 -405
  117. package/ccw/src/core/routes/litellm-api-routes.ts +930 -0
  118. package/ccw/src/core/routes/litellm-routes.ts +107 -0
  119. package/ccw/src/core/routes/mcp-routes.ts +1271 -1271
  120. package/ccw/src/core/routes/status-routes.ts +51 -0
  121. package/ccw/src/core/routes/system-routes.ts +1 -1
  122. package/ccw/src/core/server.ts +15 -1
  123. package/ccw/src/mcp-server/index.ts +1 -1
  124. package/ccw/src/templates/dashboard-css/12-cli-legacy.css +44 -0
  125. package/ccw/src/templates/dashboard-css/31-api-settings.css +2265 -0
  126. package/ccw/src/templates/dashboard-js/components/cli-history.js +15 -8
  127. package/ccw/src/templates/dashboard-js/components/cli-status.js +323 -9
  128. package/ccw/src/templates/dashboard-js/components/navigation.js +329 -313
  129. package/ccw/src/templates/dashboard-js/i18n.js +583 -1
  130. package/ccw/src/templates/dashboard-js/views/api-settings.js +3362 -0
  131. package/ccw/src/templates/dashboard-js/views/cli-manager.js +199 -24
  132. package/ccw/src/templates/dashboard-js/views/codexlens-manager.js +1265 -27
  133. package/ccw/src/templates/dashboard.html +840 -831
  134. package/ccw/src/tools/claude-cli-tools.ts +300 -0
  135. package/ccw/src/tools/cli-executor.ts +83 -14
  136. package/ccw/src/tools/codex-lens.ts +146 -9
  137. package/ccw/src/tools/context-cache-store.ts +368 -0
  138. package/ccw/src/tools/context-cache.ts +393 -0
  139. package/ccw/src/tools/core-memory.ts +33 -19
  140. package/ccw/src/tools/index.ts +2 -0
  141. package/ccw/src/tools/litellm-client.ts +246 -0
  142. package/ccw/src/tools/litellm-executor.ts +241 -0
  143. package/ccw/src/tools/pattern-parser.ts +329 -0
  144. package/ccw/src/tools/smart-search.ts +142 -41
  145. package/ccw/src/types/litellm-api-config.ts +402 -0
  146. package/ccw-litellm/README.md +180 -0
  147. package/ccw-litellm/pyproject.toml +35 -0
  148. package/ccw-litellm/src/ccw_litellm/__init__.py +47 -0
  149. package/ccw-litellm/src/ccw_litellm/__pycache__/__init__.cpython-313.pyc +0 -0
  150. package/ccw-litellm/src/ccw_litellm/__pycache__/cli.cpython-313.pyc +0 -0
  151. package/ccw-litellm/src/ccw_litellm/cli.py +108 -0
  152. package/ccw-litellm/src/ccw_litellm/clients/__init__.py +12 -0
  153. package/ccw-litellm/src/ccw_litellm/clients/__pycache__/__init__.cpython-313.pyc +0 -0
  154. package/ccw-litellm/src/ccw_litellm/clients/__pycache__/litellm_embedder.cpython-313.pyc +0 -0
  155. package/ccw-litellm/src/ccw_litellm/clients/__pycache__/litellm_llm.cpython-313.pyc +0 -0
  156. package/ccw-litellm/src/ccw_litellm/clients/litellm_embedder.py +251 -0
  157. package/ccw-litellm/src/ccw_litellm/clients/litellm_llm.py +165 -0
  158. package/ccw-litellm/src/ccw_litellm/config/__init__.py +22 -0
  159. package/ccw-litellm/src/ccw_litellm/config/__pycache__/__init__.cpython-313.pyc +0 -0
  160. package/ccw-litellm/src/ccw_litellm/config/__pycache__/loader.cpython-313.pyc +0 -0
  161. package/ccw-litellm/src/ccw_litellm/config/__pycache__/models.cpython-313.pyc +0 -0
  162. package/ccw-litellm/src/ccw_litellm/config/loader.py +316 -0
  163. package/ccw-litellm/src/ccw_litellm/config/models.py +130 -0
  164. package/ccw-litellm/src/ccw_litellm/interfaces/__init__.py +14 -0
  165. package/ccw-litellm/src/ccw_litellm/interfaces/__pycache__/__init__.cpython-313.pyc +0 -0
  166. package/ccw-litellm/src/ccw_litellm/interfaces/__pycache__/embedder.cpython-313.pyc +0 -0
  167. package/ccw-litellm/src/ccw_litellm/interfaces/__pycache__/llm.cpython-313.pyc +0 -0
  168. package/ccw-litellm/src/ccw_litellm/interfaces/embedder.py +52 -0
  169. package/ccw-litellm/src/ccw_litellm/interfaces/llm.py +45 -0
  170. package/codex-lens/src/codexlens/__pycache__/config.cpython-313.pyc +0 -0
  171. package/codex-lens/src/codexlens/cli/__pycache__/commands.cpython-313.pyc +0 -0
  172. package/codex-lens/src/codexlens/cli/__pycache__/embedding_manager.cpython-313.pyc +0 -0
  173. package/codex-lens/src/codexlens/cli/__pycache__/model_manager.cpython-313.pyc +0 -0
  174. package/codex-lens/src/codexlens/cli/__pycache__/output.cpython-313.pyc +0 -0
  175. package/codex-lens/src/codexlens/cli/commands.py +378 -23
  176. package/codex-lens/src/codexlens/cli/embedding_manager.py +660 -56
  177. package/codex-lens/src/codexlens/cli/model_manager.py +31 -18
  178. package/codex-lens/src/codexlens/cli/output.py +12 -1
  179. package/codex-lens/src/codexlens/config.py +93 -0
  180. package/codex-lens/src/codexlens/search/__pycache__/chain_search.cpython-313.pyc +0 -0
  181. package/codex-lens/src/codexlens/search/__pycache__/hybrid_search.cpython-313.pyc +0 -0
  182. package/codex-lens/src/codexlens/search/__pycache__/ranking.cpython-313.pyc +0 -0
  183. package/codex-lens/src/codexlens/search/chain_search.py +6 -2
  184. package/codex-lens/src/codexlens/search/hybrid_search.py +44 -21
  185. package/codex-lens/src/codexlens/search/ranking.py +1 -1
  186. package/codex-lens/src/codexlens/semantic/__init__.py +42 -0
  187. package/codex-lens/src/codexlens/semantic/__pycache__/__init__.cpython-313.pyc +0 -0
  188. package/codex-lens/src/codexlens/semantic/__pycache__/base.cpython-313.pyc +0 -0
  189. package/codex-lens/src/codexlens/semantic/__pycache__/chunker.cpython-313.pyc +0 -0
  190. package/codex-lens/src/codexlens/semantic/__pycache__/embedder.cpython-313.pyc +0 -0
  191. package/codex-lens/src/codexlens/semantic/__pycache__/factory.cpython-313.pyc +0 -0
  192. package/codex-lens/src/codexlens/semantic/__pycache__/gpu_support.cpython-313.pyc +0 -0
  193. package/codex-lens/src/codexlens/semantic/__pycache__/litellm_embedder.cpython-313.pyc +0 -0
  194. package/codex-lens/src/codexlens/semantic/__pycache__/vector_store.cpython-313.pyc +0 -0
  195. package/codex-lens/src/codexlens/semantic/base.py +61 -0
  196. package/codex-lens/src/codexlens/semantic/chunker.py +43 -20
  197. package/codex-lens/src/codexlens/semantic/embedder.py +60 -13
  198. package/codex-lens/src/codexlens/semantic/factory.py +98 -0
  199. package/codex-lens/src/codexlens/semantic/gpu_support.py +225 -3
  200. package/codex-lens/src/codexlens/semantic/litellm_embedder.py +144 -0
  201. package/codex-lens/src/codexlens/semantic/rotational_embedder.py +434 -0
  202. package/codex-lens/src/codexlens/semantic/vector_store.py +33 -8
  203. package/codex-lens/src/codexlens/storage/__pycache__/path_mapper.cpython-313.pyc +0 -0
  204. package/codex-lens/src/codexlens/storage/migrations/__pycache__/migration_004_dual_fts.cpython-313.pyc +0 -0
  205. package/codex-lens/src/codexlens/storage/path_mapper.py +27 -1
  206. package/package.json +15 -5
  207. package/.codex/prompts.zip +0 -0
  208. package/ccw/package.json +0 -65
--- a/package/codex-lens/src/codexlens/semantic/embedder.py
+++ b/package/codex-lens/src/codexlens/semantic/embedder.py
@@ -14,7 +14,8 @@ from typing import Dict, Iterable, List, Optional
 import numpy as np
 
 from . import SEMANTIC_AVAILABLE
-from .gpu_support import get_optimal_providers, is_gpu_available, get_gpu_summary
+from .base import BaseEmbedder
+from .gpu_support import get_optimal_providers, is_gpu_available, get_gpu_summary, get_selected_device_id
 
 logger = logging.getLogger(__name__)
 
@@ -84,7 +85,7 @@ def clear_embedder_cache() -> None:
     gc.collect()
 
 
-class Embedder:
+class Embedder(BaseEmbedder):
     """Generate embeddings for code chunks using fastembed (ONNX-based).
 
     Supported Model Profiles:
@@ -138,25 +139,58 @@ class Embedder:
 
         # Resolve model name from profile or use explicit name
         if model_name:
-            self.model_name = model_name
+            self._model_name = model_name
         elif profile and profile in self.MODELS:
-            self.model_name = self.MODELS[profile]
+            self._model_name = self.MODELS[profile]
         else:
-            self.model_name = self.DEFAULT_MODEL
+            self._model_name = self.DEFAULT_MODEL
 
-        # Configure ONNX execution providers
+        # Configure ONNX execution providers with device_id options for GPU selection
+        # Using with_device_options=True ensures DirectML/CUDA device_id is passed correctly
         if providers is not None:
             self._providers = providers
         else:
-            self._providers = get_optimal_providers(use_gpu=use_gpu)
+            self._providers = get_optimal_providers(use_gpu=use_gpu, with_device_options=True)
 
         self._use_gpu = use_gpu
         self._model = None
 
+    @property
+    def model_name(self) -> str:
+        """Get model name."""
+        return self._model_name
+
     @property
     def embedding_dim(self) -> int:
         """Get embedding dimension for current model."""
-        return self.MODEL_DIMS.get(self.model_name, 768)  # Default to 768 if unknown
+        return self.MODEL_DIMS.get(self._model_name, 768)  # Default to 768 if unknown
+
+    @property
+    def max_tokens(self) -> int:
+        """Get maximum token limit for current model.
+
+        Returns:
+            int: Maximum number of tokens based on model profile.
+                - fast: 512 (lightweight, optimized for speed)
+                - code: 8192 (code-optimized, larger context)
+                - multilingual: 512 (standard multilingual model)
+                - balanced: 512 (general purpose)
+        """
+        # Determine profile from model name
+        profile = None
+        for prof, model in self.MODELS.items():
+            if model == self._model_name:
+                profile = prof
+                break
+
+        # Return token limit based on profile
+        if profile == "code":
+            return 8192
+        elif profile in ("fast", "multilingual", "balanced"):
+            return 512
+        else:
+            # Default for unknown models
+            return 512
 
     @property
     def providers(self) -> List[str]:
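
The new `model_name` and `max_tokens` properties expose the profile-to-token-limit mapping to callers. A minimal usage sketch (module path and profile names taken from this diff; behavior not verified against the released package):

```python
# Hedged sketch: assumes codex-lens and its fastembed dependency are installed.
from codexlens.semantic.embedder import Embedder

embedder = Embedder(profile="code", use_gpu=False)
print(embedder.model_name)     # resolved from the "code" profile via MODELS
print(embedder.embedding_dim)  # looked up in MODEL_DIMS, 768 if unknown
print(embedder.max_tokens)     # 8192 for "code", 512 for the other profiles
```
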
@@ -168,7 +202,12 @@ class Embedder:
         """Check if GPU acceleration is enabled for this embedder."""
         gpu_providers = {"CUDAExecutionProvider", "TensorrtExecutionProvider",
                          "DmlExecutionProvider", "ROCMExecutionProvider", "CoreMLExecutionProvider"}
-        return any(p in gpu_providers for p in self._providers)
+        # Handle both string providers and tuple providers (name, options)
+        for p in self._providers:
+            provider_name = p[0] if isinstance(p, tuple) else p
+            if provider_name in gpu_providers:
+                return True
+        return False
 
     def _load_model(self) -> None:
         """Lazy load the embedding model with configured providers."""
@@ -177,7 +216,9 @@ class Embedder:
 
         from fastembed import TextEmbedding
 
-        # fastembed supports 'providers' parameter for ONNX execution providers
+        # providers already include device_id options via get_optimal_providers(with_device_options=True)
+        # DO NOT pass device_ids separately - fastembed ignores it when providers is specified
+        # See: fastembed/text/onnx_embedding.py - device_ids is only used with cuda=True
         try:
             self._model = TextEmbedding(
                 model_name=self.model_name,
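
The comment in this hunk explains why device selection rides on `providers` rather than fastembed's `device_ids` argument. For illustration only, the equivalent direct call would look roughly like this (the model name is an arbitrary example, not from this diff; tuple pass-through to the ONNX session is an assumption based on the comment above):

```python
# Hedged sketch: onnxruntime accepts providers as plain names or
# (name, options) tuples, which is what with_device_options=True yields.
from fastembed import TextEmbedding

providers = [("DmlExecutionProvider", {"device_id": 1}), "CPUExecutionProvider"]
model = TextEmbedding(
    model_name="BAAI/bge-small-en-v1.5",  # illustrative model choice
    providers=providers,                  # forwarded to the ONNX session
)
```
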
@@ -215,7 +256,7 @@ class Embedder:
         embeddings = list(self._model.embed(texts))
         return [emb.tolist() for emb in embeddings]
 
-    def embed_to_numpy(self, texts: str | Iterable[str]) -> np.ndarray:
+    def embed_to_numpy(self, texts: str | Iterable[str], batch_size: Optional[int] = None) -> np.ndarray:
         """Generate embeddings for one or more texts (returns numpy arrays).
 
         This method is more memory-efficient than embed() as it avoids converting
@@ -224,6 +265,8 @@ class Embedder:
 
         Args:
             texts: Single text or iterable of texts to embed.
+            batch_size: Optional batch size for fastembed processing.
+                Larger values improve GPU utilization but use more memory.
 
         Returns:
             numpy.ndarray of shape (n_texts, embedding_dim) containing embeddings.
@@ -235,8 +278,12 @@ class Embedder:
         else:
             texts = list(texts)
 
-        # Return embeddings as numpy array directly (no .tolist() conversion)
-        embeddings = list(self._model.embed(texts))
+        # Pass batch_size to fastembed for optimal GPU utilization
+        # Default batch_size in fastembed is 256, but larger values can improve throughput
+        if batch_size is not None:
+            embeddings = list(self._model.embed(texts, batch_size=batch_size))
+        else:
+            embeddings = list(self._model.embed(texts))
         return np.array(embeddings)
 
     def embed_single(self, text: str) -> List[float]:
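
Since `embed_to_numpy` now forwards `batch_size`, indexing code can trade memory for GPU throughput. A small sketch reusing the `embedder` from the earlier example:

```python
# Hedged sketch: batch_size value is illustrative, not a recommendation.
texts = ["def add(a, b):", "class Foo:", "SELECT * FROM users"]
vecs = embedder.embed_to_numpy(texts, batch_size=512)  # larger batch, better GPU utilization
assert vecs.shape == (len(texts), embedder.embedding_dim)
```
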
--- /dev/null
+++ b/package/codex-lens/src/codexlens/semantic/factory.py
@@ -0,0 +1,98 @@
+"""Factory for creating embedders.
+
+Provides a unified interface for instantiating different embedder backends.
+"""
+
+from __future__ import annotations
+
+from typing import Any, Dict, List, Optional
+
+from .base import BaseEmbedder
+
+
+def get_embedder(
+    backend: str = "fastembed",
+    profile: str = "code",
+    model: str = "default",
+    use_gpu: bool = True,
+    endpoints: Optional[List[Dict[str, Any]]] = None,
+    strategy: str = "latency_aware",
+    cooldown: float = 60.0,
+    **kwargs: Any,
+) -> BaseEmbedder:
+    """Factory function to create embedder based on backend.
+
+    Args:
+        backend: Embedder backend to use. Options:
+            - "fastembed": Use fastembed (ONNX-based) embedder (default)
+            - "litellm": Use ccw-litellm embedder
+        profile: Model profile for fastembed backend ("fast", "code", "multilingual", "balanced").
+            Used only when backend="fastembed". Default: "code"
+        model: Model identifier for litellm backend.
+            Used only when backend="litellm". Default: "default"
+        use_gpu: Whether to use GPU acceleration when available (default: True).
+            Used only when backend="fastembed".
+        endpoints: Optional list of endpoint configurations for multi-endpoint load balancing.
+            Each endpoint is a dict with keys: model, api_key, api_base, weight.
+            Used only when backend="litellm" and multiple endpoints provided.
+        strategy: Selection strategy for multi-endpoint mode:
+            "round_robin", "latency_aware", "weighted_random".
+            Default: "latency_aware"
+        cooldown: Default cooldown seconds for rate-limited endpoints (default: 60.0)
+        **kwargs: Additional backend-specific arguments
+
+    Returns:
+        BaseEmbedder: Configured embedder instance
+
+    Raises:
+        ValueError: If backend is not recognized
+        ImportError: If required backend dependencies are not installed
+
+    Examples:
+        Create fastembed embedder with code profile:
+        >>> embedder = get_embedder(backend="fastembed", profile="code")
+
+        Create fastembed embedder with fast profile and CPU only:
+        >>> embedder = get_embedder(backend="fastembed", profile="fast", use_gpu=False)
+
+        Create litellm embedder:
+        >>> embedder = get_embedder(backend="litellm", model="text-embedding-3-small")
+
+        Create rotational embedder with multiple endpoints:
+        >>> endpoints = [
+        ...     {"model": "openai/text-embedding-3-small", "api_key": "sk-..."},
+        ...     {"model": "azure/my-embedding", "api_base": "https://...", "api_key": "..."},
+        ... ]
+        >>> embedder = get_embedder(backend="litellm", endpoints=endpoints)
+    """
+    if backend == "fastembed":
+        from .embedder import Embedder
+        return Embedder(profile=profile, use_gpu=use_gpu, **kwargs)
+    elif backend == "litellm":
+        # Check if multi-endpoint mode is requested
+        if endpoints and len(endpoints) > 1:
+            from .rotational_embedder import create_rotational_embedder
+            return create_rotational_embedder(
+                endpoints_config=endpoints,
+                strategy=strategy,
+                default_cooldown=cooldown,
+            )
+        elif endpoints and len(endpoints) == 1:
+            # Single endpoint in list - use it directly
+            ep = endpoints[0]
+            ep_kwargs = {**kwargs}
+            if "api_key" in ep:
+                ep_kwargs["api_key"] = ep["api_key"]
+            if "api_base" in ep:
+                ep_kwargs["api_base"] = ep["api_base"]
+            from .litellm_embedder import LiteLLMEmbedderWrapper
+            return LiteLLMEmbedderWrapper(model=ep["model"], **ep_kwargs)
+        else:
+            # No endpoints list - use model parameter
+            from .litellm_embedder import LiteLLMEmbedderWrapper
+            return LiteLLMEmbedderWrapper(model=model, **kwargs)
+    else:
+        raise ValueError(
+            f"Unknown backend: {backend}. "
+            f"Supported backends: 'fastembed', 'litellm'"
+        )
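
The factory keeps backend imports lazy, so fastembed-only installs never import ccw-litellm and vice versa. One hypothetical config-driven call (the `cfg` dict is invented for illustration; its keys mirror `get_embedder`'s parameters and the endpoint shape in the docstring above):

```python
from codexlens.semantic.factory import get_embedder

cfg = {
    "backend": "litellm",
    "endpoints": [
        {"model": "openai/text-embedding-3-small", "api_key": "sk-..."},
        {"model": "openai/text-embedding-3-large", "api_key": "sk-..."},
    ],
}
embedder = get_embedder(**cfg)  # two endpoints -> rotational load-balanced embedder
```
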
--- a/package/codex-lens/src/codexlens/semantic/gpu_support.py
+++ b/package/codex-lens/src/codexlens/semantic/gpu_support.py
@@ -13,6 +13,15 @@ from typing import List, Optional
 logger = logging.getLogger(__name__)
 
 
+@dataclass
+class GPUDevice:
+    """Individual GPU device info."""
+    device_id: int
+    name: str
+    is_discrete: bool  # True for discrete GPU (NVIDIA, AMD), False for integrated (Intel UHD)
+    vendor: str  # "nvidia", "amd", "intel", "unknown"
+
+
 @dataclass
 class GPUInfo:
     """GPU availability and configuration info."""
@@ -22,15 +31,117 @@ class GPUInfo:
     gpu_count: int = 0
     gpu_name: Optional[str] = None
     onnx_providers: List[str] = None
+    devices: List[GPUDevice] = None  # List of detected GPU devices
+    preferred_device_id: Optional[int] = None  # Preferred GPU for embedding
 
     def __post_init__(self):
         if self.onnx_providers is None:
             self.onnx_providers = ["CPUExecutionProvider"]
+        if self.devices is None:
+            self.devices = []
 
 
 _gpu_info_cache: Optional[GPUInfo] = None
 
 
+def _enumerate_gpus() -> List[GPUDevice]:
+    """Enumerate available GPU devices using WMI on Windows.
+
+    Returns:
+        List of GPUDevice with device info, ordered by device_id.
+    """
+    devices = []
+
+    try:
+        import subprocess
+        import sys
+
+        if sys.platform == "win32":
+            # Use PowerShell to query GPU information via WMI
+            cmd = [
+                "powershell", "-NoProfile", "-Command",
+                "Get-WmiObject Win32_VideoController | Select-Object DeviceID, Name, AdapterCompatibility | ConvertTo-Json"
+            ]
+            result = subprocess.run(cmd, capture_output=True, text=True, timeout=10)
+
+            if result.returncode == 0 and result.stdout.strip():
+                import json
+                gpu_data = json.loads(result.stdout)
+
+                # Handle single GPU case (returns dict instead of list)
+                if isinstance(gpu_data, dict):
+                    gpu_data = [gpu_data]
+
+                for idx, gpu in enumerate(gpu_data):
+                    name = gpu.get("Name", "Unknown GPU")
+                    compat = gpu.get("AdapterCompatibility", "").lower()
+
+                    # Determine vendor
+                    name_lower = name.lower()
+                    if "nvidia" in name_lower or "nvidia" in compat:
+                        vendor = "nvidia"
+                        is_discrete = True
+                    elif "amd" in name_lower or "radeon" in name_lower or "amd" in compat:
+                        vendor = "amd"
+                        is_discrete = True
+                    elif "intel" in name_lower or "intel" in compat:
+                        vendor = "intel"
+                        # Intel UHD/Iris are integrated, Intel Arc is discrete
+                        is_discrete = "arc" in name_lower
+                    else:
+                        vendor = "unknown"
+                        is_discrete = False
+
+                    devices.append(GPUDevice(
+                        device_id=idx,
+                        name=name,
+                        is_discrete=is_discrete,
+                        vendor=vendor
+                    ))
+                    logger.debug(f"Detected GPU {idx}: {name} (vendor={vendor}, discrete={is_discrete})")
+
+    except Exception as e:
+        logger.debug(f"GPU enumeration failed: {e}")
+
+    return devices
+
+
+def _get_preferred_device_id(devices: List[GPUDevice]) -> Optional[int]:
+    """Determine the preferred GPU device_id for embedding.
+
+    Preference order:
+        1. NVIDIA discrete GPU (best DirectML/CUDA support)
+        2. AMD discrete GPU
+        3. Intel Arc (discrete)
+        4. Intel integrated (fallback)
+
+    Returns:
+        device_id of preferred GPU, or None to use default.
+    """
+    if not devices:
+        return None
+
+    # Priority: NVIDIA > AMD > Intel Arc > Intel integrated
+    priority_order = [
+        ("nvidia", True),   # NVIDIA discrete
+        ("amd", True),      # AMD discrete
+        ("intel", True),    # Intel Arc (discrete)
+        ("intel", False),   # Intel integrated (fallback)
+    ]
+
+    for target_vendor, target_discrete in priority_order:
+        for device in devices:
+            if device.vendor == target_vendor and device.is_discrete == target_discrete:
+                logger.info(f"Preferred GPU: {device.name} (device_id={device.device_id})")
+                return device.device_id
+
+    # If no match, use first device
+    if devices:
+        return devices[0].device_id
+
+    return None
+
+
 def detect_gpu(force_refresh: bool = False) -> GPUInfo:
     """Detect available GPU resources for embedding acceleration.
 
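A worked example of the preference scan (device names are hypothetical; on Windows, `_enumerate_gpus` would build this list from the WMI query above):

```python
from codexlens.semantic.gpu_support import GPUDevice, _get_preferred_device_id

devices = [
    GPUDevice(device_id=0, name="Intel(R) UHD Graphics", is_discrete=False, vendor="intel"),
    GPUDevice(device_id=1, name="NVIDIA GeForce RTX 3060", is_discrete=True, vendor="nvidia"),
]
# Priority scan: NVIDIA discrete > AMD discrete > Intel Arc > Intel integrated
assert _get_preferred_device_id(devices) == 1
```
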
@@ -47,6 +158,18 @@ def detect_gpu(force_refresh: bool = False) -> GPUInfo:
 
     info = GPUInfo()
 
+    # Enumerate GPU devices first
+    info.devices = _enumerate_gpus()
+    info.gpu_count = len(info.devices)
+    if info.devices:
+        # Set preferred device (discrete GPU preferred over integrated)
+        info.preferred_device_id = _get_preferred_device_id(info.devices)
+        # Set gpu_name to preferred device name
+        for dev in info.devices:
+            if dev.device_id == info.preferred_device_id:
+                info.gpu_name = dev.name
+                break
+
     # Check PyTorch CUDA availability (most reliable detection)
     try:
         import torch
@@ -143,21 +266,48 @@ def detect_gpu(force_refresh: bool = False) -> GPUInfo:
     return info
 
 
-def get_optimal_providers(use_gpu: bool = True) -> List[str]:
+def get_optimal_providers(use_gpu: bool = True, with_device_options: bool = False) -> list:
     """Get optimal ONNX execution providers based on availability.
 
     Args:
         use_gpu: If True, include GPU providers when available.
            If False, force CPU-only execution.
+        with_device_options: If True, return providers as tuples with device_id options
+            for proper GPU device selection (required for DirectML).
 
     Returns:
-        List of provider names in priority order.
+        List of provider names or tuples (provider_name, options_dict) in priority order.
     """
     if not use_gpu:
         return ["CPUExecutionProvider"]
 
     gpu_info = detect_gpu()
-    return gpu_info.onnx_providers
+
+    if not with_device_options:
+        return gpu_info.onnx_providers
+
+    # Build providers with device_id options for GPU providers
+    device_id = get_selected_device_id()
+    providers = []
+
+    for provider in gpu_info.onnx_providers:
+        if provider == "DmlExecutionProvider" and device_id is not None:
+            # DirectML requires device_id in provider_options tuple
+            providers.append(("DmlExecutionProvider", {"device_id": device_id}))
+            logger.debug(f"DmlExecutionProvider configured with device_id={device_id}")
+        elif provider == "CUDAExecutionProvider" and device_id is not None:
+            # CUDA also supports device_id in provider_options
+            providers.append(("CUDAExecutionProvider", {"device_id": device_id}))
+            logger.debug(f"CUDAExecutionProvider configured with device_id={device_id}")
+        elif provider == "ROCMExecutionProvider" and device_id is not None:
+            # ROCm supports device_id
+            providers.append(("ROCMExecutionProvider", {"device_id": device_id}))
+            logger.debug(f"ROCMExecutionProvider configured with device_id={device_id}")
+        else:
+            # CPU and other providers don't need device_id
+            providers.append(provider)
+
+    return providers
 
 
 def is_gpu_available() -> bool:
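
With `with_device_options=True` the returned list can mix plain provider names and `(name, options)` tuples. A sketch of the expected shape (actual contents depend on the machine; the example output is illustrative):

```python
from codexlens.semantic.gpu_support import get_optimal_providers

providers = get_optimal_providers(use_gpu=True, with_device_options=True)
# On a DirectML machine with a selected/preferred GPU this might return:
#   [("DmlExecutionProvider", {"device_id": 1}), "CPUExecutionProvider"]
# and on a CPU-only machine simply ["CPUExecutionProvider"].
```
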
@@ -190,3 +340,75 @@ def clear_gpu_cache() -> None:
     """Clear cached GPU detection info."""
     global _gpu_info_cache
     _gpu_info_cache = None
+
+
+# User-selected device ID (overrides auto-detection)
+_selected_device_id: Optional[int] = None
+
+
+def get_gpu_devices() -> List[dict]:
+    """Get list of available GPU devices for frontend selection.
+
+    Returns:
+        List of dicts with device info for each GPU.
+    """
+    info = detect_gpu()
+    devices = []
+
+    for dev in info.devices:
+        devices.append({
+            "device_id": dev.device_id,
+            "name": dev.name,
+            "vendor": dev.vendor,
+            "is_discrete": dev.is_discrete,
+            "is_preferred": dev.device_id == info.preferred_device_id,
+            "is_selected": dev.device_id == get_selected_device_id(),
+        })
+
+    return devices
+
+
+def get_selected_device_id() -> Optional[int]:
+    """Get the user-selected GPU device_id.
+
+    Returns:
+        User-selected device_id, or auto-detected preferred device_id if not set.
+    """
+    global _selected_device_id
+
+    if _selected_device_id is not None:
+        return _selected_device_id
+
+    # Fall back to auto-detected preferred device
+    info = detect_gpu()
+    return info.preferred_device_id
+
+
+def set_selected_device_id(device_id: Optional[int]) -> bool:
+    """Set the GPU device_id to use for embeddings.
+
+    Args:
+        device_id: GPU device_id to use, or None to use auto-detection.
+
+    Returns:
+        True if device_id is valid, False otherwise.
+    """
+    global _selected_device_id
+
+    if device_id is None:
+        _selected_device_id = None
+        logger.info("GPU selection reset to auto-detection")
+        return True
+
+    # Validate device_id exists
+    info = detect_gpu()
+    valid_ids = [dev.device_id for dev in info.devices]
+
+    if device_id in valid_ids:
+        _selected_device_id = device_id
+        device_name = next((dev.name for dev in info.devices if dev.device_id == device_id), "Unknown")
+        logger.info(f"GPU selection set to device {device_id}: {device_name}")
+        return True
+    else:
+        logger.warning(f"Invalid device_id {device_id}. Valid IDs: {valid_ids}")
+        return False
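
These selection helpers form a small round-trip API, presumably consumed by the new dashboard routes elsewhere in this release. A usage sketch:

```python
from codexlens.semantic.gpu_support import (
    get_gpu_devices,
    get_selected_device_id,
    set_selected_device_id,
)

for dev in get_gpu_devices():
    marker = " (preferred)" if dev["is_preferred"] else ""
    print(f'{dev["device_id"]}: {dev["name"]}{marker}')

if set_selected_device_id(0):    # returns False for unknown device IDs
    assert get_selected_device_id() == 0
set_selected_device_id(None)     # reset to auto-detection
```
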
--- /dev/null
+++ b/package/codex-lens/src/codexlens/semantic/litellm_embedder.py
@@ -0,0 +1,144 @@
+"""LiteLLM embedder wrapper for CodexLens.
+
+Provides integration with ccw-litellm's LiteLLMEmbedder for embedding generation.
+"""
+
+from __future__ import annotations
+
+from typing import Iterable
+
+import numpy as np
+
+from .base import BaseEmbedder
+
+
+class LiteLLMEmbedderWrapper(BaseEmbedder):
+    """Wrapper for ccw-litellm LiteLLMEmbedder.
+
+    This wrapper adapts the ccw-litellm LiteLLMEmbedder to the CodexLens
+    BaseEmbedder interface, enabling seamless integration with CodexLens
+    semantic search functionality.
+
+    Args:
+        model: Model identifier for LiteLLM (default: "default")
+        **kwargs: Additional arguments passed to LiteLLMEmbedder
+
+    Raises:
+        ImportError: If ccw-litellm package is not installed
+    """
+
+    def __init__(self, model: str = "default", **kwargs) -> None:
+        """Initialize LiteLLM embedder wrapper.
+
+        Args:
+            model: Model identifier for LiteLLM (default: "default")
+            **kwargs: Additional arguments passed to LiteLLMEmbedder
+
+        Raises:
+            ImportError: If ccw-litellm package is not installed
+        """
+        try:
+            from ccw_litellm import LiteLLMEmbedder
+            self._embedder = LiteLLMEmbedder(model=model, **kwargs)
+        except ImportError as e:
+            raise ImportError(
+                "ccw-litellm not installed. Install with: pip install ccw-litellm"
+            ) from e
+
+    @property
+    def embedding_dim(self) -> int:
+        """Return embedding dimensions from LiteLLMEmbedder.
+
+        Returns:
+            int: Dimension of the embedding vectors.
+        """
+        return self._embedder.dimensions
+
+    @property
+    def model_name(self) -> str:
+        """Return model name from LiteLLMEmbedder.
+
+        Returns:
+            str: Name or identifier of the underlying model.
+        """
+        return self._embedder.model_name
+
+    @property
+    def max_tokens(self) -> int:
+        """Return maximum token limit for the embedding model.
+
+        Returns:
+            int: Maximum number of tokens that can be embedded at once.
+                Inferred from model config or model name patterns.
+        """
+        # Try to get from LiteLLM config first
+        if hasattr(self._embedder, 'max_input_tokens') and self._embedder.max_input_tokens:
+            return self._embedder.max_input_tokens
+
+        # Infer from model name
+        model_name_lower = self.model_name.lower()
+
+        # Large models (8B or "large" in name)
+        if '8b' in model_name_lower or 'large' in model_name_lower:
+            return 32768
+
+        # OpenAI text-embedding-3-* models
+        if 'text-embedding-3' in model_name_lower:
+            return 8191
+
+        # Default fallback
+        return 8192
+
+    def _sanitize_text(self, text: str) -> str:
+        """Sanitize text to work around ModelScope API routing bug.
+
+        ModelScope incorrectly routes text starting with lowercase 'import'
+        to an Ollama endpoint, causing failures. This adds a leading space
+        to work around the issue without affecting embedding quality.
+
+        Args:
+            text: Text to sanitize.
+
+        Returns:
+            Sanitized text safe for embedding API.
+        """
+        if text.startswith('import'):
+            return ' ' + text
+        return text
+
+    def embed_to_numpy(self, texts: str | Iterable[str], **kwargs) -> np.ndarray:
+        """Embed texts to numpy array using LiteLLMEmbedder.
+
+        Args:
+            texts: Single text or iterable of texts to embed.
+            **kwargs: Additional arguments (ignored for LiteLLM backend).
+                Accepts batch_size for API compatibility with fastembed.
+
+        Returns:
+            numpy.ndarray: Array of shape (n_texts, embedding_dim) containing embeddings.
+        """
+        if isinstance(texts, str):
+            texts = [texts]
+        else:
+            texts = list(texts)
+
+        # Sanitize texts to avoid ModelScope routing bug
+        texts = [self._sanitize_text(t) for t in texts]
+
+        # LiteLLM handles batching internally, ignore batch_size parameter
+        return self._embedder.embed(texts)
+
+    def embed_single(self, text: str) -> list[float]:
+        """Generate embedding for a single text.
+
+        Args:
+            text: Text to embed.
+
+        Returns:
+            list[float]: Embedding vector as a list of floats.
+        """
+        # Sanitize text before embedding
+        sanitized = self._sanitize_text(text)
+        embedding = self._embedder.embed([sanitized])
+        return embedding[0].tolist()
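
A minimal sketch of the wrapper in use (assumes ccw-litellm is installed and a "default" model is configured; see package/ccw-litellm/README.md in this release):

```python
from codexlens.semantic.litellm_embedder import LiteLLMEmbedderWrapper

embedder = LiteLLMEmbedderWrapper(model="default")
vec = embedder.embed_single("import numpy as np")  # leading 'import' gets a space prepended
assert len(vec) == embedder.embedding_dim
```
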