claude-code-workflow 6.2.7 → 6.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (208) hide show
  1. package/.claude/CLAUDE.md +16 -1
  2. package/.claude/workflows/cli-templates/protocols/analysis-protocol.md +11 -4
  3. package/.claude/workflows/cli-templates/protocols/write-protocol.md +10 -75
  4. package/.claude/workflows/cli-tools-usage.md +14 -24
  5. package/.codex/AGENTS.md +51 -1
  6. package/.codex/prompts/compact.md +378 -0
  7. package/.gemini/GEMINI.md +57 -20
  8. package/ccw/dist/cli.d.ts.map +1 -1
  9. package/ccw/dist/cli.js +21 -8
  10. package/ccw/dist/cli.js.map +1 -1
  11. package/ccw/dist/commands/cli.d.ts +2 -0
  12. package/ccw/dist/commands/cli.d.ts.map +1 -1
  13. package/ccw/dist/commands/cli.js +129 -8
  14. package/ccw/dist/commands/cli.js.map +1 -1
  15. package/ccw/dist/commands/hook.d.ts.map +1 -1
  16. package/ccw/dist/commands/hook.js +3 -2
  17. package/ccw/dist/commands/hook.js.map +1 -1
  18. package/ccw/dist/config/litellm-api-config-manager.d.ts +180 -0
  19. package/ccw/dist/config/litellm-api-config-manager.d.ts.map +1 -0
  20. package/ccw/dist/config/litellm-api-config-manager.js +770 -0
  21. package/ccw/dist/config/litellm-api-config-manager.js.map +1 -0
  22. package/ccw/dist/config/provider-models.d.ts +73 -0
  23. package/ccw/dist/config/provider-models.d.ts.map +1 -0
  24. package/ccw/dist/config/provider-models.js +172 -0
  25. package/ccw/dist/config/provider-models.js.map +1 -0
  26. package/ccw/dist/core/cache-manager.d.ts.map +1 -1
  27. package/ccw/dist/core/cache-manager.js +3 -5
  28. package/ccw/dist/core/cache-manager.js.map +1 -1
  29. package/ccw/dist/core/dashboard-generator.d.ts.map +1 -1
  30. package/ccw/dist/core/dashboard-generator.js +3 -1
  31. package/ccw/dist/core/dashboard-generator.js.map +1 -1
  32. package/ccw/dist/core/routes/cli-routes.d.ts.map +1 -1
  33. package/ccw/dist/core/routes/cli-routes.js +169 -0
  34. package/ccw/dist/core/routes/cli-routes.js.map +1 -1
  35. package/ccw/dist/core/routes/codexlens-routes.d.ts.map +1 -1
  36. package/ccw/dist/core/routes/codexlens-routes.js +234 -18
  37. package/ccw/dist/core/routes/codexlens-routes.js.map +1 -1
  38. package/ccw/dist/core/routes/hooks-routes.d.ts.map +1 -1
  39. package/ccw/dist/core/routes/hooks-routes.js +30 -32
  40. package/ccw/dist/core/routes/hooks-routes.js.map +1 -1
  41. package/ccw/dist/core/routes/litellm-api-routes.d.ts +21 -0
  42. package/ccw/dist/core/routes/litellm-api-routes.d.ts.map +1 -0
  43. package/ccw/dist/core/routes/litellm-api-routes.js +780 -0
  44. package/ccw/dist/core/routes/litellm-api-routes.js.map +1 -0
  45. package/ccw/dist/core/routes/litellm-routes.d.ts +20 -0
  46. package/ccw/dist/core/routes/litellm-routes.d.ts.map +1 -0
  47. package/ccw/dist/core/routes/litellm-routes.js +85 -0
  48. package/ccw/dist/core/routes/litellm-routes.js.map +1 -0
  49. package/ccw/dist/core/routes/mcp-routes.js +2 -2
  50. package/ccw/dist/core/routes/mcp-routes.js.map +1 -1
  51. package/ccw/dist/core/routes/status-routes.d.ts.map +1 -1
  52. package/ccw/dist/core/routes/status-routes.js +39 -0
  53. package/ccw/dist/core/routes/status-routes.js.map +1 -1
  54. package/ccw/dist/core/routes/system-routes.js +1 -1
  55. package/ccw/dist/core/routes/system-routes.js.map +1 -1
  56. package/ccw/dist/core/server.d.ts.map +1 -1
  57. package/ccw/dist/core/server.js +15 -1
  58. package/ccw/dist/core/server.js.map +1 -1
  59. package/ccw/dist/mcp-server/index.js +1 -1
  60. package/ccw/dist/mcp-server/index.js.map +1 -1
  61. package/ccw/dist/tools/claude-cli-tools.d.ts +82 -0
  62. package/ccw/dist/tools/claude-cli-tools.d.ts.map +1 -0
  63. package/ccw/dist/tools/claude-cli-tools.js +216 -0
  64. package/ccw/dist/tools/claude-cli-tools.js.map +1 -0
  65. package/ccw/dist/tools/cli-executor.d.ts.map +1 -1
  66. package/ccw/dist/tools/cli-executor.js +76 -14
  67. package/ccw/dist/tools/cli-executor.js.map +1 -1
  68. package/ccw/dist/tools/codex-lens.d.ts +9 -2
  69. package/ccw/dist/tools/codex-lens.d.ts.map +1 -1
  70. package/ccw/dist/tools/codex-lens.js +114 -9
  71. package/ccw/dist/tools/codex-lens.js.map +1 -1
  72. package/ccw/dist/tools/context-cache-store.d.ts +136 -0
  73. package/ccw/dist/tools/context-cache-store.d.ts.map +1 -0
  74. package/ccw/dist/tools/context-cache-store.js +256 -0
  75. package/ccw/dist/tools/context-cache-store.js.map +1 -0
  76. package/ccw/dist/tools/context-cache.d.ts +56 -0
  77. package/ccw/dist/tools/context-cache.d.ts.map +1 -0
  78. package/ccw/dist/tools/context-cache.js +294 -0
  79. package/ccw/dist/tools/context-cache.js.map +1 -0
  80. package/ccw/dist/tools/core-memory.d.ts.map +1 -1
  81. package/ccw/dist/tools/core-memory.js +33 -19
  82. package/ccw/dist/tools/core-memory.js.map +1 -1
  83. package/ccw/dist/tools/index.d.ts.map +1 -1
  84. package/ccw/dist/tools/index.js +2 -0
  85. package/ccw/dist/tools/index.js.map +1 -1
  86. package/ccw/dist/tools/litellm-client.d.ts +85 -0
  87. package/ccw/dist/tools/litellm-client.d.ts.map +1 -0
  88. package/ccw/dist/tools/litellm-client.js +188 -0
  89. package/ccw/dist/tools/litellm-client.js.map +1 -0
  90. package/ccw/dist/tools/litellm-executor.d.ts +34 -0
  91. package/ccw/dist/tools/litellm-executor.d.ts.map +1 -0
  92. package/ccw/dist/tools/litellm-executor.js +192 -0
  93. package/ccw/dist/tools/litellm-executor.js.map +1 -0
  94. package/ccw/dist/tools/pattern-parser.d.ts +55 -0
  95. package/ccw/dist/tools/pattern-parser.d.ts.map +1 -0
  96. package/ccw/dist/tools/pattern-parser.js +237 -0
  97. package/ccw/dist/tools/pattern-parser.js.map +1 -0
  98. package/ccw/dist/tools/smart-search.d.ts +1 -0
  99. package/ccw/dist/tools/smart-search.d.ts.map +1 -1
  100. package/ccw/dist/tools/smart-search.js +117 -41
  101. package/ccw/dist/tools/smart-search.js.map +1 -1
  102. package/ccw/dist/types/litellm-api-config.d.ts +294 -0
  103. package/ccw/dist/types/litellm-api-config.d.ts.map +1 -0
  104. package/ccw/dist/types/litellm-api-config.js +8 -0
  105. package/ccw/dist/types/litellm-api-config.js.map +1 -0
  106. package/ccw/src/cli.ts +258 -244
  107. package/ccw/src/commands/cli.ts +153 -9
  108. package/ccw/src/commands/hook.ts +3 -2
  109. package/ccw/src/config/.litellm-api-config-manager.ts.2025-12-23T11-57-43-727Z.bak +441 -0
  110. package/ccw/src/config/litellm-api-config-manager.ts +1012 -0
  111. package/ccw/src/config/provider-models.ts +222 -0
  112. package/ccw/src/core/cache-manager.ts +292 -294
  113. package/ccw/src/core/dashboard-generator.ts +3 -1
  114. package/ccw/src/core/routes/cli-routes.ts +192 -0
  115. package/ccw/src/core/routes/codexlens-routes.ts +241 -19
  116. package/ccw/src/core/routes/hooks-routes.ts +399 -405
  117. package/ccw/src/core/routes/litellm-api-routes.ts +930 -0
  118. package/ccw/src/core/routes/litellm-routes.ts +107 -0
  119. package/ccw/src/core/routes/mcp-routes.ts +1271 -1271
  120. package/ccw/src/core/routes/status-routes.ts +51 -0
  121. package/ccw/src/core/routes/system-routes.ts +1 -1
  122. package/ccw/src/core/server.ts +15 -1
  123. package/ccw/src/mcp-server/index.ts +1 -1
  124. package/ccw/src/templates/dashboard-css/12-cli-legacy.css +44 -0
  125. package/ccw/src/templates/dashboard-css/31-api-settings.css +2265 -0
  126. package/ccw/src/templates/dashboard-js/components/cli-history.js +15 -8
  127. package/ccw/src/templates/dashboard-js/components/cli-status.js +323 -9
  128. package/ccw/src/templates/dashboard-js/components/navigation.js +329 -313
  129. package/ccw/src/templates/dashboard-js/i18n.js +583 -1
  130. package/ccw/src/templates/dashboard-js/views/api-settings.js +3362 -0
  131. package/ccw/src/templates/dashboard-js/views/cli-manager.js +199 -24
  132. package/ccw/src/templates/dashboard-js/views/codexlens-manager.js +1265 -27
  133. package/ccw/src/templates/dashboard.html +840 -831
  134. package/ccw/src/tools/claude-cli-tools.ts +300 -0
  135. package/ccw/src/tools/cli-executor.ts +83 -14
  136. package/ccw/src/tools/codex-lens.ts +146 -9
  137. package/ccw/src/tools/context-cache-store.ts +368 -0
  138. package/ccw/src/tools/context-cache.ts +393 -0
  139. package/ccw/src/tools/core-memory.ts +33 -19
  140. package/ccw/src/tools/index.ts +2 -0
  141. package/ccw/src/tools/litellm-client.ts +246 -0
  142. package/ccw/src/tools/litellm-executor.ts +241 -0
  143. package/ccw/src/tools/pattern-parser.ts +329 -0
  144. package/ccw/src/tools/smart-search.ts +142 -41
  145. package/ccw/src/types/litellm-api-config.ts +402 -0
  146. package/ccw-litellm/README.md +180 -0
  147. package/ccw-litellm/pyproject.toml +35 -0
  148. package/ccw-litellm/src/ccw_litellm/__init__.py +47 -0
  149. package/ccw-litellm/src/ccw_litellm/__pycache__/__init__.cpython-313.pyc +0 -0
  150. package/ccw-litellm/src/ccw_litellm/__pycache__/cli.cpython-313.pyc +0 -0
  151. package/ccw-litellm/src/ccw_litellm/cli.py +108 -0
  152. package/ccw-litellm/src/ccw_litellm/clients/__init__.py +12 -0
  153. package/ccw-litellm/src/ccw_litellm/clients/__pycache__/__init__.cpython-313.pyc +0 -0
  154. package/ccw-litellm/src/ccw_litellm/clients/__pycache__/litellm_embedder.cpython-313.pyc +0 -0
  155. package/ccw-litellm/src/ccw_litellm/clients/__pycache__/litellm_llm.cpython-313.pyc +0 -0
  156. package/ccw-litellm/src/ccw_litellm/clients/litellm_embedder.py +251 -0
  157. package/ccw-litellm/src/ccw_litellm/clients/litellm_llm.py +165 -0
  158. package/ccw-litellm/src/ccw_litellm/config/__init__.py +22 -0
  159. package/ccw-litellm/src/ccw_litellm/config/__pycache__/__init__.cpython-313.pyc +0 -0
  160. package/ccw-litellm/src/ccw_litellm/config/__pycache__/loader.cpython-313.pyc +0 -0
  161. package/ccw-litellm/src/ccw_litellm/config/__pycache__/models.cpython-313.pyc +0 -0
  162. package/ccw-litellm/src/ccw_litellm/config/loader.py +316 -0
  163. package/ccw-litellm/src/ccw_litellm/config/models.py +130 -0
  164. package/ccw-litellm/src/ccw_litellm/interfaces/__init__.py +14 -0
  165. package/ccw-litellm/src/ccw_litellm/interfaces/__pycache__/__init__.cpython-313.pyc +0 -0
  166. package/ccw-litellm/src/ccw_litellm/interfaces/__pycache__/embedder.cpython-313.pyc +0 -0
  167. package/ccw-litellm/src/ccw_litellm/interfaces/__pycache__/llm.cpython-313.pyc +0 -0
  168. package/ccw-litellm/src/ccw_litellm/interfaces/embedder.py +52 -0
  169. package/ccw-litellm/src/ccw_litellm/interfaces/llm.py +45 -0
  170. package/codex-lens/src/codexlens/__pycache__/config.cpython-313.pyc +0 -0
  171. package/codex-lens/src/codexlens/cli/__pycache__/commands.cpython-313.pyc +0 -0
  172. package/codex-lens/src/codexlens/cli/__pycache__/embedding_manager.cpython-313.pyc +0 -0
  173. package/codex-lens/src/codexlens/cli/__pycache__/model_manager.cpython-313.pyc +0 -0
  174. package/codex-lens/src/codexlens/cli/__pycache__/output.cpython-313.pyc +0 -0
  175. package/codex-lens/src/codexlens/cli/commands.py +378 -23
  176. package/codex-lens/src/codexlens/cli/embedding_manager.py +660 -56
  177. package/codex-lens/src/codexlens/cli/model_manager.py +31 -18
  178. package/codex-lens/src/codexlens/cli/output.py +12 -1
  179. package/codex-lens/src/codexlens/config.py +93 -0
  180. package/codex-lens/src/codexlens/search/__pycache__/chain_search.cpython-313.pyc +0 -0
  181. package/codex-lens/src/codexlens/search/__pycache__/hybrid_search.cpython-313.pyc +0 -0
  182. package/codex-lens/src/codexlens/search/__pycache__/ranking.cpython-313.pyc +0 -0
  183. package/codex-lens/src/codexlens/search/chain_search.py +6 -2
  184. package/codex-lens/src/codexlens/search/hybrid_search.py +44 -21
  185. package/codex-lens/src/codexlens/search/ranking.py +1 -1
  186. package/codex-lens/src/codexlens/semantic/__init__.py +42 -0
  187. package/codex-lens/src/codexlens/semantic/__pycache__/__init__.cpython-313.pyc +0 -0
  188. package/codex-lens/src/codexlens/semantic/__pycache__/base.cpython-313.pyc +0 -0
  189. package/codex-lens/src/codexlens/semantic/__pycache__/chunker.cpython-313.pyc +0 -0
  190. package/codex-lens/src/codexlens/semantic/__pycache__/embedder.cpython-313.pyc +0 -0
  191. package/codex-lens/src/codexlens/semantic/__pycache__/factory.cpython-313.pyc +0 -0
  192. package/codex-lens/src/codexlens/semantic/__pycache__/gpu_support.cpython-313.pyc +0 -0
  193. package/codex-lens/src/codexlens/semantic/__pycache__/litellm_embedder.cpython-313.pyc +0 -0
  194. package/codex-lens/src/codexlens/semantic/__pycache__/vector_store.cpython-313.pyc +0 -0
  195. package/codex-lens/src/codexlens/semantic/base.py +61 -0
  196. package/codex-lens/src/codexlens/semantic/chunker.py +43 -20
  197. package/codex-lens/src/codexlens/semantic/embedder.py +60 -13
  198. package/codex-lens/src/codexlens/semantic/factory.py +98 -0
  199. package/codex-lens/src/codexlens/semantic/gpu_support.py +225 -3
  200. package/codex-lens/src/codexlens/semantic/litellm_embedder.py +144 -0
  201. package/codex-lens/src/codexlens/semantic/rotational_embedder.py +434 -0
  202. package/codex-lens/src/codexlens/semantic/vector_store.py +33 -8
  203. package/codex-lens/src/codexlens/storage/__pycache__/path_mapper.cpython-313.pyc +0 -0
  204. package/codex-lens/src/codexlens/storage/migrations/__pycache__/migration_004_dual_fts.cpython-313.pyc +0 -0
  205. package/codex-lens/src/codexlens/storage/path_mapper.py +27 -1
  206. package/package.json +15 -5
  207. package/.codex/prompts.zip +0 -0
  208. package/ccw/package.json +0 -65
@@ -1,27 +1,36 @@
1
1
  """Embedding Manager - Manage semantic embeddings for code indexes."""
2
2
 
3
3
  import gc
4
+ import json
4
5
  import logging
5
6
  import sqlite3
6
7
  import time
8
+ from concurrent.futures import ThreadPoolExecutor, as_completed
7
9
  from itertools import islice
8
10
  from pathlib import Path
9
- from typing import Dict, Generator, List, Optional, Tuple
11
+ from typing import Any, Dict, Generator, List, Optional, Tuple
10
12
 
11
13
  try:
12
- from codexlens.semantic import SEMANTIC_AVAILABLE
13
- if SEMANTIC_AVAILABLE:
14
- from codexlens.semantic.embedder import Embedder, get_embedder, clear_embedder_cache
15
- from codexlens.semantic.vector_store import VectorStore
16
- from codexlens.semantic.chunker import Chunker, ChunkConfig
14
+ from codexlens.semantic import SEMANTIC_AVAILABLE, is_embedding_backend_available
17
15
  except ImportError:
18
16
  SEMANTIC_AVAILABLE = False
17
+ def is_embedding_backend_available(_backend: str): # type: ignore[no-redef]
18
+ return False, "codexlens.semantic not available"
19
19
 
20
20
  logger = logging.getLogger(__name__)
21
21
 
22
22
  # Embedding batch size - larger values improve throughput on modern hardware
23
23
  # Benchmark: 256 gives ~2.35x speedup over 64 with DirectML GPU acceleration
24
- EMBEDDING_BATCH_SIZE = 256 # Optimized from 64 based on batch size benchmarks
24
+ EMBEDDING_BATCH_SIZE = 256
25
+
26
+
27
+ def _cleanup_fastembed_resources() -> None:
28
+ """Best-effort cleanup for fastembed/ONNX resources (no-op for other backends)."""
29
+ try:
30
+ from codexlens.semantic.embedder import clear_embedder_cache
31
+ clear_embedder_cache()
32
+ except Exception:
33
+ pass
25
34
 
26
35
 
27
36
  def _generate_chunks_from_cursor(
@@ -79,6 +88,44 @@ def _generate_chunks_from_cursor(
79
88
  failed_files.append((file_path, str(e)))
80
89
 
81
90
 
91
+ def _create_token_aware_batches(
92
+ chunk_generator: Generator,
93
+ max_tokens_per_batch: int = 8000,
94
+ ) -> Generator[List[Tuple], None, None]:
95
+ """Group chunks by total token count instead of fixed count.
96
+
97
+ Uses fast token estimation (len(content) // 4) for efficiency.
98
+ Yields batches when approaching the token limit.
99
+
100
+ Args:
101
+ chunk_generator: Generator yielding (chunk, file_path) tuples
102
+ max_tokens_per_batch: Maximum tokens per batch (default: 8000)
103
+
104
+ Yields:
105
+ List of (chunk, file_path) tuples representing a batch
106
+ """
107
+ current_batch = []
108
+ current_tokens = 0
109
+
110
+ for chunk, file_path in chunk_generator:
111
+ # Fast token estimation: len(content) // 4
112
+ chunk_tokens = len(chunk.content) // 4
113
+
114
+ # If adding this chunk would exceed limit and we have items, yield current batch
115
+ if current_tokens + chunk_tokens > max_tokens_per_batch and current_batch:
116
+ yield current_batch
117
+ current_batch = []
118
+ current_tokens = 0
119
+
120
+ # Add chunk to current batch
121
+ current_batch.append((chunk, file_path))
122
+ current_tokens += chunk_tokens
123
+
124
+ # Yield final batch if not empty
125
+ if current_batch:
126
+ yield current_batch
127
+
128
+
82
129
  def _get_path_column(conn: sqlite3.Connection) -> str:
83
130
  """Detect whether files table uses 'path' or 'full_path' column.
84
131
 
@@ -189,33 +236,110 @@ def check_index_embeddings(index_path: Path) -> Dict[str, any]:
189
236
  }
190
237
 
191
238
 
239
+ def _get_embedding_defaults() -> tuple[str, str, bool, List, str, float]:
240
+ """Get default embedding settings from config.
241
+
242
+ Returns:
243
+ Tuple of (backend, model, use_gpu, endpoints, strategy, cooldown)
244
+ """
245
+ try:
246
+ from codexlens.config import Config
247
+ config = Config.load()
248
+ return (
249
+ config.embedding_backend,
250
+ config.embedding_model,
251
+ config.embedding_use_gpu,
252
+ config.embedding_endpoints,
253
+ config.embedding_strategy,
254
+ config.embedding_cooldown,
255
+ )
256
+ except Exception:
257
+ return "fastembed", "code", True, [], "latency_aware", 60.0
258
+
259
+
192
260
  def generate_embeddings(
193
261
  index_path: Path,
194
- model_profile: str = "code",
262
+ embedding_backend: Optional[str] = None,
263
+ model_profile: Optional[str] = None,
195
264
  force: bool = False,
196
265
  chunk_size: int = 2000,
266
+ overlap: int = 200,
197
267
  progress_callback: Optional[callable] = None,
268
+ use_gpu: Optional[bool] = None,
269
+ max_tokens_per_batch: Optional[int] = None,
270
+ max_workers: Optional[int] = None,
271
+ endpoints: Optional[List] = None,
272
+ strategy: Optional[str] = None,
273
+ cooldown: Optional[float] = None,
198
274
  ) -> Dict[str, any]:
199
275
  """Generate embeddings for an index using memory-efficient batch processing.
200
276
 
201
277
  This function processes files in small batches to keep memory usage under 2GB,
202
- regardless of the total project size.
278
+ regardless of the total project size. Supports concurrent API calls for
279
+ LiteLLM backend to improve throughput.
203
280
 
204
281
  Args:
205
282
  index_path: Path to _index.db file
206
- model_profile: Model profile (fast, code, multilingual, balanced)
283
+ embedding_backend: Embedding backend to use (fastembed or litellm).
284
+ Defaults to config setting.
285
+ model_profile: Model profile for fastembed (fast, code, multilingual, balanced)
286
+ or model name for litellm (e.g., qwen3-embedding).
287
+ Defaults to config setting.
207
288
  force: If True, regenerate even if embeddings exist
208
289
  chunk_size: Maximum chunk size in characters
290
+ overlap: Overlap size in characters for sliding window chunking (default: 200)
209
291
  progress_callback: Optional callback for progress updates
292
+ use_gpu: Whether to use GPU acceleration (fastembed only).
293
+ Defaults to config setting.
294
+ max_tokens_per_batch: Maximum tokens per batch for token-aware batching.
295
+ If None, attempts to get from embedder.max_tokens,
296
+ then falls back to 8000. If set, overrides automatic detection.
297
+ max_workers: Maximum number of concurrent API calls.
298
+ If None, uses dynamic defaults based on backend and endpoint count.
299
+ endpoints: Optional list of endpoint configurations for multi-API load balancing.
300
+ Each dict has keys: model, api_key, api_base, weight.
301
+ strategy: Selection strategy for multi-endpoint mode (round_robin, latency_aware).
302
+ cooldown: Default cooldown seconds for rate-limited endpoints.
210
303
 
211
304
  Returns:
212
305
  Result dictionary with generation statistics
213
306
  """
214
- if not SEMANTIC_AVAILABLE:
215
- return {
216
- "success": False,
217
- "error": "Semantic search not available. Install with: pip install codexlens[semantic]",
218
- }
307
+ # Get defaults from config if not specified
308
+ (default_backend, default_model, default_gpu,
309
+ default_endpoints, default_strategy, default_cooldown) = _get_embedding_defaults()
310
+
311
+ if embedding_backend is None:
312
+ embedding_backend = default_backend
313
+ if model_profile is None:
314
+ model_profile = default_model
315
+ if use_gpu is None:
316
+ use_gpu = default_gpu
317
+ if endpoints is None:
318
+ endpoints = default_endpoints
319
+ if strategy is None:
320
+ strategy = default_strategy
321
+ if cooldown is None:
322
+ cooldown = default_cooldown
323
+
324
+ # Calculate endpoint count for worker scaling
325
+ endpoint_count = len(endpoints) if endpoints else 1
326
+
327
+ # Set dynamic max_workers default based on backend type and endpoint count
328
+ # - FastEmbed: CPU-bound, sequential is optimal (1 worker)
329
+ # - LiteLLM single endpoint: 4 workers default
330
+ # - LiteLLM multi-endpoint: workers = endpoint_count * 2 (to saturate all APIs)
331
+ if max_workers is None:
332
+ if embedding_backend == "litellm":
333
+ if endpoint_count > 1:
334
+ max_workers = endpoint_count * 2 # No cap, scale with endpoints
335
+ else:
336
+ max_workers = 4
337
+ else:
338
+ max_workers = 1
339
+
340
+ backend_available, backend_error = is_embedding_backend_available(embedding_backend)
341
+ if not backend_available:
342
+ return {"success": False, "error": backend_error or "Embedding backend not available"}
219
343
 
220
344
  if not index_path.exists():
221
345
  return {
@@ -253,13 +377,43 @@ def generate_embeddings(
253
377
 
254
378
  # Initialize components
255
379
  try:
256
- # Initialize embedder (singleton, reused throughout the function)
257
- embedder = get_embedder(profile=model_profile)
380
+ # Import factory function to support both backends
381
+ from codexlens.semantic.factory import get_embedder as get_embedder_factory
382
+ from codexlens.semantic.vector_store import VectorStore
383
+ from codexlens.semantic.chunker import Chunker, ChunkConfig
384
+
385
+ # Initialize embedder using factory (supports fastembed, litellm, and rotational)
386
+ # For fastembed: model_profile is a profile name (fast/code/multilingual/balanced)
387
+ # For litellm: model_profile is a model name (e.g., qwen3-embedding)
388
+ # For multi-endpoint: endpoints list enables load balancing
389
+ if embedding_backend == "fastembed":
390
+ embedder = get_embedder_factory(backend="fastembed", profile=model_profile, use_gpu=use_gpu)
391
+ elif embedding_backend == "litellm":
392
+ embedder = get_embedder_factory(
393
+ backend="litellm",
394
+ model=model_profile,
395
+ endpoints=endpoints if endpoints else None,
396
+ strategy=strategy,
397
+ cooldown=cooldown,
398
+ )
399
+ else:
400
+ return {
401
+ "success": False,
402
+ "error": f"Invalid embedding backend: {embedding_backend}. Must be 'fastembed' or 'litellm'.",
403
+ }
404
+
258
405
  # skip_token_count=True: Use fast estimation (len/4) instead of expensive tiktoken
259
406
  # This significantly reduces CPU usage with minimal impact on metadata accuracy
260
- chunker = Chunker(config=ChunkConfig(max_chunk_size=chunk_size, skip_token_count=True))
407
+ chunker = Chunker(config=ChunkConfig(
408
+ max_chunk_size=chunk_size,
409
+ overlap=overlap,
410
+ skip_token_count=True
411
+ ))
261
412
 
413
+ # Log embedder info with endpoint count for multi-endpoint mode
262
414
  if progress_callback:
415
+ if endpoint_count > 1:
416
+ progress_callback(f"Using {endpoint_count} API endpoints with {strategy} strategy")
263
417
  progress_callback(f"Using model: {embedder.model_name} ({embedder.embedding_dim} dimensions)")
264
418
 
265
419
  except Exception as e:
@@ -292,7 +446,7 @@ def generate_embeddings(
292
446
 
293
447
  # Set/update model configuration for this index
294
448
  vector_store.set_model_config(
295
- model_profile, embedder.model_name, embedder.embedding_dim
449
+ model_profile, embedder.model_name, embedder.embedding_dim, backend=embedding_backend
296
450
  )
297
451
  # Use bulk insert mode for efficient batch ANN index building
298
452
  # This defers ANN updates until end_bulk_insert() is called
@@ -319,42 +473,203 @@ def generate_embeddings(
319
473
  cursor, chunker, path_column, FILE_BATCH_SIZE, failed_files
320
474
  )
321
475
 
476
+ # Determine max tokens per batch
477
+ # Priority: explicit parameter > embedder.max_tokens > default 8000
478
+ if max_tokens_per_batch is None:
479
+ max_tokens_per_batch = getattr(embedder, 'max_tokens', 8000)
480
+
481
+ # Create token-aware batches or fall back to fixed-size batching
482
+ if max_tokens_per_batch:
483
+ batch_generator = _create_token_aware_batches(
484
+ chunk_generator, max_tokens_per_batch
485
+ )
486
+ else:
487
+ # Fallback to fixed-size batching for backward compatibility
488
+ def fixed_size_batches():
489
+ while True:
490
+ batch = list(islice(chunk_generator, EMBEDDING_BATCH_SIZE))
491
+ if not batch:
492
+ break
493
+ yield batch
494
+ batch_generator = fixed_size_batches()
495
+
322
496
  batch_number = 0
323
497
  files_seen = set()
324
498
 
325
- while True:
326
- # Get a small batch of chunks from the generator (EMBEDDING_BATCH_SIZE at a time)
327
- chunk_batch = list(islice(chunk_generator, EMBEDDING_BATCH_SIZE))
328
- if not chunk_batch:
329
- break
499
+ def compute_embeddings_only(batch_data: Tuple[int, List[Tuple]]):
500
+ """Compute embeddings for a batch (no DB write) with retry logic.
330
501
 
331
- batch_number += 1
332
-
333
- # Track unique files for progress
334
- for _, file_path in chunk_batch:
335
- files_seen.add(file_path)
502
+ Args:
503
+ batch_data: Tuple of (batch_number, chunk_batch)
336
504
 
337
- # Generate embeddings directly to numpy (no tolist() conversion)
338
- try:
339
- batch_contents = [chunk.content for chunk, _ in chunk_batch]
340
- embeddings_numpy = embedder.embed_to_numpy(batch_contents)
505
+ Returns:
506
+ Tuple of (batch_num, chunk_batch, embeddings_numpy, batch_files, error)
507
+ """
508
+ import random
341
509
 
342
- # Use add_chunks_batch_numpy to avoid numpy->list->numpy roundtrip
343
- vector_store.add_chunks_batch_numpy(chunk_batch, embeddings_numpy)
344
-
345
- total_chunks_created += len(chunk_batch)
346
- total_files_processed = len(files_seen)
347
-
348
- if progress_callback and batch_number % 10 == 0:
510
+ batch_num, chunk_batch = batch_data
511
+ batch_files = set()
512
+ for _, file_path in chunk_batch:
513
+ batch_files.add(file_path)
514
+
515
+ max_retries = 5
516
+ base_delay = 2.0
517
+
518
+ for attempt in range(max_retries + 1):
519
+ try:
520
+ batch_contents = [chunk.content for chunk, _ in chunk_batch]
521
+ embeddings_numpy = embedder.embed_to_numpy(batch_contents, batch_size=EMBEDDING_BATCH_SIZE)
522
+ return batch_num, chunk_batch, embeddings_numpy, batch_files, None
523
+
524
+ except Exception as e:
525
+ error_str = str(e).lower()
526
+ # Check for retryable errors (rate limit, connection, backend issues)
527
+ # Note: Some backends (e.g., ModelScope) return 400 with nested 500 errors
528
+ is_retryable = any(x in error_str for x in [
529
+ "429", "rate limit", "connection", "timeout",
530
+ "502", "503", "504", "service unavailable",
531
+ "500", "400", "badrequesterror", "internal server error",
532
+ "11434" # Ollama port - indicates backend routing issue
533
+ ])
534
+
535
+ if attempt < max_retries and is_retryable:
536
+ sleep_time = base_delay * (2 ** attempt) + random.uniform(0, 0.5)
537
+ logger.warning(f"Batch {batch_num} failed (attempt {attempt+1}/{max_retries+1}). "
538
+ f"Retrying in {sleep_time:.1f}s. Error: {e}")
539
+ time.sleep(sleep_time)
540
+ continue
541
+
542
+ error_msg = f"Batch {batch_num}: {str(e)}"
543
+ logger.error(f"Failed to compute embeddings for batch {batch_num}: {str(e)}")
544
+ return batch_num, chunk_batch, None, batch_files, error_msg
545
+
546
+ # Should not reach here, but just in case
547
+ return batch_num, chunk_batch, None, batch_files, f"Batch {batch_num}: Max retries exceeded"
548
+
549
+ # Process batches based on max_workers setting
550
+ if max_workers <= 1:
551
+ # Sequential processing - stream directly from generator (no pre-materialization)
552
+ for chunk_batch in batch_generator:
553
+ batch_number += 1
554
+
555
+ # Track files in this batch
556
+ batch_files = set()
557
+ for _, file_path in chunk_batch:
558
+ batch_files.add(file_path)
559
+
560
+ # Retry logic for transient backend errors
561
+ max_retries = 5
562
+ base_delay = 2.0
563
+ success = False
564
+
565
+ for attempt in range(max_retries + 1):
566
+ try:
567
+ # Generate embeddings
568
+ batch_contents = [chunk.content for chunk, _ in chunk_batch]
569
+ embeddings_numpy = embedder.embed_to_numpy(batch_contents, batch_size=EMBEDDING_BATCH_SIZE)
570
+
571
+ # Store embeddings
572
+ vector_store.add_chunks_batch_numpy(chunk_batch, embeddings_numpy)
573
+
574
+ files_seen.update(batch_files)
575
+ total_chunks_created += len(chunk_batch)
576
+ total_files_processed = len(files_seen)
577
+ success = True
578
+ break
579
+
580
+ except Exception as e:
581
+ error_str = str(e).lower()
582
+ # Check for retryable errors (rate limit, connection, backend issues)
583
+ is_retryable = any(x in error_str for x in [
584
+ "429", "rate limit", "connection", "timeout",
585
+ "502", "503", "504", "service unavailable",
586
+ "500", "400", "badrequesterror", "internal server error",
587
+ "11434" # Ollama port - indicates backend routing issue
588
+ ])
589
+
590
+ if attempt < max_retries and is_retryable:
591
+ import random
592
+ sleep_time = base_delay * (2 ** attempt) + random.uniform(0, 0.5)
593
+ logger.warning(f"Batch {batch_number} failed (attempt {attempt+1}/{max_retries+1}). "
594
+ f"Retrying in {sleep_time:.1f}s. Error: {e}")
595
+ time.sleep(sleep_time)
596
+ continue
597
+
598
+ logger.error(f"Failed to process batch {batch_number}: {str(e)}")
599
+ files_seen.update(batch_files)
600
+ break
601
+
602
+ if success and progress_callback and batch_number % 10 == 0:
349
603
  progress_callback(f" Batch {batch_number}: {total_chunks_created} chunks, {total_files_processed} files")
350
-
351
- # Cleanup intermediate data
352
- del batch_contents, embeddings_numpy, chunk_batch
353
-
354
- except Exception as e:
355
- logger.error(f"Failed to process embedding batch {batch_number}: {str(e)}")
356
- # Continue to next batch instead of failing entirely
357
- continue
604
+ else:
605
+ # Concurrent processing - main thread iterates batches (SQLite safe),
606
+ # workers compute embeddings (parallel), main thread writes to DB (serial)
607
+ if progress_callback:
608
+ progress_callback(f"Processing with {max_workers} concurrent embedding workers...")
609
+
610
+ with ThreadPoolExecutor(max_workers=max_workers) as executor:
611
+ pending_futures = {} # future -> (batch_num, chunk_batch)
612
+ completed_batches = 0
613
+ last_reported_batch = 0
614
+
615
+ def process_completed_futures():
616
+ """Process any completed futures and write to DB."""
617
+ nonlocal total_chunks_created, total_files_processed, completed_batches, last_reported_batch
618
+ done_futures = [f for f in pending_futures if f.done()]
619
+ for f in done_futures:
620
+ try:
621
+ batch_num, chunk_batch, embeddings_numpy, batch_files, error = f.result()
622
+ if embeddings_numpy is not None and error is None:
623
+ # Write to DB in main thread (no contention)
624
+ vector_store.add_chunks_batch_numpy(chunk_batch, embeddings_numpy)
625
+ total_chunks_created += len(chunk_batch)
626
+ files_seen.update(batch_files)
627
+ total_files_processed = len(files_seen)
628
+ completed_batches += 1
629
+ except Exception as e:
630
+ logger.error(f"Future raised exception: {e}")
631
+ completed_batches += 1
632
+ del pending_futures[f]
633
+
634
+ # Report progress based on completed batches (every 5 batches)
635
+ if progress_callback and completed_batches >= last_reported_batch + 5:
636
+ progress_callback(f" Batch {completed_batches}: {total_chunks_created} chunks, {total_files_processed} files")
637
+ last_reported_batch = completed_batches
638
+
639
+ # Iterate batches in main thread (SQLite cursor is main-thread bound)
640
+ for chunk_batch in batch_generator:
641
+ batch_number += 1
642
+
643
+ # Submit compute task to worker pool
644
+ future = executor.submit(compute_embeddings_only, (batch_number, chunk_batch))
645
+ pending_futures[future] = batch_number
646
+
647
+ # Process any completed futures to free memory and write to DB
648
+ process_completed_futures()
649
+
650
+ # Backpressure: wait if too many pending
651
+ while len(pending_futures) >= max_workers * 2:
652
+ process_completed_futures()
653
+ if len(pending_futures) >= max_workers * 2:
654
+ time.sleep(0.1) # time is imported at module level
655
+
656
+ # Wait for remaining futures
657
+ for future in as_completed(list(pending_futures.keys())):
658
+ try:
659
+ batch_num, chunk_batch, embeddings_numpy, batch_files, error = future.result()
660
+ if embeddings_numpy is not None and error is None:
661
+ vector_store.add_chunks_batch_numpy(chunk_batch, embeddings_numpy)
662
+ total_chunks_created += len(chunk_batch)
663
+ files_seen.update(batch_files)
664
+ total_files_processed = len(files_seen)
665
+ completed_batches += 1
666
+
667
+ # Report progress for remaining batches
668
+ if progress_callback and completed_batches >= last_reported_batch + 5:
669
+ progress_callback(f" Batch {completed_batches}: {total_chunks_created} chunks, {total_files_processed} files")
670
+ last_reported_batch = completed_batches
671
+ except Exception as e:
672
+ logger.error(f"Future raised exception: {e}")
358
673
 
359
674
  # Notify before ANN index finalization (happens when bulk_insert context exits)
360
675
  if progress_callback:
@@ -363,7 +678,7 @@ def generate_embeddings(
363
678
  except Exception as e:
364
679
  # Cleanup on error to prevent process hanging
365
680
  try:
366
- clear_embedder_cache()
681
+ _cleanup_fastembed_resources()
367
682
  gc.collect()
368
683
  except Exception:
369
684
  pass
@@ -374,7 +689,7 @@ def generate_embeddings(
374
689
  # Final cleanup: release ONNX resources to allow process exit
375
690
  # This is critical - without it, ONNX Runtime threads prevent Python from exiting
376
691
  try:
377
- clear_embedder_cache()
692
+ _cleanup_fastembed_resources()
378
693
  gc.collect()
379
694
  except Exception:
380
695
  pass
@@ -427,23 +742,76 @@ def find_all_indexes(scan_dir: Path) -> List[Path]:
427
742
 
428
743
  def generate_embeddings_recursive(
429
744
  index_root: Path,
430
- model_profile: str = "code",
745
+ embedding_backend: Optional[str] = None,
746
+ model_profile: Optional[str] = None,
431
747
  force: bool = False,
432
748
  chunk_size: int = 2000,
749
+ overlap: int = 200,
433
750
  progress_callback: Optional[callable] = None,
751
+ use_gpu: Optional[bool] = None,
752
+ max_tokens_per_batch: Optional[int] = None,
753
+ max_workers: Optional[int] = None,
754
+ endpoints: Optional[List] = None,
755
+ strategy: Optional[str] = None,
756
+ cooldown: Optional[float] = None,
434
757
  ) -> Dict[str, any]:
435
758
  """Generate embeddings for all index databases in a project recursively.
436
759
 
437
760
  Args:
438
761
  index_root: Root index directory containing _index.db files
439
- model_profile: Model profile (fast, code, multilingual, balanced)
762
+ embedding_backend: Embedding backend to use (fastembed or litellm).
763
+ Defaults to config setting.
764
+ model_profile: Model profile for fastembed (fast, code, multilingual, balanced)
765
+ or model name for litellm (e.g., qwen3-embedding).
766
+ Defaults to config setting.
440
767
  force: If True, regenerate even if embeddings exist
441
768
  chunk_size: Maximum chunk size in characters
769
+ overlap: Overlap size in characters for sliding window chunking (default: 200)
442
770
  progress_callback: Optional callback for progress updates
771
+ use_gpu: Whether to use GPU acceleration (fastembed only).
772
+ Defaults to config setting.
773
+ max_tokens_per_batch: Maximum tokens per batch for token-aware batching.
774
+ If None, attempts to get from embedder.max_tokens,
775
+ then falls back to 8000. If set, overrides automatic detection.
776
+ max_workers: Maximum number of concurrent API calls.
777
+ If None, uses dynamic defaults based on backend and endpoint count.
778
+ endpoints: Optional list of endpoint configurations for multi-API load balancing.
779
+ strategy: Selection strategy for multi-endpoint mode.
780
+ cooldown: Default cooldown seconds for rate-limited endpoints.
443
781
 
444
782
  Returns:
445
783
  Aggregated result dictionary with generation statistics
446
784
  """
785
+ # Get defaults from config if not specified
786
+ (default_backend, default_model, default_gpu,
787
+ default_endpoints, default_strategy, default_cooldown) = _get_embedding_defaults()
788
+
789
+ if embedding_backend is None:
790
+ embedding_backend = default_backend
791
+ if model_profile is None:
792
+ model_profile = default_model
793
+ if use_gpu is None:
794
+ use_gpu = default_gpu
795
+ if endpoints is None:
796
+ endpoints = default_endpoints
797
+ if strategy is None:
798
+ strategy = default_strategy
799
+ if cooldown is None:
800
+ cooldown = default_cooldown
801
+
802
+ # Calculate endpoint count for worker scaling
803
+ endpoint_count = len(endpoints) if endpoints else 1
804
+
805
+ # Set dynamic max_workers default based on backend type and endpoint count
806
+ if max_workers is None:
807
+ if embedding_backend == "litellm":
808
+ if endpoint_count > 1:
809
+ max_workers = endpoint_count * 2 # No cap, scale with endpoints
810
+ else:
811
+ max_workers = 4
812
+ else:
813
+ max_workers = 1
814
+
447
815
  # Discover all _index.db files
448
816
  index_files = discover_all_index_dbs(index_root)
449
817
 
@@ -473,10 +841,18 @@ def generate_embeddings_recursive(
473
841
 
474
842
  result = generate_embeddings(
475
843
  index_path,
844
+ embedding_backend=embedding_backend,
476
845
  model_profile=model_profile,
477
846
  force=force,
478
847
  chunk_size=chunk_size,
848
+ overlap=overlap,
479
849
  progress_callback=None, # Don't cascade callbacks
850
+ use_gpu=use_gpu,
851
+ max_tokens_per_batch=max_tokens_per_batch,
852
+ max_workers=max_workers,
853
+ endpoints=endpoints,
854
+ strategy=strategy,
855
+ cooldown=cooldown,
480
856
  )
481
857
 
482
858
  all_results.append({
@@ -497,9 +873,8 @@ def generate_embeddings_recursive(
497
873
  # Final cleanup after processing all indexes
498
874
  # Each generate_embeddings() call does its own cleanup, but do a final one to be safe
499
875
  try:
500
- if SEMANTIC_AVAILABLE:
501
- clear_embedder_cache()
502
- gc.collect()
876
+ _cleanup_fastembed_resources()
877
+ gc.collect()
503
878
  except Exception:
504
879
  pass
505
880
 
@@ -525,7 +900,7 @@ def get_embeddings_status(index_root: Path) -> Dict[str, any]:
525
900
  index_root: Root index directory
526
901
 
527
902
  Returns:
528
- Aggregated status with coverage statistics
903
+ Aggregated status with coverage statistics, model info, and timestamps
529
904
  """
530
905
  index_files = discover_all_index_dbs(index_root)
531
906
 
@@ -541,6 +916,7 @@ def get_embeddings_status(index_root: Path) -> Dict[str, any]:
541
916
  "coverage_percent": 0.0,
542
917
  "indexes_with_embeddings": 0,
543
918
  "indexes_without_embeddings": 0,
919
+ "model_info": None,
544
920
  },
545
921
  }
546
922
 
@@ -548,6 +924,8 @@ def get_embeddings_status(index_root: Path) -> Dict[str, any]:
548
924
  files_with_embeddings = 0
549
925
  total_chunks = 0
550
926
  indexes_with_embeddings = 0
927
+ model_info = None
928
+ latest_updated_at = None
551
929
 
552
930
  for index_path in index_files:
553
931
  status = check_index_embeddings(index_path)
@@ -559,6 +937,40 @@ def get_embeddings_status(index_root: Path) -> Dict[str, any]:
559
937
  if result["has_embeddings"]:
560
938
  indexes_with_embeddings += 1
561
939
 
940
+ # Get model config from first index with embeddings (they should all match)
941
+ if model_info is None:
942
+ try:
943
+ from codexlens.semantic.vector_store import VectorStore
944
+ with VectorStore(index_path) as vs:
945
+ config = vs.get_model_config()
946
+ if config:
947
+ model_info = {
948
+ "model_profile": config.get("model_profile"),
949
+ "model_name": config.get("model_name"),
950
+ "embedding_dim": config.get("embedding_dim"),
951
+ "backend": config.get("backend"),
952
+ "created_at": config.get("created_at"),
953
+ "updated_at": config.get("updated_at"),
954
+ }
955
+ latest_updated_at = config.get("updated_at")
956
+ except Exception:
957
+ pass
958
+ else:
959
+ # Track the latest updated_at across all indexes
960
+ try:
961
+ from codexlens.semantic.vector_store import VectorStore
962
+ with VectorStore(index_path) as vs:
963
+ config = vs.get_model_config()
964
+ if config and config.get("updated_at"):
965
+ if latest_updated_at is None or config["updated_at"] > latest_updated_at:
966
+ latest_updated_at = config["updated_at"]
967
+ except Exception:
968
+ pass
969
+
970
+ # Update model_info with latest timestamp
971
+ if model_info and latest_updated_at:
972
+ model_info["updated_at"] = latest_updated_at
973
+
562
974
  return {
563
975
  "success": True,
564
976
  "result": {
@@ -570,6 +982,7 @@ def get_embeddings_status(index_root: Path) -> Dict[str, any]:
570
982
  "coverage_percent": round((files_with_embeddings / total_files * 100) if total_files > 0 else 0, 1),
571
983
  "indexes_with_embeddings": indexes_with_embeddings,
572
984
  "indexes_without_embeddings": len(index_files) - indexes_with_embeddings,
985
+ "model_info": model_info,
573
986
  },
574
987
  }
575
988
 
@@ -633,3 +1046,194 @@ def get_embedding_stats_summary(index_root: Path) -> Dict[str, any]:
633
1046
  "indexes": index_stats,
634
1047
  },
635
1048
  }
1049
+
1050
+
1051
def scan_for_model_conflicts(
    index_root: Path,
    target_backend: str,
    target_model: str,
) -> Dict[str, Any]:
    """Scan for model conflicts across all indexes in a directory.

    Checks if any existing embeddings were generated with a different
    backend or model than the target configuration.

    Args:
        index_root: Root index directory to scan
        target_backend: Target embedding backend (fastembed or litellm)
        target_model: Target model profile/name

    Returns:
        Dictionary with:
        - has_conflict: True if any index has different model config
        - existing_config: Config from first index with embeddings (if any)
        - target_config: The requested configuration
        - conflicts: List of conflicting index paths with their configs
        - indexes_with_embeddings: Count of indexes that have embeddings
    """
    target_config = {"backend": target_backend, "model": target_model}
    index_files = discover_all_index_dbs(index_root)

    if not index_files:
        return {
            "has_conflict": False,
            "existing_config": None,
            "target_config": target_config,
            "conflicts": [],
            "indexes_with_embeddings": 0,
        }

    # Hoisted out of the loop: the import is invariant across iterations.
    from codexlens.semantic.vector_store import VectorStore

    conflicts = []
    existing_config = None
    indexes_with_embeddings = 0

    for index_path in index_files:
        try:
            with VectorStore(index_path) as vs:
                config = vs.get_model_config()

            if config and config.get("model_profile"):
                indexes_with_embeddings += 1

                # First config seen becomes the reference "existing" config
                # (all indexes are expected to share one model).
                if existing_config is None:
                    existing_config = {
                        "backend": config.get("backend"),
                        "model": config.get("model_profile"),
                        "model_name": config.get("model_name"),
                        "embedding_dim": config.get("embedding_dim"),
                    }

                # Conflict when either the backend or the model differs.
                existing_backend = config.get("backend", "")
                existing_model = config.get("model_profile", "")

                if existing_backend != target_backend or existing_model != target_model:
                    conflicts.append({
                        "path": str(index_path),
                        "existing": {
                            "backend": existing_backend,
                            "model": existing_model,
                            "model_name": config.get("model_name"),
                        },
                    })
        except Exception as e:
            # Best-effort scan: an unreadable index is logged and skipped.
            logger.debug(f"Failed to check model config for {index_path}: {e}")
            continue

    return {
        "has_conflict": len(conflicts) > 0,
        "existing_config": existing_config,
        "target_config": target_config,
        "conflicts": conflicts,
        "indexes_with_embeddings": indexes_with_embeddings,
    }
1131
+
1132
+
1133
+ def _get_global_settings_path() -> Path:
1134
+ """Get the path to global embedding settings file."""
1135
+ return Path.home() / ".codexlens" / "embedding_lock.json"
1136
+
1137
+
1138
def get_locked_model_config() -> Optional[Dict[str, Any]]:
    """Get the globally locked embedding model configuration.

    Returns:
        Dictionary with backend, model and locked_at if a lock is
        active, None otherwise.
    """
    settings_path = _get_global_settings_path()
    if not settings_path.exists():
        return None

    try:
        data = json.loads(settings_path.read_text(encoding="utf-8"))
    except (json.JSONDecodeError, OSError):
        # Unreadable or corrupt lock file is treated as "no lock".
        return None

    if not data.get("locked"):
        return None

    return {
        "backend": data.get("backend"),
        "model": data.get("model"),
        "locked_at": data.get("locked_at"),
    }
1161
+
1162
+
1163
def set_locked_model_config(backend: str, model: str) -> None:
    """Set the globally locked embedding model configuration.

    This is called after the first successful embedding generation
    to lock the model for all future operations.

    Args:
        backend: Embedding backend (fastembed or litellm)
        model: Model profile/name
    """
    from datetime import datetime, timezone

    settings_path = _get_global_settings_path()
    settings_path.parent.mkdir(parents=True, exist_ok=True)

    data = {
        "locked": True,
        "backend": backend,
        "model": model,
        # Timezone-aware UTC timestamp so the recorded lock time is
        # unambiguous across machines (a naive local time was ambiguous).
        "locked_at": datetime.now(timezone.utc).isoformat(),
    }

    with open(settings_path, "w", encoding="utf-8") as f:
        json.dump(data, f, indent=2)
1188
+
1189
def clear_locked_model_config() -> bool:
    """Clear the globally locked embedding model configuration.

    Returns:
        True if a lock file existed and was removed, False otherwise.
    """
    settings_path = _get_global_settings_path()
    # EAFP: unlink directly rather than exists()+unlink() to avoid a
    # race where the file disappears between the check and the removal.
    try:
        settings_path.unlink()
        return True
    except FileNotFoundError:
        return False
1200
+
1201
+
1202
def check_global_model_lock(
    target_backend: str,
    target_model: str,
) -> Dict[str, Any]:
    """Check if the target model conflicts with the global lock.

    Args:
        target_backend: Requested embedding backend
        target_model: Requested model profile/name

    Returns:
        Dictionary with:
        - is_locked: True if a global lock exists
        - has_conflict: True if target differs from locked config
        - locked_config: The locked configuration (if any)
        - target_config: The requested configuration
    """
    target_config = {"backend": target_backend, "model": target_model}
    locked_config = get_locked_model_config()

    # No lock on file means nothing can conflict.
    if locked_config is None:
        return {
            "is_locked": False,
            "has_conflict": False,
            "locked_config": None,
            "target_config": target_config,
        }

    backend_differs = locked_config["backend"] != target_backend
    model_differs = locked_config["model"] != target_model

    return {
        "is_locked": True,
        "has_conflict": backend_differs or model_differs,
        "locked_config": locked_config,
        "target_config": target_config,
    }