claude-code-workflow 6.2.9 → 6.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (202)
  1. package/.claude/CLAUDE.md +16 -1
  2. package/.claude/workflows/cli-templates/protocols/analysis-protocol.md +11 -4
  3. package/.claude/workflows/cli-templates/protocols/write-protocol.md +10 -75
  4. package/.claude/workflows/cli-tools-usage.md +14 -24
  5. package/.codex/AGENTS.md +51 -1
  6. package/.codex/prompts/compact.md +378 -0
  7. package/.gemini/GEMINI.md +57 -20
  8. package/ccw/dist/cli.d.ts.map +1 -1
  9. package/ccw/dist/cli.js +3 -1
  10. package/ccw/dist/cli.js.map +1 -1
  11. package/ccw/dist/commands/cli.d.ts +2 -0
  12. package/ccw/dist/commands/cli.d.ts.map +1 -1
  13. package/ccw/dist/commands/cli.js +129 -8
  14. package/ccw/dist/commands/cli.js.map +1 -1
  15. package/ccw/dist/commands/hook.d.ts.map +1 -1
  16. package/ccw/dist/commands/hook.js +3 -2
  17. package/ccw/dist/commands/hook.js.map +1 -1
  18. package/ccw/dist/config/litellm-api-config-manager.d.ts +180 -0
  19. package/ccw/dist/config/litellm-api-config-manager.d.ts.map +1 -0
  20. package/ccw/dist/config/litellm-api-config-manager.js +770 -0
  21. package/ccw/dist/config/litellm-api-config-manager.js.map +1 -0
  22. package/ccw/dist/config/provider-models.d.ts +73 -0
  23. package/ccw/dist/config/provider-models.d.ts.map +1 -0
  24. package/ccw/dist/config/provider-models.js +172 -0
  25. package/ccw/dist/config/provider-models.js.map +1 -0
  26. package/ccw/dist/core/cache-manager.d.ts.map +1 -1
  27. package/ccw/dist/core/cache-manager.js +3 -5
  28. package/ccw/dist/core/cache-manager.js.map +1 -1
  29. package/ccw/dist/core/dashboard-generator.d.ts.map +1 -1
  30. package/ccw/dist/core/dashboard-generator.js +3 -1
  31. package/ccw/dist/core/dashboard-generator.js.map +1 -1
  32. package/ccw/dist/core/routes/cli-routes.d.ts.map +1 -1
  33. package/ccw/dist/core/routes/cli-routes.js +169 -0
  34. package/ccw/dist/core/routes/cli-routes.js.map +1 -1
  35. package/ccw/dist/core/routes/codexlens-routes.d.ts.map +1 -1
  36. package/ccw/dist/core/routes/codexlens-routes.js +234 -18
  37. package/ccw/dist/core/routes/codexlens-routes.js.map +1 -1
  38. package/ccw/dist/core/routes/hooks-routes.d.ts.map +1 -1
  39. package/ccw/dist/core/routes/hooks-routes.js +30 -32
  40. package/ccw/dist/core/routes/hooks-routes.js.map +1 -1
  41. package/ccw/dist/core/routes/litellm-api-routes.d.ts +21 -0
  42. package/ccw/dist/core/routes/litellm-api-routes.d.ts.map +1 -0
  43. package/ccw/dist/core/routes/litellm-api-routes.js +780 -0
  44. package/ccw/dist/core/routes/litellm-api-routes.js.map +1 -0
  45. package/ccw/dist/core/routes/litellm-routes.d.ts +20 -0
  46. package/ccw/dist/core/routes/litellm-routes.d.ts.map +1 -0
  47. package/ccw/dist/core/routes/litellm-routes.js +85 -0
  48. package/ccw/dist/core/routes/litellm-routes.js.map +1 -0
  49. package/ccw/dist/core/routes/mcp-routes.js +2 -2
  50. package/ccw/dist/core/routes/mcp-routes.js.map +1 -1
  51. package/ccw/dist/core/routes/status-routes.d.ts.map +1 -1
  52. package/ccw/dist/core/routes/status-routes.js +39 -0
  53. package/ccw/dist/core/routes/status-routes.js.map +1 -1
  54. package/ccw/dist/core/server.d.ts.map +1 -1
  55. package/ccw/dist/core/server.js +15 -1
  56. package/ccw/dist/core/server.js.map +1 -1
  57. package/ccw/dist/mcp-server/index.js +1 -1
  58. package/ccw/dist/mcp-server/index.js.map +1 -1
  59. package/ccw/dist/tools/claude-cli-tools.d.ts +82 -0
  60. package/ccw/dist/tools/claude-cli-tools.d.ts.map +1 -0
  61. package/ccw/dist/tools/claude-cli-tools.js +216 -0
  62. package/ccw/dist/tools/claude-cli-tools.js.map +1 -0
  63. package/ccw/dist/tools/cli-executor.d.ts.map +1 -1
  64. package/ccw/dist/tools/cli-executor.js +76 -14
  65. package/ccw/dist/tools/cli-executor.js.map +1 -1
  66. package/ccw/dist/tools/codex-lens.d.ts +9 -2
  67. package/ccw/dist/tools/codex-lens.d.ts.map +1 -1
  68. package/ccw/dist/tools/codex-lens.js +114 -9
  69. package/ccw/dist/tools/codex-lens.js.map +1 -1
  70. package/ccw/dist/tools/context-cache-store.d.ts +136 -0
  71. package/ccw/dist/tools/context-cache-store.d.ts.map +1 -0
  72. package/ccw/dist/tools/context-cache-store.js +256 -0
  73. package/ccw/dist/tools/context-cache-store.js.map +1 -0
  74. package/ccw/dist/tools/context-cache.d.ts +56 -0
  75. package/ccw/dist/tools/context-cache.d.ts.map +1 -0
  76. package/ccw/dist/tools/context-cache.js +294 -0
  77. package/ccw/dist/tools/context-cache.js.map +1 -0
  78. package/ccw/dist/tools/core-memory.d.ts.map +1 -1
  79. package/ccw/dist/tools/core-memory.js +33 -19
  80. package/ccw/dist/tools/core-memory.js.map +1 -1
  81. package/ccw/dist/tools/index.d.ts.map +1 -1
  82. package/ccw/dist/tools/index.js +2 -0
  83. package/ccw/dist/tools/index.js.map +1 -1
  84. package/ccw/dist/tools/litellm-client.d.ts +85 -0
  85. package/ccw/dist/tools/litellm-client.d.ts.map +1 -0
  86. package/ccw/dist/tools/litellm-client.js +188 -0
  87. package/ccw/dist/tools/litellm-client.js.map +1 -0
  88. package/ccw/dist/tools/litellm-executor.d.ts +34 -0
  89. package/ccw/dist/tools/litellm-executor.d.ts.map +1 -0
  90. package/ccw/dist/tools/litellm-executor.js +192 -0
  91. package/ccw/dist/tools/litellm-executor.js.map +1 -0
  92. package/ccw/dist/tools/pattern-parser.d.ts +55 -0
  93. package/ccw/dist/tools/pattern-parser.d.ts.map +1 -0
  94. package/ccw/dist/tools/pattern-parser.js +237 -0
  95. package/ccw/dist/tools/pattern-parser.js.map +1 -0
  96. package/ccw/dist/tools/smart-search.d.ts +1 -0
  97. package/ccw/dist/tools/smart-search.d.ts.map +1 -1
  98. package/ccw/dist/tools/smart-search.js +117 -41
  99. package/ccw/dist/tools/smart-search.js.map +1 -1
  100. package/ccw/dist/types/litellm-api-config.d.ts +294 -0
  101. package/ccw/dist/types/litellm-api-config.d.ts.map +1 -0
  102. package/ccw/dist/types/litellm-api-config.js +8 -0
  103. package/ccw/dist/types/litellm-api-config.js.map +1 -0
  104. package/ccw/src/cli.ts +3 -1
  105. package/ccw/src/commands/cli.ts +153 -9
  106. package/ccw/src/commands/hook.ts +3 -2
  107. package/ccw/src/config/.litellm-api-config-manager.ts.2025-12-23T11-57-43-727Z.bak +441 -0
  108. package/ccw/src/config/litellm-api-config-manager.ts +1012 -0
  109. package/ccw/src/config/provider-models.ts +222 -0
  110. package/ccw/src/core/cache-manager.ts +292 -294
  111. package/ccw/src/core/dashboard-generator.ts +3 -1
  112. package/ccw/src/core/routes/cli-routes.ts +192 -0
  113. package/ccw/src/core/routes/codexlens-routes.ts +241 -19
  114. package/ccw/src/core/routes/hooks-routes.ts +399 -405
  115. package/ccw/src/core/routes/litellm-api-routes.ts +930 -0
  116. package/ccw/src/core/routes/litellm-routes.ts +107 -0
  117. package/ccw/src/core/routes/mcp-routes.ts +1271 -1271
  118. package/ccw/src/core/routes/status-routes.ts +51 -0
  119. package/ccw/src/core/server.ts +15 -1
  120. package/ccw/src/mcp-server/index.ts +1 -1
  121. package/ccw/src/templates/dashboard-css/12-cli-legacy.css +44 -0
  122. package/ccw/src/templates/dashboard-css/31-api-settings.css +2265 -0
  123. package/ccw/src/templates/dashboard-js/components/cli-history.js +15 -8
  124. package/ccw/src/templates/dashboard-js/components/cli-status.js +323 -9
  125. package/ccw/src/templates/dashboard-js/components/navigation.js +329 -313
  126. package/ccw/src/templates/dashboard-js/i18n.js +583 -1
  127. package/ccw/src/templates/dashboard-js/views/api-settings.js +3362 -0
  128. package/ccw/src/templates/dashboard-js/views/cli-manager.js +199 -24
  129. package/ccw/src/templates/dashboard-js/views/codexlens-manager.js +1265 -27
  130. package/ccw/src/templates/dashboard.html +840 -831
  131. package/ccw/src/tools/claude-cli-tools.ts +300 -0
  132. package/ccw/src/tools/cli-executor.ts +83 -14
  133. package/ccw/src/tools/codex-lens.ts +146 -9
  134. package/ccw/src/tools/context-cache-store.ts +368 -0
  135. package/ccw/src/tools/context-cache.ts +393 -0
  136. package/ccw/src/tools/core-memory.ts +33 -19
  137. package/ccw/src/tools/index.ts +2 -0
  138. package/ccw/src/tools/litellm-client.ts +246 -0
  139. package/ccw/src/tools/litellm-executor.ts +241 -0
  140. package/ccw/src/tools/pattern-parser.ts +329 -0
  141. package/ccw/src/tools/smart-search.ts +142 -41
  142. package/ccw/src/types/litellm-api-config.ts +402 -0
  143. package/ccw-litellm/README.md +180 -0
  144. package/ccw-litellm/pyproject.toml +35 -0
  145. package/ccw-litellm/src/ccw_litellm/__init__.py +47 -0
  146. package/ccw-litellm/src/ccw_litellm/__pycache__/__init__.cpython-313.pyc +0 -0
  147. package/ccw-litellm/src/ccw_litellm/__pycache__/cli.cpython-313.pyc +0 -0
  148. package/ccw-litellm/src/ccw_litellm/cli.py +108 -0
  149. package/ccw-litellm/src/ccw_litellm/clients/__init__.py +12 -0
  150. package/ccw-litellm/src/ccw_litellm/clients/__pycache__/__init__.cpython-313.pyc +0 -0
  151. package/ccw-litellm/src/ccw_litellm/clients/__pycache__/litellm_embedder.cpython-313.pyc +0 -0
  152. package/ccw-litellm/src/ccw_litellm/clients/__pycache__/litellm_llm.cpython-313.pyc +0 -0
  153. package/ccw-litellm/src/ccw_litellm/clients/litellm_embedder.py +251 -0
  154. package/ccw-litellm/src/ccw_litellm/clients/litellm_llm.py +165 -0
  155. package/ccw-litellm/src/ccw_litellm/config/__init__.py +22 -0
  156. package/ccw-litellm/src/ccw_litellm/config/__pycache__/__init__.cpython-313.pyc +0 -0
  157. package/ccw-litellm/src/ccw_litellm/config/__pycache__/loader.cpython-313.pyc +0 -0
  158. package/ccw-litellm/src/ccw_litellm/config/__pycache__/models.cpython-313.pyc +0 -0
  159. package/ccw-litellm/src/ccw_litellm/config/loader.py +316 -0
  160. package/ccw-litellm/src/ccw_litellm/config/models.py +130 -0
  161. package/ccw-litellm/src/ccw_litellm/interfaces/__init__.py +14 -0
  162. package/ccw-litellm/src/ccw_litellm/interfaces/__pycache__/__init__.cpython-313.pyc +0 -0
  163. package/ccw-litellm/src/ccw_litellm/interfaces/__pycache__/embedder.cpython-313.pyc +0 -0
  164. package/ccw-litellm/src/ccw_litellm/interfaces/__pycache__/llm.cpython-313.pyc +0 -0
  165. package/ccw-litellm/src/ccw_litellm/interfaces/embedder.py +52 -0
  166. package/ccw-litellm/src/ccw_litellm/interfaces/llm.py +45 -0
  167. package/codex-lens/src/codexlens/__pycache__/config.cpython-313.pyc +0 -0
  168. package/codex-lens/src/codexlens/cli/__pycache__/commands.cpython-313.pyc +0 -0
  169. package/codex-lens/src/codexlens/cli/__pycache__/embedding_manager.cpython-313.pyc +0 -0
  170. package/codex-lens/src/codexlens/cli/__pycache__/model_manager.cpython-313.pyc +0 -0
  171. package/codex-lens/src/codexlens/cli/__pycache__/output.cpython-313.pyc +0 -0
  172. package/codex-lens/src/codexlens/cli/commands.py +360 -22
  173. package/codex-lens/src/codexlens/cli/embedding_manager.py +660 -56
  174. package/codex-lens/src/codexlens/cli/model_manager.py +31 -18
  175. package/codex-lens/src/codexlens/cli/output.py +12 -1
  176. package/codex-lens/src/codexlens/config.py +93 -0
  177. package/codex-lens/src/codexlens/search/__pycache__/chain_search.cpython-313.pyc +0 -0
  178. package/codex-lens/src/codexlens/search/__pycache__/hybrid_search.cpython-313.pyc +0 -0
  179. package/codex-lens/src/codexlens/search/__pycache__/ranking.cpython-313.pyc +0 -0
  180. package/codex-lens/src/codexlens/search/chain_search.py +6 -2
  181. package/codex-lens/src/codexlens/search/hybrid_search.py +44 -21
  182. package/codex-lens/src/codexlens/search/ranking.py +1 -1
  183. package/codex-lens/src/codexlens/semantic/__init__.py +42 -0
  184. package/codex-lens/src/codexlens/semantic/__pycache__/__init__.cpython-313.pyc +0 -0
  185. package/codex-lens/src/codexlens/semantic/__pycache__/base.cpython-313.pyc +0 -0
  186. package/codex-lens/src/codexlens/semantic/__pycache__/chunker.cpython-313.pyc +0 -0
  187. package/codex-lens/src/codexlens/semantic/__pycache__/embedder.cpython-313.pyc +0 -0
  188. package/codex-lens/src/codexlens/semantic/__pycache__/factory.cpython-313.pyc +0 -0
  189. package/codex-lens/src/codexlens/semantic/__pycache__/gpu_support.cpython-313.pyc +0 -0
  190. package/codex-lens/src/codexlens/semantic/__pycache__/litellm_embedder.cpython-313.pyc +0 -0
  191. package/codex-lens/src/codexlens/semantic/__pycache__/vector_store.cpython-313.pyc +0 -0
  192. package/codex-lens/src/codexlens/semantic/base.py +61 -0
  193. package/codex-lens/src/codexlens/semantic/chunker.py +43 -20
  194. package/codex-lens/src/codexlens/semantic/embedder.py +60 -13
  195. package/codex-lens/src/codexlens/semantic/factory.py +98 -0
  196. package/codex-lens/src/codexlens/semantic/gpu_support.py +225 -3
  197. package/codex-lens/src/codexlens/semantic/litellm_embedder.py +144 -0
  198. package/codex-lens/src/codexlens/semantic/rotational_embedder.py +434 -0
  199. package/codex-lens/src/codexlens/semantic/vector_store.py +33 -8
  200. package/codex-lens/src/codexlens/storage/migrations/__pycache__/migration_004_dual_fts.cpython-313.pyc +0 -0
  201. package/package.json +3 -1
  202. package/.codex/prompts.zip +0 -0
@@ -103,10 +103,12 @@ def init(
103
103
  "-l",
104
104
  help="Limit indexing to specific languages (repeat or comma-separated).",
105
105
  ),
106
- workers: Optional[int] = typer.Option(None, "--workers", "-w", min=1, max=16, help="Parallel worker processes (default: auto-detect based on CPU count, max 16)."),
106
+ workers: Optional[int] = typer.Option(None, "--workers", "-w", min=1, help="Parallel worker processes (default: auto-detect based on CPU count)."),
107
107
  force: bool = typer.Option(False, "--force", "-f", help="Force full reindex (skip incremental mode)."),
108
108
  no_embeddings: bool = typer.Option(False, "--no-embeddings", help="Skip automatic embedding generation (if semantic deps installed)."),
109
- embedding_model: str = typer.Option("code", "--embedding-model", help="Embedding model profile: fast, code, multilingual, balanced."),
109
+ embedding_backend: str = typer.Option("fastembed", "--embedding-backend", help="Embedding backend: fastembed (local) or litellm (remote API)."),
110
+ embedding_model: str = typer.Option("code", "--embedding-model", help="Embedding model: profile name for fastembed (fast/code/multilingual/balanced) or model name for litellm (e.g. text-embedding-3-small)."),
111
+ max_workers: int = typer.Option(1, "--max-workers", min=1, help="Max concurrent API calls for embedding generation. Recommended: 4-8 for litellm backend."),
110
112
  json_mode: bool = typer.Option(False, "--json", help="Output JSON response."),
111
113
  verbose: bool = typer.Option(False, "--verbose", "-v", help="Enable debug logging."),
112
114
  ) -> None:
@@ -120,6 +122,14 @@ def init(
120
122
 
121
123
  If semantic search dependencies are installed, automatically generates embeddings
122
124
  after indexing completes. Use --no-embeddings to skip this step.
125
+
126
+ Embedding Backend Options:
127
+ - fastembed: Local ONNX-based embeddings (default, no API calls)
128
+ - litellm: Remote API embeddings via ccw-litellm (requires API keys)
129
+
130
+ Embedding Model Options:
131
+ - For fastembed backend: Use profile names (fast, code, multilingual, balanced)
132
+ - For litellm backend: Use model names (e.g., text-embedding-3-small, text-embedding-ada-002)
123
133
  """
124
134
  _configure_logging(verbose, json_mode)
125
135
  config = Config()
@@ -156,26 +166,37 @@ def init(
156
166
  "errors": len(build_result.errors),
157
167
  }
158
168
 
159
- if json_mode:
160
- print_json(success=True, result=result)
161
- else:
169
+ if not json_mode:
162
170
  console.print(f"[green]OK[/green] Indexed [bold]{build_result.total_files}[/bold] files in [bold]{build_result.total_dirs}[/bold] directories")
163
171
  console.print(f" Index root: {build_result.index_root}")
164
172
  if build_result.errors:
165
173
  console.print(f" [yellow]Warnings:[/yellow] {len(build_result.errors)} errors")
166
174
 
167
- # Auto-generate embeddings if semantic search is available
175
+ # Auto-generate embeddings if the requested backend is available
168
176
  if not no_embeddings:
169
177
  try:
170
- from codexlens.semantic import SEMANTIC_AVAILABLE
178
+ from codexlens.semantic import is_embedding_backend_available
171
179
  from codexlens.cli.embedding_manager import generate_embeddings_recursive, get_embeddings_status
172
180
 
173
- if SEMANTIC_AVAILABLE:
181
+ # Validate embedding backend
182
+ valid_backends = ["fastembed", "litellm"]
183
+ if embedding_backend not in valid_backends:
184
+ error_msg = f"Invalid embedding backend: {embedding_backend}. Must be one of: {', '.join(valid_backends)}"
185
+ if json_mode:
186
+ print_json(success=False, error=error_msg)
187
+ else:
188
+ console.print(f"[red]Error:[/red] {error_msg}")
189
+ raise typer.Exit(code=1)
190
+
191
+ backend_available, backend_error = is_embedding_backend_available(embedding_backend)
192
+
193
+ if backend_available:
174
194
  # Use the index root directory (not the _index.db file)
175
195
  index_root = Path(build_result.index_root)
176
196
 
177
197
  if not json_mode:
178
198
  console.print("\n[bold]Generating embeddings...[/bold]")
199
+ console.print(f"Backend: [cyan]{embedding_backend}[/cyan]")
179
200
  console.print(f"Model: [cyan]{embedding_model}[/cyan]")
180
201
  else:
181
202
  # Output progress message for JSON mode (parsed by Node.js)
@@ -196,10 +217,12 @@ def init(
196
217
 
197
218
  embed_result = generate_embeddings_recursive(
198
219
  index_root,
220
+ embedding_backend=embedding_backend,
199
221
  model_profile=embedding_model,
200
222
  force=False, # Don't force regenerate during init
201
223
  chunk_size=2000,
202
224
  progress_callback=progress_update, # Always use callback
225
+ max_workers=max_workers,
203
226
  )
204
227
 
205
228
  if embed_result["success"]:
@@ -241,10 +264,10 @@ def init(
241
264
  }
242
265
  else:
243
266
  if not json_mode and verbose:
244
- console.print("[dim]Semantic search not available. Skipping embeddings.[/dim]")
267
+ console.print(f"[dim]Embedding backend '{embedding_backend}' not available. Skipping embeddings.[/dim]")
245
268
  result["embeddings"] = {
246
269
  "generated": False,
247
- "error": "Semantic dependencies not installed",
270
+ "error": backend_error or "Embedding backend not available",
248
271
  }
249
272
  except Exception as e:
250
273
  if not json_mode and verbose:
@@ -259,6 +282,10 @@ def init(
259
282
  "error": "Skipped (--no-embeddings)",
260
283
  }
261
284
 
285
+ # Output final JSON result with embeddings status
286
+ if json_mode:
287
+ print_json(success=True, result=result)
288
+
262
289
  except StorageError as exc:
263
290
  if json_mode:
264
291
  print_json(success=False, error=f"Storage error: {exc}")
@@ -324,7 +351,7 @@ def search(
324
351
  Use 'codexlens embeddings-generate' to create embeddings first.
325
352
 
326
353
  Hybrid Mode:
327
- Default weights: exact=0.4, fuzzy=0.3, vector=0.3
354
+ Default weights: exact=0.3, fuzzy=0.1, vector=0.6
328
355
  Use --weights to customize (e.g., --weights 0.5,0.3,0.2)
329
356
 
330
357
  Examples:
@@ -451,6 +478,7 @@ def search(
451
478
  "path": r.path,
452
479
  "score": r.score,
453
480
  "excerpt": r.excerpt,
481
+ "content": r.content, # Full function/class body
454
482
  "source": getattr(r, "search_source", None),
455
483
  "symbol": getattr(r, "symbol", None),
456
484
  }
@@ -732,6 +760,16 @@ def status(
732
760
  console.print(f" Coverage: {embeddings_info['coverage_percent']:.1f}%")
733
761
  console.print(f" Total Chunks: {embeddings_info['total_chunks']}")
734
762
 
763
+ # Display model information if available
764
+ model_info = embeddings_info.get('model_info')
765
+ if model_info:
766
+ console.print("\n[bold]Embedding Model:[/bold]")
767
+ console.print(f" Backend: [cyan]{model_info.get('backend', 'unknown')}[/cyan]")
768
+ console.print(f" Model: [cyan]{model_info.get('model_profile', 'unknown')}[/cyan] ({model_info.get('model_name', '')})")
769
+ console.print(f" Dimensions: {model_info.get('embedding_dim', 'unknown')}")
770
+ if model_info.get('updated_at'):
771
+ console.print(f" Last Updated: {model_info['updated_at']}")
772
+
735
773
  except StorageError as exc:
736
774
  if json_mode:
737
775
  print_json(success=False, error=f"Storage error: {exc}")
@@ -1781,11 +1819,17 @@ def embeddings_generate(
1781
1819
  exists=True,
1782
1820
  help="Path to _index.db file or project directory.",
1783
1821
  ),
1822
+ backend: str = typer.Option(
1823
+ "fastembed",
1824
+ "--backend",
1825
+ "-b",
1826
+ help="Embedding backend: fastembed (local) or litellm (remote API).",
1827
+ ),
1784
1828
  model: str = typer.Option(
1785
1829
  "code",
1786
1830
  "--model",
1787
1831
  "-m",
1788
- help="Model profile: fast, code, multilingual, balanced.",
1832
+ help="Model: profile name for fastembed (fast/code/multilingual/balanced) or model name for litellm (e.g. text-embedding-3-small).",
1789
1833
  ),
1790
1834
  force: bool = typer.Option(
1791
1835
  False,
@@ -1804,6 +1848,13 @@ def embeddings_generate(
1804
1848
  "-r",
1805
1849
  help="Recursively process all _index.db files in directory tree.",
1806
1850
  ),
1851
+ max_workers: int = typer.Option(
1852
+ 1,
1853
+ "--max-workers",
1854
+ "-w",
1855
+ min=1,
1856
+ help="Max concurrent API calls. Recommended: 4-8 for litellm backend. Default: 1 (sequential).",
1857
+ ),
1807
1858
  json_mode: bool = typer.Option(False, "--json", help="Output JSON response."),
1808
1859
  verbose: bool = typer.Option(False, "--verbose", "-v", help="Enable verbose output."),
1809
1860
  ) -> None:
@@ -1813,20 +1864,48 @@ def embeddings_generate(
1813
1864
  semantic search capabilities. Embeddings are stored in the same
1814
1865
  database as the FTS index.
1815
1866
 
1816
- Model Profiles:
1817
- - fast: BAAI/bge-small-en-v1.5 (384 dims, ~80MB)
1818
- - code: jinaai/jina-embeddings-v2-base-code (768 dims, ~150MB) [recommended]
1819
- - multilingual: intfloat/multilingual-e5-large (1024 dims, ~1GB)
1820
- - balanced: mixedbread-ai/mxbai-embed-large-v1 (1024 dims, ~600MB)
1867
+ Embedding Backend Options:
1868
+ - fastembed: Local ONNX-based embeddings (default, no API calls)
1869
+ - litellm: Remote API embeddings via ccw-litellm (requires API keys)
1870
+
1871
+ Model Options:
1872
+ For fastembed backend (profiles):
1873
+ - fast: BAAI/bge-small-en-v1.5 (384 dims, ~80MB)
1874
+ - code: jinaai/jina-embeddings-v2-base-code (768 dims, ~150MB) [recommended]
1875
+ - multilingual: intfloat/multilingual-e5-large (1024 dims, ~1GB)
1876
+ - balanced: mixedbread-ai/mxbai-embed-large-v1 (1024 dims, ~600MB)
1877
+
1878
+ For litellm backend (model names):
1879
+ - text-embedding-3-small, text-embedding-3-large (OpenAI)
1880
+ - text-embedding-ada-002 (OpenAI legacy)
1881
+ - Any model supported by ccw-litellm
1821
1882
 
1822
1883
  Examples:
1823
- codexlens embeddings-generate ~/projects/my-app # Auto-find index for project
1884
+ codexlens embeddings-generate ~/projects/my-app # Auto-find index (fastembed, code profile)
1824
1885
  codexlens embeddings-generate ~/.codexlens/indexes/project/_index.db # Specific index
1825
- codexlens embeddings-generate ~/projects/my-app --model fast --force # Regenerate with fast model
1886
+ codexlens embeddings-generate ~/projects/my-app --backend litellm --model text-embedding-3-small # Use LiteLLM
1887
+ codexlens embeddings-generate ~/projects/my-app --model fast --force # Regenerate with fast profile
1826
1888
  """
1827
1889
  _configure_logging(verbose, json_mode)
1828
1890
 
1829
- from codexlens.cli.embedding_manager import generate_embeddings, generate_embeddings_recursive
1891
+ from codexlens.cli.embedding_manager import (
1892
+ generate_embeddings,
1893
+ generate_embeddings_recursive,
1894
+ scan_for_model_conflicts,
1895
+ check_global_model_lock,
1896
+ set_locked_model_config,
1897
+ )
1898
+
1899
+ # Validate backend
1900
+ valid_backends = ["fastembed", "litellm"]
1901
+ if backend not in valid_backends:
1902
+ error_msg = f"Invalid backend: {backend}. Must be one of: {', '.join(valid_backends)}"
1903
+ if json_mode:
1904
+ print_json(success=False, error=error_msg)
1905
+ else:
1906
+ console.print(f"[red]Error:[/red] {error_msg}")
1907
+ console.print(f"[dim]Valid backends: {', '.join(valid_backends)}[/dim]")
1908
+ raise typer.Exit(code=1)
1830
1909
 
1831
1910
  # Resolve path
1832
1911
  target_path = path.expanduser().resolve()
@@ -1877,23 +1956,100 @@ def embeddings_generate(
1877
1956
  console.print(f"Mode: [yellow]Recursive[/yellow]")
1878
1957
  else:
1879
1958
  console.print(f"Index: [dim]{index_path}[/dim]")
1880
- console.print(f"Model: [cyan]{model}[/cyan]\n")
1959
+ console.print(f"Backend: [cyan]{backend}[/cyan]")
1960
+ console.print(f"Model: [cyan]{model}[/cyan]")
1961
+ if max_workers > 1:
1962
+ console.print(f"Concurrency: [cyan]{max_workers} workers[/cyan]")
1963
+ console.print()
1964
+
1965
+ # Check global model lock (prevents mixing different models)
1966
+ if not force:
1967
+ lock_result = check_global_model_lock(backend, model)
1968
+ if lock_result["has_conflict"]:
1969
+ locked = lock_result["locked_config"]
1970
+ if json_mode:
1971
+ print_json(
1972
+ success=False,
1973
+ error="Global model lock conflict",
1974
+ code="MODEL_LOCKED",
1975
+ locked_config=locked,
1976
+ target_config=lock_result["target_config"],
1977
+ hint="Use --force to override the lock and switch to a different model (will regenerate all embeddings)",
1978
+ )
1979
+ raise typer.Exit(code=1)
1980
+ else:
1981
+ console.print("[red]⛔ Global Model Lock Active[/red]")
1982
+ console.print(f" Locked model: [cyan]{locked['backend']}/{locked['model']}[/cyan]")
1983
+ console.print(f" Requested: [yellow]{backend}/{model}[/yellow]")
1984
+ console.print(f" Locked at: {locked.get('locked_at', 'unknown')}")
1985
+ console.print()
1986
+ console.print("[dim]All indexes must use the same embedding model.[/dim]")
1987
+ console.print("[dim]Use --force to switch models (will regenerate all embeddings).[/dim]")
1988
+ raise typer.Exit(code=1)
1989
+
1990
+ # Pre-check for model conflicts (only if not forcing)
1991
+ if not force:
1992
+ # Determine the index root for conflict scanning
1993
+ scan_root = index_root if use_recursive else (index_path.parent if index_path else None)
1994
+
1995
+ if scan_root:
1996
+ conflict_result = scan_for_model_conflicts(scan_root, backend, model)
1997
+
1998
+ if conflict_result["has_conflict"]:
1999
+ existing = conflict_result["existing_config"]
2000
+ conflict_count = len(conflict_result["conflicts"])
2001
+
2002
+ if json_mode:
2003
+ # JSON mode: return structured error for UI handling
2004
+ print_json(
2005
+ success=False,
2006
+ error="Model conflict detected",
2007
+ code="MODEL_CONFLICT",
2008
+ existing_config=existing,
2009
+ target_config=conflict_result["target_config"],
2010
+ conflict_count=conflict_count,
2011
+ conflicts=conflict_result["conflicts"][:5], # Show first 5 conflicts
2012
+ hint="Use --force to overwrite existing embeddings with the new model",
2013
+ )
2014
+ raise typer.Exit(code=1)
2015
+ else:
2016
+ # Interactive mode: show warning and ask for confirmation
2017
+ console.print("[yellow]⚠ Model Conflict Detected[/yellow]")
2018
+ console.print(f" Existing: [red]{existing['backend']}/{existing['model']}[/red] ({existing.get('embedding_dim', '?')} dim)")
2019
+ console.print(f" Requested: [green]{backend}/{model}[/green]")
2020
+ console.print(f" Affected indexes: [yellow]{conflict_count}[/yellow]")
2021
+ console.print()
2022
+ console.print("[dim]Mixing different embedding models in the same index is not supported.[/dim]")
2023
+ console.print("[dim]Overwriting will delete all existing embeddings and regenerate with the new model.[/dim]")
2024
+ console.print()
2025
+
2026
+ # Ask for confirmation
2027
+ if typer.confirm("Overwrite existing embeddings with the new model?", default=False):
2028
+ force = True
2029
+ console.print("[green]Confirmed.[/green] Proceeding with overwrite...\n")
2030
+ else:
2031
+ console.print("[yellow]Cancelled.[/yellow] Use --force to skip this prompt.")
2032
+ raise typer.Exit(code=0)
1881
2033
 
1882
2034
  if use_recursive:
1883
2035
  result = generate_embeddings_recursive(
1884
2036
  index_root,
2037
+ embedding_backend=backend,
1885
2038
  model_profile=model,
1886
2039
  force=force,
1887
2040
  chunk_size=chunk_size,
1888
2041
  progress_callback=progress_update,
2042
+ max_workers=max_workers,
1889
2043
  )
1890
2044
  else:
1891
2045
  result = generate_embeddings(
1892
2046
  index_path,
2047
+ embedding_backend=backend,
1893
2048
  model_profile=model,
1894
2049
  force=force,
1895
2050
  chunk_size=chunk_size,
1896
2051
  progress_callback=progress_update,
2052
+ max_workers=max_workers,
1897
2053
  )
1898
2054
 
1899
2055
  if json_mode:
@@ -1906,14 +2062,21 @@ def embeddings_generate(
1906
2062
  # Provide helpful hints
1907
2063
  if "already has" in error_msg:
1908
2064
  console.print("\n[dim]Use --force to regenerate existing embeddings[/dim]")
1909
- elif "Semantic search not available" in error_msg:
2065
+ elif "fastembed not available" in error_msg or "Semantic search not available" in error_msg:
1910
2066
  console.print("\n[dim]Install semantic dependencies:[/dim]")
1911
2067
  console.print(" [cyan]pip install codexlens[semantic][/cyan]")
2068
+ elif "ccw-litellm not available" in error_msg:
2069
+ console.print("\n[dim]Install LiteLLM backend dependencies:[/dim]")
2070
+ console.print(" [cyan]pip install ccw-litellm[/cyan]")
1912
2071
 
1913
2072
  raise typer.Exit(code=1)
1914
2073
 
1915
2074
  data = result["result"]
1916
2075
 
2076
+ # Set global model lock after successful generation
2077
+ # This prevents using different models for future indexes
2078
+ set_locked_model_config(backend, model)
2079
+
1917
2080
  if use_recursive:
1918
2081
  # Recursive mode output
1919
2082
  console.print(f"[green]✓[/green] Recursive embeddings generation complete!")
@@ -1955,3 +2118,178 @@ def embeddings_generate(
1955
2118
 
1956
2119
  console.print("\n[dim]Use vector search with:[/dim]")
1957
2120
  console.print(" [cyan]codexlens search 'your query' --mode pure-vector[/cyan]")
2121
+
2122
+
2123
+ # ==================== GPU Management Commands ====================
2124
+
2125
@app.command(name="gpu-list")
def gpu_list(
    json_mode: bool = typer.Option(False, "--json", help="Output JSON response."),
) -> None:
    """List available GPU devices for embedding acceleration.

    Shows all detected GPU devices with their capabilities and selection status.
    Discrete GPUs (NVIDIA, AMD) are automatically preferred over integrated GPUs.

    Examples:
        codexlens gpu-list # List all GPUs
        codexlens gpu-list --json # JSON output for scripting
    """
    from codexlens.semantic.gpu_support import get_gpu_devices, detect_gpu, get_selected_device_id

    gpu_info = detect_gpu()
    devices = get_gpu_devices()
    selected_id = get_selected_device_id()

    # Machine-readable path: dump everything the detector knows and stop.
    if json_mode:
        payload = {
            "devices": devices,
            "selected_device_id": selected_id,
            "gpu_available": gpu_info.gpu_available,
            "providers": gpu_info.onnx_providers,
        }
        print_json(success=True, result=payload)
        return

    providers_line = f"ONNX Providers: [dim]{', '.join(gpu_info.onnx_providers)}[/dim]"

    if not devices:
        console.print("[yellow]No GPU devices detected[/yellow]")
        console.print(providers_line)
        return

    console.print("[bold]Available GPU Devices[/bold]\n")

    # Column layout for the device table; Name is the only left-aligned column.
    table = Table(show_header=True, header_style="bold")
    for column_name, options in (
        ("ID", {"justify": "center"}),
        ("Name", {}),
        ("Vendor", {"justify": "center"}),
        ("Type", {"justify": "center"}),
        ("Status", {"justify": "center"}),
    ):
        table.add_column(column_name, **options)

    vendor_colors = {"nvidia": "green", "amd": "red", "intel": "blue"}

    for device in devices:
        if device["is_discrete"]:
            kind_cell = "[green]Discrete[/green]"
        else:
            kind_cell = "[dim]Integrated[/dim]"

        color = vendor_colors.get(device["vendor"], "white")
        vendor_cell = f"[{color}]{device['vendor'].upper()}[/{color}]"

        # "Auto" marks the device auto-detection would pick; "Selected" marks
        # an explicit user choice. Both may apply to the same device.
        badges = []
        if device["is_preferred"]:
            badges.append("[cyan]Auto[/cyan]")
        if device["is_selected"]:
            badges.append("[green]✓ Selected[/green]")
        status_cell = " ".join(badges) if badges else "[dim]—[/dim]"

        table.add_row(
            str(device["device_id"]),
            device["name"],
            vendor_cell,
            kind_cell,
            status_cell,
        )

    console.print(table)
    console.print("\n" + providers_line)
    console.print("\n[dim]Select GPU with:[/dim]")
    console.print(" [cyan]codexlens gpu-select <device_id>[/cyan]")
2198
+
2199
+
2200
@app.command(name="gpu-select")
def gpu_select(
    device_id: int = typer.Argument(
        ...,
        help="GPU device ID to use for embeddings. Use 'codexlens gpu-list' to see available IDs.",
    ),
    json_mode: bool = typer.Option(False, "--json", help="Output JSON response."),
) -> None:
    """Select a specific GPU device for embedding generation.

    By default, CodexLens automatically selects the most powerful GPU (discrete over integrated).
    Use this command to override the selection.

    Raises:
        typer.Exit: code 1 when the device id is unknown or persisting the
            selection fails; success exits normally.

    Examples:
        codexlens gpu-select 1 # Use GPU device 1
        codexlens gpu-select 0 --json # Select GPU 0 with JSON output
    """
    from codexlens.semantic.gpu_support import set_selected_device_id, get_gpu_devices
    from codexlens.semantic.embedder import clear_embedder_cache

    devices = get_gpu_devices()
    valid_ids = [dev["device_id"] for dev in devices]

    # Validate against the live device list before persisting anything.
    if device_id not in valid_ids:
        if json_mode:
            print_json(success=False, error=f"Invalid device_id {device_id}. Valid IDs: {valid_ids}")
        else:
            console.print(f"[red]Error:[/red] Invalid device_id {device_id}")
            console.print(f"Valid IDs: {valid_ids}")
            console.print("\n[dim]Use 'codexlens gpu-list' to see available devices[/dim]")
        raise typer.Exit(code=1)

    success = set_selected_device_id(device_id)

    if success:
        # Clear embedder cache to force reload with new GPU
        clear_embedder_cache()

        device_name = next((dev["name"] for dev in devices if dev["device_id"] == device_id), "Unknown")

        if json_mode:
            print_json(
                success=True,
                result={
                    "device_id": device_id,
                    "device_name": device_name,
                    "message": f"GPU selection set to device {device_id}: {device_name}",
                }
            )
        else:
            # Fixed: plain literal instead of an f-string with no placeholders (ruff F541).
            console.print("[green]✓[/green] GPU selection updated")
            console.print(f" Device ID: {device_id}")
            console.print(f" Device: [cyan]{device_name}[/cyan]")
            console.print("\n[dim]New embeddings will use this GPU[/dim]")
    else:
        if json_mode:
            print_json(success=False, error="Failed to set GPU selection")
        else:
            console.print("[red]Error:[/red] Failed to set GPU selection")
        raise typer.Exit(code=1)
2260
+
2261
+
2262
@app.command(name="gpu-reset")
def gpu_reset(
    json_mode: bool = typer.Option(False, "--json", help="Output JSON response."),
) -> None:
    """Reset GPU selection to automatic detection.

    Clears any manual GPU selection and returns to automatic selection
    (discrete GPU preferred over integrated).

    Examples:
        codexlens gpu-reset # Reset to auto-detection
    """
    from codexlens.semantic.gpu_support import set_selected_device_id, detect_gpu
    from codexlens.semantic.embedder import clear_embedder_cache

    # Drop the manual override, then invalidate cached embedders so the next
    # embedding run re-detects hardware from scratch.
    set_selected_device_id(None)
    clear_embedder_cache()

    refreshed = detect_gpu(force_refresh=True)

    if json_mode:
        result = {
            "message": "GPU selection reset to auto-detection",
            "preferred_device_id": refreshed.preferred_device_id,
            "preferred_device_name": refreshed.gpu_name,
        }
        print_json(success=True, result=result)
        return

    console.print("[green]✓[/green] GPU selection reset to auto-detection")
    if refreshed.preferred_device_id is not None:
        console.print(f" Auto-selected device: {refreshed.preferred_device_id}")
        console.print(f" Device: [cyan]{refreshed.gpu_name}[/cyan]")