claude-code-workflow 6.2.7 → 6.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/CLAUDE.md +16 -1
- package/.claude/workflows/cli-templates/protocols/analysis-protocol.md +11 -4
- package/.claude/workflows/cli-templates/protocols/write-protocol.md +10 -75
- package/.claude/workflows/cli-tools-usage.md +14 -24
- package/.codex/AGENTS.md +51 -1
- package/.codex/prompts/compact.md +378 -0
- package/.gemini/GEMINI.md +57 -20
- package/ccw/dist/cli.d.ts.map +1 -1
- package/ccw/dist/cli.js +21 -8
- package/ccw/dist/cli.js.map +1 -1
- package/ccw/dist/commands/cli.d.ts +2 -0
- package/ccw/dist/commands/cli.d.ts.map +1 -1
- package/ccw/dist/commands/cli.js +129 -8
- package/ccw/dist/commands/cli.js.map +1 -1
- package/ccw/dist/commands/hook.d.ts.map +1 -1
- package/ccw/dist/commands/hook.js +3 -2
- package/ccw/dist/commands/hook.js.map +1 -1
- package/ccw/dist/config/litellm-api-config-manager.d.ts +180 -0
- package/ccw/dist/config/litellm-api-config-manager.d.ts.map +1 -0
- package/ccw/dist/config/litellm-api-config-manager.js +770 -0
- package/ccw/dist/config/litellm-api-config-manager.js.map +1 -0
- package/ccw/dist/config/provider-models.d.ts +73 -0
- package/ccw/dist/config/provider-models.d.ts.map +1 -0
- package/ccw/dist/config/provider-models.js +172 -0
- package/ccw/dist/config/provider-models.js.map +1 -0
- package/ccw/dist/core/cache-manager.d.ts.map +1 -1
- package/ccw/dist/core/cache-manager.js +3 -5
- package/ccw/dist/core/cache-manager.js.map +1 -1
- package/ccw/dist/core/dashboard-generator.d.ts.map +1 -1
- package/ccw/dist/core/dashboard-generator.js +3 -1
- package/ccw/dist/core/dashboard-generator.js.map +1 -1
- package/ccw/dist/core/routes/cli-routes.d.ts.map +1 -1
- package/ccw/dist/core/routes/cli-routes.js +169 -0
- package/ccw/dist/core/routes/cli-routes.js.map +1 -1
- package/ccw/dist/core/routes/codexlens-routes.d.ts.map +1 -1
- package/ccw/dist/core/routes/codexlens-routes.js +234 -18
- package/ccw/dist/core/routes/codexlens-routes.js.map +1 -1
- package/ccw/dist/core/routes/hooks-routes.d.ts.map +1 -1
- package/ccw/dist/core/routes/hooks-routes.js +30 -32
- package/ccw/dist/core/routes/hooks-routes.js.map +1 -1
- package/ccw/dist/core/routes/litellm-api-routes.d.ts +21 -0
- package/ccw/dist/core/routes/litellm-api-routes.d.ts.map +1 -0
- package/ccw/dist/core/routes/litellm-api-routes.js +780 -0
- package/ccw/dist/core/routes/litellm-api-routes.js.map +1 -0
- package/ccw/dist/core/routes/litellm-routes.d.ts +20 -0
- package/ccw/dist/core/routes/litellm-routes.d.ts.map +1 -0
- package/ccw/dist/core/routes/litellm-routes.js +85 -0
- package/ccw/dist/core/routes/litellm-routes.js.map +1 -0
- package/ccw/dist/core/routes/mcp-routes.js +2 -2
- package/ccw/dist/core/routes/mcp-routes.js.map +1 -1
- package/ccw/dist/core/routes/status-routes.d.ts.map +1 -1
- package/ccw/dist/core/routes/status-routes.js +39 -0
- package/ccw/dist/core/routes/status-routes.js.map +1 -1
- package/ccw/dist/core/routes/system-routes.js +1 -1
- package/ccw/dist/core/routes/system-routes.js.map +1 -1
- package/ccw/dist/core/server.d.ts.map +1 -1
- package/ccw/dist/core/server.js +15 -1
- package/ccw/dist/core/server.js.map +1 -1
- package/ccw/dist/mcp-server/index.js +1 -1
- package/ccw/dist/mcp-server/index.js.map +1 -1
- package/ccw/dist/tools/claude-cli-tools.d.ts +82 -0
- package/ccw/dist/tools/claude-cli-tools.d.ts.map +1 -0
- package/ccw/dist/tools/claude-cli-tools.js +216 -0
- package/ccw/dist/tools/claude-cli-tools.js.map +1 -0
- package/ccw/dist/tools/cli-executor.d.ts.map +1 -1
- package/ccw/dist/tools/cli-executor.js +76 -14
- package/ccw/dist/tools/cli-executor.js.map +1 -1
- package/ccw/dist/tools/codex-lens.d.ts +9 -2
- package/ccw/dist/tools/codex-lens.d.ts.map +1 -1
- package/ccw/dist/tools/codex-lens.js +114 -9
- package/ccw/dist/tools/codex-lens.js.map +1 -1
- package/ccw/dist/tools/context-cache-store.d.ts +136 -0
- package/ccw/dist/tools/context-cache-store.d.ts.map +1 -0
- package/ccw/dist/tools/context-cache-store.js +256 -0
- package/ccw/dist/tools/context-cache-store.js.map +1 -0
- package/ccw/dist/tools/context-cache.d.ts +56 -0
- package/ccw/dist/tools/context-cache.d.ts.map +1 -0
- package/ccw/dist/tools/context-cache.js +294 -0
- package/ccw/dist/tools/context-cache.js.map +1 -0
- package/ccw/dist/tools/core-memory.d.ts.map +1 -1
- package/ccw/dist/tools/core-memory.js +33 -19
- package/ccw/dist/tools/core-memory.js.map +1 -1
- package/ccw/dist/tools/index.d.ts.map +1 -1
- package/ccw/dist/tools/index.js +2 -0
- package/ccw/dist/tools/index.js.map +1 -1
- package/ccw/dist/tools/litellm-client.d.ts +85 -0
- package/ccw/dist/tools/litellm-client.d.ts.map +1 -0
- package/ccw/dist/tools/litellm-client.js +188 -0
- package/ccw/dist/tools/litellm-client.js.map +1 -0
- package/ccw/dist/tools/litellm-executor.d.ts +34 -0
- package/ccw/dist/tools/litellm-executor.d.ts.map +1 -0
- package/ccw/dist/tools/litellm-executor.js +192 -0
- package/ccw/dist/tools/litellm-executor.js.map +1 -0
- package/ccw/dist/tools/pattern-parser.d.ts +55 -0
- package/ccw/dist/tools/pattern-parser.d.ts.map +1 -0
- package/ccw/dist/tools/pattern-parser.js +237 -0
- package/ccw/dist/tools/pattern-parser.js.map +1 -0
- package/ccw/dist/tools/smart-search.d.ts +1 -0
- package/ccw/dist/tools/smart-search.d.ts.map +1 -1
- package/ccw/dist/tools/smart-search.js +117 -41
- package/ccw/dist/tools/smart-search.js.map +1 -1
- package/ccw/dist/types/litellm-api-config.d.ts +294 -0
- package/ccw/dist/types/litellm-api-config.d.ts.map +1 -0
- package/ccw/dist/types/litellm-api-config.js +8 -0
- package/ccw/dist/types/litellm-api-config.js.map +1 -0
- package/ccw/src/cli.ts +258 -244
- package/ccw/src/commands/cli.ts +153 -9
- package/ccw/src/commands/hook.ts +3 -2
- package/ccw/src/config/.litellm-api-config-manager.ts.2025-12-23T11-57-43-727Z.bak +441 -0
- package/ccw/src/config/litellm-api-config-manager.ts +1012 -0
- package/ccw/src/config/provider-models.ts +222 -0
- package/ccw/src/core/cache-manager.ts +292 -294
- package/ccw/src/core/dashboard-generator.ts +3 -1
- package/ccw/src/core/routes/cli-routes.ts +192 -0
- package/ccw/src/core/routes/codexlens-routes.ts +241 -19
- package/ccw/src/core/routes/hooks-routes.ts +399 -405
- package/ccw/src/core/routes/litellm-api-routes.ts +930 -0
- package/ccw/src/core/routes/litellm-routes.ts +107 -0
- package/ccw/src/core/routes/mcp-routes.ts +1271 -1271
- package/ccw/src/core/routes/status-routes.ts +51 -0
- package/ccw/src/core/routes/system-routes.ts +1 -1
- package/ccw/src/core/server.ts +15 -1
- package/ccw/src/mcp-server/index.ts +1 -1
- package/ccw/src/templates/dashboard-css/12-cli-legacy.css +44 -0
- package/ccw/src/templates/dashboard-css/31-api-settings.css +2265 -0
- package/ccw/src/templates/dashboard-js/components/cli-history.js +15 -8
- package/ccw/src/templates/dashboard-js/components/cli-status.js +323 -9
- package/ccw/src/templates/dashboard-js/components/navigation.js +329 -313
- package/ccw/src/templates/dashboard-js/i18n.js +583 -1
- package/ccw/src/templates/dashboard-js/views/api-settings.js +3362 -0
- package/ccw/src/templates/dashboard-js/views/cli-manager.js +199 -24
- package/ccw/src/templates/dashboard-js/views/codexlens-manager.js +1265 -27
- package/ccw/src/templates/dashboard.html +840 -831
- package/ccw/src/tools/claude-cli-tools.ts +300 -0
- package/ccw/src/tools/cli-executor.ts +83 -14
- package/ccw/src/tools/codex-lens.ts +146 -9
- package/ccw/src/tools/context-cache-store.ts +368 -0
- package/ccw/src/tools/context-cache.ts +393 -0
- package/ccw/src/tools/core-memory.ts +33 -19
- package/ccw/src/tools/index.ts +2 -0
- package/ccw/src/tools/litellm-client.ts +246 -0
- package/ccw/src/tools/litellm-executor.ts +241 -0
- package/ccw/src/tools/pattern-parser.ts +329 -0
- package/ccw/src/tools/smart-search.ts +142 -41
- package/ccw/src/types/litellm-api-config.ts +402 -0
- package/ccw-litellm/README.md +180 -0
- package/ccw-litellm/pyproject.toml +35 -0
- package/ccw-litellm/src/ccw_litellm/__init__.py +47 -0
- package/ccw-litellm/src/ccw_litellm/__pycache__/__init__.cpython-313.pyc +0 -0
- package/ccw-litellm/src/ccw_litellm/__pycache__/cli.cpython-313.pyc +0 -0
- package/ccw-litellm/src/ccw_litellm/cli.py +108 -0
- package/ccw-litellm/src/ccw_litellm/clients/__init__.py +12 -0
- package/ccw-litellm/src/ccw_litellm/clients/__pycache__/__init__.cpython-313.pyc +0 -0
- package/ccw-litellm/src/ccw_litellm/clients/__pycache__/litellm_embedder.cpython-313.pyc +0 -0
- package/ccw-litellm/src/ccw_litellm/clients/__pycache__/litellm_llm.cpython-313.pyc +0 -0
- package/ccw-litellm/src/ccw_litellm/clients/litellm_embedder.py +251 -0
- package/ccw-litellm/src/ccw_litellm/clients/litellm_llm.py +165 -0
- package/ccw-litellm/src/ccw_litellm/config/__init__.py +22 -0
- package/ccw-litellm/src/ccw_litellm/config/__pycache__/__init__.cpython-313.pyc +0 -0
- package/ccw-litellm/src/ccw_litellm/config/__pycache__/loader.cpython-313.pyc +0 -0
- package/ccw-litellm/src/ccw_litellm/config/__pycache__/models.cpython-313.pyc +0 -0
- package/ccw-litellm/src/ccw_litellm/config/loader.py +316 -0
- package/ccw-litellm/src/ccw_litellm/config/models.py +130 -0
- package/ccw-litellm/src/ccw_litellm/interfaces/__init__.py +14 -0
- package/ccw-litellm/src/ccw_litellm/interfaces/__pycache__/__init__.cpython-313.pyc +0 -0
- package/ccw-litellm/src/ccw_litellm/interfaces/__pycache__/embedder.cpython-313.pyc +0 -0
- package/ccw-litellm/src/ccw_litellm/interfaces/__pycache__/llm.cpython-313.pyc +0 -0
- package/ccw-litellm/src/ccw_litellm/interfaces/embedder.py +52 -0
- package/ccw-litellm/src/ccw_litellm/interfaces/llm.py +45 -0
- package/codex-lens/src/codexlens/__pycache__/config.cpython-313.pyc +0 -0
- package/codex-lens/src/codexlens/cli/__pycache__/commands.cpython-313.pyc +0 -0
- package/codex-lens/src/codexlens/cli/__pycache__/embedding_manager.cpython-313.pyc +0 -0
- package/codex-lens/src/codexlens/cli/__pycache__/model_manager.cpython-313.pyc +0 -0
- package/codex-lens/src/codexlens/cli/__pycache__/output.cpython-313.pyc +0 -0
- package/codex-lens/src/codexlens/cli/commands.py +378 -23
- package/codex-lens/src/codexlens/cli/embedding_manager.py +660 -56
- package/codex-lens/src/codexlens/cli/model_manager.py +31 -18
- package/codex-lens/src/codexlens/cli/output.py +12 -1
- package/codex-lens/src/codexlens/config.py +93 -0
- package/codex-lens/src/codexlens/search/__pycache__/chain_search.cpython-313.pyc +0 -0
- package/codex-lens/src/codexlens/search/__pycache__/hybrid_search.cpython-313.pyc +0 -0
- package/codex-lens/src/codexlens/search/__pycache__/ranking.cpython-313.pyc +0 -0
- package/codex-lens/src/codexlens/search/chain_search.py +6 -2
- package/codex-lens/src/codexlens/search/hybrid_search.py +44 -21
- package/codex-lens/src/codexlens/search/ranking.py +1 -1
- package/codex-lens/src/codexlens/semantic/__init__.py +42 -0
- package/codex-lens/src/codexlens/semantic/__pycache__/__init__.cpython-313.pyc +0 -0
- package/codex-lens/src/codexlens/semantic/__pycache__/base.cpython-313.pyc +0 -0
- package/codex-lens/src/codexlens/semantic/__pycache__/chunker.cpython-313.pyc +0 -0
- package/codex-lens/src/codexlens/semantic/__pycache__/embedder.cpython-313.pyc +0 -0
- package/codex-lens/src/codexlens/semantic/__pycache__/factory.cpython-313.pyc +0 -0
- package/codex-lens/src/codexlens/semantic/__pycache__/gpu_support.cpython-313.pyc +0 -0
- package/codex-lens/src/codexlens/semantic/__pycache__/litellm_embedder.cpython-313.pyc +0 -0
- package/codex-lens/src/codexlens/semantic/__pycache__/vector_store.cpython-313.pyc +0 -0
- package/codex-lens/src/codexlens/semantic/base.py +61 -0
- package/codex-lens/src/codexlens/semantic/chunker.py +43 -20
- package/codex-lens/src/codexlens/semantic/embedder.py +60 -13
- package/codex-lens/src/codexlens/semantic/factory.py +98 -0
- package/codex-lens/src/codexlens/semantic/gpu_support.py +225 -3
- package/codex-lens/src/codexlens/semantic/litellm_embedder.py +144 -0
- package/codex-lens/src/codexlens/semantic/rotational_embedder.py +434 -0
- package/codex-lens/src/codexlens/semantic/vector_store.py +33 -8
- package/codex-lens/src/codexlens/storage/__pycache__/path_mapper.cpython-313.pyc +0 -0
- package/codex-lens/src/codexlens/storage/migrations/__pycache__/migration_004_dual_fts.cpython-313.pyc +0 -0
- package/codex-lens/src/codexlens/storage/path_mapper.py +27 -1
- package/package.json +15 -5
- package/.codex/prompts.zip +0 -0
- package/ccw/package.json +0 -65
|
@@ -1,27 +1,36 @@
|
|
|
1
1
|
"""Embedding Manager - Manage semantic embeddings for code indexes."""
|
|
2
2
|
|
|
3
3
|
import gc
|
|
4
|
+
import json
|
|
4
5
|
import logging
|
|
5
6
|
import sqlite3
|
|
6
7
|
import time
|
|
8
|
+
from concurrent.futures import ThreadPoolExecutor, as_completed
|
|
7
9
|
from itertools import islice
|
|
8
10
|
from pathlib import Path
|
|
9
|
-
from typing import Dict, Generator, List, Optional, Tuple
|
|
11
|
+
from typing import Any, Dict, Generator, List, Optional, Tuple
|
|
10
12
|
|
|
11
13
|
try:
|
|
12
|
-
from codexlens.semantic import SEMANTIC_AVAILABLE
|
|
13
|
-
if SEMANTIC_AVAILABLE:
|
|
14
|
-
from codexlens.semantic.embedder import Embedder, get_embedder, clear_embedder_cache
|
|
15
|
-
from codexlens.semantic.vector_store import VectorStore
|
|
16
|
-
from codexlens.semantic.chunker import Chunker, ChunkConfig
|
|
14
|
+
from codexlens.semantic import SEMANTIC_AVAILABLE, is_embedding_backend_available
|
|
17
15
|
except ImportError:
|
|
18
16
|
SEMANTIC_AVAILABLE = False
|
|
17
|
+
def is_embedding_backend_available(_backend: str): # type: ignore[no-redef]
|
|
18
|
+
return False, "codexlens.semantic not available"
|
|
19
19
|
|
|
20
20
|
logger = logging.getLogger(__name__)
|
|
21
21
|
|
|
22
22
|
# Embedding batch size - larger values improve throughput on modern hardware
|
|
23
23
|
# Benchmark: 256 gives ~2.35x speedup over 64 with DirectML GPU acceleration
|
|
24
|
-
EMBEDDING_BATCH_SIZE = 256
|
|
24
|
+
EMBEDDING_BATCH_SIZE = 256
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def _cleanup_fastembed_resources() -> None:
|
|
28
|
+
"""Best-effort cleanup for fastembed/ONNX resources (no-op for other backends)."""
|
|
29
|
+
try:
|
|
30
|
+
from codexlens.semantic.embedder import clear_embedder_cache
|
|
31
|
+
clear_embedder_cache()
|
|
32
|
+
except Exception:
|
|
33
|
+
pass
|
|
25
34
|
|
|
26
35
|
|
|
27
36
|
def _generate_chunks_from_cursor(
|
|
@@ -79,6 +88,44 @@ def _generate_chunks_from_cursor(
|
|
|
79
88
|
failed_files.append((file_path, str(e)))
|
|
80
89
|
|
|
81
90
|
|
|
91
|
+
def _create_token_aware_batches(
|
|
92
|
+
chunk_generator: Generator,
|
|
93
|
+
max_tokens_per_batch: int = 8000,
|
|
94
|
+
) -> Generator[List[Tuple], None, None]:
|
|
95
|
+
"""Group chunks by total token count instead of fixed count.
|
|
96
|
+
|
|
97
|
+
Uses fast token estimation (len(content) // 4) for efficiency.
|
|
98
|
+
Yields batches when approaching the token limit.
|
|
99
|
+
|
|
100
|
+
Args:
|
|
101
|
+
chunk_generator: Generator yielding (chunk, file_path) tuples
|
|
102
|
+
max_tokens_per_batch: Maximum tokens per batch (default: 8000)
|
|
103
|
+
|
|
104
|
+
Yields:
|
|
105
|
+
List of (chunk, file_path) tuples representing a batch
|
|
106
|
+
"""
|
|
107
|
+
current_batch = []
|
|
108
|
+
current_tokens = 0
|
|
109
|
+
|
|
110
|
+
for chunk, file_path in chunk_generator:
|
|
111
|
+
# Fast token estimation: len(content) // 4
|
|
112
|
+
chunk_tokens = len(chunk.content) // 4
|
|
113
|
+
|
|
114
|
+
# If adding this chunk would exceed limit and we have items, yield current batch
|
|
115
|
+
if current_tokens + chunk_tokens > max_tokens_per_batch and current_batch:
|
|
116
|
+
yield current_batch
|
|
117
|
+
current_batch = []
|
|
118
|
+
current_tokens = 0
|
|
119
|
+
|
|
120
|
+
# Add chunk to current batch
|
|
121
|
+
current_batch.append((chunk, file_path))
|
|
122
|
+
current_tokens += chunk_tokens
|
|
123
|
+
|
|
124
|
+
# Yield final batch if not empty
|
|
125
|
+
if current_batch:
|
|
126
|
+
yield current_batch
|
|
127
|
+
|
|
128
|
+
|
|
82
129
|
def _get_path_column(conn: sqlite3.Connection) -> str:
|
|
83
130
|
"""Detect whether files table uses 'path' or 'full_path' column.
|
|
84
131
|
|
|
@@ -189,33 +236,110 @@ def check_index_embeddings(index_path: Path) -> Dict[str, any]:
|
|
|
189
236
|
}
|
|
190
237
|
|
|
191
238
|
|
|
239
|
+
def _get_embedding_defaults() -> tuple[str, str, bool, List, str, float]:
|
|
240
|
+
"""Get default embedding settings from config.
|
|
241
|
+
|
|
242
|
+
Returns:
|
|
243
|
+
Tuple of (backend, model, use_gpu, endpoints, strategy, cooldown)
|
|
244
|
+
"""
|
|
245
|
+
try:
|
|
246
|
+
from codexlens.config import Config
|
|
247
|
+
config = Config.load()
|
|
248
|
+
return (
|
|
249
|
+
config.embedding_backend,
|
|
250
|
+
config.embedding_model,
|
|
251
|
+
config.embedding_use_gpu,
|
|
252
|
+
config.embedding_endpoints,
|
|
253
|
+
config.embedding_strategy,
|
|
254
|
+
config.embedding_cooldown,
|
|
255
|
+
)
|
|
256
|
+
except Exception:
|
|
257
|
+
return "fastembed", "code", True, [], "latency_aware", 60.0
|
|
258
|
+
|
|
259
|
+
|
|
192
260
|
def generate_embeddings(
|
|
193
261
|
index_path: Path,
|
|
194
|
-
|
|
262
|
+
embedding_backend: Optional[str] = None,
|
|
263
|
+
model_profile: Optional[str] = None,
|
|
195
264
|
force: bool = False,
|
|
196
265
|
chunk_size: int = 2000,
|
|
266
|
+
overlap: int = 200,
|
|
197
267
|
progress_callback: Optional[callable] = None,
|
|
268
|
+
use_gpu: Optional[bool] = None,
|
|
269
|
+
max_tokens_per_batch: Optional[int] = None,
|
|
270
|
+
max_workers: Optional[int] = None,
|
|
271
|
+
endpoints: Optional[List] = None,
|
|
272
|
+
strategy: Optional[str] = None,
|
|
273
|
+
cooldown: Optional[float] = None,
|
|
198
274
|
) -> Dict[str, any]:
|
|
199
275
|
"""Generate embeddings for an index using memory-efficient batch processing.
|
|
200
276
|
|
|
201
277
|
This function processes files in small batches to keep memory usage under 2GB,
|
|
202
|
-
regardless of the total project size.
|
|
278
|
+
regardless of the total project size. Supports concurrent API calls for
|
|
279
|
+
LiteLLM backend to improve throughput.
|
|
203
280
|
|
|
204
281
|
Args:
|
|
205
282
|
index_path: Path to _index.db file
|
|
206
|
-
|
|
283
|
+
embedding_backend: Embedding backend to use (fastembed or litellm).
|
|
284
|
+
Defaults to config setting.
|
|
285
|
+
model_profile: Model profile for fastembed (fast, code, multilingual, balanced)
|
|
286
|
+
or model name for litellm (e.g., qwen3-embedding).
|
|
287
|
+
Defaults to config setting.
|
|
207
288
|
force: If True, regenerate even if embeddings exist
|
|
208
289
|
chunk_size: Maximum chunk size in characters
|
|
290
|
+
overlap: Overlap size in characters for sliding window chunking (default: 200)
|
|
209
291
|
progress_callback: Optional callback for progress updates
|
|
292
|
+
use_gpu: Whether to use GPU acceleration (fastembed only).
|
|
293
|
+
Defaults to config setting.
|
|
294
|
+
max_tokens_per_batch: Maximum tokens per batch for token-aware batching.
|
|
295
|
+
If None, attempts to get from embedder.max_tokens,
|
|
296
|
+
then falls back to 8000. If set, overrides automatic detection.
|
|
297
|
+
max_workers: Maximum number of concurrent API calls.
|
|
298
|
+
If None, uses dynamic defaults based on backend and endpoint count.
|
|
299
|
+
endpoints: Optional list of endpoint configurations for multi-API load balancing.
|
|
300
|
+
Each dict has keys: model, api_key, api_base, weight.
|
|
301
|
+
strategy: Selection strategy for multi-endpoint mode (round_robin, latency_aware).
|
|
302
|
+
cooldown: Default cooldown seconds for rate-limited endpoints.
|
|
210
303
|
|
|
211
304
|
Returns:
|
|
212
305
|
Result dictionary with generation statistics
|
|
213
306
|
"""
|
|
214
|
-
if not
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
307
|
+
# Get defaults from config if not specified
|
|
308
|
+
(default_backend, default_model, default_gpu,
|
|
309
|
+
default_endpoints, default_strategy, default_cooldown) = _get_embedding_defaults()
|
|
310
|
+
|
|
311
|
+
if embedding_backend is None:
|
|
312
|
+
embedding_backend = default_backend
|
|
313
|
+
if model_profile is None:
|
|
314
|
+
model_profile = default_model
|
|
315
|
+
if use_gpu is None:
|
|
316
|
+
use_gpu = default_gpu
|
|
317
|
+
if endpoints is None:
|
|
318
|
+
endpoints = default_endpoints
|
|
319
|
+
if strategy is None:
|
|
320
|
+
strategy = default_strategy
|
|
321
|
+
if cooldown is None:
|
|
322
|
+
cooldown = default_cooldown
|
|
323
|
+
|
|
324
|
+
# Calculate endpoint count for worker scaling
|
|
325
|
+
endpoint_count = len(endpoints) if endpoints else 1
|
|
326
|
+
|
|
327
|
+
# Set dynamic max_workers default based on backend type and endpoint count
|
|
328
|
+
# - FastEmbed: CPU-bound, sequential is optimal (1 worker)
|
|
329
|
+
# - LiteLLM single endpoint: 4 workers default
|
|
330
|
+
# - LiteLLM multi-endpoint: workers = endpoint_count * 2 (to saturate all APIs)
|
|
331
|
+
if max_workers is None:
|
|
332
|
+
if embedding_backend == "litellm":
|
|
333
|
+
if endpoint_count > 1:
|
|
334
|
+
max_workers = endpoint_count * 2 # No cap, scale with endpoints
|
|
335
|
+
else:
|
|
336
|
+
max_workers = 4
|
|
337
|
+
else:
|
|
338
|
+
max_workers = 1
|
|
339
|
+
|
|
340
|
+
backend_available, backend_error = is_embedding_backend_available(embedding_backend)
|
|
341
|
+
if not backend_available:
|
|
342
|
+
return {"success": False, "error": backend_error or "Embedding backend not available"}
|
|
219
343
|
|
|
220
344
|
if not index_path.exists():
|
|
221
345
|
return {
|
|
@@ -253,13 +377,43 @@ def generate_embeddings(
|
|
|
253
377
|
|
|
254
378
|
# Initialize components
|
|
255
379
|
try:
|
|
256
|
-
#
|
|
257
|
-
|
|
380
|
+
# Import factory function to support both backends
|
|
381
|
+
from codexlens.semantic.factory import get_embedder as get_embedder_factory
|
|
382
|
+
from codexlens.semantic.vector_store import VectorStore
|
|
383
|
+
from codexlens.semantic.chunker import Chunker, ChunkConfig
|
|
384
|
+
|
|
385
|
+
# Initialize embedder using factory (supports fastembed, litellm, and rotational)
|
|
386
|
+
# For fastembed: model_profile is a profile name (fast/code/multilingual/balanced)
|
|
387
|
+
# For litellm: model_profile is a model name (e.g., qwen3-embedding)
|
|
388
|
+
# For multi-endpoint: endpoints list enables load balancing
|
|
389
|
+
if embedding_backend == "fastembed":
|
|
390
|
+
embedder = get_embedder_factory(backend="fastembed", profile=model_profile, use_gpu=use_gpu)
|
|
391
|
+
elif embedding_backend == "litellm":
|
|
392
|
+
embedder = get_embedder_factory(
|
|
393
|
+
backend="litellm",
|
|
394
|
+
model=model_profile,
|
|
395
|
+
endpoints=endpoints if endpoints else None,
|
|
396
|
+
strategy=strategy,
|
|
397
|
+
cooldown=cooldown,
|
|
398
|
+
)
|
|
399
|
+
else:
|
|
400
|
+
return {
|
|
401
|
+
"success": False,
|
|
402
|
+
"error": f"Invalid embedding backend: {embedding_backend}. Must be 'fastembed' or 'litellm'.",
|
|
403
|
+
}
|
|
404
|
+
|
|
258
405
|
# skip_token_count=True: Use fast estimation (len/4) instead of expensive tiktoken
|
|
259
406
|
# This significantly reduces CPU usage with minimal impact on metadata accuracy
|
|
260
|
-
chunker = Chunker(config=ChunkConfig(
|
|
407
|
+
chunker = Chunker(config=ChunkConfig(
|
|
408
|
+
max_chunk_size=chunk_size,
|
|
409
|
+
overlap=overlap,
|
|
410
|
+
skip_token_count=True
|
|
411
|
+
))
|
|
261
412
|
|
|
413
|
+
# Log embedder info with endpoint count for multi-endpoint mode
|
|
262
414
|
if progress_callback:
|
|
415
|
+
if endpoint_count > 1:
|
|
416
|
+
progress_callback(f"Using {endpoint_count} API endpoints with {strategy} strategy")
|
|
263
417
|
progress_callback(f"Using model: {embedder.model_name} ({embedder.embedding_dim} dimensions)")
|
|
264
418
|
|
|
265
419
|
except Exception as e:
|
|
@@ -292,7 +446,7 @@ def generate_embeddings(
|
|
|
292
446
|
|
|
293
447
|
# Set/update model configuration for this index
|
|
294
448
|
vector_store.set_model_config(
|
|
295
|
-
model_profile, embedder.model_name, embedder.embedding_dim
|
|
449
|
+
model_profile, embedder.model_name, embedder.embedding_dim, backend=embedding_backend
|
|
296
450
|
)
|
|
297
451
|
# Use bulk insert mode for efficient batch ANN index building
|
|
298
452
|
# This defers ANN updates until end_bulk_insert() is called
|
|
@@ -319,42 +473,203 @@ def generate_embeddings(
|
|
|
319
473
|
cursor, chunker, path_column, FILE_BATCH_SIZE, failed_files
|
|
320
474
|
)
|
|
321
475
|
|
|
476
|
+
# Determine max tokens per batch
|
|
477
|
+
# Priority: explicit parameter > embedder.max_tokens > default 8000
|
|
478
|
+
if max_tokens_per_batch is None:
|
|
479
|
+
max_tokens_per_batch = getattr(embedder, 'max_tokens', 8000)
|
|
480
|
+
|
|
481
|
+
# Create token-aware batches or fall back to fixed-size batching
|
|
482
|
+
if max_tokens_per_batch:
|
|
483
|
+
batch_generator = _create_token_aware_batches(
|
|
484
|
+
chunk_generator, max_tokens_per_batch
|
|
485
|
+
)
|
|
486
|
+
else:
|
|
487
|
+
# Fallback to fixed-size batching for backward compatibility
|
|
488
|
+
def fixed_size_batches():
|
|
489
|
+
while True:
|
|
490
|
+
batch = list(islice(chunk_generator, EMBEDDING_BATCH_SIZE))
|
|
491
|
+
if not batch:
|
|
492
|
+
break
|
|
493
|
+
yield batch
|
|
494
|
+
batch_generator = fixed_size_batches()
|
|
495
|
+
|
|
322
496
|
batch_number = 0
|
|
323
497
|
files_seen = set()
|
|
324
498
|
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
chunk_batch = list(islice(chunk_generator, EMBEDDING_BATCH_SIZE))
|
|
328
|
-
if not chunk_batch:
|
|
329
|
-
break
|
|
499
|
+
def compute_embeddings_only(batch_data: Tuple[int, List[Tuple]]):
|
|
500
|
+
"""Compute embeddings for a batch (no DB write) with retry logic.
|
|
330
501
|
|
|
331
|
-
|
|
332
|
-
|
|
333
|
-
# Track unique files for progress
|
|
334
|
-
for _, file_path in chunk_batch:
|
|
335
|
-
files_seen.add(file_path)
|
|
502
|
+
Args:
|
|
503
|
+
batch_data: Tuple of (batch_number, chunk_batch)
|
|
336
504
|
|
|
337
|
-
|
|
338
|
-
|
|
339
|
-
|
|
340
|
-
|
|
505
|
+
Returns:
|
|
506
|
+
Tuple of (batch_num, chunk_batch, embeddings_numpy, batch_files, error)
|
|
507
|
+
"""
|
|
508
|
+
import random
|
|
341
509
|
|
|
342
|
-
|
|
343
|
-
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
|
|
347
|
-
|
|
348
|
-
|
|
510
|
+
batch_num, chunk_batch = batch_data
|
|
511
|
+
batch_files = set()
|
|
512
|
+
for _, file_path in chunk_batch:
|
|
513
|
+
batch_files.add(file_path)
|
|
514
|
+
|
|
515
|
+
max_retries = 5
|
|
516
|
+
base_delay = 2.0
|
|
517
|
+
|
|
518
|
+
for attempt in range(max_retries + 1):
|
|
519
|
+
try:
|
|
520
|
+
batch_contents = [chunk.content for chunk, _ in chunk_batch]
|
|
521
|
+
embeddings_numpy = embedder.embed_to_numpy(batch_contents, batch_size=EMBEDDING_BATCH_SIZE)
|
|
522
|
+
return batch_num, chunk_batch, embeddings_numpy, batch_files, None
|
|
523
|
+
|
|
524
|
+
except Exception as e:
|
|
525
|
+
error_str = str(e).lower()
|
|
526
|
+
# Check for retryable errors (rate limit, connection, backend issues)
|
|
527
|
+
# Note: Some backends (e.g., ModelScope) return 400 with nested 500 errors
|
|
528
|
+
is_retryable = any(x in error_str for x in [
|
|
529
|
+
"429", "rate limit", "connection", "timeout",
|
|
530
|
+
"502", "503", "504", "service unavailable",
|
|
531
|
+
"500", "400", "badrequesterror", "internal server error",
|
|
532
|
+
"11434" # Ollama port - indicates backend routing issue
|
|
533
|
+
])
|
|
534
|
+
|
|
535
|
+
if attempt < max_retries and is_retryable:
|
|
536
|
+
sleep_time = base_delay * (2 ** attempt) + random.uniform(0, 0.5)
|
|
537
|
+
logger.warning(f"Batch {batch_num} failed (attempt {attempt+1}/{max_retries+1}). "
|
|
538
|
+
f"Retrying in {sleep_time:.1f}s. Error: {e}")
|
|
539
|
+
time.sleep(sleep_time)
|
|
540
|
+
continue
|
|
541
|
+
|
|
542
|
+
error_msg = f"Batch {batch_num}: {str(e)}"
|
|
543
|
+
logger.error(f"Failed to compute embeddings for batch {batch_num}: {str(e)}")
|
|
544
|
+
return batch_num, chunk_batch, None, batch_files, error_msg
|
|
545
|
+
|
|
546
|
+
# Should not reach here, but just in case
|
|
547
|
+
return batch_num, chunk_batch, None, batch_files, f"Batch {batch_num}: Max retries exceeded"
|
|
548
|
+
|
|
549
|
+
# Process batches based on max_workers setting
|
|
550
|
+
if max_workers <= 1:
|
|
551
|
+
# Sequential processing - stream directly from generator (no pre-materialization)
|
|
552
|
+
for chunk_batch in batch_generator:
|
|
553
|
+
batch_number += 1
|
|
554
|
+
|
|
555
|
+
# Track files in this batch
|
|
556
|
+
batch_files = set()
|
|
557
|
+
for _, file_path in chunk_batch:
|
|
558
|
+
batch_files.add(file_path)
|
|
559
|
+
|
|
560
|
+
# Retry logic for transient backend errors
|
|
561
|
+
max_retries = 5
|
|
562
|
+
base_delay = 2.0
|
|
563
|
+
success = False
|
|
564
|
+
|
|
565
|
+
for attempt in range(max_retries + 1):
|
|
566
|
+
try:
|
|
567
|
+
# Generate embeddings
|
|
568
|
+
batch_contents = [chunk.content for chunk, _ in chunk_batch]
|
|
569
|
+
embeddings_numpy = embedder.embed_to_numpy(batch_contents, batch_size=EMBEDDING_BATCH_SIZE)
|
|
570
|
+
|
|
571
|
+
# Store embeddings
|
|
572
|
+
vector_store.add_chunks_batch_numpy(chunk_batch, embeddings_numpy)
|
|
573
|
+
|
|
574
|
+
files_seen.update(batch_files)
|
|
575
|
+
total_chunks_created += len(chunk_batch)
|
|
576
|
+
total_files_processed = len(files_seen)
|
|
577
|
+
success = True
|
|
578
|
+
break
|
|
579
|
+
|
|
580
|
+
except Exception as e:
|
|
581
|
+
error_str = str(e).lower()
|
|
582
|
+
# Check for retryable errors (rate limit, connection, backend issues)
|
|
583
|
+
is_retryable = any(x in error_str for x in [
|
|
584
|
+
"429", "rate limit", "connection", "timeout",
|
|
585
|
+
"502", "503", "504", "service unavailable",
|
|
586
|
+
"500", "400", "badrequesterror", "internal server error",
|
|
587
|
+
"11434" # Ollama port - indicates backend routing issue
|
|
588
|
+
])
|
|
589
|
+
|
|
590
|
+
if attempt < max_retries and is_retryable:
|
|
591
|
+
import random
|
|
592
|
+
sleep_time = base_delay * (2 ** attempt) + random.uniform(0, 0.5)
|
|
593
|
+
logger.warning(f"Batch {batch_number} failed (attempt {attempt+1}/{max_retries+1}). "
|
|
594
|
+
f"Retrying in {sleep_time:.1f}s. Error: {e}")
|
|
595
|
+
time.sleep(sleep_time)
|
|
596
|
+
continue
|
|
597
|
+
|
|
598
|
+
logger.error(f"Failed to process batch {batch_number}: {str(e)}")
|
|
599
|
+
files_seen.update(batch_files)
|
|
600
|
+
break
|
|
601
|
+
|
|
602
|
+
if success and progress_callback and batch_number % 10 == 0:
|
|
349
603
|
progress_callback(f" Batch {batch_number}: {total_chunks_created} chunks, {total_files_processed} files")
|
|
350
|
-
|
|
351
|
-
|
|
352
|
-
|
|
353
|
-
|
|
354
|
-
|
|
355
|
-
|
|
356
|
-
|
|
357
|
-
|
|
604
|
+
else:
|
|
605
|
+
# Concurrent processing - main thread iterates batches (SQLite safe),
|
|
606
|
+
# workers compute embeddings (parallel), main thread writes to DB (serial)
|
|
607
|
+
if progress_callback:
|
|
608
|
+
progress_callback(f"Processing with {max_workers} concurrent embedding workers...")
|
|
609
|
+
|
|
610
|
+
with ThreadPoolExecutor(max_workers=max_workers) as executor:
|
|
611
|
+
pending_futures = {} # future -> (batch_num, chunk_batch)
|
|
612
|
+
completed_batches = 0
|
|
613
|
+
last_reported_batch = 0
|
|
614
|
+
|
|
615
|
+
def process_completed_futures():
|
|
616
|
+
"""Process any completed futures and write to DB."""
|
|
617
|
+
nonlocal total_chunks_created, total_files_processed, completed_batches, last_reported_batch
|
|
618
|
+
done_futures = [f for f in pending_futures if f.done()]
|
|
619
|
+
for f in done_futures:
|
|
620
|
+
try:
|
|
621
|
+
batch_num, chunk_batch, embeddings_numpy, batch_files, error = f.result()
|
|
622
|
+
if embeddings_numpy is not None and error is None:
|
|
623
|
+
# Write to DB in main thread (no contention)
|
|
624
|
+
vector_store.add_chunks_batch_numpy(chunk_batch, embeddings_numpy)
|
|
625
|
+
total_chunks_created += len(chunk_batch)
|
|
626
|
+
files_seen.update(batch_files)
|
|
627
|
+
total_files_processed = len(files_seen)
|
|
628
|
+
completed_batches += 1
|
|
629
|
+
except Exception as e:
|
|
630
|
+
logger.error(f"Future raised exception: {e}")
|
|
631
|
+
completed_batches += 1
|
|
632
|
+
del pending_futures[f]
|
|
633
|
+
|
|
634
|
+
# Report progress based on completed batches (every 5 batches)
|
|
635
|
+
if progress_callback and completed_batches >= last_reported_batch + 5:
|
|
636
|
+
progress_callback(f" Batch {completed_batches}: {total_chunks_created} chunks, {total_files_processed} files")
|
|
637
|
+
last_reported_batch = completed_batches
|
|
638
|
+
|
|
639
|
+
# Iterate batches in main thread (SQLite cursor is main-thread bound)
|
|
640
|
+
for chunk_batch in batch_generator:
|
|
641
|
+
batch_number += 1
|
|
642
|
+
|
|
643
|
+
# Submit compute task to worker pool
|
|
644
|
+
future = executor.submit(compute_embeddings_only, (batch_number, chunk_batch))
|
|
645
|
+
pending_futures[future] = batch_number
|
|
646
|
+
|
|
647
|
+
# Process any completed futures to free memory and write to DB
|
|
648
|
+
process_completed_futures()
|
|
649
|
+
|
|
650
|
+
# Backpressure: wait if too many pending
|
|
651
|
+
while len(pending_futures) >= max_workers * 2:
|
|
652
|
+
process_completed_futures()
|
|
653
|
+
if len(pending_futures) >= max_workers * 2:
|
|
654
|
+
time.sleep(0.1) # time is imported at module level
|
|
655
|
+
|
|
656
|
+
# Wait for remaining futures
|
|
657
|
+
for future in as_completed(list(pending_futures.keys())):
|
|
658
|
+
try:
|
|
659
|
+
batch_num, chunk_batch, embeddings_numpy, batch_files, error = future.result()
|
|
660
|
+
if embeddings_numpy is not None and error is None:
|
|
661
|
+
vector_store.add_chunks_batch_numpy(chunk_batch, embeddings_numpy)
|
|
662
|
+
total_chunks_created += len(chunk_batch)
|
|
663
|
+
files_seen.update(batch_files)
|
|
664
|
+
total_files_processed = len(files_seen)
|
|
665
|
+
completed_batches += 1
|
|
666
|
+
|
|
667
|
+
# Report progress for remaining batches
|
|
668
|
+
if progress_callback and completed_batches >= last_reported_batch + 5:
|
|
669
|
+
progress_callback(f" Batch {completed_batches}: {total_chunks_created} chunks, {total_files_processed} files")
|
|
670
|
+
last_reported_batch = completed_batches
|
|
671
|
+
except Exception as e:
|
|
672
|
+
logger.error(f"Future raised exception: {e}")
|
|
358
673
|
|
|
359
674
|
# Notify before ANN index finalization (happens when bulk_insert context exits)
|
|
360
675
|
if progress_callback:
|
|
@@ -363,7 +678,7 @@ def generate_embeddings(
|
|
|
363
678
|
except Exception as e:
|
|
364
679
|
# Cleanup on error to prevent process hanging
|
|
365
680
|
try:
|
|
366
|
-
|
|
681
|
+
_cleanup_fastembed_resources()
|
|
367
682
|
gc.collect()
|
|
368
683
|
except Exception:
|
|
369
684
|
pass
|
|
@@ -374,7 +689,7 @@ def generate_embeddings(
|
|
|
374
689
|
# Final cleanup: release ONNX resources to allow process exit
|
|
375
690
|
# This is critical - without it, ONNX Runtime threads prevent Python from exiting
|
|
376
691
|
try:
|
|
377
|
-
|
|
692
|
+
_cleanup_fastembed_resources()
|
|
378
693
|
gc.collect()
|
|
379
694
|
except Exception:
|
|
380
695
|
pass
|
|
@@ -427,23 +742,76 @@ def find_all_indexes(scan_dir: Path) -> List[Path]:
|
|
|
427
742
|
|
|
428
743
|
def generate_embeddings_recursive(
|
|
429
744
|
index_root: Path,
|
|
430
|
-
|
|
745
|
+
embedding_backend: Optional[str] = None,
|
|
746
|
+
model_profile: Optional[str] = None,
|
|
431
747
|
force: bool = False,
|
|
432
748
|
chunk_size: int = 2000,
|
|
749
|
+
overlap: int = 200,
|
|
433
750
|
progress_callback: Optional[callable] = None,
|
|
751
|
+
use_gpu: Optional[bool] = None,
|
|
752
|
+
max_tokens_per_batch: Optional[int] = None,
|
|
753
|
+
max_workers: Optional[int] = None,
|
|
754
|
+
endpoints: Optional[List] = None,
|
|
755
|
+
strategy: Optional[str] = None,
|
|
756
|
+
cooldown: Optional[float] = None,
|
|
434
757
|
) -> Dict[str, any]:
|
|
435
758
|
"""Generate embeddings for all index databases in a project recursively.
|
|
436
759
|
|
|
437
760
|
Args:
|
|
438
761
|
index_root: Root index directory containing _index.db files
|
|
439
|
-
|
|
762
|
+
embedding_backend: Embedding backend to use (fastembed or litellm).
|
|
763
|
+
Defaults to config setting.
|
|
764
|
+
model_profile: Model profile for fastembed (fast, code, multilingual, balanced)
|
|
765
|
+
or model name for litellm (e.g., qwen3-embedding).
|
|
766
|
+
Defaults to config setting.
|
|
440
767
|
force: If True, regenerate even if embeddings exist
|
|
441
768
|
chunk_size: Maximum chunk size in characters
|
|
769
|
+
overlap: Overlap size in characters for sliding window chunking (default: 200)
|
|
442
770
|
progress_callback: Optional callback for progress updates
|
|
771
|
+
use_gpu: Whether to use GPU acceleration (fastembed only).
|
|
772
|
+
Defaults to config setting.
|
|
773
|
+
max_tokens_per_batch: Maximum tokens per batch for token-aware batching.
|
|
774
|
+
If None, attempts to get from embedder.max_tokens,
|
|
775
|
+
then falls back to 8000. If set, overrides automatic detection.
|
|
776
|
+
max_workers: Maximum number of concurrent API calls.
|
|
777
|
+
If None, uses dynamic defaults based on backend and endpoint count.
|
|
778
|
+
endpoints: Optional list of endpoint configurations for multi-API load balancing.
|
|
779
|
+
strategy: Selection strategy for multi-endpoint mode.
|
|
780
|
+
cooldown: Default cooldown seconds for rate-limited endpoints.
|
|
443
781
|
|
|
444
782
|
Returns:
|
|
445
783
|
Aggregated result dictionary with generation statistics
|
|
446
784
|
"""
|
|
785
|
+
# Get defaults from config if not specified
|
|
786
|
+
(default_backend, default_model, default_gpu,
|
|
787
|
+
default_endpoints, default_strategy, default_cooldown) = _get_embedding_defaults()
|
|
788
|
+
|
|
789
|
+
if embedding_backend is None:
|
|
790
|
+
embedding_backend = default_backend
|
|
791
|
+
if model_profile is None:
|
|
792
|
+
model_profile = default_model
|
|
793
|
+
if use_gpu is None:
|
|
794
|
+
use_gpu = default_gpu
|
|
795
|
+
if endpoints is None:
|
|
796
|
+
endpoints = default_endpoints
|
|
797
|
+
if strategy is None:
|
|
798
|
+
strategy = default_strategy
|
|
799
|
+
if cooldown is None:
|
|
800
|
+
cooldown = default_cooldown
|
|
801
|
+
|
|
802
|
+
# Calculate endpoint count for worker scaling
|
|
803
|
+
endpoint_count = len(endpoints) if endpoints else 1
|
|
804
|
+
|
|
805
|
+
# Set dynamic max_workers default based on backend type and endpoint count
|
|
806
|
+
if max_workers is None:
|
|
807
|
+
if embedding_backend == "litellm":
|
|
808
|
+
if endpoint_count > 1:
|
|
809
|
+
max_workers = endpoint_count * 2 # No cap, scale with endpoints
|
|
810
|
+
else:
|
|
811
|
+
max_workers = 4
|
|
812
|
+
else:
|
|
813
|
+
max_workers = 1
|
|
814
|
+
|
|
447
815
|
# Discover all _index.db files
|
|
448
816
|
index_files = discover_all_index_dbs(index_root)
|
|
449
817
|
|
|
@@ -473,10 +841,18 @@ def generate_embeddings_recursive(
|
|
|
473
841
|
|
|
474
842
|
result = generate_embeddings(
|
|
475
843
|
index_path,
|
|
844
|
+
embedding_backend=embedding_backend,
|
|
476
845
|
model_profile=model_profile,
|
|
477
846
|
force=force,
|
|
478
847
|
chunk_size=chunk_size,
|
|
848
|
+
overlap=overlap,
|
|
479
849
|
progress_callback=None, # Don't cascade callbacks
|
|
850
|
+
use_gpu=use_gpu,
|
|
851
|
+
max_tokens_per_batch=max_tokens_per_batch,
|
|
852
|
+
max_workers=max_workers,
|
|
853
|
+
endpoints=endpoints,
|
|
854
|
+
strategy=strategy,
|
|
855
|
+
cooldown=cooldown,
|
|
480
856
|
)
|
|
481
857
|
|
|
482
858
|
all_results.append({
|
|
@@ -497,9 +873,8 @@ def generate_embeddings_recursive(
|
|
|
497
873
|
# Final cleanup after processing all indexes
|
|
498
874
|
# Each generate_embeddings() call does its own cleanup, but do a final one to be safe
|
|
499
875
|
try:
|
|
500
|
-
|
|
501
|
-
|
|
502
|
-
gc.collect()
|
|
876
|
+
_cleanup_fastembed_resources()
|
|
877
|
+
gc.collect()
|
|
503
878
|
except Exception:
|
|
504
879
|
pass
|
|
505
880
|
|
|
@@ -525,7 +900,7 @@ def get_embeddings_status(index_root: Path) -> Dict[str, any]:
|
|
|
525
900
|
index_root: Root index directory
|
|
526
901
|
|
|
527
902
|
Returns:
|
|
528
|
-
Aggregated status with coverage statistics
|
|
903
|
+
Aggregated status with coverage statistics, model info, and timestamps
|
|
529
904
|
"""
|
|
530
905
|
index_files = discover_all_index_dbs(index_root)
|
|
531
906
|
|
|
@@ -541,6 +916,7 @@ def get_embeddings_status(index_root: Path) -> Dict[str, any]:
|
|
|
541
916
|
"coverage_percent": 0.0,
|
|
542
917
|
"indexes_with_embeddings": 0,
|
|
543
918
|
"indexes_without_embeddings": 0,
|
|
919
|
+
"model_info": None,
|
|
544
920
|
},
|
|
545
921
|
}
|
|
546
922
|
|
|
@@ -548,6 +924,8 @@ def get_embeddings_status(index_root: Path) -> Dict[str, any]:
|
|
|
548
924
|
files_with_embeddings = 0
|
|
549
925
|
total_chunks = 0
|
|
550
926
|
indexes_with_embeddings = 0
|
|
927
|
+
model_info = None
|
|
928
|
+
latest_updated_at = None
|
|
551
929
|
|
|
552
930
|
for index_path in index_files:
|
|
553
931
|
status = check_index_embeddings(index_path)
|
|
@@ -559,6 +937,40 @@ def get_embeddings_status(index_root: Path) -> Dict[str, any]:
|
|
|
559
937
|
if result["has_embeddings"]:
|
|
560
938
|
indexes_with_embeddings += 1
|
|
561
939
|
|
|
940
|
+
# Get model config from first index with embeddings (they should all match)
|
|
941
|
+
if model_info is None:
|
|
942
|
+
try:
|
|
943
|
+
from codexlens.semantic.vector_store import VectorStore
|
|
944
|
+
with VectorStore(index_path) as vs:
|
|
945
|
+
config = vs.get_model_config()
|
|
946
|
+
if config:
|
|
947
|
+
model_info = {
|
|
948
|
+
"model_profile": config.get("model_profile"),
|
|
949
|
+
"model_name": config.get("model_name"),
|
|
950
|
+
"embedding_dim": config.get("embedding_dim"),
|
|
951
|
+
"backend": config.get("backend"),
|
|
952
|
+
"created_at": config.get("created_at"),
|
|
953
|
+
"updated_at": config.get("updated_at"),
|
|
954
|
+
}
|
|
955
|
+
latest_updated_at = config.get("updated_at")
|
|
956
|
+
except Exception:
|
|
957
|
+
pass
|
|
958
|
+
else:
|
|
959
|
+
# Track the latest updated_at across all indexes
|
|
960
|
+
try:
|
|
961
|
+
from codexlens.semantic.vector_store import VectorStore
|
|
962
|
+
with VectorStore(index_path) as vs:
|
|
963
|
+
config = vs.get_model_config()
|
|
964
|
+
if config and config.get("updated_at"):
|
|
965
|
+
if latest_updated_at is None or config["updated_at"] > latest_updated_at:
|
|
966
|
+
latest_updated_at = config["updated_at"]
|
|
967
|
+
except Exception:
|
|
968
|
+
pass
|
|
969
|
+
|
|
970
|
+
# Update model_info with latest timestamp
|
|
971
|
+
if model_info and latest_updated_at:
|
|
972
|
+
model_info["updated_at"] = latest_updated_at
|
|
973
|
+
|
|
562
974
|
return {
|
|
563
975
|
"success": True,
|
|
564
976
|
"result": {
|
|
@@ -570,6 +982,7 @@ def get_embeddings_status(index_root: Path) -> Dict[str, any]:
|
|
|
570
982
|
"coverage_percent": round((files_with_embeddings / total_files * 100) if total_files > 0 else 0, 1),
|
|
571
983
|
"indexes_with_embeddings": indexes_with_embeddings,
|
|
572
984
|
"indexes_without_embeddings": len(index_files) - indexes_with_embeddings,
|
|
985
|
+
"model_info": model_info,
|
|
573
986
|
},
|
|
574
987
|
}
|
|
575
988
|
|
|
@@ -633,3 +1046,194 @@ def get_embedding_stats_summary(index_root: Path) -> Dict[str, any]:
|
|
|
633
1046
|
"indexes": index_stats,
|
|
634
1047
|
},
|
|
635
1048
|
}
|
|
1049
|
+
|
|
1050
|
+
|
|
1051
|
+
def scan_for_model_conflicts(
    index_root: Path,
    target_backend: str,
    target_model: str,
) -> Dict[str, any]:
    """Scan for model conflicts across all indexes in a directory.

    Checks if any existing embeddings were generated with a different
    backend or model than the target configuration.

    Args:
        index_root: Root index directory to scan
        target_backend: Target embedding backend (fastembed or litellm)
        target_model: Target model profile/name

    Returns:
        Dictionary with:
        - has_conflict: True if any index has different model config
        - existing_config: Config from first index with embeddings (if any)
        - target_config: The requested configuration
        - conflicts: List of conflicting index paths with their configs
        - indexes_with_embeddings: Count of indexes that have embeddings
    """
    target_config = {"backend": target_backend, "model": target_model}

    index_files = discover_all_index_dbs(index_root)

    if not index_files:
        return {
            "has_conflict": False,
            "existing_config": None,
            "target_config": target_config,
            "conflicts": [],
            "indexes_with_embeddings": 0,
        }

    # Hoisted out of the per-index loop: the import is loop-invariant.
    # Kept lazy (function scope) — presumably to avoid import cycles at
    # module load time — and guarded so an unavailable VectorStore degrades
    # to "no conflicts found", matching the original per-iteration behavior.
    try:
        from codexlens.semantic.vector_store import VectorStore
    except Exception as e:
        logger.debug(f"VectorStore unavailable, skipping model conflict scan: {e}")
        return {
            "has_conflict": False,
            "existing_config": None,
            "target_config": target_config,
            "conflicts": [],
            "indexes_with_embeddings": 0,
        }

    conflicts = []
    existing_config = None
    indexes_with_embeddings = 0

    for index_path in index_files:
        try:
            with VectorStore(index_path) as vs:
                config = vs.get_model_config()
                # Only indexes that recorded a model_profile count as having
                # embeddings for conflict purposes.
                if config and config.get("model_profile"):
                    indexes_with_embeddings += 1

                    # Store first existing config as reference
                    if existing_config is None:
                        existing_config = {
                            "backend": config.get("backend"),
                            "model": config.get("model_profile"),
                            "model_name": config.get("model_name"),
                            "embedding_dim": config.get("embedding_dim"),
                        }

                    # Check for conflict: different backend OR different model
                    existing_backend = config.get("backend", "")
                    existing_model = config.get("model_profile", "")

                    if existing_backend != target_backend or existing_model != target_model:
                        conflicts.append({
                            "path": str(index_path),
                            "existing": {
                                "backend": existing_backend,
                                "model": existing_model,
                                "model_name": config.get("model_name"),
                            },
                        })
        except Exception as e:
            # Best-effort scan: an unreadable index must not abort the whole pass.
            logger.debug(f"Failed to check model config for {index_path}: {e}")
            continue

    return {
        "has_conflict": len(conflicts) > 0,
        "existing_config": existing_config,
        "target_config": target_config,
        "conflicts": conflicts,
        "indexes_with_embeddings": indexes_with_embeddings,
    }
|
|
1131
|
+
|
|
1132
|
+
|
|
1133
|
+
def _get_global_settings_path() -> Path:
|
|
1134
|
+
"""Get the path to global embedding settings file."""
|
|
1135
|
+
return Path.home() / ".codexlens" / "embedding_lock.json"
|
|
1136
|
+
|
|
1137
|
+
|
|
1138
|
+
def get_locked_model_config() -> Optional[Dict[str, Any]]:
    """Read the globally locked embedding model configuration.

    Returns:
        Dictionary with ``backend``, ``model`` and ``locked_at`` if a lock
        is recorded; ``None`` when the lock file is absent, unreadable,
        malformed, or not marked as locked.
    """
    lock_file = _get_global_settings_path()
    if not lock_file.exists():
        return None

    try:
        data = json.loads(lock_file.read_text(encoding="utf-8"))
    except (json.JSONDecodeError, OSError):
        # Corrupt or unreadable lock file is treated as "no lock".
        return None

    if not data.get("locked"):
        return None

    return {
        "backend": data.get("backend"),
        "model": data.get("model"),
        "locked_at": data.get("locked_at"),
    }
|
|
1161
|
+
|
|
1162
|
+
|
|
1163
|
+
def set_locked_model_config(backend: str, model: str) -> None:
    """Persist the globally locked embedding model configuration.

    Called after the first successful embedding generation so that all
    future operations keep using the same backend/model combination.

    Args:
        backend: Embedding backend (fastembed or litellm)
        model: Model profile/name
    """
    import datetime

    lock_file = _get_global_settings_path()
    lock_file.parent.mkdir(parents=True, exist_ok=True)

    payload = {
        "locked": True,
        "backend": backend,
        "model": model,
        "locked_at": datetime.datetime.now().isoformat(),
    }
    lock_file.write_text(json.dumps(payload, indent=2), encoding="utf-8")
|
|
1187
|
+
|
|
1188
|
+
|
|
1189
|
+
def clear_locked_model_config() -> bool:
    """Clear the globally locked embedding model configuration.

    Returns:
        True if a lock file existed and was removed, False if no lock existed.
    """
    settings_path = _get_global_settings_path()
    # EAFP: unlink directly and treat a missing file as "no lock" instead of
    # the racy exists()-then-unlink() pattern, which raises FileNotFoundError
    # if another process removes the file between the two calls.
    try:
        settings_path.unlink()
        return True
    except FileNotFoundError:
        return False
|
|
1200
|
+
|
|
1201
|
+
|
|
1202
|
+
def check_global_model_lock(
    target_backend: str,
    target_model: str,
) -> Dict[str, Any]:
    """Compare the requested embedding model against the global lock.

    Args:
        target_backend: Requested embedding backend
        target_model: Requested model profile/name

    Returns:
        Dictionary with:
        - is_locked: True if a global lock exists
        - has_conflict: True if target differs from locked config
        - locked_config: The locked configuration (if any)
        - target_config: The requested configuration
    """
    requested = {"backend": target_backend, "model": target_model}
    locked = get_locked_model_config()

    if locked is None:
        # No lock recorded: the requested configuration is always acceptable.
        return {
            "is_locked": False,
            "has_conflict": False,
            "locked_config": None,
            "target_config": requested,
        }

    mismatch = (locked["backend"], locked["model"]) != (target_backend, target_model)

    return {
        "is_locked": True,
        "has_conflict": mismatch,
        "locked_config": locked,
        "target_config": requested,
    }
|