npm - claude-code-workflow - Versions diffs - 6.2.7 → 6.3.0 - Mend

claude-code-workflow 6.2.7 → 6.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (208) hide show

package/.claude/CLAUDE.md +16 -1
package/.claude/workflows/cli-templates/protocols/analysis-protocol.md +11 -4
package/.claude/workflows/cli-templates/protocols/write-protocol.md +10 -75
package/.claude/workflows/cli-tools-usage.md +14 -24
package/.codex/AGENTS.md +51 -1
package/.codex/prompts/compact.md +378 -0
package/.gemini/GEMINI.md +57 -20
package/ccw/dist/cli.d.ts.map +1 -1
package/ccw/dist/cli.js +21 -8
package/ccw/dist/cli.js.map +1 -1
package/ccw/dist/commands/cli.d.ts +2 -0
package/ccw/dist/commands/cli.d.ts.map +1 -1
package/ccw/dist/commands/cli.js +129 -8
package/ccw/dist/commands/cli.js.map +1 -1
package/ccw/dist/commands/hook.d.ts.map +1 -1
package/ccw/dist/commands/hook.js +3 -2
package/ccw/dist/commands/hook.js.map +1 -1
package/ccw/dist/config/litellm-api-config-manager.d.ts +180 -0
package/ccw/dist/config/litellm-api-config-manager.d.ts.map +1 -0
package/ccw/dist/config/litellm-api-config-manager.js +770 -0
package/ccw/dist/config/litellm-api-config-manager.js.map +1 -0
package/ccw/dist/config/provider-models.d.ts +73 -0
package/ccw/dist/config/provider-models.d.ts.map +1 -0
package/ccw/dist/config/provider-models.js +172 -0
package/ccw/dist/config/provider-models.js.map +1 -0
package/ccw/dist/core/cache-manager.d.ts.map +1 -1
package/ccw/dist/core/cache-manager.js +3 -5
package/ccw/dist/core/cache-manager.js.map +1 -1
package/ccw/dist/core/dashboard-generator.d.ts.map +1 -1
package/ccw/dist/core/dashboard-generator.js +3 -1
package/ccw/dist/core/dashboard-generator.js.map +1 -1
package/ccw/dist/core/routes/cli-routes.d.ts.map +1 -1
package/ccw/dist/core/routes/cli-routes.js +169 -0
package/ccw/dist/core/routes/cli-routes.js.map +1 -1
package/ccw/dist/core/routes/codexlens-routes.d.ts.map +1 -1
package/ccw/dist/core/routes/codexlens-routes.js +234 -18
package/ccw/dist/core/routes/codexlens-routes.js.map +1 -1
package/ccw/dist/core/routes/hooks-routes.d.ts.map +1 -1
package/ccw/dist/core/routes/hooks-routes.js +30 -32
package/ccw/dist/core/routes/hooks-routes.js.map +1 -1
package/ccw/dist/core/routes/litellm-api-routes.d.ts +21 -0
package/ccw/dist/core/routes/litellm-api-routes.d.ts.map +1 -0
package/ccw/dist/core/routes/litellm-api-routes.js +780 -0
package/ccw/dist/core/routes/litellm-api-routes.js.map +1 -0
package/ccw/dist/core/routes/litellm-routes.d.ts +20 -0
package/ccw/dist/core/routes/litellm-routes.d.ts.map +1 -0
package/ccw/dist/core/routes/litellm-routes.js +85 -0
package/ccw/dist/core/routes/litellm-routes.js.map +1 -0
package/ccw/dist/core/routes/mcp-routes.js +2 -2
package/ccw/dist/core/routes/mcp-routes.js.map +1 -1
package/ccw/dist/core/routes/status-routes.d.ts.map +1 -1
package/ccw/dist/core/routes/status-routes.js +39 -0
package/ccw/dist/core/routes/status-routes.js.map +1 -1
package/ccw/dist/core/routes/system-routes.js +1 -1
package/ccw/dist/core/routes/system-routes.js.map +1 -1
package/ccw/dist/core/server.d.ts.map +1 -1
package/ccw/dist/core/server.js +15 -1
package/ccw/dist/core/server.js.map +1 -1
package/ccw/dist/mcp-server/index.js +1 -1
package/ccw/dist/mcp-server/index.js.map +1 -1
package/ccw/dist/tools/claude-cli-tools.d.ts +82 -0
package/ccw/dist/tools/claude-cli-tools.d.ts.map +1 -0
package/ccw/dist/tools/claude-cli-tools.js +216 -0
package/ccw/dist/tools/claude-cli-tools.js.map +1 -0
package/ccw/dist/tools/cli-executor.d.ts.map +1 -1
package/ccw/dist/tools/cli-executor.js +76 -14
package/ccw/dist/tools/cli-executor.js.map +1 -1
package/ccw/dist/tools/codex-lens.d.ts +9 -2
package/ccw/dist/tools/codex-lens.d.ts.map +1 -1
package/ccw/dist/tools/codex-lens.js +114 -9
package/ccw/dist/tools/codex-lens.js.map +1 -1
package/ccw/dist/tools/context-cache-store.d.ts +136 -0
package/ccw/dist/tools/context-cache-store.d.ts.map +1 -0
package/ccw/dist/tools/context-cache-store.js +256 -0
package/ccw/dist/tools/context-cache-store.js.map +1 -0
package/ccw/dist/tools/context-cache.d.ts +56 -0
package/ccw/dist/tools/context-cache.d.ts.map +1 -0
package/ccw/dist/tools/context-cache.js +294 -0
package/ccw/dist/tools/context-cache.js.map +1 -0
package/ccw/dist/tools/core-memory.d.ts.map +1 -1
package/ccw/dist/tools/core-memory.js +33 -19
package/ccw/dist/tools/core-memory.js.map +1 -1
package/ccw/dist/tools/index.d.ts.map +1 -1
package/ccw/dist/tools/index.js +2 -0
package/ccw/dist/tools/index.js.map +1 -1
package/ccw/dist/tools/litellm-client.d.ts +85 -0
package/ccw/dist/tools/litellm-client.d.ts.map +1 -0
package/ccw/dist/tools/litellm-client.js +188 -0
package/ccw/dist/tools/litellm-client.js.map +1 -0
package/ccw/dist/tools/litellm-executor.d.ts +34 -0
package/ccw/dist/tools/litellm-executor.d.ts.map +1 -0
package/ccw/dist/tools/litellm-executor.js +192 -0
package/ccw/dist/tools/litellm-executor.js.map +1 -0
package/ccw/dist/tools/pattern-parser.d.ts +55 -0
package/ccw/dist/tools/pattern-parser.d.ts.map +1 -0
package/ccw/dist/tools/pattern-parser.js +237 -0
package/ccw/dist/tools/pattern-parser.js.map +1 -0
package/ccw/dist/tools/smart-search.d.ts +1 -0
package/ccw/dist/tools/smart-search.d.ts.map +1 -1
package/ccw/dist/tools/smart-search.js +117 -41
package/ccw/dist/tools/smart-search.js.map +1 -1
package/ccw/dist/types/litellm-api-config.d.ts +294 -0
package/ccw/dist/types/litellm-api-config.d.ts.map +1 -0
package/ccw/dist/types/litellm-api-config.js +8 -0
package/ccw/dist/types/litellm-api-config.js.map +1 -0
package/ccw/src/cli.ts +258 -244
package/ccw/src/commands/cli.ts +153 -9
package/ccw/src/commands/hook.ts +3 -2
package/ccw/src/config/.litellm-api-config-manager.ts.2025-12-23T11-57-43-727Z.bak +441 -0
package/ccw/src/config/litellm-api-config-manager.ts +1012 -0
package/ccw/src/config/provider-models.ts +222 -0
package/ccw/src/core/cache-manager.ts +292 -294
package/ccw/src/core/dashboard-generator.ts +3 -1
package/ccw/src/core/routes/cli-routes.ts +192 -0
package/ccw/src/core/routes/codexlens-routes.ts +241 -19
package/ccw/src/core/routes/hooks-routes.ts +399 -405
package/ccw/src/core/routes/litellm-api-routes.ts +930 -0
package/ccw/src/core/routes/litellm-routes.ts +107 -0
package/ccw/src/core/routes/mcp-routes.ts +1271 -1271
package/ccw/src/core/routes/status-routes.ts +51 -0
package/ccw/src/core/routes/system-routes.ts +1 -1
package/ccw/src/core/server.ts +15 -1
package/ccw/src/mcp-server/index.ts +1 -1
package/ccw/src/templates/dashboard-css/12-cli-legacy.css +44 -0
package/ccw/src/templates/dashboard-css/31-api-settings.css +2265 -0
package/ccw/src/templates/dashboard-js/components/cli-history.js +15 -8
package/ccw/src/templates/dashboard-js/components/cli-status.js +323 -9
package/ccw/src/templates/dashboard-js/components/navigation.js +329 -313
package/ccw/src/templates/dashboard-js/i18n.js +583 -1
package/ccw/src/templates/dashboard-js/views/api-settings.js +3362 -0
package/ccw/src/templates/dashboard-js/views/cli-manager.js +199 -24
package/ccw/src/templates/dashboard-js/views/codexlens-manager.js +1265 -27
package/ccw/src/templates/dashboard.html +840 -831
package/ccw/src/tools/claude-cli-tools.ts +300 -0
package/ccw/src/tools/cli-executor.ts +83 -14
package/ccw/src/tools/codex-lens.ts +146 -9
package/ccw/src/tools/context-cache-store.ts +368 -0
package/ccw/src/tools/context-cache.ts +393 -0
package/ccw/src/tools/core-memory.ts +33 -19
package/ccw/src/tools/index.ts +2 -0
package/ccw/src/tools/litellm-client.ts +246 -0
package/ccw/src/tools/litellm-executor.ts +241 -0
package/ccw/src/tools/pattern-parser.ts +329 -0
package/ccw/src/tools/smart-search.ts +142 -41
package/ccw/src/types/litellm-api-config.ts +402 -0
package/ccw-litellm/README.md +180 -0
package/ccw-litellm/pyproject.toml +35 -0
package/ccw-litellm/src/ccw_litellm/__init__.py +47 -0
package/ccw-litellm/src/ccw_litellm/__pycache__/__init__.cpython-313.pyc +0 -0
package/ccw-litellm/src/ccw_litellm/__pycache__/cli.cpython-313.pyc +0 -0
package/ccw-litellm/src/ccw_litellm/cli.py +108 -0
package/ccw-litellm/src/ccw_litellm/clients/__init__.py +12 -0
package/ccw-litellm/src/ccw_litellm/clients/__pycache__/__init__.cpython-313.pyc +0 -0
package/ccw-litellm/src/ccw_litellm/clients/__pycache__/litellm_embedder.cpython-313.pyc +0 -0
package/ccw-litellm/src/ccw_litellm/clients/__pycache__/litellm_llm.cpython-313.pyc +0 -0
package/ccw-litellm/src/ccw_litellm/clients/litellm_embedder.py +251 -0
package/ccw-litellm/src/ccw_litellm/clients/litellm_llm.py +165 -0
package/ccw-litellm/src/ccw_litellm/config/__init__.py +22 -0
package/ccw-litellm/src/ccw_litellm/config/__pycache__/__init__.cpython-313.pyc +0 -0
package/ccw-litellm/src/ccw_litellm/config/__pycache__/loader.cpython-313.pyc +0 -0
package/ccw-litellm/src/ccw_litellm/config/__pycache__/models.cpython-313.pyc +0 -0
package/ccw-litellm/src/ccw_litellm/config/loader.py +316 -0
package/ccw-litellm/src/ccw_litellm/config/models.py +130 -0
package/ccw-litellm/src/ccw_litellm/interfaces/__init__.py +14 -0
package/ccw-litellm/src/ccw_litellm/interfaces/__pycache__/__init__.cpython-313.pyc +0 -0
package/ccw-litellm/src/ccw_litellm/interfaces/__pycache__/embedder.cpython-313.pyc +0 -0
package/ccw-litellm/src/ccw_litellm/interfaces/__pycache__/llm.cpython-313.pyc +0 -0
package/ccw-litellm/src/ccw_litellm/interfaces/embedder.py +52 -0
package/ccw-litellm/src/ccw_litellm/interfaces/llm.py +45 -0
package/codex-lens/src/codexlens/__pycache__/config.cpython-313.pyc +0 -0
package/codex-lens/src/codexlens/cli/__pycache__/commands.cpython-313.pyc +0 -0
package/codex-lens/src/codexlens/cli/__pycache__/embedding_manager.cpython-313.pyc +0 -0
package/codex-lens/src/codexlens/cli/__pycache__/model_manager.cpython-313.pyc +0 -0
package/codex-lens/src/codexlens/cli/__pycache__/output.cpython-313.pyc +0 -0
package/codex-lens/src/codexlens/cli/commands.py +378 -23
package/codex-lens/src/codexlens/cli/embedding_manager.py +660 -56
package/codex-lens/src/codexlens/cli/model_manager.py +31 -18
package/codex-lens/src/codexlens/cli/output.py +12 -1
package/codex-lens/src/codexlens/config.py +93 -0
package/codex-lens/src/codexlens/search/__pycache__/chain_search.cpython-313.pyc +0 -0
package/codex-lens/src/codexlens/search/__pycache__/hybrid_search.cpython-313.pyc +0 -0
package/codex-lens/src/codexlens/search/__pycache__/ranking.cpython-313.pyc +0 -0
package/codex-lens/src/codexlens/search/chain_search.py +6 -2
package/codex-lens/src/codexlens/search/hybrid_search.py +44 -21
package/codex-lens/src/codexlens/search/ranking.py +1 -1
package/codex-lens/src/codexlens/semantic/__init__.py +42 -0
package/codex-lens/src/codexlens/semantic/__pycache__/__init__.cpython-313.pyc +0 -0
package/codex-lens/src/codexlens/semantic/__pycache__/base.cpython-313.pyc +0 -0
package/codex-lens/src/codexlens/semantic/__pycache__/chunker.cpython-313.pyc +0 -0
package/codex-lens/src/codexlens/semantic/__pycache__/embedder.cpython-313.pyc +0 -0
package/codex-lens/src/codexlens/semantic/__pycache__/factory.cpython-313.pyc +0 -0
package/codex-lens/src/codexlens/semantic/__pycache__/gpu_support.cpython-313.pyc +0 -0
package/codex-lens/src/codexlens/semantic/__pycache__/litellm_embedder.cpython-313.pyc +0 -0
package/codex-lens/src/codexlens/semantic/__pycache__/vector_store.cpython-313.pyc +0 -0
package/codex-lens/src/codexlens/semantic/base.py +61 -0
package/codex-lens/src/codexlens/semantic/chunker.py +43 -20
package/codex-lens/src/codexlens/semantic/embedder.py +60 -13
package/codex-lens/src/codexlens/semantic/factory.py +98 -0
package/codex-lens/src/codexlens/semantic/gpu_support.py +225 -3
package/codex-lens/src/codexlens/semantic/litellm_embedder.py +144 -0
package/codex-lens/src/codexlens/semantic/rotational_embedder.py +434 -0
package/codex-lens/src/codexlens/semantic/vector_store.py +33 -8
package/codex-lens/src/codexlens/storage/__pycache__/path_mapper.cpython-313.pyc +0 -0
package/codex-lens/src/codexlens/storage/migrations/__pycache__/migration_004_dual_fts.cpython-313.pyc +0 -0
package/codex-lens/src/codexlens/storage/path_mapper.py +27 -1
package/package.json +15 -5
package/.codex/prompts.zip +0 -0
package/ccw/package.json +0 -65

package/codex-lens/src/codexlens/semantic/rotational_embedder.py ADDED Viewed

@@ -0,0 +1,434 @@
+"""Rotational embedder for multi-endpoint API load balancing.
+Provides intelligent load balancing across multiple LiteLLM embedding endpoints
+to maximize throughput while respecting rate limits.
+"""
+from __future__ import annotations
+import logging
+import random
+import threading
+import time
+from dataclasses import dataclass, field
+from enum import Enum
+from typing import Any, Dict, Iterable, List, Optional
+import numpy as np
+from .base import BaseEmbedder
+logger = logging.getLogger(__name__)
+class EndpointStatus(Enum):
+    """Health states an API endpoint can be in."""
+    # Ready to take requests.
+    AVAILABLE = "available"
+    # Rate limited; temporarily out of rotation.
+    COOLING = "cooling"
+    # Permanent failure (auth error, etc.); never selected again.
+    FAILED = "failed"
+class SelectionStrategy(Enum):
+    """Policies the load balancer can use to pick the next endpoint."""
+    # Cycle through the available endpoints in turn.
+    ROUND_ROBIN = "round_robin"
+    # Prefer the endpoint with the best health score.
+    LATENCY_AWARE = "latency_aware"
+    # Draw randomly, weighted by health score.
+    WEIGHTED_RANDOM = "weighted_random"
+@dataclass
+class EndpointConfig:
+    """Static settings describing one API endpoint."""
+    # Model identifier handed to the embedder for this endpoint.
+    model: str
+    # Optional credentials / base URL; omitted from the embedder kwargs when unset.
+    api_key: Optional[str] = None
+    api_base: Optional[str] = None
+    # Multiplier applied to the endpoint's health score (higher = more requests).
+    weight: float = 1.0
+    # Max concurrent requests to this endpoint.
+    max_concurrent: int = 4
+@dataclass
+class EndpointState:
+    """Mutable runtime bookkeeping for a single endpoint.
+    Bundles the endpoint's configuration and embedder with its health status,
+    request/latency counters and the number of in-flight requests.
+    """
+    config: EndpointConfig
+    embedder: Any = None  # LiteLLMEmbedderWrapper instance
+    # Health metrics
+    status: EndpointStatus = EndpointStatus.AVAILABLE
+    cooldown_until: float = 0.0  # Unix timestamp when cooldown ends
+    # Performance metrics
+    total_requests: int = 0
+    total_failures: int = 0
+    avg_latency_ms: float = 0.0
+    last_latency_ms: float = 0.0
+    # Concurrency tracking
+    active_requests: int = 0
+    lock: threading.Lock = field(default_factory=threading.Lock)
+    def is_available(self) -> bool:
+        """Check if endpoint is available for requests.
+        Side effect: a COOLING endpoint whose cooldown has elapsed is flipped
+        back to AVAILABLE before returning True.
+        """
+        if self.status == EndpointStatus.FAILED:
+            return False
+        if self.status == EndpointStatus.COOLING:
+            if time.time() >= self.cooldown_until:
+                self.status = EndpointStatus.AVAILABLE
+                return True
+            return False
+        return True
+    def set_cooldown(self, seconds: float) -> None:
+        """Put endpoint in cooldown state for ``seconds`` seconds from now."""
+        self.status = EndpointStatus.COOLING
+        self.cooldown_until = time.time() + seconds
+        logger.warning(f"Endpoint {self.config.model} cooling down for {seconds:.1f}s")
+    def mark_failed(self) -> None:
+        """Mark endpoint as permanently failed (is_available() stays False)."""
+        self.status = EndpointStatus.FAILED
+        logger.error(f"Endpoint {self.config.model} marked as failed")
+    def record_success(self, latency_ms: float) -> None:
+        """Record a successful request and fold its latency into the average."""
+        self.total_requests += 1
+        self.last_latency_ms = latency_ms
+        # Exponential moving average for latency; the first sample seeds it.
+        alpha = 0.3
+        if self.avg_latency_ms == 0:
+            self.avg_latency_ms = latency_ms
+        else:
+            self.avg_latency_ms = alpha * latency_ms + (1 - alpha) * self.avg_latency_ms
+    def record_failure(self) -> None:
+        """Record failed request."""
+        self.total_requests += 1
+        self.total_failures += 1
+    @property
+    def health_score(self) -> float:
+        """Calculate a non-negative health score scaled by the endpoint weight.
+        Returns 0.0 for an unavailable endpoint. Otherwise the score is
+        ``(0.4 * success_rate + 0.3 * latency_factor + 0.3 * availability) * weight``,
+        so it ranges from 0 to ``weight`` for non-negative weights (not 0-1).
+        """
+        if not self.is_available():
+            return 0.0
+        # Base score from success rate
+        if self.total_requests > 0:
+            success_rate = 1 - (self.total_failures / self.total_requests)
+        else:
+            success_rate = 1.0
+        # Latency factor (faster = higher score)
+        # Normalize: 100ms = 1.0, 1000ms = 0.1
+        if self.avg_latency_ms > 0:
+            latency_factor = min(1.0, 100 / self.avg_latency_ms)
+        else:
+            latency_factor = 1.0
+        # Availability factor (less concurrent = more available).
+        # Clamped to [0, 1]: active_requests can exceed max_concurrent, and an
+        # unclamped term would push the score negative, which is not a valid
+        # weight for random.choices() in weighted-random selection.
+        if self.config.max_concurrent > 0:
+            load = self.active_requests / self.config.max_concurrent
+            availability = min(1.0, max(0.0, 1 - load))
+        else:
+            availability = 1.0
+        # Combined score with weights
+        return (success_rate * 0.4 + latency_factor * 0.3 + availability * 0.3) * self.config.weight
+class RotationalEmbedder(BaseEmbedder):
+    """Embedder that load balances across multiple API endpoints.
+    Features:
+    - Intelligent endpoint selection based on latency and health
+    - Automatic failover on rate limits (429) and server errors
+    - Cooldown management to respect rate limits
+    - Lock-protected round-robin index and in-flight request counters
+    Args:
+        endpoints: List of endpoint configurations
+        strategy: Selection strategy (default: latency_aware)
+        default_cooldown: Default cooldown seconds for rate limits (default: 60)
+        max_retries: Maximum retry attempts across all endpoints (default: 3)
+    Raises:
+        ValueError: If ``endpoints`` is empty or no endpoint could be initialized.
+    """
+    def __init__(
+        self,
+        endpoints: List[EndpointConfig],
+        strategy: SelectionStrategy = SelectionStrategy.LATENCY_AWARE,
+        default_cooldown: float = 60.0,
+        max_retries: int = 3,
+    ) -> None:
+        if not endpoints:
+            raise ValueError("At least one endpoint must be provided")
+        self.strategy = strategy
+        self.default_cooldown = default_cooldown
+        self.max_retries = max_retries
+        # Initialize endpoint states
+        self._endpoints: List[EndpointState] = []
+        self._lock = threading.Lock()
+        self._round_robin_index = 0
+        # Create embedder instances for each endpoint
+        from .litellm_embedder import LiteLLMEmbedderWrapper
+        for config in endpoints:
+            # Build kwargs for LiteLLMEmbedderWrapper; unset credentials are omitted
+            kwargs: Dict[str, Any] = {}
+            if config.api_key:
+                kwargs["api_key"] = config.api_key
+            if config.api_base:
+                kwargs["api_base"] = config.api_base
+            try:
+                embedder = LiteLLMEmbedderWrapper(model=config.model, **kwargs)
+                state = EndpointState(config=config, embedder=embedder)
+                self._endpoints.append(state)
+                logger.info(f"Initialized endpoint: {config.model}")
+            except Exception as e:
+                # A single bad endpoint is skipped; only zero usable endpoints is fatal.
+                logger.error(f"Failed to initialize endpoint {config.model}: {e}")
+        if not self._endpoints:
+            raise ValueError("Failed to initialize any endpoints")
+        # Cache embedding properties from first endpoint
+        self._embedding_dim = self._endpoints[0].embedder.embedding_dim
+        self._model_name = f"rotational({len(self._endpoints)} endpoints)"
+        self._max_tokens = self._endpoints[0].embedder.max_tokens
+    @property
+    def embedding_dim(self) -> int:
+        """Return embedding dimensions (taken from the first initialized endpoint)."""
+        return self._embedding_dim
+    @property
+    def model_name(self) -> str:
+        """Return model name."""
+        return self._model_name
+    @property
+    def max_tokens(self) -> int:
+        """Return maximum token limit (taken from the first initialized endpoint)."""
+        return self._max_tokens
+    @property
+    def endpoint_count(self) -> int:
+        """Return number of configured endpoints."""
+        return len(self._endpoints)
+    @property
+    def available_endpoint_count(self) -> int:
+        """Return number of available endpoints."""
+        return sum(1 for ep in self._endpoints if ep.is_available())
+    def get_endpoint_stats(self) -> List[Dict[str, Any]]:
+        """Get statistics for all endpoints as a list of plain dicts."""
+        stats = []
+        for ep in self._endpoints:
+            stats.append({
+                "model": ep.config.model,
+                "status": ep.status.value,
+                "total_requests": ep.total_requests,
+                "total_failures": ep.total_failures,
+                "avg_latency_ms": round(ep.avg_latency_ms, 2),
+                "health_score": round(ep.health_score, 3),
+                "active_requests": ep.active_requests,
+            })
+        return stats
+    def _select_endpoint(self) -> Optional[EndpointState]:
+        """Select best available endpoint based on strategy.
+        Returns:
+            The chosen endpoint state, or None when no endpoint is available.
+        """
+        available = [ep for ep in self._endpoints if ep.is_available()]
+        if not available:
+            return None
+        if self.strategy == SelectionStrategy.ROUND_ROBIN:
+            with self._lock:
+                self._round_robin_index = (self._round_robin_index + 1) % len(available)
+                return available[self._round_robin_index]
+        elif self.strategy == SelectionStrategy.LATENCY_AWARE:
+            # Sort by health score (descending) and pick top candidate
+            # Add small random factor to prevent thundering herd
+            scored = [(ep, ep.health_score + random.uniform(0, 0.1)) for ep in available]
+            scored.sort(key=lambda x: x[1], reverse=True)
+            return scored[0][0]
+        elif self.strategy == SelectionStrategy.WEIGHTED_RANDOM:
+            # Weighted random selection based on health scores.
+            # random.choices() needs non-negative weights with a positive sum,
+            # so negative scores are floored at 0 and a non-positive total
+            # falls back to a uniform pick.
+            scores = [max(0.0, ep.health_score) for ep in available]
+            total = sum(scores)
+            if total <= 0:
+                return random.choice(available)
+            weights = [s / total for s in scores]
+            return random.choices(available, weights=weights, k=1)[0]
+        return available[0]
+    def _parse_retry_after(self, error: Exception) -> Optional[float]:
+        """Extract Retry-After value (whole seconds) from the error text, if present."""
+        error_str = str(error)
+        # Try to find Retry-After in error message
+        import re
+        match = re.search(r'[Rr]etry[- ][Aa]fter[:\s]+(\d+)', error_str)
+        if match:
+            return float(match.group(1))
+        return None
+    def _is_rate_limit_error(self, error: Exception) -> bool:
+        """Check if error is a rate limit error (substring match on the message)."""
+        error_str = str(error).lower()
+        return any(x in error_str for x in ["429", "rate limit", "too many requests"])
+    def _is_retryable_error(self, error: Exception) -> bool:
+        """Check if error is retryable (not auth/config error)."""
+        error_str = str(error).lower()
+        # Retryable errors
+        if any(x in error_str for x in ["429", "rate limit", "502", "503", "504",
+                                         "timeout", "connection", "service unavailable"]):
+            return True
+        # Non-retryable errors (auth, config)
+        if any(x in error_str for x in ["401", "403", "invalid", "authentication",
+                                         "unauthorized", "api key"]):
+            return False
+        # Default to retryable for unknown errors
+        return True
+    def embed_to_numpy(self, texts: str | Iterable[str], **kwargs) -> np.ndarray:
+        """Embed texts using load-balanced endpoint selection.
+        Args:
+            texts: Single text or iterable of texts to embed.
+            **kwargs: Additional arguments passed to underlying embedder.
+        Returns:
+            numpy.ndarray: Array of shape (n_texts, embedding_dim) containing embeddings.
+        Raises:
+            RuntimeError: If all endpoints fail after retries.
+        """
+        if isinstance(texts, str):
+            texts = [texts]
+        else:
+            texts = list(texts)
+        last_error: Optional[Exception] = None
+        tried_endpoints: set = set()
+        for attempt in range(self.max_retries + 1):
+            endpoint = self._select_endpoint()
+            if endpoint is None:
+                cooldown_ends = [
+                    ep.cooldown_until for ep in self._endpoints
+                    if ep.status == EndpointStatus.COOLING
+                ]
+                # Nothing is cooling means every endpoint has failed permanently:
+                # sleeping cannot bring one back, so give up immediately.
+                if not cooldown_ends or attempt >= self.max_retries:
+                    break
+                remaining = min(cooldown_ends) - time.time()
+                if remaining > 0:
+                    wait_time = min(remaining, 30)  # Cap wait at 30s
+                    logger.warning(f"All endpoints busy, waiting {wait_time:.1f}s...")
+                    time.sleep(wait_time)
+                # If remaining <= 0 a cooldown expired since selection; just re-select.
+                continue
+            # Track tried endpoints to avoid infinite loops
+            endpoint_id = id(endpoint)
+            if endpoint_id in tried_endpoints and len(tried_endpoints) >= len(self._endpoints):
+                # Already tried all endpoints
+                break
+            tried_endpoints.add(endpoint_id)
+            # Acquire slot
+            with endpoint.lock:
+                endpoint.active_requests += 1
+            try:
+                start_time = time.time()
+                result = endpoint.embedder.embed_to_numpy(texts, **kwargs)
+                latency_ms = (time.time() - start_time) * 1000
+                # Record success
+                endpoint.record_success(latency_ms)
+                return result
+            except Exception as e:
+                last_error = e
+                endpoint.record_failure()
+                if self._is_rate_limit_error(e):
+                    # Rate limited - set cooldown. Compare against None so an
+                    # explicit "Retry-After: 0" is honoured instead of being
+                    # replaced by the (much longer) default cooldown.
+                    retry_after = self._parse_retry_after(e)
+                    if retry_after is None:
+                        retry_after = self.default_cooldown
+                    endpoint.set_cooldown(retry_after)
+                    logger.warning(f"Endpoint {endpoint.config.model} rate limited, "
+                                   f"cooling for {retry_after}s")
+                elif not self._is_retryable_error(e):
+                    # Permanent failure (auth error, etc.)
+                    endpoint.mark_failed()
+                    logger.error(f"Endpoint {endpoint.config.model} failed permanently: {e}")
+                else:
+                    # Temporary error - short cooldown
+                    endpoint.set_cooldown(5.0)
+                    logger.warning(f"Endpoint {endpoint.config.model} error: {e}")
+            finally:
+                # Release the slot on success and failure alike.
+                with endpoint.lock:
+                    endpoint.active_requests -= 1
+        # All retries exhausted
+        available = self.available_endpoint_count
+        raise RuntimeError(
+            f"All embedding attempts failed after {self.max_retries + 1} tries. "
+            f"Available endpoints: {available}/{len(self._endpoints)}. "
+            f"Last error: {last_error}"
+        )
+def create_rotational_embedder(
+    endpoints_config: List[Dict[str, Any]],
+    strategy: str = "latency_aware",
+    default_cooldown: float = 60.0,
+    max_retries: int = 3,
+) -> RotationalEmbedder:
+    """Factory function to create RotationalEmbedder from config dicts.
+    Args:
+        endpoints_config: List of endpoint configuration dicts with keys:
+            - model: Model identifier (required)
+            - api_key: API key (optional)
+            - api_base: API base URL (optional)
+            - weight: Request weight (optional, default 1.0)
+            - max_concurrent: Max concurrent requests (optional, default 4)
+        strategy: Selection strategy name (round_robin, latency_aware, weighted_random);
+            matched case-insensitively against the SelectionStrategy member names
+        default_cooldown: Default cooldown seconds for rate limits
+        max_retries: Maximum retry attempts across all endpoints (default 3,
+            the same default the RotationalEmbedder constructor uses)
+    Returns:
+        Configured RotationalEmbedder instance
+    Raises:
+        KeyError: If an endpoint dict lacks "model" or ``strategy`` is not a
+            known SelectionStrategy name.
+    Example config:
+        endpoints_config = [
+            {"model": "openai/text-embedding-3-small", "api_key": "sk-..."},
+            {"model": "azure/my-embedding", "api_base": "https://...", "api_key": "..."},
+        ]
+    """
+    endpoints = [
+        EndpointConfig(
+            model=cfg["model"],
+            api_key=cfg.get("api_key"),
+            api_base=cfg.get("api_base"),
+            weight=cfg.get("weight", 1.0),
+            max_concurrent=cfg.get("max_concurrent", 4),
+        )
+        for cfg in endpoints_config
+    ]
+    strategy_enum = SelectionStrategy[strategy.upper()]
+    return RotationalEmbedder(
+        endpoints=endpoints,
+        strategy=strategy_enum,
+        default_cooldown=default_cooldown,
+        max_retries=max_retries,
+    )

package/codex-lens/src/codexlens/semantic/vector_store.py CHANGED Viewed

@@ -123,12 +123,34 @@ class VectorStore:
                     model_profile TEXT NOT NULL,
                     model_name TEXT NOT NULL,
                     embedding_dim INTEGER NOT NULL,
+                    backend TEXT NOT NULL DEFAULT 'fastembed',
                     created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
                     updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
                 )
             """)
+            # Migration: Add backend column to existing tables
+            self._migrate_backend_column(conn)
             conn.commit()
+    def _migrate_backend_column(self, conn: sqlite3.Connection) -> None:
+        """Ensure the embeddings_config table has a ``backend`` column.
+        Tables created before the column existed are altered in place; tables
+        that already have it are left untouched.
+        Args:
+            conn: Active SQLite connection
+        """
+        # PRAGMA table_info yields one row per column; index 1 is the column name
+        table_info = conn.execute("PRAGMA table_info(embeddings_config)").fetchall()
+        has_backend = any(col[1] == 'backend' for col in table_info)
+        if not has_backend:
+            logger.info("Migrating embeddings_config table: adding backend column")
+            conn.execute("""
+                ALTER TABLE embeddings_config
+                ADD COLUMN backend TEXT NOT NULL DEFAULT 'fastembed'
+            """)
     def _init_ann_index(self) -> None:
         """Initialize ANN index (lazy loading from existing data)."""
         if not HNSWLIB_AVAILABLE:
@@ -947,11 +969,11 @@ class VectorStore:
         """Get the model configuration used for embeddings in this store.
         Returns:
-            Dictionary with model_profile, model_name, embedding_dim, or None if not set.
+            Dictionary with model_profile, model_name, embedding_dim, backend, or None if not set.
         """
         with sqlite3.connect(self.db_path) as conn:
             row = conn.execute(
-                "SELECT model_profile, model_name, embedding_dim, created_at, updated_at "
+                "SELECT model_profile, model_name, embedding_dim, backend, created_at, updated_at "
                 "FROM embeddings_config WHERE id = 1"
             ).fetchone()
             if row:
@@ -959,13 +981,14 @@ class VectorStore:
                     "model_profile": row[0],
                     "model_name": row[1],
                     "embedding_dim": row[2],
-                    "created_at": row[3],
-                    "updated_at": row[4],
+                    "backend": row[3],
+                    "created_at": row[4],
+                    "updated_at": row[5],
                 }
         return None
     def set_model_config(
-        self, model_profile: str, model_name: str, embedding_dim: int
+        self, model_profile: str, model_name: str, embedding_dim: int, backend: str = 'fastembed'
     ) -> None:
         """Set the model configuration for embeddings in this store.
@@ -976,19 +999,21 @@ class VectorStore:
             model_profile: Model profile name (fast, code, minilm, etc.)
             model_name: Full model name (e.g., jinaai/jina-embeddings-v2-base-code)
             embedding_dim: Embedding dimension (e.g., 768)
+            backend: Backend used for embeddings (fastembed or litellm, default: fastembed)
         """
         with sqlite3.connect(self.db_path) as conn:
             conn.execute(
                 """
-                INSERT INTO embeddings_config (id, model_profile, model_name, embedding_dim)
-                VALUES (1, ?, ?, ?)
+                INSERT INTO embeddings_config (id, model_profile, model_name, embedding_dim, backend)
+                VALUES (1, ?, ?, ?, ?)
                 ON CONFLICT(id) DO UPDATE SET
                     model_profile = excluded.model_profile,
                     model_name = excluded.model_name,
                     embedding_dim = excluded.embedding_dim,
+                    backend = excluded.backend,
                     updated_at = CURRENT_TIMESTAMP
                 """,
-                (model_profile, model_name, embedding_dim)
+                (model_profile, model_name, embedding_dim, backend)
             )
             conn.commit()

package/codex-lens/src/codexlens/storage/__pycache__/path_mapper.cpython-313.pyc CHANGED Viewed

Binary file

package/codex-lens/src/codexlens/storage/migrations/__pycache__/migration_004_dual_fts.cpython-313.pyc CHANGED Viewed

Binary file

package/codex-lens/src/codexlens/storage/path_mapper.py CHANGED Viewed

@@ -14,11 +14,37 @@ Storage Structure:
                     └── _index.db      # src/ directory index
 """
+import json
+import os
 import platform
 from pathlib import Path
 from typing import Optional
+def _get_configured_index_root() -> Path:
+    """Resolve the index root directory from the environment or config file.
+    Priority order:
+    1. CODEXLENS_INDEX_DIR environment variable (ignored when empty)
+    2. index_dir from ~/.codexlens/config.json (must be a non-empty string)
+    3. Default: ~/.codexlens/indexes
+    Paths coming from (1) and (2) are user-expanded and resolved to absolute
+    paths; the default is returned unresolved. A missing, unreadable,
+    non-UTF-8, or malformed config file never raises: this function is called
+    while the PathMapper class body is evaluated (DEFAULT_INDEX_ROOT), so an
+    exception here would make the whole module fail to import.
+    Returns:
+        Path of the directory that holds the indexes.
+    """
+    env_override = os.getenv("CODEXLENS_INDEX_DIR")
+    if env_override:
+        return Path(env_override).expanduser().resolve()
+    config_file = Path.home() / ".codexlens" / "config.json"
+    try:
+        cfg = json.loads(config_file.read_text(encoding="utf-8"))
+        # The top-level JSON value may legally be a list, string, number...;
+        # only an object can carry an "index_dir" setting.
+        index_dir = cfg.get("index_dir") if isinstance(cfg, dict) else None
+        # Only a non-empty string is a usable path. JSON null, numbers, lists
+        # or "" fall through to the default instead of raising TypeError or
+        # silently resolving to the current working directory.
+        if isinstance(index_dir, str) and index_dir:
+            return Path(index_dir).expanduser().resolve()
+    except (OSError, ValueError, RuntimeError):
+        # OSError: file missing/unreadable or path cannot be resolved.
+        # ValueError: covers json.JSONDecodeError and UnicodeDecodeError.
+        # RuntimeError: expanduser() could not determine a home directory.
+        pass
+    return Path.home() / ".codexlens" / "indexes"
 class PathMapper:
     """Bidirectional mapping tool for source paths ↔ index paths.
@@ -31,7 +57,7 @@ class PathMapper:
         index_root: Configured index root directory
     """
-    DEFAULT_INDEX_ROOT = Path.home() / ".codexlens" / "indexes"
+    DEFAULT_INDEX_ROOT = _get_configured_index_root()
     INDEX_DB_NAME = "_index.db"
     def __init__(self, index_root: Optional[Path] = None):

package/package.json CHANGED Viewed

@@ -1,16 +1,18 @@
 {
   "name": "claude-code-workflow",
-  "version": "6.2.7",
+  "version": "6.3.0",
   "description": "JSON-driven multi-agent development framework with intelligent CLI orchestration (Gemini/Qwen/Codex), context-first architecture, and automated workflow execution",
   "type": "module",
   "main": "ccw/src/index.js",
   "bin": {
-    "ccw": "./ccw/bin/ccw.js"
+    "ccw": "./ccw/bin/ccw.js",
+    "ccw-mcp": "./ccw/bin/ccw-mcp.js"
   },
   "scripts": {
+    "build": "tsc -p ccw/tsconfig.json",
     "start": "node ccw/bin/ccw.js",
     "test": "node --test",
-    "prepublishOnly": "echo 'Ready to publish @dyw/claude-code-workflow'"
+    "prepublishOnly": "npm run build && echo 'Ready to publish @dyw/claude-code-workflow'"
   },
   "keywords": [
     "claude",
@@ -45,7 +47,6 @@
     "ccw/bin/",
     "ccw/dist/",
     "ccw/src/",
-    "ccw/package.json",
     ".claude/agents/",
     ".claude/commands/",
     ".claude/output-styles/",
@@ -59,6 +60,8 @@
     ".qwen/",
     "codex-lens/src/codexlens/",
     "codex-lens/pyproject.toml",
+    "ccw-litellm/src/ccw_litellm/",
+    "ccw-litellm/pyproject.toml",
     "CLAUDE.md",
     "README.md"
   ],
@@ -69,5 +72,12 @@
   "bugs": {
     "url": "https://github.com/catlog22/Claude-Code-Workflow/issues"
   },
-  "homepage": "https://github.com/catlog22/Claude-Code-Workflow#readme"
+  "homepage": "https://github.com/catlog22/Claude-Code-Workflow#readme",
+  "devDependencies": {
+    "@types/better-sqlite3": "^7.6.12",
+    "@types/gradient-string": "^1.1.6",
+    "@types/inquirer": "^9.0.9",
+    "@types/node": "^25.0.1",
+    "typescript": "^5.9.3"
+  }
 }

package/.codex/prompts.zip DELETED Viewed

Binary file