claude-code-workflow 6.2.7 → 6.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (208) hide show
  1. package/.claude/CLAUDE.md +16 -1
  2. package/.claude/workflows/cli-templates/protocols/analysis-protocol.md +11 -4
  3. package/.claude/workflows/cli-templates/protocols/write-protocol.md +10 -75
  4. package/.claude/workflows/cli-tools-usage.md +14 -24
  5. package/.codex/AGENTS.md +51 -1
  6. package/.codex/prompts/compact.md +378 -0
  7. package/.gemini/GEMINI.md +57 -20
  8. package/ccw/dist/cli.d.ts.map +1 -1
  9. package/ccw/dist/cli.js +21 -8
  10. package/ccw/dist/cli.js.map +1 -1
  11. package/ccw/dist/commands/cli.d.ts +2 -0
  12. package/ccw/dist/commands/cli.d.ts.map +1 -1
  13. package/ccw/dist/commands/cli.js +129 -8
  14. package/ccw/dist/commands/cli.js.map +1 -1
  15. package/ccw/dist/commands/hook.d.ts.map +1 -1
  16. package/ccw/dist/commands/hook.js +3 -2
  17. package/ccw/dist/commands/hook.js.map +1 -1
  18. package/ccw/dist/config/litellm-api-config-manager.d.ts +180 -0
  19. package/ccw/dist/config/litellm-api-config-manager.d.ts.map +1 -0
  20. package/ccw/dist/config/litellm-api-config-manager.js +770 -0
  21. package/ccw/dist/config/litellm-api-config-manager.js.map +1 -0
  22. package/ccw/dist/config/provider-models.d.ts +73 -0
  23. package/ccw/dist/config/provider-models.d.ts.map +1 -0
  24. package/ccw/dist/config/provider-models.js +172 -0
  25. package/ccw/dist/config/provider-models.js.map +1 -0
  26. package/ccw/dist/core/cache-manager.d.ts.map +1 -1
  27. package/ccw/dist/core/cache-manager.js +3 -5
  28. package/ccw/dist/core/cache-manager.js.map +1 -1
  29. package/ccw/dist/core/dashboard-generator.d.ts.map +1 -1
  30. package/ccw/dist/core/dashboard-generator.js +3 -1
  31. package/ccw/dist/core/dashboard-generator.js.map +1 -1
  32. package/ccw/dist/core/routes/cli-routes.d.ts.map +1 -1
  33. package/ccw/dist/core/routes/cli-routes.js +169 -0
  34. package/ccw/dist/core/routes/cli-routes.js.map +1 -1
  35. package/ccw/dist/core/routes/codexlens-routes.d.ts.map +1 -1
  36. package/ccw/dist/core/routes/codexlens-routes.js +234 -18
  37. package/ccw/dist/core/routes/codexlens-routes.js.map +1 -1
  38. package/ccw/dist/core/routes/hooks-routes.d.ts.map +1 -1
  39. package/ccw/dist/core/routes/hooks-routes.js +30 -32
  40. package/ccw/dist/core/routes/hooks-routes.js.map +1 -1
  41. package/ccw/dist/core/routes/litellm-api-routes.d.ts +21 -0
  42. package/ccw/dist/core/routes/litellm-api-routes.d.ts.map +1 -0
  43. package/ccw/dist/core/routes/litellm-api-routes.js +780 -0
  44. package/ccw/dist/core/routes/litellm-api-routes.js.map +1 -0
  45. package/ccw/dist/core/routes/litellm-routes.d.ts +20 -0
  46. package/ccw/dist/core/routes/litellm-routes.d.ts.map +1 -0
  47. package/ccw/dist/core/routes/litellm-routes.js +85 -0
  48. package/ccw/dist/core/routes/litellm-routes.js.map +1 -0
  49. package/ccw/dist/core/routes/mcp-routes.js +2 -2
  50. package/ccw/dist/core/routes/mcp-routes.js.map +1 -1
  51. package/ccw/dist/core/routes/status-routes.d.ts.map +1 -1
  52. package/ccw/dist/core/routes/status-routes.js +39 -0
  53. package/ccw/dist/core/routes/status-routes.js.map +1 -1
  54. package/ccw/dist/core/routes/system-routes.js +1 -1
  55. package/ccw/dist/core/routes/system-routes.js.map +1 -1
  56. package/ccw/dist/core/server.d.ts.map +1 -1
  57. package/ccw/dist/core/server.js +15 -1
  58. package/ccw/dist/core/server.js.map +1 -1
  59. package/ccw/dist/mcp-server/index.js +1 -1
  60. package/ccw/dist/mcp-server/index.js.map +1 -1
  61. package/ccw/dist/tools/claude-cli-tools.d.ts +82 -0
  62. package/ccw/dist/tools/claude-cli-tools.d.ts.map +1 -0
  63. package/ccw/dist/tools/claude-cli-tools.js +216 -0
  64. package/ccw/dist/tools/claude-cli-tools.js.map +1 -0
  65. package/ccw/dist/tools/cli-executor.d.ts.map +1 -1
  66. package/ccw/dist/tools/cli-executor.js +76 -14
  67. package/ccw/dist/tools/cli-executor.js.map +1 -1
  68. package/ccw/dist/tools/codex-lens.d.ts +9 -2
  69. package/ccw/dist/tools/codex-lens.d.ts.map +1 -1
  70. package/ccw/dist/tools/codex-lens.js +114 -9
  71. package/ccw/dist/tools/codex-lens.js.map +1 -1
  72. package/ccw/dist/tools/context-cache-store.d.ts +136 -0
  73. package/ccw/dist/tools/context-cache-store.d.ts.map +1 -0
  74. package/ccw/dist/tools/context-cache-store.js +256 -0
  75. package/ccw/dist/tools/context-cache-store.js.map +1 -0
  76. package/ccw/dist/tools/context-cache.d.ts +56 -0
  77. package/ccw/dist/tools/context-cache.d.ts.map +1 -0
  78. package/ccw/dist/tools/context-cache.js +294 -0
  79. package/ccw/dist/tools/context-cache.js.map +1 -0
  80. package/ccw/dist/tools/core-memory.d.ts.map +1 -1
  81. package/ccw/dist/tools/core-memory.js +33 -19
  82. package/ccw/dist/tools/core-memory.js.map +1 -1
  83. package/ccw/dist/tools/index.d.ts.map +1 -1
  84. package/ccw/dist/tools/index.js +2 -0
  85. package/ccw/dist/tools/index.js.map +1 -1
  86. package/ccw/dist/tools/litellm-client.d.ts +85 -0
  87. package/ccw/dist/tools/litellm-client.d.ts.map +1 -0
  88. package/ccw/dist/tools/litellm-client.js +188 -0
  89. package/ccw/dist/tools/litellm-client.js.map +1 -0
  90. package/ccw/dist/tools/litellm-executor.d.ts +34 -0
  91. package/ccw/dist/tools/litellm-executor.d.ts.map +1 -0
  92. package/ccw/dist/tools/litellm-executor.js +192 -0
  93. package/ccw/dist/tools/litellm-executor.js.map +1 -0
  94. package/ccw/dist/tools/pattern-parser.d.ts +55 -0
  95. package/ccw/dist/tools/pattern-parser.d.ts.map +1 -0
  96. package/ccw/dist/tools/pattern-parser.js +237 -0
  97. package/ccw/dist/tools/pattern-parser.js.map +1 -0
  98. package/ccw/dist/tools/smart-search.d.ts +1 -0
  99. package/ccw/dist/tools/smart-search.d.ts.map +1 -1
  100. package/ccw/dist/tools/smart-search.js +117 -41
  101. package/ccw/dist/tools/smart-search.js.map +1 -1
  102. package/ccw/dist/types/litellm-api-config.d.ts +294 -0
  103. package/ccw/dist/types/litellm-api-config.d.ts.map +1 -0
  104. package/ccw/dist/types/litellm-api-config.js +8 -0
  105. package/ccw/dist/types/litellm-api-config.js.map +1 -0
  106. package/ccw/src/cli.ts +258 -244
  107. package/ccw/src/commands/cli.ts +153 -9
  108. package/ccw/src/commands/hook.ts +3 -2
  109. package/ccw/src/config/.litellm-api-config-manager.ts.2025-12-23T11-57-43-727Z.bak +441 -0
  110. package/ccw/src/config/litellm-api-config-manager.ts +1012 -0
  111. package/ccw/src/config/provider-models.ts +222 -0
  112. package/ccw/src/core/cache-manager.ts +292 -294
  113. package/ccw/src/core/dashboard-generator.ts +3 -1
  114. package/ccw/src/core/routes/cli-routes.ts +192 -0
  115. package/ccw/src/core/routes/codexlens-routes.ts +241 -19
  116. package/ccw/src/core/routes/hooks-routes.ts +399 -405
  117. package/ccw/src/core/routes/litellm-api-routes.ts +930 -0
  118. package/ccw/src/core/routes/litellm-routes.ts +107 -0
  119. package/ccw/src/core/routes/mcp-routes.ts +1271 -1271
  120. package/ccw/src/core/routes/status-routes.ts +51 -0
  121. package/ccw/src/core/routes/system-routes.ts +1 -1
  122. package/ccw/src/core/server.ts +15 -1
  123. package/ccw/src/mcp-server/index.ts +1 -1
  124. package/ccw/src/templates/dashboard-css/12-cli-legacy.css +44 -0
  125. package/ccw/src/templates/dashboard-css/31-api-settings.css +2265 -0
  126. package/ccw/src/templates/dashboard-js/components/cli-history.js +15 -8
  127. package/ccw/src/templates/dashboard-js/components/cli-status.js +323 -9
  128. package/ccw/src/templates/dashboard-js/components/navigation.js +329 -313
  129. package/ccw/src/templates/dashboard-js/i18n.js +583 -1
  130. package/ccw/src/templates/dashboard-js/views/api-settings.js +3362 -0
  131. package/ccw/src/templates/dashboard-js/views/cli-manager.js +199 -24
  132. package/ccw/src/templates/dashboard-js/views/codexlens-manager.js +1265 -27
  133. package/ccw/src/templates/dashboard.html +840 -831
  134. package/ccw/src/tools/claude-cli-tools.ts +300 -0
  135. package/ccw/src/tools/cli-executor.ts +83 -14
  136. package/ccw/src/tools/codex-lens.ts +146 -9
  137. package/ccw/src/tools/context-cache-store.ts +368 -0
  138. package/ccw/src/tools/context-cache.ts +393 -0
  139. package/ccw/src/tools/core-memory.ts +33 -19
  140. package/ccw/src/tools/index.ts +2 -0
  141. package/ccw/src/tools/litellm-client.ts +246 -0
  142. package/ccw/src/tools/litellm-executor.ts +241 -0
  143. package/ccw/src/tools/pattern-parser.ts +329 -0
  144. package/ccw/src/tools/smart-search.ts +142 -41
  145. package/ccw/src/types/litellm-api-config.ts +402 -0
  146. package/ccw-litellm/README.md +180 -0
  147. package/ccw-litellm/pyproject.toml +35 -0
  148. package/ccw-litellm/src/ccw_litellm/__init__.py +47 -0
  149. package/ccw-litellm/src/ccw_litellm/__pycache__/__init__.cpython-313.pyc +0 -0
  150. package/ccw-litellm/src/ccw_litellm/__pycache__/cli.cpython-313.pyc +0 -0
  151. package/ccw-litellm/src/ccw_litellm/cli.py +108 -0
  152. package/ccw-litellm/src/ccw_litellm/clients/__init__.py +12 -0
  153. package/ccw-litellm/src/ccw_litellm/clients/__pycache__/__init__.cpython-313.pyc +0 -0
  154. package/ccw-litellm/src/ccw_litellm/clients/__pycache__/litellm_embedder.cpython-313.pyc +0 -0
  155. package/ccw-litellm/src/ccw_litellm/clients/__pycache__/litellm_llm.cpython-313.pyc +0 -0
  156. package/ccw-litellm/src/ccw_litellm/clients/litellm_embedder.py +251 -0
  157. package/ccw-litellm/src/ccw_litellm/clients/litellm_llm.py +165 -0
  158. package/ccw-litellm/src/ccw_litellm/config/__init__.py +22 -0
  159. package/ccw-litellm/src/ccw_litellm/config/__pycache__/__init__.cpython-313.pyc +0 -0
  160. package/ccw-litellm/src/ccw_litellm/config/__pycache__/loader.cpython-313.pyc +0 -0
  161. package/ccw-litellm/src/ccw_litellm/config/__pycache__/models.cpython-313.pyc +0 -0
  162. package/ccw-litellm/src/ccw_litellm/config/loader.py +316 -0
  163. package/ccw-litellm/src/ccw_litellm/config/models.py +130 -0
  164. package/ccw-litellm/src/ccw_litellm/interfaces/__init__.py +14 -0
  165. package/ccw-litellm/src/ccw_litellm/interfaces/__pycache__/__init__.cpython-313.pyc +0 -0
  166. package/ccw-litellm/src/ccw_litellm/interfaces/__pycache__/embedder.cpython-313.pyc +0 -0
  167. package/ccw-litellm/src/ccw_litellm/interfaces/__pycache__/llm.cpython-313.pyc +0 -0
  168. package/ccw-litellm/src/ccw_litellm/interfaces/embedder.py +52 -0
  169. package/ccw-litellm/src/ccw_litellm/interfaces/llm.py +45 -0
  170. package/codex-lens/src/codexlens/__pycache__/config.cpython-313.pyc +0 -0
  171. package/codex-lens/src/codexlens/cli/__pycache__/commands.cpython-313.pyc +0 -0
  172. package/codex-lens/src/codexlens/cli/__pycache__/embedding_manager.cpython-313.pyc +0 -0
  173. package/codex-lens/src/codexlens/cli/__pycache__/model_manager.cpython-313.pyc +0 -0
  174. package/codex-lens/src/codexlens/cli/__pycache__/output.cpython-313.pyc +0 -0
  175. package/codex-lens/src/codexlens/cli/commands.py +378 -23
  176. package/codex-lens/src/codexlens/cli/embedding_manager.py +660 -56
  177. package/codex-lens/src/codexlens/cli/model_manager.py +31 -18
  178. package/codex-lens/src/codexlens/cli/output.py +12 -1
  179. package/codex-lens/src/codexlens/config.py +93 -0
  180. package/codex-lens/src/codexlens/search/__pycache__/chain_search.cpython-313.pyc +0 -0
  181. package/codex-lens/src/codexlens/search/__pycache__/hybrid_search.cpython-313.pyc +0 -0
  182. package/codex-lens/src/codexlens/search/__pycache__/ranking.cpython-313.pyc +0 -0
  183. package/codex-lens/src/codexlens/search/chain_search.py +6 -2
  184. package/codex-lens/src/codexlens/search/hybrid_search.py +44 -21
  185. package/codex-lens/src/codexlens/search/ranking.py +1 -1
  186. package/codex-lens/src/codexlens/semantic/__init__.py +42 -0
  187. package/codex-lens/src/codexlens/semantic/__pycache__/__init__.cpython-313.pyc +0 -0
  188. package/codex-lens/src/codexlens/semantic/__pycache__/base.cpython-313.pyc +0 -0
  189. package/codex-lens/src/codexlens/semantic/__pycache__/chunker.cpython-313.pyc +0 -0
  190. package/codex-lens/src/codexlens/semantic/__pycache__/embedder.cpython-313.pyc +0 -0
  191. package/codex-lens/src/codexlens/semantic/__pycache__/factory.cpython-313.pyc +0 -0
  192. package/codex-lens/src/codexlens/semantic/__pycache__/gpu_support.cpython-313.pyc +0 -0
  193. package/codex-lens/src/codexlens/semantic/__pycache__/litellm_embedder.cpython-313.pyc +0 -0
  194. package/codex-lens/src/codexlens/semantic/__pycache__/vector_store.cpython-313.pyc +0 -0
  195. package/codex-lens/src/codexlens/semantic/base.py +61 -0
  196. package/codex-lens/src/codexlens/semantic/chunker.py +43 -20
  197. package/codex-lens/src/codexlens/semantic/embedder.py +60 -13
  198. package/codex-lens/src/codexlens/semantic/factory.py +98 -0
  199. package/codex-lens/src/codexlens/semantic/gpu_support.py +225 -3
  200. package/codex-lens/src/codexlens/semantic/litellm_embedder.py +144 -0
  201. package/codex-lens/src/codexlens/semantic/rotational_embedder.py +434 -0
  202. package/codex-lens/src/codexlens/semantic/vector_store.py +33 -8
  203. package/codex-lens/src/codexlens/storage/__pycache__/path_mapper.cpython-313.pyc +0 -0
  204. package/codex-lens/src/codexlens/storage/migrations/__pycache__/migration_004_dual_fts.cpython-313.pyc +0 -0
  205. package/codex-lens/src/codexlens/storage/path_mapper.py +27 -1
  206. package/package.json +15 -5
  207. package/.codex/prompts.zip +0 -0
  208. package/ccw/package.json +0 -65
@@ -0,0 +1,434 @@
1
+ """Rotational embedder for multi-endpoint API load balancing.
2
+
3
+ Provides intelligent load balancing across multiple LiteLLM embedding endpoints
4
+ to maximize throughput while respecting rate limits.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ import logging
10
+ import random
11
+ import threading
12
+ import time
13
+ from dataclasses import dataclass, field
14
+ from enum import Enum
15
+ from typing import Any, Dict, Iterable, List, Optional
16
+
17
+ import numpy as np
18
+
19
+ from .base import BaseEmbedder
20
+
21
+ logger = logging.getLogger(__name__)
22
+
23
+
24
class EndpointStatus(Enum):
    """Lifecycle state of a single embedding API endpoint."""

    # Ready to accept requests.
    AVAILABLE = "available"
    # Rate limited; temporarily skipped until its cooldown expires.
    COOLING = "cooling"
    # Permanently disabled (authentication/configuration error, etc.).
    FAILED = "failed"
29
+
30
+
31
class SelectionStrategy(Enum):
    """Policy used to pick which endpoint serves the next request."""

    # Cycle through the currently available endpoints in order.
    ROUND_ROBIN = "round_robin"
    # Prefer the endpoint with the best health score.
    LATENCY_AWARE = "latency_aware"
    # Pick randomly, weighted by each endpoint's health score.
    WEIGHTED_RANDOM = "weighted_random"
36
+
37
+
38
@dataclass
class EndpointConfig:
    """Static settings describing one embedding API endpoint."""

    # Model identifier handed to the underlying embedder.
    model: str
    # Credentials / base URL; only forwarded to the embedder when set.
    api_key: Optional[str] = None
    api_base: Optional[str] = None
    # Multiplier applied to the endpoint's health score (higher = more requests).
    weight: float = 1.0
    # Max concurrent requests to this endpoint.
    max_concurrent: int = 4
46
+
47
+
48
@dataclass
class EndpointState:
    """Runtime state for an endpoint.

    Tracks health (status / cooldown), request counters, a moving-average
    latency and the number of in-flight requests for one configured endpoint.
    """
    config: EndpointConfig
    embedder: Any = None  # LiteLLMEmbedderWrapper instance

    # Health metrics
    status: EndpointStatus = EndpointStatus.AVAILABLE
    cooldown_until: float = 0.0  # Unix timestamp when cooldown ends

    # Performance metrics
    total_requests: int = 0
    total_failures: int = 0
    avg_latency_ms: float = 0.0
    last_latency_ms: float = 0.0

    # Concurrency tracking
    active_requests: int = 0
    lock: threading.Lock = field(default_factory=threading.Lock)

    def is_available(self) -> bool:
        """Check if endpoint is available for requests.

        Side effect: a COOLING endpoint whose cooldown has elapsed is flipped
        back to AVAILABLE.

        Returns:
            True if the endpoint may be used now, False if it is FAILED or
            still cooling down.
        """
        if self.status == EndpointStatus.FAILED:
            return False
        if self.status == EndpointStatus.COOLING:
            if time.time() >= self.cooldown_until:
                self.status = EndpointStatus.AVAILABLE
                return True
            return False
        return True

    def set_cooldown(self, seconds: float) -> None:
        """Put endpoint in cooldown state.

        Args:
            seconds: How long, from now, the endpoint should be skipped.
        """
        self.status = EndpointStatus.COOLING
        self.cooldown_until = time.time() + seconds
        logger.warning(f"Endpoint {self.config.model} cooling down for {seconds:.1f}s")

    def mark_failed(self) -> None:
        """Mark endpoint as permanently failed (it is never selected again)."""
        self.status = EndpointStatus.FAILED
        logger.error(f"Endpoint {self.config.model} marked as failed")

    def record_success(self, latency_ms: float) -> None:
        """Record successful request.

        The counters are updated under ``lock`` because ``+=`` on an attribute
        is a read-modify-write that can lose updates when several threads
        report results concurrently. Do not call this while already holding
        ``lock`` (it is not re-entrant).

        Args:
            latency_ms: Duration of the completed request in milliseconds.
        """
        # Exponential moving average for latency
        alpha = 0.3
        with self.lock:
            self.total_requests += 1
            self.last_latency_ms = latency_ms
            if self.avg_latency_ms == 0:
                # First sample seeds the average.
                self.avg_latency_ms = latency_ms
            else:
                self.avg_latency_ms = alpha * latency_ms + (1 - alpha) * self.avg_latency_ms

    def record_failure(self) -> None:
        """Record failed request.

        Updated under ``lock`` for the same reason as :meth:`record_success`;
        do not call while already holding ``lock``.
        """
        with self.lock:
            self.total_requests += 1
            self.total_failures += 1

    @property
    def health_score(self) -> float:
        """Calculate a health score from success rate, latency and load.

        The unweighted score lies in [0, 1]; it is then multiplied by
        ``config.weight``, so the returned value may exceed 1 for weights
        above 1. Unavailable endpoints always score 0.0.
        """
        if not self.is_available():
            return 0.0

        # Base score from success rate
        if self.total_requests > 0:
            success_rate = 1 - (self.total_failures / self.total_requests)
        else:
            success_rate = 1.0

        # Latency factor (faster = higher score)
        # Normalize: 100ms = 1.0, 1000ms = 0.1
        if self.avg_latency_ms > 0:
            latency_factor = min(1.0, 100 / self.avg_latency_ms)
        else:
            latency_factor = 1.0

        # Availability factor (less concurrent = more available).
        # Clamped at 0: active_requests is not capped at max_concurrent, so an
        # oversubscribed endpoint must not contribute a negative term.
        if self.config.max_concurrent > 0:
            availability = max(0.0, 1 - (self.active_requests / self.config.max_concurrent))
        else:
            availability = 1.0

        # Combined score with weights
        return (success_rate * 0.4 + latency_factor * 0.3 + availability * 0.3) * self.config.weight
133
+
134
+
135
class RotationalEmbedder(BaseEmbedder):
    """Embedder that load balances across multiple API endpoints.

    Features:
    - Intelligent endpoint selection based on latency and health
    - Automatic failover on rate limits (429) and server errors
    - Cooldown management to respect rate limits
    - Thread-safe concurrent request handling

    Note:
        ``embedding_dim`` and ``max_tokens`` are taken from the first endpoint
        that initializes successfully; other endpoints are not checked against
        them here.

    Args:
        endpoints: List of endpoint configurations
        strategy: Selection strategy (default: latency_aware)
        default_cooldown: Default cooldown seconds for rate limits (default: 60)
        max_retries: Maximum retry attempts across all endpoints (default: 3)

    Raises:
        ValueError: If ``endpoints`` is empty or no endpoint could be initialized.
    """

    def __init__(
        self,
        endpoints: List[EndpointConfig],
        strategy: SelectionStrategy = SelectionStrategy.LATENCY_AWARE,
        default_cooldown: float = 60.0,
        max_retries: int = 3,
    ) -> None:
        if not endpoints:
            raise ValueError("At least one endpoint must be provided")

        self.strategy = strategy
        self.default_cooldown = default_cooldown
        self.max_retries = max_retries

        # Initialize endpoint states
        self._endpoints: List[EndpointState] = []
        self._lock = threading.Lock()
        self._round_robin_index = 0

        # Create embedder instances for each endpoint
        from .litellm_embedder import LiteLLMEmbedderWrapper

        for config in endpoints:
            # Build kwargs for LiteLLMEmbedderWrapper; unset credentials are omitted
            kwargs: Dict[str, Any] = {}
            if config.api_key:
                kwargs["api_key"] = config.api_key
            if config.api_base:
                kwargs["api_base"] = config.api_base

            try:
                embedder = LiteLLMEmbedderWrapper(model=config.model, **kwargs)
                state = EndpointState(config=config, embedder=embedder)
                self._endpoints.append(state)
                logger.info(f"Initialized endpoint: {config.model}")
            except Exception as e:
                # Best effort: one broken endpoint must not prevent the others
                # from being used.
                logger.error(f"Failed to initialize endpoint {config.model}: {e}")

        if not self._endpoints:
            raise ValueError("Failed to initialize any endpoints")

        # Cache embedding properties from first endpoint
        self._embedding_dim = self._endpoints[0].embedder.embedding_dim
        self._model_name = f"rotational({len(self._endpoints)} endpoints)"
        self._max_tokens = self._endpoints[0].embedder.max_tokens

    @property
    def embedding_dim(self) -> int:
        """Return embedding dimensions."""
        return self._embedding_dim

    @property
    def model_name(self) -> str:
        """Return model name."""
        return self._model_name

    @property
    def max_tokens(self) -> int:
        """Return maximum token limit."""
        return self._max_tokens

    @property
    def endpoint_count(self) -> int:
        """Return number of configured endpoints."""
        return len(self._endpoints)

    @property
    def available_endpoint_count(self) -> int:
        """Return number of available endpoints."""
        return sum(1 for ep in self._endpoints if ep.is_available())

    def get_endpoint_stats(self) -> List[Dict[str, Any]]:
        """Get statistics for all endpoints.

        Returns:
            One dict per endpoint with model, status, request/failure counts,
            average latency, health score and in-flight request count.
        """
        return [
            {
                "model": ep.config.model,
                "status": ep.status.value,
                "total_requests": ep.total_requests,
                "total_failures": ep.total_failures,
                "avg_latency_ms": round(ep.avg_latency_ms, 2),
                "health_score": round(ep.health_score, 3),
                "active_requests": ep.active_requests,
            }
            for ep in self._endpoints
        ]

    def _select_endpoint(self) -> Optional[EndpointState]:
        """Select best available endpoint based on strategy.

        Returns:
            The chosen endpoint, or None when no endpoint is currently available.
        """
        available = [ep for ep in self._endpoints if ep.is_available()]

        if not available:
            return None

        if self.strategy == SelectionStrategy.ROUND_ROBIN:
            # The index is reduced modulo the *current* availability list,
            # which can shrink or grow between calls.
            with self._lock:
                self._round_robin_index = (self._round_robin_index + 1) % len(available)
                return available[self._round_robin_index]

        elif self.strategy == SelectionStrategy.LATENCY_AWARE:
            # Pick the highest health score; a small random factor prevents
            # every caller from piling onto the same endpoint (thundering herd).
            return max(available, key=lambda ep: ep.health_score + random.uniform(0, 0.1))

        elif self.strategy == SelectionStrategy.WEIGHTED_RANDOM:
            # Weighted random selection based on health scores
            scores = [ep.health_score for ep in available]
            total = sum(scores)
            if total == 0:
                return random.choice(available)

            weights = [s / total for s in scores]
            return random.choices(available, weights=weights, k=1)[0]

        return available[0]

    def _parse_retry_after(self, error: Exception) -> Optional[float]:
        """Extract Retry-After value from error if available.

        Looks for "Retry-After"/"retry after" followed by a number in the
        error's string form. Decimal values (e.g. "1.5") are accepted.

        Returns:
            The delay in seconds, or None if the message carries no such hint.
        """
        import re

        match = re.search(r'[Rr]etry[- ][Aa]fter[:\s]+(\d+(?:\.\d+)?)', str(error))
        if match:
            return float(match.group(1))

        return None

    def _is_rate_limit_error(self, error: Exception) -> bool:
        """Check if error is a rate limit error (matched on the message text)."""
        error_str = str(error).lower()
        return any(x in error_str for x in ["429", "rate limit", "too many requests"])

    def _is_retryable_error(self, error: Exception) -> bool:
        """Check if error is retryable (not auth/config error).

        Retryable markers are checked first, so a message containing both a
        retryable and a non-retryable marker is treated as retryable.
        """
        error_str = str(error).lower()
        # Retryable errors
        if any(x in error_str for x in ["429", "rate limit", "502", "503", "504",
                                        "timeout", "connection", "service unavailable"]):
            return True
        # Non-retryable errors (auth, config)
        if any(x in error_str for x in ["401", "403", "invalid", "authentication",
                                        "unauthorized", "api key"]):
            return False
        # Default to retryable for unknown errors
        return True

    def embed_to_numpy(self, texts: str | Iterable[str], **kwargs) -> np.ndarray:
        """Embed texts using load-balanced endpoint selection.

        Each attempt selects an endpoint, runs the request and, on failure,
        either cools the endpoint down (rate limit / transient error) or marks
        it permanently failed (auth / configuration error) before trying again.

        Args:
            texts: Single text or iterable of texts to embed.
            **kwargs: Additional arguments passed to underlying embedder.

        Returns:
            numpy.ndarray: Array of shape (n_texts, embedding_dim) containing embeddings.

        Raises:
            RuntimeError: If all endpoints fail after retries.
        """
        if isinstance(texts, str):
            texts = [texts]
        else:
            texts = list(texts)

        last_error: Optional[Exception] = None
        tried_endpoints: set = set()

        for attempt in range(self.max_retries + 1):
            endpoint = self._select_endpoint()

            if endpoint is None:
                # Nothing selectable right now: work out how long until the
                # first cooling endpoint comes back.
                now = time.time()
                remaining = [ep.cooldown_until - now for ep in self._endpoints
                             if ep.status == EndpointStatus.COOLING]

                if not remaining:
                    if all(ep.status == EndpointStatus.FAILED for ep in self._endpoints):
                        # Every endpoint is permanently failed; waiting cannot
                        # help, so fail fast instead of sleeping.
                        break
                    # An endpoint recovered concurrently; just reselect.
                    continue

                if attempt >= self.max_retries:
                    break

                min_cooldown = min(remaining)
                if min_cooldown > 0:
                    wait_time = min(min_cooldown, 30)  # Cap wait at 30s
                    logger.warning(f"All endpoints busy, waiting {wait_time:.1f}s...")
                    time.sleep(wait_time)
                # A non-positive remainder means a cooldown has just expired,
                # so the next selection can succeed immediately.
                continue

            # Track tried endpoints to avoid infinite loops
            endpoint_id = id(endpoint)
            if endpoint_id in tried_endpoints and len(tried_endpoints) >= len(self._endpoints):
                # Already tried all endpoints
                break
            tried_endpoints.add(endpoint_id)

            # Acquire slot
            with endpoint.lock:
                endpoint.active_requests += 1

            try:
                # Monotonic timer: latency must not be skewed by wall-clock
                # adjustments.
                started = time.perf_counter()
                result = endpoint.embedder.embed_to_numpy(texts, **kwargs)
                latency_ms = (time.perf_counter() - started) * 1000

                # Record success
                endpoint.record_success(latency_ms)

                return result

            except Exception as e:
                last_error = e
                endpoint.record_failure()

                if self._is_rate_limit_error(e):
                    # Rate limited - set cooldown
                    retry_after = self._parse_retry_after(e) or self.default_cooldown
                    endpoint.set_cooldown(retry_after)
                    logger.warning(f"Endpoint {endpoint.config.model} rate limited, "
                                   f"cooling for {retry_after}s")

                elif not self._is_retryable_error(e):
                    # Permanent failure (auth error, etc.)
                    endpoint.mark_failed()
                    logger.error(f"Endpoint {endpoint.config.model} failed permanently: {e}")

                else:
                    # Temporary error - short cooldown
                    endpoint.set_cooldown(5.0)
                    logger.warning(f"Endpoint {endpoint.config.model} error: {e}")

            finally:
                # Release the slot whether the request succeeded or failed.
                with endpoint.lock:
                    endpoint.active_requests -= 1

        # All retries exhausted
        available = self.available_endpoint_count
        raise RuntimeError(
            f"All embedding attempts failed after {self.max_retries + 1} tries. "
            f"Available endpoints: {available}/{len(self._endpoints)}. "
            f"Last error: {last_error}"
        )
390
+
391
+
392
def create_rotational_embedder(
    endpoints_config: List[Dict[str, Any]],
    strategy: str = "latency_aware",
    default_cooldown: float = 60.0,
    max_retries: int = 3,
) -> RotationalEmbedder:
    """Factory function to create RotationalEmbedder from config dicts.

    Args:
        endpoints_config: List of endpoint configuration dicts with keys:
            - model: Model identifier (required)
            - api_key: API key (optional)
            - api_base: API base URL (optional)
            - weight: Request weight (optional, default 1.0)
            - max_concurrent: Max concurrent requests (optional, default 4)
        strategy: Selection strategy name (round_robin, latency_aware, weighted_random);
            matched case-insensitively against SelectionStrategy member names.
        default_cooldown: Default cooldown seconds for rate limits
        max_retries: Maximum retry attempts across all endpoints (default: 3)

    Returns:
        Configured RotationalEmbedder instance

    Raises:
        KeyError: If an endpoint dict has no "model" key or ``strategy`` is
            not a known strategy name.
        ValueError: If no endpoint is given or none could be initialized.

    Example config:
        endpoints_config = [
            {"model": "openai/text-embedding-3-small", "api_key": "sk-..."},
            {"model": "azure/my-embedding", "api_base": "https://...", "api_key": "..."},
        ]
    """
    endpoints = [
        EndpointConfig(
            model=cfg["model"],
            api_key=cfg.get("api_key"),
            api_base=cfg.get("api_base"),
            weight=cfg.get("weight", 1.0),
            max_concurrent=cfg.get("max_concurrent", 4),
        )
        for cfg in endpoints_config
    ]

    # Lookup by member name, hence the upper-casing of the user-facing value.
    strategy_enum = SelectionStrategy[strategy.upper()]

    return RotationalEmbedder(
        endpoints=endpoints,
        strategy=strategy_enum,
        default_cooldown=default_cooldown,
        max_retries=max_retries,
    )
@@ -123,12 +123,34 @@ class VectorStore:
123
123
  model_profile TEXT NOT NULL,
124
124
  model_name TEXT NOT NULL,
125
125
  embedding_dim INTEGER NOT NULL,
126
+ backend TEXT NOT NULL DEFAULT 'fastembed',
126
127
  created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
127
128
  updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
128
129
  )
129
130
  """)
131
+
132
+ # Migration: Add backend column to existing tables
133
+ self._migrate_backend_column(conn)
134
+
130
135
  conn.commit()
131
136
 
137
+ def _migrate_backend_column(self, conn: sqlite3.Connection) -> None:
138
+ """Add backend column to existing embeddings_config table if not present.
139
+
140
+ Args:
141
+ conn: Active SQLite connection
142
+ """
143
+ # Check if backend column exists
144
+ cursor = conn.execute("PRAGMA table_info(embeddings_config)")
145
+ columns = [row[1] for row in cursor.fetchall()]
146
+
147
+ if 'backend' not in columns:
148
+ logger.info("Migrating embeddings_config table: adding backend column")
149
+ conn.execute("""
150
+ ALTER TABLE embeddings_config
151
+ ADD COLUMN backend TEXT NOT NULL DEFAULT 'fastembed'
152
+ """)
153
+
132
154
  def _init_ann_index(self) -> None:
133
155
  """Initialize ANN index (lazy loading from existing data)."""
134
156
  if not HNSWLIB_AVAILABLE:
@@ -947,11 +969,11 @@ class VectorStore:
947
969
  """Get the model configuration used for embeddings in this store.
948
970
 
949
971
  Returns:
950
- Dictionary with model_profile, model_name, embedding_dim, or None if not set.
972
+ Dictionary with model_profile, model_name, embedding_dim, backend, or None if not set.
951
973
  """
952
974
  with sqlite3.connect(self.db_path) as conn:
953
975
  row = conn.execute(
954
- "SELECT model_profile, model_name, embedding_dim, created_at, updated_at "
976
+ "SELECT model_profile, model_name, embedding_dim, backend, created_at, updated_at "
955
977
  "FROM embeddings_config WHERE id = 1"
956
978
  ).fetchone()
957
979
  if row:
@@ -959,13 +981,14 @@ class VectorStore:
959
981
  "model_profile": row[0],
960
982
  "model_name": row[1],
961
983
  "embedding_dim": row[2],
962
- "created_at": row[3],
963
- "updated_at": row[4],
984
+ "backend": row[3],
985
+ "created_at": row[4],
986
+ "updated_at": row[5],
964
987
  }
965
988
  return None
966
989
 
967
990
  def set_model_config(
968
- self, model_profile: str, model_name: str, embedding_dim: int
991
+ self, model_profile: str, model_name: str, embedding_dim: int, backend: str = 'fastembed'
969
992
  ) -> None:
970
993
  """Set the model configuration for embeddings in this store.
971
994
 
@@ -976,19 +999,21 @@ class VectorStore:
976
999
  model_profile: Model profile name (fast, code, minilm, etc.)
977
1000
  model_name: Full model name (e.g., jinaai/jina-embeddings-v2-base-code)
978
1001
  embedding_dim: Embedding dimension (e.g., 768)
1002
+ backend: Backend used for embeddings (fastembed or litellm, default: fastembed)
979
1003
  """
980
1004
  with sqlite3.connect(self.db_path) as conn:
981
1005
  conn.execute(
982
1006
  """
983
- INSERT INTO embeddings_config (id, model_profile, model_name, embedding_dim)
984
- VALUES (1, ?, ?, ?)
1007
+ INSERT INTO embeddings_config (id, model_profile, model_name, embedding_dim, backend)
1008
+ VALUES (1, ?, ?, ?, ?)
985
1009
  ON CONFLICT(id) DO UPDATE SET
986
1010
  model_profile = excluded.model_profile,
987
1011
  model_name = excluded.model_name,
988
1012
  embedding_dim = excluded.embedding_dim,
1013
+ backend = excluded.backend,
989
1014
  updated_at = CURRENT_TIMESTAMP
990
1015
  """,
991
- (model_profile, model_name, embedding_dim)
1016
+ (model_profile, model_name, embedding_dim, backend)
992
1017
  )
993
1018
  conn.commit()
994
1019
 
@@ -14,11 +14,37 @@ Storage Structure:
14
14
  └── _index.db # src/ directory index
15
15
  """
16
16
 
17
+ import json
18
+ import os
17
19
  import platform
18
20
  from pathlib import Path
19
21
  from typing import Optional
20
22
 
21
23
 
24
+ def _get_configured_index_root() -> Path:
25
+ """Get the index root from environment or config file.
26
+
27
+ Priority order:
28
+ 1. CODEXLENS_INDEX_DIR environment variable
29
+ 2. index_dir from ~/.codexlens/config.json
30
+ 3. Default: ~/.codexlens/indexes
31
+ """
32
+ env_override = os.getenv("CODEXLENS_INDEX_DIR")
33
+ if env_override:
34
+ return Path(env_override).expanduser().resolve()
35
+
36
+ config_file = Path.home() / ".codexlens" / "config.json"
37
+ if config_file.exists():
38
+ try:
39
+ cfg = json.loads(config_file.read_text(encoding="utf-8"))
40
+ if "index_dir" in cfg:
41
+ return Path(cfg["index_dir"]).expanduser().resolve()
42
+ except (json.JSONDecodeError, OSError):
43
+ pass
44
+
45
+ return Path.home() / ".codexlens" / "indexes"
46
+
47
+
22
48
  class PathMapper:
23
49
  """Bidirectional mapping tool for source paths ↔ index paths.
24
50
 
@@ -31,7 +57,7 @@ class PathMapper:
31
57
  index_root: Configured index root directory
32
58
  """
33
59
 
34
- DEFAULT_INDEX_ROOT = Path.home() / ".codexlens" / "indexes"
60
+ DEFAULT_INDEX_ROOT = _get_configured_index_root()
35
61
  INDEX_DB_NAME = "_index.db"
36
62
 
37
63
  def __init__(self, index_root: Optional[Path] = None):
package/package.json CHANGED
@@ -1,16 +1,18 @@
1
1
  {
2
2
  "name": "claude-code-workflow",
3
- "version": "6.2.7",
3
+ "version": "6.3.0",
4
4
  "description": "JSON-driven multi-agent development framework with intelligent CLI orchestration (Gemini/Qwen/Codex), context-first architecture, and automated workflow execution",
5
5
  "type": "module",
6
6
  "main": "ccw/src/index.js",
7
7
  "bin": {
8
- "ccw": "./ccw/bin/ccw.js"
8
+ "ccw": "./ccw/bin/ccw.js",
9
+ "ccw-mcp": "./ccw/bin/ccw-mcp.js"
9
10
  },
10
11
  "scripts": {
12
+ "build": "tsc -p ccw/tsconfig.json",
11
13
  "start": "node ccw/bin/ccw.js",
12
14
  "test": "node --test",
13
- "prepublishOnly": "echo 'Ready to publish @dyw/claude-code-workflow'"
15
+ "prepublishOnly": "npm run build && echo 'Ready to publish @dyw/claude-code-workflow'"
14
16
  },
15
17
  "keywords": [
16
18
  "claude",
@@ -45,7 +47,6 @@
45
47
  "ccw/bin/",
46
48
  "ccw/dist/",
47
49
  "ccw/src/",
48
- "ccw/package.json",
49
50
  ".claude/agents/",
50
51
  ".claude/commands/",
51
52
  ".claude/output-styles/",
@@ -59,6 +60,8 @@
59
60
  ".qwen/",
60
61
  "codex-lens/src/codexlens/",
61
62
  "codex-lens/pyproject.toml",
63
+ "ccw-litellm/src/ccw_litellm/",
64
+ "ccw-litellm/pyproject.toml",
62
65
  "CLAUDE.md",
63
66
  "README.md"
64
67
  ],
@@ -69,5 +72,12 @@
69
72
  "bugs": {
70
73
  "url": "https://github.com/catlog22/Claude-Code-Workflow/issues"
71
74
  },
72
- "homepage": "https://github.com/catlog22/Claude-Code-Workflow#readme"
75
+ "homepage": "https://github.com/catlog22/Claude-Code-Workflow#readme",
76
+ "devDependencies": {
77
+ "@types/better-sqlite3": "^7.6.12",
78
+ "@types/gradient-string": "^1.1.6",
79
+ "@types/inquirer": "^9.0.9",
80
+ "@types/node": "^25.0.1",
81
+ "typescript": "^5.9.3"
82
+ }
73
83
  }
Binary file