claude-code-workflow 6.3.4 → 6.3.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/agents/issue-plan-agent.md +859 -0
- package/.claude/agents/issue-queue-agent.md +702 -0
- package/.claude/commands/issue/execute.md +453 -0
- package/.claude/commands/issue/manage.md +865 -0
- package/.claude/commands/issue/new.md +484 -0
- package/.claude/commands/issue/plan.md +421 -0
- package/.claude/commands/issue/queue.md +354 -0
- package/.claude/commands/{clean.md → workflow/clean.md} +5 -5
- package/.claude/commands/workflow/docs/analyze.md +1467 -0
- package/.claude/commands/workflow/docs/copyright.md +1265 -0
- package/.claude/commands/workflow/execute.md +0 -1
- package/.claude/commands/workflow/tools/conflict-resolution.md +76 -240
- package/.claude/commands/workflow/tools/context-gather.md +0 -2
- package/.claude/commands/workflow/tools/task-generate-agent.md +81 -8
- package/.claude/commands/workflow/tools/task-generate-tdd.md +0 -9
- package/.claude/commands/workflow/tools/test-context-gather.md +2 -3
- package/.claude/commands/workflow/tools/test-task-generate.md +0 -2
- package/.claude/skills/_shared/mermaid-utils.md +584 -0
- package/.claude/skills/command-guide/reference/agents/action-planning-agent.md +0 -2
- package/.claude/skills/command-guide/reference/commands/workflow/execute.md +1 -1
- package/.claude/skills/command-guide/reference/commands/workflow/tools/context-gather.md +1 -2
- package/.claude/skills/command-guide/reference/commands/workflow/tools/task-generate-tdd.md +1 -8
- package/.claude/skills/command-guide/reference/commands/workflow/tools/test-context-gather.md +1 -4
- package/.claude/skills/command-guide/reference/commands/workflow/tools/test-task-generate.md +0 -2
- package/.claude/skills/copyright-docs/SKILL.md +132 -0
- package/.claude/skills/copyright-docs/phases/01-metadata-collection.md +78 -0
- package/.claude/skills/copyright-docs/phases/01.5-project-exploration.md +150 -0
- package/.claude/skills/copyright-docs/phases/02-deep-analysis.md +664 -0
- package/.claude/skills/copyright-docs/phases/02.5-consolidation.md +192 -0
- package/.claude/skills/copyright-docs/phases/04-document-assembly.md +261 -0
- package/.claude/skills/copyright-docs/phases/05-compliance-refinement.md +192 -0
- package/.claude/skills/copyright-docs/specs/cpcc-requirements.md +121 -0
- package/.claude/skills/copyright-docs/templates/agent-base.md +200 -0
- package/.claude/skills/project-analyze/SKILL.md +162 -0
- package/.claude/skills/project-analyze/phases/01-requirements-discovery.md +79 -0
- package/.claude/skills/project-analyze/phases/02-project-exploration.md +176 -0
- package/.claude/skills/project-analyze/phases/03-deep-analysis.md +854 -0
- package/.claude/skills/project-analyze/phases/03.5-consolidation.md +233 -0
- package/.claude/skills/project-analyze/phases/04-report-generation.md +217 -0
- package/.claude/skills/project-analyze/phases/05-iterative-refinement.md +124 -0
- package/.claude/skills/project-analyze/specs/quality-standards.md +115 -0
- package/.claude/skills/project-analyze/specs/writing-style.md +152 -0
- package/.claude/workflows/cli-templates/schemas/conflict-resolution-schema.json +79 -65
- package/.claude/workflows/cli-templates/schemas/issue-task-jsonl-schema.json +136 -0
- package/.claude/workflows/cli-templates/schemas/issues-jsonl-schema.json +74 -0
- package/.claude/workflows/cli-templates/schemas/queue-schema.json +136 -0
- package/.claude/workflows/cli-templates/schemas/registry-schema.json +94 -0
- package/.claude/workflows/cli-templates/schemas/solution-schema.json +120 -0
- package/.claude/workflows/cli-templates/schemas/solutions-jsonl-schema.json +125 -0
- package/.codex/prompts/issue-execute.md +266 -0
- package/README.md +11 -1
- package/ccw/dist/cli.d.ts.map +1 -1
- package/ccw/dist/cli.js +25 -0
- package/ccw/dist/cli.js.map +1 -1
- package/ccw/dist/commands/cli.d.ts.map +1 -1
- package/ccw/dist/commands/cli.js +46 -8
- package/ccw/dist/commands/cli.js.map +1 -1
- package/ccw/dist/commands/issue.d.ts +21 -0
- package/ccw/dist/commands/issue.d.ts.map +1 -0
- package/ccw/dist/commands/issue.js +895 -0
- package/ccw/dist/commands/issue.js.map +1 -0
- package/ccw/dist/core/dashboard-generator-patch.js +1 -0
- package/ccw/dist/core/dashboard-generator-patch.js.map +1 -1
- package/ccw/dist/core/routes/cli-routes.js +2 -2
- package/ccw/dist/core/routes/cli-routes.js.map +1 -1
- package/ccw/dist/core/routes/issue-routes.d.ts +34 -0
- package/ccw/dist/core/routes/issue-routes.d.ts.map +1 -0
- package/ccw/dist/core/routes/issue-routes.js +487 -0
- package/ccw/dist/core/routes/issue-routes.js.map +1 -0
- package/ccw/dist/core/server.d.ts.map +1 -1
- package/ccw/dist/core/server.js +17 -2
- package/ccw/dist/core/server.js.map +1 -1
- package/ccw/dist/tools/claude-cli-tools.d.ts +7 -3
- package/ccw/dist/tools/claude-cli-tools.d.ts.map +1 -1
- package/ccw/dist/tools/claude-cli-tools.js +31 -17
- package/ccw/dist/tools/claude-cli-tools.js.map +1 -1
- package/ccw/dist/tools/smart-search.d.ts +25 -0
- package/ccw/dist/tools/smart-search.d.ts.map +1 -1
- package/ccw/dist/tools/smart-search.js +121 -17
- package/ccw/dist/tools/smart-search.js.map +1 -1
- package/ccw/src/cli.ts +26 -0
- package/ccw/src/commands/cli.ts +49 -7
- package/ccw/src/commands/issue.ts +1184 -0
- package/ccw/src/core/dashboard-generator-patch.ts +1 -0
- package/ccw/src/core/routes/cli-routes.ts +3 -3
- package/ccw/src/core/routes/issue-routes.ts +559 -0
- package/ccw/src/core/server.ts +17 -2
- package/ccw/src/templates/dashboard-css/32-issue-manager.css +2544 -0
- package/ccw/src/templates/dashboard-css/33-cli-stream-viewer.css +467 -0
- package/ccw/src/templates/dashboard-js/components/cli-history.js +40 -13
- package/ccw/src/templates/dashboard-js/components/cli-status.js +26 -2
- package/ccw/src/templates/dashboard-js/components/cli-stream-viewer.js +461 -0
- package/ccw/src/templates/dashboard-js/components/navigation.js +8 -0
- package/ccw/src/templates/dashboard-js/components/notifications.js +16 -0
- package/ccw/src/templates/dashboard-js/i18n.js +290 -2
- package/ccw/src/templates/dashboard-js/views/cli-manager.js +5 -0
- package/ccw/src/templates/dashboard-js/views/history.js +19 -4
- package/ccw/src/templates/dashboard-js/views/hook-manager.js +11 -5
- package/ccw/src/templates/dashboard-js/views/issue-manager.js +1546 -0
- package/ccw/src/templates/dashboard.html +55 -0
- package/ccw/src/tools/claude-cli-tools.ts +37 -20
- package/ccw/src/tools/smart-search.ts +157 -16
- package/codex-lens/src/codexlens/__pycache__/config.cpython-313.pyc +0 -0
- package/codex-lens/src/codexlens/config.py +5 -0
- package/codex-lens/src/codexlens/search/__pycache__/hybrid_search.cpython-313.pyc +0 -0
- package/codex-lens/src/codexlens/search/__pycache__/ranking.cpython-313.pyc +0 -0
- package/codex-lens/src/codexlens/search/hybrid_search.py +144 -11
- package/codex-lens/src/codexlens/search/ranking.py +267 -1
- package/codex-lens/src/codexlens/semantic/__pycache__/chunker.cpython-313.pyc +0 -0
- package/codex-lens/src/codexlens/semantic/chunker.py +55 -10
- package/package.json +2 -2
|
@@ -7,12 +7,38 @@ results via Reciprocal Rank Fusion (RRF) algorithm.
|
|
|
7
7
|
from __future__ import annotations
|
|
8
8
|
|
|
9
9
|
import logging
|
|
10
|
+
import time
|
|
10
11
|
from concurrent.futures import ThreadPoolExecutor, as_completed
|
|
12
|
+
from contextlib import contextmanager
|
|
11
13
|
from pathlib import Path
|
|
12
|
-
from typing import Dict, List, Optional
|
|
14
|
+
from typing import Any, Dict, List, Optional
|
|
13
15
|
|
|
16
|
+
|
|
17
|
+
@contextmanager
|
|
18
|
+
def timer(name: str, logger: logging.Logger, level: int = logging.DEBUG):
|
|
19
|
+
"""Context manager for timing code blocks.
|
|
20
|
+
|
|
21
|
+
Args:
|
|
22
|
+
name: Name of the operation being timed
|
|
23
|
+
logger: Logger instance to use
|
|
24
|
+
level: Logging level (default DEBUG)
|
|
25
|
+
"""
|
|
26
|
+
start = time.perf_counter()
|
|
27
|
+
try:
|
|
28
|
+
yield
|
|
29
|
+
finally:
|
|
30
|
+
elapsed_ms = (time.perf_counter() - start) * 1000
|
|
31
|
+
logger.log(level, "[TIMING] %s: %.2fms", name, elapsed_ms)
|
|
32
|
+
|
|
33
|
+
from codexlens.config import Config
|
|
14
34
|
from codexlens.entities import SearchResult
|
|
15
|
-
from codexlens.search.ranking import reciprocal_rank_fusion, tag_search_source
|
|
35
|
+
from codexlens.search.ranking import (
|
|
36
|
+
apply_symbol_boost,
|
|
37
|
+
get_rrf_weights,
|
|
38
|
+
reciprocal_rank_fusion,
|
|
39
|
+
rerank_results,
|
|
40
|
+
tag_search_source,
|
|
41
|
+
)
|
|
16
42
|
from codexlens.storage.dir_index import DirIndexStore
|
|
17
43
|
|
|
18
44
|
|
|
@@ -34,14 +60,23 @@ class HybridSearchEngine:
|
|
|
34
60
|
"vector": 0.6,
|
|
35
61
|
}
|
|
36
62
|
|
|
37
|
-
def __init__(self, weights: Optional[Dict[str, float]] = None):
|
|
63
|
+
def __init__(
|
|
64
|
+
self,
|
|
65
|
+
weights: Optional[Dict[str, float]] = None,
|
|
66
|
+
config: Optional[Config] = None,
|
|
67
|
+
embedder: Any = None,
|
|
68
|
+
):
|
|
38
69
|
"""Initialize hybrid search engine.
|
|
39
70
|
|
|
40
71
|
Args:
|
|
41
72
|
weights: Optional custom RRF weights (default: DEFAULT_WEIGHTS)
|
|
73
|
+
config: Optional runtime config (enables optional reranking features)
|
|
74
|
+
embedder: Optional embedder instance for embedding-based reranking
|
|
42
75
|
"""
|
|
43
76
|
self.logger = logging.getLogger(__name__)
|
|
44
77
|
self.weights = weights or self.DEFAULT_WEIGHTS.copy()
|
|
78
|
+
self._config = config
|
|
79
|
+
self.embedder = embedder
|
|
45
80
|
|
|
46
81
|
def search(
|
|
47
82
|
self,
|
|
@@ -101,7 +136,8 @@ class HybridSearchEngine:
|
|
|
101
136
|
backends["vector"] = True
|
|
102
137
|
|
|
103
138
|
# Execute parallel searches
|
|
104
|
-
|
|
139
|
+
with timer("parallel_search_total", self.logger):
|
|
140
|
+
results_map = self._search_parallel(index_path, query, backends, limit)
|
|
105
141
|
|
|
106
142
|
# Provide helpful message if pure-vector mode returns no results
|
|
107
143
|
if pure_vector and enable_vector and len(results_map.get("vector", [])) == 0:
|
|
@@ -120,11 +156,72 @@ class HybridSearchEngine:
|
|
|
120
156
|
if source in results_map
|
|
121
157
|
}
|
|
122
158
|
|
|
123
|
-
|
|
159
|
+
with timer("rrf_fusion", self.logger):
|
|
160
|
+
adaptive_weights = get_rrf_weights(query, active_weights)
|
|
161
|
+
fused_results = reciprocal_rank_fusion(results_map, adaptive_weights)
|
|
162
|
+
|
|
163
|
+
# Optional: boost results that include explicit symbol matches
|
|
164
|
+
boost_factor = (
|
|
165
|
+
self._config.symbol_boost_factor
|
|
166
|
+
if self._config is not None
|
|
167
|
+
else 1.5
|
|
168
|
+
)
|
|
169
|
+
with timer("symbol_boost", self.logger):
|
|
170
|
+
fused_results = apply_symbol_boost(
|
|
171
|
+
fused_results, boost_factor=boost_factor
|
|
172
|
+
)
|
|
173
|
+
|
|
174
|
+
# Optional: embedding-based reranking on top results
|
|
175
|
+
if self._config is not None and self._config.enable_reranking:
|
|
176
|
+
with timer("reranking", self.logger):
|
|
177
|
+
if self.embedder is None:
|
|
178
|
+
self.embedder = self._get_reranking_embedder()
|
|
179
|
+
fused_results = rerank_results(
|
|
180
|
+
query,
|
|
181
|
+
fused_results[:100],
|
|
182
|
+
self.embedder,
|
|
183
|
+
top_k=self._config.reranking_top_k,
|
|
184
|
+
)
|
|
124
185
|
|
|
125
186
|
# Apply final limit
|
|
126
187
|
return fused_results[:limit]
|
|
127
188
|
|
|
189
|
+
def _get_reranking_embedder(self) -> Any:
|
|
190
|
+
"""Create an embedder for reranking based on Config embedding settings."""
|
|
191
|
+
if self._config is None:
|
|
192
|
+
return None
|
|
193
|
+
|
|
194
|
+
try:
|
|
195
|
+
from codexlens.semantic.factory import get_embedder
|
|
196
|
+
except Exception as exc:
|
|
197
|
+
self.logger.debug("Reranking embedder unavailable: %s", exc)
|
|
198
|
+
return None
|
|
199
|
+
|
|
200
|
+
try:
|
|
201
|
+
if self._config.embedding_backend == "fastembed":
|
|
202
|
+
return get_embedder(
|
|
203
|
+
backend="fastembed",
|
|
204
|
+
profile=self._config.embedding_model,
|
|
205
|
+
use_gpu=self._config.embedding_use_gpu,
|
|
206
|
+
)
|
|
207
|
+
if self._config.embedding_backend == "litellm":
|
|
208
|
+
return get_embedder(
|
|
209
|
+
backend="litellm",
|
|
210
|
+
model=self._config.embedding_model,
|
|
211
|
+
endpoints=self._config.embedding_endpoints,
|
|
212
|
+
strategy=self._config.embedding_strategy,
|
|
213
|
+
cooldown=self._config.embedding_cooldown,
|
|
214
|
+
)
|
|
215
|
+
except Exception as exc:
|
|
216
|
+
self.logger.debug("Failed to initialize reranking embedder: %s", exc)
|
|
217
|
+
return None
|
|
218
|
+
|
|
219
|
+
self.logger.debug(
|
|
220
|
+
"Unknown embedding backend for reranking: %s",
|
|
221
|
+
self._config.embedding_backend,
|
|
222
|
+
)
|
|
223
|
+
return None
|
|
224
|
+
|
|
128
225
|
def _search_parallel(
|
|
129
226
|
self,
|
|
130
227
|
index_path: Path,
|
|
@@ -144,25 +241,30 @@ class HybridSearchEngine:
|
|
|
144
241
|
Dictionary mapping source name to results list
|
|
145
242
|
"""
|
|
146
243
|
results_map: Dict[str, List[SearchResult]] = {}
|
|
244
|
+
timing_data: Dict[str, float] = {}
|
|
147
245
|
|
|
148
246
|
# Use ThreadPoolExecutor for parallel I/O-bound searches
|
|
149
247
|
with ThreadPoolExecutor(max_workers=len(backends)) as executor:
|
|
150
|
-
# Submit search tasks
|
|
248
|
+
# Submit search tasks with timing
|
|
151
249
|
future_to_source = {}
|
|
250
|
+
submit_times = {}
|
|
152
251
|
|
|
153
252
|
if backends.get("exact"):
|
|
253
|
+
submit_times["exact"] = time.perf_counter()
|
|
154
254
|
future = executor.submit(
|
|
155
255
|
self._search_exact, index_path, query, limit
|
|
156
256
|
)
|
|
157
257
|
future_to_source[future] = "exact"
|
|
158
258
|
|
|
159
259
|
if backends.get("fuzzy"):
|
|
260
|
+
submit_times["fuzzy"] = time.perf_counter()
|
|
160
261
|
future = executor.submit(
|
|
161
262
|
self._search_fuzzy, index_path, query, limit
|
|
162
263
|
)
|
|
163
264
|
future_to_source[future] = "fuzzy"
|
|
164
265
|
|
|
165
266
|
if backends.get("vector"):
|
|
267
|
+
submit_times["vector"] = time.perf_counter()
|
|
166
268
|
future = executor.submit(
|
|
167
269
|
self._search_vector, index_path, query, limit
|
|
168
270
|
)
|
|
@@ -171,18 +273,26 @@ class HybridSearchEngine:
|
|
|
171
273
|
# Collect results as they complete
|
|
172
274
|
for future in as_completed(future_to_source):
|
|
173
275
|
source = future_to_source[future]
|
|
276
|
+
elapsed_ms = (time.perf_counter() - submit_times[source]) * 1000
|
|
277
|
+
timing_data[source] = elapsed_ms
|
|
174
278
|
try:
|
|
175
279
|
results = future.result()
|
|
176
280
|
# Tag results with source for debugging
|
|
177
281
|
tagged_results = tag_search_source(results, source)
|
|
178
282
|
results_map[source] = tagged_results
|
|
179
283
|
self.logger.debug(
|
|
180
|
-
"
|
|
284
|
+
"[TIMING] %s_search: %.2fms (%d results)",
|
|
285
|
+
source, elapsed_ms, len(results)
|
|
181
286
|
)
|
|
182
287
|
except Exception as exc:
|
|
183
288
|
self.logger.error("Search failed for %s: %s", source, exc)
|
|
184
289
|
results_map[source] = []
|
|
185
290
|
|
|
291
|
+
# Log timing summary
|
|
292
|
+
if timing_data:
|
|
293
|
+
timing_str = ", ".join(f"{k}={v:.1f}ms" for k, v in timing_data.items())
|
|
294
|
+
self.logger.debug("[TIMING] search_backends: {%s}", timing_str)
|
|
295
|
+
|
|
186
296
|
return results_map
|
|
187
297
|
|
|
188
298
|
def _search_exact(
|
|
@@ -245,6 +355,8 @@ class HybridSearchEngine:
|
|
|
245
355
|
try:
|
|
246
356
|
# Check if semantic chunks table exists
|
|
247
357
|
import sqlite3
|
|
358
|
+
|
|
359
|
+
start_check = time.perf_counter()
|
|
248
360
|
try:
|
|
249
361
|
with sqlite3.connect(index_path) as conn:
|
|
250
362
|
cursor = conn.execute(
|
|
@@ -254,6 +366,10 @@ class HybridSearchEngine:
|
|
|
254
366
|
except sqlite3.Error as e:
|
|
255
367
|
self.logger.error("Database check failed in vector search: %s", e)
|
|
256
368
|
return []
|
|
369
|
+
self.logger.debug(
|
|
370
|
+
"[TIMING] vector_table_check: %.2fms",
|
|
371
|
+
(time.perf_counter() - start_check) * 1000
|
|
372
|
+
)
|
|
257
373
|
|
|
258
374
|
if not has_semantic_table:
|
|
259
375
|
self.logger.info(
|
|
@@ -267,7 +383,12 @@ class HybridSearchEngine:
|
|
|
267
383
|
from codexlens.semantic.factory import get_embedder
|
|
268
384
|
from codexlens.semantic.vector_store import VectorStore
|
|
269
385
|
|
|
386
|
+
start_init = time.perf_counter()
|
|
270
387
|
vector_store = VectorStore(index_path)
|
|
388
|
+
self.logger.debug(
|
|
389
|
+
"[TIMING] vector_store_init: %.2fms",
|
|
390
|
+
(time.perf_counter() - start_init) * 1000
|
|
391
|
+
)
|
|
271
392
|
|
|
272
393
|
# Check if vector store has data
|
|
273
394
|
if vector_store.count_chunks() == 0:
|
|
@@ -279,6 +400,7 @@ class HybridSearchEngine:
|
|
|
279
400
|
return []
|
|
280
401
|
|
|
281
402
|
# Get stored model configuration (preferred) or auto-detect from dimension
|
|
403
|
+
start_embedder = time.perf_counter()
|
|
282
404
|
model_config = vector_store.get_model_config()
|
|
283
405
|
if model_config:
|
|
284
406
|
backend = model_config.get("backend", "fastembed")
|
|
@@ -288,7 +410,7 @@ class HybridSearchEngine:
|
|
|
288
410
|
"Using stored model config: %s backend, %s (%s, %dd)",
|
|
289
411
|
backend, model_profile, model_name, model_config["embedding_dim"]
|
|
290
412
|
)
|
|
291
|
-
|
|
413
|
+
|
|
292
414
|
# Get embedder based on backend
|
|
293
415
|
if backend == "litellm":
|
|
294
416
|
embedder = get_embedder(backend="litellm", model=model_name)
|
|
@@ -324,21 +446,32 @@ class HybridSearchEngine:
|
|
|
324
446
|
detected_dim
|
|
325
447
|
)
|
|
326
448
|
embedder = get_embedder(backend="fastembed", profile="code")
|
|
327
|
-
|
|
328
|
-
|
|
449
|
+
self.logger.debug(
|
|
450
|
+
"[TIMING] embedder_init: %.2fms",
|
|
451
|
+
(time.perf_counter() - start_embedder) * 1000
|
|
452
|
+
)
|
|
329
453
|
|
|
330
454
|
# Generate query embedding
|
|
455
|
+
start_embed = time.perf_counter()
|
|
331
456
|
query_embedding = embedder.embed_single(query)
|
|
457
|
+
self.logger.debug(
|
|
458
|
+
"[TIMING] query_embedding: %.2fms",
|
|
459
|
+
(time.perf_counter() - start_embed) * 1000
|
|
460
|
+
)
|
|
332
461
|
|
|
333
462
|
# Search for similar chunks
|
|
463
|
+
start_search = time.perf_counter()
|
|
334
464
|
results = vector_store.search_similar(
|
|
335
465
|
query_embedding=query_embedding,
|
|
336
466
|
top_k=limit,
|
|
337
467
|
min_score=0.0, # Return all results, let RRF handle filtering
|
|
338
468
|
return_full_content=True,
|
|
339
469
|
)
|
|
470
|
+
self.logger.debug(
|
|
471
|
+
"[TIMING] vector_similarity_search: %.2fms (%d results)",
|
|
472
|
+
(time.perf_counter() - start_search) * 1000, len(results)
|
|
473
|
+
)
|
|
340
474
|
|
|
341
|
-
self.logger.debug("Vector search found %d results", len(results))
|
|
342
475
|
return results
|
|
343
476
|
|
|
344
477
|
except ImportError as exc:
|
|
@@ -6,12 +6,98 @@ for combining results from heterogeneous search backends (exact FTS, fuzzy FTS,
|
|
|
6
6
|
|
|
7
7
|
from __future__ import annotations
|
|
8
8
|
|
|
9
|
+
import re
|
|
9
10
|
import math
|
|
10
|
-
from
|
|
11
|
+
from enum import Enum
|
|
12
|
+
from typing import Any, Dict, List
|
|
11
13
|
|
|
12
14
|
from codexlens.entities import SearchResult, AdditionalLocation
|
|
13
15
|
|
|
14
16
|
|
|
17
|
+
class QueryIntent(str, Enum):
|
|
18
|
+
"""Query intent for adaptive RRF weights (Python/TypeScript parity)."""
|
|
19
|
+
|
|
20
|
+
KEYWORD = "keyword"
|
|
21
|
+
SEMANTIC = "semantic"
|
|
22
|
+
MIXED = "mixed"
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def normalize_weights(weights: Dict[str, float]) -> Dict[str, float]:
|
|
26
|
+
"""Normalize weights to sum to 1.0 (best-effort)."""
|
|
27
|
+
total = sum(float(v) for v in weights.values() if v is not None)
|
|
28
|
+
if not math.isfinite(total) or total <= 0:
|
|
29
|
+
return {k: float(v) for k, v in weights.items()}
|
|
30
|
+
return {k: float(v) / total for k, v in weights.items()}
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def detect_query_intent(query: str) -> QueryIntent:
|
|
34
|
+
"""Detect whether a query is code-like, natural-language, or mixed.
|
|
35
|
+
|
|
36
|
+
Heuristic signals kept aligned with `ccw/src/tools/smart-search.ts`.
|
|
37
|
+
"""
|
|
38
|
+
trimmed = (query or "").strip()
|
|
39
|
+
if not trimmed:
|
|
40
|
+
return QueryIntent.MIXED
|
|
41
|
+
|
|
42
|
+
lower = trimmed.lower()
|
|
43
|
+
word_count = len([w for w in re.split(r"\s+", trimmed) if w])
|
|
44
|
+
|
|
45
|
+
has_code_signals = bool(
|
|
46
|
+
re.search(r"(::|->|\.)", trimmed)
|
|
47
|
+
or re.search(r"[A-Z][a-z]+[A-Z]", trimmed)
|
|
48
|
+
or re.search(r"\b\w+_\w+\b", trimmed)
|
|
49
|
+
or re.search(
|
|
50
|
+
r"\b(def|class|function|const|let|var|import|from|return|async|await|interface|type)\b",
|
|
51
|
+
lower,
|
|
52
|
+
flags=re.IGNORECASE,
|
|
53
|
+
)
|
|
54
|
+
)
|
|
55
|
+
has_natural_signals = bool(
|
|
56
|
+
word_count > 5
|
|
57
|
+
or "?" in trimmed
|
|
58
|
+
or re.search(r"\b(how|what|why|when|where)\b", trimmed, flags=re.IGNORECASE)
|
|
59
|
+
or re.search(
|
|
60
|
+
r"\b(handle|explain|fix|implement|create|build|use|find|search|convert|parse|generate|support)\b",
|
|
61
|
+
trimmed,
|
|
62
|
+
flags=re.IGNORECASE,
|
|
63
|
+
)
|
|
64
|
+
)
|
|
65
|
+
|
|
66
|
+
if has_code_signals and has_natural_signals:
|
|
67
|
+
return QueryIntent.MIXED
|
|
68
|
+
if has_code_signals:
|
|
69
|
+
return QueryIntent.KEYWORD
|
|
70
|
+
if has_natural_signals:
|
|
71
|
+
return QueryIntent.SEMANTIC
|
|
72
|
+
return QueryIntent.MIXED
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
def adjust_weights_by_intent(
|
|
76
|
+
intent: QueryIntent,
|
|
77
|
+
base_weights: Dict[str, float],
|
|
78
|
+
) -> Dict[str, float]:
|
|
79
|
+
"""Map intent → weights (kept aligned with TypeScript mapping)."""
|
|
80
|
+
if intent == QueryIntent.KEYWORD:
|
|
81
|
+
target = {"exact": 0.5, "fuzzy": 0.1, "vector": 0.4}
|
|
82
|
+
elif intent == QueryIntent.SEMANTIC:
|
|
83
|
+
target = {"exact": 0.2, "fuzzy": 0.1, "vector": 0.7}
|
|
84
|
+
else:
|
|
85
|
+
target = dict(base_weights)
|
|
86
|
+
|
|
87
|
+
# Preserve only keys that are present in base_weights (active backends).
|
|
88
|
+
keys = list(base_weights.keys())
|
|
89
|
+
filtered = {k: float(target.get(k, 0.0)) for k in keys}
|
|
90
|
+
return normalize_weights(filtered)
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
def get_rrf_weights(
|
|
94
|
+
query: str,
|
|
95
|
+
base_weights: Dict[str, float],
|
|
96
|
+
) -> Dict[str, float]:
|
|
97
|
+
"""Compute adaptive RRF weights from query intent."""
|
|
98
|
+
return adjust_weights_by_intent(detect_query_intent(query), base_weights)
|
|
99
|
+
|
|
100
|
+
|
|
15
101
|
def reciprocal_rank_fusion(
|
|
16
102
|
results_map: Dict[str, List[SearchResult]],
|
|
17
103
|
weights: Dict[str, float] = None,
|
|
@@ -102,6 +188,186 @@ def reciprocal_rank_fusion(
|
|
|
102
188
|
return fused_results
|
|
103
189
|
|
|
104
190
|
|
|
191
|
+
def apply_symbol_boost(
|
|
192
|
+
results: List[SearchResult],
|
|
193
|
+
boost_factor: float = 1.5,
|
|
194
|
+
) -> List[SearchResult]:
|
|
195
|
+
"""Boost fused scores for results that include an explicit symbol match.
|
|
196
|
+
|
|
197
|
+
The boost is multiplicative on the current result.score (typically the RRF fusion score).
|
|
198
|
+
When boosted, the original score is preserved in metadata["original_fusion_score"] and
|
|
199
|
+
metadata["boosted"] is set to True.
|
|
200
|
+
"""
|
|
201
|
+
if not results:
|
|
202
|
+
return []
|
|
203
|
+
|
|
204
|
+
if boost_factor <= 1.0:
|
|
205
|
+
# Still return new objects to follow immutable transformation pattern.
|
|
206
|
+
return [
|
|
207
|
+
SearchResult(
|
|
208
|
+
path=r.path,
|
|
209
|
+
score=r.score,
|
|
210
|
+
excerpt=r.excerpt,
|
|
211
|
+
content=r.content,
|
|
212
|
+
symbol=r.symbol,
|
|
213
|
+
chunk=r.chunk,
|
|
214
|
+
metadata={**r.metadata},
|
|
215
|
+
start_line=r.start_line,
|
|
216
|
+
end_line=r.end_line,
|
|
217
|
+
symbol_name=r.symbol_name,
|
|
218
|
+
symbol_kind=r.symbol_kind,
|
|
219
|
+
additional_locations=list(r.additional_locations),
|
|
220
|
+
)
|
|
221
|
+
for r in results
|
|
222
|
+
]
|
|
223
|
+
|
|
224
|
+
boosted_results: List[SearchResult] = []
|
|
225
|
+
for result in results:
|
|
226
|
+
has_symbol = bool(result.symbol_name)
|
|
227
|
+
original_score = float(result.score)
|
|
228
|
+
boosted_score = original_score * boost_factor if has_symbol else original_score
|
|
229
|
+
|
|
230
|
+
metadata = {**result.metadata}
|
|
231
|
+
if has_symbol:
|
|
232
|
+
metadata.setdefault("original_fusion_score", metadata.get("fusion_score", original_score))
|
|
233
|
+
metadata["boosted"] = True
|
|
234
|
+
metadata["symbol_boost_factor"] = boost_factor
|
|
235
|
+
|
|
236
|
+
boosted_results.append(
|
|
237
|
+
SearchResult(
|
|
238
|
+
path=result.path,
|
|
239
|
+
score=boosted_score,
|
|
240
|
+
excerpt=result.excerpt,
|
|
241
|
+
content=result.content,
|
|
242
|
+
symbol=result.symbol,
|
|
243
|
+
chunk=result.chunk,
|
|
244
|
+
metadata=metadata,
|
|
245
|
+
start_line=result.start_line,
|
|
246
|
+
end_line=result.end_line,
|
|
247
|
+
symbol_name=result.symbol_name,
|
|
248
|
+
symbol_kind=result.symbol_kind,
|
|
249
|
+
additional_locations=list(result.additional_locations),
|
|
250
|
+
)
|
|
251
|
+
)
|
|
252
|
+
|
|
253
|
+
boosted_results.sort(key=lambda r: r.score, reverse=True)
|
|
254
|
+
return boosted_results
|
|
255
|
+
|
|
256
|
+
|
|
257
|
+
def rerank_results(
|
|
258
|
+
query: str,
|
|
259
|
+
results: List[SearchResult],
|
|
260
|
+
embedder: Any,
|
|
261
|
+
top_k: int = 50,
|
|
262
|
+
) -> List[SearchResult]:
|
|
263
|
+
"""Re-rank results with embedding cosine similarity, combined with current score.
|
|
264
|
+
|
|
265
|
+
Combined score formula:
|
|
266
|
+
0.5 * rrf_score + 0.5 * cosine_similarity
|
|
267
|
+
|
|
268
|
+
If embedder is None or embedding fails, returns results as-is.
|
|
269
|
+
"""
|
|
270
|
+
if not results:
|
|
271
|
+
return []
|
|
272
|
+
|
|
273
|
+
if embedder is None or top_k <= 0:
|
|
274
|
+
return results
|
|
275
|
+
|
|
276
|
+
rerank_count = min(int(top_k), len(results))
|
|
277
|
+
|
|
278
|
+
def cosine_similarity(vec_a: List[float], vec_b: List[float]) -> float:
|
|
279
|
+
# Defensive: handle mismatched lengths and zero vectors.
|
|
280
|
+
n = min(len(vec_a), len(vec_b))
|
|
281
|
+
if n == 0:
|
|
282
|
+
return 0.0
|
|
283
|
+
dot = 0.0
|
|
284
|
+
norm_a = 0.0
|
|
285
|
+
norm_b = 0.0
|
|
286
|
+
for i in range(n):
|
|
287
|
+
a = float(vec_a[i])
|
|
288
|
+
b = float(vec_b[i])
|
|
289
|
+
dot += a * b
|
|
290
|
+
norm_a += a * a
|
|
291
|
+
norm_b += b * b
|
|
292
|
+
if norm_a <= 0.0 or norm_b <= 0.0:
|
|
293
|
+
return 0.0
|
|
294
|
+
sim = dot / (math.sqrt(norm_a) * math.sqrt(norm_b))
|
|
295
|
+
# SearchResult.score requires non-negative scores; clamp cosine similarity to [0, 1].
|
|
296
|
+
return max(0.0, min(1.0, sim))
|
|
297
|
+
|
|
298
|
+
def text_for_embedding(r: SearchResult) -> str:
|
|
299
|
+
if r.excerpt and r.excerpt.strip():
|
|
300
|
+
return r.excerpt
|
|
301
|
+
if r.content and r.content.strip():
|
|
302
|
+
return r.content
|
|
303
|
+
if r.chunk and r.chunk.content and r.chunk.content.strip():
|
|
304
|
+
return r.chunk.content
|
|
305
|
+
# Fallback: stable, non-empty text.
|
|
306
|
+
return r.symbol_name or r.path
|
|
307
|
+
|
|
308
|
+
try:
|
|
309
|
+
if hasattr(embedder, "embed_single"):
|
|
310
|
+
query_vec = embedder.embed_single(query)
|
|
311
|
+
else:
|
|
312
|
+
query_vec = embedder.embed(query)[0]
|
|
313
|
+
|
|
314
|
+
doc_texts = [text_for_embedding(r) for r in results[:rerank_count]]
|
|
315
|
+
doc_vecs = embedder.embed(doc_texts)
|
|
316
|
+
except Exception:
|
|
317
|
+
return results
|
|
318
|
+
|
|
319
|
+
reranked_results: List[SearchResult] = []
|
|
320
|
+
|
|
321
|
+
for idx, result in enumerate(results):
|
|
322
|
+
if idx < rerank_count:
|
|
323
|
+
rrf_score = float(result.score)
|
|
324
|
+
sim = cosine_similarity(query_vec, doc_vecs[idx])
|
|
325
|
+
combined_score = 0.5 * rrf_score + 0.5 * sim
|
|
326
|
+
|
|
327
|
+
reranked_results.append(
|
|
328
|
+
SearchResult(
|
|
329
|
+
path=result.path,
|
|
330
|
+
score=combined_score,
|
|
331
|
+
excerpt=result.excerpt,
|
|
332
|
+
content=result.content,
|
|
333
|
+
symbol=result.symbol,
|
|
334
|
+
chunk=result.chunk,
|
|
335
|
+
metadata={
|
|
336
|
+
**result.metadata,
|
|
337
|
+
"rrf_score": rrf_score,
|
|
338
|
+
"cosine_similarity": sim,
|
|
339
|
+
"reranked": True,
|
|
340
|
+
},
|
|
341
|
+
start_line=result.start_line,
|
|
342
|
+
end_line=result.end_line,
|
|
343
|
+
symbol_name=result.symbol_name,
|
|
344
|
+
symbol_kind=result.symbol_kind,
|
|
345
|
+
additional_locations=list(result.additional_locations),
|
|
346
|
+
)
|
|
347
|
+
)
|
|
348
|
+
else:
|
|
349
|
+
# Preserve remaining results without re-ranking, but keep immutability.
|
|
350
|
+
reranked_results.append(
|
|
351
|
+
SearchResult(
|
|
352
|
+
path=result.path,
|
|
353
|
+
score=result.score,
|
|
354
|
+
excerpt=result.excerpt,
|
|
355
|
+
content=result.content,
|
|
356
|
+
symbol=result.symbol,
|
|
357
|
+
chunk=result.chunk,
|
|
358
|
+
metadata={**result.metadata},
|
|
359
|
+
start_line=result.start_line,
|
|
360
|
+
end_line=result.end_line,
|
|
361
|
+
symbol_name=result.symbol_name,
|
|
362
|
+
symbol_kind=result.symbol_kind,
|
|
363
|
+
additional_locations=list(result.additional_locations),
|
|
364
|
+
)
|
|
365
|
+
)
|
|
366
|
+
|
|
367
|
+
reranked_results.sort(key=lambda r: r.score, reverse=True)
|
|
368
|
+
return reranked_results
|
|
369
|
+
|
|
370
|
+
|
|
105
371
|
def normalize_bm25_score(score: float) -> float:
|
|
106
372
|
"""Normalize BM25 scores from SQLite FTS5 to 0-1 range.
|
|
107
373
|
|
|
Binary file
|
|
@@ -392,6 +392,22 @@ class HybridChunker:
|
|
|
392
392
|
filtered.append(symbol)
|
|
393
393
|
return filtered
|
|
394
394
|
|
|
395
|
+
def _find_parent_symbol(
|
|
396
|
+
self,
|
|
397
|
+
start_line: int,
|
|
398
|
+
end_line: int,
|
|
399
|
+
symbols: List[Symbol],
|
|
400
|
+
) -> Optional[Symbol]:
|
|
401
|
+
"""Find the smallest symbol range that fully contains a docstring span."""
|
|
402
|
+
candidates: List[Symbol] = []
|
|
403
|
+
for symbol in symbols:
|
|
404
|
+
sym_start, sym_end = symbol.range
|
|
405
|
+
if sym_start <= start_line and end_line <= sym_end:
|
|
406
|
+
candidates.append(symbol)
|
|
407
|
+
if not candidates:
|
|
408
|
+
return None
|
|
409
|
+
return min(candidates, key=lambda s: (s.range[1] - s.range[0], s.range[0]))
|
|
410
|
+
|
|
395
411
|
def chunk_file(
|
|
396
412
|
self,
|
|
397
413
|
content: str,
|
|
@@ -414,24 +430,53 @@ class HybridChunker:
|
|
|
414
430
|
chunks: List[SemanticChunk] = []
|
|
415
431
|
|
|
416
432
|
# Step 1: Extract docstrings as dedicated chunks
|
|
417
|
-
docstrings = self.docstring_extractor.extract_docstrings(content, language)
|
|
433
|
+
docstrings: List[Tuple[str, int, int]] = []
|
|
434
|
+
if language == "python":
|
|
435
|
+
# Fast path: avoid expensive docstring extraction if delimiters are absent.
|
|
436
|
+
if '"""' in content or "'''" in content:
|
|
437
|
+
docstrings = self.docstring_extractor.extract_docstrings(content, language)
|
|
438
|
+
elif language in {"javascript", "typescript"}:
|
|
439
|
+
if "/**" in content:
|
|
440
|
+
docstrings = self.docstring_extractor.extract_docstrings(content, language)
|
|
441
|
+
else:
|
|
442
|
+
docstrings = self.docstring_extractor.extract_docstrings(content, language)
|
|
443
|
+
|
|
444
|
+
# Fast path: no docstrings -> delegate to base chunker directly.
|
|
445
|
+
if not docstrings:
|
|
446
|
+
if symbols:
|
|
447
|
+
base_chunks = self.base_chunker.chunk_by_symbol(
|
|
448
|
+
content, symbols, file_path, language, symbol_token_counts
|
|
449
|
+
)
|
|
450
|
+
else:
|
|
451
|
+
base_chunks = self.base_chunker.chunk_sliding_window(content, file_path, language)
|
|
452
|
+
|
|
453
|
+
for chunk in base_chunks:
|
|
454
|
+
chunk.metadata["strategy"] = "hybrid"
|
|
455
|
+
chunk.metadata["chunk_type"] = "code"
|
|
456
|
+
return base_chunks
|
|
418
457
|
|
|
419
458
|
for docstring_content, start_line, end_line in docstrings:
|
|
420
459
|
if len(docstring_content.strip()) >= self.config.min_chunk_size:
|
|
460
|
+
parent_symbol = self._find_parent_symbol(start_line, end_line, symbols)
|
|
421
461
|
# Use base chunker's token estimation method
|
|
422
462
|
token_count = self.base_chunker._estimate_token_count(docstring_content)
|
|
463
|
+
metadata = {
|
|
464
|
+
"file": str(file_path),
|
|
465
|
+
"language": language,
|
|
466
|
+
"chunk_type": "docstring",
|
|
467
|
+
"start_line": start_line,
|
|
468
|
+
"end_line": end_line,
|
|
469
|
+
"strategy": "hybrid",
|
|
470
|
+
"token_count": token_count,
|
|
471
|
+
}
|
|
472
|
+
if parent_symbol is not None:
|
|
473
|
+
metadata["parent_symbol"] = parent_symbol.name
|
|
474
|
+
metadata["parent_symbol_kind"] = parent_symbol.kind
|
|
475
|
+
metadata["parent_symbol_range"] = parent_symbol.range
|
|
423
476
|
chunks.append(SemanticChunk(
|
|
424
477
|
content=docstring_content,
|
|
425
478
|
embedding=None,
|
|
426
|
-
metadata={
|
|
427
|
-
"file": str(file_path),
|
|
428
|
-
"language": language,
|
|
429
|
-
"chunk_type": "docstring",
|
|
430
|
-
"start_line": start_line,
|
|
431
|
-
"end_line": end_line,
|
|
432
|
-
"strategy": "hybrid",
|
|
433
|
-
"token_count": token_count,
|
|
434
|
-
}
|
|
479
|
+
metadata=metadata
|
|
435
480
|
))
|
|
436
481
|
|
|
437
482
|
# Step 2: Get line ranges occupied by docstrings
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "claude-code-workflow",
|
|
3
|
-
"version": "6.3.4",
|
|
3
|
+
"version": "6.3.6",
|
|
4
4
|
"description": "JSON-driven multi-agent development framework with intelligent CLI orchestration (Gemini/Qwen/Codex), context-first architecture, and automated workflow execution",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "ccw/src/index.js",
|
|
@@ -11,7 +11,7 @@
|
|
|
11
11
|
"scripts": {
|
|
12
12
|
"build": "tsc -p ccw/tsconfig.json",
|
|
13
13
|
"start": "node ccw/bin/ccw.js",
|
|
14
|
-
"test": "node --test",
|
|
14
|
+
"test": "node --test ccw/tests/*.test.js",
|
|
15
15
|
"prepublishOnly": "npm run build && echo 'Ready to publish @dyw/claude-code-workflow'"
|
|
16
16
|
},
|
|
17
17
|
"keywords": [
|