npm - claude-code-workflow - Versions diffs - 6.3.4 → 6.3.6 - Mend

claude-code-workflow 6.3.4 → 6.3.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (111) hide show

package/codex-lens/src/codexlens/search/hybrid_search.py CHANGED Viewed

@@ -7,12 +7,38 @@ results via Reciprocal Rank Fusion (RRF) algorithm.
 from __future__ import annotations
 import logging
+import time
 from concurrent.futures import ThreadPoolExecutor, as_completed
+from contextlib import contextmanager
 from pathlib import Path
-from typing import Dict, List, Optional
+from typing import Any, Dict, List, Optional
+@contextmanager
+def timer(name: str, logger: logging.Logger, level: int = logging.DEBUG):
+    """Time the body of a ``with`` block and log the elapsed milliseconds.
+
+    The duration is measured with ``time.perf_counter`` and reported as
+    ``[TIMING] <name>: <elapsed>ms``. The message is emitted whether the
+    wrapped block finishes normally or raises.
+
+    Args:
+        name: Label identifying the timed operation in the log line
+        logger: Logger instance that receives the timing message
+        level: Logging level for the message (default DEBUG)
+    """
+    began_at = time.perf_counter()
+    try:
+        yield
+    finally:
+        # ``finally`` ensures the measurement is logged on error paths too.
+        duration_ms = 1000 * (time.perf_counter() - began_at)
+        logger.log(level, "[TIMING] %s: %.2fms", name, duration_ms)
+from codexlens.config import Config
 from codexlens.entities import SearchResult
-from codexlens.search.ranking import reciprocal_rank_fusion, tag_search_source
+from codexlens.search.ranking import (
+    apply_symbol_boost,
+    get_rrf_weights,
+    reciprocal_rank_fusion,
+    rerank_results,
+    tag_search_source,
+)
 from codexlens.storage.dir_index import DirIndexStore
@@ -34,14 +60,23 @@ class HybridSearchEngine:
         "vector": 0.6,
     }
-    def __init__(self, weights: Optional[Dict[str, float]] = None):
+    def __init__(
+        self,
+        weights: Optional[Dict[str, float]] = None,
+        config: Optional[Config] = None,
+        embedder: Any = None,
+    ):
         """Initialize hybrid search engine.
         Args:
             weights: Optional custom RRF weights (default: DEFAULT_WEIGHTS)
+            config: Optional runtime config (enables optional reranking features)
+            embedder: Optional embedder instance for embedding-based reranking
         """
         self.logger = logging.getLogger(__name__)
+        # A falsy ``weights`` (None or an empty dict) selects a private copy of
+        # DEFAULT_WEIGHTS; a caller-supplied dict is stored by reference, not copied.
         self.weights = weights or self.DEFAULT_WEIGHTS.copy()
+        # Both are stored exactly as given; None is a valid value for either.
+        self._config = config
+        self.embedder = embedder
     def search(
         self,
@@ -101,7 +136,8 @@ class HybridSearchEngine:
                 backends["vector"] = True
         # Execute parallel searches
-        results_map = self._search_parallel(index_path, query, backends, limit)
+        with timer("parallel_search_total", self.logger):
+            results_map = self._search_parallel(index_path, query, backends, limit)
         # Provide helpful message if pure-vector mode returns no results
         if pure_vector and enable_vector and len(results_map.get("vector", [])) == 0:
@@ -120,11 +156,72 @@ class HybridSearchEngine:
             if source in results_map
         }
-        fused_results = reciprocal_rank_fusion(results_map, active_weights)
+        with timer("rrf_fusion", self.logger):
+            adaptive_weights = get_rrf_weights(query, active_weights)
+            fused_results = reciprocal_rank_fusion(results_map, adaptive_weights)
+        # Optional: boost results that include explicit symbol matches
+        boost_factor = (
+            self._config.symbol_boost_factor
+            if self._config is not None
+            else 1.5
+        )
+        with timer("symbol_boost", self.logger):
+            fused_results = apply_symbol_boost(
+                fused_results, boost_factor=boost_factor
+            )
+        # Optional: embedding-based reranking on top results
+        if self._config is not None and self._config.enable_reranking:
+            with timer("reranking", self.logger):
+                if self.embedder is None:
+                    self.embedder = self._get_reranking_embedder()
+                fused_results = rerank_results(
+                    query,
+                    fused_results[:100],
+                    self.embedder,
+                    top_k=self._config.reranking_top_k,
+                )
         # Apply final limit
         return fused_results[:limit]
+    def _get_reranking_embedder(self) -> Any:
+        """Build the reranking embedder from the Config embedding settings.
+
+        Returns:
+            The object returned by ``get_embedder`` for the configured backend
+            ("fastembed" or "litellm"). None is returned when no config is
+            set, the embedder factory cannot be imported, construction raises,
+            or the backend name is not one of the two handled here.
+        """
+        settings = self._config
+        if settings is None:
+            return None
+
+        # Imported lazily; an import failure just means no reranking embedder.
+        try:
+            from codexlens.semantic.factory import get_embedder
+        except Exception as exc:
+            self.logger.debug("Reranking embedder unavailable: %s", exc)
+            return None
+
+        try:
+            if settings.embedding_backend == "fastembed":
+                fastembed_options = {
+                    "backend": "fastembed",
+                    "profile": settings.embedding_model,
+                    "use_gpu": settings.embedding_use_gpu,
+                }
+                return get_embedder(**fastembed_options)
+            if settings.embedding_backend == "litellm":
+                litellm_options = {
+                    "backend": "litellm",
+                    "model": settings.embedding_model,
+                    "endpoints": settings.embedding_endpoints,
+                    "strategy": settings.embedding_strategy,
+                    "cooldown": settings.embedding_cooldown,
+                }
+                return get_embedder(**litellm_options)
+        except Exception as exc:
+            self.logger.debug("Failed to initialize reranking embedder: %s", exc)
+            return None
+
+        # Reached only when neither backend branch matched.
+        self.logger.debug(
+            "Unknown embedding backend for reranking: %s",
+            settings.embedding_backend,
+        )
+        return None
     def _search_parallel(
         self,
         index_path: Path,
@@ -144,25 +241,30 @@ class HybridSearchEngine:
             Dictionary mapping source name to results list
         """
         results_map: Dict[str, List[SearchResult]] = {}
+        timing_data: Dict[str, float] = {}
         # Use ThreadPoolExecutor for parallel I/O-bound searches
         with ThreadPoolExecutor(max_workers=len(backends)) as executor:
-            # Submit search tasks
+            # Submit search tasks with timing
             future_to_source = {}
+            submit_times = {}
             if backends.get("exact"):
+                submit_times["exact"] = time.perf_counter()
                 future = executor.submit(
                     self._search_exact, index_path, query, limit
                 )
                 future_to_source[future] = "exact"
             if backends.get("fuzzy"):
+                submit_times["fuzzy"] = time.perf_counter()
                 future = executor.submit(
                     self._search_fuzzy, index_path, query, limit
                 )
                 future_to_source[future] = "fuzzy"
             if backends.get("vector"):
+                submit_times["vector"] = time.perf_counter()
                 future = executor.submit(
                     self._search_vector, index_path, query, limit
                 )
@@ -171,18 +273,26 @@ class HybridSearchEngine:
             # Collect results as they complete
             for future in as_completed(future_to_source):
                 source = future_to_source[future]
+                elapsed_ms = (time.perf_counter() - submit_times[source]) * 1000
+                timing_data[source] = elapsed_ms
                 try:
                     results = future.result()
                     # Tag results with source for debugging
                     tagged_results = tag_search_source(results, source)
                     results_map[source] = tagged_results
                     self.logger.debug(
-                        "Got %d results from %s search", len(results), source
+                        "[TIMING] %s_search: %.2fms (%d results)",
+                        source, elapsed_ms, len(results)
                     )
                 except Exception as exc:
                     self.logger.error("Search failed for %s: %s", source, exc)
                     results_map[source] = []
+        # Log timing summary
+        if timing_data:
+            timing_str = ", ".join(f"{k}={v:.1f}ms" for k, v in timing_data.items())
+            self.logger.debug("[TIMING] search_backends: {%s}", timing_str)
         return results_map
     def _search_exact(
@@ -245,6 +355,8 @@ class HybridSearchEngine:
         try:
             # Check if semantic chunks table exists
             import sqlite3
+            start_check = time.perf_counter()
             try:
                 with sqlite3.connect(index_path) as conn:
                     cursor = conn.execute(
@@ -254,6 +366,10 @@ class HybridSearchEngine:
             except sqlite3.Error as e:
                 self.logger.error("Database check failed in vector search: %s", e)
                 return []
+            self.logger.debug(
+                "[TIMING] vector_table_check: %.2fms",
+                (time.perf_counter() - start_check) * 1000
+            )
             if not has_semantic_table:
                 self.logger.info(
@@ -267,7 +383,12 @@ class HybridSearchEngine:
             from codexlens.semantic.factory import get_embedder
             from codexlens.semantic.vector_store import VectorStore
+            start_init = time.perf_counter()
             vector_store = VectorStore(index_path)
+            self.logger.debug(
+                "[TIMING] vector_store_init: %.2fms",
+                (time.perf_counter() - start_init) * 1000
+            )
             # Check if vector store has data
             if vector_store.count_chunks() == 0:
@@ -279,6 +400,7 @@ class HybridSearchEngine:
                 return []
             # Get stored model configuration (preferred) or auto-detect from dimension
+            start_embedder = time.perf_counter()
             model_config = vector_store.get_model_config()
             if model_config:
                 backend = model_config.get("backend", "fastembed")
@@ -288,7 +410,7 @@ class HybridSearchEngine:
                     "Using stored model config: %s backend, %s (%s, %dd)",
                     backend, model_profile, model_name, model_config["embedding_dim"]
                 )
                 # Get embedder based on backend
                 if backend == "litellm":
                     embedder = get_embedder(backend="litellm", model=model_name)
@@ -324,21 +446,32 @@ class HybridSearchEngine:
                         detected_dim
                     )
                     embedder = get_embedder(backend="fastembed", profile="code")
+            self.logger.debug(
+                "[TIMING] embedder_init: %.2fms",
+                (time.perf_counter() - start_embedder) * 1000
+            )
             # Generate query embedding
+            start_embed = time.perf_counter()
             query_embedding = embedder.embed_single(query)
+            self.logger.debug(
+                "[TIMING] query_embedding: %.2fms",
+                (time.perf_counter() - start_embed) * 1000
+            )
             # Search for similar chunks
+            start_search = time.perf_counter()
             results = vector_store.search_similar(
                 query_embedding=query_embedding,
                 top_k=limit,
                 min_score=0.0,  # Return all results, let RRF handle filtering
                 return_full_content=True,
             )
+            self.logger.debug(
+                "[TIMING] vector_similarity_search: %.2fms (%d results)",
+                (time.perf_counter() - start_search) * 1000, len(results)
+            )
-            self.logger.debug("Vector search found %d results", len(results))
             return results
         except ImportError as exc:

package/codex-lens/src/codexlens/search/ranking.py CHANGED Viewed

@@ -6,12 +6,98 @@ for combining results from heterogeneous search backends (exact FTS, fuzzy FTS,
 from __future__ import annotations
+import re
 import math
-from typing import Dict, List
+from enum import Enum
+from typing import Any, Dict, List
 from codexlens.entities import SearchResult, AdditionalLocation
+class QueryIntent(str, Enum):
+    """Query intent for adaptive RRF weights (Python/TypeScript parity)."""
+    # Produced by detect_query_intent when only code-like signals are found.
+    KEYWORD = "keyword"
+    # Produced when only natural-language signals are found.
+    SEMANTIC = "semantic"
+    # Produced for empty queries, and when both or neither kind of signal is found.
+    MIXED = "mixed"
+def normalize_weights(weights: Dict[str, float]) -> Dict[str, float]:
+    """Normalize weights to sum to 1.0 (best-effort).
+
+    ``None`` values are treated as 0.0 both when computing the total and in
+    the returned mapping, so a partially specified mapping no longer raises
+    ``TypeError``.
+
+    Args:
+        weights: Mapping of source name to weight; values may be None.
+
+    Returns:
+        A new dict with every value converted to float. Values are divided by
+        their total when that total is finite and positive; otherwise the
+        float-converted values are returned without scaling.
+    """
+    coerced = {
+        key: 0.0 if value is None else float(value)
+        for key, value in weights.items()
+    }
+    total = sum(coerced.values())
+    # A zero, negative, NaN or infinite total cannot be normalized meaningfully.
+    if not math.isfinite(total) or total <= 0:
+        return coerced
+    return {key: value / total for key, value in coerced.items()}
+def detect_query_intent(query: str) -> QueryIntent:
+    """Classify a query as code-like, natural-language, or mixed.
+
+    Heuristic signals kept aligned with `ccw/src/tools/smart-search.ts`.
+
+    Returns:
+        KEYWORD when only code-like signals are present, SEMANTIC when only
+        natural-language signals are present, and MIXED when both or neither
+        are present (including an empty or whitespace-only query).
+    """
+    text = (query or "").strip()
+    if not text:
+        return QueryIntent.MIXED
+
+    lowered = text.lower()
+    token_total = sum(1 for token in re.split(r"\s+", text) if token)
+
+    # Code-like: `::`, `->` or `.`; an uppercase letter, a lowercase run and
+    # another uppercase letter; an underscore-joined word; a language keyword.
+    looks_like_code = (
+        re.search(r"(::|->|\.)", text) is not None
+        or re.search(r"[A-Z][a-z]+[A-Z]", text) is not None
+        or re.search(r"\b\w+_\w+\b", text) is not None
+        or re.search(
+            r"\b(def|class|function|const|let|var|import|from|return|async|await|interface|type)\b",
+            lowered,
+            flags=re.IGNORECASE,
+        ) is not None
+    )
+
+    # Natural-language: more than five words, a question mark, a question
+    # word, or one of a fixed list of task verbs.
+    looks_like_prose = (
+        token_total > 5
+        or "?" in text
+        or re.search(r"\b(how|what|why|when|where)\b", text, flags=re.IGNORECASE) is not None
+        or re.search(
+            r"\b(handle|explain|fix|implement|create|build|use|find|search|convert|parse|generate|support)\b",
+            text,
+            flags=re.IGNORECASE,
+        ) is not None
+    )
+
+    if looks_like_code:
+        return QueryIntent.MIXED if looks_like_prose else QueryIntent.KEYWORD
+    return QueryIntent.SEMANTIC if looks_like_prose else QueryIntent.MIXED
+def adjust_weights_by_intent(
+    intent: QueryIntent,
+    base_weights: Dict[str, float],
+) -> Dict[str, float]:
+    """Map intent → weights (kept aligned with TypeScript mapping).
+
+    KEYWORD and SEMANTIC each select a fixed preset; any other intent keeps
+    the caller's base weights. The chosen weights are restricted to the keys
+    of ``base_weights`` (a key missing from the preset gets 0.0) and then
+    passed through ``normalize_weights``.
+    """
+    keyword_preset = {"exact": 0.5, "fuzzy": 0.1, "vector": 0.4}
+    semantic_preset = {"exact": 0.2, "fuzzy": 0.1, "vector": 0.7}
+
+    if intent == QueryIntent.KEYWORD:
+        chosen = keyword_preset
+    elif intent == QueryIntent.SEMANTIC:
+        chosen = semantic_preset
+    else:
+        chosen = dict(base_weights)
+
+    # Preserve only keys that are present in base_weights (active backends).
+    restricted: Dict[str, float] = {}
+    for backend in base_weights:
+        restricted[backend] = float(chosen.get(backend, 0.0))
+    return normalize_weights(restricted)
+def get_rrf_weights(
+    query: str,
+    base_weights: Dict[str, float],
+) -> Dict[str, float]:
+    """Compute adaptive RRF weights from query intent.
+
+    The query is classified with ``detect_query_intent`` and the resulting
+    intent is applied to ``base_weights`` via ``adjust_weights_by_intent``.
+    """
+    intent = detect_query_intent(query)
+    return adjust_weights_by_intent(intent, base_weights)
 def reciprocal_rank_fusion(
     results_map: Dict[str, List[SearchResult]],
     weights: Dict[str, float] = None,
@@ -102,6 +188,186 @@ def reciprocal_rank_fusion(
     return fused_results
+def apply_symbol_boost(
+    results: List[SearchResult],
+    boost_factor: float = 1.5,
+) -> List[SearchResult]:
+    """Boost fused scores for results that include an explicit symbol match.
+
+    A result counts as a symbol match when its ``symbol_name`` is truthy. Its
+    score is multiplied by ``boost_factor`` (typically applied to the RRF
+    fusion score), and its metadata gains "original_fusion_score" (unless
+    already present), "boosted" = True and "symbol_boost_factor".
+
+    Returns:
+        A list of new SearchResult objects. When ``boost_factor`` is greater
+        than 1.0 the list is sorted by score, highest first; when it is 1.0 or
+        less, scores and order are left as they were. An empty input gives an
+        empty list.
+    """
+    def rebuild(source, score, metadata):
+        # Always construct a fresh SearchResult so inputs are never mutated.
+        return SearchResult(
+            path=source.path,
+            score=score,
+            excerpt=source.excerpt,
+            content=source.content,
+            symbol=source.symbol,
+            chunk=source.chunk,
+            metadata=metadata,
+            start_line=source.start_line,
+            end_line=source.end_line,
+            symbol_name=source.symbol_name,
+            symbol_kind=source.symbol_kind,
+            additional_locations=list(source.additional_locations),
+        )
+
+    if not results:
+        return []
+
+    if boost_factor <= 1.0:
+        # Still return new objects to follow immutable transformation pattern.
+        return [rebuild(item, item.score, {**item.metadata}) for item in results]
+
+    boosted: List[SearchResult] = []
+    for item in results:
+        base_score = float(item.score)
+        meta = {**item.metadata}
+        if item.symbol_name:
+            new_score = base_score * boost_factor
+            meta.setdefault("original_fusion_score", meta.get("fusion_score", base_score))
+            meta["boosted"] = True
+            meta["symbol_boost_factor"] = boost_factor
+        else:
+            new_score = base_score
+        boosted.append(rebuild(item, new_score, meta))
+
+    return sorted(boosted, key=lambda r: r.score, reverse=True)
+def rerank_results(
+    query: str,
+    results: List[SearchResult],
+    embedder: Any,
+    top_k: int = 50,
+) -> List[SearchResult]:
+    """Re-rank results with embedding cosine similarity, combined with current score.
+
+    Combined score formula:
+        0.5 * rrf_score + 0.5 * cosine_similarity
+
+    Only the first ``min(top_k, len(results))`` results are embedded and
+    re-scored; the remaining results keep their score. On the re-ranking path
+    every returned item is a new SearchResult and the list is sorted by score,
+    highest first.
+
+    The input list is returned as-is when the embedder is None, ``top_k`` is
+    not positive, embedding raises, or the embedder yields fewer document
+    vectors than were requested.
+
+    Args:
+        query: Query text to embed.
+        results: Results to re-rank, in their current order.
+        embedder: Object exposing ``embed(texts)`` and optionally
+            ``embed_single(text)``.
+        top_k: Maximum number of leading results to re-score.
+
+    Returns:
+        The re-ranked list, or ``results`` itself on any fallback path
+        (an empty list when ``results`` is empty).
+    """
+    if not results:
+        return []
+    if embedder is None or top_k <= 0:
+        return results
+
+    rerank_count = min(int(top_k), len(results))
+
+    def cosine_similarity(vec_a: List[float], vec_b: List[float]) -> float:
+        # Defensive: zip stops at the shorter vector, and empty or zero
+        # vectors fall through to the zero-norm guard below.
+        dot = 0.0
+        norm_a = 0.0
+        norm_b = 0.0
+        for raw_a, raw_b in zip(vec_a, vec_b):
+            a = float(raw_a)
+            b = float(raw_b)
+            dot += a * b
+            norm_a += a * a
+            norm_b += b * b
+        if norm_a <= 0.0 or norm_b <= 0.0:
+            return 0.0
+        sim = dot / (math.sqrt(norm_a) * math.sqrt(norm_b))
+        # SearchResult.score requires non-negative scores; clamp cosine similarity to [0, 1].
+        return max(0.0, min(1.0, sim))
+
+    def text_for_embedding(r: SearchResult) -> str:
+        if r.excerpt and r.excerpt.strip():
+            return r.excerpt
+        if r.content and r.content.strip():
+            return r.content
+        if r.chunk and r.chunk.content and r.chunk.content.strip():
+            return r.chunk.content
+        # Fallback: stable, non-empty text.
+        return r.symbol_name or r.path
+
+    def rebuild(source: SearchResult, score: float, metadata: Dict[str, Any]) -> SearchResult:
+        # Fresh SearchResult so the caller's objects are never mutated.
+        return SearchResult(
+            path=source.path,
+            score=score,
+            excerpt=source.excerpt,
+            content=source.content,
+            symbol=source.symbol,
+            chunk=source.chunk,
+            metadata=metadata,
+            start_line=source.start_line,
+            end_line=source.end_line,
+            symbol_name=source.symbol_name,
+            symbol_kind=source.symbol_kind,
+            additional_locations=list(source.additional_locations),
+        )
+
+    try:
+        if hasattr(embedder, "embed_single"):
+            query_vec = embedder.embed_single(query)
+        else:
+            query_vec = embedder.embed(query)[0]
+        doc_texts = [text_for_embedding(r) for r in results[:rerank_count]]
+        # Materialize so lists, arrays and generators can all be indexed
+        # and length-checked the same way.
+        doc_vecs = list(embedder.embed(doc_texts))
+    except Exception:
+        # Best-effort by design: a failing embedder must not break search.
+        return results
+
+    # An embedder that returns too few vectors counts as an embedding failure;
+    # without this check the indexing below would raise IndexError.
+    if len(doc_vecs) < rerank_count:
+        return results
+
+    reranked_results: List[SearchResult] = []
+    for idx, result in enumerate(results):
+        if idx >= rerank_count:
+            # Preserve remaining results without re-ranking, but keep immutability.
+            reranked_results.append(rebuild(result, result.score, {**result.metadata}))
+            continue
+        rrf_score = float(result.score)
+        sim = cosine_similarity(query_vec, doc_vecs[idx])
+        combined_score = 0.5 * rrf_score + 0.5 * sim
+        reranked_results.append(
+            rebuild(
+                result,
+                combined_score,
+                {
+                    **result.metadata,
+                    "rrf_score": rrf_score,
+                    "cosine_similarity": sim,
+                    "reranked": True,
+                },
+            )
+        )
+
+    reranked_results.sort(key=lambda r: r.score, reverse=True)
+    return reranked_results
 def normalize_bm25_score(score: float) -> float:
     """Normalize BM25 scores from SQLite FTS5 to 0-1 range.

package/codex-lens/src/codexlens/semantic/__pycache__/chunker.cpython-313.pyc CHANGED Viewed

Binary file

package/codex-lens/src/codexlens/semantic/chunker.py CHANGED Viewed

@@ -392,6 +392,22 @@ class HybridChunker:
                 filtered.append(symbol)
         return filtered
+    def _find_parent_symbol(
+        self,
+        start_line: int,
+        end_line: int,
+        symbols: List[Symbol],
+    ) -> Optional[Symbol]:
+        """Find the smallest symbol range that fully contains a docstring span.
+
+        Args:
+            start_line: First line of the span.
+            end_line: Last line of the span.
+            symbols: Symbols whose ``range`` is a (start, end) pair.
+
+        Returns:
+            The enclosing symbol with the shortest range, ties broken by the
+            lower start line, or None when no symbol encloses the span.
+        """
+        def encloses(symbol) -> bool:
+            first, last = symbol.range
+            return first <= start_line and end_line <= last
+
+        enclosing = [symbol for symbol in symbols if encloses(symbol)]
+        return min(
+            enclosing,
+            key=lambda s: (s.range[1] - s.range[0], s.range[0]),
+            default=None,
+        )
     def chunk_file(
         self,
         content: str,
@@ -414,24 +430,53 @@ class HybridChunker:
         chunks: List[SemanticChunk] = []
         # Step 1: Extract docstrings as dedicated chunks
-        docstrings = self.docstring_extractor.extract_docstrings(content, language)
+        docstrings: List[Tuple[str, int, int]] = []
+        if language == "python":
+            # Fast path: avoid expensive docstring extraction if delimiters are absent.
+            if '"""' in content or "'''" in content:
+                docstrings = self.docstring_extractor.extract_docstrings(content, language)
+        elif language in {"javascript", "typescript"}:
+            if "/**" in content:
+                docstrings = self.docstring_extractor.extract_docstrings(content, language)
+        else:
+            docstrings = self.docstring_extractor.extract_docstrings(content, language)
+        # Fast path: no docstrings -> delegate to base chunker directly.
+        if not docstrings:
+            if symbols:
+                base_chunks = self.base_chunker.chunk_by_symbol(
+                    content, symbols, file_path, language, symbol_token_counts
+                )
+            else:
+                base_chunks = self.base_chunker.chunk_sliding_window(content, file_path, language)
+            for chunk in base_chunks:
+                chunk.metadata["strategy"] = "hybrid"
+                chunk.metadata["chunk_type"] = "code"
+            return base_chunks
         for docstring_content, start_line, end_line in docstrings:
             if len(docstring_content.strip()) >= self.config.min_chunk_size:
+                parent_symbol = self._find_parent_symbol(start_line, end_line, symbols)
                 # Use base chunker's token estimation method
                 token_count = self.base_chunker._estimate_token_count(docstring_content)
+                metadata = {
+                    "file": str(file_path),
+                    "language": language,
+                    "chunk_type": "docstring",
+                    "start_line": start_line,
+                    "end_line": end_line,
+                    "strategy": "hybrid",
+                    "token_count": token_count,
+                }
+                if parent_symbol is not None:
+                    metadata["parent_symbol"] = parent_symbol.name
+                    metadata["parent_symbol_kind"] = parent_symbol.kind
+                    metadata["parent_symbol_range"] = parent_symbol.range
                 chunks.append(SemanticChunk(
                     content=docstring_content,
                     embedding=None,
-                    metadata={
-                        "file": str(file_path),
-                        "language": language,
-                        "chunk_type": "docstring",
-                        "start_line": start_line,
-                        "end_line": end_line,
-                        "strategy": "hybrid",
-                        "token_count": token_count,
-                    }
+                    metadata=metadata
                 ))
         # Step 2: Get line ranges occupied by docstrings

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "claude-code-workflow",
-  "version": "6.3.4",
+  "version": "6.3.6",
   "description": "JSON-driven multi-agent development framework with intelligent CLI orchestration (Gemini/Qwen/Codex), context-first architecture, and automated workflow execution",
   "type": "module",
   "main": "ccw/src/index.js",
@@ -11,7 +11,7 @@
   "scripts": {
     "build": "tsc -p ccw/tsconfig.json",
     "start": "node ccw/bin/ccw.js",
-    "test": "node --test",
+    "test": "node --test ccw/tests/*.test.js",
     "prepublishOnly": "npm run build && echo 'Ready to publish @dyw/claude-code-workflow'"
   },
   "keywords": [