claude-code-workflow 6.3.26 → 6.3.28
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/CLAUDE.md +7 -1
- package/.claude/agents/action-planning-agent.md +1 -0
- package/.claude/agents/cli-discuss-agent.md +391 -0
- package/.claude/agents/cli-execution-agent.md +2 -0
- package/.claude/agents/cli-explore-agent.md +2 -1
- package/.claude/agents/cli-lite-planning-agent.md +1 -0
- package/.claude/agents/cli-planning-agent.md +1 -0
- package/.claude/agents/code-developer.md +1 -0
- package/.claude/agents/conceptual-planning-agent.md +2 -0
- package/.claude/agents/context-search-agent.md +1 -0
- package/.claude/agents/debug-explore-agent.md +2 -0
- package/.claude/agents/doc-generator.md +1 -0
- package/.claude/agents/issue-plan-agent.md +2 -1
- package/.claude/agents/issue-queue-agent.md +2 -1
- package/.claude/agents/memory-bridge.md +2 -0
- package/.claude/agents/test-context-search-agent.md +2 -0
- package/.claude/agents/test-fix-agent.md +1 -0
- package/.claude/agents/ui-design-agent.md +2 -0
- package/.claude/agents/universal-executor.md +1 -0
- package/.claude/commands/issue/execute.md +141 -163
- package/.claude/commands/workflow/lite-lite-lite.md +798 -0
- package/.claude/commands/workflow/multi-cli-plan.md +510 -0
- package/.claude/skills/ccw/SKILL.md +262 -372
- package/.claude/skills/ccw/command.json +547 -0
- package/.claude/skills/ccw-help/SKILL.md +46 -107
- package/.claude/skills/ccw-help/command.json +511 -0
- package/.claude/skills/skill-tuning/SKILL.md +303 -0
- package/.claude/skills/skill-tuning/phases/actions/action-abort.md +164 -0
- package/.claude/skills/skill-tuning/phases/actions/action-analyze-requirements.md +406 -0
- package/.claude/skills/skill-tuning/phases/actions/action-apply-fix.md +206 -0
- package/.claude/skills/skill-tuning/phases/actions/action-complete.md +195 -0
- package/.claude/skills/skill-tuning/phases/actions/action-diagnose-agent.md +317 -0
- package/.claude/skills/skill-tuning/phases/actions/action-diagnose-context.md +243 -0
- package/.claude/skills/skill-tuning/phases/actions/action-diagnose-dataflow.md +318 -0
- package/.claude/skills/skill-tuning/phases/actions/action-diagnose-docs.md +299 -0
- package/.claude/skills/skill-tuning/phases/actions/action-diagnose-memory.md +269 -0
- package/.claude/skills/skill-tuning/phases/actions/action-diagnose-token-consumption.md +200 -0
- package/.claude/skills/skill-tuning/phases/actions/action-gemini-analysis.md +322 -0
- package/.claude/skills/skill-tuning/phases/actions/action-generate-report.md +228 -0
- package/.claude/skills/skill-tuning/phases/actions/action-init.md +149 -0
- package/.claude/skills/skill-tuning/phases/actions/action-propose-fixes.md +317 -0
- package/.claude/skills/skill-tuning/phases/actions/action-verify.md +222 -0
- package/.claude/skills/skill-tuning/phases/orchestrator.md +377 -0
- package/.claude/skills/skill-tuning/phases/state-schema.md +378 -0
- package/.claude/skills/skill-tuning/specs/category-mappings.json +284 -0
- package/.claude/skills/skill-tuning/specs/dimension-mapping.md +212 -0
- package/.claude/skills/skill-tuning/specs/problem-taxonomy.md +318 -0
- package/.claude/skills/skill-tuning/specs/quality-gates.md +263 -0
- package/.claude/skills/skill-tuning/specs/skill-authoring-principles.md +189 -0
- package/.claude/skills/skill-tuning/specs/tuning-strategies.md +1537 -0
- package/.claude/skills/skill-tuning/templates/diagnosis-report.md +153 -0
- package/.claude/skills/skill-tuning/templates/fix-proposal.md +204 -0
- package/.claude/workflows/cli-templates/schemas/multi-cli-discussion-schema.json +421 -0
- package/.claude/workflows/cli-tools-usage.md +0 -41
- package/ccw/dist/core/auth/csrf-middleware.d.ts.map +1 -1
- package/ccw/dist/core/auth/csrf-middleware.js +3 -1
- package/ccw/dist/core/auth/csrf-middleware.js.map +1 -1
- package/ccw/dist/core/data-aggregator.d.ts +2 -0
- package/ccw/dist/core/data-aggregator.d.ts.map +1 -1
- package/ccw/dist/core/data-aggregator.js +5 -2
- package/ccw/dist/core/data-aggregator.js.map +1 -1
- package/ccw/dist/core/lite-scanner.d.ts +2 -1
- package/ccw/dist/core/lite-scanner.d.ts.map +1 -1
- package/ccw/dist/core/lite-scanner.js +295 -6
- package/ccw/dist/core/lite-scanner.js.map +1 -1
- package/ccw/dist/core/routes/codexlens/config-handlers.d.ts.map +1 -1
- package/ccw/dist/core/routes/codexlens/config-handlers.js +5 -5
- package/ccw/dist/core/routes/codexlens/config-handlers.js.map +1 -1
- package/ccw/dist/core/routes/session-routes.d.ts.map +1 -1
- package/ccw/dist/core/routes/session-routes.js +166 -48
- package/ccw/dist/core/routes/session-routes.js.map +1 -1
- package/ccw/dist/core/routes/system-routes.d.ts.map +1 -1
- package/ccw/dist/core/routes/system-routes.js +87 -0
- package/ccw/dist/core/routes/system-routes.js.map +1 -1
- package/ccw/dist/core/server.js +2 -2
- package/ccw/dist/core/server.js.map +1 -1
- package/ccw/scripts/IMPLEMENTATION-SUMMARY.md +226 -0
- package/ccw/scripts/QUICK-REFERENCE.md +135 -0
- package/ccw/scripts/README-memory-embedder.md +157 -0
- package/ccw/scripts/__pycache__/memory_embedder.cpython-313.pyc +0 -0
- package/ccw/scripts/__pycache__/test_memory_embedder.cpython-313-pytest-8.4.2.pyc +0 -0
- package/ccw/scripts/memory-embedder-example.ts +184 -0
- package/ccw/scripts/memory_embedder.py +428 -0
- package/ccw/scripts/test_memory_embedder.py +245 -0
- package/ccw/src/core/auth/csrf-middleware.ts +3 -1
- package/ccw/src/core/data-aggregator.ts +7 -2
- package/ccw/src/core/lite-scanner.ts +440 -6
- package/ccw/src/core/routes/codexlens/config-handlers.ts +12 -9
- package/ccw/src/core/routes/session-routes.ts +201 -48
- package/ccw/src/core/routes/system-routes.ts +102 -0
- package/ccw/src/core/server.ts +2 -2
- package/ccw/src/templates/dashboard-css/01-base.css +8 -0
- package/ccw/src/templates/dashboard-css/02-session.css +81 -0
- package/ccw/src/templates/dashboard-css/04-lite-tasks.css +2442 -0
- package/ccw/src/templates/dashboard-css/21-cli-toolmgmt.css +157 -0
- package/ccw/src/templates/dashboard-css/32-issue-manager.css +23 -0
- package/ccw/src/templates/dashboard-js/components/cli-stream-viewer.js +38 -4
- package/ccw/src/templates/dashboard-js/components/hook-manager.js +38 -13
- package/ccw/src/templates/dashboard-js/components/navigation.js +24 -4
- package/ccw/src/templates/dashboard-js/i18n.js +194 -6
- package/ccw/src/templates/dashboard-js/views/api-settings.js +32 -0
- package/ccw/src/templates/dashboard-js/views/claude-manager.js +44 -3
- package/ccw/src/templates/dashboard-js/views/cli-manager.js +303 -31
- package/ccw/src/templates/dashboard-js/views/history.js +44 -6
- package/ccw/src/templates/dashboard-js/views/home.js +1 -0
- package/ccw/src/templates/dashboard-js/views/issue-manager.js +54 -7
- package/ccw/src/templates/dashboard-js/views/lite-tasks.js +1817 -4
- package/ccw/src/templates/dashboard.html +5 -0
- package/package.json +2 -1
- package/.claude/skills/ccw/index/command-capabilities.json +0 -127
- package/.claude/skills/ccw/index/intent-rules.json +0 -136
- package/.claude/skills/ccw/index/workflow-chains.json +0 -451
- package/.claude/skills/ccw/phases/actions/bugfix.md +0 -218
- package/.claude/skills/ccw/phases/actions/coupled.md +0 -194
- package/.claude/skills/ccw/phases/actions/docs.md +0 -93
- package/.claude/skills/ccw/phases/actions/full.md +0 -154
- package/.claude/skills/ccw/phases/actions/issue.md +0 -201
- package/.claude/skills/ccw/phases/actions/rapid.md +0 -104
- package/.claude/skills/ccw/phases/actions/review-fix.md +0 -84
- package/.claude/skills/ccw/phases/actions/tdd.md +0 -66
- package/.claude/skills/ccw/phases/actions/ui.md +0 -79
- package/.claude/skills/ccw/phases/orchestrator.md +0 -435
- package/.claude/skills/ccw/specs/intent-classification.md +0 -336
- package/.claude/skills/ccw-help/index/all-agents.json +0 -82
- package/.claude/skills/ccw-help/index/all-commands.json +0 -882
- package/.claude/skills/ccw-help/index/by-category.json +0 -914
- package/.claude/skills/ccw-help/index/by-use-case.json +0 -896
- package/.claude/skills/ccw-help/index/command-relationships.json +0 -160
- package/.claude/skills/ccw-help/index/essential-commands.json +0 -112
|
@@ -0,0 +1,428 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
Memory Embedder - Bridge CCW to CodexLens semantic search
|
|
4
|
+
|
|
5
|
+
This script generates and searches embeddings for memory chunks stored in CCW's
|
|
6
|
+
SQLite database using CodexLens's embedder.
|
|
7
|
+
|
|
8
|
+
Usage:
|
|
9
|
+
python memory_embedder.py embed <db_path> [--source-id ID] [--batch-size N] [--force]
|
|
10
|
+
python memory_embedder.py search <db_path> <query> [--top-k N] [--min-score F] [--type TYPE]
|
|
11
|
+
python memory_embedder.py status <db_path>
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
import argparse
|
|
15
|
+
import json
|
|
16
|
+
import sqlite3
|
|
17
|
+
import sys
|
|
18
|
+
import time
|
|
19
|
+
from pathlib import Path
|
|
20
|
+
from typing import List, Dict, Any, Optional, Tuple
|
|
21
|
+
|
|
22
|
+
try:
|
|
23
|
+
import numpy as np
|
|
24
|
+
except ImportError:
|
|
25
|
+
print("Error: numpy is required. Install with: pip install numpy", file=sys.stderr)
|
|
26
|
+
sys.exit(1)
|
|
27
|
+
|
|
28
|
+
try:
|
|
29
|
+
from codexlens.semantic.factory import get_embedder as get_embedder_factory
|
|
30
|
+
from codexlens.semantic.factory import clear_embedder_cache
|
|
31
|
+
from codexlens.config import Config as CodexLensConfig
|
|
32
|
+
except ImportError:
|
|
33
|
+
print("Error: CodexLens not found. Install with: pip install codexlens[semantic]", file=sys.stderr)
|
|
34
|
+
sys.exit(1)
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
class MemoryEmbedder:
    """Generate and search embeddings for memory chunks.

    Bridges a CCW SQLite database (table ``memory_chunks``) to a CodexLens
    embedder: ``embed_chunks`` fills the ``embedding`` BLOB column with
    float32 bytes, ``search`` ranks chunks by cosine similarity against a
    query, and ``get_status`` reports progress without loading any model.
    """

    def __init__(self, db_path: str):
        """Open the database and load CodexLens embedding settings.

        Args:
            db_path: Path to the CCW SQLite database.

        Raises:
            FileNotFoundError: If ``db_path`` does not exist.
        """
        self.db_path = Path(db_path)
        if not self.db_path.exists():
            raise FileNotFoundError(f"Database not found: {db_path}")

        self.conn = sqlite3.connect(str(self.db_path))
        self.conn.row_factory = sqlite3.Row

        # Load CodexLens configuration for embedding settings; fall back to
        # defaults so a broken config file does not make the tool unusable.
        try:
            self._config = CodexLensConfig.load()
        except Exception as e:
            print(f"Warning: Could not load CodexLens config, using defaults. Error: {e}", file=sys.stderr)
            self._config = CodexLensConfig()  # Use default config

        # Lazy-load embedder to avoid ~0.8s model loading for status command
        self._embedder = None
        self._embedding_dim = None

    @property
    def embedding_dim(self) -> int:
        """Embedding dimension reported by the (lazily created) embedder."""
        if self._embedding_dim is None:
            # Accessing self.embedder triggers the lazy model load.
            self._embedding_dim = self.embedder.embedding_dim
        return self._embedding_dim

    @property
    def embedder(self):
        """Lazy-load the embedder on first access using CodexLens config."""
        if self._embedder is None:
            # Use CodexLens configuration settings
            backend = self._config.embedding_backend
            model = self._config.embedding_model
            use_gpu = self._config.embedding_use_gpu

            # Use factory to create embedder based on backend type
            if backend == "fastembed":
                self._embedder = get_embedder_factory(
                    backend="fastembed",
                    profile=model,
                    use_gpu=use_gpu
                )
            elif backend == "litellm":
                # For litellm backend, also pass endpoints if configured
                endpoints = self._config.embedding_endpoints
                strategy = self._config.embedding_strategy
                cooldown = self._config.embedding_cooldown

                self._embedder = get_embedder_factory(
                    backend="litellm",
                    model=model,
                    endpoints=endpoints if endpoints else None,
                    strategy=strategy,
                    cooldown=cooldown,
                )
            else:
                # Unknown backend: fall back to fastembed with code profile
                self._embedder = get_embedder_factory(
                    backend="fastembed",
                    profile="code",
                    use_gpu=True
                )
        return self._embedder

    def close(self):
        """Close database connection."""
        if self.conn:
            self.conn.close()

    def __enter__(self):
        """Support ``with MemoryEmbedder(path) as e:`` usage."""
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        # Always release the DB connection; never suppress exceptions.
        self.close()
        return False

    def embed_chunks(
        self,
        source_id: Optional[str] = None,
        batch_size: int = 8,
        force: bool = False
    ) -> Dict[str, Any]:
        """
        Generate embeddings for unembedded chunks.

        Args:
            source_id: Only process chunks from this source
            batch_size: Number of chunks to process in each batch
            force: Re-embed chunks that already have embeddings

        Returns:
            Result dict with success, chunks_processed, chunks_failed, elapsed_time
        """
        start_time = time.time()

        # Build query: with force=True all rows qualify, otherwise only rows
        # whose embedding column is still NULL.
        query = "SELECT id, source_id, source_type, chunk_index, content FROM memory_chunks"
        params: List[Any] = []

        if force:
            # Process all chunks (with optional source filter)
            if source_id:
                query += " WHERE source_id = ?"
                params.append(source_id)
        else:
            # Only process chunks without embeddings
            query += " WHERE embedding IS NULL"
            if source_id:
                query += " AND source_id = ?"
                params.append(source_id)

        query += " ORDER BY id"

        cursor = self.conn.cursor()
        cursor.execute(query, params)

        chunks_processed = 0
        chunks_failed = 0
        batch: List[str] = []
        batch_ids: List[int] = []

        for row in cursor:
            batch.append(row["content"])
            batch_ids.append(row["id"])

            # Flush a full batch to the embedder
            if len(batch) >= batch_size:
                processed, failed = self._process_batch(batch, batch_ids)
                chunks_processed += processed
                chunks_failed += failed
                batch = []
                batch_ids = []

        # Flush the final partial batch
        if batch:
            processed, failed = self._process_batch(batch, batch_ids)
            chunks_processed += processed
            chunks_failed += failed

        elapsed_time = time.time() - start_time

        return {
            "success": chunks_failed == 0,
            "chunks_processed": chunks_processed,
            "chunks_failed": chunks_failed,
            "elapsed_time": round(elapsed_time, 2)
        }

    def _process_batch(self, texts: List[str], ids: List[int]) -> Tuple[int, int]:
        """Embed one batch of texts and persist the vectors.

        Returns:
            (processed, failed) row counts. A batch-level embedder error
            fails the whole batch; per-row DB errors fail only that row.
        """
        try:
            # Generate embeddings for batch
            embeddings = self.embedder.embed(texts)

            processed = 0
            failed = 0

            # Update database: store each vector as raw float32 bytes
            cursor = self.conn.cursor()
            for chunk_id, embedding in zip(ids, embeddings):
                try:
                    emb_array = np.array(embedding, dtype=np.float32)
                    emb_bytes = emb_array.tobytes()

                    cursor.execute(
                        "UPDATE memory_chunks SET embedding = ? WHERE id = ?",
                        (emb_bytes, chunk_id)
                    )
                    processed += 1
                except Exception as e:
                    print(f"Error updating chunk {chunk_id}: {e}", file=sys.stderr)
                    failed += 1

            self.conn.commit()
            return processed, failed

        except Exception as e:
            print(f"Error processing batch: {e}", file=sys.stderr)
            return 0, len(ids)

    def search(
        self,
        query: str,
        top_k: int = 10,
        min_score: float = 0.3,
        source_type: Optional[str] = None
    ) -> Dict[str, Any]:
        """
        Perform semantic search on memory chunks.

        Args:
            query: Search query text
            top_k: Number of results to return
            min_score: Minimum similarity score (0-1)
            source_type: Filter by source type (core_memory, workflow, cli_history)

        Returns:
            Result dict with success and matches list
        """
        try:
            # Generate query embedding
            query_embedding = self.embedder.embed_single(query)
            query_array = np.array(query_embedding, dtype=np.float32)
            query_norm = float(np.linalg.norm(query_array))

            # Build database query
            sql = """
                SELECT id, source_id, source_type, chunk_index, content, embedding
                FROM memory_chunks
                WHERE embedding IS NOT NULL
            """
            params: List[Any] = []

            if source_type:
                sql += " AND source_type = ?"
                params.append(source_type)

            cursor = self.conn.cursor()
            cursor.execute(sql, params)

            # Calculate similarities
            matches = []
            for row in cursor:
                # Load embedding from bytes
                emb_array = np.frombuffer(row["embedding"], dtype=np.float32)

                # Skip chunks embedded with a different model/dimension:
                # np.dot on mismatched shapes would raise and abort the
                # entire search via the outer except.
                if emb_array.shape != query_array.shape:
                    continue

                # Cosine similarity; guard against zero-norm vectors which
                # would otherwise divide by zero and yield a NaN score.
                denom = query_norm * float(np.linalg.norm(emb_array))
                if denom == 0.0:
                    continue
                score = float(np.dot(query_array, emb_array) / denom)

                if score >= min_score:
                    # Generate restore command
                    restore_command = self._get_restore_command(
                        row["source_id"],
                        row["source_type"]
                    )

                    matches.append({
                        "source_id": row["source_id"],
                        "source_type": row["source_type"],
                        "chunk_index": row["chunk_index"],
                        "content": row["content"],
                        "score": round(score, 4),
                        "restore_command": restore_command
                    })

            # Sort by score and limit
            matches.sort(key=lambda x: x["score"], reverse=True)
            matches = matches[:top_k]

            return {
                "success": True,
                "matches": matches
            }

        except Exception as e:
            return {
                "success": False,
                "error": str(e),
                "matches": []
            }

    def _get_restore_command(self, source_id: str, source_type: str) -> str:
        """Generate restore command for a source."""
        if source_type in ("core_memory", "cli_history"):
            return f"ccw memory export {source_id}"
        elif source_type == "workflow":
            return f"ccw session resume {source_id}"
        else:
            return f"# Unknown source type: {source_type}"

    def get_status(self) -> Dict[str, Any]:
        """Get embedding status statistics.

        Returns:
            Dict with total/embedded/pending chunk counts, plus a per
            source_type breakdown. Does not load the embedding model.
        """
        cursor = self.conn.cursor()

        # Total chunks
        cursor.execute("SELECT COUNT(*) as count FROM memory_chunks")
        total_chunks = cursor.fetchone()["count"]

        # Embedded chunks
        cursor.execute("SELECT COUNT(*) as count FROM memory_chunks WHERE embedding IS NOT NULL")
        embedded_chunks = cursor.fetchone()["count"]

        # By type
        cursor.execute("""
            SELECT
                source_type,
                COUNT(*) as total,
                SUM(CASE WHEN embedding IS NOT NULL THEN 1 ELSE 0 END) as embedded
            FROM memory_chunks
            GROUP BY source_type
        """)

        by_type = {}
        for row in cursor:
            by_type[row["source_type"]] = {
                "total": row["total"],
                "embedded": row["embedded"],
                "pending": row["total"] - row["embedded"]
            }

        return {
            "total_chunks": total_chunks,
            "embedded_chunks": embedded_chunks,
            "pending_chunks": total_chunks - embedded_chunks,
            "by_type": by_type
        }
|
|
345
|
+
|
|
346
|
+
|
|
347
|
+
def main():
    """Command-line entry point.

    Parses the subcommand (embed/search/status), runs it against the given
    database, prints a JSON result to stdout (or a JSON error to stderr),
    and signals failure through the process exit code.
    """
    parser = argparse.ArgumentParser(
        description="Memory Embedder - Bridge CCW to CodexLens semantic search"
    )

    subparsers = parser.add_subparsers(dest="command", help="Command to execute")
    subparsers.required = True

    # embed: generate embeddings for chunks
    embed_parser = subparsers.add_parser("embed", help="Generate embeddings for chunks")
    embed_parser.add_argument("db_path", help="Path to SQLite database")
    embed_parser.add_argument("--source-id", help="Only process chunks from this source")
    embed_parser.add_argument("--batch-size", type=int, default=8, help="Batch size (default: 8)")
    embed_parser.add_argument("--force", action="store_true", help="Re-embed existing chunks")

    # search: semantic query over embedded chunks
    search_parser = subparsers.add_parser("search", help="Semantic search")
    search_parser.add_argument("db_path", help="Path to SQLite database")
    search_parser.add_argument("query", help="Search query")
    search_parser.add_argument("--top-k", type=int, default=10, help="Number of results (default: 10)")
    search_parser.add_argument("--min-score", type=float, default=0.3, help="Minimum score (default: 0.3)")
    search_parser.add_argument("--type", dest="source_type", help="Filter by source type")

    # status: embedding progress statistics
    status_parser = subparsers.add_parser("status", help="Get embedding status")
    status_parser.add_argument("db_path", help="Path to SQLite database")

    args = parser.parse_args()

    try:
        embedder = MemoryEmbedder(args.db_path)

        # Dispatch table instead of an if/elif chain; subparsers.required
        # guarantees args.command is one of these keys.
        actions = {
            "embed": lambda: embedder.embed_chunks(
                source_id=args.source_id,
                batch_size=args.batch_size,
                force=args.force,
            ),
            "search": lambda: embedder.search(
                query=args.query,
                top_k=args.top_k,
                min_score=args.min_score,
                source_type=args.source_type,
            ),
            "status": lambda: embedder.get_status(),
        }
        result = actions[args.command]()
        print(json.dumps(result, indent=2))

        embedder.close()

        # Exit with an error code when the operation reported failure.
        # Release ONNX resources first: fastembed/ONNX Runtime threads would
        # otherwise keep the interpreter from shutting down.
        if not result.get("success", True):
            clear_embedder_cache()
            sys.exit(1)

        clear_embedder_cache()

    except Exception as e:
        # Clean up ONNX resources even on error
        try:
            clear_embedder_cache()
        except Exception:
            pass
        print(json.dumps({
            "success": False,
            "error": str(e)
        }, indent=2), file=sys.stderr)
        sys.exit(1)


if __name__ == "__main__":
    main()
|
|
@@ -0,0 +1,245 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
Test script for memory_embedder.py
|
|
4
|
+
|
|
5
|
+
Creates a temporary database with test data and verifies all commands work.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import json
|
|
9
|
+
import sqlite3
|
|
10
|
+
import tempfile
|
|
11
|
+
import subprocess
|
|
12
|
+
from pathlib import Path
|
|
13
|
+
from datetime import datetime
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def create_test_database():
    """Build a throwaway SQLite database pre-populated with sample chunks.

    Returns:
        Path to the database file; the caller is responsible for deleting it.
    """
    # Reserve a temp file, closed immediately so sqlite3 can reopen it
    handle = tempfile.NamedTemporaryFile(suffix='.db', delete=False)
    handle.close()

    conn = sqlite3.connect(handle.name)
    cursor = conn.cursor()

    # Schema mirrors CCW's memory_chunks table
    cursor.execute("""
        CREATE TABLE memory_chunks (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            source_id TEXT NOT NULL,
            source_type TEXT NOT NULL,
            chunk_index INTEGER NOT NULL,
            content TEXT NOT NULL,
            embedding BLOB,
            metadata TEXT,
            created_at TEXT NOT NULL,
            UNIQUE(source_id, chunk_index)
        )
    """)

    # Sample rows spanning all three source types
    samples = [
        ("CMEM-20250101-001", "core_memory", 0, "Implemented authentication using JWT tokens with refresh mechanism"),
        ("CMEM-20250101-001", "core_memory", 1, "Added rate limiting to API endpoints using Redis"),
        ("WFS-20250101-auth", "workflow", 0, "Created login endpoint with password hashing"),
        ("WFS-20250101-auth", "workflow", 1, "Implemented session management with token rotation"),
        ("CLI-20250101-001", "cli_history", 0, "Executed database migration for user table"),
    ]

    stamp = datetime.now().isoformat()
    cursor.executemany(
        """
        INSERT INTO memory_chunks (source_id, source_type, chunk_index, content, created_at)
        VALUES (?, ?, ?, ?, ?)
        """,
        [(sid, stype, idx, text, stamp) for sid, stype, idx, text in samples],
    )

    conn.commit()
    conn.close()

    return handle.name
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
def run_command(args):
    """Invoke memory_embedder.py in a subprocess with the given CLI args.

    Returns:
        (returncode, stdout, stderr) of the completed process.
    """
    # The script under test lives next to this test file
    script = Path(__file__).parent / "memory_embedder.py"

    completed = subprocess.run(
        ["python", str(script), *args],
        capture_output=True,
        text=True,
    )

    return completed.returncode, completed.stdout, completed.stderr
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
def test_status(db_path):
    """Exercise the `status` subcommand against a fresh (unembedded) DB."""
    print("Testing status command...")
    code, out, err = run_command(["status", db_path])

    if code != 0:
        print(f"[FAIL] Status failed: {err}")
        return False

    payload = json.loads(out)
    expected_total = 5

    # The fixture has 5 chunks, none of them embedded yet.
    if payload["total_chunks"] != expected_total:
        print(f"[FAIL] Expected {expected_total} chunks, got {payload['total_chunks']}")
        return False

    if payload["embedded_chunks"] != 0:
        print(f"[FAIL] Expected 0 embedded chunks, got {payload['embedded_chunks']}")
        return False

    print(f"[PASS] Status OK: {payload['total_chunks']} total, {payload['embedded_chunks']} embedded")
    return True
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
def test_embed(db_path):
    """Exercise the `embed` subcommand with a small batch size."""
    print("\nTesting embed command...")
    code, out, err = run_command(["embed", db_path, "--batch-size", "2"])

    if code != 0:
        print(f"[FAIL] Embed failed: {err}")
        return False

    payload = json.loads(out)

    # All five fixture chunks must embed without failures.
    if not payload["success"]:
        print(f"[FAIL] Embed unsuccessful")
        return False

    if payload["chunks_processed"] != 5:
        print(f"[FAIL] Expected 5 processed, got {payload['chunks_processed']}")
        return False

    if payload["chunks_failed"] != 0:
        print(f"[FAIL] Expected 0 failed, got {payload['chunks_failed']}")
        return False

    print(f"[PASS] Embed OK: {payload['chunks_processed']} processed in {payload['elapsed_time']}s")
    return True
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
def test_search(db_path):
    """Exercise the `search` subcommand and display the best hit."""
    print("\nTesting search command...")
    code, out, err = run_command([
        "search", db_path, "authentication JWT",
        "--top-k", "3",
        "--min-score", "0.3"
    ])

    if code != 0:
        print(f"[FAIL] Search failed: {err}")
        return False

    payload = json.loads(out)

    if not payload["success"]:
        print(f"[FAIL] Search unsuccessful: {payload.get('error', 'Unknown error')}")
        return False

    if not payload["matches"]:
        print(f"[FAIL] Expected at least 1 match, got 0")
        return False

    print(f"[PASS] Search OK: {len(payload['matches'])} matches found")

    # Show top match
    best = payload["matches"][0]
    print(f"  Top match: {best['source_id']} (score: {best['score']})")
    print(f"  Content: {best['content'][:60]}...")

    return True
|
|
161
|
+
|
|
162
|
+
|
|
163
|
+
def test_source_filter(db_path):
    """Verify that --type restricts results to the requested source type."""
    print("\nTesting source type filter...")
    code, out, err = run_command([
        "search", db_path, "authentication",
        "--type", "workflow"
    ])

    if code != 0:
        print(f"[FAIL] Filtered search failed: {err}")
        return False

    payload = json.loads(out)

    if not payload["success"]:
        print(f"[FAIL] Filtered search unsuccessful")
        return False

    # Every hit must carry the requested source type.
    for hit in payload["matches"]:
        if hit["source_type"] != "workflow":
            print(f"[FAIL] Expected workflow type, got {hit['source_type']}")
            return False

    print(f"[PASS] Filter OK: {len(payload['matches'])} workflow matches")
    return True
|
|
189
|
+
|
|
190
|
+
|
|
191
|
+
def main():
    """Run the full test suite against a temporary database.

    Returns:
        0 if every test passed, 1 otherwise (suitable for ``exit()``).
    """
    print("Memory Embedder Test Suite")
    print("=" * 60)

    # Create test database
    print("\nCreating test database...")
    db_path = create_test_database()
    print(f"[PASS] Database created: {db_path}")

    try:
        # Each entry: (display name, test callable taking the db path)
        tests = [
            ("Status", test_status),
            ("Embed", test_embed),
            ("Search", test_search),
            ("Source Filter", test_source_filter),
        ]

        passed = 0
        failed = 0

        for name, test_func in tests:
            try:
                if test_func(db_path):
                    passed += 1
                else:
                    failed += 1
            except Exception as e:
                # A crashing test counts as a failure but must not stop the suite.
                print(f"[FAIL] {name} crashed: {e}")
                failed += 1

        # Summary
        print("\n" + "=" * 60)
        print(f"Results: {passed} passed, {failed} failed")

        if failed == 0:
            print("[PASS] All tests passed!")
            return 0
        else:
            print("[FAIL] Some tests failed")
            return 1

    finally:
        # Best-effort cleanup of the temporary database.
        import os
        try:
            os.unlink(db_path)
            print(f"\n[PASS] Cleaned up test database")
        except OSError:
            # Only file-system errors are expected here; a bare `except:`
            # would also swallow SystemExit/KeyboardInterrupt.
            pass


if __name__ == "__main__":
    exit(main())
|
|
@@ -113,7 +113,9 @@ export async function csrfValidation(ctx: CsrfMiddlewareContext): Promise<boolea
|
|
|
113
113
|
const { pathname, req, res } = ctx;
|
|
114
114
|
|
|
115
115
|
if (!pathname.startsWith('/api/')) return true;
|
|
116
|
-
|
|
116
|
+
// CSRF is disabled by default for local deployment scenarios.
|
|
117
|
+
// Set CCW_ENABLE_CSRF=1 to enable CSRF protection.
|
|
118
|
+
if (!envFlagEnabled('CCW_ENABLE_CSRF')) return true;
|
|
117
119
|
|
|
118
120
|
const method = (req.method || 'GET').toUpperCase();
|
|
119
121
|
if (!['POST', 'PUT', 'PATCH', 'DELETE'].includes(method)) return true;
|