cortexcode 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,237 @@
1
+ """Semantic search over symbols — find symbols by meaning, not just name."""
2
+
3
+ import json
4
+ import math
5
+ import re
6
+ from collections import Counter
7
+ from pathlib import Path
8
+ from typing import Any
9
+
10
+
11
def tokenize(text: str) -> list[str]:
    """Split *text* into lowercase word tokens.

    Handles camelCase, PascalCase, acronym runs (e.g. "HTTPServer" ->
    "http", "server"), snake_case, kebab-case, path separators and dots.
    Alphabetic runs shorter than 2 characters are dropped, as are digits.
    """
    # Split an acronym run followed by a capitalized word: "HTTPServer" -> "HTTP Server".
    # The original only handled the lower->upper boundary, leaving acronyms fused.
    text = re.sub(r'([A-Z]+)([A-Z][a-z])', r'\1 \2', text)
    # Split the ordinary camelCase boundary: "getUser" -> "get User"
    text = re.sub(r'([a-z])([A-Z])', r'\1 \2', text)
    # Normalize snake_case, kebab-case, path separators and dots to spaces
    text = text.replace("_", " ").replace("-", " ").replace("/", " ").replace("\\", " ").replace(".", " ")
    # Keep alphabetic runs of length >= 2, lowercased
    return [t.lower() for t in re.findall(r'[a-zA-Z]{2,}', text)]
20
+
21
+
22
+ # Common programming synonyms for semantic expansion
23
# Common programming synonyms for semantic query expansion.
# Maps a query token to related terms so that e.g. a query for "auth" also
# matches symbols mentioning "login" or "jwt". Lookup is one-directional and
# single-hop (see expand_query): entries need not be symmetric, and synonyms
# of synonyms are NOT expanded. Keys and values must be lowercase to match
# the output of tokenize().
_SYNONYMS = {
    "auth": ["authentication", "authorize", "login", "signin", "credentials", "session", "token", "jwt"],
    "authentication": ["auth", "login", "signin", "credentials"],
    "login": ["auth", "signin", "authentication", "credentials"],
    "handler": ["handle", "controller", "action", "endpoint", "route", "api"],
    "controller": ["handler", "endpoint", "route"],
    "database": ["db", "model", "entity", "schema", "orm", "query", "repository", "store"],
    "model": ["entity", "schema", "database", "db"],
    "user": ["account", "profile", "member", "customer"],
    "create": ["add", "new", "insert", "post", "register", "save"],
    "delete": ["remove", "destroy", "drop"],
    "update": ["edit", "modify", "patch", "put", "save"],
    "get": ["fetch", "read", "find", "query", "retrieve", "list", "load"],
    "list": ["get", "fetch", "all", "index", "browse"],
    "component": ["widget", "ui", "view", "page", "screen"],
    "page": ["screen", "view", "route", "component"],
    "api": ["endpoint", "route", "handler", "rest"],
    "route": ["endpoint", "api", "path", "handler"],
    "test": ["spec", "assert", "expect", "mock"],
    "error": ["exception", "catch", "throw", "fail"],
    "config": ["configuration", "settings", "options", "env"],
    "nav": ["navigation", "menu", "sidebar", "header"],
    "button": ["btn", "click", "action"],
    "submit": ["send", "post", "save", "confirm"],
    "validate": ["check", "verify", "assert", "sanitize"],
    "search": ["find", "query", "filter", "lookup"],
    "file": ["upload", "download", "document", "attachment"],
    "notification": ["alert", "message", "toast", "notify"],
    "schedule": ["calendar", "booking", "appointment", "time"],
    # Domain-specific terms below (recruiting/hiring vocabulary) — presumably
    # tuned for a particular target codebase; verify they are still wanted.
    "interview": ["meeting", "call", "session", "conversation"],
    "candidate": ["applicant", "user", "profile"],
    "job": ["position", "role", "posting", "vacancy"],
}
56
+
57
+
58
def expand_query(query_tokens: list[str]) -> list[str]:
    """Return *query_tokens* followed by their synonyms.

    Synonyms come from the single-hop _SYNONYMS table; duplicates are
    skipped while the original token order is preserved.
    """
    seen = set(query_tokens)
    expanded = list(query_tokens)
    for token in query_tokens:
        for candidate in _SYNONYMS.get(token, []):
            if candidate not in seen:
                seen.add(candidate)
                expanded.append(candidate)
    return expanded
67
+
68
+
69
def build_symbol_documents(index: dict) -> list[dict]:
    """Build searchable documents from the symbols recorded in *index*.

    Each document echoes the symbol's metadata (name, type, file, line,
    params, calls, doc, framework) and adds a tokenized bag-of-words built
    from the name, params, callees, optional metadata fields, and the file
    path, for TF-IDF matching.

    Note: the original implementation also read index["call_graph"] into an
    unused local; that dead read is removed here.
    """
    docs = []
    for rel_path, file_data in index.get("files", {}).items():
        if not isinstance(file_data, dict):
            continue  # tolerate malformed index entries

        for sym in file_data.get("symbols", []):
            name = sym.get("name", "")
            # Rich text representation: name + params + callees first ...
            parts = [name]
            parts.extend(sym.get("params", []))
            parts.extend(sym.get("calls", []))
            # ... then optional string metadata, in a fixed order
            for key in ("doc", "class", "framework", "type"):
                if sym.get(key):
                    parts.append(sym[key])
            # File path supplies directory/module context
            # ("auth/handlers.py" -> "auth handlers py" after tokenize)
            parts.append(rel_path.replace("/", " ").replace("\\", " "))

            text = " ".join(parts)
            docs.append({
                "name": name,
                "type": sym.get("type"),
                "file": rel_path,
                "line": sym.get("line"),
                "params": sym.get("params", []),
                "calls": sym.get("calls", []),
                "doc": sym.get("doc"),
                "framework": sym.get("framework"),
                "tokens": tokenize(text),
                "text": text,
            })

    return docs
114
+
115
+
116
class TFIDFSearcher:
    """Simple TF-IDF based semantic search (no external dependencies).

    Documents are dicts as produced by build_symbol_documents: each must
    carry a "tokens" list plus the symbol metadata echoed back in results.
    """

    def __init__(self, documents: list[dict]):
        self.documents = documents
        # token -> inverse document frequency, filled by _build_idf
        self.idf: dict[str, float] = {}
        self._build_idf()

    def _build_idf(self) -> None:
        """Compute inverse document frequency for all terms.

        IDF is clamped at 0: raw log(n / (1 + df)) goes NEGATIVE for a term
        present in roughly half the documents or more, which made shared
        common terms actively push similarity DOWN. Clamping gives such
        ubiquitous terms zero weight instead.
        """
        n = len(self.documents)
        if n == 0:
            return

        doc_freq: dict[str, int] = Counter()
        for doc in self.documents:
            # Count each term once per document
            for token in set(doc["tokens"]):
                doc_freq[token] += 1

        for token, df in doc_freq.items():
            self.idf[token] = max(math.log(n / (1 + df)), 0.0)

    def _tfidf_vector(self, tokens: list[str]) -> dict[str, float]:
        """Compute a sparse TF-IDF vector (token -> weight) for a token list."""
        tf = Counter(tokens)
        total = len(tokens) or 1  # guard div-by-zero on empty input
        vector = {}
        for token, count in tf.items():
            # Unknown tokens (not in any indexed document) get weight 0
            vector[token] = (count / total) * self.idf.get(token, 0)
        return vector

    def _cosine_similarity(self, vec_a: dict[str, float], vec_b: dict[str, float]) -> float:
        """Cosine similarity between two sparse vectors; 0.0 when disjoint or degenerate."""
        common = set(vec_a.keys()) & set(vec_b.keys())
        if not common:
            return 0.0

        dot = sum(vec_a[k] * vec_b[k] for k in common)
        mag_a = math.sqrt(sum(v ** 2 for v in vec_a.values()))
        mag_b = math.sqrt(sum(v ** 2 for v in vec_b.values()))

        if mag_a == 0 or mag_b == 0:
            return 0.0

        return dot / (mag_a * mag_b)

    def search(self, query: str, limit: int = 10) -> list[dict]:
        """Search documents by semantic similarity to *query*.

        Cosine similarity over synonym-expanded query tokens, plus
        heuristic boosts: +0.5 for an exact name-token match, +0.2 for a
        synonym-only name match, +0.15 when a query token appears as a
        substring of the docstring. Scores <= 0.01 are dropped; at most
        *limit* results are returned, best first.
        """
        query_tokens = tokenize(query)
        if not query_tokens:
            return []

        # Expand with synonyms for better recall
        expanded_tokens = expand_query(query_tokens)
        query_vec = self._tfidf_vector(expanded_tokens)

        scored = []
        for doc in self.documents:
            score = self._cosine_similarity(query_vec, self._tfidf_vector(doc["tokens"]))

            # Boost exact name matches above synonym-only matches
            name_tokens = tokenize(doc.get("name", ""))
            if any(qt in name_tokens for qt in query_tokens):
                score += 0.5
            elif any(qt in name_tokens for qt in expanded_tokens):
                score += 0.2

            # Boost docstring mentions (lowercased substring match)
            if doc.get("doc"):
                doc_lower = doc["doc"].lower()
                if any(qt in doc_lower for qt in query_tokens):
                    score += 0.15

            if score > 0.01:
                scored.append((score, doc))

        # key= avoids comparing the doc dicts on score ties
        scored.sort(key=lambda x: x[0], reverse=True)

        return [
            {
                "name": doc["name"],
                "type": doc["type"],
                "file": doc["file"],
                "line": doc["line"],
                "params": doc["params"],
                "calls": doc["calls"][:5] if doc["calls"] else [],
                "doc": doc.get("doc"),
                "framework": doc.get("framework"),
                "score": round(score, 3),
            }
            for score, doc in scored[:limit]
        ]
214
+
215
+
216
def semantic_search(index_path: Path, query: str, limit: int = 10) -> dict[str, Any]:
    """Run semantic search over a saved index.

    Args:
        index_path: Path to index.json.
        query: Natural language query (e.g. "authentication handler",
            "database models").
        limit: Maximum number of results.

    Returns:
        Dict with the original query, the ranked results, and the total
        number of indexed symbols searched.
    """
    raw = index_path.read_text(encoding="utf-8")
    documents = build_symbol_documents(json.loads(raw))
    matches = TFIDFSearcher(documents).search(query, limit)
    return {
        "query": query,
        "results": matches,
        "total_symbols": len(documents),
    }
@@ -0,0 +1,241 @@
1
+ """Dependency vulnerability scanning — check for known vulnerable packages."""
2
+
3
+ import json
4
+ import re
5
+ from pathlib import Path
6
+ from typing import Any
7
+
8
+
9
def scan_dependencies(root: Path) -> dict[str, Any]:
    """Scan a project for dependency manifests and flag common issues.

    Looks for package.json, requirements.txt, pyproject.toml, go.mod and
    Cargo.toml directly under *root*, then runs project-wide checks
    (.env exposure, missing lockfile).

    Returns a dict with "scanned_files", "dependencies", "warnings",
    "total_dependencies" and "total_warnings".
    """
    root = Path(root).resolve()
    results: dict[str, Any] = {
        "scanned_files": [],
        "dependencies": [],
        "warnings": [],
    }

    # Manifest filename -> scanner; each scanner appends into `results`.
    scanners = (
        ("package.json", _scan_package_json),
        ("requirements.txt", _scan_requirements_txt),
        ("pyproject.toml", _scan_pyproject_toml),
        ("go.mod", _scan_go_mod),
        ("Cargo.toml", _scan_cargo_toml),
    )
    for filename, scanner in scanners:
        manifest = root / filename
        if manifest.exists():
            scanner(manifest, results)

    # Project-level checks independent of any single manifest
    _check_common_issues(root, results)

    results["total_dependencies"] = len(results["dependencies"])
    results["total_warnings"] = len(results["warnings"])
    return results
53
+
54
+
55
def _scan_package_json(path: Path, results: dict) -> None:
    """Parse package.json and record its (dev)dependencies into *results*.

    Unparseable or unreadable files are skipped silently (best-effort scan).
    """
    try:
        manifest = json.loads(path.read_text(encoding="utf-8"))
    except (json.JSONDecodeError, OSError):
        return
    results["scanned_files"].append(str(path.name))

    for section in ("dependencies", "devDependencies"):
        for name, version in manifest.get(section, {}).items():
            results["dependencies"].append({
                "name": name,
                "version": version,
                "source": "package.json",
                "dev": section == "devDependencies",
            })

            # Flag fully unpinned specifiers
            if version in ("*", "latest", ""):
                results["warnings"].append({
                    "package": name,
                    "severity": "medium",
                    "message": f"Unpinned version '{version}' — use a specific version range",
                })

            # Flag known risky specifier patterns (insecure URLs etc.)
            _check_npm_warnings(name, version, results)
84
+
85
+
86
+ def _scan_requirements_txt(path: Path, results: dict) -> None:
87
+ """Scan requirements.txt."""
88
+ try:
89
+ results["scanned_files"].append(str(path.name))
90
+ for line in path.read_text(encoding="utf-8").splitlines():
91
+ line = line.strip()
92
+ if not line or line.startswith("#") or line.startswith("-"):
93
+ continue
94
+
95
+ # Parse name==version or name>=version
96
+ match = re.match(r'^([a-zA-Z0-9_-]+)\s*([><=!~]+)?\s*(.*)$', line)
97
+ if match:
98
+ name = match.group(1)
99
+ op = match.group(2) or ""
100
+ version = match.group(3) or "unpinned"
101
+
102
+ results["dependencies"].append({
103
+ "name": name,
104
+ "version": f"{op}{version}" if op else version,
105
+ "source": "requirements.txt",
106
+ "dev": False,
107
+ })
108
+
109
+ if not op:
110
+ results["warnings"].append({
111
+ "package": name,
112
+ "severity": "medium",
113
+ "message": "No version constraint — pin to a specific version",
114
+ })
115
+ except OSError:
116
+ pass
117
+
118
+
119
+ def _scan_pyproject_toml(path: Path, results: dict) -> None:
120
+ """Scan pyproject.toml for dependencies."""
121
+ try:
122
+ content = path.read_text(encoding="utf-8")
123
+ results["scanned_files"].append(str(path.name))
124
+
125
+ # Simple TOML parsing for dependencies array
126
+ in_deps = False
127
+ for line in content.splitlines():
128
+ stripped = line.strip()
129
+ if stripped.startswith("dependencies") and "=" in stripped:
130
+ in_deps = True
131
+ continue
132
+ if in_deps:
133
+ if stripped == "]":
134
+ in_deps = False
135
+ continue
136
+ # Parse "package>=version"
137
+ match = re.search(r'"([^"]+)"', stripped)
138
+ if match:
139
+ dep_str = match.group(1)
140
+ dep_match = re.match(r'^([a-zA-Z0-9_-]+)\s*([><=!~]+)?\s*(.*)$', dep_str)
141
+ if dep_match:
142
+ results["dependencies"].append({
143
+ "name": dep_match.group(1),
144
+ "version": f"{dep_match.group(2) or ''}{dep_match.group(3) or 'unpinned'}",
145
+ "source": "pyproject.toml",
146
+ "dev": False,
147
+ })
148
+ except OSError:
149
+ pass
150
+
151
+
152
+ def _scan_go_mod(path: Path, results: dict) -> None:
153
+ """Scan go.mod."""
154
+ try:
155
+ content = path.read_text(encoding="utf-8")
156
+ results["scanned_files"].append("go.mod")
157
+
158
+ for line in content.splitlines():
159
+ line = line.strip()
160
+ if line.startswith("require") or line.startswith(")") or line.startswith("("):
161
+ continue
162
+ parts = line.split()
163
+ if len(parts) >= 2 and "/" in parts[0]:
164
+ results["dependencies"].append({
165
+ "name": parts[0],
166
+ "version": parts[1],
167
+ "source": "go.mod",
168
+ "dev": False,
169
+ })
170
+ except OSError:
171
+ pass
172
+
173
+
174
+ def _scan_cargo_toml(path: Path, results: dict) -> None:
175
+ """Scan Cargo.toml."""
176
+ try:
177
+ content = path.read_text(encoding="utf-8")
178
+ results["scanned_files"].append("Cargo.toml")
179
+
180
+ in_deps = False
181
+ for line in content.splitlines():
182
+ stripped = line.strip()
183
+ if stripped == "[dependencies]":
184
+ in_deps = True
185
+ continue
186
+ elif stripped.startswith("["):
187
+ in_deps = False
188
+ continue
189
+ if in_deps and "=" in stripped:
190
+ parts = stripped.split("=", 1)
191
+ name = parts[0].strip()
192
+ version = parts[1].strip().strip('"')
193
+ results["dependencies"].append({
194
+ "name": name,
195
+ "version": version,
196
+ "source": "Cargo.toml",
197
+ "dev": False,
198
+ })
199
+ except OSError:
200
+ pass
201
+
202
+
203
+ def _check_npm_warnings(name: str, version: str, results: dict) -> None:
204
+ """Check for commonly known risky npm patterns."""
205
+ # Check for http:// or git:// protocol in version
206
+ if version.startswith("http://") or version.startswith("git://"):
207
+ results["warnings"].append({
208
+ "package": name,
209
+ "severity": "high",
210
+ "message": "Insecure protocol in dependency URL",
211
+ })
212
+
213
+
214
+ def _check_common_issues(root: Path, results: dict) -> None:
215
+ """Check for common security issues in the project."""
216
+ # .env file committed
217
+ env_file = root / ".env"
218
+ if env_file.exists():
219
+ gitignore = root / ".gitignore"
220
+ if gitignore.exists():
221
+ gi_content = gitignore.read_text(encoding="utf-8", errors="ignore")
222
+ if ".env" not in gi_content:
223
+ results["warnings"].append({
224
+ "package": ".env",
225
+ "severity": "high",
226
+ "message": ".env file exists but is not in .gitignore — secrets may be exposed",
227
+ })
228
+ else:
229
+ results["warnings"].append({
230
+ "package": ".env",
231
+ "severity": "high",
232
+ "message": ".env file exists with no .gitignore — secrets may be exposed",
233
+ })
234
+
235
+ # package-lock.json missing
236
+ if (root / "package.json").exists() and not (root / "package-lock.json").exists() and not (root / "yarn.lock").exists():
237
+ results["warnings"].append({
238
+ "package": "lockfile",
239
+ "severity": "medium",
240
+ "message": "No lockfile (package-lock.json or yarn.lock) — builds may not be reproducible",
241
+ })
cortexcode/watcher.py ADDED
@@ -0,0 +1,122 @@
1
+ """File Watcher - Auto-reindex on file changes."""
2
+
3
+ import time
4
+ from pathlib import Path
5
+
6
+ from watchdog.events import FileSystemEventHandler, FileSystemEvent
7
+ from watchdog.observers import Observer
8
+
9
+ from cortexcode import indexer
10
+
11
+
12
class IndexEventHandler(FileSystemEventHandler):
    """Watchdog handler that re-indexes the project when source files change.

    Events are debounced: at most one reindex per `debounce_seconds`.
    NOTE(review): there is no timer — events arriving inside the debounce
    window only accumulate in `pending_files` and are processed when a
    LATER event arrives outside the window. A final burst of changes with
    no follow-up event may therefore never trigger a reindex.
    """

    def __init__(self, root_path: Path, debounce_seconds: float = 1.0):
        # Project root being watched; the index lives under .cortexcode/
        self.root_path = root_path
        self.index_path = root_path / ".cortexcode" / "index.json"
        # Minimum interval between reindex runs, in seconds
        self.debounce_seconds = debounce_seconds
        # time.time() of the last successful reindex (0.0 = never)
        self.last_index_time = 0.0
        # Changed paths seen since the last reindex; cleared before each run
        self.pending_files: set[str] = set()
        # When True, print a timestamped line per reindex instead of a dot
        self.verbose = False

    def on_modified(self, event: FileSystemEvent) -> None:
        """Queue a modified file and maybe trigger a debounced reindex."""
        if event.is_directory:
            return
        if not self._should_index(event.src_path):
            return

        self.pending_files.add(event.src_path)
        self._maybe_reindex()

    def on_created(self, event: FileSystemEvent) -> None:
        """Queue a newly created file and maybe trigger a debounced reindex."""
        if event.is_directory:
            return
        if not self._should_index(event.src_path):
            return

        self.pending_files.add(event.src_path)
        self._maybe_reindex()

    def on_deleted(self, event: FileSystemEvent) -> None:
        """On deletion, attempt a reindex without queueing the removed path.

        NOTE(review): deletions do not add to pending_files, so if nothing
        else is pending, _maybe_reindex returns early and the deleted file
        stays in the index until the next change elsewhere.
        """
        if event.is_directory:
            return
        self._maybe_reindex()

    def _should_index(self, path: str) -> bool:
        """Return True if *path* has a supported extension and is not in an ignored directory."""
        path_obj = Path(path)
        ext = path_obj.suffix.lower()

        # CodeIndexer is imported at the BOTTOM of this module; that import
        # has run by the time watchdog delivers events, but it is fragile —
        # consider moving it to the top-of-file import block.
        if ext not in CodeIndexer.SUPPORTED_EXTENSIONS:
            return False

        ignore_patterns = {
            "__pycache__", ".git", ".venv", "venv", "node_modules",
            ".pytest_cache", ".mypy_cache", ".ruff_cache", ".cortexcode"
        }

        # Substring match against the whole path — a directory named e.g.
        # "my.git" would also be skipped; acceptable for this heuristic.
        path_str = str(path_obj)
        return not any(pattern in path_str for pattern in ignore_patterns)

    def _maybe_reindex(self) -> None:
        """Run a reindex unless one happened within the debounce window."""
        now = time.time()

        # Inside the window: keep accumulating, reindex on a later event
        if now - self.last_index_time < self.debounce_seconds:
            return

        if not self.pending_files:
            return

        # Clear BEFORE reindexing so events arriving during the (full)
        # reindex are queued for the next run
        self.pending_files.clear()
        self._reindex()

    def _reindex(self) -> None:
        """Re-run the full project index and persist it to disk."""
        try:
            # Full reindex of the whole tree — pending_files is only used
            # as a trigger, not to do incremental work
            index_data = indexer.index_directory(self.root_path)
            indexer.save_index(index_data, self.index_path)

            if self.verbose:
                print(f"[CortexCode] Re-indexed at {time.strftime('%H:%M:%S')}")
            else:
                print(".", end="", flush=True)

            # Only advance the debounce clock on success, so a failed run
            # is retried on the next event
            self.last_index_time = time.time()
        except Exception as e:
            # Best-effort: report and keep watching rather than crash the watcher
            print(f"\n[CortexCode] Error re-indexing: {e}")
89
+
90
+
91
def start_watcher(root_path: Path, verbose: bool = False) -> None:
    """Watch *root_path* recursively and keep its index up to date.

    Requires an existing index (created by 'cortexcode index'); prints a
    message and returns otherwise. Blocks until interrupted with Ctrl-C.

    Args:
        root_path: Directory to watch.
        verbose: Print a timestamped line per reindex instead of a dot.
    """
    root_path = Path(root_path).resolve()
    if not (root_path / ".cortexcode" / "index.json").exists():
        print("[CortexCode] No index found. Run 'cortexcode index' first.")
        return

    handler = IndexEventHandler(root_path)
    handler.verbose = verbose

    watcher = Observer()
    watcher.schedule(handler, str(root_path), recursive=True)
    watcher.start()

    try:
        # Main thread just sleeps; the observer works on its own thread
        while True:
            time.sleep(1)
    except KeyboardInterrupt:
        watcher.stop()

    watcher.join()
119
+
120
+
121
+ # Import SUPPORTED_EXTENSIONS from indexer
122
+ from cortexcode.indexer import CodeIndexer