icsf-cli 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- backend/__init__.py +7 -0
- backend/cli.py +202 -0
- backend/cli_api.py +409 -0
- backend/config.py +76 -0
- backend/diag_auth.py +16 -0
- backend/logging_config.py +18 -0
- backend/main.py +1644 -0
- icsf_cli-0.1.0.dist-info/METADATA +1095 -0
- icsf_cli-0.1.0.dist-info/RECORD +12 -0
- icsf_cli-0.1.0.dist-info/WHEEL +5 -0
- icsf_cli-0.1.0.dist-info/entry_points.txt +2 -0
- icsf_cli-0.1.0.dist-info/top_level.txt +1 -0
backend/__init__.py
ADDED
backend/cli.py
ADDED
|
@@ -0,0 +1,202 @@
|
|
|
1
|
+
"""
|
|
2
|
+
ICSF Command-Line Interface entrypoint.
|
|
3
|
+
|
|
4
|
+
This module exposes a small CLI wrapper around the core `cli_api` helpers.
|
|
5
|
+
Users install the package and then run:
|
|
6
|
+
|
|
7
|
+
icsf run --credentials backend/credentials.yaml --csv report.csv --auto-pr --run-tests
|
|
8
|
+
|
|
9
|
+
to execute an end-to-end flow that mirrors the web application:
|
|
10
|
+
mapping → baseline testing → fixing → validation testing → PR creation.
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
from __future__ import annotations
|
|
14
|
+
|
|
15
|
+
import argparse
|
|
16
|
+
import asyncio
|
|
17
|
+
import logging
|
|
18
|
+
from pathlib import Path
|
|
19
|
+
from typing import Optional
|
|
20
|
+
|
|
21
|
+
from . import cli_api
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def _configure_logging(verbose: bool) -> None:
|
|
25
|
+
level = logging.DEBUG if verbose else logging.INFO
|
|
26
|
+
logging.basicConfig(
|
|
27
|
+
level=level,
|
|
28
|
+
format="%(asctime)s [%(levelname)s] %(name)s: %(message)s",
|
|
29
|
+
)
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def build_parser() -> argparse.ArgumentParser:
    """Build the top-level ``icsf`` argument parser.

    Currently exposes a single required subcommand, ``run``, which drives
    the end-to-end pipeline.
    """
    root = argparse.ArgumentParser(
        prog="icsf",
        description="ICSF – Intelligent Code Security & Fixing Platform (CLI)",
    )
    commands = root.add_subparsers(dest="command", required=True)

    # End-to-end command: map → baseline → fix → validate → PR.
    run_cmd = commands.add_parser(
        "run",
        help="Run end-to-end mapping, fixing, testing, and PR creation.",
    )
    run_cmd.add_argument(
        "--credentials",
        default=None,
        type=str,
        help=(
            "Path to credentials YAML containing GitHub token/username/email. "
            "If omitted, backend/credentials.yaml is used."
        ),
    )
    run_cmd.add_argument(
        "--csv",
        required=True,
        type=str,
        help="Path to vulnerability CSV report.",
    )
    run_cmd.add_argument(
        "--auto-pr",
        action="store_true",
        help="Automatically create a single batch PR per repository.",
    )
    run_cmd.add_argument(
        "--no-auto-pr",
        action="store_false",
        dest="auto_pr",
        help="Do not create PRs (default is off unless --auto-pr is set).",
    )
    run_cmd.add_argument(
        "--run-tests",
        action="store_true",
        help="Run baseline + validation testing via Atlas.",
    )
    run_cmd.add_argument(
        "--no-run-tests",
        action="store_false",
        dest="run_tests",
        help="Skip all testing (baseline & validation).",
    )
    # PR creation is opt-in; testing is opt-out.
    run_cmd.set_defaults(auto_pr=False, run_tests=True)

    run_cmd.add_argument(
        "--repo-filter",
        default=None,
        type=str,
        help=(
            "Only process repositories whose name or full_name contains this "
            "substring (case-insensitive)."
        ),
    )
    run_cmd.add_argument(
        "--output",
        default=None,
        type=str,
        help="Optional path to write a full JSON report of the run.",
    )
    run_cmd.add_argument(
        "-v",
        "--verbose",
        action="store_true",
        help="Enable verbose logging for debugging.",
    )

    return root
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
def _handle_run(args: argparse.Namespace) -> int:
    """Execute the ``icsf run`` subcommand and return a process exit code.

    Returns 0 on success, 1 when the CSV is missing or the pipeline fails.
    """
    _configure_logging(verbose=args.verbose)
    log = logging.getLogger("icsf.cli")

    # Resolve credentials: explicit flag wins, otherwise fall back to the
    # credentials.yaml that ships next to this module.
    credentials_path: Optional[Path] = (
        Path(args.credentials).resolve()
        if args.credentials
        else Path(__file__).resolve().parent / "credentials.yaml"
    )

    csv_path = Path(args.csv).resolve()
    if not csv_path.exists():
        log.error("CSV file not found: %s", csv_path)
        return 1

    log.info("ICSF CLI – end-to-end run starting")
    log.info("Credentials: %s", credentials_path)
    log.info("CSV report: %s", csv_path)

    async def _execute() -> int:
        try:
            summary = await cli_api.run_end_to_end_cli(
                credentials_path=credentials_path,
                csv_path=csv_path,
                auto_create_pr=args.auto_pr,
                run_tests=args.run_tests,
                repo_filter=args.repo_filter,
                min_severity=None,  # TODO: wire severity filtering if desired
            )
        except Exception as exc:  # pragma: no cover - defensive
            log.exception("End-to-end run failed: %s", exc)
            return 1

        mapped_count = summary.get("total_repos_with_vulns", 0)
        processed_count = summary.get("total_repos_processed", 0)
        log.info(
            "End-to-end run completed – mapped repos: %d, processed repos: %d",
            mapped_count,
            processed_count,
        )

        # Print a very small human-readable summary to stdout.
        print("\n=== ICSF CLI Summary ===")
        print(f"Repositories with vulnerabilities: {mapped_count}")
        print(f"Repositories processed: {processed_count}")

        # Surface PR URLs if present.
        successful_prs = 0
        for repo_result in summary.get("results", []) or []:
            repo_info = repo_result.get("repo", {}) or {}
            label = repo_info.get("full_name") or repo_info.get("name")
            pr_info = (repo_result.get("batch_results") or {}).get("batch_pr_result") or {}
            if not pr_info.get("success"):
                continue
            successful_prs += 1
            pr_url = pr_info.get("pr_url", "N/A")
            pr_number = pr_info.get("pr_number", "N/A")
            print(f"- {label}: PR #{pr_number} -> {pr_url}")

        if successful_prs == 0 and args.auto_pr:
            print("No PRs were created (auto-pr enabled but none succeeded).")

        # Optionally write full JSON report.
        if args.output:
            report_path = Path(args.output).resolve()
            report_path.write_text(cli_api.to_pretty_json(summary), encoding="utf-8")
            print(f"\nFull report written to: {report_path}")

        return 0

    return asyncio.run(_execute())
|
|
186
|
+
|
|
187
|
+
|
|
188
|
+
def main(argv: Optional[list[str]] = None) -> int:
    """CLI entrypoint: parse *argv* and dispatch to the chosen subcommand."""
    parser = build_parser()
    parsed = parser.parse_args(argv)

    if parsed.command != "run":
        # Defensive fallback – unreachable while subparsers are required.
        parser.print_help()
        return 1

    return _handle_run(parsed)
|
|
198
|
+
|
|
199
|
+
|
|
200
|
+
# Script entrypoint: exit with main()'s return code when run directly.
if __name__ == "__main__":  # pragma: no cover
    raise SystemExit(main())
|
|
202
|
+
|
backend/cli_api.py
ADDED
|
@@ -0,0 +1,409 @@
|
|
|
1
|
+
"""
|
|
2
|
+
cli_api.py
|
|
3
|
+
|
|
4
|
+
Core Python API used by the ICSF CLI.
|
|
5
|
+
|
|
6
|
+
This module is intentionally **web-framework agnostic** – it reuses the same
|
|
7
|
+
services that the FastAPI backend uses (GitHubService, VulnerabilityService,
|
|
8
|
+
BatchFixService, Atlas testing service) but exposes them as simple functions
|
|
9
|
+
that can be called from a command-line entrypoint.
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
from __future__ import annotations
|
|
13
|
+
|
|
14
|
+
import asyncio
|
|
15
|
+
import json
|
|
16
|
+
import logging
|
|
17
|
+
import sys
|
|
18
|
+
from dataclasses import asdict
|
|
19
|
+
from pathlib import Path
|
|
20
|
+
from typing import Any, Dict, List, Optional, Tuple
|
|
21
|
+
|
|
22
|
+
import yaml
|
|
23
|
+
|
|
24
|
+
# Ensure the backend directory is on sys.path so absolute imports like
|
|
25
|
+
# `config`, `services.*`, and `models.*` work the same way they do in main.py
|
|
26
|
+
BACKEND_DIR = Path(__file__).resolve().parent
|
|
27
|
+
if str(BACKEND_DIR) not in sys.path:
|
|
28
|
+
sys.path.append(str(BACKEND_DIR))
|
|
29
|
+
|
|
30
|
+
from config import Config
|
|
31
|
+
from services.github_service import GitHubService
|
|
32
|
+
from services.vulnerability_service import VulnerabilityService
|
|
33
|
+
from services.bedrock_service import BedrockService
|
|
34
|
+
from services.batch_fix_service import BatchFixService
|
|
35
|
+
|
|
36
|
+
logger = logging.getLogger(__name__)
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def load_github_credentials_from_file(
    credentials_path: Optional[Path],
) -> Tuple[str, Optional[str], Optional[str]]:
    """
    Load GitHub token/username/email for CLI usage.

    Precedence:
    1. Explicit credentials YAML path if provided
    2. Fallback to Config.get_github_credentials() (backend/credentials.yaml)

    Raises FileNotFoundError when an explicit path does not exist, and
    RuntimeError when no token can be found.
    """
    if credentials_path is None:
        defaults = Config.get_github_credentials(force_reload=True)
        token = defaults.get("token")
        username = defaults.get("username")
        email = defaults.get("email")
        logger.info(
            "Loaded GitHub credentials from default backend/credentials.yaml "
            "(username=%s, email=%s)",
            username or "N/A",
            email or "N/A",
        )
    else:
        if not credentials_path.exists():
            raise FileNotFoundError(
                f"Credentials file not found: {credentials_path}"
            )
        parsed = yaml.safe_load(credentials_path.read_text(encoding="utf-8")) or {}
        section = parsed.get("github", {}) or {}
        token = section.get("token")
        username = section.get("username")
        email = section.get("email")
        logger.info(
            "Loaded GitHub credentials from %s (username=%s, email=%s)",
            credentials_path,
            username or "N/A",
            email or "N/A",
        )

    if not token:
        raise RuntimeError(
            "GitHub token is required but was not found in credentials."
        )

    return token, username, email
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
async def fetch_repositories_for_cli(
    token: str,
    username: Optional[str],
    email: Optional[str],
) -> List[Dict[str, Any]]:
    """
    Fetch repositories the same way the FastAPI endpoint does, but for CLI use.

    Identifier precedence: explicit username, then email lookup via the
    GitHub API, then the token's own authenticated user.
    """
    github_service = GitHubService(token)

    if username:
        final_username = username
        logger.info("Using GitHub username: %s", final_username)
    elif email:
        logger.info(
            "Username not provided; resolving from email via GitHub API: %s",
            email,
        )
        resolved = await github_service.get_username_from_email(email)
        if not resolved:
            raise RuntimeError(
                "Unable to resolve GitHub username from email; "
                "provide username in credentials.yaml or via CLI."
            )
        final_username = resolved
        logger.info("Resolved username from email: %s", final_username)
    else:
        # Last resort: ask GitHub who the token belongs to.
        logger.info(
            "Neither username nor email provided; using authenticated user."
        )
        auth_info = await github_service.verify_token_and_get_user(None)
        final_username = auth_info.get("login")
        if not final_username:
            raise RuntimeError(
                "Authenticated GitHub user could not be determined."
            )

    logger.info("Verifying GitHub token and fetching user info for %s", final_username)
    user_info = await github_service.verify_token_and_get_user(final_username)
    authenticated_username = user_info.get("login") or final_username
    logger.info("Authenticated as: %s", authenticated_username)

    repos = await github_service.get_all_repositories(
        final_username,
        include_private=True,
        authenticated_username=authenticated_username,
        include_orgs=True,
    )
    logger.info("Fetched %d repositories from GitHub", len(repos))
    return repos
|
|
141
|
+
|
|
142
|
+
|
|
143
|
+
def map_vulnerabilities_from_csv_for_cli(
    csv_path: Path,
    repositories: List[Dict[str, Any]],
) -> Dict[str, Any]:
    """
    Parse a vulnerability CSV and map vulnerabilities to repositories/files.

    This reuses VulnerabilityService.parse_csv_file and
    VulnerabilityService.map_vulnerabilities_to_repos, mirroring the
    `/api/vulnerabilities/map` endpoint, but runs entirely in-process.

    Raises FileNotFoundError for a missing CSV and RuntimeError for an
    unparsable or empty one.
    """
    if not csv_path.exists():
        raise FileNotFoundError(f"CSV file not found: {csv_path}")

    df, error = VulnerabilityService.parse_csv_file(
        csv_path.read_bytes(), csv_path.name
    )
    if error:
        raise RuntimeError(f"Error parsing CSV: {error}")
    if df is None or df.empty:
        raise RuntimeError("CSV file is empty or invalid")

    # Let VulnerabilityService clone repositories on demand so file-path
    # matching stays accurate.
    mapped_vulns, unmatched_vulns = VulnerabilityService.map_vulnerabilities_to_repos(
        vulnerabilities_df=df,
        repositories=repositories,
        repo_files_map=None,
        clone_repos=True,
    )

    # JSON object keys must be strings, so normalize the repo ids.
    mapped_response: Dict[str, Any] = {
        str(repo_id): {
            "repo": entry["repo"],
            "vulnerabilities": entry["vulnerabilities"],
        }
        for repo_id, entry in mapped_vulns.items()
    }

    return {
        "mapped_vulnerabilities": mapped_response,
        "unmatched_vulnerabilities": unmatched_vulns,
        "total_mapped": len(mapped_response),
        "total_unmatched": len(unmatched_vulns),
    }
|
|
187
|
+
|
|
188
|
+
|
|
189
|
+
def _ensure_repo_cloned_for_cli(
|
|
190
|
+
repo: Dict[str, Any],
|
|
191
|
+
token: Optional[str],
|
|
192
|
+
backend_root: Path,
|
|
193
|
+
) -> Path:
|
|
194
|
+
"""
|
|
195
|
+
Ensure a repository is cloned locally and return the local path.
|
|
196
|
+
|
|
197
|
+
This mirrors the cloning logic used in the FastAPI `/api/fixes/batch`
|
|
198
|
+
endpoint, but is safe for CLI use.
|
|
199
|
+
"""
|
|
200
|
+
from subprocess import run, CalledProcessError
|
|
201
|
+
|
|
202
|
+
repo_name = repo.get("name") or "repo"
|
|
203
|
+
full_name = repo.get("full_name") or repo_name
|
|
204
|
+
clone_url = repo.get("clone_url") or repo.get("html_url", "")
|
|
205
|
+
|
|
206
|
+
if not clone_url:
|
|
207
|
+
raise RuntimeError(f"No clone URL available for repository: {full_name}")
|
|
208
|
+
|
|
209
|
+
temp_clone_dir = backend_root / "temp_cloned_repos" / repo_name
|
|
210
|
+
temp_clone_dir.parent.mkdir(parents=True, exist_ok=True)
|
|
211
|
+
|
|
212
|
+
git_dir = temp_clone_dir / ".git"
|
|
213
|
+
if temp_clone_dir.exists() and git_dir.exists():
|
|
214
|
+
logger.info(
|
|
215
|
+
"Reusing existing clone for %s at %s", full_name, temp_clone_dir
|
|
216
|
+
)
|
|
217
|
+
return temp_clone_dir
|
|
218
|
+
|
|
219
|
+
# Normalize HTTPS clone URL and inject token if available.
|
|
220
|
+
repo_clone_url = clone_url
|
|
221
|
+
if "github.com" in repo_clone_url and not repo_clone_url.endswith(".git"):
|
|
222
|
+
repo_clone_url = repo_clone_url + ".git"
|
|
223
|
+
if token and repo_clone_url.startswith("https://") and "github.com" in repo_clone_url:
|
|
224
|
+
# https://TOKEN@github.com/owner/repo.git
|
|
225
|
+
repo_clone_url = repo_clone_url.replace("https://", f"https://{token}@")
|
|
226
|
+
|
|
227
|
+
if temp_clone_dir.exists():
|
|
228
|
+
# Stale/non-git directory – remove it before cloning.
|
|
229
|
+
import shutil
|
|
230
|
+
|
|
231
|
+
shutil.rmtree(temp_clone_dir, ignore_errors=True)
|
|
232
|
+
|
|
233
|
+
logger.info("Cloning %s into %s ...", repo_clone_url, temp_clone_dir)
|
|
234
|
+
try:
|
|
235
|
+
completed = run(
|
|
236
|
+
[
|
|
237
|
+
"git",
|
|
238
|
+
"clone",
|
|
239
|
+
"--depth",
|
|
240
|
+
"1",
|
|
241
|
+
"--single-branch",
|
|
242
|
+
"--filter=blob:none",
|
|
243
|
+
"--quiet",
|
|
244
|
+
repo_clone_url,
|
|
245
|
+
str(temp_clone_dir),
|
|
246
|
+
],
|
|
247
|
+
capture_output=True,
|
|
248
|
+
text=True,
|
|
249
|
+
timeout=120,
|
|
250
|
+
shell=False,
|
|
251
|
+
)
|
|
252
|
+
except CalledProcessError as e:
|
|
253
|
+
raise RuntimeError(f"Failed to clone repository: {e}") from e
|
|
254
|
+
except Exception as e: # pragma: no cover - defensive
|
|
255
|
+
raise RuntimeError(f"Failed to clone repository: {e}") from e
|
|
256
|
+
|
|
257
|
+
if completed.returncode != 0:
|
|
258
|
+
raise RuntimeError(
|
|
259
|
+
f"Failed to clone repository {full_name}: {completed.stderr or completed.stdout}"
|
|
260
|
+
)
|
|
261
|
+
|
|
262
|
+
logger.info("Clone complete for %s", full_name)
|
|
263
|
+
return temp_clone_dir
|
|
264
|
+
|
|
265
|
+
|
|
266
|
+
async def run_end_to_end_for_repo(
    repo: Dict[str, Any],
    vulnerabilities: List[Dict[str, Any]],
    token: str,
    *,
    auto_create_pr: bool,
    run_tests: bool,
    base_branch: str = "main",
) -> Dict[str, Any]:
    """
    Run the full batch pipeline for a single repository:
    - clone (or reuse clone)
    - baseline testing (Atlas)
    - multi-agent fixes (BatchFixService)
    - validation testing
    - PR creation (single batch PR)
    """
    backend_root = Path(__file__).resolve().parent
    repo_path = _ensure_repo_cloned_for_cli(repo, token, backend_root)

    # Build the Bedrock-backed fixing service from the configured credentials.
    bedrock_cfg = Config.get_bedrock_config()
    batch_service = BatchFixService(
        BedrockService(
            access_key=bedrock_cfg["access_key"],
            secret_key=bedrock_cfg["secret_key"],
            region=bedrock_cfg["region"],
        )
    )

    repo_name: str = repo.get("name", "")
    full_name: str = repo.get("full_name", repo_name)
    # Owner is the "org" half of "org/repo" when present.
    owner: Optional[str] = (
        full_name.split("/")[0] if full_name and "/" in full_name else None
    )

    results = await batch_service.fix_batch_vulnerabilities(
        vulnerabilities=vulnerabilities,
        repo_path=str(repo_path),
        repo_name=repo_name,
        clone_url=repo.get("clone_url") or repo.get("html_url", ""),
        token=token,
        max_concurrent=2,
        auto_create_pr=auto_create_pr,
        repo_owner=owner,
        base_branch=base_branch,
        run_tests_after_fix=run_tests,
    )

    return {
        "repo": repo,
        "batch_results": results,
        "local_repo_path": str(repo_path),
    }
|
|
318
|
+
|
|
319
|
+
|
|
320
|
+
async def run_end_to_end_cli(
    credentials_path: Optional[Path],
    csv_path: Path,
    *,
    auto_create_pr: bool,
    run_tests: bool,
    repo_filter: Optional[str] = None,
    min_severity: Optional[str] = None,
) -> Dict[str, Any]:
    """
    High-level orchestration used by `icsf run`:
    - load credentials
    - fetch repositories
    - map CSV vulnerabilities to repos/files
    - for each repo (optionally filtered), run batch fixes + tests + PR

    NOTE(review): `min_severity` is accepted for interface stability but is
    not currently applied anywhere – confirm before relying on it.
    """
    token, username, email = load_github_credentials_from_file(credentials_path)

    repos = await fetch_repositories_for_cli(token, username, email)
    mapping = map_vulnerabilities_from_csv_for_cli(csv_path, repos)
    mapped = mapping.get("mapped_vulnerabilities", {})

    repo_id_to_repo: Dict[int, Dict[str, Any]] = {
        int(r["id"]): r for r in repos if "id" in r
    }

    # Case-insensitive substring filter on name / full_name; no filter
    # (or empty string) matches everything.
    needle = repo_filter.lower() if repo_filter else None

    def _repo_matches_filter(repo_dict: Dict[str, Any]) -> bool:
        if needle is None:
            return True
        return (
            needle in (repo_dict.get("name") or "").lower()
            or needle in (repo_dict.get("full_name") or "").lower()
        )

    end_to_end_results: List[Dict[str, Any]] = []
    total_fixable_repos = 0

    # Iterate over mapped repos and run the full batch pipeline for each.
    for repo_id_str, payload in mapped.items():
        try:
            repo_id = int(repo_id_str)
        except ValueError:
            continue

        repo = repo_id_to_repo.get(repo_id) or payload.get("repo")
        if not repo or not _repo_matches_filter(repo):
            continue

        vulnerabilities = payload.get("vulnerabilities", [])
        if not vulnerabilities:
            continue

        total_fixable_repos += 1
        logger.info(
            "Running end-to-end pipeline for repository %s with %d mapped vulnerabilities",
            repo.get("full_name") or repo.get("name"),
            len(vulnerabilities),
        )
        end_to_end_results.append(
            await run_end_to_end_for_repo(
                repo=repo,
                vulnerabilities=vulnerabilities,
                token=token,
                auto_create_pr=auto_create_pr,
                run_tests=run_tests,
            )
        )

    return {
        "total_repos_with_vulns": len(mapped),
        "total_repos_processed": total_fixable_repos,
        "mapping": mapping,
        "repos": repos,
        "results": end_to_end_results,
    }
|
|
398
|
+
|
|
399
|
+
|
|
400
|
+
def to_pretty_json(data: Any) -> str:
    """Serialize *data* to an indented JSON string for CLI reports.

    Dataclass instances are expanded via ``asdict``; anything else that
    ``json`` cannot encode falls back to its ``str()`` representation.
    """

    def _fallback(obj: Any) -> Any:
        try:
            return asdict(obj)  # dataclasses
        except Exception:
            return str(obj)

    return json.dumps(data, indent=2, default=_fallback)
|
|
409
|
+
|
backend/config.py
ADDED
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import yaml
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
from dotenv import load_dotenv
|
|
5
|
+
|
|
6
|
+
# Load environment variables from .env file
|
|
7
|
+
# This is usually done at the project level, but main.py and tests also do it.
|
|
8
|
+
# We do it here as well to ensure attributes are populated when imported.
|
|
9
|
+
load_dotenv(override=True)
|
|
10
|
+
|
|
11
|
+
class Config:
    """Central configuration: AWS/Bedrock settings from the environment,
    GitHub credentials from backend/credentials.yaml."""

    # AWS Credentials (empty env values are normalized to None)
    AWS_ACCESS_KEY_ID = os.getenv("AWS_ACCESS_KEY_ID", "").strip() or None
    AWS_SECRET_ACCESS_KEY = os.getenv("AWS_SECRET_ACCESS_KEY", "").strip() or None
    AWS_REGION = os.getenv("AWS_REGION", "us-east-1").strip() or "us-east-1"
    AWS_SESSION_TOKEN = os.getenv("AWS_SESSION_TOKEN", "").strip() or None

    # Model IDs (overridable via env, with stable defaults)
    BEDROCK_MODEL_ID = os.getenv("BEDROCK_MODEL_ID", "").strip() or "anthropic.claude-3-5-sonnet-20240620-v1:0"
    BEDROCK_EMBED_MODEL_ID = os.getenv("BEDROCK_EMBED_MODEL_ID", "").strip() or "amazon.titan-embed-text-v1"

    @staticmethod
    def get_github_credentials(force_reload=False):
        """
        Load GitHub credentials from credentials.yaml.

        Returns a dict with `token`, `username`, and `email` keys; all values
        are None when the file is missing, empty, or unreadable.
        `force_reload` is accepted for API compatibility but has no effect –
        the file is re-read on every call.
        """
        import logging
        logger = logging.getLogger(__name__)

        # In a real scenario, force_reload might clear a cache, but here we just read the file.
        credentials_path = Path(__file__).parent / "credentials.yaml"

        if not credentials_path.exists():
            logger.warning(f"GitHub credentials file not found at: {credentials_path}")
            return {"token": None, "username": None, "email": None}

        try:
            with open(credentials_path, "r") as f:
                # safe_load returns None for an empty YAML file – coerce to {}
                # so the .get() calls below cannot raise AttributeError.
                data = yaml.safe_load(f) or {}
            # Same guard for an explicit `github: null` section.
            github_data = data.get("github", {}) or {}
            token = github_data.get("token")
            logger.info(f"Successfully loaded GitHub credentials from {credentials_path}")
            if token:
                logger.info(f"GitHub token loaded (ends with: ...{token[-4:] if len(token) > 4 else '***'})")
            else:
                logger.warning("GitHub token is missing in credentials.yaml")

            return {
                "token": token,
                "username": github_data.get("username"),
                "email": github_data.get("email")
            }
        except Exception as e:
            logger.error(f"Error loading GitHub credentials from {credentials_path}: {str(e)}")
            return {"token": None, "username": None, "email": None}

    @staticmethod
    def validate_bedrock_credentials():
        """
        Validate that AWS credentials are provided.
        Returns (is_valid, error_msg).
        """
        if not Config.AWS_ACCESS_KEY_ID or not Config.AWS_SECRET_ACCESS_KEY:
            return False, "Missing AWS credentials. Please check your .env file."
        return True, ""

    @staticmethod
    def get_bedrock_config():
        """
        Return Bedrock configuration as a dictionary.
        """
        return {
            "access_key": Config.AWS_ACCESS_KEY_ID,
            "secret_key": Config.AWS_SECRET_ACCESS_KEY,
            "region": Config.AWS_REGION
        }
|