PyPI - codeguard-pro - Versions diffs - 0.3.0__py3-none-any.whl - Mend

codeguard-pro 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (15) hide show

agent_analyzer.py +291 -0
autofix.py +146 -0
cli.py +465 -0
codeguard_pro-0.3.0.dist-info/METADATA +398 -0
codeguard_pro-0.3.0.dist-info/RECORD +15 -0
codeguard_pro-0.3.0.dist-info/WHEEL +5 -0
codeguard_pro-0.3.0.dist-info/entry_points.txt +2 -0
codeguard_pro-0.3.0.dist-info/top_level.txt +10 -0
hook.py +107 -0
learning_loop.py +168 -0
secret_scanner.py +273 -0
server.py +544 -0
supply_chain.py +744 -0
tools_review.py +283 -0
tools_security.py +668 -0

agent_analyzer.py ADDED Viewed

@@ -0,0 +1,291 @@
+"""AI-powered deep security analysis [BETA] — MiniMax 2.7 direct API.
+Cost: ~$0.02 per scan. Regex does fast pass, LLM catches taint flow + obfuscation.
+"""
+import json, os, re, requests
+from typing import Dict, List, Optional
+from tools_security import scan_security
+MINIMAX_HOST = os.environ.get("MINIMAX_API_HOST", "https://api.minimax.io").rstrip("/")
+MINIMAX_URL = f"{MINIMAX_HOST}/v1/text/chatcompletion_v2"
+MODEL = "MiniMax-M2.7"
+_LAST_PROVIDER_ERROR = ""
+def _load_api_key() -> Optional[str]:
+    """Load MiniMax API key with a legacy fallback env var."""
+    return os.environ.get("MINIMAX_API_KEY") or os.environ.get("OPENROUTER_API_KEY")
+def _set_provider_error(message: str) -> None:
+    global _LAST_PROVIDER_ERROR
+    _LAST_PROVIDER_ERROR = message
+def _get_provider_error() -> str:
+    return _LAST_PROVIDER_ERROR or "Provider error"
+def _missing_key_message(default_message: str) -> str:
+    """Prefer a concrete provider error if one exists; otherwise show missing-key guidance."""
+    return _get_provider_error() if _LAST_PROVIDER_ERROR else default_message
+def _call_minimax(system: str, user: str, json_mode: bool = True) -> Optional[str]:
+    api_key = _load_api_key()
+    if not api_key:
+        _set_provider_error("Missing MINIMAX_API_KEY")
+        return None
+    _set_provider_error("")
+    payload = {"model": MODEL, "messages": [{"role": "system", "content": system}, {"role": "user", "content": user}], "temperature": 0.1, "max_tokens": 4096}
+    try:
+        resp = requests.post(MINIMAX_URL, headers={"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"}, json=payload, timeout=30)
+        if resp.status_code != 200:
+            body = resp.text[:300].replace("\n", " ").strip()
+            _set_provider_error(f"MiniMax API returned {resp.status_code}: {body}")
+            return None
+        data = resp.json()
+        try:
+            return data["choices"][0]["message"]["content"]
+        except Exception:
+            _set_provider_error(f"MiniMax response missing content: {json.dumps(data)[:300]}")
+            return None
+    except Exception as e:
+        _set_provider_error(f"MiniMax request failed: {e.__class__.__name__}: {e}")
+        return None
+def _strip_codeblock(text: str) -> str:
+    """Strip ```json ... ``` wrappers that MiniMax adds to responses."""
+    t = text.strip()
+    if t.startswith("```"):
+        t = t.split("\n", 1)[1] if "\n" in t else t[3:]
+    if t.endswith("```"):
+        t = t[:-3]
+    return t.strip()
+def _parse_regex_report(regex_report: str) -> List[Dict]:
+    """Extract structured findings from the human-readable regex report."""
+    findings = []
+    for line in regex_report.split("\n"):
+        m = re.match(r'\[(\w+)\]\s+(\S+?)(?:\s+\(line\s+(\d+)\))?$', line.strip())
+        if m:
+            findings.append({
+                "severity": m.group(1),
+                "category": m.group(2),
+                "line": int(m.group(3)) if m.group(3) else None,
+                "source": "[REGEX]",
+            })
+    return findings
+def _looks_like_setup_behavior(code: str) -> bool:
+    lower = code.lower()
+    indicators = (
+        "setup(",
+        "pyproject",
+        "cmdclass",
+        "setuptools.command.install",
+        "base64.b64decode",
+        "exec(",
+        "os.system",
+        "subprocess",
+        "urllib.request",
+        "requests.post",
+        "os.environ",
+    )
+    return any(token in lower for token in indicators)
+def _should_escalate(
+    code: str,
+    regex_findings: List[Dict],
+    explain_requested: bool = False,
+) -> Dict:
+    """Decide whether MiniMax should be invoked after deterministic triage."""
+    reasons = []
+    high_risk_categories = {
+        "sql-injection", "command-injection", "ssrf", "path-traversal",
+        "deserialization", "email-injection", "template-injection",
+        "header-injection", "ldap-injection",
+    }
+    if explain_requested:
+        reasons.append("user-requested explanation")
+    if any(
+        f.get("severity") in {"CRITICAL", "HIGH"} and f.get("category") in high_risk_categories
+        for f in regex_findings
+    ):
+        reasons.append("high-risk regex finding needs deeper analysis")
+    if _looks_like_setup_behavior(code):
+        reasons.append("behavioral setup/install pattern")
+    if not regex_findings and (
+        "getattr(" in code or "base64.b64decode" in code or "os.environ" in code or "urllib.request.urlopen" in code
+    ):
+        reasons.append("suspicious behavior without deterministic hit")
+    return {"should_escalate": bool(reasons), "reasons": reasons}
+def _augment_crypto_indirection(code: str) -> List[Dict]:
+    """Catch a narrow set of crypto-indirection cases that merit deep review.
+    This is not a replacement for the LLM. It is a targeted augment for cases
+    where the code clearly hides crypto intent through indirection and the model
+    may return an empty finding list.
+    """
+    findings = []
+    lower = code.lower()
+    if "getattr(" in code and "generate_" in code and "private_key" in code and "rsa" in lower:
+        findings.append({
+            "severity": "CRITICAL",
+            "category": "crypto-indirection",
+            "line": None,
+            "message": "Indirect RSA private key generation via getattr hides quantum-vulnerable crypto usage.",
+            "taint_chain": "rsa module -> getattr(...) -> generate_private_key",
+            "source": "[AI-AUGMENT]",
+        })
+    if "jwt.encode" in code and ("rs" in lower or "es" in lower) and ("algorithm = f" in lower or "algorithm=f" in lower or 'f"{prefix}{suffix}"' in lower):
+        findings.append({
+            "severity": "HIGH",
+            "category": "crypto-indirection",
+            "line": None,
+            "message": "JWT algorithm is assembled dynamically, hiding RSA/ECDSA usage behind string construction.",
+            "taint_chain": "prefix/suffix -> algorithm -> jwt.encode",
+            "source": "[AI-AUGMENT]",
+        })
+    return findings
+TAINT_PROMPT = """Expert security reviewer. Find what REGEX MISSES:
+1. TAINT FLOW: user input (request.args/form/POST, req.body, input()) through variables to dangerous sinks (exec, eval, os.system, subprocess, render_template_string, send_mail, cursor.execute, redirect, smtplib, ldap)
+2. BEHAVIORAL: base64+exec, network calls in setup.py, credential harvesting, obfuscation
+3. CRYPTO INDIRECTION: constructed algorithm names, getattr-based crypto calls, string-built JWT algorithms, indirect RSA/ECDSA/JWT usage hidden through variables or concatenation
+3. CONTEXT: lower severity if in tests
+Return JSON: {"findings": [{"severity":"CRITICAL|HIGH|MEDIUM|LOW","category":"taint-flow|behavioral|obfuscation|crypto-indirection","line":null,"message":"...","taint_chain":"source->sink"}]}
+Empty if clean. No speculation."""
+def deep_analyze(code: str, language: str = "python") -> Dict:
+    regex_report = scan_security(code, language)
+    regex_findings = _parse_regex_report(regex_report)
+    result = {"regex_findings": len(regex_findings), "ai_findings": 0, "ai_enabled": False, "model": MODEL, "findings": regex_findings}
+    llm = _call_minimax(TAINT_PROMPT, f"Lang: {language}\n```\n{code}\n```")
+    if not llm:
+        result["ai_note"] = _missing_key_message("Set MINIMAX_API_KEY for AI taint analysis") if not _load_api_key() else _get_provider_error()
+        return result
+    result["ai_enabled"] = True
+    try:
+        ai = json.loads(_strip_codeblock(llm)).get("findings", [])
+        seen = {(f.get("line"), f.get("category")) for f in regex_findings}
+        for f in ai:
+            if (f.get("line"), f.get("category")) not in seen:
+                f["source"] = "[AI-BETA]"
+                result["findings"].append(f)
+        if not any(f.get("source") == "[AI-BETA]" for f in result["findings"]):
+            result["findings"].extend(_augment_crypto_indirection(code))
+        result["ai_findings"] = len([f for f in result["findings"] if f.get("source") in {"[AI-BETA]", "[AI-AUGMENT]"}])
+    except Exception:
+        result["ai_note"] = "AI malformed response — regex only"
+    return result
+SETUP_PROMPT = """Supply chain analyst. Is this setup.py MALICIOUS?
+MALICIOUS: os.system/subprocess/exec/eval, network calls during install, credential harvesting, obfuscated payloads
+SUSPICIOUS: unusual imports (socket/base64/ctypes), external binary downloads
+SAFE: normal setup
+Return JSON: {"verdict":"SAFE|SUSPICIOUS|MALICIOUS","explanation":"2 sentences","findings":[{"severity":"CRITICAL|HIGH","message":"...","line":null}]}"""
+def analyze_setup_py(code: str) -> Dict:
+    llm = _call_minimax(SETUP_PROMPT, f"```python\n{code}\n```")
+    if not llm:
+        return {"verdict": "UNKNOWN", "explanation": _missing_key_message("Set MINIMAX_API_KEY") if not _load_api_key() else _get_provider_error(), "findings": []}
+    try:
+        return json.loads(_strip_codeblock(llm))
+    except Exception:
+        return {"verdict": "UNKNOWN", "explanation": "Malformed response", "findings": []}
+EXPLAIN_PROMPT = """Security educator. Given a finding + code, explain in <150 words:
+1. WHAT the vulnerability is
+2. HOW to exploit (concrete steps)
+3. IMPACT
+4. FIX (exact code change)
+No jargon. Write for devs."""
+def explain_vulnerability(finding: str, code_context: str) -> str:
+    llm = _call_minimax(EXPLAIN_PROMPT, f"Finding: {finding}\nCode:\n```\n{code_context}\n```", json_mode=False)
+    return llm or (_missing_key_message("Set MINIMAX_API_KEY") if not _load_api_key() else _get_provider_error())
+def smart_analyze(
+    code: str,
+    language: str = "python",
+    explain_requested: bool = False,
+    record_learning: bool = False,
+    title: str = "",
+    source: str = "manual",
+    expected_behavior: str = "",
+) -> Dict:
+    """Fast path first, then escalate to AI only when justified.
+    This is the orchestration entry point for CodeGuard's layered architecture:
+    deterministic scanners run first, MiniMax is invoked only for incomplete or
+    suspicious cases, and novel misses can be stored in the learning corpus.
+    """
+    regex_report = scan_security(code, language)
+    regex_findings = _parse_regex_report(regex_report)
+    escalation = _should_escalate(code, regex_findings, explain_requested=explain_requested)
+    result = {
+        "path": "fast-path",
+        "language": language,
+        "regex_report": regex_report,
+        "regex_findings": len(regex_findings),
+        "findings": regex_findings[:],
+        "escalation": escalation,
+        "ai_enabled": False,
+        "ai_findings": 0,
+        "model": MODEL,
+    }
+    if not escalation["should_escalate"]:
+        return result
+    result["path"] = "escalation"
+    if _looks_like_setup_behavior(code):
+        setup_result = analyze_setup_py(code)
+        result["setup_analysis"] = setup_result
+        if setup_result.get("verdict") not in {"UNKNOWN", "SAFE"}:
+            result["ai_enabled"] = True
+            result["ai_findings"] = len(setup_result.get("findings", []))
+            result["findings"].extend([
+                {"source": "[AI-BETA]", **f} for f in setup_result.get("findings", [])
+            ])
+    else:
+        ai_result = deep_analyze(code, language)
+        result["ai_enabled"] = ai_result.get("ai_enabled", False)
+        result["ai_findings"] = ai_result.get("ai_findings", 0)
+        result["findings"] = ai_result.get("findings", result["findings"])
+        if ai_result.get("ai_note"):
+            result["ai_note"] = ai_result["ai_note"]
+    if record_learning:
+        try:
+            from learning_loop import record_candidate
+            if result["ai_findings"] > 0 or (result["escalation"]["should_escalate"] and not result["regex_findings"]):
+                learning_result = record_candidate(
+                    title=title or "suspicious-sample",
+                    code=code,
+                    language=language,
+                    source=source,
+                    expected_behavior=expected_behavior,
+                    regex_findings=[f for f in result["findings"] if f.get("source") == "[REGEX]"],
+                    ai_findings=[f for f in result["findings"] if f.get("source") == "[AI-BETA]"],
+                )
+                result["learning"] = learning_result
+        except Exception as e:
+            result["learning_error"] = str(e)
+    return result

autofix.py ADDED Viewed

@@ -0,0 +1,146 @@
+"""LLM-powered auto-fix engine for CodeGuard Pro.
+Takes code + security findings, generates exact fixes via MiniMax M2.7.
+Falls back to pattern-based fixes when LLM is unavailable.
+"""
+import json, os, re, urllib.request, urllib.error
+from dataclasses import dataclass
+from typing import List, Optional
+from secret_scanner import SecretFinding, scan_secrets
+@dataclass(frozen=True)
+class AutofixResult:
+    """Immutable result from the autofix engine."""
+    original_code: str
+    fixed_code: str
+    explanation: str
+    used_llm: bool
+    findings_fixed: int
+def _load_api_key() -> Optional[str]:
+    """Load MiniMax API key from env or /root/ai-factory/.env."""
+    key = os.environ.get("MINIMAX_API_KEY")
+    if key:
+        return key
+    env_path = "/root/ai-factory/.env"
+    if os.path.exists(env_path):
+        with open(env_path) as f:
+            for line in f:
+                if line.strip().startswith("MINIMAX_API_KEY="):
+                    return line.strip().split("=", 1)[1].strip("'\" \n")
+    return None
+def _call_minimax(code: str, findings: List[SecretFinding]) -> Optional[str]:
+    """Call MiniMax M2.7 to generate fixed code. Returns fixed code or None."""
+    api_key = _load_api_key()
+    if not api_key:
+        return None
+    findings_text = "\n".join(
+        f"- Line {f.line}: {f.secret_type} ({f.severity}) — {f.fix}" for f in findings
+    )
+    messages = [
+        {"role": "system", "content": (
+            "You are a security-focused code fixer. Return ONLY the fixed code. "
+            "No markdown fences, no explanation. Preserve formatting, comments, and logic. "
+            "Only change lines with security issues. Add 'import os' if needed."
+        )},
+        {"role": "user", "content": (
+            f"Fix these security issues:\n\nFINDINGS:\n{findings_text}\n\nCODE:\n{code}"
+        )},
+    ]
+    payload = json.dumps({
+        "model": "MiniMax-M2.7", "messages": messages,
+        "temperature": 0.1, "max_tokens": 4096,
+    }).encode()
+    req = urllib.request.Request(
+        "https://api.minimaxi.chat/v1/text/chatcompletion_v2",
+        data=payload, method="POST",
+        headers={"Content-Type": "application/json", "Authorization": f"Bearer {api_key}"},
+    )
+    try:
+        with urllib.request.urlopen(req, timeout=30) as resp:
+            data = json.loads(resp.read().decode())
+        content = data["choices"][0]["message"]["content"]
+        content = re.sub(r"^```\w*\n", "", content)
+        content = re.sub(r"\n```$", "", content)
+        return content.strip()
+    except (urllib.error.URLError, KeyError, IndexError, json.JSONDecodeError):
+        return None
+# --- Pattern-based fallback fixes ---
+_SECRET_TO_ENV = {
+    "OpenAI API Key": "OPENAI_API_KEY", "Anthropic API Key": "ANTHROPIC_API_KEY",
+    "GitHub Token": "GITHUB_TOKEN", "GitHub OAuth": "GITHUB_OAUTH_TOKEN",
+    "Google API Key": "GOOGLE_API_KEY", "Stripe Secret Key": "STRIPE_SECRET_KEY",
+    "Slack Token": "SLACK_TOKEN", "MiniMax API Key": "MINIMAX_API_KEY",
+    "SendGrid Key": "SENDGRID_API_KEY", "Supabase Key": "SUPABASE_KEY",
+    "Vercel Token": "VERCEL_TOKEN", "Generic API Key": "API_KEY",
+    "Hardcoded Password": "APP_PASSWORD", "Database URL": "DATABASE_URL",
+    "AWS Access Key": "AWS_ACCESS_KEY_ID",
+}
+def _fallback_fix(code: str, findings: List[SecretFinding]) -> str:
+    """Apply pattern-based fixes when LLM is unavailable."""
+    lines, needs_os = code.split("\n"), False
+    for f in findings:
+        if f.line < 1 or f.line > len(lines):
+            continue
+        env_name = _SECRET_TO_ENV.get(f.secret_type)
+        if not env_name:
+            m = re.search(r'os\.environ\["([^"]+)"\]', f.fix)
+            env_name = m.group(1) if m else f.secret_type.upper().replace(" ", "_")
+        replaced = re.sub(r"""(['"])[^\s'"]{8,}\1""", f'os.environ["{env_name}"]', lines[f.line - 1], count=1)
+        if replaced != lines[f.line - 1]:
+            needs_os = True
+            lines[f.line - 1] = replaced
+    # Fix SQL injection: f-string in cursor.execute -> parameterized
+    for i, line in enumerate(lines):
+        if "cursor.execute" in line and ("f'" in line or 'f"' in line):
+            fixed = re.sub(
+                r'''f(["\'])(.*?)\{(\w+)\}(.*?)\1''',
+                lambda m: f'{m.group(1)}{m.group(2)}?{m.group(4)}{m.group(1)}, ({m.group(3)},)', line)
+            if fixed != line:
+                lines[i] = fixed
+    result = "\n".join(lines)
+    if needs_os and not re.search(r"^import os\b", result, re.MULTILINE):
+        result = "import os\n" + result
+    return result
+def autofix(code: str, findings: Optional[List[SecretFinding]] = None) -> AutofixResult:
+    """Generate fixes for security findings in code."""
+    if findings is None:
+        findings = scan_secrets(code)
+    if not findings:
+        return AutofixResult(original_code=code, fixed_code=code,
+                             explanation="No security issues found.", used_llm=False, findings_fixed=0)
+    llm_result = _call_minimax(code, findings)
+    if llm_result:
+        return AutofixResult(original_code=code, fixed_code=llm_result,
+                             explanation=f"Fixed {len(findings)} finding(s) using MiniMax M2.7.",
+                             used_llm=True, findings_fixed=len(findings))
+    fixed = _fallback_fix(code, findings)
+    return AutofixResult(original_code=code, fixed_code=fixed,
+                         explanation=f"Fixed {len(findings)} finding(s) using pattern-based fallback.",
+                         used_llm=False, findings_fixed=len(findings))
+if __name__ == "__main__":
+    sample = (
+        'import requests\n\n'
+        'API_KEY = "sk-proj-abc123def456ghi789jkl012mno345"\n'
+        'DB_URL = "postgres://admin:supersecret@db.example.com:5432/prod"\n'
+        'password = "hunter2isMyP@ss!"\n\n'
+        'def get_data():\n'
+        '    headers = {"Authorization": f"Bearer {API_KEY}"}\n'
+        '    return requests.get("https://api.example.com/data", headers=headers)\n'
+    )
+    print("=" * 60)
+    print("CodeGuard Pro — Auto-Fix Engine Demo")
+    print("=" * 60)
+    print(f"\nORIGINAL CODE:\n{sample}")
+    result = autofix(sample)
+    engine = "MiniMax M2.7" if result.used_llm else "Pattern-based fallback"
+    print(f"Engine: {engine} | Findings fixed: {result.findings_fixed}")
+    print(f"\nFIXED CODE:\n{result.fixed_code}")
+    print(f"\n{result.explanation}")