pandoraspec 0.1.1__py3-none-any.whl → 0.2.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pandoraspec/cli.py +28 -20
- pandoraspec/config.py +23 -0
- pandoraspec/constants.py +17 -0
- pandoraspec/core.py +52 -329
- pandoraspec/modules/__init__.py +0 -0
- pandoraspec/modules/drift.py +180 -0
- pandoraspec/modules/resilience.py +174 -0
- pandoraspec/modules/security.py +234 -0
- pandoraspec/orchestrator.py +69 -0
- pandoraspec/reporting/__init__.py +0 -0
- pandoraspec/reporting/generator.py +111 -0
- pandoraspec/{reporting.py → reporting/templates.py} +10 -88
- pandoraspec/seed.py +181 -0
- pandoraspec/utils/__init__.py +0 -0
- pandoraspec/utils/logger.py +21 -0
- pandoraspec/utils/parsing.py +35 -0
- pandoraspec/utils/url.py +23 -0
- pandoraspec-0.2.7.dist-info/METADATA +200 -0
- pandoraspec-0.2.7.dist-info/RECORD +23 -0
- pandoraspec-0.2.7.dist-info/entry_points.txt +2 -0
- pandoraspec-0.1.1.dist-info/METADATA +0 -72
- pandoraspec-0.1.1.dist-info/RECORD +0 -9
- pandoraspec-0.1.1.dist-info/entry_points.txt +0 -2
- {pandoraspec-0.1.1.dist-info → pandoraspec-0.2.7.dist-info}/WHEEL +0 -0
- {pandoraspec-0.1.1.dist-info → pandoraspec-0.2.7.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,180 @@
|
|
|
1
|
+
import html
|
|
2
|
+
from schemathesis import checks
|
|
3
|
+
from schemathesis.specs.openapi import checks as oai_checks
|
|
4
|
+
from schemathesis.checks import CheckContext, ChecksConfig
|
|
5
|
+
from urllib.parse import unquote
|
|
6
|
+
from ..seed import SeedManager
|
|
7
|
+
from ..utils.logger import logger
|
|
8
|
+
|
|
9
|
+
def _generate_case(operation):
    """Best-effort creation of one test case for an operation.

    Tries the Hypothesis strategy first, then falls back to make_case();
    returns None when no case can be produced.
    """
    try:
        return operation.as_strategy().example()
    except Exception:
        pass
    try:
        cases = list(operation.make_case())
        return cases[0] if cases else None
    except Exception:
        return None


def _auth_headers(api_key: str) -> dict:
    """Build request headers, adding a Bearer Authorization when a key is given.

    The 'Bearer ' prefix is applied only if not already present (case-insensitive).
    """
    if not api_key:
        return {}
    auth_header = api_key if api_key.lower().startswith("bearer ") else f"Bearer {api_key}"
    return {"Authorization": auth_header}


def _plausible_datetime_value(msg: str):
    """Return the offending value from a strict 'date-time' validation message
    when it parses after normalizing the common 'space instead of T' variant;
    otherwise return None.
    """
    if "is not a 'date-time'" not in msg:
        return None
    try:
        from datetime import datetime
        # Extract value from message: "'2023-10-25 12:00:00' is not a 'date-time'"
        val_str = msg.split("'")[1]
        # Specific check for the common "Space instead of T" issue.
        datetime.fromisoformat(val_str.replace(" ", "T"))
        return val_str
    except Exception:
        return None


def run_drift_check(schema, base_url: str, api_key: str, seed_manager: SeedManager) -> list[dict]:
    """
    Module A: The 'Docs vs. Code' Drift Check (The Integrity Test)
    Uses schemathesis to verify if the API implementation matches the spec.

    Args:
        schema: schemathesis schema object exposing get_all_operations().
        base_url: Root URL of the API under test.
        api_key: Optional bearer token ('Bearer ' is prefixed when absent).
        seed_manager: Supplies known-good seed values for generated cases.

    Returns:
        List of result dicts (module/endpoint/issue/status/severity/details).
    """
    results = []
    # Mapping check names to actual functions
    check_map = {
        "not_a_server_error": checks.not_a_server_error,
        "status_code_conformance": oai_checks.status_code_conformance,
        "response_schema_conformance": oai_checks.response_schema_conformance,
    }

    # Schemathesis 4.x checks require a context object
    check_ctx = CheckContext(
        override=None,
        auth=None,
        headers=None,
        config=ChecksConfig(),
        transport_kwargs=None,
    )

    for op in schema.get_all_operations():
        # Handle Result type (Ok/Err) wrapping if present
        operation = op.ok() if hasattr(op, "ok") else op

        try:
            case = _generate_case(operation)
            if not case:
                continue

            seeded_keys = seed_manager.apply_seed_data(case) or set()

            # Annotate path params for logging: seeded values are shown,
            # randomly generated ones are masked as "random".
            formatted_path = operation.path
            if case.path_parameters:
                for key, value in case.path_parameters.items():
                    display_value = unquote(str(value)) if key in seeded_keys else "random"
                    formatted_path = formatted_path.replace(f"{{{key}}}", f"{{{key}:{display_value}}}")

            logger.info(f"AUDIT LOG: Testing endpoint {operation.method.upper()} {formatted_path}")

            headers = _auth_headers(api_key)

            # Call the API
            target_url = f"{base_url.rstrip('/')}/{formatted_path.lstrip('/')}"
            logger.debug(f"AUDIT LOG: Calling {operation.method.upper()} {target_url}")

            response = case.call(base_url=base_url, headers=headers)
            logger.debug(f"AUDIT LOG: Response Status Code: {response.status_code}")

            # We manually call the check function to ensure arguments are passed correctly.
            for check_name, check_func in check_map.items():
                try:
                    # Direct call: check_func(ctx, response, case)
                    check_func(check_ctx, response, case)

                    # If we get here, the check passed
                    results.append({
                        "module": "A",
                        "endpoint": f"{operation.method.upper()} {operation.path}",
                        "issue": f"{check_name} - Passed",
                        "status": "PASS",
                        "severity": "INFO",
                        "details": f"Status: {response.status_code}"
                    })

                except AssertionError as e:
                    # This catches actual drift (e.g., Schema validation failed).
                    # Capture and format detailed error info.
                    validation_errors = []

                    # Safely get causes if they exist and are iterable
                    causes = getattr(e, "causes", None)
                    if causes:
                        for cause in causes:
                            msg = cause.message if hasattr(cause, "message") else str(cause)

                            # Loose date-time check: if strict validation fails
                            # on a plausible value, treat the drift as minor.
                            val_str = _plausible_datetime_value(msg)
                            if val_str is not None:
                                logger.info(f"AUDIT LOG: Ignoring strict date-time failure for plausible value: {val_str}")
                                continue

                            validation_errors.append(msg)

                    if not validation_errors:
                        # If we filtered everything out, consider it a PASS
                        if causes:
                            results.append({
                                "module": "A",
                                "endpoint": f"{operation.method.upper()} {operation.path}",
                                "issue": f"{check_name} - Passed (Loose Validation)",
                                "status": "PASS",
                                "severity": "INFO",
                                "details": f"Status: {response.status_code}. Ignored minor format mismatches."
                            })
                            continue

                        validation_errors.append(str(e) or "Validation failed")

                    err_msg = "<br>".join(validation_errors)
                    safe_err = html.escape(err_msg)

                    # Add helpful context (Status & Body Preview)
                    context_msg = f"Status: {response.status_code}"
                    try:
                        if response.content:
                            preview = response.text[:500]
                            safe_preview = html.escape(preview)
                            context_msg += f"<br>Response: {safe_preview}"
                    except Exception:
                        pass

                    full_details = f"<strong>Error:</strong> {safe_err}<br><br><strong>Context:</strong><br>{context_msg}"

                    logger.warning(f"AUDIT LOG: Validation {check_name} failed: {err_msg}")
                    results.append({
                        "module": "A",
                        "endpoint": f"{operation.method.upper()} {operation.path}",
                        "issue": f"Schema Drift Detected ({check_name})",
                        "status": "FAIL",
                        "details": full_details,
                        "severity": "HIGH"
                    })
                except Exception as e:
                    # This catches unexpected coding errors
                    logger.error(f"AUDIT LOG: Error executing check {check_name}: {str(e)}")
                    results.append({
                        "module": "A",
                        "endpoint": f"{operation.method.upper()} {operation.path}",
                        "issue": f"Check Execution Error ({check_name})",
                        "status": "FAIL",
                        "details": str(e),
                        "severity": "HIGH"
                    })

        except Exception as e:
            logger.critical(f"AUDIT LOG: Critical Error during endpoint test: {str(e)}")
            continue

    return results
|
|
@@ -0,0 +1,174 @@
|
|
|
1
|
+
import time
|
|
2
|
+
from ..seed import SeedManager
|
|
3
|
+
from ..constants import (
|
|
4
|
+
FLOOD_REQUEST_COUNT,
|
|
5
|
+
LATENCY_THRESHOLD_WARN,
|
|
6
|
+
RECOVERY_WAIT_TIME,
|
|
7
|
+
HTTP_429_TOO_MANY_REQUESTS,
|
|
8
|
+
HTTP_500_INTERNAL_SERVER_ERROR
|
|
9
|
+
)
|
|
10
|
+
from ..utils.logger import logger
|
|
11
|
+
|
|
12
|
+
def _flood_case(operation):
    """Best-effort creation of one test case; returns None when impossible.

    Tries the Hypothesis strategy first, then falls back to make_case().
    """
    try:
        return operation.as_strategy().example()
    except Exception:
        pass
    try:
        cases = list(operation.make_case())
        return cases[0] if cases else None
    except Exception:
        return None


def _bearer_headers(api_key: str) -> dict:
    """Authorization header dict with a 'Bearer ' prefix applied when missing."""
    if not api_key:
        return {}
    return {"Authorization": api_key if api_key.lower().startswith("bearer ") else f"Bearer {api_key}"}


def _extract_latency(resp) -> float:
    """Seconds elapsed for a response, tolerating timedelta or numeric forms.

    Returns 0.0 when no usable latency attribute is present.
    """
    elapsed = getattr(resp, 'elapsed', None)
    if elapsed is None:
        return 0.0
    if hasattr(elapsed, 'total_seconds'):
        return elapsed.total_seconds()
    if isinstance(elapsed, (int, float)):
        return float(elapsed)
    return 0.0


def run_resilience_tests(schema, base_url: str, api_key: str, seed_manager: SeedManager) -> list[dict]:
    """
    Module B: The 'Resilience' Stress Test (Art. 24 & 25)
    Checks for Rate Limiting, Latency degradation, and Recovery.

    Args:
        schema: schemathesis schema object exposing get_all_operations().
        base_url: Root URL of the API under test.
        api_key: Optional bearer token ('Bearer ' is prefixed when absent).
        seed_manager: Supplies known-good seed values for generated cases.

    Returns:
        List of Module B result dicts; empty when the schema has no operations.
    """
    results = []
    ops = list(schema.get_all_operations())
    if not ops:
        return []

    logger.info("AUDIT LOG: Starting Module B: Resilience Stress Test (flooding requests)...")

    # Flood only the first operation; unwrap Result (Ok/Err) if present.
    operation = ops[0].ok() if hasattr(ops[0], "ok") else ops[0]

    # Auth headers are loop-invariant; build them once.
    headers = _bearer_headers(api_key)

    # Simulate flooding
    responses = []
    latencies = []

    for _ in range(FLOOD_REQUEST_COUNT):
        case = _flood_case(operation)
        if case is None:
            continue
        seed_manager.apply_seed_data(case)
        try:
            resp = case.call(base_url=base_url, headers=headers)
            responses.append(resp)
            latencies.append(_extract_latency(resp))
        except Exception as e:
            logger.warning(f"Request failed during flood: {e}")

    has_429 = any(r.status_code == HTTP_429_TOO_MANY_REQUESTS for r in responses)
    has_500 = any(r.status_code == HTTP_500_INTERNAL_SERVER_ERROR for r in responses)

    avg_latency = sum(latencies) / len(latencies) if latencies else 0.0

    # Recovery Check (Circuit Breaker)
    logger.info(f"Waiting {RECOVERY_WAIT_TIME}s for circuit breaker recovery check...")
    time.sleep(RECOVERY_WAIT_TIME)

    recovery_failed = False
    try:
        # One probe request after cooldown. A 429 still counts as "alive"
        # (rate limiter draining); only a 500 or a connection error is a
        # recovery failure.
        recovery_case = _flood_case(operation)
        if recovery_case:
            seed_manager.apply_seed_data(recovery_case)
            recovery_resp = recovery_case.call(base_url=base_url, headers=_bearer_headers(api_key))
            if recovery_resp.status_code == HTTP_500_INTERNAL_SERVER_ERROR:
                recovery_failed = True
    except Exception:
        # Connection error means it's down
        recovery_failed = True

    # Helper to create consistent result objects
    def _create_result(issue, status, details, severity):
        return {
            "module": "B",
            "issue": issue,
            "status": status,
            "details": details,
            "severity": severity
        }

    # 1. Rate Limiting Check
    if has_429:
        results.append(_create_result(
            "Rate Limiting Functional",
            "PASS",
            f"The API correctly returned {HTTP_429_TOO_MANY_REQUESTS} Too Many Requests when flooded.",
            "INFO"
        ))
    else:
        results.append(_create_result(
            "No Rate Limiting Enforced",
            "FAIL",
            f"The API did not return {HTTP_429_TOO_MANY_REQUESTS} Too Many Requests during high volume testing.",
            "MEDIUM"
        ))

    # 2. Stress Handling Check (500 Errors)
    if has_500:
        results.append(_create_result(
            "Poor Resilience: 500 Error during flood",
            "FAIL",
            f"The API returned {HTTP_500_INTERNAL_SERVER_ERROR} Internal Server Error instead of {HTTP_429_TOO_MANY_REQUESTS} Too Many Requests when flooded.",
            "CRITICAL"
        ))
    else:
        results.append(_create_result(
            "Stress Handling",
            "PASS",
            f"No {HTTP_500_INTERNAL_SERVER_ERROR} Internal Server Errors were observed during stress testing.",
            "INFO"
        ))

    # 3. Latency Check
    if avg_latency > LATENCY_THRESHOLD_WARN:
        results.append(_create_result(
            "Performance Degradation",
            "FAIL",
            f"Average latency during stress was {avg_latency:.2f}s (Threshold: {LATENCY_THRESHOLD_WARN}s).",
            "WARNING"
        ))
    else:
        results.append(_create_result(
            "Performance Stability",
            "PASS",
            f"Average latency {avg_latency:.2f}s remained within acceptable limits.",
            "INFO"
        ))

    # 4. Recovery Check
    if recovery_failed:
        results.append(_create_result(
            "Recovery Failure",
            "FAIL",
            f"API failed to recover (returned {HTTP_500_INTERNAL_SERVER_ERROR} or crash) after {RECOVERY_WAIT_TIME}s cooldown.",
            "HIGH"
        ))
    else:
        results.append(_create_result(
            "Self-Healing / Recovery",
            "PASS",
            f"API successfully handled legitimate requests after {RECOVERY_WAIT_TIME}s cooldown.",
            "INFO"
        ))

    return results
|
|
@@ -0,0 +1,234 @@
|
|
|
1
|
+
import requests
|
|
2
|
+
from typing import Optional
|
|
3
|
+
from ..constants import SENSITIVE_PATH_KEYWORDS, SECURITY_SCAN_LIMIT, HTTP_200_OK, HTTP_500_INTERNAL_SERVER_ERROR
|
|
4
|
+
from ..utils.logger import logger
|
|
5
|
+
|
|
6
|
+
def _check_headers(base_url: str) -> list[dict]:
    """Check for security headers on the base URL.

    Performs a single GET against base_url and reports one result dict:
    FAIL (MEDIUM) listing missing recommended headers, or PASS when all
    are present. On a request failure, logs a warning and returns [].
    """
    results = []
    # Recommended headers mapped to the short label used in the report.
    required_headers = {
        "Strict-Transport-Security": "HSTS",
        "Content-Security-Policy": "CSP",
        "X-Content-Type-Options": "No-Sniff",
        "X-Frame-Options": "Clickjacking Protection"
    }

    try:
        received = requests.get(base_url, timeout=5).headers

        missing_headers = [
            label for header, label in required_headers.items()
            if header not in received
        ]

        if missing_headers:
            results.append({
                "module": "C",
                "issue": "Missing Security Headers",
                "status": "FAIL",
                "details": f"Missing recommended headers: {', '.join(missing_headers)}",
                "severity": "MEDIUM"
            })
        else:
            results.append({
                "module": "C",
                "issue": "Security Headers",
                "status": "PASS",
                "details": "All core security headers are present.",
                "severity": "INFO"
            })

    except Exception as e:
        logger.warning(f"Failed to check headers: {e}")

    return results
|
|
46
|
+
|
|
47
|
+
def _check_auth_enforcement(ops, base_url: str) -> list[dict]:
    """
    Check if endpoints are protected by default.
    Tries to access up to SECURITY_SCAN_LIMIT static GET endpoints without
    credentials. Fails if 200 OK is returned on a path that does not look
    intentionally public (health/docs/auth etc.).

    Args:
        ops: Iterable of schemathesis operations (already unwrapped).
        base_url: Root URL of the API under test.

    Returns:
        A one-element list with a PASS/FAIL result, or [] when there are
        no simple GET endpoints to probe.
    """
    # Filter for GET operations without path parameters (simple access)
    simple_gets = [
        op for op in ops
        if op.method.upper() == "GET" and "{" not in op.path
    ]

    # Take top N
    targets = simple_gets[:SECURITY_SCAN_LIMIT]
    if not targets:
        return []

    # Paths that are legitimately public and should not count as failures.
    public_keywords = ["health", "status", "ping", "login", "auth", "token", "sign", "doc", "openapi", "well-known"]

    failures = []
    for op in targets:
        url = f"{base_url.rstrip('/')}{op.path}"
        try:
            # Request without any Auth headers
            resp = requests.get(url, timeout=5)
            # 200 OK without credentials is a finding unless the path is
            # obviously meant to be public (heuristic).
            if resp.status_code == HTTP_200_OK:
                if not any(k in op.path.lower() for k in public_keywords):
                    failures.append(op.path)
        except Exception as e:
            # Best-effort probe: unreachable endpoints are skipped, not failed.
            logger.debug(f"Auth enforcement probe failed for {url}: {e}")

    results = []
    if failures:
        results.append({
            "module": "C",
            "issue": "Auth Enforcement Failed",
            "status": "FAIL",
            "details": f"Endpoints accessible without auth: {', '.join(failures)}",
            "severity": "CRITICAL"
        })
    else:
        results.append({
            "module": "C",
            "issue": "Auth Enforcement",
            "status": "PASS",
            "details": f"Checked {len(targets)} endpoints; none returned {HTTP_200_OK} OK without auth.",
            "severity": "INFO"
        })
    return results
|
|
99
|
+
|
|
100
|
+
def _check_injection(ops, base_url: str, api_key: str = None) -> list[dict]:
    """
    Basic probe for SQLi/XSS in query parameters.

    Sends classic SQLi/XSS payloads via common query-parameter names
    ('q', 'id', 'search') against up to SECURITY_SCAN_LIMIT static GET
    endpoints, flagging 500 responses and reflected payloads.

    Args:
        ops: Iterable of schemathesis operations (already unwrapped).
        base_url: Root URL of the API under test.
        api_key: Optional bearer token used to probe behind auth.

    Returns:
        A one-element list with a PASS/FAIL result, or [] when there are
        no probe-able endpoints.
    """
    # Only probe static GET paths. Path-parameterized URLs would 404; they
    # previously consumed scan-limit slots before being skipped in the loop.
    candidates = [
        op for op in ops
        if op.method.upper() == "GET" and "{" not in op.path
    ]

    targets = candidates[:SECURITY_SCAN_LIMIT]  # Limit scan
    if not targets:
        return []

    headers = {}
    if api_key:
        # Use key if available to penetrate deeper.
        # Consistent with the other modules: case-insensitive 'bearer ' prefix check
        # (the old substring test 'Bearer' in api_key missed lowercase prefixes).
        headers["Authorization"] = api_key if api_key.lower().startswith("bearer ") else f"Bearer {api_key}"

    payloads = ["' OR '1'='1", "<script>alert(1)</script>"]

    injection_failures = []
    for op in targets:
        url = f"{base_url.rstrip('/')}{op.path}"

        for payload in payloads:
            try:
                # Add as arbitrary query param 'q' and 'id' - common vectors
                params = {"q": payload, "id": payload, "search": payload}
                resp = requests.get(url, headers=headers, params=params, timeout=5)

                if resp.status_code == HTTP_500_INTERNAL_SERVER_ERROR:
                    injection_failures.append(f"{op.path} (500 Error on injection)")
                if payload in resp.text:
                    injection_failures.append(f"{op.path} (Reflected XSS: payload found in response)")

            except Exception as e:
                # Best-effort probe; connection issues are not findings.
                logger.debug(f"Injection probe failed for {url}: {e}")

    results = []
    if injection_failures:
        results.append({
            "module": "C",
            "issue": "Injection Vulnerabilities",
            "status": "FAIL",
            "details": f"Potential issues found: {', '.join(list(set(injection_failures)))}",
            "severity": "HIGH"
        })
    else:
        results.append({
            "module": "C",
            "issue": "Basic Injection Check",
            "status": "PASS",
            "details": f"No {HTTP_500_INTERNAL_SERVER_ERROR} errors or reflected payloads detected during basic probing.",
            "severity": "INFO"
        })

    return results
|
|
167
|
+
|
|
168
|
+
def run_security_hygiene(schema, base_url: str, api_key: Optional[str] = None) -> list[dict]:
    """
    Module C: Security Hygiene Check
    Checks for TLS, Auth leakage, Headers, and Basic Vulnerabilities.

    Args:
        schema: schemathesis schema object exposing get_all_operations().
        base_url: Root URL of the API under test.
        api_key: Optional bearer token passed through to the injection probe.

    Returns:
        List of Module C result dicts from all sub-checks.
    """
    results = []
    logger.info(f"AUDIT LOG: Checking Security Hygiene for base URL: {base_url}")

    # 0. TLS Check — require an explicit 'https://' scheme (case-insensitive).
    # The previous 'startswith("https")' accepted malformed schemes like
    # 'httpsx' and rejected valid uppercase 'HTTPS://' URLs.
    if base_url and not base_url.lower().startswith("https://"):
        results.append({
            "module": "C",
            "issue": "Insecure Connection (No TLS)",
            "status": "FAIL",
            "details": "The API base URL does not use HTTPS.",
            "severity": "CRITICAL"
        })
    else:
        results.append({
            "module": "C",
            "issue": "Secure Connection (TLS)",
            "status": "PASS",
            "details": "The API uses HTTPS.",
            "severity": "INFO"
        })

    # Collect operations, unwrapping Result (Ok/Err) objects if present.
    try:
        all_ops = list(schema.get_all_operations())
        ops = [op.ok() if hasattr(op, "ok") else op for op in all_ops]
    except Exception:
        ops = []

    # 1. Auth Leakage in URL
    auth_leakage_found = False
    for operation in ops:
        endpoint = operation.path
        if any(keyword in endpoint.lower() for keyword in SENSITIVE_PATH_KEYWORDS):
            auth_leakage_found = True
            results.append({
                "module": "C",
                "issue": "Auth Leakage Risk",
                "status": "FAIL",
                "details": f"Endpoint '{endpoint}' indicates auth tokens might be passed in the URL.",
                "severity": "HIGH"
            })

    if not auth_leakage_found:
        results.append({
            "module": "C",
            "issue": "No Auth Leakage in URLs",
            "status": "PASS",
            "details": "No endpoints found with 'key' or 'token' in the path, suggesting safe header-based auth.",
            "severity": "INFO"
        })

    # 2. Check Headers
    if base_url:
        results.extend(_check_headers(base_url))

    # 3. Check Auth Enforcement
    results.extend(_check_auth_enforcement(ops, base_url))

    # 4. Check Injection
    results.extend(_check_injection(ops, base_url, api_key))

    return results
|
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
import yaml
|
|
2
|
+
import os
|
|
3
|
+
from typing import Optional, Dict, Any
|
|
4
|
+
from dataclasses import dataclass
|
|
5
|
+
from .core import AuditEngine
|
|
6
|
+
from .reporting.generator import generate_report, generate_json_report
|
|
7
|
+
from .utils.logger import logger
|
|
8
|
+
|
|
9
|
+
@dataclass
class AuditRunResult:
    """Outcome of one full DORA audit run: findings plus the generated report."""
    # Aggregated audit findings as returned by AuditEngine.run_full_audit().
    results: Dict[str, Any]
    # Filesystem path of the generated report (PDF by default, JSON on request).
    report_path: str
    # Number of seed-data entries loaded from the config file (0 when none).
    seed_count: int
|
|
14
|
+
|
|
15
|
+
from .config import validate_config, PandoraConfig
|
|
16
|
+
|
|
17
|
+
def load_config(config_path: str) -> PandoraConfig:
    """Load and validate a YAML configuration file.

    Returns a default PandoraConfig when the path is falsy, the file does
    not exist, or loading/validation fails (failures are logged).
    """
    # Guard clause: nothing to load -> defaults.
    if not config_path or not os.path.exists(config_path):
        return PandoraConfig()

    try:
        with open(config_path, "r") as handle:
            parsed = yaml.safe_load(handle) or {}
        return validate_config(parsed)
    except Exception as e:
        logger.error(f"Failed to load or validate config from {config_path}: {e}")
        return PandoraConfig()
|
|
27
|
+
|
|
28
|
+
def run_dora_audit_logic(
    target: str,
    vendor: str,
    api_key: Optional[str] = None,
    config_path: Optional[str] = None,
    base_url: Optional[str] = None,
    output_format: str = "pdf",
    output_path: Optional[str] = None
) -> AuditRunResult:
    """
    Orchestrates the DORA audit: loads config, runs engine, generates report.
    Decoupled from CLI/Printing.

    Args:
        target: Spec location handed to the AuditEngine.
        vendor: Vendor name used in the generated report.
        api_key: Optional API credential forwarded to the engine.
        config_path: Optional YAML config path providing seed data.
        base_url: Optional override for the API base URL.
        output_format: "json" for a JSON report, anything else for PDF.
        output_path: Optional explicit location for the report file.

    Returns:
        AuditRunResult bundling findings, report path, and seed count.
    """
    # 1. Load Config (seed data only; defaults to empty when no config given)
    seed_data = load_config(config_path).seed_data if config_path else {}

    # 2. Initialize Engine
    engine = AuditEngine(
        target=target,
        api_key=api_key,
        seed_data=seed_data,
        base_url=base_url,
    )

    # 3. Run Audit
    logger.info(f"Starting audit for {target}")
    audit_results = engine.run_full_audit()

    # 4. Generate Report — pick the generator by requested format.
    make_report = generate_json_report if output_format.lower() == "json" else generate_report
    report_path = make_report(vendor, audit_results, output_path=output_path)

    return AuditRunResult(
        results=audit_results,
        report_path=report_path,
        seed_count=len(seed_data),
    )
|
|
File without changes
|