PyPI - picosentry - Versions diffs - 0.16.0__py3-none-any.whl - Mend

picosentry 0.16.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (95) hide show

picosentry/__init__.py +47 -0
picosentry/__main__.py +8 -0
picosentry/_network.py +77 -0
picosentry/advisory.py +368 -0
picosentry/audit.py +329 -0
picosentry/auth.py +617 -0
picosentry/cache.py +370 -0
picosentry/cli.py +1802 -0
picosentry/config.py +525 -0
picosentry/corpus/advisories/npm-critical-advisories.json +2196 -0
picosentry/corpus/generate_npm_top.py +311 -0
picosentry/corpus/ioc/colors_js.json +13 -0
picosentry/corpus/ioc/crossenv.json +15 -0
picosentry/corpus/ioc/event_stream_3.3.6.json +24 -0
picosentry/corpus/ioc/left_pad.json +14 -0
picosentry/corpus/ioc/nx_typosquat.json +20 -0
picosentry/corpus/ioc/shai_hulud.json +21 -0
picosentry/corpus/ioc/ua_parser_js.json +13 -0
picosentry/corpus/npm_top_packages.json +329 -0
picosentry/corpus_governance.py +515 -0
picosentry/corpus_share.py +462 -0
picosentry/crypto.py +527 -0
picosentry/daemon.py +784 -0
picosentry/detection_quality.py +495 -0
picosentry/docs/rules/L2-ADV-001.md +46 -0
picosentry/docs/rules/L2-BUND-001.md +46 -0
picosentry/docs/rules/L2-CRED-001.md +46 -0
picosentry/docs/rules/L2-DEPC-001.md +37 -0
picosentry/docs/rules/L2-ENGIN-001.md +55 -0
picosentry/docs/rules/L2-FORK-001.md +40 -0
picosentry/docs/rules/L2-IOC-001.md +65 -0
picosentry/docs/rules/L2-LICENSE-001.md +61 -0
picosentry/docs/rules/L2-LOCK-001.md +45 -0
picosentry/docs/rules/L2-MAINT-001.md +50 -0
picosentry/docs/rules/L2-MANI-001.md +45 -0
picosentry/docs/rules/L2-MANI-002.md +37 -0
picosentry/docs/rules/L2-OBFS-001.md +36 -0
picosentry/docs/rules/L2-OBFS-002.md +35 -0
picosentry/docs/rules/L2-OBFS-003.md +36 -0
picosentry/docs/rules/L2-OBFS-004.md +35 -0
picosentry/docs/rules/L2-PNPM-001.md +52 -0
picosentry/docs/rules/L2-POST-001.md +47 -0
picosentry/docs/rules/L2-PROV-001.md +46 -0
picosentry/docs/rules/L2-SIDELOAD-001.md +59 -0
picosentry/docs/rules/L2-TYPO-001.md +57 -0
picosentry/docs/rules/README.md +92 -0
picosentry/engine.py +412 -0
picosentry/enterprise.py +178 -0
picosentry/fleet.py +566 -0
picosentry/formatters/__init__.py +10 -0
picosentry/formatters/cyclonedx.py +208 -0
picosentry/formatters/github.py +98 -0
picosentry/formatters/json_fmt.py +17 -0
picosentry/formatters/ml_context.py +18 -0
picosentry/formatters/sarif.py +116 -0
picosentry/formatters/table.py +95 -0
picosentry/guards.py +288 -0
picosentry/ioc_registry.py +214 -0
picosentry/logging.py +219 -0
picosentry/management.py +414 -0
picosentry/metrics.py +222 -0
picosentry/models.py +382 -0
picosentry/policy.py +814 -0
picosentry/policy_lifecycle.py +387 -0
picosentry/py.typed +0 -0
picosentry/rules/__init__.py +196 -0
picosentry/rules/advisory_check.py +150 -0
picosentry/rules/bundled_shadow.py +151 -0
picosentry/rules/credential_read.py +334 -0
picosentry/rules/dep_confusion.py +166 -0
picosentry/rules/engine.py +208 -0
picosentry/rules/fork_drift.py +295 -0
picosentry/rules/ioc_detection.py +199 -0
picosentry/rules/license.py +248 -0
picosentry/rules/lockfile_drift.py +397 -0
picosentry/rules/maintainer_change.py +287 -0
picosentry/rules/manifest.py +151 -0
picosentry/rules/obfuscation.py +218 -0
picosentry/rules/pnpm_config.py +149 -0
picosentry/rules/pnpm_lock_parser.py +243 -0
picosentry/rules/post_install.py +156 -0
picosentry/rules/provenance.py +188 -0
picosentry/rules/sideloading.py +134 -0
picosentry/rules/typosquat.py +331 -0
picosentry/rules/utils.py +100 -0
picosentry/tenant.py +433 -0
picosentry/workspace.py +371 -0
picosentry-0.16.0.dist-info/METADATA +392 -0
picosentry-0.16.0.dist-info/RECORD +95 -0
picosentry-0.16.0.dist-info/WHEEL +5 -0
picosentry-0.16.0.dist-info/entry_points.txt +2 -0
picosentry-0.16.0.dist-info/licenses/COMMERCIAL-LICENSE.md +20 -0
picosentry-0.16.0.dist-info/licenses/LICENSE +91 -0
picosentry-0.16.0.dist-info/licenses/LICENSE-SUMMARY.md +29 -0
picosentry-0.16.0.dist-info/top_level.txt +1 -0

picosentry/__init__.py ADDED Viewed

@@ -0,0 +1,47 @@
+"""
+PicoSentry — deterministic supply-chain scanner for npm/pnpm.
+Same inputs + same corpus version = same findings and scan fingerprint.
+No HTTP at scan time. No probabilistic heuristics. No narrative in findings.
+Usage:
+    from picosentry import ScanEngine, create_default_engine
+    result = create_default_engine().scan("./my-project")
+    print(result.to_json())
+Deterministic guard stack:
+    from picosentry.guards import (
+        DeterministicGuard, DeterminismViolation,
+        deterministic_hash, fingerprint_scan,
+        verify_determinism, diff_scans,
+    )
+"""
+from .engine import ScanEngine, create_default_engine, user_corpus_dir
+from .models import (
+    BaselineResult,
+    Confidence,
+    Finding,
+    RuleExecution,
+    ScanResult,
+    ScanStats,
+    Severity,
+    apply_baseline,
+    load_baseline,
+)
+# Package version string; matches the 0.16.0 wheel this module ships in.
+__version__ = "0.16.0"
+# Public API of the package: every name listed here is imported above from
+# .engine or .models and re-exported for `from picosentry import ...`.
+__all__ = [
+    "ScanEngine",
+    "create_default_engine",
+    "user_corpus_dir",
+    "Finding",
+    "ScanResult",
+    "ScanStats",
+    "Severity",
+    "Confidence",
+    "BaselineResult",
+    "RuleExecution",
+    "load_baseline",
+    "apply_baseline",
+]

picosentry/__main__.py ADDED Viewed

@@ -0,0 +1,8 @@
+"""Allow running PicoSentry CLI as: python -m picosentry"""
+from picosentry.cli import main
+if __name__ == "__main__":
+    # Hand main()'s return value to the interpreter as the process exit status.
+    raise SystemExit(main())

picosentry/_network.py ADDED Viewed

@@ -0,0 +1,77 @@
+"""Network helpers with TLS enforcement and size limits for PicoSentry.
+All outbound HTTP in PicoSentry must go through safe_urlopen to:
+  - Reject non-HTTPS URLs (MITM protection)
+  - Cap response body size (OOM / disk-exhaustion protection)
+The scanner engine itself is offline; this module is only used by
+management, auth (JWKS), and the CLI update command.
+"""
+from __future__ import annotations
+import logging
+import urllib.error
+import urllib.request
+from http.client import HTTPResponse
+logger = logging.getLogger("picosentry._network")
+# Default maximum response body size (10 MB)
+DEFAULT_MAX_RESPONSE_BYTES = 10 * 1024 * 1024
+class InsecureURLError(ValueError):
+    """Signals that safe_urlopen was handed a URL that is not HTTPS."""
+class ResponseTooLargeError(ValueError):
+    """Signals that a response body was larger than the configured size cap."""
+def safe_urlopen(
+    url: str | urllib.request.Request,
+    *,
+    timeout: int = 30,
+    max_bytes: int = DEFAULT_MAX_RESPONSE_BYTES,
+    allow_http: bool = False,
+) -> tuple[HTTPResponse, bytes]:
+    """Open a URL with TLS enforcement and response size capping.
+    The scheme is validated twice: on the requested URL before any traffic
+    is sent, and on the final URL after redirects, because urllib follows
+    an https -> http redirect without complaint.
+    Args:
+        url: URL string or urllib Request object.
+        timeout: Request timeout in seconds.
+        max_bytes: Maximum allowed response body size.
+        allow_http: If True, allow http:// URLs (for local dev only). Other
+            schemes (file://, ftp://, ...) are refused either way.
+    Returns:
+        Tuple of (response_object, body_bytes). The body has already been
+        read; the response object is returned still open.
+    Raises:
+        InsecureURLError: If the requested or redirected-to URL uses a
+            scheme that is not allowed.
+        ResponseTooLargeError: If the response body exceeds max_bytes.
+        urllib.error.URLError: If the request fails.
+    """
+    # Extract URL string for scheme check
+    url_str = url.full_url if isinstance(url, urllib.request.Request) else url
+    allowed_prefixes = ("https://", "http://") if allow_http else ("https://",)
+    def _scheme_allowed(candidate: str) -> bool:
+        # URL schemes are case-insensitive, so compare on a lower-cased prefix.
+        return candidate[:8].lower().startswith(allowed_prefixes)
+    if not _scheme_allowed(url_str):
+        if allow_http:
+            raise InsecureURLError(
+                f"Refusing unsupported URL scheme: {url_str}. Only http:// and https:// URLs are allowed."
+            )
+        raise InsecureURLError(
+            f"Refusing non-HTTPS URL (MITM risk): {url_str}. Set allow_http=True only for local development."
+        )
+    # URLError (and its subclasses) propagate to the caller unchanged.
+    resp = urllib.request.urlopen(url, timeout=timeout)
+    # A redirect may have moved the request off HTTPS; refuse the downgraded
+    # response instead of trusting its body.
+    final_url = getattr(resp, "url", None)
+    if isinstance(final_url, str) and not _scheme_allowed(final_url):
+        resp.close()
+        raise InsecureURLError(
+            f"Refusing redirect from {url_str} to disallowed URL (MITM risk): {final_url}."
+        )
+    # Read one byte past the cap so an oversized body is detectable without
+    # buffering all of it.
+    try:
+        body = resp.read(max_bytes + 1)
+    except Exception:
+        # Do not leak the connection when the read itself fails.
+        resp.close()
+        raise
+    if len(body) > max_bytes:
+        resp.close()
+        raise ResponseTooLargeError(
+            f"Response from {url_str} exceeded {max_bytes // (1024 * 1024)}MB limit. "
+            "Possible network issue or MITM attack."
+        )
+    return resp, body

picosentry/advisory.py ADDED Viewed

@@ -0,0 +1,368 @@
+"""
+Advisory database integration for PicoSentry.
+Loads OSV-format vulnerability data from a local directory and matches
+installed packages against known CVEs, GHSA advisories, and npm advisories.
+Enterprise teams can mirror the OSV database locally for air-gapped scanning:
+    gsutil cp gs://osv-vulnerabilities/npm/all.zip .
+    unzip all.zip -d advisories/
+    picosentry scan . --advisory-db advisories/
+Offline-only. No network calls at scan time.
+Supports: OSV JSON format, GitHub Advisory Database (GHSA), npm advisory format.
+"""
+from __future__ import annotations
+import json
+import logging
+import re
+from dataclasses import dataclass, field
+from pathlib import Path
+logger = logging.getLogger("picosentry.advisory")
+# Semver parsing: extract major.minor.patch from a version string
+_SEMVER_RE = re.compile(r"(\d+)\.(\d+)\.(\d+)")
+@dataclass
+class Advisory:
+    """A single security advisory, normalised from an OSV or GHSA record.
+    ``affected_ranges`` holds ``(introduced, upper, upper_inclusive)`` tuples;
+    an empty ``upper`` means the range has no upper bound.
+    """
+    id: str = ""  # CVE-2024-xxxx, GHSA-xxxx-xxxx, etc.
+    package_name: str = ""  # npm package name
+    summary: str = ""
+    severity: str = "MEDIUM"  # CRITICAL, HIGH, MEDIUM, LOW
+    fixed_version: str = ""  # Patched version (last "fixed" event seen for OSV input)
+    affected_versions: list[str] = field(default_factory=list)
+    cwe_ids: list[str] = field(default_factory=list)
+    references: list[str] = field(default_factory=list)
+    published: str = ""
+    database_specific: dict = field(default_factory=dict)
+    affected_ranges: list[tuple[str, str, bool]] = field(default_factory=list)
+    def to_dict(self) -> dict:
+        """Return the advisory as a plain dict (``database_specific`` is not included)."""
+        return {
+            "id": self.id,
+            "package_name": self.package_name,
+            "summary": self.summary,
+            "severity": self.severity,
+            "fixed_version": self.fixed_version,
+            "affected_versions": self.affected_versions,
+            "cwe_ids": self.cwe_ids,
+            "references": self.references,
+            "published": self.published,
+            "affected_ranges": self.affected_ranges,
+        }
+    @staticmethod
+    def _ranges_from_events(events: list) -> list[tuple[str, str, bool]]:
+        """Turn one OSV ``events`` list into (introduced, upper, inclusive) intervals."""
+        events = [e for e in (events or []) if isinstance(e, dict)]
+        if sum("introduced" in e for e in events) <= 1:
+            # Single interval: order-independent, the last value of each kind wins.
+            introduced = fixed = last_affected = ""
+            for event in events:
+                if "introduced" in event:
+                    introduced = event["introduced"]
+                if "fixed" in event:
+                    fixed = event["fixed"]
+                if "last_affected" in event:
+                    last_affected = event["last_affected"]
+            if not introduced:
+                return []
+            if fixed:
+                # fixed is exclusive upper bound (< fixed)
+                return [(introduced, fixed, False)]
+            if last_affected:
+                # last_affected is inclusive upper bound (<= last_affected)
+                return [(introduced, last_affected, True)]
+            # No upper bound — all versions >= introduced are affected
+            return [(introduced, "", False)]
+        # Several "introduced" events: each opens an interval that the next
+        # "fixed"/"last_affected" closes. Assumes the events are listed in
+        # version order, as the OSV schema describes — TODO confirm for
+        # hand-written corpora.
+        intervals: list[tuple[str, str, bool]] = []
+        open_start = ""
+        for event in events:
+            if "introduced" in event and not open_start:
+                open_start = event["introduced"]
+            if not open_start:
+                continue
+            if "fixed" in event:
+                intervals.append((open_start, event["fixed"], False))
+                open_start = ""
+            elif "last_affected" in event:
+                intervals.append((open_start, event["last_affected"], True))
+                open_start = ""
+        if open_start:
+            intervals.append((open_start, "", False))
+        return intervals
+    @staticmethod
+    def from_osv(data: dict) -> Advisory | None:
+        """Parse an OSV-format advisory entry.
+        OSV schema: https://ossf.github.io/osv-schema/
+        Only ``affected`` entries whose ecosystem is npm contribute ranges,
+        versions and the fixed version; entries for other ecosystems are
+        ignored so their bounds are not attributed to the npm package.
+        Returns:
+            The parsed Advisory, or None when ``data`` is not a dict or names
+            no npm package.
+        """
+        if not isinstance(data, dict):
+            return None
+        adv_id = data.get("id", "")
+        summary = data.get("summary", "")
+        details = data.get("details", "")
+        if not summary and details:
+            summary = details[:200]
+        pkg_name = ""
+        fixed_version = ""
+        affected_versions: list[str] = []
+        affected_ranges: list[tuple[str, str, bool]] = []
+        for affected in data.get("affected") or []:
+            if not isinstance(affected, dict):
+                continue
+            pkg = affected.get("package") or {}
+            if not isinstance(pkg, dict) or str(pkg.get("ecosystem", "")).lower() != "npm":
+                continue
+            # NOTE: when several npm entries are present the last name wins
+            # and their ranges are merged into this single Advisory.
+            pkg_name = pkg.get("name", "")
+            for r in affected.get("ranges") or []:
+                if not isinstance(r, dict):
+                    continue
+                events = r.get("events") or []
+                affected_ranges.extend(Advisory._ranges_from_events(events))
+                for event in events:
+                    if isinstance(event, dict) and "fixed" in event:
+                        fixed_version = event["fixed"]
+            for ver in affected.get("versions") or []:
+                if ver not in affected_versions:
+                    affected_versions.append(ver)
+        if not pkg_name:
+            return None
+        # Severity comes from database_specific; anything unrecognised keeps
+        # the MEDIUM default.
+        db_specific = data.get("database_specific", {})
+        if not isinstance(db_specific, dict):
+            db_specific = {}
+        severity = "MEDIUM"
+        sev = str(db_specific.get("severity", "")).upper()
+        if sev in ("CRITICAL", "HIGH", "MEDIUM", "LOW"):
+            severity = sev
+        return Advisory(
+            id=adv_id,
+            package_name=pkg_name,
+            summary=summary,
+            severity=severity,
+            fixed_version=fixed_version,
+            affected_versions=affected_versions,
+            affected_ranges=affected_ranges,
+            cwe_ids=db_specific.get("cwe_ids", []),
+            references=[ref.get("url", "") for ref in data.get("references") or [] if isinstance(ref, dict)],
+            published=data.get("published", ""),
+            database_specific=db_specific,
+        )
+    @staticmethod
+    def from_ghsa(data: dict) -> Advisory | None:
+        """Parse a GitHub Advisory Database (GHSA) entry.
+        Null ``package``, ``first_patched_version`` or ``severity`` fields are
+        treated as absent instead of raising.
+        """
+        adv_id = data.get("ghsa_id", data.get("id", ""))
+        package = data.get("package")
+        if not isinstance(package, dict):
+            package = {}
+        patched = data.get("first_patched_version")
+        if not isinstance(patched, dict):
+            patched = {}
+        raw_severity = data.get("severity", "MEDIUM")
+        severity = "MEDIUM" if raw_severity is None else str(raw_severity).upper()
+        return Advisory(
+            id=adv_id,
+            package_name=package.get("name", ""),
+            summary=data.get("summary", ""),
+            severity=severity,
+            fixed_version=patched.get("identifier", ""),
+            affected_versions=[data.get("vulnerable_version_range", "")],
+            cwe_ids=[c.get("cwe_id", "") for c in data.get("cwes") or [] if isinstance(c, dict)],
+            references=data.get("references", []),
+            published=data.get("published_at", ""),
+        )
+class AdvisoryDB:
+    """Offline advisory database loaded from local OSV-format files.
+    Directory structure expected:
+        advisories/
+          npm/
+            CVE-2024-xxxx.json
+            GHSA-xxxx-xxxx.json
+          or flat .json files
+    Each file is a single OSV-format advisory entry (or a JSON array of them).
+    """
+    # "major.minor.patch" anywhere in the string (same pattern as the
+    # module-level _SEMVER_RE).
+    _FULL_VERSION_RE = re.compile(r"(\d+)\.(\d+)\.(\d+)")
+    # Bare "major" or "major.minor", e.g. the OSV lower-bound sentinel "0".
+    _SHORT_VERSION_RE = re.compile(r"(\d+)(?:\.(\d+))?")
+    def __init__(self, db_dir: Path | None = None) -> None:
+        """Create the database and, if ``db_dir`` is an existing directory, load it."""
+        self._advisories: dict[str, list[Advisory]] = {}  # pkg_name → advisories
+        self._loaded = False
+        self._db_dir = db_dir
+        if db_dir and db_dir.is_dir():
+            self.load(db_dir)
+    def load(self, db_dir: Path) -> int:
+        """Load all advisory files from a directory.
+        Files are visited in sorted order; symlinks and unreadable or
+        malformed files are skipped.
+        Returns number of advisories loaded.
+        """
+        count = 0
+        for json_file in sorted(db_dir.rglob("*.json")):
+            if json_file.is_symlink():
+                continue
+            try:
+                data = json.loads(json_file.read_text(encoding="utf-8"))
+            except (ValueError, OSError):
+                # ValueError covers both JSONDecodeError and UnicodeDecodeError.
+                logger.debug("Failed to read advisory file: %s", json_file)
+                continue
+            # Support both single advisory and array of advisories
+            entries = data if isinstance(data, list) else [data]
+            for entry in entries:
+                if not isinstance(entry, dict):
+                    continue
+                adv = Advisory.from_osv(entry)
+                if adv is None:
+                    continue
+                self._advisories.setdefault(adv.package_name, []).append(adv)
+                count += 1
+        self._loaded = True
+        logger.info("Loaded %d advisories for %d packages", count, len(self._advisories))
+        return count
+    def check(self, pkg_name: str, pkg_version: str) -> list[Advisory]:
+        """Return the advisories for ``pkg_name`` that affect ``pkg_version``."""
+        return [adv for adv in self._advisories.get(pkg_name, []) if self._version_affected(pkg_version, adv)]
+    def _version_affected(self, version: str, adv: Advisory) -> bool:
+        """Check if a version is affected by an advisory.
+        Checks structured range intervals first (with AND logic within each
+        range), then falls back to fixed_version heuristic and explicit
+        affected_versions list for backward compatibility.
+        """
+        v_tuple = self._parse_version(version)
+        if v_tuple is None:
+            # Unparseable versions are reported as not affected; note this
+            # favours fewer findings, not safety.
+            return False
+        for introduced, upper, upper_inclusive in adv.affected_ranges:
+            iv = self._parse_version(introduced)
+            if iv is None or v_tuple < iv:
+                continue
+            # An upper bound that cannot be parsed is treated as "no upper bound".
+            uv = self._parse_version(upper) if upper else None
+            if uv is not None:
+                beyond = v_tuple > uv if upper_inclusive else v_tuple >= uv
+                if beyond:
+                    continue
+            return True
+        # Fallback: if fixed version is set and version < fixed_version,
+        # the package is affected (used by GHSA and other sources without ranges).
+        # Skip this heuristic when structured ranges are available, since
+        # ranges encode both lower and upper bounds correctly.
+        if not adv.affected_ranges:
+            fv_tuple = self._parse_version(adv.fixed_version)
+            if fv_tuple is not None and v_tuple < fv_tuple:
+                return True
+        # Check explicit affected version matches
+        return any(self._version_in_range(v_tuple, av) for av in adv.affected_versions)
+    @staticmethod
+    def _parse_version(version_str: str) -> tuple | None:
+        """Parse a semver-ish string into (major, minor, patch) tuple.
+        A full ``x.y.z`` is picked up anywhere in the string. A string that
+        is only ``x`` or ``x.y`` is zero-padded, so the OSV lower bound ``"0"``
+        parses as ``(0, 0, 0)`` instead of being dropped.
+        """
+        if not version_str:
+            return None
+        m = AdvisoryDB._FULL_VERSION_RE.search(version_str)
+        if m:
+            return (int(m.group(1)), int(m.group(2)), int(m.group(3)))
+        short = AdvisoryDB._SHORT_VERSION_RE.fullmatch(version_str.strip())
+        if short:
+            return (int(short.group(1)), int(short.group(2) or 0), 0)
+        return None
+    @staticmethod
+    def _clause_matches(v_tuple: tuple, clause: str) -> bool:
+        """Evaluate one comparator such as '>=1.0.0', '<2.0.0' or a bare version."""
+        clause = clause.strip()
+        # Two-character operators must be tested before their one-character prefixes.
+        if clause.startswith(">="):
+            rv = AdvisoryDB._parse_version(clause[2:])
+            return rv is not None and v_tuple >= rv
+        if clause.startswith("<="):
+            rv = AdvisoryDB._parse_version(clause[2:])
+            return rv is not None and v_tuple <= rv
+        if clause.startswith(">"):
+            rv = AdvisoryDB._parse_version(clause[1:])
+            return rv is not None and v_tuple > rv
+        if clause.startswith("<"):
+            rv = AdvisoryDB._parse_version(clause[1:])
+            return rv is not None and v_tuple < rv
+        # Exact version match
+        rv = AdvisoryDB._parse_version(clause)
+        return rv is not None and v_tuple == rv
+    @staticmethod
+    def _version_in_range(v_tuple: tuple, range_str: str) -> bool:
+        """Check if version falls within a range like '>=1.0.0' or '>= 1.0.0, < 2.0.0'.
+        Comma-separated comparators must all hold; '||' separates alternatives,
+        any one of which may hold. A single comparator behaves as before.
+        """
+        for alternative in range_str.split("||"):
+            clauses = [c for c in alternative.split(",") if c.strip()]
+            if clauses and all(AdvisoryDB._clause_matches(v_tuple, c) for c in clauses):
+                return True
+        return False
+    @property
+    def package_count(self) -> int:
+        """Number of distinct packages that have at least one advisory."""
+        return len(self._advisories)
+    @property
+    def advisory_count(self) -> int:
+        """Total number of advisories across all packages."""
+        return sum(len(v) for v in self._advisories.values())
+    @property
+    def is_loaded(self) -> bool:
+        """True once a load has completed."""
+        return self._loaded
+# ── Bundled advisory snapshot ─────────────────────────────────────────
+def load_bundled_advisories() -> AdvisoryDB:
+    """Load the bundled advisory snapshot that ships with PicoSentry.
+    The snapshot contains a curated set of critical/high severity
+    npm advisories for air-gapped and offline environments.
+    For the full advisory database, use `picosentry advisories fetch`
+    or run `scripts/download-advisories.sh`.
+    A missing, unreadable or malformed snapshot is logged and yields an
+    empty database rather than raising.
+    Returns:
+        AdvisoryDB loaded with bundled advisories.
+    """
+    bundled_path = Path(__file__).parent / "corpus" / "advisories" / "npm-critical-advisories.json"
+    db = AdvisoryDB()
+    if not bundled_path.is_file():
+        logger.warning("Bundled advisory file not found: %s", bundled_path)
+        return db
+    try:
+        data = json.loads(bundled_path.read_text(encoding="utf-8"))
+    except (ValueError, OSError) as e:
+        # ValueError covers both JSONDecodeError and UnicodeDecodeError.
+        logger.warning("Failed to load bundled advisories: %s", e)
+        return db
+    if not isinstance(data, dict):
+        logger.warning("Failed to load bundled advisories: %s", "top-level JSON value is not an object")
+        return db
+    advisory_list = data.get("advisories", [])
+    if not advisory_list:
+        logger.info("Bundled advisory snapshot is empty — run scripts/bundle-advisories.py to populate")
+        return db
+    loaded = 0
+    for entry in advisory_list:
+        if not isinstance(entry, dict):
+            continue
+        adv = Advisory.from_osv(entry)
+        if adv is None:
+            continue
+        db._advisories.setdefault(adv.package_name, []).append(adv)
+        loaded += 1
+    db._loaded = True
+    meta = data.get("metadata", {})
+    source = meta.get("source", "unknown") if isinstance(meta, dict) else "unknown"
+    # Report the advisories actually stored, not the raw entry count, so
+    # skipped entries do not inflate the figure.
+    logger.info(
+        "Loaded %d bundled advisories for %d packages (source: %s)",
+        loaded,
+        len(db._advisories),
+        source,
+    )
+    return db
+def default_advisory_dir() -> Path:
+    """Resolve the directory that holds the local advisory database.
+    Lookup order:
+        1. The $PICOADVISORY_DIR environment variable, when set and non-empty.
+        2. ~/.local/share/picosentry/advisories/
+    """
+    from os import getenv
+    override = getenv("PICOADVISORY_DIR")
+    fallback_parts = (".local", "share", "picosentry", "advisories")
+    return Path(override) if override else Path.home().joinpath(*fallback_parts)