PyPI - agentdiscover - Versions diffs - 2.7.2__py3-none-any.whl - Mend

agentdiscover 2.7.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (56) hide show

agent_discover_scanner/__init__.py +24 -0
agent_discover_scanner/aibom.py +96 -0
agent_discover_scanner/audit_reports.py +83 -0
agent_discover_scanner/behavioral_patterns.py +252 -0
agent_discover_scanner/cli.py +1335 -0
agent_discover_scanner/correlator.py +1114 -0
agent_discover_scanner/detectors/__init__.py +0 -0
agent_discover_scanner/detectors/cloud_audit/__init__.py +230 -0
agent_discover_scanner/detectors/cloud_audit/aws_cloudtrail.py +565 -0
agent_discover_scanner/detectors/cloud_audit/azure_monitor.py +54 -0
agent_discover_scanner/detectors/cloud_audit/base.py +127 -0
agent_discover_scanner/detectors/cloud_audit/gcp_audit.py +53 -0
agent_discover_scanner/detectors/cloudtrail.py +24 -0
agent_discover_scanner/errors.py +121 -0
agent_discover_scanner/exporters/__init__.py +0 -0
agent_discover_scanner/exporters/mcpfw_policy.py +483 -0
agent_discover_scanner/git_scanner.py +233 -0
agent_discover_scanner/high_risk_agents.py +439 -0
agent_discover_scanner/interceptors/__init__.py +54 -0
agent_discover_scanner/interceptors/base.py +319 -0
agent_discover_scanner/interceptors/sse/__init__.py +0 -0
agent_discover_scanner/interceptors/sse/netskope.py +17 -0
agent_discover_scanner/interceptors/sse/prisma_access.py +17 -0
agent_discover_scanner/interceptors/sse/umbrella.py +17 -0
agent_discover_scanner/interceptors/sse/zscaler.py +17 -0
agent_discover_scanner/js_signatures.py +149 -0
agent_discover_scanner/known_apps.py +205 -0
agent_discover_scanner/layer4/__init__.py +0 -0
agent_discover_scanner/layer4/osquery_executor.py +328 -0
agent_discover_scanner/layer4/osquery_queries.py +244 -0
agent_discover_scanner/layer4/result_parser.py +187 -0
agent_discover_scanner/macos_detector.py +124 -0
agent_discover_scanner/mcp_detector.py +720 -0
agent_discover_scanner/models/endpoint_discovery.py +86 -0
agent_discover_scanner/monitors/__init__.py +15 -0
agent_discover_scanner/monitors/json_output.py +68 -0
agent_discover_scanner/monitors/k8s_monitor.py +190 -0
agent_discover_scanner/monitors/tetragon_events.py +109 -0
agent_discover_scanner/monitors/tetragon_monitor.py +405 -0
agent_discover_scanner/monitors/vendor_mapping.py +113 -0
agent_discover_scanner/network_monitor.py +496 -0
agent_discover_scanner/platform.py +430 -0
agent_discover_scanner/reports/layer4_report.py +126 -0
agent_discover_scanner/saas_detector.py +836 -0
agent_discover_scanner/sarif_output.py +155 -0
agent_discover_scanner/sbom_analyzer.py +277 -0
agent_discover_scanner/scan_runner.py +1485 -0
agent_discover_scanner/scanner.py +151 -0
agent_discover_scanner/signatures.py +617 -0
agent_discover_scanner/visitor.py +172 -0
agent_discover_scanner/windows_detector.py +137 -0
agentdiscover-2.7.2.dist-info/METADATA +815 -0
agentdiscover-2.7.2.dist-info/RECORD +56 -0
agentdiscover-2.7.2.dist-info/WHEEL +4 -0
agentdiscover-2.7.2.dist-info/entry_points.txt +4 -0
agentdiscover-2.7.2.dist-info/licenses/LICENSE +21 -0

agent_discover_scanner/__init__.py ADDED Viewed

@@ -0,0 +1,24 @@
+"""AgentDiscover — Detect AI Agents and Shadow AI across 5 layers."""
+import warnings
+from importlib.metadata import PackageNotFoundError, version
+# Resolve the version from the installed distribution metadata; use a placeholder
+# when no metadata for 'agentdiscover' can be found.
+try:
+    __version__ = version("agentdiscover")
+except PackageNotFoundError:
+    __version__ = "0.0.0"
+# Probe for the legacy 'agent-discover-scanner' distribution. When it is still
+# installed next to this package, tell the user to migrate; when it is absent,
+# stay silent.
+try:
+    version("agent-discover-scanner")
+except PackageNotFoundError:
+    pass
+else:
+    warnings.warn(
+        "The package 'agent-discover-scanner' is deprecated and will be removed in a "
+        "future release. Please migrate: pip install agentdiscover",
+        DeprecationWarning,
+        stacklevel=2,
+    )
+__all__ = ["__version__"]

agent_discover_scanner/aibom.py ADDED Viewed

@@ -0,0 +1,96 @@
+"""Best-effort CycloneDX 1.6–oriented AIBOM export from agent_inventory.json."""
+from __future__ import annotations
+import json
+import uuid
+from importlib.metadata import version as _pkg_version
+from pathlib import Path
+from typing import Any
+def generate_aibom(inventory_json: Path, output_path: Path) -> dict[str, Any]:
+    """
+    Build a best-effort CycloneDX 1.6-oriented AIBOM from agent_inventory.json and write it to disk.
+    Every agent dict found in a list under the top-level ``inventory`` mapping becomes one
+    ``application`` component; the bucket key it was filed under is stored as its
+    ``agent-discover:inventory_classification`` property. Non-list buckets and non-dict
+    entries are skipped.
+    Args:
+        inventory_json: Path to the inventory JSON file (read as UTF-8).
+        output_path: Destination file; missing parent directories are created.
+    Returns:
+        The BOM document that was written, as a plain dict.
+    Raises:
+        OSError: If the inventory cannot be read or the output cannot be written.
+        json.JSONDecodeError: If the inventory file is not valid JSON.
+    """
+    # Function-scope import so this block does not depend on a file-level import change.
+    from datetime import datetime, timezone
+    # The input path was already coerced with Path(); do the same for the destination so a
+    # plain string works for both arguments.
+    output_path = Path(output_path)
+    raw = json.loads(Path(inventory_json).read_text(encoding="utf-8"))
+    components: list[dict[str, Any]] = []
+    n = 0  # running counter across all buckets; keeps every bom-ref unique
+    for bucket_classification, agents in (raw.get("inventory") or {}).items():
+        if not isinstance(agents, list):
+            continue
+        for agent in agents:
+            if not isinstance(agent, dict):
+                continue
+            n += 1
+            # Agents without an id get a synthetic one derived from the counter.
+            aid = agent.get("agent_id") or f"agent-{n}"
+            properties: list[dict[str, str]] = [
+                {
+                    "name": "agent-discover:inventory_classification",
+                    "value": str(bucket_classification),
+                },
+                {
+                    "name": "agent-discover:risk_level",
+                    "value": str(agent.get("risk_level", "")),
+                },
+            ]
+            if agent.get("framework"):
+                properties.append(
+                    {"name": "agent-discover:framework", "value": str(agent["framework"])}
+                )
+            layers = agent.get("detection_layers")
+            if layers:
+                properties.append(
+                    {
+                        "name": "agent-discover:detection_layers",
+                        "value": ",".join(str(x) for x in layers),
+                    }
+                )
+            components.append(
+                {
+                    "type": "application",
+                    "name": str(aid),
+                    "bom-ref": f"agent:{bucket_classification}:{n}:{aid}",
+                    "properties": properties,
+                }
+            )
+    try:
+        scanner_version = _pkg_version("agentdiscover")
+    except Exception:
+        # Deliberately broad: the tool version is informational and must never block the export.
+        scanner_version = "unknown"
+    # The timestamp has to be a date-time string; an inventory without "generated_at" used to
+    # produce a JSON null here. Fall back to the export time in UTC instead.
+    timestamp = raw.get("generated_at") or datetime.now(timezone.utc).isoformat()
+    bom: dict[str, Any] = {
+        "bomFormat": "CycloneDX",
+        "specVersion": "1.6",
+        "serialNumber": f"urn:uuid:{uuid.uuid4()}",
+        "version": 1,
+        "metadata": {
+            "timestamp": timestamp,
+            "tools": [
+                {
+                    "vendor": "DefendAI",
+                    "name": "AgentDiscover Scanner",
+                    "version": scanner_version,
+                    "externalReferences": [
+                        {
+                            "type": "website",
+                            "url": "https://defendai.ai",
+                        }
+                    ],
+                }
+            ],
+            "properties": [
+                {
+                    "name": "agent-discover:aibom_note",
+                    "value": (
+                        "Best-effort CycloneDX 1.6–oriented export; "
+                        "validate with official tooling if strict compliance is required."
+                    ),
+                }
+            ],
+        },
+        "components": components,
+    }
+    output_path.parent.mkdir(parents=True, exist_ok=True)
+    output_path.write_text(json.dumps(bom, indent=2), encoding="utf-8")
+    return bom

agent_discover_scanner/audit_reports.py ADDED Viewed

@@ -0,0 +1,83 @@
+"""Markdown reports for the audit command."""
+from __future__ import annotations
+import json
+from pathlib import Path
+from typing import Any
+def write_ghost_agents_markdown(inventory_json: Path, dest: Path) -> None:
+    """
+    Render the ``ghost`` bucket of an inventory JSON file as a Markdown report.
+    Args:
+        inventory_json: Inventory file to read (UTF-8 JSON).
+        dest: Markdown file to write; missing parent directories are created.
+    """
+    payload = json.loads(inventory_json.read_text(encoding="utf-8"))
+    inventory = payload.get("inventory") or {}
+    ghost_entries = inventory.get("ghost") or []
+    report = [
+        "# Ghost agents",
+        "",
+        "Runtime signals without matching source inventory (from latest correlation).",
+        "",
+    ]
+    if ghost_entries:
+        # One section per ghost agent, each followed by a blank separator line.
+        for entry in ghost_entries:
+            provider = entry.get("network_provider") or entry.get("framework") or "—"
+            report.extend(
+                [
+                    f"## {entry.get('agent_id', 'unknown')}",
+                    "",
+                    f"- **Risk:** {entry.get('risk_level', '')}",
+                    f"- **Provider / framework:** {provider}",
+                    f"- **Process:** {entry.get('process_name') or '—'}",
+                    f"- **Last seen:** {entry.get('last_seen') or '—'}",
+                    "",
+                ]
+            )
+    else:
+        report.append("_No ghost agents in this run._")
+    dest.parent.mkdir(parents=True, exist_ok=True)
+    dest.write_text("\n".join(report) + "\n", encoding="utf-8")
+def write_mcp_markdown(mcp_result: dict[str, Any], dest: Path) -> None:
+    """
+    Write the MCP server list from a detection result as a Markdown report.
+    Args:
+        mcp_result: Detection result; only its ``servers`` entry is read.
+        dest: Markdown file to write; missing parent directories are created.
+    """
+    detected = mcp_result.get("servers") or []
+    body = [
+        "# MCP servers",
+        "",
+        "Detected from configuration, process, registry, and optional network/endpoint context.",
+        "",
+        "[MCP] Enforce policies on detected servers → pip install mcpfw-defendai | mcpfw.dev",
+        "",
+    ]
+    if detected:
+        for server in detected:
+            # Prefer the explicit server name, then the generic name, then a placeholder.
+            title = server.get("server_name") or server.get("name") or "unknown"
+            body += [
+                f"## {title}",
+                "",
+                f"- **Vendor:** {server.get('vendor', '—')}",
+                f"- **Publisher verified:** {server.get('publisher_verified', False)}",
+                f"- **Local script:** {server.get('is_local_script', False)}",
+                "",
+            ]
+    else:
+        body.append("_No MCP servers detected in this run._")
+    dest.parent.mkdir(parents=True, exist_ok=True)
+    dest.write_text("\n".join(body) + "\n", encoding="utf-8")
+def write_audit_summary(report: dict[str, Any], dest: Path, raw_dir: Path) -> None:
+    """
+    Write the audit summary Markdown: generation time, per-bucket counts, and the artifact list.
+    Args:
+        report: Report dict; ``generated_at`` and ``summary`` are read from it.
+        dest: Markdown file to write; missing parent directories are created.
+        raw_dir: Raw scan directory, shown verbatim in the artifact list.
+    """
+    counts = report.get("summary") or {}
+    # (label shown in the report, key looked up in the summary); missing keys render as 0.
+    count_rows = (
+        ("Confirmed", "confirmed"),
+        ("Unknown", "unknown"),
+        ("Ghost", "ghost"),
+        ("Zombie", "zombie"),
+        ("Shadow AI usage", "shadow_ai_usage"),
+    )
+    document = [
+        "# Audit summary",
+        "",
+        f"**Generated:** {report.get('generated_at', '')}",
+        "",
+        "## Counts",
+        "",
+    ]
+    document.extend(f"- {label}: {counts.get(key, 0)}" for label, key in count_rows)
+    document.extend(
+        [
+            "",
+            "## Artifacts",
+            "",
+            f"- Raw scan directory: `{raw_dir}`",
+            "- `aibom.json` — CycloneDX-oriented AIBOM",
+            "- `ghost-agents.md` — Ghost agent detail",
+            "- `mcp-report.md` — MCP inventory",
+            "- `summary.md` — This file",
+            "",
+        ]
+    )
+    dest.parent.mkdir(parents=True, exist_ok=True)
+    dest.write_text("\n".join(document) + "\n", encoding="utf-8")

agent_discover_scanner/behavioral_patterns.py ADDED Viewed

@@ -0,0 +1,252 @@
+"""
+Behavioral pattern detection for identifying agentic activity.
+Detects:
+- ReAct loops (Reasoning + Acting cycles)
+- Token burst patterns (streaming responses)
+- Multi-turn conversations
+- RAG patterns (LLM + Vector DB)
+"""
+from dataclasses import dataclass
+from datetime import datetime
+from typing import Dict, List, Optional
+@dataclass
+class BehavioralPattern:
+    """Represents a detected behavioral pattern."""
+    # One of "react_loop", "token_burst", "multi_turn", "rag".
+    pattern_type: str
+    # One of "high", "medium", "low".
+    confidence: str
+    # Human-readable summary of the detection.
+    description: str
+    # Short evidence strings that back the detection.
+    indicators: List[str]
+    # Timestamp taken from the finding the pattern is anchored on.
+    timestamp: str
+    # Extra structured details. None is accepted and replaced by a fresh dict per instance,
+    # so the annotation is Optional rather than a bare Dict.
+    metadata: Optional[Dict] = None
+    def __post_init__(self) -> None:
+        # A None default normalized here avoids sharing one mutable dict between instances.
+        if self.metadata is None:
+            self.metadata = {}
+class BehavioralAnalyzer:
+    """
+    Analyzes network findings for behavioral patterns that indicate agentic activity.
+    """
+    # Time windows for pattern detection
+    REACT_WINDOW_SECONDS = 30  # ReAct loop typically completes in 30s
+    BURST_WINDOW_SECONDS = 5  # Token bursts happen quickly
+    MULTI_TURN_WINDOW_SECONDS = 300  # 5 minute conversation window
+    @classmethod
+    def detect_react_pattern(cls, findings: List[Dict]) -> List[BehavioralPattern]:
+        """
+        Detect ReAct (Reasoning + Acting) loops.
+        Pattern: three or more LLM calls in a row, each less than REACT_WINDOW_SECONDS after
+        the previous one. Findings are examined in the order given, and only entries whose
+        provider passes _is_llm_provider are considered.
+        Args:
+            findings: Finding dicts; the keys read here are "provider", "timestamp" and
+                "process_name".
+        Returns:
+            A list holding at most one "react_loop" pattern (the scan stops at the first match).
+        """
+        patterns: List[BehavioralPattern] = []
+        llm_findings = [f for f in findings if cls._is_llm_provider(f.get("provider"))]
+        # Three calls are the minimum needed to form two consecutive rapid gaps.
+        if len(llm_findings) < 3:
+            return patterns
+        consecutive_calls = 0  # number of back-to-back gaps shorter than the window
+        for current, following in zip(llm_findings, llm_findings[1:]):
+            time_diff = cls._time_difference(
+                current.get("timestamp"), following.get("timestamp")
+            )
+            # Compare against None explicitly: a gap of 0.0 seconds (identical timestamps) is
+            # falsy, but it is still a rapid call and must not reset the streak.
+            if time_diff is not None and time_diff < cls.REACT_WINDOW_SECONDS:
+                consecutive_calls += 1
+            else:
+                consecutive_calls = 0
+            # Two rapid gaps in a row means three rapid calls: likely a ReAct loop.
+            if consecutive_calls >= 2:
+                call_count = consecutive_calls + 1
+                patterns.append(
+                    BehavioralPattern(
+                        pattern_type="react_loop",
+                        confidence="high",
+                        description="ReAct agent loop detected: Multiple rapid LLM calls indicating reasoning-action cycles",
+                        indicators=[
+                            f"{call_count} consecutive LLM calls within {cls.REACT_WINDOW_SECONDS}s",
+                            f"Provider: {current.get('provider')}",
+                            f"Process: {current.get('process_name', 'unknown')}",
+                        ],
+                        timestamp=current.get("timestamp"),
+                        metadata={
+                            "call_count": call_count,
+                            "provider": current.get("provider"),
+                            "process": current.get("process_name"),
+                        },
+                    )
+                )
+                break  # one match is enough
+        return patterns
+    @classmethod
+    def detect_rag_pattern(cls, findings: List[Dict]) -> List[BehavioralPattern]:
+        """
+        Detect RAG (Retrieval-Augmented Generation) patterns.
+        Pattern: a vector DB query followed, less than one minute later, by an LLM call.
+        Args:
+            findings: Finding dicts; the keys read here are "provider" and "timestamp".
+        Returns:
+            A list holding at most one "rag" pattern (the scan stops at the first match).
+        """
+        patterns: List[BehavioralPattern] = []
+        llm_findings = [f for f in findings if cls._is_llm_provider(f.get("provider"))]
+        vector_findings = [f for f in findings if cls._is_vector_db(f.get("provider"))]
+        if not (llm_findings and vector_findings):
+            return patterns
+        for vf in vector_findings:
+            vector_ts = vf.get("timestamp")
+            for lf in llm_findings:
+                llm_ts = lf.get("timestamp")
+                time_diff = cls._time_difference(vector_ts, llm_ts)
+                # Rejects None (missing/unparseable), 0 (simultaneous) and gaps of a minute or more.
+                if not time_diff or time_diff >= 60:
+                    continue
+                # _time_difference is unsigned, so the order has to be checked separately:
+                # an LLM call that happened before the vector query is not retrieval-then-generation.
+                # Both timestamps are known to parse here because time_diff was computed from them.
+                vector_time = datetime.fromisoformat(vector_ts.replace("Z", "+00:00"))
+                llm_time = datetime.fromisoformat(llm_ts.replace("Z", "+00:00"))
+                if llm_time < vector_time:
+                    continue
+                patterns.append(
+                    BehavioralPattern(
+                        pattern_type="rag",
+                        confidence="high",
+                        description="RAG pattern detected: Vector DB query followed by LLM call",
+                        indicators=[
+                            f"Vector DB: {vf.get('provider')}",
+                            f"LLM: {lf.get('provider')}",
+                            f"Time gap: {time_diff}s",
+                        ],
+                        timestamp=vector_ts,
+                        metadata={
+                            "vector_db": vf.get("provider"),
+                            "llm": lf.get("provider"),
+                            "time_gap": time_diff,
+                        },
+                    )
+                )
+                return patterns  # one match is enough
+        return patterns
+    @classmethod
+    def detect_multi_turn_conversation(cls, findings: List[Dict]) -> List[BehavioralPattern]:
+        """
+        Detect multi-turn conversations (sustained agent activity).
+        Pattern: five or more LLM calls whose first and last timestamps are less than
+        MULTI_TURN_WINDOW_SECONDS apart. The span is measured between the first and last
+        LLM finding in the order given.
+        Args:
+            findings: Finding dicts; the keys read here are "provider" and "timestamp".
+        Returns:
+            A list holding at most one "multi_turn" pattern.
+        """
+        patterns: List[BehavioralPattern] = []
+        llm_findings = [f for f in findings if cls._is_llm_provider(f.get("provider"))]
+        if len(llm_findings) < 5:
+            return patterns
+        first_call = llm_findings[0].get("timestamp")
+        last_call = llm_findings[-1].get("timestamp")
+        time_span = cls._time_difference(first_call, last_call)
+        # Compare against None explicitly: a span of 0.0 seconds (all calls share one
+        # timestamp) is falsy but is still well inside the window.
+        if time_span is not None and time_span < cls.MULTI_TURN_WINDOW_SECONDS:
+            call_count = len(llm_findings)
+            provider = llm_findings[0].get("provider")
+            patterns.append(
+                BehavioralPattern(
+                    pattern_type="multi_turn",
+                    confidence="medium",
+                    description=f"Multi-turn conversation detected: {call_count} LLM calls in {time_span}s",
+                    indicators=[
+                        f"{call_count} LLM API calls",
+                        f"Conversation span: {time_span}s",
+                        f"Provider: {provider}",
+                    ],
+                    timestamp=first_call,
+                    metadata={
+                        "call_count": call_count,
+                        "duration_seconds": time_span,
+                        "provider": provider,
+                    },
+                )
+            )
+        return patterns
+    @classmethod
+    def detect_token_burst(cls, findings: List[Dict]) -> List[BehavioralPattern]:
+        """
+        Detect token burst patterns (streaming responses).
+        Pattern: high-frequency data transfer, which indicates streaming.
+        Placeholder only: a real implementation would need byte count data from the
+        network monitor, so ``findings`` is currently ignored.
+        Returns:
+            Always an empty list.
+        """
+        # No packet-size data is available yet, so there is nothing to report.
+        no_patterns: List[BehavioralPattern] = []
+        return no_patterns
+    @classmethod
+    def analyze_all_patterns(cls, findings: List[Dict]) -> Dict[str, List[BehavioralPattern]]:
+        """
+        Run every pattern detector over the same findings.
+        Returns:
+            Dictionary keyed by "react_loops", "rag_patterns", "multi_turn" and
+            "token_bursts", each mapped to the patterns that detector found.
+        """
+        # (result key, detector) pairs, run in this order.
+        detectors = (
+            ("react_loops", cls.detect_react_pattern),
+            ("rag_patterns", cls.detect_rag_pattern),
+            ("multi_turn", cls.detect_multi_turn_conversation),
+            ("token_bursts", cls.detect_token_burst),
+        )
+        return {key: detect(findings) for key, detect in detectors}
+    @staticmethod
+    def _is_llm_provider(provider: str) -> bool:
+        """Return True when the provider name contains a known LLM provider marker (case-insensitive)."""
+        if not provider:
+            return False
+        lowered = provider.lower()
+        # Substring match, so names such as "azure-openai" match through "openai" as well.
+        for marker in ("openai", "anthropic", "google", "cohere", "bedrock", "azure-openai"):
+            if marker in lowered:
+                return True
+        return False
+    @staticmethod
+    def _is_vector_db(provider: str) -> bool:
+        """Return True when the provider name contains a known vector database marker (case-insensitive)."""
+        if provider:
+            name = provider.lower()
+            known_markers = ("pinecone", "weaviate", "qdrant", "chroma")
+            return any(marker in name for marker in known_markers)
+        # Missing or empty provider names never match.
+        return False
+    @staticmethod
+    def _time_difference(timestamp1: Optional[str], timestamp2: Optional[str]) -> Optional[float]:
+        """
+        Calculate the absolute time difference in seconds between two ISO timestamps.
+        A trailing "Z" is accepted and read as a +00:00 offset.
+        Returns:
+            The non-negative difference in seconds, or None when either timestamp is
+            missing, is not a parseable ISO string, or the two cannot be compared
+            (one carries a UTC offset and the other does not).
+        """
+        if not (timestamp1 and timestamp2):
+            return None
+        try:
+            t1 = datetime.fromisoformat(timestamp1.replace("Z", "+00:00"))
+            t2 = datetime.fromisoformat(timestamp2.replace("Z", "+00:00"))
+            return abs((t2 - t1).total_seconds())
+        except (ValueError, AttributeError, TypeError):
+            # ValueError: not ISO formatted. AttributeError: not a string.
+            # TypeError: subtracting an offset-naive from an offset-aware datetime.
+            return None