agentdiscover 2.7.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56):
  1. agent_discover_scanner/__init__.py +24 -0
  2. agent_discover_scanner/aibom.py +96 -0
  3. agent_discover_scanner/audit_reports.py +83 -0
  4. agent_discover_scanner/behavioral_patterns.py +252 -0
  5. agent_discover_scanner/cli.py +1335 -0
  6. agent_discover_scanner/correlator.py +1114 -0
  7. agent_discover_scanner/detectors/__init__.py +0 -0
  8. agent_discover_scanner/detectors/cloud_audit/__init__.py +230 -0
  9. agent_discover_scanner/detectors/cloud_audit/aws_cloudtrail.py +565 -0
  10. agent_discover_scanner/detectors/cloud_audit/azure_monitor.py +54 -0
  11. agent_discover_scanner/detectors/cloud_audit/base.py +127 -0
  12. agent_discover_scanner/detectors/cloud_audit/gcp_audit.py +53 -0
  13. agent_discover_scanner/detectors/cloudtrail.py +24 -0
  14. agent_discover_scanner/errors.py +121 -0
  15. agent_discover_scanner/exporters/__init__.py +0 -0
  16. agent_discover_scanner/exporters/mcpfw_policy.py +483 -0
  17. agent_discover_scanner/git_scanner.py +233 -0
  18. agent_discover_scanner/high_risk_agents.py +439 -0
  19. agent_discover_scanner/interceptors/__init__.py +54 -0
  20. agent_discover_scanner/interceptors/base.py +319 -0
  21. agent_discover_scanner/interceptors/sse/__init__.py +0 -0
  22. agent_discover_scanner/interceptors/sse/netskope.py +17 -0
  23. agent_discover_scanner/interceptors/sse/prisma_access.py +17 -0
  24. agent_discover_scanner/interceptors/sse/umbrella.py +17 -0
  25. agent_discover_scanner/interceptors/sse/zscaler.py +17 -0
  26. agent_discover_scanner/js_signatures.py +149 -0
  27. agent_discover_scanner/known_apps.py +205 -0
  28. agent_discover_scanner/layer4/__init__.py +0 -0
  29. agent_discover_scanner/layer4/osquery_executor.py +328 -0
  30. agent_discover_scanner/layer4/osquery_queries.py +244 -0
  31. agent_discover_scanner/layer4/result_parser.py +187 -0
  32. agent_discover_scanner/macos_detector.py +124 -0
  33. agent_discover_scanner/mcp_detector.py +720 -0
  34. agent_discover_scanner/models/endpoint_discovery.py +86 -0
  35. agent_discover_scanner/monitors/__init__.py +15 -0
  36. agent_discover_scanner/monitors/json_output.py +68 -0
  37. agent_discover_scanner/monitors/k8s_monitor.py +190 -0
  38. agent_discover_scanner/monitors/tetragon_events.py +109 -0
  39. agent_discover_scanner/monitors/tetragon_monitor.py +405 -0
  40. agent_discover_scanner/monitors/vendor_mapping.py +113 -0
  41. agent_discover_scanner/network_monitor.py +496 -0
  42. agent_discover_scanner/platform.py +430 -0
  43. agent_discover_scanner/reports/layer4_report.py +126 -0
  44. agent_discover_scanner/saas_detector.py +836 -0
  45. agent_discover_scanner/sarif_output.py +155 -0
  46. agent_discover_scanner/sbom_analyzer.py +277 -0
  47. agent_discover_scanner/scan_runner.py +1485 -0
  48. agent_discover_scanner/scanner.py +151 -0
  49. agent_discover_scanner/signatures.py +617 -0
  50. agent_discover_scanner/visitor.py +172 -0
  51. agent_discover_scanner/windows_detector.py +137 -0
  52. agentdiscover-2.7.2.dist-info/METADATA +815 -0
  53. agentdiscover-2.7.2.dist-info/RECORD +56 -0
  54. agentdiscover-2.7.2.dist-info/WHEEL +4 -0
  55. agentdiscover-2.7.2.dist-info/entry_points.txt +4 -0
  56. agentdiscover-2.7.2.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,24 @@
1
+ """AgentDiscover — Detect AI Agents and Shadow AI across 5 layers."""
2
+
3
+ import warnings
4
+ from importlib.metadata import PackageNotFoundError, version
5
+
6
+ try:
7
+ __version__ = version("agentdiscover")
8
+ except PackageNotFoundError:
9
+ __version__ = "0.0.0"
10
+
11
+ # Emit a DeprecationWarning when the legacy 'agent-discover-scanner' distribution
12
+ # is installed alongside this package (i.e. the user still has the stub installed).
13
+ try:
14
+ version("agent-discover-scanner")
15
+ warnings.warn(
16
+ "The package 'agent-discover-scanner' is deprecated and will be removed in a "
17
+ "future release. Please migrate: pip install agentdiscover",
18
+ DeprecationWarning,
19
+ stacklevel=2,
20
+ )
21
+ except PackageNotFoundError:
22
+ pass
23
+
24
+ __all__ = ["__version__"]
@@ -0,0 +1,96 @@
1
+ """Best-effort CycloneDX 1.6–oriented AIBOM export from agent_inventory.json."""
2
+ from __future__ import annotations
3
+
4
+ import json
5
+ import uuid
6
+ from importlib.metadata import version as _pkg_version
7
+ from pathlib import Path
8
+ from typing import Any
9
+
10
+
11
def generate_aibom(inventory_json: Path, output_path: Path) -> dict[str, Any]:
    """
    Build a best-effort CycloneDX 1.6-oriented AIBOM from agent_inventory.json.

    Reads the inventory file, walks every bucket under its ``"inventory"`` key,
    emits one ``application`` component per agent dict (tagged with the bucket
    key as its classification), writes the resulting document to
    ``output_path`` as indented JSON and returns it.

    Args:
        inventory_json: Path to the agent inventory JSON file.
        output_path: Destination file; parent directories are created. A plain
            string is accepted as well.

    Returns:
        The BOM document that was written.

    Raises:
        OSError: If the inventory cannot be read or the output cannot be written.
        json.JSONDecodeError: If the inventory is not valid JSON.

    Note:
        Best-effort export; validate with official tooling if strict
        compliance is required.
    """
    # Local import: only needed for the timestamp fallback below.
    from datetime import datetime, timezone

    raw = json.loads(Path(inventory_json).read_text(encoding="utf-8"))

    # Tolerate a missing or malformed "inventory" value the same way malformed
    # buckets and agents are tolerated below: skip instead of crashing.
    inventory = raw.get("inventory")
    if not isinstance(inventory, dict):
        inventory = {}

    components: list[dict[str, Any]] = []
    n = 0  # running index over all emitted agents, across buckets
    for bucket_classification, agents in inventory.items():
        if not isinstance(agents, list):
            continue
        for agent in agents:
            if not isinstance(agent, dict):
                continue
            n += 1
            aid = agent.get("agent_id") or f"agent-{n}"
            properties: list[dict[str, str]] = [
                {
                    "name": "agent-discover:inventory_classification",
                    "value": str(bucket_classification),
                },
                {
                    "name": "agent-discover:risk_level",
                    "value": str(agent.get("risk_level", "")),
                },
            ]
            if agent.get("framework"):
                properties.append(
                    {"name": "agent-discover:framework", "value": str(agent["framework"])}
                )
            layers = agent.get("detection_layers")
            if layers:
                properties.append(
                    {
                        "name": "agent-discover:detection_layers",
                        "value": ",".join(str(x) for x in layers),
                    }
                )
            components.append(
                {
                    "type": "application",
                    "name": str(aid),
                    # The running index keeps the ref unique even when two
                    # agents share an agent_id.
                    "bom-ref": f"agent:{bucket_classification}:{n}:{aid}",
                    "properties": properties,
                }
            )

    try:
        scanner_version = _pkg_version("agentdiscover")
    except Exception:
        # Deliberately broad: the tool version is informational only and must
        # never prevent the export.
        scanner_version = "unknown"

    # A null timestamp is not a valid date-time string, so fall back to the
    # export time when the inventory does not carry "generated_at".
    timestamp = raw.get("generated_at") or datetime.now(timezone.utc).isoformat()

    bom: dict[str, Any] = {
        "bomFormat": "CycloneDX",
        "specVersion": "1.6",
        "serialNumber": f"urn:uuid:{uuid.uuid4()}",
        "version": 1,
        "metadata": {
            "timestamp": timestamp,
            "tools": [
                {
                    "vendor": "DefendAI",
                    "name": "AgentDiscover Scanner",
                    "version": scanner_version,
                    "externalReferences": [
                        {
                            "type": "website",
                            "url": "https://defendai.ai",
                        }
                    ],
                }
            ],
            "properties": [
                {
                    "name": "agent-discover:aibom_note",
                    "value": (
                        "Best-effort CycloneDX 1.6–oriented export; "
                        "validate with official tooling if strict compliance is required."
                    ),
                }
            ],
        },
        "components": components,
    }

    # Coerce like the input path so callers may pass a plain string.
    output_path = Path(output_path)
    output_path.parent.mkdir(parents=True, exist_ok=True)
    output_path.write_text(json.dumps(bom, indent=2), encoding="utf-8")
    return bom
@@ -0,0 +1,83 @@
1
+ """Markdown reports for the audit command."""
2
+ from __future__ import annotations
3
+
4
+ import json
5
+ from pathlib import Path
6
+ from typing import Any
7
+
8
+
9
def write_ghost_agents_markdown(inventory_json: Path, dest: Path) -> None:
    """
    Render the ``inventory["ghost"]`` entries of an inventory file as Markdown.

    Args:
        inventory_json: Path to the inventory JSON file to read.
        dest: Markdown file to write; parent directories are created.
    """
    payload = json.loads(inventory_json.read_text(encoding="utf-8"))
    ghost_entries = (payload.get("inventory") or {}).get("ghost") or []

    report = [
        "# Ghost agents",
        "",
        "Runtime signals without matching source inventory (from latest correlation).",
        "",
    ]

    # One section per ghost agent; missing values fall back to an em dash.
    for entry in ghost_entries:
        report.extend(
            [
                f"## {entry.get('agent_id', 'unknown')}",
                "",
                f"- **Risk:** {entry.get('risk_level', '')}",
                f"- **Provider / framework:** {entry.get('network_provider') or entry.get('framework') or '—'}",
                f"- **Process:** {entry.get('process_name') or '—'}",
                f"- **Last seen:** {entry.get('last_seen') or '—'}",
                "",
            ]
        )

    if not ghost_entries:
        report.append("_No ghost agents in this run._")

    dest.parent.mkdir(parents=True, exist_ok=True)
    dest.write_text("\n".join(report) + "\n", encoding="utf-8")
31
+
32
+
33
def write_mcp_markdown(mcp_result: dict[str, Any], dest: Path) -> None:
    """
    Render the ``"servers"`` list of an MCP detection result as Markdown.

    Args:
        mcp_result: Detection result; only its ``"servers"`` list is read.
        dest: Markdown file to write; parent directories are created.
    """
    servers = mcp_result.get("servers") or []
    lines = [
        "# MCP servers",
        "",
        "Detected from configuration, process, registry, and optional network/endpoint context.",
        "",
        "[MCP] Enforce policies on detected servers → pip install mcpfw-defendai | mcpfw.dev",
        "",
    ]
    if not servers:
        lines.append("_No MCP servers detected in this run._")
    else:
        for s in servers:
            name = s.get("server_name") or s.get("name") or "unknown"
            # `or` rather than a .get() default: a vendor key that is present
            # but null/empty must render as an em dash, not the text "None".
            vendor = s.get("vendor") or "—"
            lines.extend(
                [
                    f"## {name}",
                    "",
                    f"- **Vendor:** {vendor}",
                    f"- **Publisher verified:** {s.get('publisher_verified', False)}",
                    f"- **Local script:** {s.get('is_local_script', False)}",
                    "",
                ]
            )
    dest.parent.mkdir(parents=True, exist_ok=True)
    dest.write_text("\n".join(lines) + "\n", encoding="utf-8")
56
+
57
+
58
def write_audit_summary(report: dict[str, Any], dest: Path, raw_dir: Path) -> None:
    """
    Write the Markdown summary for an audit run.

    Args:
        report: Audit report; ``"generated_at"`` and the ``"summary"`` counts
            are read from it. Missing counts are shown as 0.
        dest: Markdown file to write; parent directories are created.
        raw_dir: Raw scan directory, shown verbatim in the artifact list.
    """
    counts = report.get("summary") or {}

    heading = [
        "# Audit summary",
        "",
        f"**Generated:** {report.get('generated_at', '')}",
        "",
    ]
    count_section = [
        "## Counts",
        "",
        f"- Confirmed: {counts.get('confirmed', 0)}",
        f"- Unknown: {counts.get('unknown', 0)}",
        f"- Ghost: {counts.get('ghost', 0)}",
        f"- Zombie: {counts.get('zombie', 0)}",
        f"- Shadow AI usage: {counts.get('shadow_ai_usage', 0)}",
        "",
    ]
    artifact_section = [
        "## Artifacts",
        "",
        f"- Raw scan directory: `{raw_dir}`",
        "- `aibom.json` — CycloneDX-oriented AIBOM",
        "- `ghost-agents.md` — Ghost agent detail",
        "- `mcp-report.md` — MCP inventory",
        "- `summary.md` — This file",
        "",
    ]

    document = "\n".join(heading + count_section + artifact_section) + "\n"
    dest.parent.mkdir(parents=True, exist_ok=True)
    dest.write_text(document, encoding="utf-8")
@@ -0,0 +1,252 @@
1
+ """
2
+ Behavioral pattern detection for identifying agentic activity.
3
+
4
+ Detects:
5
+ - ReAct loops (Reasoning + Acting cycles)
6
+ - Token burst patterns (streaming responses)
7
+ - Multi-turn conversations
8
+ - RAG patterns (LLM + Vector DB)
9
+ """
10
+
11
+ from dataclasses import dataclass
12
+ from datetime import datetime
13
+ from typing import Dict, List, Optional
14
+
15
+
16
@dataclass
class BehavioralPattern:
    """Represents a detected behavioral pattern."""

    pattern_type: str  # "react_loop", "token_burst", "multi_turn", "rag"
    confidence: str  # "high", "medium", "low"
    description: str  # human-readable summary of the detection
    indicators: List[str]  # evidence strings supporting the detection
    timestamp: str  # timestamp taken from the finding that triggered the detection
    # None (the default, or passed explicitly) is replaced by a fresh dict in
    # __post_init__, so instances never share one mutable default.
    metadata: Optional[Dict] = None

    def __post_init__(self) -> None:
        """Normalize a missing ``metadata`` to an empty, per-instance dict."""
        if self.metadata is None:
            self.metadata = {}
30
+
31
+
32
class BehavioralAnalyzer:
    """
    Analyzes network findings for behavioral patterns that indicate agentic activity.

    Each finding is a dict; the detectors read its ``"provider"``,
    ``"timestamp"`` (ISO-8601 string) and ``"process_name"`` keys. Findings
    are processed in the order given; they are not re-sorted by time.
    """

    # Time windows for pattern detection
    REACT_WINDOW_SECONDS = 30  # ReAct loop typically completes in 30s
    BURST_WINDOW_SECONDS = 5  # Token bursts happen quickly
    MULTI_TURN_WINDOW_SECONDS = 300  # 5 minute conversation window
    RAG_WINDOW_SECONDS = 60  # Max gap between a vector DB query and the LLM call

    @classmethod
    def detect_react_pattern(cls, findings: List[Dict]) -> List[BehavioralPattern]:
        """
        Detect ReAct (Reasoning + Acting) loops.

        Pattern: LLM call → short pause → LLM call → short pause (3+ times)

        This indicates:
        - Agent is "thinking" (LLM call)
        - Agent is "acting" (tool execution - not visible in our network scan)
        - Agent is "observing" (next LLM call with results)

        Returns:
            A list with at most one ``react_loop`` pattern (the first streak found).
        """
        patterns = []

        if len(findings) < 3:
            return patterns

        llm_findings = [f for f in findings if cls._is_llm_provider(f.get("provider"))]

        if len(llm_findings) < 3:
            return patterns

        # Count adjacent LLM calls that are less than REACT_WINDOW_SECONDS apart.
        consecutive_calls = 0
        for current, following in zip(llm_findings, llm_findings[1:]):
            time_diff = cls._time_difference(
                current.get("timestamp"), following.get("timestamp")
            )

            # `is not None` rather than truthiness: a 0.0s gap is a valid
            # (very rapid) succession and must not reset the streak.
            if time_diff is not None and time_diff < cls.REACT_WINDOW_SECONDS:
                consecutive_calls += 1
            else:
                consecutive_calls = 0

            # Two rapid gaps in a row means 3 rapid calls: likely a ReAct loop.
            if consecutive_calls >= 2:
                pattern = BehavioralPattern(
                    pattern_type="react_loop",
                    confidence="high",
                    description="ReAct agent loop detected: Multiple rapid LLM calls indicating reasoning-action cycles",
                    indicators=[
                        f"{consecutive_calls + 1} consecutive LLM calls within {cls.REACT_WINDOW_SECONDS}s",
                        f"Provider: {current.get('provider')}",
                        f"Process: {current.get('process_name', 'unknown')}",
                    ],
                    timestamp=current.get("timestamp"),
                    metadata={
                        "call_count": consecutive_calls + 1,
                        "provider": current.get("provider"),
                        "process": current.get("process_name"),
                    },
                )
                patterns.append(pattern)
                break  # Found one, that's enough

        return patterns

    @classmethod
    def detect_rag_pattern(cls, findings: List[Dict]) -> List[BehavioralPattern]:
        """
        Detect RAG (Retrieval-Augmented Generation) patterns.

        Pattern: Vector DB query → LLM call (within RAG_WINDOW_SECONDS)

        Indicates agent is:
        1. Querying vector database for relevant context
        2. Passing context to LLM for generation

        Returns:
            A list with at most one ``rag`` pattern (the first matching pair).
        """
        patterns = []

        llm_findings = [f for f in findings if cls._is_llm_provider(f.get("provider"))]
        vector_findings = [f for f in findings if cls._is_vector_db(f.get("provider"))]

        if not (llm_findings and vector_findings):
            return patterns

        # Check for temporal correlation
        for vf in vector_findings:
            for lf in llm_findings:
                # Signed difference: positive only when the LLM call comes
                # AFTER the vector query. An absolute difference would also
                # match an LLM call that preceded the query.
                time_diff = cls._elapsed_seconds(vf.get("timestamp"), lf.get("timestamp"))

                if time_diff is not None and 0 < time_diff < cls.RAG_WINDOW_SECONDS:
                    pattern = BehavioralPattern(
                        pattern_type="rag",
                        confidence="high",
                        description="RAG pattern detected: Vector DB query followed by LLM call",
                        indicators=[
                            f"Vector DB: {vf.get('provider')}",
                            f"LLM: {lf.get('provider')}",
                            f"Time gap: {time_diff}s",
                        ],
                        timestamp=vf.get("timestamp"),
                        metadata={
                            "vector_db": vf.get("provider"),
                            "llm": lf.get("provider"),
                            "time_gap": time_diff,
                        },
                    )
                    patterns.append(pattern)
                    return patterns  # Found one, that's enough

        return patterns

    @classmethod
    def detect_multi_turn_conversation(cls, findings: List[Dict]) -> List[BehavioralPattern]:
        """
        Detect multi-turn conversations (sustained agent activity).

        Pattern: Multiple LLM calls over extended period (5+ calls in 5 minutes)

        Indicates:
        - Interactive agent
        - Conversational workflow
        - Complex multi-step task

        The span is measured between the first and the last LLM finding in
        the given order.
        """
        patterns = []

        llm_findings = [f for f in findings if cls._is_llm_provider(f.get("provider"))]

        if len(llm_findings) < 5:
            return patterns

        # Check if 5+ calls within 5 minute window
        first_call = llm_findings[0].get("timestamp")
        last_call = llm_findings[-1].get("timestamp")

        time_span = cls._time_difference(first_call, last_call)

        # `is not None`: a 0.0s span still lies inside the window.
        if time_span is not None and time_span < cls.MULTI_TURN_WINDOW_SECONDS:
            pattern = BehavioralPattern(
                pattern_type="multi_turn",
                confidence="medium",
                description=f"Multi-turn conversation detected: {len(llm_findings)} LLM calls in {time_span}s",
                indicators=[
                    f"{len(llm_findings)} LLM API calls",
                    f"Conversation span: {time_span}s",
                    f"Provider: {llm_findings[0].get('provider')}",
                ],
                timestamp=first_call,
                metadata={
                    "call_count": len(llm_findings),
                    "duration_seconds": time_span,
                    "provider": llm_findings[0].get("provider"),
                },
            )
            patterns.append(pattern)

        return patterns

    @classmethod
    def detect_token_burst(cls, findings: List[Dict]) -> List[BehavioralPattern]:
        """
        Detect token burst patterns (streaming responses).

        Pattern: High-frequency data transfer (indicates streaming)

        Note: This is a placeholder - actual implementation would need
        byte count data from network monitor. It always returns an empty list.
        """
        # This would require enhanced network monitoring with packet size data
        # For now, return empty as we don't have that data
        return []

    @classmethod
    def analyze_all_patterns(cls, findings: List[Dict]) -> Dict[str, List[BehavioralPattern]]:
        """
        Run all pattern detectors and return results.

        Returns:
            Dictionary with pattern types as keys and detected patterns as values
        """
        return {
            "react_loops": cls.detect_react_pattern(findings),
            "rag_patterns": cls.detect_rag_pattern(findings),
            "multi_turn": cls.detect_multi_turn_conversation(findings),
            "token_bursts": cls.detect_token_burst(findings),
        }

    @staticmethod
    def _is_llm_provider(provider: str) -> bool:
        """Check if provider is an LLM provider (case-insensitive substring match)."""
        if not provider or not isinstance(provider, str):
            return False

        llm_providers = ["openai", "anthropic", "google", "cohere", "bedrock", "azure-openai"]
        lowered = provider.lower()
        return any(p in lowered for p in llm_providers)

    @staticmethod
    def _is_vector_db(provider: str) -> bool:
        """Check if provider is a vector database (case-insensitive substring match)."""
        if not provider or not isinstance(provider, str):
            return False

        vector_dbs = ["pinecone", "weaviate", "qdrant", "chroma"]
        lowered = provider.lower()
        return any(db in lowered for db in vector_dbs)

    @staticmethod
    def _elapsed_seconds(start: str, end: str) -> Optional[float]:
        """
        Return the signed number of seconds from ``start`` to ``end``.

        Both arguments are ISO timestamps; a trailing "Z" is accepted. Returns
        None when either value is missing or unparseable, or when the two
        cannot be subtracted (one timezone-aware, the other naive).
        """
        if not (start and end):
            return None

        try:
            t1 = datetime.fromisoformat(start.replace("Z", "+00:00"))
            t2 = datetime.fromisoformat(end.replace("Z", "+00:00"))
            return (t2 - t1).total_seconds()
        except (ValueError, AttributeError, TypeError):
            # TypeError covers aware-vs-naive subtraction and non-string input.
            return None

    @staticmethod
    def _time_difference(timestamp1: str, timestamp2: str) -> Optional[float]:
        """Calculate the absolute time difference in seconds between two ISO timestamps."""
        elapsed = BehavioralAnalyzer._elapsed_seconds(timestamp1, timestamp2)
        return None if elapsed is None else abs(elapsed)