agentdiscover 2.7.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56)
  1. agent_discover_scanner/__init__.py +24 -0
  2. agent_discover_scanner/aibom.py +96 -0
  3. agent_discover_scanner/audit_reports.py +83 -0
  4. agent_discover_scanner/behavioral_patterns.py +252 -0
  5. agent_discover_scanner/cli.py +1335 -0
  6. agent_discover_scanner/correlator.py +1114 -0
  7. agent_discover_scanner/detectors/__init__.py +0 -0
  8. agent_discover_scanner/detectors/cloud_audit/__init__.py +230 -0
  9. agent_discover_scanner/detectors/cloud_audit/aws_cloudtrail.py +565 -0
  10. agent_discover_scanner/detectors/cloud_audit/azure_monitor.py +54 -0
  11. agent_discover_scanner/detectors/cloud_audit/base.py +127 -0
  12. agent_discover_scanner/detectors/cloud_audit/gcp_audit.py +53 -0
  13. agent_discover_scanner/detectors/cloudtrail.py +24 -0
  14. agent_discover_scanner/errors.py +121 -0
  15. agent_discover_scanner/exporters/__init__.py +0 -0
  16. agent_discover_scanner/exporters/mcpfw_policy.py +483 -0
  17. agent_discover_scanner/git_scanner.py +233 -0
  18. agent_discover_scanner/high_risk_agents.py +439 -0
  19. agent_discover_scanner/interceptors/__init__.py +54 -0
  20. agent_discover_scanner/interceptors/base.py +319 -0
  21. agent_discover_scanner/interceptors/sse/__init__.py +0 -0
  22. agent_discover_scanner/interceptors/sse/netskope.py +17 -0
  23. agent_discover_scanner/interceptors/sse/prisma_access.py +17 -0
  24. agent_discover_scanner/interceptors/sse/umbrella.py +17 -0
  25. agent_discover_scanner/interceptors/sse/zscaler.py +17 -0
  26. agent_discover_scanner/js_signatures.py +149 -0
  27. agent_discover_scanner/known_apps.py +205 -0
  28. agent_discover_scanner/layer4/__init__.py +0 -0
  29. agent_discover_scanner/layer4/osquery_executor.py +328 -0
  30. agent_discover_scanner/layer4/osquery_queries.py +244 -0
  31. agent_discover_scanner/layer4/result_parser.py +187 -0
  32. agent_discover_scanner/macos_detector.py +124 -0
  33. agent_discover_scanner/mcp_detector.py +720 -0
  34. agent_discover_scanner/models/endpoint_discovery.py +86 -0
  35. agent_discover_scanner/monitors/__init__.py +15 -0
  36. agent_discover_scanner/monitors/json_output.py +68 -0
  37. agent_discover_scanner/monitors/k8s_monitor.py +190 -0
  38. agent_discover_scanner/monitors/tetragon_events.py +109 -0
  39. agent_discover_scanner/monitors/tetragon_monitor.py +405 -0
  40. agent_discover_scanner/monitors/vendor_mapping.py +113 -0
  41. agent_discover_scanner/network_monitor.py +496 -0
  42. agent_discover_scanner/platform.py +430 -0
  43. agent_discover_scanner/reports/layer4_report.py +126 -0
  44. agent_discover_scanner/saas_detector.py +836 -0
  45. agent_discover_scanner/sarif_output.py +155 -0
  46. agent_discover_scanner/sbom_analyzer.py +277 -0
  47. agent_discover_scanner/scan_runner.py +1485 -0
  48. agent_discover_scanner/scanner.py +151 -0
  49. agent_discover_scanner/signatures.py +617 -0
  50. agent_discover_scanner/visitor.py +172 -0
  51. agent_discover_scanner/windows_detector.py +137 -0
  52. agentdiscover-2.7.2.dist-info/METADATA +815 -0
  53. agentdiscover-2.7.2.dist-info/RECORD +56 -0
  54. agentdiscover-2.7.2.dist-info/WHEEL +4 -0
  55. agentdiscover-2.7.2.dist-info/entry_points.txt +4 -0
  56. agentdiscover-2.7.2.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,1335 @@
1
+ import ast
2
+ import json
3
+ import logging
4
+ import os
5
+ import sys
6
+ from importlib.metadata import version as _pkg_version
7
+ from pathlib import Path
8
+ from typing import Optional
9
+
10
+ import typer
11
+ from rich.console import Console
12
+ from rich.table import Table
13
+
14
+ from agent_discover_scanner.errors import (
15
+ ValidationError,
16
+ show_no_findings_help,
17
+ show_setup_help,
18
+ validate_directory_exists,
19
+ validate_file_exists,
20
+ )
21
+ from agent_discover_scanner.js_signatures import JavaScriptAgentDetector
22
+ from agent_discover_scanner.sarif_output import SARIFGenerator
23
+ from agent_discover_scanner.sbom_analyzer import (
24
+ analyze_package_json,
25
+ analyze_requirements_txt,
26
+ )
27
+ from agent_discover_scanner.scanner import Scanner
28
+ from agent_discover_scanner.signatures import SIGNATURE_REGISTRY
29
+ from agent_discover_scanner.visitor import ContextAwareVisitor
30
+ from agent_discover_scanner.platform import upload_scan_results
31
+
32
+ #layer4 imports
33
+ from agent_discover_scanner.layer4.osquery_executor import OsqueryExecutor
34
+ from agent_discover_scanner.layer4.result_parser import OsqueryResultParser
35
+ from agent_discover_scanner.reports.layer4_report import Layer4Report
36
+ from agent_discover_scanner.monitors.k8s_monitor import K8sAPIMonitor
37
+ import socket
38
+
39
+ __version__ = _pkg_version("agentdiscover")
40
+ logger = logging.getLogger(__name__)
41
+
42
+ app = typer.Typer(help="AgentDiscover Scanner: Detect Autonomous AI Agents and Shadow AI")
43
+ console = Console()
44
+
45
+
46
def version_callback(value: Optional[bool]) -> None:
    """Handle the global ``--version`` / ``-v`` flag.

    When the flag is set, print the scanner version and stop CLI
    processing by raising ``typer.Exit``. When it is unset (``None`` or
    ``False``), do nothing.
    """
    if value:
        console.print(f"AgentDiscover Scanner v{__version__}")
        raise typer.Exit()
54
+
55
+
56
@app.callback()
def main(
    version: Optional[bool] = typer.Option(
        None,
        "--version",
        "-v",
        callback=version_callback,
        is_eager=True,
        help="Show version and exit",
    ),
) -> None:
    # Root callback of the Typer app. It performs no work of its own: it only
    # exists so that app-wide options (currently just --version, which is
    # handled by version_callback) have somewhere to be declared.
    return None
69
+
70
+
71
# Rich colour of the bullet printed in front of each finding, keyed by severity.
# Severities missing from this table fall back to "white".
_SEVERITY_COLORS = {"error": "red", "warning": "yellow", "note": "blue"}

# File suffixes routed to the JavaScript/TypeScript detector.
_JS_SUFFIXES = frozenset({".js", ".ts", ".jsx", ".tsx", ".mjs"})


def _scan_language(file_path):
    """Return "python" or "javascript" for a supported suffix, else None."""
    suffix = file_path.suffix
    if suffix == ".py":
        return "python"
    if suffix in _JS_SUFFIXES:
        return "javascript"
    return None


def _detect_findings(file_path, source_code, language):
    """Run the detector matching *language* and return ``(findings, imports)``.

    Python sources are parsed with ``ast`` (so a ``SyntaxError`` may propagate)
    and walked by ``ContextAwareVisitor``; everything else is handed to
    ``JavaScriptAgentDetector``.
    """
    if language == "python":
        tree = ast.parse(source_code, filename=str(file_path))
        visitor = ContextAwareVisitor(file_path, signature_registry=SIGNATURE_REGISTRY)
        visitor.visit(tree)
        return visitor.findings, visitor.imports

    detector = JavaScriptAgentDetector(file_path)
    findings = detector.scan_file(source_code)
    return findings, detector.imports


# NOTE: the docstring of a Typer command is its --help text, so it is kept
# verbatim; implementation notes live in comments instead.
#
# Flow: validate the directory and --format, walk every file yielded by
# Scanner, collect findings/imports per language, then emit SARIF and/or a
# Rich summary table depending on the format.
# Exit codes: 1 for invalid input or an unexpected error, 130 on Ctrl-C,
# 0 when no files were scanned.
@app.command()
def scan(
    path: str = typer.Argument(..., help="Path to the repository to scan"),
    output: str = typer.Option("results.sarif", help="Output SARIF file path"),
    format: str = typer.Option(
        "table",
        help="Output format: sarif, table, text (alias for table), or both",
    ),
    verbose: bool = typer.Option(False, "--verbose", "-v", help="Show detailed output"),
):
    """
    Scan source code for AI agents and Shadow AI patterns.
    """
    console.print(f"[bold green]📂 Analyzing source code at {path}[/bold green]\n")

    # Validate input
    try:
        scan_root = validate_directory_exists(path, "Scan directory")
    except ValidationError:
        raise typer.Exit(code=1)

    # "text" is accepted as an alias for "table".
    scan_fmt = format.strip().lower()
    if scan_fmt == "text":
        scan_fmt = "table"
    if scan_fmt not in ("sarif", "table", "both"):
        console.print("[red]Invalid --format; use sarif, table, text, or both[/red]")
        raise typer.Exit(code=1)
    show_table = scan_fmt in ("table", "both")
    write_sarif = scan_fmt in ("sarif", "both")

    scanner = Scanner(scan_root)

    # Running statistics for the summary table.
    files_scanned = 0
    all_findings = []
    all_imports = set()
    findings_by_severity = {"error": 0, "warning": 0, "note": 0}
    files_by_language = {"python": 0, "javascript": 0}

    try:
        for file_path in scanner.scan():
            files_scanned += 1

            if verbose:
                console.print(f"[dim]Scanning: {file_path}[/dim]")

            try:
                source_code = file_path.read_text(encoding="utf-8")

                language = _scan_language(file_path)
                if language is None:
                    continue
                # Counted before analysis so files that fail to parse still
                # show up in the per-language totals.
                files_by_language[language] += 1

                findings, imports = _detect_findings(file_path, source_code, language)
                all_findings.extend(findings)
                all_imports.update(imports)

                for finding in findings:
                    severity = finding.severity
                    # .get() keeps an unexpected severity from raising KeyError,
                    # which the broad handler below would otherwise swallow
                    # after the finding had already been recorded.
                    findings_by_severity[severity] = findings_by_severity.get(severity, 0) + 1
                    if show_table:
                        color = _SEVERITY_COLORS.get(severity, "white")
                        console.print(f" [{color}]●[/{color}] {finding}")

            except SyntaxError as e:
                if verbose:
                    console.print(f"[red]Syntax error in {file_path}: {e}[/red]")
            except Exception as e:
                # Best effort: one unreadable/unparseable file must not abort
                # the whole scan.
                if verbose:
                    console.print(f"[red]Error processing {file_path}: {e}[/red]")

        # Check if we scanned any files
        if files_scanned == 0:
            console.print("[yellow]⚠️ No Python or JavaScript files found[/yellow]")
            console.print("[dim]Supported extensions: .py, .js, .ts, .jsx, .tsx, .mjs[/dim]")
            raise typer.Exit(code=0)

        # Generate SARIF output if requested
        if write_sarif:
            SARIFGenerator.write_sarif(all_findings, scan_root, Path(output))

        # Display summary table if requested
        if show_table:
            console.print("\n[bold cyan]Scan Complete![/bold cyan]")

            table = Table(show_header=True, header_style="bold magenta")
            table.add_column("Metric", style="cyan")
            table.add_column("Value", style="green")

            table.add_row("Files Scanned", str(files_scanned))
            table.add_row(" • Python", str(files_by_language["python"]))
            table.add_row(" • JavaScript/TypeScript", str(files_by_language["javascript"]))
            table.add_row("Total Findings", str(len(all_findings)))
            table.add_row(" • Errors", f"[red]{findings_by_severity['error']}[/red]")
            table.add_row(" • Warnings", f"[yellow]{findings_by_severity['warning']}[/yellow]")
            table.add_row(" • Notes", f"[blue]{findings_by_severity['note']}[/blue]")
            table.add_row("Unique Imports", str(len(all_imports)))

            console.print(table)

            # Show summary of findings by rule
            if all_findings:
                console.print("\n[bold]Findings by Rule:[/bold]")
                rule_counts = {}
                for finding in all_findings:
                    rule_counts[finding.rule_id] = rule_counts.get(finding.rule_id, 0) + 1

                for rule_id, count in sorted(rule_counts.items()):
                    console.print(f" {rule_id}: {count} finding(s)")
            else:
                show_no_findings_help("agents")

            # Show unique imports if verbose (capped at 20 entries)
            if verbose and all_imports:
                console.print("\n[bold]Discovered Imports:[/bold]")
                for imp in sorted(all_imports)[:20]:
                    console.print(f" • {imp}")
                if len(all_imports) > 20:
                    console.print(f" ... and {len(all_imports) - 20} more")

    except KeyboardInterrupt:
        console.print("\n[yellow]Scan interrupted by user[/yellow]")
        raise typer.Exit(code=130)

    except typer.Exit:
        # Re-raise typer exits (not actual errors) so the generic handler
        # below does not report them as failures.
        raise
    except Exception as e:
        console.print(f"[bold red]Unexpected error:[/bold red] {e}")
        if verbose:
            import traceback

            console.print(traceback.format_exc())
        show_setup_help()
        raise typer.Exit(code=1)
249
+
250
+
251
# NOTE: the docstring of a Typer command is its --help text, so it is kept
# verbatim; implementation notes live in comments instead.
#
# Looks for requirements.txt and package.json directly inside PATH, runs the
# matching analyzer on each one that exists, and prints a risk summary.
# Exits with code 1 when PATH is not a valid directory and with code 0 when
# neither manifest is present.
@app.command()
def deps(
    path: str = typer.Argument(..., help="Path to scan for dependencies"),
    verbose: bool = typer.Option(False, "--verbose", "-v", help="Show detailed output"),
):
    """
    Scan dependencies (requirements.txt, package.json) for AI/ML frameworks.
    """
    console.print(f"[bold green]Scanning dependencies in: {path}[/bold green]\n")

    # Validate input
    try:
        scan_path = validate_directory_exists(path, "Scan directory")
    except ValidationError:
        raise typer.Exit(code=1)

    # Manifest file name -> analyzer, processed in this order.
    manifest_analyzers = (
        ("requirements.txt", analyze_requirements_txt),
        ("package.json", analyze_package_json),
    )

    all_findings = []
    found_manifest = False

    for manifest_name, analyzer in manifest_analyzers:
        manifest = scan_path / manifest_name
        if not manifest.exists():
            continue
        found_manifest = True

        console.print(f"[cyan]Analyzing {manifest_name}...[/cyan]")
        findings = analyzer(manifest)
        all_findings.extend(findings)

        if verbose:
            for finding in findings:
                risk_color = "red" if finding.risk_level == "high" else "yellow"
                console.print(
                    f" [{risk_color}]●[/{risk_color}] {finding.package_name} ({finding.version}) - {finding.reason}"
                )

    # Check if we found any dependency files
    if not found_manifest:
        console.print("[yellow]⚠️ No dependency files found[/yellow]")
        console.print("[dim]Looked for: requirements.txt, package.json[/dim]")
        show_no_findings_help("dependencies")
        raise typer.Exit(code=0)

    # Summary
    console.print("\n[bold cyan]Dependency Scan Complete![/bold cyan]")

    table = Table(show_header=True, header_style="bold magenta")
    table.add_column("Metric", style="cyan")
    table.add_column("Value", style="green")

    high_risk = sum(1 for f in all_findings if f.risk_level == "high")
    medium_risk = sum(1 for f in all_findings if f.risk_level == "medium")

    table.add_row("Total Risky Dependencies", str(len(all_findings)))
    table.add_row(" • High Risk (Agent Frameworks)", f"[red]{high_risk}[/red]")
    table.add_row(" • Medium Risk (LLM Clients)", f"[yellow]{medium_risk}[/yellow]")

    console.print(table)

    if not all_findings:
        show_no_findings_help("dependencies")
        return

    console.print("\n[bold]Detected Frameworks:[/bold]")
    for finding in all_findings:
        risk_color = "red" if finding.risk_level == "high" else "yellow"
        console.print(
            f" [{risk_color}]●[/{risk_color}] {finding.package_name} - {finding.reason}"
        )
329
+
330
+
331
# NOTE: the docstring of a Typer command is its --help text, so it is kept
# verbatim; implementation notes live in comments instead.
#
# Runs NetworkMonitor for --duration seconds, saves its summary to --output,
# and renders the summary (totals, per-service and per-process counts, RAG
# patterns) with Rich. Exits with code 1 on any monitoring failure.
@app.command()
def monitor(
    duration: int = typer.Option(60, help="Duration to monitor in seconds"),
    output: str = typer.Option("network-findings.json", help="Output JSON file"),
):
    """
    Monitor network traffic for active AI agent connections.

    Uses psutil to detect active connections to AI services and vector databases.
    Detects RAG patterns when both AI services and vector DBs are used together.
    """
    console.print("[bold green]🌐 Monitoring live network connections...[/bold green]\n")
    console.print(f"[cyan] Observing runtime behavior ({duration}s)...[/cyan]\n")
    console.print("[cyan]Detecting connections to:[/cyan]")
    console.print(" • AI Services (OpenAI, Anthropic, Google AI, etc.)")
    console.print(" • Vector Databases (Pinecone, Weaviate, Qdrant, etc.)")
    console.print(" • RAG Patterns (AI + Vector DB combinations)\n")

    try:
        # Imported inside the try so that an ImportError raised while loading
        # the module (e.g. a missing psutil) reaches the friendly handler
        # below instead of surfacing as a raw traceback.
        from agent_discover_scanner.network_monitor import NetworkMonitor

        # Named net_monitor so it does not shadow this command function.
        net_monitor = NetworkMonitor()
        summary = net_monitor.monitor(duration_seconds=duration)

        # Save report
        net_monitor.save_report(summary, Path(output))

    except ImportError:
        console.print("[red]❌ Error: psutil not installed[/red]")
        console.print("\n[yellow]💡 Install psutil:[/yellow]")
        console.print(" [cyan]pip install psutil[/cyan]")
        raise typer.Exit(code=1)
    except Exception as e:
        console.print(f"[red]❌ Monitoring error:[/red] {e}")
        message = str(e)
        if "AccessDenied" in message or "permission" in message.lower():
            console.print("\n[yellow]💡 Tip: You may need elevated permissions to monitor network connections[/yellow]")
        raise typer.Exit(code=1)

    # Display results with Rich formatting
    console.print("\n[bold cyan]Network Monitoring Complete![/bold cyan]")

    table = Table(show_header=True, header_style="bold magenta")
    table.add_column("Metric", style="cyan")
    table.add_column("Value", style="green")

    table.add_row("Scan Duration", f"{summary['scan_duration']}s")
    table.add_row("Total Connections", str(summary["total_connections"]))
    table.add_row(
        "Unique Services",
        ", ".join(summary["unique_services"]) if summary["unique_services"] else "None",
    )
    table.add_row("RAG Patterns Detected", f"[red]{len(summary['rag_patterns'])}[/red]")

    console.print(table)

    # Per-service and per-process counts, busiest first.
    if summary["services"]:
        console.print("\n[bold]Connections by Service:[/bold]")
        for service, count in sorted(summary["services"].items(), key=lambda x: x[1], reverse=True):
            console.print(f" [yellow]●[/yellow] {service}: {count}")

    if summary["processes"]:
        console.print("\n[bold]Connections by Process:[/bold]")
        for process, count in sorted(summary["processes"].items(), key=lambda x: x[1], reverse=True):
            console.print(f" [cyan]●[/cyan] {process}: {count}")

    if summary["rag_patterns"]:
        console.print("\n[bold red]🚨 RAG Patterns Detected![/bold red]")
        console.print("[red]Processes using both AI services and vector databases:[/red]")
        for pattern in summary["rag_patterns"]:
            console.print(f"\n [yellow]Process:[/yellow] {pattern['process']} (PID: {pattern['pid']})")
            console.print(f" [yellow]AI Services:[/yellow] {', '.join(pattern['ai_services'])}")
            console.print(f" [yellow]Vector DBs:[/yellow] {', '.join(pattern['vector_dbs'])}")
            console.print(f" [yellow]Confidence:[/yellow] {pattern['confidence']}")

    console.print(f"\n[green]✓ Results saved to: {output}[/green]")
406
+
407
+
408
# NOTE: the docstring of a Typer command is its --help text, so it is kept
# verbatim; implementation notes live in comments instead.
#
# Loads the SARIF code scan and the network findings JSON, correlates them
# through CorrelationEngine (using the known-apps table), prints any detected
# behavioral patterns, writes the inventory report to --output and renders a
# classification/risk summary. Exits with code 1 when an input file is
# missing or cannot be loaded.
@app.command()
def correlate(
    code_scan: str = typer.Option(..., help="Path to code scan SARIF file"),
    network_scan: str = typer.Option("network-findings.json", help="Path to network findings JSON"),
    output: str = typer.Option("agent-inventory.json", help="Output inventory JSON file"),
):
    """
    Correlate code and network findings to create unified agent inventory.

    Detects:
    - CONFIRMED: Agents found in code AND running
    - UNKNOWN: Agents in code but not yet active
    - GHOST: Active agents with NO code found (CRITICAL)
    """
    from agent_discover_scanner.correlator import CorrelationEngine
    from agent_discover_scanner.known_apps import build_known_apps

    console.print("[bold green]Correlating findings...[/bold green]\n")

    # Validate inputs
    try:
        code_scan_path = validate_file_exists(code_scan, "Code scan SARIF file")
        network_scan_path = validate_file_exists(network_scan, "Network scan JSON file")
    except ValidationError:
        raise typer.Exit(code=1)

    # Load findings. Unreadable or malformed files (OSError, or ValueError,
    # which includes json.JSONDecodeError) end the command with a clean message
    # instead of a traceback.
    try:
        code_findings = CorrelationEngine.load_code_findings(code_scan_path)
        network_findings = CorrelationEngine.load_network_findings(network_scan_path)
    except (OSError, ValueError) as exc:
        console.print(f"[red]Failed to load findings: {exc}[/red]")
        raise typer.Exit(code=1)

    console.print("[cyan]Loaded:[/cyan]")
    console.print(f" • Code findings: {len(code_findings)}")
    console.print(f" • Network findings: {len(network_findings)}\n")

    # Correlate
    known_apps = build_known_apps()
    inventory = CorrelationEngine.correlate(
        code_findings, network_findings, known_apps=known_apps
    )

    # Behavioral analysis (only meaningful when there is network data)
    if network_findings:
        console.print("[bold cyan]Analyzing Behavioral Patterns...[/bold cyan]")
        behavioral = CorrelationEngine.analyze_behaviors(network_findings)
        behavior_summary = behavioral["summary"]

        if behavior_summary["total_patterns"] > 0:
            console.print("\n[bold]Detected Behavioral Patterns:[/bold]")
            console.print(f" • ReAct Loops: {behavior_summary['react_loops']}")
            console.print(f" • RAG Patterns: {behavior_summary['rag_patterns']}")
            console.print(f" • Multi-turn Conversations: {behavior_summary['multi_turn']}")

            # Show details
            for pattern_type, pattern_list in behavioral["patterns"].items():
                if not pattern_list:
                    continue
                console.print(f"\n[yellow]{pattern_type.upper().replace('_', ' ')}:[/yellow]")
                for pattern in pattern_list:
                    console.print(f" [green]✓[/green] {pattern['description']}")
                    for indicator in pattern["indicators"]:
                        console.print(f" - {indicator}")

    # Generate report
    report = CorrelationEngine.generate_report(inventory, Path(output))
    counts = report["summary"]
    risks = report["risk_breakdown"]

    # Display results
    console.print("\n[bold cyan]Correlation Complete![/bold cyan]\n")

    # Summary table
    table = Table(show_header=True, header_style="bold magenta")
    table.add_column("Classification", style="cyan")
    table.add_column("Count", style="green")
    table.add_column("Description", style="dim")

    table.add_row("CONFIRMED", str(counts["confirmed"]), "Code + Network (Active)")
    table.add_row("UNKNOWN", str(counts["unknown"]), "Code Only (Not Yet Active)")
    table.add_row(
        "SHADOW AI",
        # Read with a default of 0, unlike the other summary keys.
        str(counts.get("shadow_ai_usage", 0)),
        "Known app using AI — review for governance",
    )
    table.add_row("ZOMBIE", str(counts["zombie"]), "Code But No Traffic (Deprecated)")
    table.add_row(
        "GHOST",
        f"[red]{counts['ghost']}[/red]",
        "[red]Traffic But No Code (CRITICAL)[/red]",
    )

    console.print(table)

    # Risk breakdown
    console.print("\n[bold]Risk Breakdown:[/bold]")
    console.print(f" [red]●[/red] Critical: {risks['critical']}")
    console.print(f" [yellow]●[/yellow] High: {risks['high']}")
    console.print(f" [blue]●[/blue] Medium: {risks['medium']}")

    # Ghost agent warnings
    if inventory["ghost"]:
        console.print("\n[bold red]⚠️ GHOST AGENTS DETECTED![/bold red]")
        console.print("[red]Active agents with NO corresponding code found:[/red]")
        for ghost in inventory["ghost"]:
            console.print(f" • Provider: {ghost.network_provider}")
            console.print(f" Process: {ghost.process_name}")
            console.print(f" Last Seen: {ghost.last_seen}\n")

    console.print(f"\n[green]✓ Inventory saved to: {output}[/green]")
512
+
513
+
514
# NOTE: the docstring of a Typer command is its --help text, so it is kept
# verbatim; implementation notes live in comments instead.
#
# Thin CLI front end: every option is forwarded to
# scan_runner.execute_scan_all(). Afterwards, if --emit-mcpfw-policy was given
# (and this is not a daemon run) the returned report is exported as an mcpfw
# policy via _write_mcpfw_policy().
@app.command("scan-all")
def scan_all(
    path: str = typer.Argument(..., help="Directory to scan"),
    duration: int = typer.Option(
        60,
        "--duration",
        "-d",
        help="Network and K8s monitor observation window in seconds",
    ),
    output: Path = typer.Option(
        Path("defendai-results"),
        "--output",
        "-o",
        help="Output directory for scan results",
    ),
    format: str = typer.Option(
        "text",
        "--format",
        "-f",
        help="Console summary: text (default) or json (print final inventory JSON). Layer 1 code output remains SARIF on disk.",
    ),
    layer3_file: Optional[Path] = typer.Option(
        None,
        "--layer3-file",
        help="Optional path to existing Tetragon JSONL output (skip live monitor-k8s if provided)",
    ),
    skip_layers: Optional[str] = typer.Option(
        None,
        "--skip-layers",
        help="Comma-separated layers to skip, e.g. '3' or '2,3'",
    ),
    daemon: bool = typer.Option(
        False,
        "--daemon",
        help="Run continuously, re-scanning every 30 seconds",
    ),
    max_log_size: int = typer.Option(
        50,
        "--max-log-size",
        help="Rotate layer2/layer3 output files when they exceed this size in MB",
    ),
    max_log_backups: int = typer.Option(
        5,
        "--max-log-backups",
        help="Number of rotated backup files to keep for layer2/layer3",
    ),
    platform: bool = typer.Option(
        False,
        "--platform",
        help="Upload results to DefendAI platform after scan",
    ),
    api_key: Optional[str] = typer.Option(
        None,
        "--api-key",
        help="DefendAI platform API key",
    ),
    tenant_token: Optional[str] = typer.Option(
        None,
        "--tenant-token",
        help="DefendAI platform tenant token",
    ),
    wawsdb_url: str = typer.Option(
        "https://wauzeway.defendai.ai",
        "--wawsdb-url",
        help="DefendAI platform base URL",
    ),
    platform_interval: int = typer.Option(
        5,
        "--platform-interval",
        help="Upload to platform every N correlation cycles in daemon mode (default: 5)",
    ),
    verbose: bool = typer.Option(
        False,
        "--verbose",
        "-v",
        help="Show detailed output including Layer 3/Kubernetes errors",
    ),
    layer: Optional[str] = typer.Option(
        None,
        "--layer",
        help="Run a single facet only: code, network, k8s, endpoint, or mcp (not with --daemon)",
    ),
    dry_run: bool = typer.Option(
        False,
        "--dry-run",
        help="Validate configuration and report which layers are available without running a scan",
    ),
    src_repo: Optional[str] = typer.Option(
        None,
        "--src-repo",
        help="Additional source repository to scan through Layer 1 (local path or remote URL). Findings are merged into layer1_code.sarif.",
    ),
    src_repo_ttl: int = typer.Option(
        3600,
        "--src-repo-ttl",
        help="In daemon mode: minimum seconds between re-scans of --src-repo (default: 3600)",
    ),
    summary: bool = typer.Option(
        False,
        "--summary",
        help="Print a concise executive summary after the scan (agent counts, highest risk, top SaaS connections, single recommendation)",
    ),
    emit_mcpfw_policy: Optional[str] = typer.Option(
        None,
        "--emit-mcpfw-policy",
        help="After scan, export MCP governance policy YAML to this path (e.g. policy.yaml). Use --stance to set enforcement level.",
    ),
    stance: str = typer.Option(
        "balanced",
        "--stance",
        help="Policy stance for --emit-mcpfw-policy: strict, balanced (default), or monitor",
    ),
    cloud_audit: bool = typer.Option(
        False,
        "--cloud-audit/--no-cloud-audit",
        help=(
            "Enable Layer 5 Cloud Audit detection (AWS CloudTrail for Bedrock invocations). "
            "Use when Layer 2 (psutil) cannot see Bedrock traffic (e.g. SSE proxy, VPC endpoint). "
            "Requires cloudtrail:LookupEvents permission."
        ),
    ),
    cloud_audit_region: str = typer.Option(
        "us-east-1",
        "--cloud-audit-region",
        help="AWS region to query for Cloud Audit events (default: us-east-1).",
    ),
    cloud_audit_hours: int = typer.Option(
        0,
        "--cloud-audit-hours",
        help=(
            "Lookback window in hours for Cloud Audit queries (default 1 when --cloud-audit is set). "
            "Setting this > 0 also implicitly enables Cloud Audit. "
            "Requires cloudtrail:LookupEvents permission."
        ),
    ),
    cloud_audit_lake_arn: Optional[str] = typer.Option(
        None,
        "--cloud-audit-lake-arn",
        help=(
            "CloudTrail Lake event data store ARN for near-real-time Bedrock detection (~60s delay). "
            "Requires cloudtrail:StartQuery and cloudtrail:GetQueryResults permissions."
        ),
    ),
    azure_monitor: bool = typer.Option(
        False,
        "--azure-monitor/--no-azure-monitor",
        help="[Preview] Enable Azure Monitor detection for Azure OpenAI invocations (coming soon).",
    ),
    gcp_audit: bool = typer.Option(
        False,
        "--gcp-audit/--no-gcp-audit",
        help="[Preview] Enable GCP Cloud Audit Log detection for Vertex AI invocations (coming soon).",
    ),
):
    """
    Run a full 4-layer AI agent scan, correlate all findings,
    and optionally upload results to the DefendAI platform.
    """
    from agent_discover_scanner.scan_runner import execute_scan_all

    # geteuid does not exist on every platform, hence the hasattr guard.
    running_as_root = hasattr(os, "geteuid") and os.geteuid() == 0
    if sys.platform == "darwin" and running_as_root:
        typer.echo(
            "Warning: running with sudo on macOS is not recommended. "
            "If you pass a PATH with '~', it may resolve to root's home. "
            "Prefer running without sudo when possible.",
            err=True,
        )

    # The policy export below is skipped for daemon runs; say so up front
    # rather than silently dropping the option.
    if emit_mcpfw_policy and daemon:
        typer.echo(
            "Warning: --emit-mcpfw-policy is ignored in --daemon mode; "
            "no policy file will be written.",
            err=True,
        )

    path = os.path.expanduser(path)

    report = execute_scan_all(
        path=path,
        output=output,
        duration=duration,
        layer3_file=layer3_file,
        skip_layers=skip_layers,
        daemon=daemon,
        max_log_size=max_log_size,
        max_log_backups=max_log_backups,
        platform=platform,
        api_key=api_key,
        tenant_token=tenant_token,
        wawsdb_url=wawsdb_url,
        platform_interval=platform_interval,
        verbose=verbose,
        scan_output_format=format,
        layer=layer,
        dry_run=dry_run,
        src_repo=src_repo,
        src_repo_ttl=src_repo_ttl,
        summary=summary,
        cloud_audit_enabled=cloud_audit,
        cloud_audit_region=cloud_audit_region,
        cloud_audit_hours=cloud_audit_hours,
        cloud_audit_lake_arn=cloud_audit_lake_arn,
        azure_monitor_enabled=azure_monitor,
        gcp_audit_enabled=gcp_audit,
    )

    if not emit_mcpfw_policy or daemon:
        return

    if not report:
        # Nothing to build a policy from; report it instead of skipping
        # the export without a trace.
        typer.echo(
            "Warning: scan produced no report; skipping mcpfw policy export.",
            err=True,
        )
        return

    _write_mcpfw_policy(report, emit_mcpfw_policy, stance)
715
+
716
+
717
def _write_mcpfw_policy(report: dict, output_path: str, stance: str) -> None:
    """Export an mcpfw policy built from *report* as YAML at *output_path*.

    Args:
        report: Scan report passed to ``export_mcpfw_policy``.
        output_path: Destination file; missing parent directories are created.
        stance: Policy stance forwarded to ``export_mcpfw_policy``.

    This is best effort: any failure (including a missing ``yaml`` package)
    is printed to the console and never propagated to the caller.
    """
    try:
        import yaml
        from agent_discover_scanner.exporters.mcpfw_policy import (
            _strip_internal_keys,
            export_mcpfw_policy,
        )

        policy = export_mcpfw_policy(report, stance=stance)
        # Return value is unused, so this presumably cleans ``policy`` in
        # place -- confirm against the exporter module.
        _strip_internal_keys(policy)

        out = Path(output_path)
        out.parent.mkdir(parents=True, exist_ok=True)
        # sort_keys=False keeps the key order produced by the exporter.
        out.write_text(yaml.dump(policy, default_flow_style=False, sort_keys=False), encoding="utf-8")

        console.print(f"[green]✓ mcpfw policy ({stance}) written to {out.resolve()}[/green]")
        n_servers = len(policy.get("servers") or [])
        console.print(f"[dim] {n_servers} server rule(s) · run: mcpfw wrap --policy {out} -- <mcp-server-cmd>[/dim]")
    except Exception as exc:
        # Deliberately broad: a failed policy export must not fail the scan.
        console.print(f"[red]Failed to write mcpfw policy: {exc}[/red]")
736
+
737
+
738
@app.command("export-mcpfw-policy")
def export_mcpfw_policy_cmd(
    scan_dir: str = typer.Argument(..., help="Directory to scan for MCP configurations"),
    stance: str = typer.Option(
        "balanced",
        "--stance",
        "-s",
        help="Policy stance: strict (deny-by-default), balanced (default), or monitor (log only)",
    ),
    output: str = typer.Option(
        "mcpfw-policy.yaml",
        "--output",
        "-o",
        help="Output YAML file path",
    ),
):
    """
    Export an mcpfw policy YAML from MCP configurations detected in a directory.

    Three stances:
      strict - deny-by-default, minimal tool allowlists, full DLP blocking
      balanced - allow with DLP and allowlists for unverified servers (default)
      monitor - log only, no blocking โ€” use for observability before enforcement

    Example:
      agent-discover export-mcpfw-policy . --stance strict --output policy.yaml
      mcpfw wrap --policy policy.yaml -- npx @modelcontextprotocol/server-filesystem /tmp
    """
    # NOTE: the docstring above is kept verbatim because it is presumably
    # surfaced as the command's --help text; implementation notes live in
    # comments instead.
    import yaml
    from rich.markup import escape

    from agent_discover_scanner.exporters.mcpfw_policy import (
        _strip_internal_keys,
        export_mcpfw_policy,
    )
    from agent_discover_scanner.mcp_detector import detect_mcp_servers

    try:
        scan_root = validate_directory_exists(scan_dir, "Scan directory")
    except ValidationError:
        raise typer.Exit(code=1)

    console.print(f"[bold green]Scanning MCP configurations in {scan_root}[/bold green]")

    try:
        mcp_result = detect_mcp_servers(scan_dir=str(scan_root))
    except Exception as exc:
        console.print(f"[red]MCP detection failed: {exc}[/red]")
        raise typer.Exit(code=1)

    servers = mcp_result.get("servers") or []
    if not servers:
        console.print("[yellow]No MCP servers detected โ€” writing minimal stub policy[/yellow]")
    else:
        console.print(f"[cyan]Found {len(servers)} MCP server(s)[/cyan]")
        risk_colors = {"critical": "red", "high": "yellow"}
        # Only the first five servers are previewed; the policy covers all.
        for s in servers[:5]:
            risk_color = risk_colors.get(s.get("risk", ""), "dim")
            # "[unverified]" looks like a Rich markup tag and would be
            # swallowed silently, so it has to be escaped to be displayed.
            verified = "" if s.get("publisher_verified") else escape(" [unverified]")
            # Names/vendors come from scanned config files: escape them so
            # bracketed text is printed literally instead of being parsed
            # as markup (or raising on a stray closing tag).
            server_name = escape(str(s.get("server_name")))
            vendor = escape(str(s.get("vendor", "?")))
            risk = escape(str(s.get("risk", "?")))
            console.print(
                f" [{risk_color}]โ—[/{risk_color}] {server_name} "
                f"({vendor}){verified} โ€” risk: {risk}"
            )

    scan_result = {"mcp": mcp_result, "scan_path": str(scan_root)}
    policy = export_mcpfw_policy(scan_result, stance=stance)
    _strip_internal_keys(policy)

    out = Path(output)
    out.parent.mkdir(parents=True, exist_ok=True)
    out.write_text(yaml.dump(policy, default_flow_style=False, sort_keys=False), encoding="utf-8")

    console.print(f"\n[green]โœ“ mcpfw policy ({stance}) written to {out.resolve()}[/green]")
    n_rules = len(policy.get("servers") or [])
    n_dlp = len(policy.get("response_rules") or [])
    console.print(f"[dim] {n_rules} server rule(s), {n_dlp} global DLP rule(s)[/dim]")
    console.print(f"[dim] To enforce: mcpfw wrap --policy {out} -- <mcp-server-command>[/dim]")
812
+
813
+
814
@app.command()
def audit(
    path: str = typer.Argument(..., help="Directory to scan"),
    output: Path = typer.Option(
        Path("defendai-audit"),
        "--output",
        "-o",
        help="Audit bundle root directory (scan artifacts under raw/)",
    ),
    duration: int = typer.Option(
        60,
        "--duration",
        "-d",
        help="Network and K8s monitor observation window in seconds",
    ),
    layer3_file: Optional[Path] = typer.Option(
        None,
        "--layer3-file",
        help="Optional path to existing Tetragon JSONL output (skip live monitor-k8s if provided)",
    ),
    skip_layers: Optional[str] = typer.Option(
        None,
        "--skip-layers",
        help="Comma-separated layers to skip, e.g. '3' or '2,3'",
    ),
    max_log_size: int = typer.Option(
        50,
        "--max-log-size",
        help="Rotate layer2/layer3 output files when they exceed this size in MB",
    ),
    max_log_backups: int = typer.Option(
        5,
        "--max-log-backups",
        help="Number of rotated backup files to keep for layer2/layer3",
    ),
    platform: bool = typer.Option(
        False,
        "--platform",
        help="Upload results to DefendAI platform after scan",
    ),
    api_key: Optional[str] = typer.Option(
        None,
        "--api-key",
        help="DefendAI platform API key",
    ),
    tenant_token: Optional[str] = typer.Option(
        None,
        "--tenant-token",
        help="DefendAI platform tenant token",
    ),
    wawsdb_url: str = typer.Option(
        "https://wauzeway.defendai.ai",
        "--wawsdb-url",
        help="DefendAI platform base URL",
    ),
    platform_interval: int = typer.Option(
        5,
        "--platform-interval",
        help="Upload to platform every N correlation cycles in daemon mode (default: 5)",
    ),
    verbose: bool = typer.Option(
        False,
        "--verbose",
        "-v",
        help="Show detailed output including Layer 3/Kubernetes errors",
    ),
    cloud_audit: bool = typer.Option(
        False,
        "--cloud-audit/--no-cloud-audit",
        help=(
            "Enable Layer 5 Cloud Audit detection (AWS CloudTrail for Bedrock invocations). "
            "Requires cloudtrail:LookupEvents permission."
        ),
    ),
    cloud_audit_region: str = typer.Option(
        "us-east-1",
        "--cloud-audit-region",
        help="AWS region to query for Cloud Audit events (default: us-east-1).",
    ),
    cloud_audit_hours: int = typer.Option(
        0,
        "--cloud-audit-hours",
        help=(
            "Lookback window in hours for Cloud Audit queries (default 1 when --cloud-audit is set). "
            "Requires cloudtrail:LookupEvents permission."
        ),
    ),
    cloud_audit_lake_arn: Optional[str] = typer.Option(
        None,
        "--cloud-audit-lake-arn",
        help=(
            "CloudTrail Lake event data store ARN for near-real-time Bedrock detection (~60s delay). "
            "Requires cloudtrail:StartQuery and cloudtrail:GetQueryResults permissions."
        ),
    ),
    azure_monitor: bool = typer.Option(
        False,
        "--azure-monitor/--no-azure-monitor",
        help="[Preview] Enable Azure Monitor detection for Azure OpenAI invocations (coming soon).",
    ),
    gcp_audit: bool = typer.Option(
        False,
        "--gcp-audit/--no-gcp-audit",
        help="[Preview] Enable GCP Cloud Audit Log detection for Vertex AI invocations (coming soon).",
    ),
):
    """
    Run the full scan-all pipeline into output/raw/, then write AIBOM JSON and Markdown reports
    (ghost-agents.md, mcp-report.md, summary.md) beside raw/.
    """
    # NOTE: the docstring above is kept verbatim because it is presumably
    # surfaced as the command's --help text.
    from rich.markup import escape

    from agent_discover_scanner.aibom import generate_aibom
    from agent_discover_scanner.audit_reports import (
        write_audit_summary,
        write_ghost_agents_markdown,
        write_mcp_markdown,
    )
    from agent_discover_scanner.correlator import CorrelationEngine
    from agent_discover_scanner.mcp_detector import detect_mcp_servers
    from agent_discover_scanner.scan_runner import execute_scan_all

    def _warn(what: str, exc: BaseException) -> None:
        # The MCP enrichment inputs are optional, so failures never abort the
        # audit; they are only surfaced when the user asked for detail.
        if verbose:
            console.print(f"[yellow]Warning: {what}: {escape(str(exc))}[/yellow]")

    output.mkdir(parents=True, exist_ok=True)
    raw_dir = output / "raw"
    report = execute_scan_all(
        path=path,
        output=raw_dir,
        duration=duration,
        layer3_file=layer3_file,
        skip_layers=skip_layers,
        daemon=False,  # an audit is a single pass, never a long-running daemon
        max_log_size=max_log_size,
        max_log_backups=max_log_backups,
        platform=platform,
        api_key=api_key,
        tenant_token=tenant_token,
        wawsdb_url=wawsdb_url,
        platform_interval=platform_interval,
        verbose=verbose,
        scan_output_format="text",
        layer=None,
        cloud_audit_enabled=cloud_audit,
        cloud_audit_region=cloud_audit_region,
        cloud_audit_hours=cloud_audit_hours,
        cloud_audit_lake_arn=cloud_audit_lake_arn,
        azure_monitor_enabled=azure_monitor,
        gcp_audit_enabled=gcp_audit,
    )
    if report is None:
        console.print("[red]Audit did not produce a correlation report.[/red]")
        raise typer.Exit(code=1)

    inv_path = raw_dir / "agent_inventory.json"
    if not inv_path.is_file():
        console.print(f"[red]Missing inventory file: {inv_path}[/red]")
        raise typer.Exit(code=1)

    generate_aibom(inv_path, output / "aibom.json")
    write_ghost_agents_markdown(inv_path, output / "ghost-agents.md")
    write_audit_summary(report, output / "summary.md", raw_dir)

    # Optional inputs for the MCP report: network (layer 2) and endpoint
    # (layer 4) findings, each loaded only if its raw artifact exists.
    nf: list = []
    l4: list = []
    layer2_json = raw_dir / "layer2_network.json"
    layer4_json = raw_dir / "layer4_endpoint.json"
    try:
        if layer2_json.is_file():
            data = json.loads(layer2_json.read_text(encoding="utf-8"))
            # Tolerate unexpected JSON shapes (top-level list, non-list
            # values) instead of crashing with AttributeError/TypeError.
            if isinstance(data, dict):
                for key in ("findings", "connections"):
                    entries = data.get(key)
                    if isinstance(entries, list):
                        nf.extend(entries)
    except (ValueError, OSError) as exc:
        # ValueError covers both json.JSONDecodeError and UnicodeDecodeError.
        _warn(f"could not read {layer2_json}", exc)
    try:
        if layer4_json.is_file():
            l4 = CorrelationEngine.load_layer4_findings(layer4_json)
    except Exception as exc:
        _warn(f"could not load {layer4_json}", exc)
    try:
        mcp_result = detect_mcp_servers(
            scan_dir=path,
            network_findings=nf,
            layer4_findings=l4,
        )
    except Exception as exc:
        _warn("MCP detection failed; writing an empty MCP report", exc)
        mcp_result = {}
    write_mcp_markdown(mcp_result, output / "mcp-report.md")

    console.print(f"\n[green]โœ… Audit bundle written to {output.resolve()}[/green]\n")
999
+
1000
+
1001
@app.command()
def monitor_k8s(
    namespace: str = typer.Option(
        "kube-system",
        "--namespace",
        "-n",
        help="Kubernetes namespace where Tetragon is deployed",
    ),
    duration: Optional[int] = typer.Option(
        None,
        "--duration",
        "-d",
        help="Monitoring duration in seconds (default: run until Ctrl+C)",
    ),
    output_file: Optional[str] = typer.Option(
        None,
        "--output",
        "-o",
        help="Output file path (for json/jsonl formats)",
    ),
    output_format: str = typer.Option(
        "console",
        "--format",
        "-f",
        help="Output format: console, json, or jsonl",
    ),
    tetragon_export_file: Optional[Path] = typer.Option(
        None,
        "--tetragon-export-file",
        help="Read from Tetragon export file instead of kubectl (e.g. /var/run/cilium/tetragon/tetragon.log). Lower API server overhead.",
    ),
):
    """
    Monitor Kubernetes cluster for AI agent activity using Tetragon.

    Requires:
    - Cilium Tetragon installed in the cluster
    - kubectl configured and authenticated (unless --tetragon-export-file is used)
    - TracingPolicy deployed (see docs/TETRAGON_SETUP.md)

    Examples:
    # Monitor with console output
    agentdiscover monitor-k8s

    # Production: read from local Tetragon export file (no kubectl/API load)
    agentdiscover monitor-k8s --tetragon-export-file /var/run/cilium/tetragon/tetragon.log

    # Save detections to JSONL file
    agentdiscover monitor-k8s --output detections.jsonl --format jsonl

    # Monitor for 60 seconds and save as JSON
    agentdiscover monitor-k8s --duration 60 --output report.json --format json

    # Monitor Tetragon in custom namespace
    agentdiscover monitor-k8s --namespace monitoring
    """
    from pathlib import Path
    from agent_discover_scanner.monitors import monitor_k8s as run_monitor

    # An empty or missing --output means "no file"; anything else is a path.
    destination = None
    if output_file:
        destination = Path(output_file)

    try:
        run_monitor(
            namespace=namespace,
            duration=duration,
            output_file=destination,
            output_format=output_format,
            tetragon_export_file=tetragon_export_file,
        )
    except FileNotFoundError as missing:
        # A FileNotFoundError is attributed to the export file only when one
        # was requested and the message names it; otherwise kubectl is blamed.
        about_export_file = bool(tetragon_export_file) and "Tetragon export file" in str(missing)
        if about_export_file:
            message = f"[red]Error: {missing}[/red]"
        else:
            message = "[red]Error: kubectl not found. Please install kubectl and configure cluster access.[/red]"
        console.print(message)
        raise typer.Exit(1)
    except Exception as failure:
        console.print(f"[red]Error: {failure}[/red]")
        raise typer.Exit(1)
1081
+
1082
+
1083
@app.command()
def endpoint(
    output: Optional[Path] = typer.Option(
        None,
        "--output",
        "-o",
        help="Output file path (JSON or Markdown)"
    ),
    output_format: str = typer.Option(
        "markdown",
        "--format",
        "-f",
        help="Output format: json or markdown"
    ),
):
    """
    Endpoint Discovery: Scan local endpoint for Shadow AI using osquery.

    Discovers AI usage on this machine:
    - Desktop AI applications (ChatGPT, Claude, Cursor)
    - AI packages (pip, npm: openai, langchain, etc.)
    - Active connections to AI services
    - Browser-based AI usage

    Requires osquery to be installed:
    macOS: brew install osquery
    Windows: choco install osquery
    Linux: See https://osquery.io/downloads
    """
    # NOTE: the docstring above is kept verbatim because it is presumably
    # surfaced as the command's --help text.
    from rich.console import Console
    from rich.table import Table
    import subprocess
    import json

    console = Console()

    # Probe for the osquery binary before doing any real work.
    try:
        subprocess.run(
            ["osqueryi", "--version"],
            capture_output=True,
            timeout=5,
            check=True
        )
    except subprocess.TimeoutExpired:
        # The probe has a 5 second timeout; a hung binary would otherwise
        # surface as an unhandled traceback.
        console.print("\n[red]Error: 'osqueryi --version' timed out after 5 seconds[/red]\n")
        raise typer.Exit(1)
    except (subprocess.CalledProcessError, FileNotFoundError):
        console.print("\n[red]โœ— Error: osquery not installed[/red]\n")
        console.print("[yellow]Install osquery:[/yellow]")
        console.print(" macOS: [cyan]brew install osquery[/cyan]")
        console.print(" Windows: [cyan]choco install osquery[/cyan]")
        console.print(" Linux: [cyan]https://osquery.io/downloads[/cyan]")
        console.print("\n[yellow]Full setup guide:[/yellow]")
        console.print(" https://github.com/Defend-AI-Tech-Inc/agent-discover-scanner/blob/main/docs/layer4-setup.md")
        raise typer.Exit(1)

    console.print("\n[bold blue]Endpoint Discovery: Endpoint Discovery (Shadow AI)[/bold blue]\n")

    # Execute osquery
    with console.status("[bold yellow]Running osquery scans...", spinner="dots"):
        executor = OsqueryExecutor()
        raw_results = executor.discover_all()

    # Convert to model
    hostname = socket.gethostname()
    endpoint = OsqueryResultParser.create_endpoint_discovery(
        hostname=hostname,
        osquery_results=raw_results
    )

    # Generate report
    report = Layer4Report([endpoint])
    # The summary value is not used here; the call is kept in case it has
    # side effects on the report — TODO confirm and remove if it is pure.
    report.generate_summary()

    # Display summary
    console.print("\n[bold green]โœ“ Scan Complete[/bold green]\n")

    summary_table = Table(show_header=False, box=None)
    summary_table.add_row("[cyan]Hostname:", f"[white]{endpoint.hostname}")
    summary_table.add_row("[cyan]OS:", f"[white]{endpoint.os_type} {endpoint.os_version}")
    summary_table.add_row("[cyan]Total AI Instances:", f"[white]{endpoint.total_ai_instances}")
    summary_table.add_row("[cyan]Risk Score:", f"[white]{endpoint.risk_score}/100")
    console.print(summary_table)

    # Show findings (console preview is capped at five entries per category;
    # the saved report contains everything).
    if endpoint.applications:
        console.print(f"\n[yellow]Desktop Applications ({len(endpoint.applications)}):[/yellow]")
        # Loop variable deliberately not named "app", which would shadow the
        # module-level Typer application inside this function.
        for application in endpoint.applications[:5]:
            console.print(f" โ€ข {application.name} [dim]v{application.version}[/dim]")

    if endpoint.packages:
        console.print(f"\n[yellow]AI Packages ({len(endpoint.packages)}):[/yellow]")
        for pkg in endpoint.packages[:5]:
            console.print(f" โ€ข {pkg.name} [dim]v{pkg.version} ({pkg.package_manager})[/dim]")

    if endpoint.connections:
        console.print(f"\n[yellow]Active AI Connections ({len(endpoint.connections)}):[/yellow]")
        for conn in endpoint.connections[:5]:
            console.print(f" โ€ข {conn.process_name} โ†’ {conn.remote_hostname}:{conn.remote_port}")

    # Save report
    if output:
        output_path = output
    else:
        output_path = Path("layer4_report.md" if output_format == "markdown" else "layer4_report.json")

    # Create the destination directory so "-o some/new/dir/report.md" works.
    output_path.parent.mkdir(parents=True, exist_ok=True)

    if output_format == "markdown":
        report_content = report.generate_markdown_report()
        # Explicit UTF-8: the platform default encoding can fail on
        # non-ASCII report content.
        output_path.write_text(report_content, encoding="utf-8")
    else:
        # JSON format (any value other than "markdown" falls through here)
        json_data = {
            "scan_timestamp": endpoint.scan_timestamp.isoformat(),
            "hostname": endpoint.hostname,
            "os_type": endpoint.os_type,
            "os_version": endpoint.os_version,
            "username": endpoint.username,
            "risk_score": endpoint.risk_score,
            "total_ai_instances": endpoint.total_ai_instances,
            "applications": [
                {
                    "name": application.name,
                    "version": application.version,
                    "vendor": application.vendor,
                    "install_path": application.install_path
                }
                for application in endpoint.applications
            ],
            "packages": [
                {
                    "name": pkg.name,
                    "version": pkg.version,
                    "package_manager": pkg.package_manager
                }
                for pkg in endpoint.packages
            ],
            "connections": [
                {
                    "process_name": conn.process_name,
                    "remote_hostname": conn.remote_hostname,
                    "remote_port": conn.remote_port
                }
                for conn in endpoint.connections
            ]
        }
        output_path.write_text(json.dumps(json_data, indent=2), encoding="utf-8")

    console.print(f"\n[green]โœ“ Report saved to:[/green] [cyan]{output_path}[/cyan]\n")
1230
+
1231
+
1232
@app.command("git-scan")
def git_scan(
    path: str = typer.Argument(..., help="Path to the git repository to scan"),
    since: int = typer.Option(
        90,
        "--since",
        "-s",
        help="How many days back to search for secrets in commit history",
    ),
    output: Optional[str] = typer.Option(
        None,
        "--output",
        "-o",
        help="Write findings to a JSON file",
    ),
    verbose: bool = typer.Option(
        False,
        "--verbose",
        "-v",
        help="Show all findings including info-level dependency timeline",
    ),
):
    """
    Scan git history for AI-related security signals.

    Detects:
    - AI API keys committed to history (critical)
    - .env and credential files ever tracked in git (high)
    - When AI frameworks were first introduced to the codebase (info)
    """
    # NOTE: the docstring above is kept verbatim because it is presumably
    # surfaced as the command's --help text.
    # Exit codes: 1 for an invalid directory, a non-git directory, or any
    # critical/high finding; 0 otherwise.
    from rich.markup import escape

    from agent_discover_scanner.git_scanner import is_git_repo, scan_repo

    try:
        scan_root = validate_directory_exists(path, "Repository directory")
    except ValidationError:
        raise typer.Exit(code=1)

    if not is_git_repo(scan_root):
        console.print(f"[red]โœ— {scan_root} is not a git repository[/red]")
        raise typer.Exit(code=1)

    console.print(f"\n[bold green]๐Ÿ” Scanning git history in {scan_root}[/bold green]\n")
    console.print(f"[dim] Secrets: last {since} days ยท Sensitive files: full history[/dim]\n")

    findings = scan_repo(scan_root, since_days=since)

    def _save_findings() -> None:
        # Written for clean scans too (as an empty list), so tooling that
        # consumes --output can tell "clean" apart from "did not run".
        if not output:
            return
        output_path = Path(output)
        output_path.parent.mkdir(parents=True, exist_ok=True)
        output_path.write_text(
            json.dumps([f.model_dump() for f in findings], indent=2),
            encoding="utf-8",
        )
        console.print(f"[green]โœ“ Findings written to {output_path}[/green]")

    if not findings:
        console.print("[green]โœ“ No AI-related security signals found in git history[/green]")
        _save_findings()
        raise typer.Exit(code=0)

    critical = [f for f in findings if f.severity == "critical"]
    high = [f for f in findings if f.severity == "high"]
    medium = [f for f in findings if f.severity == "medium"]
    info = [f for f in findings if f.severity == "info"]

    summary_table = Table(show_header=True, header_style="bold magenta")
    summary_table.add_column("Severity", style="cyan", width=10)
    summary_table.add_column("Count", style="green", width=7)
    summary_table.add_column("Type")

    if critical:
        summary_table.add_row("[red]CRITICAL[/red]", str(len(critical)), "API keys in git history")
    if high:
        summary_table.add_row("[yellow]HIGH[/yellow]", str(len(high)), "Sensitive files committed")
    if medium:
        summary_table.add_row("[blue]MEDIUM[/blue]", str(len(medium)), "AI config files tracked")
    if info:
        summary_table.add_row("[dim]INFO[/dim]", str(len(info)), "AI dependency timeline")

    console.print(summary_table)
    console.print()

    severity_color = {"critical": "red", "high": "yellow", "medium": "blue", "info": "dim"}
    # Info-level findings are only listed with --verbose.
    show = findings if verbose else [f for f in findings if f.severity in ("critical", "high", "medium")]

    for f in show:
        color = severity_color.get(f.severity, "white")
        # Finding text comes from repository content (paths such as
        # "src/[id]/page.tsx", commit authors, messages). Escape it so Rich
        # prints brackets literally instead of treating them as markup tags,
        # which would drop text or raise on a stray closing tag.
        rule_tag = escape(f"[{f.rule_id}]")
        console.print(f"[{color}]{f.severity.upper()}[/{color}] {rule_tag} {escape(str(f.message))}")
        if f.file_path:
            console.print(f" File: {escape(str(f.file_path))}")
        if f.commit_hash:
            parts = " ".join(filter(None, [f.commit_hash, f.commit_date, f.author]))
            console.print(f" Commit: {escape(parts)}")
        if f.detail:
            console.print(f" [dim]{escape(str(f.detail))}[/dim]")
        console.print()

    if not verbose and info:
        console.print(f"[dim]{len(info)} info finding(s) hidden โ€” use --verbose to show dependency timeline[/dim]\n")

    _save_findings()

    # Non-zero exit lets CI fail the build on serious findings.
    if critical or high:
        raise typer.Exit(code=1)
1332
+
1333
+
1334
# Allow running this module directly as a script; `app` is the Typer
# application the commands in this file are registered on.
if __name__ == "__main__":
    app()